diff --git a/sabnzbd/misc.py b/sabnzbd/misc.py index 6be7705..eb87999 100644 --- a/sabnzbd/misc.py +++ b/sabnzbd/misc.py @@ -43,6 +43,7 @@ from sabnzbd.filesystem import userxbit TAB_UNITS = ("", "K", "M", "G", "T", "P") RE_UNITS = re.compile(r"(\d+\.*\d*)\s*([KMGTP]?)", re.I) RE_VERSION = re.compile(r"(\d+)\.(\d+)\.(\d+)([a-zA-Z]*)(\d*)") +RE_SAMPLE = re.compile(r"((^|[\W_])(sample|proof))", re.I) # something-sample or something-proof RE_IP4 = re.compile(r"inet\s+(addr:\s*)?(\d+\.\d+\.\d+\.\d+)") RE_IP6 = re.compile(r"inet6\s+(addr:\s*)?([0-9a-f:]+)", re.I) @@ -808,6 +809,11 @@ def get_all_passwords(nzo) -> List[str]: return unique_passwords +def is_sample(filename: str) -> bool: + """Try to determine if filename is (most likely) a sample""" + return bool(re.search(RE_SAMPLE, filename)) + + def find_on_path(targets): """Search the PATH for a program and return full path""" if sabnzbd.WIN32: diff --git a/sabnzbd/postproc.py b/sabnzbd/postproc.py index 6d12cc2..d8d8ff5 100644 --- a/sabnzbd/postproc.py +++ b/sabnzbd/postproc.py @@ -39,7 +39,7 @@ from sabnzbd.newsunpack import ( is_sfv_file, ) from threading import Thread -from sabnzbd.misc import on_cleanup_list +from sabnzbd.misc import on_cleanup_list, is_sample from sabnzbd.filesystem import ( real_path, get_unique_path, @@ -65,7 +65,7 @@ from sabnzbd.filesystem import ( get_filename, ) from sabnzbd.nzbstuff import NzbObject -from sabnzbd.sorting import Sorter, is_sample +from sabnzbd.sorting import Sorter from sabnzbd.constants import ( REPAIR_PRIORITY, FORCE_PRIORITY, diff --git a/sabnzbd/sorting.py b/sabnzbd/sorting.py index f1a2fe0..91f659b 100644 --- a/sabnzbd/sorting.py +++ b/sabnzbd/sorting.py @@ -39,6 +39,7 @@ from sabnzbd.filesystem import ( ) import sabnzbd.cfg as cfg from sabnzbd.constants import EXCLUDED_GUESSIT_PROPERTIES, IGNORED_MOVIE_FOLDERS +from sabnzbd.misc import is_sample from sabnzbd.nzbstuff import NzbObject, scan_password # Do not rename .vob files as they are usually DVD's @@ -630,40 +631,9 @@ def guess_what(name: str, sort_type: Optional[str] = None) -> MatchesDict: ): guess["type"] = "unknown" - # Remove sample indicators from groupnames, e.g. 'sample-groupname' or 'groupname-proof' - group = guess.get("release_group", "") - if group.lower().startswith(("sample-", "proof-")) or group.lower().endswith(("-sample", "-proof")): - # Set clean groupname - guess["release_group"] = re.sub("^(sample|proof)-|-(sample|proof)$", "", group, re.I) - # Add 'Sample' property to the guess - other = guess.get("other") - if not other: - guess.setdefault("other", "Sample") - else: - if "Sample" not in guess["other"]: - # Pre-existing 'other' may be a string or a list - try: - guess["other"].append("Sample") - except AttributeError: - guess["other"] = [other, "Sample"] - return guess -def is_sample(filename: str) -> bool: - """Try to determine if filename belongs to a sample""" - if os.path.splitext(filename)[0].lower().strip() in ("sample", "proof"): - # The entire filename is just 'sample.ext' or similar - return True - - # If that didn't work, start guessing - guess = guess_what(filename).get("other", "") - if isinstance(guess, list): - return any(item in ("Sample", "Proof") for item in guess) - else: - return guess in ("Sample", "Proof") - - def path_subst(path: str, mapping: List[Tuple[str, str]]) -> str: """Replace the sort string elements in the path with the real values provided by the mapping; non-elements are copied verbatim.""" diff --git a/tests/test_misc.py b/tests/test_misc.py index 6a57027..0d0f4c4 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -215,6 +215,44 @@ class TestMisc: os.unlink("test.key") @pytest.mark.parametrize( + "name, result", + [ + ("Free.Open.Source.Movie.2001.1080p.WEB-DL.DD5.1.H264-FOSS", False), # Not samples + ("Setup.exe", False), + ("23.123.hdtv-rofl", False), + ("Something.1080p.WEB-DL.DD5.1.H264-EMRG-sample", True), # Samples + ("Something.1080p.WEB-DL.DD5.1.H264-EMRG-sample.ogg", True), + ("Sumtin_Else_1080p_WEB-DL_DD5.1_H264_proof-EMRG", True), + ("Wot.Eva.540i.WEB-DL.aac.H264-Groupie sample.mp4", True), + ("file-sample.mkv", True), + ("PROOF.JPG", True), + ("Bla.s01e02.title.1080p.aac-sample proof.mkv", True), + ("Bla.s01e02.title.1080p.aac-proof.mkv", True), + ("Bla.s01e02.title.1080p.aac sample proof.mkv", True), + ("Bla.s01e02.title.1080p.aac proof.mkv", True), + ("Lwtn.s08e26.1080p.web.h264-glhf-sample.par2", True), + ("Lwtn.s08e26.1080p.web.h264-glhf-sample.vol001-002.par2", True), + ("Look at That 2011 540i WEB-DL.H265-NoSample", False), + ], + ) + def test_is_sample(self, name, result): + assert misc.is_sample(name) == result + + @pytest.mark.parametrize( + "name, result", + [ + ("Not Death Proof (2022) 1080p x264 (DD5.1) BE Subs", False), # Try to trigger some false positives + ("Proof.of.Everything.(2042).4320p.x266-4U", False), + ("Crime_Scene_S01E13_Free_Sample_For_Sale_480p-OhDear", False), + ("Sample That 2011 480p WEB-DL.H265-aMiGo", False), + ("NOT A SAMPLE.JPG", False), + ], + ) + def test_is_sample_known_false_positives(self, name, result): + """We know these fail, but don't have a better solution for them at the moment.""" + assert misc.is_sample(name) != result + + @pytest.mark.parametrize( "test_input, expected_output", [ (["cmd1", 9, "cmd3"], '"cmd1" "9" "cmd3"'), # sending all commands as valid string diff --git a/tests/test_sorting.py b/tests/test_sorting.py index 61a1d0c..7278cec 100644 --- a/tests/test_sorting.py +++ b/tests/test_sorting.py @@ -66,7 +66,7 @@ class TestSortingFunctions: "country": "US", }, ), - ("Test Movie 720p HDTV AAC x265 sample-MYgroup", {"release_group": "MYgroup", "other": "Sample"}), + ("Test Movie 720p HDTV AAC x265 MYgroup-Sample", {"release_group": "MYgroup", "other": "Sample"}), (None, None), # Jobname missing ("", None), ], @@ -86,33 +86,6 @@ class TestSortingFunctions: else: assert guess[key] == value - @pytest.mark.parametrize( - "name, result", - [ - ("Free.Open.Source.Movie.2001.1080p.WEB-DL.DD5.1.H264-FOSS", False), # Not samples - ("Setup.exe", False), - ("23.123.hdtv-rofl", False), - ("Something.1080p.WEB-DL.DD5.1.H264-EMRG-sample", True), # Samples - ("Something.1080p.WEB-DL.DD5.1.H264-EMRG-sample.ogg", True), - ("Sumtin_Else_1080p_WEB-DL_DD5.1_H264_proof-EMRG", True), - ("Wot.Eva.540i.WEB-DL.aac.H264-Groupie sample.mp4", True), - ("file-sample.mkv", True), - ("PROOF.JPG", True), - ("Bla.s01e02.title.1080p.aac-sample proof.mkv", True), - ("Bla.s01e02.title.1080p.aac-proof.mkv", True), - ("Bla.s01e02.title.1080p.aac sample proof.mkv", True), - ("Bla.s01e02.title.1080p.aac proof.mkv", True), - ("Not Death Proof (2022) 1080p x264 (DD5.1) BE Subs", False), # Try to trigger some false positives - ("Proof.of.Everything.(2042).4320p.x266-4U", False), - ("Crime_Scene_S01E13_Free_Sample_For_Sale_480p-OhDear", False), - ("Sample That 2011 480p WEB-DL.H265-aMiGo", False), - ("Look at That 2011 540i WEB-DL.H265-NoSample", False), - ("NOT A SAMPLE.JPG", False), - ], - ) - def test_is_sample(self, name, result): - assert sorting.is_sample(name) == result - @pytest.mark.parametrize("platform", ["linux", "darwin", "win32"]) @pytest.mark.parametrize( "path, result_unix, result_win",