Browse Source

Revert to using regex based sample detection

Closes #1964
pull/1966/head
Safihre 4 years ago
parent
commit
389a0d3afa
  1. 6
      sabnzbd/misc.py
  2. 4
      sabnzbd/postproc.py
  3. 32
      sabnzbd/sorting.py
  4. 38
      tests/test_misc.py
  5. 29
      tests/test_sorting.py

6
sabnzbd/misc.py

@ -43,6 +43,7 @@ from sabnzbd.filesystem import userxbit
TAB_UNITS = ("", "K", "M", "G", "T", "P")
RE_UNITS = re.compile(r"(\d+\.*\d*)\s*([KMGTP]?)", re.I)
RE_VERSION = re.compile(r"(\d+)\.(\d+)\.(\d+)([a-zA-Z]*)(\d*)")
RE_SAMPLE = re.compile(r"((^|[\W_])(sample|proof))", re.I) # something-sample or something-proof
RE_IP4 = re.compile(r"inet\s+(addr:\s*)?(\d+\.\d+\.\d+\.\d+)")
RE_IP6 = re.compile(r"inet6\s+(addr:\s*)?([0-9a-f:]+)", re.I)
@ -808,6 +809,11 @@ def get_all_passwords(nzo) -> List[str]:
return unique_passwords
def is_sample(filename: str) -> bool:
"""Try to determine if filename is (most likely) a sample"""
return bool(re.search(RE_SAMPLE, filename))
def find_on_path(targets):
"""Search the PATH for a program and return full path"""
if sabnzbd.WIN32:

4
sabnzbd/postproc.py

@ -39,7 +39,7 @@ from sabnzbd.newsunpack import (
is_sfv_file,
)
from threading import Thread
from sabnzbd.misc import on_cleanup_list
from sabnzbd.misc import on_cleanup_list, is_sample
from sabnzbd.filesystem import (
real_path,
get_unique_path,
@ -65,7 +65,7 @@ from sabnzbd.filesystem import (
get_filename,
)
from sabnzbd.nzbstuff import NzbObject
from sabnzbd.sorting import Sorter, is_sample
from sabnzbd.sorting import Sorter
from sabnzbd.constants import (
REPAIR_PRIORITY,
FORCE_PRIORITY,

32
sabnzbd/sorting.py

@ -39,6 +39,7 @@ from sabnzbd.filesystem import (
)
import sabnzbd.cfg as cfg
from sabnzbd.constants import EXCLUDED_GUESSIT_PROPERTIES, IGNORED_MOVIE_FOLDERS
from sabnzbd.misc import is_sample
from sabnzbd.nzbstuff import NzbObject, scan_password
# Do not rename .vob files as they are usually DVD's
@ -630,40 +631,9 @@ def guess_what(name: str, sort_type: Optional[str] = None) -> MatchesDict:
):
guess["type"] = "unknown"
# Remove sample indicators from groupnames, e.g. 'sample-groupname' or 'groupname-proof'
group = guess.get("release_group", "")
if group.lower().startswith(("sample-", "proof-")) or group.lower().endswith(("-sample", "-proof")):
# Set clean groupname
guess["release_group"] = re.sub("^(sample|proof)-|-(sample|proof)$", "", group, re.I)
# Add 'Sample' property to the guess
other = guess.get("other")
if not other:
guess.setdefault("other", "Sample")
else:
if "Sample" not in guess["other"]:
# Pre-existing 'other' may be a string or a list
try:
guess["other"].append("Sample")
except AttributeError:
guess["other"] = [other, "Sample"]
return guess
def is_sample(filename: str) -> bool:
"""Try to determine if filename belongs to a sample"""
if os.path.splitext(filename)[0].lower().strip() in ("sample", "proof"):
# The entire filename is just 'sample.ext' or similar
return True
# If that didn't work, start guessing
guess = guess_what(filename).get("other", "")
if isinstance(guess, list):
return any(item in ("Sample", "Proof") for item in guess)
else:
return guess in ("Sample", "Proof")
def path_subst(path: str, mapping: List[Tuple[str, str]]) -> str:
"""Replace the sort string elements in the path with the real values provided by the mapping;
non-elements are copied verbatim."""

38
tests/test_misc.py

@ -215,6 +215,44 @@ class TestMisc:
os.unlink("test.key")
@pytest.mark.parametrize(
"name, result",
[
("Free.Open.Source.Movie.2001.1080p.WEB-DL.DD5.1.H264-FOSS", False), # Not samples
("Setup.exe", False),
("23.123.hdtv-rofl", False),
("Something.1080p.WEB-DL.DD5.1.H264-EMRG-sample", True), # Samples
("Something.1080p.WEB-DL.DD5.1.H264-EMRG-sample.ogg", True),
("Sumtin_Else_1080p_WEB-DL_DD5.1_H264_proof-EMRG", True),
("Wot.Eva.540i.WEB-DL.aac.H264-Groupie sample.mp4", True),
("file-sample.mkv", True),
("PROOF.JPG", True),
("Bla.s01e02.title.1080p.aac-sample proof.mkv", True),
("Bla.s01e02.title.1080p.aac-proof.mkv", True),
("Bla.s01e02.title.1080p.aac sample proof.mkv", True),
("Bla.s01e02.title.1080p.aac proof.mkv", True),
("Lwtn.s08e26.1080p.web.h264-glhf-sample.par2", True),
("Lwtn.s08e26.1080p.web.h264-glhf-sample.vol001-002.par2", True),
("Look at That 2011 540i WEB-DL.H265-NoSample", False),
],
)
def test_is_sample(self, name, result):
assert misc.is_sample(name) == result
@pytest.mark.parametrize(
"name, result",
[
("Not Death Proof (2022) 1080p x264 (DD5.1) BE Subs", False), # Try to trigger some false positives
("Proof.of.Everything.(2042).4320p.x266-4U", False),
("Crime_Scene_S01E13_Free_Sample_For_Sale_480p-OhDear", False),
("Sample That 2011 480p WEB-DL.H265-aMiGo", False),
("NOT A SAMPLE.JPG", False),
],
)
def test_is_sample_known_false_positives(self, name, result):
"""We know these fail, but don't have a better solution for them at the moment."""
assert misc.is_sample(name) != result
@pytest.mark.parametrize(
"test_input, expected_output",
[
(["cmd1", 9, "cmd3"], '"cmd1" "9" "cmd3"'), # sending all commands as valid string

29
tests/test_sorting.py

@ -66,7 +66,7 @@ class TestSortingFunctions:
"country": "US",
},
),
("Test Movie 720p HDTV AAC x265 sample-MYgroup", {"release_group": "MYgroup", "other": "Sample"}),
("Test Movie 720p HDTV AAC x265 MYgroup-Sample", {"release_group": "MYgroup", "other": "Sample"}),
(None, None), # Jobname missing
("", None),
],
@ -86,33 +86,6 @@ class TestSortingFunctions:
else:
assert guess[key] == value
@pytest.mark.parametrize(
"name, result",
[
("Free.Open.Source.Movie.2001.1080p.WEB-DL.DD5.1.H264-FOSS", False), # Not samples
("Setup.exe", False),
("23.123.hdtv-rofl", False),
("Something.1080p.WEB-DL.DD5.1.H264-EMRG-sample", True), # Samples
("Something.1080p.WEB-DL.DD5.1.H264-EMRG-sample.ogg", True),
("Sumtin_Else_1080p_WEB-DL_DD5.1_H264_proof-EMRG", True),
("Wot.Eva.540i.WEB-DL.aac.H264-Groupie sample.mp4", True),
("file-sample.mkv", True),
("PROOF.JPG", True),
("Bla.s01e02.title.1080p.aac-sample proof.mkv", True),
("Bla.s01e02.title.1080p.aac-proof.mkv", True),
("Bla.s01e02.title.1080p.aac sample proof.mkv", True),
("Bla.s01e02.title.1080p.aac proof.mkv", True),
("Not Death Proof (2022) 1080p x264 (DD5.1) BE Subs", False), # Try to trigger some false positives
("Proof.of.Everything.(2042).4320p.x266-4U", False),
("Crime_Scene_S01E13_Free_Sample_For_Sale_480p-OhDear", False),
("Sample That 2011 480p WEB-DL.H265-aMiGo", False),
("Look at That 2011 540i WEB-DL.H265-NoSample", False),
("NOT A SAMPLE.JPG", False),
],
)
def test_is_sample(self, name, result):
assert sorting.is_sample(name) == result
@pytest.mark.parametrize("platform", ["linux", "darwin", "win32"])
@pytest.mark.parametrize(
"path, result_unix, result_win",

Loading…
Cancel
Save