Browse Source

Smarter extraction of filenames from NZB-subject

bugfix/handle_du
Safihre 4 years ago
parent
commit
1a848cf5fe
  1. 22
      sabnzbd/nzbstuff.py
  2. 38
      tests/test_nzbstuff.py

22
sabnzbd/nzbstuff.py

@ -85,11 +85,11 @@ import sabnzbd.cfg as cfg
import sabnzbd.nzbparser
from sabnzbd.downloader import Server
from sabnzbd.database import HistoryDB
from sabnzbd.deobfuscate_filenames import *
from sabnzbd.deobfuscate_filenames import is_probably_obfuscated
# Name patterns
# NOTE(review): SUBJECT_FN_MATCHER and RE_SUBJECT_FILENAME_QUOTES are the exact same
# pattern under two names; presumably only one of them is meant to remain - confirm.
# Captures whatever sits between a pair of double quotes (the capture may be empty)
SUBJECT_FN_MATCHER = re.compile(r'"([^"]*)"')
# A dot followed by 1-5 word characters at the very end of the string
RE_NORMAL_NAME = re.compile(r"\.\w{1,5}$") # Test reasonably sized extension at the end
RE_SUBJECT_FILENAME_QUOTES = re.compile(r'"([^"]*)"') # In the subject, we expect the filename within double quotes
# A run of word characters, - + ( ) ' , . and whitespace, ending in a dot plus a
# 2-4 word-character extension. Brackets and quotes are not in the set, so they delimit a match.
RE_SUBJECT_BASIC_FILENAME = re.compile(r"([\w\-+()'\s.,]*\.\w{2,4})") # Otherwise something that looks like a filename
# Case-insensitive match at the end of the string: ".rar", or ".r", ".s", ".t", ".u", ".v" followed by two digits
RE_RAR = re.compile(r"(\.rar|\.r\d\d|\.s\d\d|\.t\d\d|\.u\d\d|\.v\d\d)$", re.I)
# PROPER, REAL or REPACK as a separate token: preceded by start-of-string or one of ". _-"
# and followed by one of ". _-" or end-of-string. Case-sensitive (no re.I flag).
RE_PROPER = re.compile(r"(^|[\. _-])(PROPER|REAL|REPACK)([\. _-]|$)")
@ -326,7 +326,7 @@ class NzbFile(TryList):
self.date: datetime.datetime = date
self.subject: str = subject
self.type: Optional[str] = None
self.filename: str = name_extractor(subject)
self.filename: str = sanitize_filename(name_extractor(subject))
self.filename_checked = False
self.filepath: Optional[str] = None
@ -2123,10 +2123,20 @@ def scan_password(name: str) -> Tuple[str, Optional[str]]:
def name_extractor(subject: str) -> str:
    """Try to extract a file name from a subject line, return `subject` if in doubt.

    Two strategies are tried in order:

    1. Text wrapped in double quotes (``RE_SUBJECT_FILENAME_QUOTES``). Any
       non-empty quoted text is accepted, with or without an extension.
    2. Only when step 1 produced nothing: text that merely looks like a
       filename (``RE_SUBJECT_BASIC_FILENAME``).

    Within each strategy the last non-empty candidate in the subject wins.

    :param subject: the raw subject line to inspect
    :return: the extracted name, or `subject` unchanged when nothing matched
    """
    result = subject

    # Filename nicely wrapped in quotes
    for name in RE_SUBJECT_FILENAME_QUOTES.findall(subject):
        # Doubled quotes (""name"") leave stray quote characters and spaces behind
        name = name.strip(' "')
        if name:
            result = name

    # Found nothing? Try a basic filename-like search
    if result == subject:
        for name in RE_SUBJECT_BASIC_FILENAME.findall(subject):
            name = name.strip()
            if name:
                result = name

    # Still the untouched subject when neither strategy matched
    return result

38
tests/test_nzbstuff.py

@ -95,3 +95,41 @@ class TestNZBStuffHelpers:
for file_name, clean_file_name in file_names.items():
assert nzbstuff.create_work_name(file_name) == clean_file_name
@pytest.mark.parametrize(
    "subject, filename",
    [
        # Name wrapped in double quotes
        ('Great stuff (001/143) - "Filename.txt" yEnc (1/1)', "Filename.txt"),
        (
            '"910a284f98ebf57f6a531cd96da48838.vol01-03.par2" yEnc (1/3)',
            "910a284f98ebf57f6a531cd96da48838.vol01-03.par2",
        ),
        # Doubled quotes around the name
        ('Subject-KrzpfTest [02/30] - ""KrzpfTest.part.nzb"" yEnc', "KrzpfTest.part.nzb"),
        # Empty quotes: the filename-like text inside the brackets is expected instead
        (
            '[PRiVATE]-[WtFnZb]-[Supertje-_S03E11-12_-blabla_+_blabla_WEBDL-480p.mkv]-[4/12] - "" yEnc 9786 (1/1366)',
            "Supertje-_S03E11-12_-blabla_+_blabla_WEBDL-480p.mkv",
        ),
        (
            '[N3wZ] MAlXD245333\\::[PRiVATE]-[WtFnZb]-[Show.S04E04.720p.AMZN.WEBRip.x264-GalaxyTV.mkv]-[1/2] - "" yEnc 293197257 (1/573)',
            "Show.S04E04.720p.AMZN.WEBRip.x264-GalaxyTV.mkv",
        ),
        # Quoted name without an extension is still taken as-is
        (
            'reftestnzb bf1664007a71 [1/6] - "20b9152c-57eb-4d02-9586-66e30b8e3ac2" yEnc (1/22) 15728640',
            "20b9152c-57eb-4d02-9586-66e30b8e3ac2",
        ),
        # No quotes at all: filename-like text is expected
        (
            "Re: REQ Author Child's The Book-Thanks much - Child, Lee - Author - The Book.epub (1/1)",
            "REQ Author Child's The Book-Thanks much - Child, Lee - Author - The Book.epub",
        ),
        ('63258-0[001/101] - "63258-2.0" yEnc (1/250) (1/250)', "63258-2.0"),
        (
            "Singer - A Album (2005) - [04/25] - 02 Sweetest Somebody (I Know).flac",
            "- 02 Sweetest Somebody (I Know).flac",
        ),
        # Nothing quoted and nothing filename-like: the subject comes back unchanged
        ("<>random!>", "<>random!>"),
        ("nZb]-[Supertje-_S03E11-12_", "nZb]-[Supertje-_S03E11-12_"),
        ("Bla [Now it's done.exe]", "Now it's done.exe"),
    ],
)
def test_name_extractor(self, subject, filename):
    """Check that name_extractor() returns the expected name for each subject."""
    assert nzbstuff.name_extractor(subject) == filename

Loading…
Cancel
Save