diff --git a/sabnzbd/deobfuscate_filenames.py b/sabnzbd/deobfuscate_filenames.py index 37e4c1f..68f9957 100644 --- a/sabnzbd/deobfuscate_filenames.py +++ b/sabnzbd/deobfuscate_filenames.py @@ -166,7 +166,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str): # 2. if no meaningful extension, add it # 3. based on detecting obfuscated filenames - # to be sure, only keep really exsiting files: + # to be sure, only keep really existing files: filelist = [f for f in filelist if os.path.isfile(f)] # let's see if there are files with uncommon/unpopular (so: obfuscated) extensions @@ -176,7 +176,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str): for file in filelist: if file_extension.has_popular_extension(file): # common extension, like .doc or .iso, so assume OK and change nothing - logging.debug("extension of %s looks common", file) + logging.debug("Extension of %s looks common", file) newlist.append(file) else: # uncommon (so: obfuscated) extension @@ -220,6 +220,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str): # check that file is still there (and not renamed by the secondary renaming process below) if not os.path.isfile(filename): continue + logging.debug("Deobfuscate inspecting %s", filename) # Do we need to rename this file? 
# Criteria: big, not-excluded extension, obfuscated (in that order) diff --git a/sabnzbd/utils/file_extension.py b/sabnzbd/utils/file_extension.py index 5432ac3..b12e879 100644 --- a/sabnzbd/utils/file_extension.py +++ b/sabnzbd/utils/file_extension.py @@ -8,10 +8,11 @@ Note: extension always contains a leading dot import puremagic import os import sys +import re from typing import List -from pathlib import Path from sabnzbd.filesystem import get_ext + # common extension from https://www.computerhope.com/issues/ch001789.htm POPULAR_EXT = ( "3g2", @@ -234,16 +235,19 @@ DOWNLOAD_EXT = ( "xpi", ) -# combine to one tuple, with unique entries: +# Combine to one tuple, with unique entries: ALL_EXT = tuple(set(POPULAR_EXT + DOWNLOAD_EXT)) -# prepend a dot to each extension, because we work with a leading dot in extensions +# Prepend a dot to each extension, because we work with a leading dot in extensions ALL_EXT = tuple(["." + i for i in ALL_EXT]) +# Match old-style multi-rar extensions +SIMPLE_RAR_RE = re.compile(r"\.r\d\d\d?$", re.I) + def has_popular_extension(file_path: str) -> bool: """returns boolean if the extension of file_path is a popular, well-known extension""" file_extension = get_ext(file_path) - return file_extension in ALL_EXT + return file_extension in ALL_EXT or bool(SIMPLE_RAR_RE.match(file_extension)) def all_possible_extensions(file_path: str) -> List[str]: diff --git a/tests/test_file_extension.py b/tests/test_file_extension.py index 0d1ed00..81370a7 100644 --- a/tests/test_file_extension.py +++ b/tests/test_file_extension.py @@ -29,6 +29,10 @@ class Test_File_Extension: assert file_extension.has_popular_extension("blabla/blabla.mkv") assert file_extension.has_popular_extension("blabla/blabla.srt") assert file_extension.has_popular_extension("djjddj/aaaaa.epub") + assert file_extension.has_popular_extension("test/testing.r01") + assert file_extension.has_popular_extension("test/testing.r901") + assert not 
file_extension.has_popular_extension("test/testing") + assert not file_extension.has_popular_extension("test/testing.rar01") assert not file_extension.has_popular_extension("98ads098f098fa.a0ds98f098asdf") def test_what_is_most_likely_extension(self):