Browse Source

rXX files are popular extensions and don't need renames

Closes #1955
pull/1959/head
Safihre 4 years ago
parent
commit
4fe977fa47
  1. 5
      sabnzbd/deobfuscate_filenames.py
  2. 12
      sabnzbd/utils/file_extension.py
  3. 4
      tests/test_file_extension.py

5
sabnzbd/deobfuscate_filenames.py

@ -166,7 +166,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str):
# 2. if no meaningful extension, add it
# 3. based on detecting obfuscated filenames
# to be sure, only keep really exsiting files:
# to be sure, only keep really existing files:
filelist = [f for f in filelist if os.path.isfile(f)]
# let's see if there are files with uncommon/unpopular (so: obfuscated) extensions
@ -176,7 +176,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str):
for file in filelist:
if file_extension.has_popular_extension(file):
# common extension, like .doc or .iso, so assume OK and change nothing
logging.debug("extension of %s looks common", file)
logging.debug("Extension of %s looks common", file)
newlist.append(file)
else:
# uncommon (so: obfuscated) extension
@ -220,6 +220,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str):
# check that file is still there (and not renamed by the secondary renaming process below)
if not os.path.isfile(filename):
continue
logging.debug("Deobfuscate inspecting %s", filename)
# Do we need to rename this file?
# Criteria: big, not-excluded extension, obfuscated (in that order)

12
sabnzbd/utils/file_extension.py

@ -8,10 +8,11 @@ Note: extension always contains a leading dot
import puremagic
import os
import sys
import re
from typing import List
from pathlib import Path
from sabnzbd.filesystem import get_ext
# common extension from https://www.computerhope.com/issues/ch001789.htm
POPULAR_EXT = (
"3g2",
@ -234,16 +235,19 @@ DOWNLOAD_EXT = (
"xpi",
)
# Merge both extension lists into one tuple of unique entries; every entry
# gets a leading dot, because we work with a leading dot in extensions
ALL_EXT = tuple("." + ext for ext in set(POPULAR_EXT + DOWNLOAD_EXT))

# Match old-style multi-rar extensions
SIMPLE_RAR_RE = re.compile(r"\.r\d\d\d?$", re.I)
def has_popular_extension(file_path: str) -> bool:
    """Return True if the extension of file_path is a popular, well-known extension.

    An extension counts as popular when it is listed in ALL_EXT or when it
    matches SIMPLE_RAR_RE (old-style multi-part RAR names such as .r01 or .r901).
    """
    file_extension = get_ext(file_path)
    # Pattern.match() returns a Match object or None; wrap it in bool() so the
    # function really returns a boolean, as its annotation promises
    return file_extension in ALL_EXT or bool(SIMPLE_RAR_RE.match(file_extension))
def all_possible_extensions(file_path: str) -> List[str]:

4
tests/test_file_extension.py

@ -29,6 +29,10 @@ class Test_File_Extension:
assert file_extension.has_popular_extension("blabla/blabla.mkv")
assert file_extension.has_popular_extension("blabla/blabla.srt")
assert file_extension.has_popular_extension("djjddj/aaaaa.epub")
assert file_extension.has_popular_extension("test/testing.r01")
assert file_extension.has_popular_extension("test/testing.r901")
assert not file_extension.has_popular_extension("test/testing")
assert not file_extension.has_popular_extension("test/testing.rar01")
assert not file_extension.has_popular_extension("98ads098f098fa.a0ds98f098asdf")
def test_what_is_most_likely_extension(self):

Loading…
Cancel
Save