Browse Source

rXX files are popular extensions and don't need renames

Closes #1955
pull/1959/head
Safihre 4 years ago
parent
commit
4fe977fa47
  1. 5
      sabnzbd/deobfuscate_filenames.py
  2. 12
      sabnzbd/utils/file_extension.py
  3. 4
      tests/test_file_extension.py

5
sabnzbd/deobfuscate_filenames.py

@ -166,7 +166,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str):
# 2. if no meaningful extension, add it # 2. if no meaningful extension, add it
# 3. based on detecting obfuscated filenames # 3. based on detecting obfuscated filenames
# to be sure, only keep really exsiting files: # to be sure, only keep really existing files:
filelist = [f for f in filelist if os.path.isfile(f)] filelist = [f for f in filelist if os.path.isfile(f)]
# let's see if there are files with uncommon/unpopular (so: obfuscated) extensions # let's see if there are files with uncommon/unpopular (so: obfuscated) extensions
@ -176,7 +176,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str):
for file in filelist: for file in filelist:
if file_extension.has_popular_extension(file): if file_extension.has_popular_extension(file):
# common extension, like .doc or .iso, so assume OK and change nothing # common extension, like .doc or .iso, so assume OK and change nothing
logging.debug("extension of %s looks common", file) logging.debug("Extension of %s looks common", file)
newlist.append(file) newlist.append(file)
else: else:
# uncommon (so: obfuscated) extension # uncommon (so: obfuscated) extension
@ -220,6 +220,7 @@ def deobfuscate_list(filelist: List[str], usefulname: str):
# check that file is still there (and not renamed by the secondary renaming process below) # check that file is still there (and not renamed by the secondary renaming process below)
if not os.path.isfile(filename): if not os.path.isfile(filename):
continue continue
logging.debug("Deobfuscate inspecting %s", filename) logging.debug("Deobfuscate inspecting %s", filename)
# Do we need to rename this file? # Do we need to rename this file?
# Criteria: big, not-excluded extension, obfuscated (in that order) # Criteria: big, not-excluded extension, obfuscated (in that order)

12
sabnzbd/utils/file_extension.py

@ -8,10 +8,11 @@ Note: extension always contains a leading dot
import puremagic import puremagic
import os import os
import sys import sys
import re
from typing import List from typing import List
from pathlib import Path
from sabnzbd.filesystem import get_ext from sabnzbd.filesystem import get_ext
# common extension from https://www.computerhope.com/issues/ch001789.htm # common extension from https://www.computerhope.com/issues/ch001789.htm
POPULAR_EXT = ( POPULAR_EXT = (
"3g2", "3g2",
@ -234,16 +235,19 @@ DOWNLOAD_EXT = (
"xpi", "xpi",
) )
# Merge the popular and download extension lists into one tuple of unique
# entries; each entry gets a leading dot, because extensions in this module
# always carry one.
ALL_EXT = tuple("." + ext for ext in set(POPULAR_EXT + DOWNLOAD_EXT))

# Old-style multi-volume RAR extensions: ".r" followed by two or three digits
SIMPLE_RAR_RE = re.compile(r"\.r\d\d\d?$", re.I)
def has_popular_extension(file_path: str) -> bool:
    """Return True if the extension of file_path is a popular, well-known extension.

    An extension counts as popular when it is listed in ALL_EXT, or when it
    matches SIMPLE_RAR_RE, i.e. an old-style multi-volume RAR extension
    (".r" followed by two or three digits, such as ".r01" or ".r901").
    """
    file_extension = get_ext(file_path)
    if file_extension in ALL_EXT:
        return True
    # Pattern.match() yields a Match object or None; compare explicitly so the
    # function really returns the bool its signature declares.
    return SIMPLE_RAR_RE.match(file_extension) is not None
def all_possible_extensions(file_path: str) -> List[str]: def all_possible_extensions(file_path: str) -> List[str]:

4
tests/test_file_extension.py

@ -29,6 +29,10 @@ class Test_File_Extension:
assert file_extension.has_popular_extension("blabla/blabla.mkv") assert file_extension.has_popular_extension("blabla/blabla.mkv")
assert file_extension.has_popular_extension("blabla/blabla.srt") assert file_extension.has_popular_extension("blabla/blabla.srt")
assert file_extension.has_popular_extension("djjddj/aaaaa.epub") assert file_extension.has_popular_extension("djjddj/aaaaa.epub")
assert file_extension.has_popular_extension("test/testing.r01")
assert file_extension.has_popular_extension("test/testing.r901")
assert not file_extension.has_popular_extension("test/testing")
assert not file_extension.has_popular_extension("test/testing.rar01")
assert not file_extension.has_popular_extension("98ads098f098fa.a0ds98f098asdf") assert not file_extension.has_popular_extension("98ads098f098fa.a0ds98f098asdf")
def test_what_is_most_likely_extension(self): def test_what_is_most_likely_extension(self):

Loading…
Cancel
Save