Browse Source

deobfuscate: rename accompanying (smaller) files with same basename, and no renaming of collections with same extension (#1826)

* deobfuscate: rename accompanying (smaller) files with same basename

* deobfuscate: do not rename collections of same extension

* deobfuscate: collection ... much easier with one loop, thanks safihre.

* deobfuscate: globber_full, and cleanup

* deobfuscate: unittest test_deobfuscate_big_file_small_accompanying_files

* deobfuscate: unittest test_deobfuscate_collection_with_same_extension

* deobfuscate: unittest test_deobfuscate_collection_with_same_extension
pull/1830/head
Sander 4 years ago
committed by GitHub
parent
commit
dde453744d
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 49
      sabnzbd/deobfuscate_filenames.py
  2. 106
      tests/test_deobfuscate_filenames.py

49
sabnzbd/deobfuscate_filenames.py

@ -33,7 +33,7 @@ import logging
import os
import re
from sabnzbd.filesystem import get_unique_filename, renamer, get_ext
from sabnzbd.filesystem import get_unique_filename, renamer, get_ext, globber_full
from sabnzbd.par2file import is_parfile, parse_par2_file
# Files to exclude and minimal file size for renaming
@ -132,14 +132,13 @@ def is_probably_obfuscated(myinputfilename):
def deobfuscate_list(filelist, usefulname):
""" Check all files in filelist, and if wanted, deobfuscate """
""" Check all files in filelist, and if wanted, deobfuscate: rename to filename based on usefulname"""
# to be sure, only keep really exsiting files:
filelist = [f for f in filelist if os.path.exists(f)]
# Search for par2 files in the filelist
par2_files = [f for f in filelist if f.endswith(".par2")]
# Found any par2 files we can use?
run_renamer = True
if not par2_files:
@ -157,22 +156,56 @@ def deobfuscate_list(filelist, usefulname):
# No par2 files? Then we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name
if run_renamer:
excluded_file_exts = EXCLUDED_FILE_EXTS
# If there is a collection with bigger files with the same extension, we don't want to rename it
extcounter = {}
for file in filelist:
if os.path.getsize(file) < MIN_FILE_SIZE:
# too small to care
continue
_, ext = os.path.splitext(file)
if ext in extcounter:
extcounter[ext] += 1
else:
extcounter[ext] = 1
if extcounter[ext] >= 3 and ext not in excluded_file_exts:
# collection, and extension not yet in excluded_file_exts, so add it
excluded_file_exts = (*excluded_file_exts, ext)
logging.debug(
"Found a collection of at least %s files with extension %s, so not renaming those files",
extcounter[ext],
ext,
)
logging.debug("Trying to see if there are qualifying files to be deobfuscated")
# We want to start with he biggest file ... probably the most important file
filelist = sorted(filelist, key=os.path.getsize, reverse=True)
for filename in filelist:
# check that file is still there (and not renamed by the secondary renaming process below)
if not os.path.isfile(filename):
continue
logging.debug("Deobfuscate inspecting %s", filename)
file_size = os.path.getsize(filename)
# Do we need to rename this file?
# Criteria: big, not-excluded extension, obfuscated (in that order)
if (
file_size > MIN_FILE_SIZE
and get_ext(filename) not in EXCLUDED_FILE_EXTS
os.path.getsize(filename) > MIN_FILE_SIZE
and get_ext(filename) not in excluded_file_exts
and is_probably_obfuscated(filename) # this as last test to avoid unnecessary analysis
):
# OK, rename
# Rename and make sure the new filename is unique
path, file = os.path.split(filename)
new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename)))
logging.info("Deobfuscate renaming %s to %s", filename, new_name)
# Rename and make sure the new filename is unique
renamer(filename, new_name)
# find other files with the same basename (in the same directory), and rename them in the same way:
basedirfile, _ = os.path.splitext(filename)
dir, basefilename = os.path.split(basedirfile)
for samebasenamefile in globber_full(dir, basefilename + "*"):
path, file = os.path.split(samebasenamefile)
remainingextension = samebasenamefile.replace(basedirfile, "") # might be ".dut.srt"
new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), remainingextension))
logging.info("Deobfuscate renaming %s to %s", samebasenamefile, new_name)
# Rename and make sure the new filename is unique
renamer(samebasenamefile, new_name)
else:
logging.info("No qualifying files found to deobfuscate")

106
tests/test_deobfuscate_filenames.py

@ -32,6 +32,11 @@ def create_big_file(filename):
myfile.truncate(15 * 1024 * 1024)
def create_small_file(filename):
with open(filename, "wb") as myfile:
myfile.truncate(1024)
class TestDeobfuscateFinalResult:
def test_is_probably_obfuscated(self):
# Test the base function test_is_probably_obfuscated(), which gives a boolean as RC
@ -180,6 +185,107 @@ class TestDeobfuscateFinalResult:
# Done. Remove (non-empty) directory
shutil.rmtree(dirname)
def test_deobfuscate_big_file_small_accompanying_files(self):
# input: myiso.iso, with accompanying files (.srt files in this case)
# test that the small accompanying files (with same basename) are renamed accordingly to the big ISO
# Create directory (with a random directory name)
dirname = os.path.join(SAB_DATA_DIR, "testdir" + str(random.randint(10000, 99999)))
os.mkdir(dirname)
# Create a big enough file with a non-useful filename
isofile = os.path.join(dirname, "myiso.iso")
create_big_file(isofile)
assert os.path.isfile(isofile)
# and a srt file
srtfile = os.path.join(dirname, "myiso.srt")
create_small_file(srtfile)
assert os.path.isfile(srtfile)
# and a dut.srt file
dutsrtfile = os.path.join(dirname, "myiso.dut.srt")
create_small_file(dutsrtfile)
assert os.path.isfile(dutsrtfile)
# and a non-related file
txtfile = os.path.join(dirname, "something.txt")
create_small_file(txtfile)
assert os.path.isfile(txtfile)
# create the filelist, with just the above files
myfilelist = [isofile, srtfile, dutsrtfile, txtfile]
# and now unleash the magic on that filelist, with a more useful jobname:
jobname = "My Important Download 2020"
deobfuscate_list(myfilelist, jobname)
# Check original files:
assert not os.path.isfile(isofile) # original iso not be there anymore
assert not os.path.isfile(srtfile) # ... and accompanying file neither
assert not os.path.isfile(dutsrtfile) # ... and this one neither
assert os.path.isfile(txtfile) # should still be there: not accompanying, and too small to rename
# Check the renaming
assert os.path.isfile(os.path.join(dirname, jobname + ".iso")) # ... should be renamed to the jobname
assert os.path.isfile(os.path.join(dirname, jobname + ".srt")) # ... should be renamed to the jobname
assert os.path.isfile(os.path.join(dirname, jobname + ".dut.srt")) # ... should be renamed to the jobname
# Done. Remove (non-empty) directory
shutil.rmtree(dirname)
def test_deobfuscate_collection_with_same_extension(self):
# input: a collection of bigger files with the same extension
# test that there is no renaming on the collection ... as that's useless on a collection
# Create directory (with a random directory name)
dirname = os.path.join(SAB_DATA_DIR, "testdir" + str(random.randint(10000, 99999)))
os.mkdir(dirname)
# Create big enough files with a non-useful filenames, all with same extension
file1 = os.path.join(dirname, "file1.bla")
create_big_file(file1)
assert os.path.isfile(file1)
file2 = os.path.join(dirname, "file2.bla")
create_big_file(file2)
assert os.path.isfile(file2)
file3 = os.path.join(dirname, "file3.bla")
create_big_file(file3)
assert os.path.isfile(file3)
file4 = os.path.join(dirname, "file4.bla")
create_big_file(file4)
assert os.path.isfile(file4)
# other extension ... so this one should get renamed
otherfile = os.path.join(dirname, "other.bin")
create_big_file(otherfile)
assert os.path.isfile(otherfile)
# create the filelist, with the above files
myfilelist = [file1, file2, file3, file4, otherfile]
# and now unleash the magic on that filelist, with a more useful jobname:
jobname = "My Important Download 2020"
deobfuscate_list(myfilelist, jobname)
# Check original files:
# the collection with same extension should still be there:
assert os.path.isfile(file1) # still there
assert os.path.isfile(file2) # still there
assert os.path.isfile(file3) # still there
assert os.path.isfile(file4) # still there
# but the one separate file with obfuscated name should be renamed:
assert not os.path.isfile(otherfile) # should be renamed
# Check the renaming
assert os.path.isfile(os.path.join(dirname, jobname + ".bin")) # ... should be renamed to the jobname
# Done. Remove (non-empty) directory
shutil.rmtree(dirname)
def test_deobfuscate_filelist_nasty_tests(self):
# check no problems occur with nasty use cases

Loading…
Cancel
Save