You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
212 lines
9.2 KiB
212 lines
9.2 KiB
#!/usr/bin/python3 -OO
|
|
# Copyright 2007-2021 The SABnzbd-Team <team@sabnzbd.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
"""
|
|
|
|
Deobfuscation post-processing script:
|
|
|
|
Will check in the completed job folder if maybe there are par2 files,
|
|
for example "rename.par2", and use those to rename the files.
|
|
If there is no "rename.par2" available, it will rename large, not-excluded
|
|
files to the job-name in the queue if the filename looks obfuscated
|
|
|
|
Based on work by P1nGu1n
|
|
|
|
"""
|
|
|
|
import hashlib
|
|
import logging
|
|
import os
|
|
import re
|
|
|
|
from sabnzbd.filesystem import get_unique_filename, renamer, get_ext
|
|
from sabnzbd.par2file import is_parfile, parse_par2_file
|
|
|
|
# Extensions that are excluded from renaming
EXCLUDED_FILE_EXTS = (
    ".vob",
    ".rar",
    ".par2",
    ".mts",
    ".m2ts",
    ".cpi",
    ".clpi",
    ".mpl",
    ".mpls",
    ".bdm",
    ".bdmv",
)
# Minimal file size (in bytes, 10 MiB) a file must exceed to be considered for renaming
MIN_FILE_SIZE = 10 * 1024 * 1024
|
|
|
|
|
|
def decode_par2(parfile):
    """Parse a par2 file and rename files listed in the par2 to their real name.

    Every regular file in the folder of ``parfile`` is identified by the MD5
    digest of its first 16384 bytes. When that digest is a key of the mapping
    handed to ``parse_par2_file``, the file is renamed to the mapped name
    (made unique through ``get_unique_filename``).

    parfile (string): path of the par2 file to use.

    Returns True if at least one file was renamed, otherwise False. False is
    also returned when ``is_parfile`` rejects ``parfile``.
    """
    # Check if really a par2 file
    if not is_parfile(parfile):
        logging.info("Par2 file %s was not really a par2 file", parfile)
        return False

    # Parse the par2 file; the lookups below rely on parse_par2_file()
    # having filled this dict (digest of first 16k -> real file name)
    md5of16k = {}
    parse_par2_file(parfile, md5of16k)

    # Inspect all files in the folder of the par2 file
    dirname = os.path.dirname(parfile)
    result = False
    for fn in os.listdir(dirname):
        filepath = os.path.join(dirname, fn)
        # Only check files
        if not os.path.isfile(filepath):
            continue

        with open(filepath, "rb") as file_to_match:
            first16k_data = file_to_match.read(16384)

        # Check if we have this hash
        file_md5of16k = hashlib.md5(first16k_data).digest()
        if file_md5of16k not in md5of16k:
            continue

        real_name = md5of16k[file_md5of16k]
        if real_name == fn:
            # Already carries its real name: renaming would only push it to
            # a needlessly "uniquified" variant of the very same name
            continue

        # Make sure it's a unique name
        new_path = os.path.join(dirname, real_name)
        renamer(filepath, get_unique_filename(new_path))
        result = True
    return result
|
|
|
|
|
|
def is_probably_obfuscated(myinputfilename):
    """Returns boolean if filename is likely obfuscated. Default: True

    myinputfilename (string) can be a plain file name, or a full path.
    Only the base name (no directory, last extension stripped) is judged.

    First a few fixed patterns are checked that mark the name as obfuscated,
    then a few heuristics based on character counts that mark it as a normal
    name. If nothing matches (including an empty base name), True is returned.
    """

    # Find filebasename
    filename = os.path.basename(myinputfilename)
    filebasename = os.path.splitext(filename)[0]

    # First fixed patterns that we know of:
    logging.debug("Checking: %s", filebasename)

    # ...blabla.H.264/b082fa0beaa644d3aa01045d5b8d0b36.mkv is certainly obfuscated
    if re.search(r"^[a-f0-9]{32}$", filebasename):
        logging.debug("Obfuscated: 32 hex digit")
        # exactly 32 hex digits, so:
        return True

    # 0675e29e9abfd2.f7d069dab0b853283cc1b069a25f82.6547
    if re.search(r"^[a-f0-9\.]{40,}$", filebasename):
        logging.debug("Obfuscated: starting with 40+ lower case hex digits and/or dots")
        return True

    # /some/thing/abc.xyz.a4c567edbcbf27.BLA is certainly obfuscated
    if re.search(r"^abc\.xyz", filebasename):
        logging.debug("Obfuscated: starts with 'abc.xyz'")
        # ... which we consider as obfuscated:
        return True

    # these are signals for the obfuscation versus non-obfuscation
    decimals = sum(1 for c in filebasename if c.isnumeric())
    upperchars = sum(1 for c in filebasename if c.isupper())
    lowerchars = sum(1 for c in filebasename if c.islower())
    spacesdots = sum(1 for c in filebasename if c in " ._")  # space-like symbols

    # Example: "Great Distro"
    if upperchars >= 2 and lowerchars >= 2 and spacesdots >= 1:
        logging.debug("Not obfuscated: upperchars >= 2 and lowerchars >= 2 and spacesdots >= 1")
        return False

    # Example: "this is a download"
    if spacesdots >= 3:
        logging.debug("Not obfuscated: spacesdots >= 3")
        return False

    # Example: "Beast 2020"
    if (upperchars + lowerchars >= 4) and decimals >= 4 and spacesdots >= 1:
        logging.debug("Not obfuscated: (upperchars + lowerchars >= 4) and decimals >= 4 and spacesdots >= 1")
        return False

    # Example: "Catullus", starts with a capital, and most letters are lower case
    # The lowerchars test goes first: it guarantees a non-empty name (so the
    # [0] index is safe) and a non-zero divisor for the ratio.
    if lowerchars > 2 and filebasename[0].isupper() and upperchars / lowerchars <= 0.25:
        logging.debug("Not obfuscated: starts with a capital, and most letters are lower case")
        return False

    # If we get here, no trigger for a clear name was found, so let's default to obfuscated
    logging.debug("Obfuscated (default)")
    return True  # default: obfuscated
|
|
|
|
|
|
def deobfuscate_list(filelist, usefulname):
    """Check all files in filelist, and if wanted, deobfuscate: rename to filename based on usefulname

    filelist: paths to inspect; entries that do not exist are ignored.
    usefulname (string): base name (no extension) given to renamed files.

    Par2 files in the list are tried first through decode_par2(). Only when
    none of them led to a rename, files that are bigger than MIN_FILE_SIZE,
    do not have an excluded extension and look obfuscated are renamed to
    <folder>/<usefulname><extension>, together with their companion files
    that share the same base path. Nothing is returned.
    """

    # To be sure, only keep really existing files:
    filelist = [f for f in filelist if os.path.exists(f)]

    # Search for par2 files in the filelist
    par2_files = [f for f in filelist if f.endswith(".par2")]
    # Found any par2 files we can use?
    run_renamer = True
    if not par2_files:
        logging.debug("No par2 files found to process, running renamer.")
    else:
        # Run par2 from SABnzbd on them
        for par2_file in par2_files:
            # Analyse data and analyse result
            logging.debug("Deobfuscate par2: handling %s", par2_file)
            if decode_par2(par2_file):
                logging.debug("Deobfuscate par2 repair/verify finished.")
                run_renamer = False
            else:
                logging.debug("Deobfuscate par2 repair/verify did not find anything to rename.")

    # No par2 files? Then we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name
    if run_renamer:
        excluded_file_exts = EXCLUDED_FILE_EXTS
        # If there is a collection with bigger files with the same extension, we don't want to rename it
        extcounter = {}
        for candidate in filelist:
            if os.path.getsize(candidate) < MIN_FILE_SIZE:
                # too small to care
                continue
            ext = os.path.splitext(candidate)[1]
            extcounter[ext] = extcounter.get(ext, 0) + 1
            if extcounter[ext] >= 3 and ext not in excluded_file_exts:
                # collection, and extension not yet in excluded_file_exts, so add it
                excluded_file_exts = (*excluded_file_exts, ext)
                logging.debug(
                    "Found a collection of at least %s files with extension %s, so not renaming those files",
                    extcounter[ext],
                    ext,
                )

        logging.debug("Trying to see if there are qualifying files to be deobfuscated")
        # We start with the biggest file ... probably the most important file
        filelist = sorted(filelist, key=os.path.getsize, reverse=True)
        for filename in filelist:
            # check that file is still there (and not renamed by the secondary renaming process below)
            if not os.path.isfile(filename):
                continue
            logging.debug("Deobfuscate inspecting %s", filename)
            # Do we need to rename this file?
            # Criteria: big, not-excluded extension, obfuscated (in that order)
            if (
                os.path.getsize(filename) > MIN_FILE_SIZE
                and get_ext(filename) not in excluded_file_exts
                and is_probably_obfuscated(filename)  # this as last test to avoid unnecessary analysis
            ):
                # Rename and make sure the new filename is unique
                path = os.path.dirname(filename)
                # construct new_name: <path><usefulname><extension>
                new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename)))
                logging.info("Deobfuscate renaming %s to %s", filename, new_name)
                renamer(filename, new_name)
                # find other files with the same basename in filelist, and rename them in the same way:
                basedirfile = os.path.splitext(filename)[0]  # something like "/home/this/myiso"
                for otherfile in filelist:
                    if otherfile.startswith(basedirfile + ".") and os.path.isfile(otherfile):
                        # yes, same basedirfile, only different extension
                        # Strip only the leading base path: a plain str.replace() would
                        # also eat later occurrences of it inside the extension part
                        remainingextension = otherfile[len(basedirfile) :]  # might be long ext, like ".dut.srt"
                        new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), remainingextension))
                        logging.info("Deobfuscate renaming %s to %s", otherfile, new_name)
                        # Rename and make sure the new filename is unique
                        renamer(otherfile, new_name)
    else:
        # Reached when a par2 file already took care of the renaming
        logging.info("No qualifying files found to deobfuscate")
|
|
|