You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

212 lines
9.2 KiB

#!/usr/bin/python3 -OO
# Copyright 2007-2021 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Deobfuscation post-processing script:
Checks the completed job folder for par2 files, for example "rename.par2",
and uses them to rename the files to their real names.
If no par2 file leads to a rename, large files without an excluded extension
are renamed to the job-name in the queue if the filename looks obfuscated.
Based on work by P1nGu1n
"""
import hashlib
import logging
import os
import re
from sabnzbd.filesystem import get_unique_filename, renamer, get_ext
from sabnzbd.par2file import is_parfile, parse_par2_file
# Files to exclude and minimal file size for renaming
# Extensions (with leading dot) that deobfuscate_list never renames to the job name
EXCLUDED_FILE_EXTS = (".vob", ".rar", ".par2", ".mts", ".m2ts", ".cpi", ".clpi", ".mpl", ".mpls", ".bdm", ".bdmv")
# Size threshold in bytes (10 MiB): only files larger than this are renamed to the job name
MIN_FILE_SIZE = 10 * 1024 * 1024
def decode_par2(parfile):
    """Parse a par2 file and rename files listed in the par2 to their real name.

    Every regular file in the folder that holds ``parfile`` is identified by the
    MD5 digest of its first 16384 bytes. When that digest is a key of the table
    obtained from the par2 file, the file is renamed to the name stored under
    that key (made unique if that name is already taken).

    parfile (string): path of the par2 file to use.
    Returns True if at least one file was renamed, otherwise False.
    """
    # Check if really a par2 file
    if not is_parfile(parfile):
        logging.info("Par2 file %s was not really a par2 file", parfile)
        return False

    # Parse the par2 file; the dictionary passed in is read back below as
    # {md5 of first 16k: filename}
    md5of16k = {}
    parse_par2_file(parfile, md5of16k)

    # Parse all files in the folder
    dirname = os.path.dirname(parfile)
    result = False
    for fn in os.listdir(dirname):
        filepath = os.path.join(dirname, fn)
        # Only check files
        if not os.path.isfile(filepath):
            continue

        with open(filepath, "rb") as file_to_match:
            first16k_data = file_to_match.read(16384)

        # Check if we have this hash
        file_md5of16k = hashlib.md5(first16k_data).digest()
        if file_md5of16k not in md5of16k:
            continue

        real_name = md5of16k[file_md5of16k]
        if real_name == fn:
            # Already correctly named: renaming would only turn it into a
            # numbered duplicate name and wrongly report a successful rename
            continue

        # Make sure it's a unique name
        new_path = os.path.join(dirname, real_name)
        renamer(filepath, get_unique_filename(new_path))
        result = True
    return result
def is_probably_obfuscated(myinputfilename):
    """Returns boolean if filename is likely obfuscated. Default: True

    myinputfilename (string) can be a plain file name, or a full path.
    Only the base name, without directory and without its last extension,
    is analysed. An empty base name falls through to the default (True).
    """
    # Find filebasename
    filename = os.path.basename(myinputfilename)
    filebasename = os.path.splitext(filename)[0]

    # First fixed patterns that we know of:
    logging.debug("Checking: %s", filebasename)

    # ...blabla.H.264/b082fa0beaa644d3aa01045d5b8d0b36.mkv is certainly obfuscated
    if re.search(r"^[a-f0-9]{32}$", filebasename):
        logging.debug("Obfuscated: 32 hex digit")
        # exactly 32 hex digits, so:
        return True

    # 0675e29e9abfd2.f7d069dab0b853283cc1b069a25f82.6547
    if re.search(r"^[a-f0-9\.]{40,}$", filebasename):
        logging.debug("Obfuscated: starting with 40+ lower case hex digits and/or dots")
        return True

    # /some/thing/abc.xyz.a4c567edbcbf27.BLA is certainly obfuscated
    if re.search(r"^abc\.xyz", filebasename):
        logging.debug("Obfuscated: starts with 'abc.xyz'")
        # ... which we consider as obfuscated:
        return True

    # these are signals for the obfuscation versus non-obfuscation
    decimals = sum(1 for c in filebasename if c.isnumeric())
    upperchars = sum(1 for c in filebasename if c.isupper())
    lowerchars = sum(1 for c in filebasename if c.islower())
    spacesdots = sum(1 for c in filebasename if c in " ._")  # space-like symbols

    # Example: "Great Distro"
    if upperchars >= 2 and lowerchars >= 2 and spacesdots >= 1:
        logging.debug("Not obfuscated: upperchars >= 2 and lowerchars >= 2 and spacesdots >= 1")
        return False

    # Example: "this is a download"
    if spacesdots >= 3:
        logging.debug("Not obfuscated: spacesdots >= 3")
        return False

    # Example: "Beast 2020"
    if (upperchars + lowerchars >= 4) and decimals >= 4 and spacesdots >= 1:
        logging.debug("Not obfuscated: (upperchars + lowerchars >= 4) and decimals >= 4 and spacesdots >= 1")
        return False

    # Example: "Catullus", starts with a capital, and most letters are lower case
    # The slice (instead of index 0) keeps an empty base name from raising IndexError;
    # lowerchars > 2 is tested before the division, so it never divides by zero
    if filebasename[:1].isupper() and lowerchars > 2 and upperchars / lowerchars <= 0.25:
        logging.debug("Not obfuscated: starts with a capital, and most letters are lower case")
        return False

    # If we get here, no trigger for a clear name was found, so let's default to obfuscated
    logging.debug("Obfuscated (default)")
    return True  # default: obfuscated
def deobfuscate_list(filelist, usefulname):
    """Check all files in filelist, and if wanted, deobfuscate: rename to filename based on usefulname.

    First every ".par2" file in filelist is handed to decode_par2. Only when none
    of them resulted in a rename, files that are bigger than MIN_FILE_SIZE, whose
    extension is not excluded, and whose name looks obfuscated are renamed to
    <usefulname><extension>. Files sharing the base name of a renamed file
    (for example subtitles) are renamed along with it.

    filelist: iterable of file paths to inspect; paths that do not exist are ignored.
    usefulname (string): the name (without extension) to give to renamed files.
    Returns None.
    """
    # to be sure, only keep really existing files:
    filelist = [f for f in filelist if os.path.exists(f)]

    # Search for par2 files in the filelist
    par2_files = [f for f in filelist if f.endswith(".par2")]
    # Found any par2 files we can use?
    run_renamer = True
    if not par2_files:
        logging.debug("No par2 files found to process, running renamer.")
    else:
        # Run par2 from SABnzbd on them
        for par2_file in par2_files:
            # Analyse data and analyse result
            logging.debug("Deobfuscate par2: handling %s", par2_file)
            if decode_par2(par2_file):
                logging.debug("Deobfuscate par2 repair/verify finished.")
                run_renamer = False
            else:
                logging.debug("Deobfuscate par2 repair/verify did not find anything to rename.")

    # No par2 files? Then we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name
    if run_renamer:
        excluded_file_exts = EXCLUDED_FILE_EXTS
        # If there is a collection with bigger files with the same extension, we don't want to rename it
        extcounter = {}
        for candidate in filelist:
            if os.path.getsize(candidate) < MIN_FILE_SIZE:
                # too small to care
                continue
            # Use the same helper as the exclusion test below, so that both
            # steps always agree on what the extension of a file is
            ext = get_ext(candidate)
            extcounter[ext] = extcounter.get(ext, 0) + 1
            if extcounter[ext] >= 3 and ext not in excluded_file_exts:
                # collection, and extension not yet in excluded_file_exts, so add it
                excluded_file_exts = (*excluded_file_exts, ext)
                logging.debug(
                    "Found a collection of at least %s files with extension %s, so not renaming those files",
                    extcounter[ext],
                    ext,
                )

        logging.debug("Trying to see if there are qualifying files to be deobfuscated")
        # We start with the biggest file ... probably the most important file
        filelist = sorted(filelist, key=os.path.getsize, reverse=True)
        for filename in filelist:
            # check that file is still there (and not renamed by the secondary renaming process below)
            if not os.path.isfile(filename):
                continue
            logging.debug("Deobfuscate inspecting %s", filename)
            # Do we need to rename this file?
            # Criteria: big, not-excluded extension, obfuscated (in that order)
            if (
                os.path.getsize(filename) > MIN_FILE_SIZE
                and get_ext(filename) not in excluded_file_exts
                and is_probably_obfuscated(filename)  # this as last test to avoid unnecessary analysis
            ):
                # Rename and make sure the new filename is unique
                path = os.path.dirname(filename)
                # construct new_name: <path><usefulname><extension>
                new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename)))
                logging.info("Deobfuscate renaming %s to %s", filename, new_name)
                renamer(filename, new_name)
                # find other files with the same basename in filelist, and rename them in the same way:
                basedirfile, _ = os.path.splitext(filename)  # something like "/home/this/myiso"
                for otherfile in filelist:
                    if otherfile.startswith(basedirfile + ".") and os.path.isfile(otherfile):
                        # yes, same basedirfile, only different extension
                        # Strip only the leading basedirfile; str.replace would also remove
                        # any later occurrence of that text inside the remaining name
                        remainingextension = otherfile[len(basedirfile) :]  # might be long ext, like ".dut.srt"
                        new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), remainingextension))
                        logging.info("Deobfuscate renaming %s to %s", otherfile, new_name)
                        # Rename and make sure the new filename is unique
                        renamer(otherfile, new_name)
    else:
        # Reached when a par2 file already took care of the renaming
        logging.info("No qualifying files found to deobfuscate")