From a2c23f24cfe17ad67b88d9d943f44a7b856a9c34 Mon Sep 17 00:00:00 2001 From: Safihre Date: Sun, 17 May 2020 16:47:29 +0200 Subject: [PATCH] Reworked SFV-verification and support SFV-based deobfuscation Closes #1438 --- sabnzbd/newsunpack.py | 158 ++++++++++++++++++++++++++++++++++---------------- sabnzbd/postproc.py | 88 ++++++++++++++-------------- 2 files changed, 153 insertions(+), 93 deletions(-) diff --git a/sabnzbd/newsunpack.py b/sabnzbd/newsunpack.py index 8417c86..e7d9886 100644 --- a/sabnzbd/newsunpack.py +++ b/sabnzbd/newsunpack.py @@ -25,18 +25,18 @@ import re import subprocess import logging import time -import binascii +import zlib import shutil import functools from subprocess import Popen import sabnzbd -from sabnzbd.encoding import platform_btou +from sabnzbd.encoding import platform_btou, correct_unknown_encoding import sabnzbd.utils.rarfile as rarfile from sabnzbd.misc import format_time_string, find_on_path, int_conv, \ get_all_passwords, calc_age, cmp, caller_name from sabnzbd.filesystem import make_script_path, real_path, globber, globber_full, \ - renamer, clip_path, long_path, remove_file, recursive_listdir, setname_from_path + renamer, clip_path, long_path, remove_file, recursive_listdir, setname_from_path, get_ext from sabnzbd.sorting import SeriesSorter import sabnzbd.cfg as cfg from sabnzbd.constants import Status @@ -2130,7 +2130,7 @@ def QuickCheck(set, nzo): for file in md5pack: found = False - file_to_ignore = os.path.splitext(file)[1].lower().replace('.', '') in ignore_ext + file_to_ignore = get_ext(file).replace('.', '') in ignore_ext for nzf in nzf_list: # Do a simple filename based check if file == nzf.filename: @@ -2210,55 +2210,115 @@ def par2_mt_check(par2_path): return False -def sfv_check(sfv_path): - """ Verify files using SFV file, - input: full path of sfv, file are assumed to be relative to sfv - returns: List of failing files or [] when all is OK - """ - failed = [] - try: - fp = open(sfv_path, 'r') - except: - logging.info('Cannot open SFV file %s', sfv_path) - failed.append(sfv_path) - return failed - root = os.path.split(sfv_path)[0] - for line in fp: - line = line.strip('\n\r ') - if line and line[0] != ';': - x = line.rfind(' ') - if x > 0: - filename = line[:x].strip() - checksum = line[x:].strip() - path = os.path.join(root, filename) - if os.path.exists(path): - if crc_check(path, checksum): - logging.debug('File %s passed SFV check', path) - else: - logging.info('File %s did not pass SFV check', path) - failed.append(filename) +def sfv_check(sfvs, nzo, workdir): + """ Verify files using SFV files """ + # Update status + nzo.status = Status.VERIFYING + nzo.set_action_line(T("Trying SFV verification"), "...") + + # We use bitwise assigment (&=) so False always wins in case of failure + # This way the renames always get saved! + result = True + nzf_list = nzo.finished_files + renames = {} + + # Files to ignore + ignore_ext = cfg.quick_check_ext_ignore() + + # We need the crc32 of all files + calculated_crc32 = {} + verifytotal = len(nzo.finished_files) + verifynum = 0 + for nzf in nzf_list: + verifynum += 1 + nzo.set_action_line(T('Verifying'), '%02d/%02d' % (verifynum, verifytotal)) + calculated_crc32[nzf.filename] = crc_calculate(os.path.join(workdir, nzf.filename)) + + sfv_parse_results = {} + nzo.set_action_line(T("Trying SFV verification"), "...") + for sfv in sfvs: + setname = setname_from_path(sfv) + nzo.set_unpack_info("Repair", T("Trying SFV verification"), setname) + + # Parse the sfv and add to the already found results + # Duplicates will be replaced + sfv_parse_results.update(parse_sfv(sfv)) + + for file in sfv_parse_results: + found = False + file_to_ignore = get_ext(file).replace('.', '') in ignore_ext + for nzf in nzf_list: + # Do a simple filename based check + if file == nzf.filename: + found = True + if nzf.filename in calculated_crc32 and calculated_crc32[nzf.filename] == sfv_parse_results[file]: + logging.debug('SFV-check of file %s OK', file) + result &= True + elif file_to_ignore: + # We don't care about these files + logging.debug('SFV-check ignoring file %s', file) + result &= True else: - logging.info('File %s missing in SFV check', path) - failed.append(filename) - fp.close() - return failed + logging.info('SFV-check of file %s failed!', file) + result = False + break + + # Now lets do obfuscation check + if nzf.filename in calculated_crc32 and calculated_crc32[nzf.filename] == sfv_parse_results[file]: + try: + logging.debug('SFV-check will rename %s to %s', nzf.filename, file) + renamer(os.path.join(nzo.downpath, nzf.filename), os.path.join(nzo.downpath, file)) + renames[file] = nzf.filename + nzf.filename = file + result &= True + found = True + break + except IOError: + # Renamed failed for some reason, probably already done + break + if not found: + if file_to_ignore: + # We don't care about these files + logging.debug('SVF-check ignoring missing file %s', file) + continue -def crc_check(path, target_crc): - """ Return True if file matches CRC """ - try: - fp = open(path, 'rb') - except: - return False + logging.info('Cannot SFV-check missing file %s!', file) + result = False + + # Save renames + if renames: + nzo.renamed_file(renames) + + return result + + +def parse_sfv(sfv_filename): + """ Parse SFV file and return dictonary of crc32's and filenames """ + results = {} + with open(sfv_filename, mode="rb") as sfv_list: + for sfv_item in sfv_list: + sfv_item = sfv_item.strip() + # Ignore comment-lines + if sfv_item.startswith(b";"): + continue + # Parse out the filename and crc32 + filename, expected_crc32 = sfv_item.strip().rsplit(maxsplit=1) + # We don't know what encoding is used when it was created + results[correct_unknown_encoding(filename)] = expected_crc32.lower() + return results + + +def crc_calculate(path): + """ Calculate crc32 of the given file """ crc = 0 - while 1: - data = fp.read(4096) - if not data: - break - crc = binascii.crc32(data, crc) - fp.close() - crc = '%08x' % (crc & 0xffffffff,) - return crc.lower() == target_crc.lower() + with open(path, "rb") as fp: + while 1: + data = fp.read(4096) + if not data: + break + crc = zlib.crc32(data, crc) + return b"%08x" % (crc & 0xffffffff) def analyse_show(name): diff --git a/sabnzbd/postproc.py b/sabnzbd/postproc.py index beab778..a9f7fe6 100644 --- a/sabnzbd/postproc.py +++ b/sabnzbd/postproc.py @@ -22,13 +22,19 @@ sabnzbd.postproc - threaded post-processing of jobs import os import logging import sabnzbd -import xml.sax.saxutils import functools import time import re import queue -from sabnzbd.newsunpack import unpack_magic, par2_repair, external_processing, sfv_check, build_filelists, rar_sort +from sabnzbd.newsunpack import ( + unpack_magic, + par2_repair, + external_processing, + sfv_check, + build_filelists, + rar_sort, +) from threading import Thread from sabnzbd.misc import on_cleanup_list from sabnzbd.filesystem import ( @@ -710,22 +716,22 @@ def prepare_extraction_path(nzo): def parring(nzo, workdir): """ Perform par processing. Returns: (par_error, re_add) """ - filename = nzo.final_name - notifier.send_notification(T("Post-processing"), filename, "pp", nzo.cat) - logging.info("Starting verification and repair of %s", filename) + job_name = nzo.final_name + notifier.send_notification(T("Post-processing"), job_name, "pp", nzo.cat) + logging.info("Starting verification and repair of %s", job_name) # Get verification status of sets verified = sabnzbd.load_data(VERIFIED_FILE, nzo.workpath, remove=False) or {} - repair_sets = list(nzo.extrapars.keys()) re_add = False par_error = False - single = len(repair_sets) == 1 + single = len(nzo.extrapars) == 1 - if repair_sets: - for setname in repair_sets: + if nzo.extrapars: + for setname in nzo.extrapars: if cfg.ignore_samples() and RE_SAMPLE.search(setname.lower()): continue + # Skip sets that were already tried if not verified.get(setname, False): logging.info("Running verification and repair on set %s", setname) parfile_nzf = nzo.partable[setname] @@ -746,16 +752,19 @@ def parring(nzo, workdir): continue par_error = par_error or not res - else: - # We must not have found any par2.. - logging.info("No par2 sets for %s", filename) - nzo.set_unpack_info("Repair", T("[%s] No par2 sets") % filename) + elif not verified.get("", False): + # No par2-sets found, skipped if already tried before + logging.info("No par2 sets for %s", job_name) + nzo.set_unpack_info("Repair", T("[%s] No par2 sets") % job_name) + + # Try SFV-based verification and rename + sfv_check_result = None if cfg.sfv_check() and not verified.get("", False): - par_error = not try_sfv_check(nzo, workdir) - verified[""] = not par_error + sfv_check_result = try_sfv_check(nzo, workdir) + par_error = sfv_check_result is False - # If still no success, do RAR-check or RAR-rename - if not par_error and cfg.enable_unrar(): + # If no luck with SFV, do RAR-check or RAR-rename + if sfv_check_result is None and cfg.enable_unrar(): _, _, rars, _, _ = build_filelists(workdir) # If there's no RAR's, they might be super-obfuscated if not rars: @@ -765,10 +774,12 @@ def parring(nzo, workdir): _, _, rars, _, _ = build_filelists(workdir) if rars: par_error = not try_rar_check(nzo, rars) - verified[""] = not par_error + + # Save that we already tried SFV/RAR-verification + verified[""] = not par_error if re_add: - logging.info("Re-added %s to queue", filename) + logging.info("Re-added %s to queue", job_name) if nzo.priority != TOP_PRIORITY: nzo.priority = REPAIR_PRIORITY nzo.status = Status.FETCHING @@ -777,43 +788,32 @@ def parring(nzo, workdir): sabnzbd.save_data(verified, VERIFIED_FILE, nzo.workpath) - logging.info("Verification and repair finished for %s", filename) + logging.info("Verification and repair finished for %s", job_name) return par_error, re_add def try_sfv_check(nzo, workdir): """ Attempt to verify set using SFV file - Return True if verified, False when failed + Return None if no SFV-sets, True/False based on verification """ - # Get list of SFV names; shortest name first, minimizes the chance on a mismatch + # Get list of SFV names sfvs = globber_full(workdir, "*.sfv") - sfvs.sort(key=lambda x: len(x)) - par_error = False - found = False - for sfv in sfvs: - found = True - setname = setname_from_path(sfv) - nzo.status = Status.VERIFYING - nzo.set_unpack_info("Repair", T("Trying SFV verification"), setname) - nzo.set_action_line(T("Trying SFV verification"), "...") - - failed = sfv_check(sfv) - if failed: - fail_msg = T('Some files failed to verify against "%s"') % setname - msg = fail_msg + "; " - msg += "; ".join(failed) - nzo.set_unpack_info("Repair", msg, setname) - par_error = True - else: - nzo.set_unpack_info("Repair", T("Verified successfully using SFV files"), setname) - # Show error in GUI - if found and par_error: + # Skip if there's no SFV's + if not sfvs: + return None + + result = sfv_check(sfvs, nzo, workdir) + if not result: + print_sfv = [os.path.basename(sfv) for sfv in sfvs] + fail_msg = T('Some files failed to verify against "%s"') % "; ".join(print_sfv) + nzo.set_unpack_info("Repair", fail_msg) nzo.status = Status.FAILED nzo.fail_msg = fail_msg return False - # Success or just no SFV's + # Success + nzo.set_unpack_info("Repair", T("Verified successfully using SFV files")) return True