Browse Source

Stop scanning the par2 file once we have the information for all files

pull/1885/head
Safihre 4 years ago
parent
commit
22f1d2f642
  1. 64
      sabnzbd/par2file.py

64
sabnzbd/par2file.py

@ -23,12 +23,15 @@ import logging
import os import os
import re import re
import struct import struct
from typing import Dict, Optional, Tuple from typing import Dict, Optional, Tuple, BinaryIO
from sabnzbd.constants import MEBI
from sabnzbd.encoding import correct_unknown_encoding from sabnzbd.encoding import correct_unknown_encoding
PROBABLY_PAR2_RE = re.compile(r"(.*)\.vol(\d*)[+\-](\d*)\.par2", re.I) PROBABLY_PAR2_RE = re.compile(r"(.*)\.vol(\d*)[+\-](\d*)\.par2", re.I)
SCAN_LIMIT = 10 * MEBI
PAR_PKT_ID = b"PAR2\x00PKT" PAR_PKT_ID = b"PAR2\x00PKT"
PAR_MAIN_ID = b"PAR 2.0\x00Main\x00\x00\x00\x00"
PAR_FILE_ID = b"PAR 2.0\x00FileDesc" PAR_FILE_ID = b"PAR 2.0\x00FileDesc"
PAR_CREATOR_ID = b"PAR 2.0\x00Creator\x00" PAR_CREATOR_ID = b"PAR 2.0\x00Creator\x00"
PAR_RECOVERY_ID = b"RecvSlic" PAR_RECOVERY_ID = b"RecvSlic"
@ -91,17 +94,17 @@ def parse_par2_file(fname: str, md5of16k: Dict[bytes, str]) -> Dict[str, bytes]:
For a full description of the par2 specification, visit: For a full description of the par2 specification, visit:
http://parchive.sourceforge.net/docs/specifications/parity-volume-spec/article-spec.html http://parchive.sourceforge.net/docs/specifications/parity-volume-spec/article-spec.html
""" """
total_size = os.path.getsize(fname)
table = {} table = {}
duplicates16k = [] duplicates16k = []
total_nr_files = None
try: try:
with open(fname, "rb") as f: with open(fname, "rb") as f:
header = f.read(8) header = f.read(8)
while header: while header:
name, filehash, hash16k = parse_par2_file_packet(f, header) name, filehash, hash16k, nr_files = parse_par2_packet(f, header)
if name: if name:
if name in table:
break
table[name] = filehash table[name] = filehash
if hash16k not in md5of16k: if hash16k not in md5of16k:
md5of16k[hash16k] = name md5of16k[hash16k] = name
@ -110,6 +113,15 @@ def parse_par2_file(fname: str, md5of16k: Dict[bytes, str]) -> Dict[str, bytes]:
# Remove to avoid false-renames # Remove to avoid false-renames
duplicates16k.append(hash16k) duplicates16k.append(hash16k)
# Store the number of files for later
if nr_files:
total_nr_files = nr_files
# On large files, we stop after seeing all the listings
# On smaller files, we scan them fully to get the par2-creator
if total_size > SCAN_LIMIT and len(table) == total_nr_files:
break
header = f.read(8) header = f.read(8)
except (struct.error, IndexError): except (struct.error, IndexError):
@ -131,10 +143,12 @@ def parse_par2_file(fname: str, md5of16k: Dict[bytes, str]) -> Dict[str, bytes]:
return table return table
def parse_par2_file_packet(f, header) -> Tuple[Optional[str], Optional[bytes], Optional[bytes]]: def parse_par2_packet(
"""Look up and analyze a FileDesc package""" f: BinaryIO, header: bytes
) -> Tuple[Optional[str], Optional[bytes], Optional[bytes], Optional[int]]:
"""Look up and analyze a PAR2 packet"""
nothing = None, None, None filename, filehash, hash16k, nr_files = nothing = None, None, None, None
# All packages start with a header before the body # All packages start with a header before the body
# 8 : PAR2\x00PKT # 8 : PAR2\x00PKT
@ -163,27 +177,31 @@ def parse_par2_file_packet(f, header) -> Tuple[Optional[str], Optional[bytes], O
if md5sum != md5.digest(): if md5sum != md5.digest():
return nothing return nothing
# The FileDesc packet looks like: # See if it's any of the packages we care about
# 16 : "PAR 2.0\0FileDesc"
# 16 : FileId
# 16 : Hash for full file **
# 16 : Hash for first 16K
# 8 : File length
# xx : Name (multiple of 4, padded with \0 if needed) **
# See if it's the right packet and get name + hash
offset = 16 offset = 16
par2id = data[offset : offset + 16] par2_packet_type = data[offset : offset + 16]
if par2id == PAR_FILE_ID: if par2_packet_type == PAR_FILE_ID:
# The FileDesc packet looks like:
# 16 : "PAR 2.0\0FileDesc"
# 16 : FileId
# 16 : Hash for full file
# 16 : Hash for first 16K
# 8 : File length
# xx : Name (multiple of 4, padded with \0 if needed)
filehash = data[offset + 32 : offset + 48] filehash = data[offset + 32 : offset + 48]
hash16k = data[offset + 48 : offset + 64] hash16k = data[offset + 48 : offset + 64]
filename = correct_unknown_encoding(data[offset + 72 :].strip(b"\0")) filename = correct_unknown_encoding(data[offset + 72 :].strip(b"\0"))
return filename, filehash, hash16k elif par2_packet_type == PAR_CREATOR_ID:
elif par2id == PAR_CREATOR_ID:
# From here until the end is the creator-text # From here until the end is the creator-text
# Useful in case of bugs in the par2-creating software # Useful in case of bugs in the par2-creating software
par2creator = data[offset + 16 :].strip(b"\0") # Remove any trailing \0 par2creator = data[offset + 16 :].strip(b"\0") # Remove any trailing \0
logging.debug("Par2-creator of %s is: %s", os.path.basename(f.name), correct_unknown_encoding(par2creator)) logging.debug("Par2-creator of %s is: %s", os.path.basename(f.name), correct_unknown_encoding(par2creator))
elif par2_packet_type == PAR_MAIN_ID:
return nothing # The Main packet looks like:
# 16 : "PAR 2.0\0Main"
# 8 : Slice size
# 4 : Number of files in the recovery set
nr_files = struct.unpack("<I", data[offset + 24 : offset + 28])[0]
return filename, filehash, hash16k, nr_files

Loading…
Cancel
Save