Browse Source

Stop scanning the par2 file once we have the information for all files

pull/1885/head
Safihre 4 years ago
parent
commit
22f1d2f642
  1. 64
      sabnzbd/par2file.py

64
sabnzbd/par2file.py

@ -23,12 +23,15 @@ import logging
import os
import re
import struct
from typing import Dict, Optional, Tuple
from typing import Dict, Optional, Tuple, BinaryIO
from sabnzbd.constants import MEBI
from sabnzbd.encoding import correct_unknown_encoding
PROBABLY_PAR2_RE = re.compile(r"(.*)\.vol(\d*)[+\-](\d*)\.par2", re.I)
SCAN_LIMIT = 10 * MEBI
PAR_PKT_ID = b"PAR2\x00PKT"
PAR_MAIN_ID = b"PAR 2.0\x00Main\x00\x00\x00\x00"
PAR_FILE_ID = b"PAR 2.0\x00FileDesc"
PAR_CREATOR_ID = b"PAR 2.0\x00Creator\x00"
PAR_RECOVERY_ID = b"RecvSlic"
@ -91,17 +94,17 @@ def parse_par2_file(fname: str, md5of16k: Dict[bytes, str]) -> Dict[str, bytes]:
For a full description of the par2 specification, visit:
http://parchive.sourceforge.net/docs/specifications/parity-volume-spec/article-spec.html
"""
total_size = os.path.getsize(fname)
table = {}
duplicates16k = []
total_nr_files = None
try:
with open(fname, "rb") as f:
header = f.read(8)
while header:
name, filehash, hash16k = parse_par2_file_packet(f, header)
name, filehash, hash16k, nr_files = parse_par2_packet(f, header)
if name:
if name in table:
break
table[name] = filehash
if hash16k not in md5of16k:
md5of16k[hash16k] = name
@ -110,6 +113,15 @@ def parse_par2_file(fname: str, md5of16k: Dict[bytes, str]) -> Dict[str, bytes]:
# Remove to avoid false-renames
duplicates16k.append(hash16k)
# Store the number of files for later
if nr_files:
total_nr_files = nr_files
# On large files, we stop after seeing all the listings
# On smaller files, we scan them fully to get the par2-creator
if total_size > SCAN_LIMIT and len(table) == total_nr_files:
break
header = f.read(8)
except (struct.error, IndexError):
@ -131,10 +143,12 @@ def parse_par2_file(fname: str, md5of16k: Dict[bytes, str]) -> Dict[str, bytes]:
return table
def parse_par2_file_packet(f, header) -> Tuple[Optional[str], Optional[bytes], Optional[bytes]]:
"""Look up and analyze a FileDesc package"""
def parse_par2_packet(
f: BinaryIO, header: bytes
) -> Tuple[Optional[str], Optional[bytes], Optional[bytes], Optional[int]]:
"""Look up and analyze a PAR2 packet"""
nothing = None, None, None
filename, filehash, hash16k, nr_files = nothing = None, None, None, None
# All packages start with a header before the body
# 8 : PAR2\x00PKT
@ -163,27 +177,31 @@ def parse_par2_file_packet(f, header) -> Tuple[Optional[str], Optional[bytes], O
if md5sum != md5.digest():
return nothing
# The FileDesc packet looks like:
# 16 : "PAR 2.0\0FileDesc"
# 16 : FileId
# 16 : Hash for full file **
# 16 : Hash for first 16K
# 8 : File length
# xx : Name (multiple of 4, padded with \0 if needed) **
# See if it's the right packet and get name + hash
# See if it's any of the packages we care about
offset = 16
par2id = data[offset : offset + 16]
if par2id == PAR_FILE_ID:
par2_packet_type = data[offset : offset + 16]
if par2_packet_type == PAR_FILE_ID:
# The FileDesc packet looks like:
# 16 : "PAR 2.0\0FileDesc"
# 16 : FileId
# 16 : Hash for full file
# 16 : Hash for first 16K
# 8 : File length
# xx : Name (multiple of 4, padded with \0 if needed)
filehash = data[offset + 32 : offset + 48]
hash16k = data[offset + 48 : offset + 64]
filename = correct_unknown_encoding(data[offset + 72 :].strip(b"\0"))
return filename, filehash, hash16k
elif par2id == PAR_CREATOR_ID:
elif par2_packet_type == PAR_CREATOR_ID:
# From here until the end is the creator-text
# Useful in case of bugs in the par2-creating software
par2creator = data[offset + 16 :].strip(b"\0") # Remove any trailing \0
logging.debug("Par2-creator of %s is: %s", os.path.basename(f.name), correct_unknown_encoding(par2creator))
return nothing
elif par2_packet_type == PAR_MAIN_ID:
# The Main packet looks like:
# 16 : "PAR 2.0\0Main"
# 8 : Slice size
# 4 : Number of files in the recovery set
nr_files = struct.unpack("<I", data[offset + 24 : offset + 28])[0]
return filename, filehash, hash16k, nr_files

Loading…
Cancel
Save