You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

172 lines
5.5 KiB

#!/usr/bin/python3 -OO
# Copyright 2007-2018 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
sabnzbd.par2file - All par2-related functionality
"""
import os
import logging
import re
import hashlib
import struct
# Volume-style par2 names, e.g. "name.vol003+04.par2" (case-insensitive).
# Groups: 1 = set name, 2 = digits after ".vol", 3 = digits after "+" or "-".
PROBABLY_PAR2_RE = re.compile(r'(.*)\.vol(\d*)[\+\-](\d*)\.par2', re.IGNORECASE)

# Markers searched for inside par2 files.
# NOTE(review): these are text strings, while this module opens files in
# binary mode; under Python 3 a str never compares equal to bytes, so they
# have to be encoded before being matched against file data.
PAR_PKT_ID = 'PAR2\x00PKT'
PAR_FILE_ID = 'PAR 2.0\x00FileDesc'
PAR_CREATOR_ID = 'PAR 2.0\x00Creator'
PAR_RECOVERY_ID = 'RecvSlic'
def is_parfile(filename):
    """ Check quickly whether file has par2 signature

        Reads the first 8 bytes of `filename` and compares them with the
        par2 packet marker (PAR_PKT_ID).
        Returns True when the file starts with the marker, False when it
        does not or when the file cannot be opened or read.
    """
    try:
        with open(filename, "rb") as f:
            buf = f.read(8)
    except (OSError, TypeError, ValueError):
        # Best-effort check: an unreadable file or unusable path
        # simply counts as "not a par2 file"
        return False

    # The file content is bytes while the marker constant is text,
    # so the marker must be encoded before the comparison
    return buf.startswith(PAR_PKT_ID.encode("latin-1"))
def analyse_par2(name, filepath=None):
    """ Determine set name, vol and block of a par2 file
        return setname, vol, block

        When `name` looks like "<set>.vol<N>+<M>.par2", setname, vol and
        block are the three parts captured from the name (vol and block
        are strings of digits, possibly empty).
        Otherwise `name` is treated as a base par2 file: setname is the
        name without its extension, vol is 0 and block is the number of
        recovery-slice markers found in `filepath` (0 when no filepath is
        given or the file cannot be read).
    """
    name = name.strip()
    vol = block = 0
    m = PROBABLY_PAR2_RE.search(name)
    if m:
        setname, vol, block = m.groups()
        return setname, vol, block

    # Base-par2 file
    setname = os.path.splitext(name)[0].strip()

    # Could not parse the filename, need deep inspection
    # We already know it's a par2 from the is_parfile
    if filepath:
        # The file is read as bytes, the marker constant is text
        marker = PAR_RECOVERY_ID.encode("latin-1")
        # Keep the last len(marker) - 1 bytes of every chunk so a marker
        # that straddles two reads is still found, but never counted twice
        overlap = len(marker) - 1
        chunk_size = 64 * 1024
        try:
            with open(filepath, "rb") as f:
                tail = b""
                chunk = f.read(chunk_size)
                while chunk:
                    window = tail + chunk
                    block += window.count(marker)
                    tail = window[-overlap:]
                    chunk = f.read(chunk_size)
        except (OSError, TypeError, ValueError):
            # Best effort: keep whatever was counted before the failure
            pass
    return setname, vol, block
def parse_par2_file(nzf, fname):
    """ Get the hash table and the first-16k hash table from a PAR2 file
        Return as dictionary, indexed on names or hashes for the first-16 table
        For a full description of the par2 specification, visit:
        http://parchive.sourceforge.net/docs/specifications/parity-volume-spec/article-spec.html

        Returns a dict mapping each file name found in `fname` to its
        full-file hash; the dict is empty when the file cannot be opened
        or when parsing fails.
        Side effect: first-16k hashes are recorded in nzf.nzo.md5of16k
        (hash16k -> name); a hash16k that turns up for two different names
        is removed from that mapping again.
    """
    table = {}
    duplicates16k = []

    try:
        f = open(fname, 'rb')
    except (OSError, TypeError, ValueError):
        return table

    # The context manager closes the file on every exit path, including
    # KeyboardInterrupt/SystemExit, which are deliberately not caught below
    with f:
        try:
            header = f.read(8)
            while header:
                name, filehash, hash16k = parse_par2_file_packet(f, header)
                if name:
                    table[name] = filehash
                    known16k = nzf.nzo.md5of16k
                    if hash16k not in known16k:
                        known16k[hash16k] = name
                    elif known16k[hash16k] != name:
                        # Not unique and not already linked to this file
                        # Remove to avoid false-renames
                        duplicates16k.append(hash16k)
                header = f.read(8)
        except (struct.error, IndexError):
            logging.info('Cannot use corrupt par2 file for QuickCheck, "%s"', fname)
            logging.info('Traceback: ', exc_info=True)
            table = {}
        except Exception:
            logging.debug('QuickCheck parser crashed in file %s', fname)
            logging.info('Traceback: ', exc_info=True)
            table = {}

    # Have to remove duplicates at the end to make sure
    # no trace is left in case of multi-duplicates
    for hash16k in duplicates16k:
        if hash16k in nzf.nzo.md5of16k:
            old_name = nzf.nzo.md5of16k.pop(hash16k)
            logging.debug('Par2-16k signature of %s not unique, discarding', old_name)
    return table
def parse_par2_file_packet(f, header):
    """ Look up and analyze a FileDesc package

        `f` is a file object opened in binary mode, positioned directly
        after the 8-byte `header` that the caller already read from it.
        Returns (filename, hash, hash16k) when the packet is a valid
        FileDesc packet, where filename is text and both hashes are the
        raw 16-byte values; returns (None, None, None) otherwise.
        Raises struct.error when fewer than 8 bytes are left for the
        length field.
    """
    nothing = None, None, None

    # File data is bytes while the marker constants are text,
    # so the markers are encoded before every comparison
    if header != PAR_PKT_ID.encode('latin-1'):
        return nothing

    # Length covers the whole packet: it must be a multiple of 4 and can
    # never be smaller than the 64-byte packet header. Rejecting short
    # values also prevents a negative size in the data read below.
    pkt_len = struct.unpack('<Q', f.read(8))[0]
    if pkt_len % 4 or pkt_len < 64:
        return nothing

    # Next 16 bytes is md5sum of this packet
    md5sum = f.read(16)

    # Read and check the data
    data = f.read(pkt_len - 32)
    if md5sum != hashlib.md5(data).digest():
        return nothing

    # The FileDesc packet looks like:
    # 16 : "PAR 2.0\0FileDesc"
    # 16 : FileId
    # 16 : Hash for full file **
    # 16 : Hash for first 16K
    #  8 : File length
    # xx : Name (multiple of 4, padded with \0 if needed) **
    file_id = PAR_FILE_ID.encode('latin-1')
    creator_id = PAR_CREATOR_ID.encode('latin-1')

    # See if it's the right packet and get name + hash
    for offset in range(0, len(data), 8):
        if data[offset:offset + 16] == file_id:
            filehash = data[offset + 32:offset + 48]
            hash16k = data[offset + 48:offset + 64]
            raw_name = data[offset + 72:].strip(b'\0')
            try:
                filename = raw_name.decode('utf-8')
            except UnicodeDecodeError:
                # latin-1 can decode any byte sequence, so this cannot fail
                filename = raw_name.decode('latin-1')
            return filename, filehash, hash16k
        elif data[offset:offset + 15] == creator_id:
            # From here until the end is the creator-text
            # Useful in case of bugs in the par2-creating software
            creator = data[offset + 16:].strip(b'\0').decode('utf-8', 'replace').strip()
            logging.debug('Par2-creator of %s is: %s', os.path.basename(f.name), creator)
    return nothing