You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
172 lines
5.5 KiB
172 lines
5.5 KiB
#!/usr/bin/python3 -OO
|
|
# Copyright 2007-2018 The SABnzbd-Team <team@sabnzbd.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
"""
|
|
sabnzbd.par2file - All par2-related functionality
|
|
"""
|
|
import os
|
|
import logging
|
|
import re
|
|
import hashlib
|
|
import struct
|
|
|
|
|
|
# Matches names such as "setname.vol003+04.par2" (case-insensitive) and
# captures the set name, the volume digits and the block-count digits
PROBABLY_PAR2_RE = re.compile(r'(.*)\.vol(\d*)[\+\-](\d*)\.par2', re.I)

# Signatures as they appear inside par2 files. They are bytes because the
# files are read in binary mode: on Python 3 a text signature never compares
# equal to (and cannot be searched for in) data read from a binary file.
PAR_PKT_ID = b"PAR2\x00PKT"
PAR_FILE_ID = b"PAR 2.0\x00FileDesc"
PAR_CREATOR_ID = b"PAR 2.0\x00Creator"
PAR_RECOVERY_ID = b"RecvSlic"
|
|
|
|
|
|
def is_parfile(filename):
    """ Check quickly whether file has par2 signature

        Only the first 8 bytes of the file are read.

        filename: path of the file to inspect
        return: True when the file could be opened and starts with the
                par2 packet signature, False otherwise (also when the
                file cannot be opened or read)
    """
    # The file is read in binary mode, so the signature has to be bytes.
    # Accept the module constant as either text or bytes.
    signature = PAR_PKT_ID
    if isinstance(signature, str):
        signature = signature.encode('latin-1')

    try:
        with open(filename, "rb") as f:
            return f.read(8).startswith(signature)
    except (OSError, TypeError, ValueError):
        # Best-effort check: a missing, unreadable or invalid path
        # simply means "not a par2 file"
        return False
|
|
|
|
|
|
def analyse_par2(name, filepath=None):
    """ Check if file is a par2-file and determine vol/block

        name: file name to analyse (surrounding whitespace is ignored)
        filepath: optional path of the file on disk, only used when the
                  name does not contain a ".volX+Y.par2" part

        return setname, vol, block
        - Name matches ".volX+Y.par2": all three come from the name, vol and
          block are the matched digit strings (possibly empty)
        - Otherwise: setname is the name without extension, vol is 0 and
          block is the number of 128-byte reads of `filepath` in which a
          recovery-slice marker was found (0 when no filepath is given)
    """
    name = name.strip()
    m = PROBABLY_PAR2_RE.search(name)
    if m:
        return m.group(1), m.group(2), m.group(3)

    # Base-par2 file
    setname = os.path.splitext(name)[0].strip()
    vol = block = 0

    # Could not parse the filename, need deep inspection
    # We already know it's a par2 from the is_parfile
    if filepath:
        # File data is bytes, so the marker must be bytes as well.
        # Accept the module constant as either text or bytes.
        recovery_id = PAR_RECOVERY_ID
        if isinstance(recovery_id, str):
            recovery_id = recovery_id.encode('latin-1')
        # Bytes carried over from the previous read, so a marker that is
        # split over two reads is still found. One byte short of the marker
        # length, so a marker can never be counted twice.
        overlap = len(recovery_id) - 1

        try:
            # Quick loop to find number blocks
            # Assumes blocks are larger than 128 bytes
            # Worst case, we only count 1, still good
            with open(filepath, "rb") as f:
                carry = b""
                chunk = f.read(128)
                while chunk:
                    if recovery_id in carry + chunk:
                        block += 1
                    carry = chunk[-overlap:]
                    chunk = f.read(128)
        except (OSError, TypeError, ValueError):
            # Best-effort: keep whatever was counted so far
            logging.debug('Could not inspect par2 file %s', filepath, exc_info=True)

    return setname, vol, block
|
|
|
|
|
|
def parse_par2_file(nzf, fname):
    """ Get the hash table and the first-16k hash table from a PAR2 file
        Return as dictionary, indexed on names or hashes for the first-16 table

        nzf: object whose `nzo.md5of16k` mapping (first-16k hash -> name)
             is updated in place
        fname: path of the par2 file to read

        return: dictionary of file name -> full-file hash; empty when the
                file cannot be opened or parsing fails

        First-16k hashes that turn out to belong to more than one name are
        removed from `nzf.nzo.md5of16k` again.

        For a full description of the par2 specification, visit:
        http://parchive.sourceforge.net/docs/specifications/parity-volume-spec/article-spec.html
    """
    table = {}
    duplicates16k = []
    md5of16k = nzf.nzo.md5of16k

    try:
        f = open(fname, 'rb')
    except (OSError, TypeError, ValueError):
        logging.debug('Cannot open par2 file %s', fname, exc_info=True)
        return table

    # The context manager closes the file on every path out of the parser
    with f:
        try:
            header = f.read(8)
            while header:
                name, full_hash, hash16k = parse_par2_file_packet(f, header)
                if name:
                    table[name] = full_hash
                    if hash16k not in md5of16k:
                        md5of16k[hash16k] = name
                    elif md5of16k[hash16k] != name:
                        # Not unique and not already linked to this file
                        # Remove to avoid false-renames
                        duplicates16k.append(hash16k)

                header = f.read(8)

        except (struct.error, IndexError):
            logging.info('Cannot use corrupt par2 file for QuickCheck, "%s"', fname)
            logging.info('Traceback: ', exc_info=True)
            table = {}
        except Exception:
            # Any other parser failure only disables QuickCheck for this file.
            # Not a bare except, so KeyboardInterrupt/SystemExit still propagate.
            logging.debug('QuickCheck parser crashed in file %s', fname)
            logging.info('Traceback: ', exc_info=True)
            table = {}

    # Have to remove duplicates at the end to make sure
    # no trace is left in case of multi-duplicates
    for hash16k in duplicates16k:
        if hash16k in md5of16k:
            old_name = md5of16k.pop(hash16k)
            logging.debug('Par2-16k signature of %s not unique, discarding', old_name)

    return table
|
|
|
|
|
|
def parse_par2_file_packet(f, header):
    """ Look up and analyze a FileDesc package

        f: par2 file opened in binary mode, positioned directly after `header`
        header: the 8 bytes read from `f` that should be the packet signature

        return: (filename, hash, hash16k) when the packet is a FileDesc packet
                with a correct checksum, where filename is text and both
                hashes are the raw 16-byte values; (None, None, None) in all
                other cases

        Raises struct.error when the length field cannot be read completely.
    """
    nothing = None, None, None

    # Everything read from the file is bytes, so the signatures must be bytes
    # as well. Accept the module constants as either text or bytes.
    pkt_id, file_id, creator_id = [
        sig.encode('latin-1') if isinstance(sig, str) else sig
        for sig in (PAR_PKT_ID, PAR_FILE_ID, PAR_CREATOR_ID)
    ]

    if header != pkt_id:
        return nothing

    # Length must be a multiple of 4. It counts the whole packet, of which
    # 32 bytes (signature, length, md5sum) precede the checksummed data.
    # A smaller value would make the read size below negative, which reads
    # the entire rest of the file.
    pkt_len = struct.unpack('<Q', f.read(8))[0]
    if pkt_len % 4 or pkt_len < 32:
        return nothing

    # Next 16 bytes is md5sum of this packet
    md5sum = f.read(16)

    # Read and check the data
    data = f.read(pkt_len - 32)
    if md5sum != hashlib.md5(data).digest():
        return nothing

    # The FileDesc packet looks like:
    # 16 : "PAR 2.0\0FileDesc"
    # 16 : FileId
    # 16 : Hash for full file **
    # 16 : Hash for first 16K
    #  8 : File length
    # xx : Name (multiple of 4, padded with \0 if needed) **

    # See if it's the right packet and get name + hash
    for offset in range(0, len(data), 8):
        if data.startswith(file_id, offset):
            full_hash = data[offset + 32:offset + 48]
            hash16k = data[offset + 48:offset + 64]
            raw_name = data[offset + 72:].strip(b'\0')
            try:
                filename = raw_name.decode('utf-8')
            except UnicodeDecodeError:
                # Not valid UTF-8: latin-1 maps every byte, so this cannot fail
                filename = raw_name.decode('latin-1')
            return filename, full_hash, hash16k
        elif data.startswith(creator_id, offset):
            # From here until the end is the creator-text
            # Useful in case of bugs in the par2-creating software
            creator = data[offset + 16:].strip(b'\0').decode('utf-8', 'replace').rstrip()
            logging.debug('Par2-creator of %s is: %s', os.path.basename(f.name), creator)

    return nothing
|
|
|