#!/usr/bin/python3 -OO
# Copyright 2008-2017 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
sabnzbd.nzbparser - Parse and import NZB files
"""
import bz2
import gzip
import re
import time
import logging
import hashlib
import xml.etree.ElementTree
import datetime
from typing import Optional, Dict, Any, Union
import sabnzbd
from sabnzbd import filesystem, nzbstuff
from sabnzbd.constants import Status
from sabnzbd.encoding import utob, correct_unknown_encoding
from sabnzbd.filesystem import is_archive, get_filename
from sabnzbd.misc import name_to_cat
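
# Hedged usage sketch (illustrative only, not executed by this module): these
# entry points are normally driven by SABnzbd's own queue machinery, roughly:
#
#     status, nzo_ids = process_single_nzb("job.nzb", "/path/to/job.nzb", keep=True)
#     if status == 0:
#         print("Queued jobs:", nzo_ids)
#
# process_nzb_archive_file() follows the same pattern for archives, while
# nzbfile_parser() is invoked on the raw NZB data when an NzbObject is built.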


def nzbfile_parser(raw_data, nzo):
    # Strip a default xmlns declaration so ElementTree does not prefix every
    # tag with the namespace, keeping the find()/iter() calls below simple
    raw_data = re.sub(r"""\s(xmlns="[^"]+"|xmlns='[^']+')""", "", raw_data, count=1)
    nzb_tree = xml.etree.ElementTree.fromstring(raw_data)

    # Hash for dupe-checking
    md5sum = hashlib.md5()

    # Average date
    avg_age_sum = 0

    # In case of failing timestamps and failing files
    time_now = time.time()
    skipped_files = 0
    valid_files = 0

    # Parse the header
    if nzb_tree.find("head") is not None:
        for meta in nzb_tree.find("head").iter("meta"):
            meta_type = meta.attrib.get("type")
            if meta_type and meta.text:
                # Meta tags can occur multiple times
                if meta_type not in nzo.meta:
                    nzo.meta[meta_type] = []
                nzo.meta[meta_type].append(meta.text)
    logging.debug("NZB file meta-data = %s", nzo.meta)

    # Parse the files
    for file in nzb_tree.iter("file"):
        # Get subject and date
        file_name = ""
        if file.attrib.get("subject"):
            file_name = file.attrib.get("subject")

        # Don't fail if no date present
        try:
            file_timestamp = int(file.attrib.get("date"))
            file_date = datetime.datetime.fromtimestamp(file_timestamp)
        except:
            file_date = datetime.datetime.fromtimestamp(time_now)
            file_timestamp = time_now

        # Get group
        for group in file.iter("group"):
            if group.text not in nzo.groups:
                nzo.groups.append(group.text)

        # Get segments
        raw_article_db = {}
        file_bytes = 0
        if file.find("segments") is not None:
            for segment in file.find("segments").iter("segment"):
                try:
                    article_id = segment.text
                    segment_size = int(segment.attrib.get("bytes"))
                    partnum = int(segment.attrib.get("number"))

                    # Update hash
                    md5sum.update(utob(article_id))

                    # Duplicate parts?
                    if partnum in raw_article_db:
                        if article_id != raw_article_db[partnum][0]:
                            logging.info(
                                "Duplicate part %s, but different IDs (%s // %s)",
                                partnum,
                                raw_article_db[partnum][0],
                                article_id,
                            )
                            nzo.increase_bad_articles_counter("duplicate_articles")
                        else:
                            logging.info("Skipping duplicate article (%s)", article_id)
                    elif segment_size <= 0 or segment_size >= 2 ** 23:
                        # Sanity check on the article size: skip anything that is negative,
                        # zero or larger than 8 MB; the value is later used to allocate
                        # memory in the article cache and sabyenc
                        logging.info("Skipping article %s due to strange size (%s)", article_id, segment_size)
                        nzo.increase_bad_articles_counter("bad_articles")
                    else:
                        raw_article_db[partnum] = (article_id, segment_size)
                        file_bytes += segment_size
                except:
                    # In case of missing attributes
                    pass

        # Sort the articles by part number, compatible with Python 3.5
        raw_article_db_sorted = [raw_article_db[partnum] for partnum in sorted(raw_article_db)]

        # Create NZF
        nzf = sabnzbd.nzbstuff.NzbFile(file_date, file_name, raw_article_db_sorted, file_bytes, nzo)

        # Check if we already have this exact NZF (see custom eq-checks)
        if nzf in nzo.files:
            logging.info("File %s occurred twice in NZB, skipping", nzf.filename)
            continue

        # Add valid NZFs
        if file_name and nzf.valid and nzf.nzf_id:
            logging.info("File %s added to queue", nzf.filename)
            nzo.files.append(nzf)
            nzo.files_table[nzf.nzf_id] = nzf
            nzo.bytes += nzf.bytes
            valid_files += 1
            avg_age_sum += file_timestamp
        else:
            logging.info("Error importing %s, skipping", file_name)
            if nzf.nzf_id:
                sabnzbd.remove_data(nzf.nzf_id, nzo.admin_path)
            skipped_files += 1

    # Final bookkeeping
    nr_files = max(1, valid_files)
    nzo.avg_stamp = avg_age_sum / nr_files
    nzo.avg_date = datetime.datetime.fromtimestamp(avg_age_sum / nr_files)
    nzo.md5sum = md5sum.hexdigest()

    if skipped_files:
        logging.warning(T("Failed to import %s files from %s"), skipped_files, nzo.filename)
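
# For reference, a minimal sketch of the NZB structure consumed by
# nzbfile_parser() above (illustrative placeholder values):
#
#     <?xml version="1.0" encoding="UTF-8"?>
#     <nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
#       <head><meta type="password">secret</meta></head>
#       <file subject="example.rar" date="1230000000">
#         <groups><group>alt.binaries.test</group></groups>
#         <segments>
#           <segment bytes="500000" number="1">part1of1.example@powerpost</segment>
#         </segments>
#       </file>
#     </nzb>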


def process_nzb_archive_file(
    filename: str,
    path: str,
    pp: Optional[int] = None,
    script: Optional[str] = None,
    cat: Optional[str] = None,
    catdir: Optional[str] = None,
    keep: bool = False,
    priority: Optional[Union[Status, str]] = None,
    nzbname: Optional[str] = None,
    reuse: Optional[str] = None,
    nzo_info: Optional[Dict[str, Any]] = None,
    dup_check: bool = True,
    url: Optional[str] = None,
    password: Optional[str] = None,
    nzo_id: Optional[str] = None,
):
    """Analyse ZIP file and create job(s).
    Accepts ZIP files that contain ONLY nzb/nfo/folder files.
    Returns (status, nzo_ids)
    status: -1==Error, 0==OK, 1==Ignore
    """
    nzo_ids = []
    if catdir is None:
        catdir = cat
    filename, cat = name_to_cat(filename, catdir)

    # Returns -1==Error/Retry, 0==OK, 1==Ignore
    status, zf, extension = is_archive(path)
    if status != 0:
        return status, []

    status = 1
    names = zf.namelist()
    nzbcount = 0
    for name in names:
        name = name.lower()
        if name.endswith(".nzb"):
            status = 0
            nzbcount += 1

    if status == 0:
        if nzbcount != 1:
            nzbname = None
        for name in names:
            if name.lower().endswith(".nzb"):
                try:
                    data = correct_unknown_encoding(zf.read(name))
                except OSError:
                    logging.error(T("Cannot read %s"), name, exc_info=True)
                    zf.close()
                    return -1, []
                name = filesystem.setname_from_path(name)
                if data:
                    nzo = None
                    try:
                        nzo = nzbstuff.NzbObject(
                            name,
                            pp=pp,
                            script=script,
                            nzb_data=data,
                            cat=cat,
                            url=url,
                            priority=priority,
                            nzbname=nzbname,
                            nzo_info=nzo_info,
                            reuse=reuse,
                            dup_check=dup_check,
                        )
                        if not nzo.password:
                            nzo.password = password
                    except (TypeError, ValueError):
                        # Duplicate or empty, ignore
                        pass
                    except:
                        # Something else is wrong, show error
                        logging.error(T("Error while adding %s, removing"), name, exc_info=True)
                    if nzo:
                        if nzo_id:
                            # Re-use the existing nzo_id when a "future" job gets its payload
                            sabnzbd.NzbQueue.remove(nzo_id, delete_all_data=False)
                            nzo.nzo_id = nzo_id
                            nzo_id = None
                        nzo_ids.append(sabnzbd.NzbQueue.add(nzo))
                        nzo.update_rating()
        zf.close()
        try:
            if not keep:
                filesystem.remove_file(path)
        except OSError:
            logging.error(T("Error removing %s"), filesystem.clip_path(path))
            logging.info("Traceback: ", exc_info=True)
    else:
        zf.close()
        status = 1

    return status, nzo_ids
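
# Hedged usage sketch (hypothetical path; assumes an initialized SABnzbd core):
#
#     status, nzo_ids = process_nzb_archive_file("batch.zip", "/incoming/batch.zip", keep=True)
#     # status: -1 == error, 0 == jobs created (ids in nzo_ids), 1 == ignored (no NZBs inside)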


def process_single_nzb(
    filename: str,
    path: str,
    pp: Optional[int] = None,
    script: Optional[str] = None,
    cat: Optional[str] = None,
    catdir: Optional[str] = None,
    keep: bool = False,
    priority: Optional[Union[Status, str]] = None,
    nzbname: Optional[str] = None,
    reuse: Optional[str] = None,
    nzo_info: Optional[Dict[str, Any]] = None,
    dup_check: bool = True,
    url: Optional[str] = None,
    password: Optional[str] = None,
    nzo_id: Optional[str] = None,
):
    """Analyze file and create a job from it.
    Supports NZB, NZB.BZ2, NZB.GZ and GZ.NZB-in-disguise.
    Returns (status, nzo_ids)
    status: -2==Error/retry, -1==Error, 0==OK, 1==Empty NZB, ignored
    """
    nzo_ids = []
    if catdir is None:
        catdir = cat

    try:
        with open(path, "rb") as nzb_file:
            check_bytes = nzb_file.read(2)

        if check_bytes == b"\x1f\x8b":
            # gzip file or gzip in disguise
            filename = filename.replace(".nzb.gz", ".nzb")
            nzb_reader_handler = gzip.GzipFile
        elif check_bytes == b"BZ":
            # bz2 file or bz2 in disguise
            filename = filename.replace(".nzb.bz2", ".nzb")
            nzb_reader_handler = bz2.BZ2File
        else:
            nzb_reader_handler = open

        # Let's get some data and hope we can decode it
        with nzb_reader_handler(path, "rb") as nzb_file:
            data = correct_unknown_encoding(nzb_file.read())
    except OSError:
        logging.warning(T("Cannot read %s"), filesystem.clip_path(path))
        logging.info("Traceback: ", exc_info=True)
        return -2, nzo_ids
    if filename:
        filename, cat = name_to_cat(filename, catdir)

    # The name is used as the folder name, so sanitize it with folder-specific sanitization
    if not nzbname:
        # Prevent an embedded password from being damaged by sanitizing and trimming
        nzbname = get_filename(filename)
    try:
        nzo = nzbstuff.NzbObject(
            filename,
            pp=pp,
            script=script,
            nzb_data=data,
            cat=cat,
            url=url,
            priority=priority,
            nzbname=nzbname,
            nzo_info=nzo_info,
            reuse=reuse,
            dup_check=dup_check,
        )
        if not nzo.password:
            nzo.password = password
    except TypeError:
        # Duplicate, ignore
        if nzo_id:
            sabnzbd.NzbQueue.remove(nzo_id)
        nzo = None
    except ValueError:
        # Empty NZB, ignore
        return 1, nzo_ids
    except:
        if data.find("<nzb") >= 0 > data.find("</nzb"):
            # Found "<nzb" but no "</nzb": looks like an incomplete file, retry
            return -2, nzo_ids
        else:
            # Something else is wrong, show error
            logging.error(T("Error while adding %s, removing"), filename, exc_info=True)
            return -1, nzo_ids
    if nzo:
        if nzo_id:
            # Re-use the existing nzo_id when a "future" job gets its payload
            sabnzbd.NzbQueue.remove(nzo_id, delete_all_data=False)
            nzo.nzo_id = nzo_id
        nzo_ids.append(sabnzbd.NzbQueue.add(nzo, quiet=bool(reuse)))
        nzo.update_rating()

    try:
        if not keep:
            filesystem.remove_file(path)
    except OSError:
        # The job was still added to the queue, so log an error but don't report a failed add
        logging.error(T("Error removing %s"), filesystem.clip_path(path))
        logging.info("Traceback: ", exc_info=True)

    return 0, nzo_ids
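
# Hedged usage sketch (hypothetical path; assumes an initialized SABnzbd core):
#
#     status, nzo_ids = process_single_nzb("show.nzb.gz", "/incoming/show.nzb.gz", password="secret")
#     if status == -2:
#         pass  # unreadable or truncated NZB; the caller may retry later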