You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
365 lines
13 KiB
365 lines
13 KiB
#!/usr/bin/python3 -OO
|
|
# Copyright 2008-2017 The SABnzbd-Team <team@sabnzbd.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
"""
|
|
sabnzbd.nzbparser - Parse and import NZB files
|
|
"""
|
|
import bz2
|
|
import gzip
|
|
import re
|
|
import time
|
|
import logging
|
|
import hashlib
|
|
import xml.etree.ElementTree
|
|
import datetime
|
|
from typing import Optional, Dict, Any, Union
|
|
|
|
import sabnzbd
|
|
from sabnzbd import filesystem, nzbstuff
|
|
from sabnzbd.constants import Status
|
|
from sabnzbd.encoding import utob, correct_unknown_encoding
|
|
from sabnzbd.filesystem import is_archive, get_filename
|
|
from sabnzbd.misc import name_to_cat
|
|
|
|
|
|
def nzbfile_parser(raw_data, nzo):
    """Parse an NZB document and register its files/articles on *nzo*.

    Strips the default xmlns declaration first so ``find()``/``iter()`` can use
    plain tag names. Side effects on *nzo*: fills ``meta``, ``groups``,
    ``files``, ``files_table``, updates ``bytes``, ``avg_stamp``/``avg_date``
    and sets ``md5sum`` (hash over all article-ids, used for dupe-checking).
    """
    # Remove the namespace declaration so element lookups use bare tag names
    raw_data = re.sub(r"""\s(xmlns="[^"]+"|xmlns='[^']+')""", "", raw_data, count=1)
    nzb_tree = xml.etree.ElementTree.fromstring(raw_data)

    # Hash for dupe-checking
    md5sum = hashlib.md5()

    # Sum of file timestamps, used to compute the average age below
    avg_age_sum = 0

    # In case of failing timestamps and failing files
    time_now = time.time()
    skipped_files = 0
    valid_files = 0

    # Parse the header. Use "is not None": truth-testing an Element is
    # deprecated, and an empty <head/> behaves identically either way
    # (iterating it simply yields nothing).
    head = nzb_tree.find("head")
    if head is not None:
        for meta in head.iter("meta"):
            meta_type = meta.attrib.get("type")
            if meta_type and meta.text:
                # Meta tags can occur multiple times
                if meta_type not in nzo.meta:
                    nzo.meta[meta_type] = []
                nzo.meta[meta_type].append(meta.text)
    logging.debug("NZB file meta-data = %s", nzo.meta)

    # Parse the files
    for file in nzb_tree.iter("file"):
        # The subject doubles as the file name; fall back to empty string
        file_name = file.attrib.get("subject") or ""

        # Don't fail if no date present: a missing attribute raises TypeError
        # (int(None)), a malformed one ValueError — fall back to "now"
        try:
            file_timestamp = int(file.attrib.get("date"))
            file_date = datetime.datetime.fromtimestamp(file_timestamp)
        except (TypeError, ValueError):
            file_date = datetime.datetime.fromtimestamp(time_now)
            file_timestamp = time_now

        # Get group(s)
        for group in file.iter("group"):
            if group.text not in nzo.groups:
                nzo.groups.append(group.text)

        # Get segments
        raw_article_db = {}
        file_bytes = 0
        segments = file.find("segments")
        if segments is not None:
            for segment in segments.iter("segment"):
                try:
                    article_id = segment.text
                    segment_size = int(segment.attrib.get("bytes"))
                    partnum = int(segment.attrib.get("number"))

                    # Update hash
                    md5sum.update(utob(article_id))

                    # Duplicate parts?
                    if partnum in raw_article_db:
                        if article_id != raw_article_db[partnum][0]:
                            logging.info(
                                "Duplicate part %s, but different ID-s (%s // %s)",
                                partnum,
                                raw_article_db[partnum][0],
                                article_id,
                            )
                            nzo.increase_bad_articles_counter("duplicate_articles")
                        else:
                            logging.info("Skipping duplicate article (%s)", article_id)
                    elif segment_size <= 0 or segment_size >= 2 ** 23:
                        # Perform sanity check (not negative, 0 or larger than 8MB) on article size
                        # We use this value later to allocate memory in cache and sabyenc
                        logging.info("Skipping article %s due to strange size (%s)", article_id, segment_size)
                        nzo.increase_bad_articles_counter("bad_articles")
                    else:
                        raw_article_db[partnum] = (article_id, segment_size)
                        file_bytes += segment_size
                except (TypeError, ValueError):
                    # In case of missing or malformed attributes; narrowed from a
                    # bare except so real bugs are no longer silently swallowed
                    pass

        # Sort the articles by part number, compatible with Python 3.5
        raw_article_db_sorted = [raw_article_db[partnum] for partnum in sorted(raw_article_db)]

        # Create NZF
        nzf = sabnzbd.nzbstuff.NzbFile(file_date, file_name, raw_article_db_sorted, file_bytes, nzo)

        # Check if we already have this exact NZF (see custom eq-checks)
        if nzf in nzo.files:
            logging.info("File %s occurred twice in NZB, skipping", nzf.filename)
            continue

        # Add valid NZF's
        if file_name and nzf.valid and nzf.nzf_id:
            logging.info("File %s added to queue", nzf.filename)
            nzo.files.append(nzf)
            nzo.files_table[nzf.nzf_id] = nzf
            nzo.bytes += nzf.bytes
            valid_files += 1
            avg_age_sum += file_timestamp
        else:
            logging.info("Error importing %s, skipping", file_name)
            if nzf.nzf_id:
                sabnzbd.remove_data(nzf.nzf_id, nzo.admin_path)
            skipped_files += 1

    # Final bookkeeping; max(1, ...) guards against division by zero
    nr_files = max(1, valid_files)
    nzo.avg_stamp = avg_age_sum / nr_files
    nzo.avg_date = datetime.datetime.fromtimestamp(avg_age_sum / nr_files)
    nzo.md5sum = md5sum.hexdigest()

    if skipped_files:
        logging.warning(T("Failed to import %s files from %s"), skipped_files, nzo.filename)
|
|
|
|
|
|
def process_nzb_archive_file(
    filename: str,
    path: str,
    pp: Optional[int] = None,
    script: Optional[str] = None,
    cat: Optional[str] = None,
    catdir: Optional[str] = None,
    keep: bool = False,
    priority: Optional[Union[Status, str]] = None,
    nzbname: Optional[str] = None,
    reuse: Optional[str] = None,
    nzo_info: Optional[Dict[str, Any]] = None,
    dup_check: bool = True,
    url: Optional[str] = None,
    password: Optional[str] = None,
    nzo_id: Optional[str] = None,
):
    """Analyse an archive file at *path* and create job(s) from the NZBs inside.

    Accepts archives with ONLY nzb/nfo/folder files in them.
    Returns ``(status, nzo_ids)`` where
    status: -1==Error/Retry, 0==OK, 1==Ignore (no NZB found / not usable).
    Unless *keep* is True, the archive is deleted after successful import.
    """
    nzo_ids = []
    if catdir is None:
        catdir = cat

    # Strip a category prefix from the filename, if present
    filename, cat = name_to_cat(filename, catdir)
    # Returns -1==Error/Retry, 0==OK, 1==Ignore
    status, zf, extension = is_archive(path)

    if status != 0:
        return status, []

    # First pass: does the archive contain any NZB at all?
    status = 1
    names = zf.namelist()
    nzbcount = 0
    for name in names:
        name = name.lower()
        if name.endswith(".nzb"):
            status = 0
            nzbcount += 1

    if status == 0:
        # A single override name only makes sense for exactly one NZB
        if nzbcount != 1:
            nzbname = None
        for name in names:
            if name.lower().endswith(".nzb"):
                try:
                    data = correct_unknown_encoding(zf.read(name))
                except OSError:
                    # Unreadable archive member: abort the whole import
                    logging.error(T("Cannot read %s"), name, exc_info=True)
                    zf.close()
                    return -1, []
                name = filesystem.setname_from_path(name)
                if data:
                    nzo = None
                    try:
                        nzo = nzbstuff.NzbObject(
                            name,
                            pp=pp,
                            script=script,
                            nzb_data=data,
                            cat=cat,
                            url=url,
                            priority=priority,
                            nzbname=nzbname,
                            nzo_info=nzo_info,
                            reuse=reuse,
                            dup_check=dup_check,
                        )
                        # Don't override a password embedded in the NZB name itself
                        if not nzo.password:
                            nzo.password = password
                    except (TypeError, ValueError):
                        # Duplicate or empty, ignore
                        pass
                    except:
                        # Something else is wrong, show error
                        logging.error(T("Error while adding %s, removing"), name, exc_info=True)

                    if nzo:
                        if nzo_id:
                            # Re-use existing nzo_id, when a "future" job gets it payload;
                            # cleared afterwards so only the first NZB re-uses it
                            sabnzbd.NzbQueue.remove(nzo_id, delete_all_data=False)
                            nzo.nzo_id = nzo_id
                            nzo_id = None
                        nzo_ids.append(sabnzbd.NzbQueue.add(nzo))
                        nzo.update_rating()
        zf.close()
        try:
            if not keep:
                filesystem.remove_file(path)
        except OSError:
            # Jobs were already queued; log the cleanup failure only
            logging.error(T("Error removing %s"), filesystem.clip_path(path))
            logging.info("Traceback: ", exc_info=True)
    else:
        # No NZB inside: close and signal "ignore"
        zf.close()
        status = 1

    return status, nzo_ids
|
|
|
|
|
|
def process_single_nzb(
    filename: str,
    path: str,
    pp: Optional[int] = None,
    script: Optional[str] = None,
    cat: Optional[str] = None,
    catdir: Optional[str] = None,
    keep: bool = False,
    priority: Optional[Union[Status, str]] = None,
    nzbname: Optional[str] = None,
    reuse: Optional[str] = None,
    nzo_info: Optional[Dict[str, Any]] = None,
    dup_check: bool = True,
    url: Optional[str] = None,
    password: Optional[str] = None,
    nzo_id: Optional[str] = None,
):
    """Analyze file and create a job from it.

    Supports NZB, NZB.BZ2, NZB.GZ and GZ.NZB-in-disguise (compression is
    detected from magic bytes, not the extension).
    Returns ``(status, nzo_ids)`` where
    status: -2==Error/retry, -1==Error, 0==OK, 1==Empty NZB.
    Unless *keep* is True, the source file is deleted after import.
    """
    nzo_ids = []
    if catdir is None:
        catdir = cat

    try:
        # Sniff the first two bytes to detect the real compression format
        with open(path, "rb") as nzb_file:
            check_bytes = nzb_file.read(2)

        if check_bytes == b"\x1f\x8b":
            # gzip file or gzip in disguise
            filename = filename.replace(".nzb.gz", ".nzb")
            nzb_reader_handler = gzip.GzipFile
        elif check_bytes == b"BZ":
            # bz2 file or bz2 in disguise
            filename = filename.replace(".nzb.bz2", ".nzb")
            nzb_reader_handler = bz2.BZ2File
        else:
            # Plain NZB
            nzb_reader_handler = open

        # Let's get some data and hope we can decode it
        with nzb_reader_handler(path, "rb") as nzb_file:
            data = correct_unknown_encoding(nzb_file.read())

    except OSError:
        logging.warning(T("Cannot read %s"), filesystem.clip_path(path))
        logging.info("Traceback: ", exc_info=True)
        return -2, nzo_ids

    if filename:
        filename, cat = name_to_cat(filename, catdir)
        # The name is used as the name of the folder, so sanitize it using folder specific sanitization
        if not nzbname:
            # Prevent embedded password from being damaged by sanitize and trimming
            nzbname = get_filename(filename)

    try:
        nzo = nzbstuff.NzbObject(
            filename,
            pp=pp,
            script=script,
            nzb_data=data,
            cat=cat,
            url=url,
            priority=priority,
            nzbname=nzbname,
            nzo_info=nzo_info,
            reuse=reuse,
            dup_check=dup_check,
        )
        # Don't override a password embedded in the NZB name itself
        if not nzo.password:
            nzo.password = password
    except TypeError:
        # Duplicate, ignore
        if nzo_id:
            sabnzbd.NzbQueue.remove(nzo_id)
        nzo = None
    except ValueError:
        # Empty
        return 1, nzo_ids
    except:
        # Chained comparison: an opening <nzb tag exists but no closing </nzb
        if data.find("<nzb") >= 0 > data.find("</nzb"):
            # Looks like an incomplete file, retry
            return -2, nzo_ids
        else:
            # Something else is wrong, show error
            logging.error(T("Error while adding %s, removing"), filename, exc_info=True)
            return -1, nzo_ids

    if nzo:
        if nzo_id:
            # Re-use existing nzo_id, when a "future" job gets it payload
            sabnzbd.NzbQueue.remove(nzo_id, delete_all_data=False)
            nzo.nzo_id = nzo_id
        nzo_ids.append(sabnzbd.NzbQueue.add(nzo, quiet=bool(reuse)))
        nzo.update_rating()

    try:
        if not keep:
            filesystem.remove_file(path)
    except OSError:
        # Job was still added to the queue, so throw error but don't report failed add
        logging.error(T("Error removing %s"), filesystem.clip_path(path))
        logging.info("Traceback: ", exc_info=True)

    return 0, nzo_ids
|
|
|