#!/usr/bin/python3 -OO
# Copyright 2007-2021 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

"""
|
|
sabnzbd.decoder - article decoder
|
|
"""
|
|
|
|
import logging
|
|
import hashlib
|
|
import queue
|
|
from threading import Thread
|
|
from typing import Tuple, List, Optional
|
|
|
|
import sabnzbd
|
|
import sabnzbd.cfg as cfg
|
|
from sabnzbd.constants import SABYENC_VERSION_REQUIRED
|
|
from sabnzbd.nzbstuff import Article
|
|
from sabnzbd.misc import match_str
|
|
|
|
# Check for the correct SABYenc version
SABYENC_VERSION = None
try:
    import sabyenc3

    SABYENC_ENABLED = True
    SABYENC_VERSION = sabyenc3.__version__
    # Verify that the version matches, at least down to the minor version
    if SABYENC_VERSION[:3] != SABYENC_VERSION_REQUIRED[:3]:
        raise ImportError
except ImportError:
    SABYENC_ENABLED = False

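# NOTE: decode() below calls sabyenc3 unconditionally; the surrounding
# application is assumed (not verified in this module) to check
# SABYENC_ENABLED at startup before using the decoder.

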
class CrcError(Exception):
    """Raised when the decoded article data fails the CRC32 check"""

    def __init__(self, needcrc: int, gotcrc: int, data: bytes):
        super().__init__()
        self.needcrc = needcrc
        self.gotcrc = gotcrc
        self.data = data


class BadYenc(Exception):
    """Raised for missing or badly formed yEnc articles"""

    def __init__(self):
        super().__init__()


class Decoder:
    """Implement thread-like coordinator for the decoders"""

    def __init__(self):
        logging.debug("Initializing decoders")
        # Initialize the decoder queue
        self.decoder_queue = queue.Queue()

        # Initialize the decoder workers
        self.decoder_workers = []
        for _ in range(cfg.num_decoders()):
            self.decoder_workers.append(DecoderWorker(self.decoder_queue))

    def start(self):
        """Start all decoder workers"""
        for decoder_worker in self.decoder_workers:
            decoder_worker.start()

    def is_alive(self) -> bool:
        """Check that all decoder workers are still alive"""
        for decoder_worker in self.decoder_workers:
            if not decoder_worker.is_alive():
                return False
        return True

    def stop(self):
        # Put one sentinel per worker to stop all decoders
        for _ in self.decoder_workers:
            self.decoder_queue.put((None, None))

    def join(self):
        # Wait for all decoders to finish
        for decoder_worker in self.decoder_workers:
            try:
                decoder_worker.join()
            except:
                pass

    def process(self, article: Article, raw_data: List[bytes]):
        # We use the reported article-size, just like sabyenc does
        sabnzbd.ArticleCache.reserve_space(article.bytes)
        self.decoder_queue.put((article, raw_data))

    def queue_full(self) -> bool:
        # Check if the queue size exceeds the limit
        return self.decoder_queue.qsize() >= sabnzbd.ArticleCache.decoder_cache_article_limit

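# Illustrative lifecycle sketch (assumed caller behavior, not part of this
# module): the application creates a single Decoder, the downloader feeds it
# articles, and shutdown drains the workers:
#
#   decoder = Decoder()
#   decoder.start()
#   decoder.process(article, raw_data)  # repeatedly, from the downloader
#   decoder.stop()
#   decoder.join()

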
class DecoderWorker(Thread):
    """The actual workhorse that handles decoding!"""

    def __init__(self, decoder_queue):
        super().__init__()
        logging.debug("Initializing decoder %s", self.name)

        self.decoder_queue: queue.Queue[Tuple[Optional[Article], Optional[List[bytes]]]] = decoder_queue

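    # Queue protocol: Decoder.process() enqueues (article, raw_data) pairs;
    # Decoder.stop() enqueues one (None, None) sentinel per worker, which
    # makes the loop below shut the worker down.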
    def run(self):
        while True:
            # Set Article and NzbObject objects to None so references from this
            # thread do not keep the parent objects alive (see #1628)
            decoded_data = raw_data = article = nzo = None
            article, raw_data = self.decoder_queue.get()
            if not article:
                logging.info("Shutting down decoder %s", self.name)
                break

            nzo = article.nzf.nzo
            art_id = article.article

            # Free the reserved space in the decoder-queue
            sabnzbd.ArticleCache.free_reserved_space(article.bytes)

            # Keeping track
            article_success = False

            try:
                if nzo.precheck:
                    raise BadYenc

                if sabnzbd.LOG_ALL:
                    logging.debug("Decoding %s", art_id)

                decoded_data = decode(article, raw_data)
                article_success = True

            except MemoryError:
                logging.warning(T("Decoder failure: Out of memory"))
                logging.info("Decoder-Queue: %d", self.decoder_queue.qsize())
                logging.info("Cache: %d, %d, %d", *sabnzbd.ArticleCache.cache_info())
                logging.info("Traceback: ", exc_info=True)
                sabnzbd.Downloader.pause()

                # This article should be fetched again
                sabnzbd.NzbQueue.reset_try_lists(article)
                continue

            except CrcError as crc_error:
                logging.info("CRC Error in %s", art_id)

                # Continue to the next one if we found a new server
                if search_new_server(article):
                    continue

                # Store the data anyway, maybe par2 can still fix it
                decoded_data = crc_error.data

            except (BadYenc, ValueError):
                # Handles precheck and badly formed articles
                if nzo.precheck and raw_data and raw_data[0].startswith(b"223 "):
                    # STAT was used, so we only get a status code
                    article_success = True
                else:
                    # Examine headers (for precheck) or body (for download)
                    # Look for DMCA clues (while skipping "X-" headers)
                    # Detect potential UUencode
                    for line in raw_data:
                        lline = line.lower()
                        if b"message-id:" in lline:
                            article_success = True
                        if not lline.startswith(b"x-") and match_str(
                            lline, (b"dmca", b"removed", b"cancel", b"blocked")
                        ):
                            article_success = False
                            logging.info("Article removed from server (%s)", art_id)
                            break
                        if lline.find(b"\nbegin ") >= 0:
                            logme = T("UUencode detected, only yEnc encoding is supported [%s]") % nzo.final_name
                            logging.error(logme)
                            nzo.fail_msg = logme
                            sabnzbd.NzbQueue.end_job(nzo)
                            break

                # Pre-check: a proper article was found, so just register it
                if nzo.precheck and article_success and sabnzbd.LOG_ALL:
                    logging.debug("Server %s has article %s", article.fetcher, art_id)
                elif not article_success:
                    # If this is not a pre-check, it must be a bad article
                    if not nzo.precheck:
                        logging.info("Badly formed yEnc article in %s", art_id, exc_info=True)

                    # Continue to the next one if we found a new server
                    if search_new_server(article):
                        continue

            except:
                logging.warning(T("Unknown Error while decoding %s"), art_id)
                logging.info("Traceback: ", exc_info=True)

                # Continue to the next one if we found a new server
                if search_new_server(article):
                    continue

            if decoded_data:
                # If the cache is full the data has to be written to disk, which is
                # slow and causes the decoder-queue to fill up and delay the downloader
                sabnzbd.ArticleCache.save_article(article, decoded_data)

            sabnzbd.NzbQueue.register_article(article, article_success)


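# A minimal sketch of how decode() reports CRC failures (illustrative only,
# mirroring the CrcError handler in DecoderWorker.run above):
#
#   try:
#       data = decode(article, raw_data)
#   except CrcError as e:
#       data = e.data  # damaged data is kept so par2 may still repair it

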
def decode(article: Article, raw_data: List[bytes]) -> bytes:
    """Decode a yEnc-encoded article and return the decoded data.
    Raises CrcError (with the damaged data attached) on a CRC mismatch."""
    # Let SABYenc do all the heavy lifting
    decoded_data, yenc_filename, crc, crc_expected, crc_correct = sabyenc3.decode_usenet_chunks(raw_data, article.bytes)

    # Mark as decoded
    article.decoded = True

    # Assume it is yEnc
    article.nzf.type = "yenc"

    # Only set the name if it was found and not obfuscated
    if not article.nzf.filename_checked and yenc_filename:
        # Set the md5-of-16k if this is the first article
        if article.lowest_partnum:
            article.nzf.md5of16k = hashlib.md5(decoded_data[:16384]).digest()

        # Try the rename, even if it's not the first article
        # For example when the first article was missing
        article.nzf.nzo.verify_nzf_filename(article.nzf, yenc_filename)

    # CRC check
    if not crc_correct:
        raise CrcError(crc_expected, crc, decoded_data)

    return decoded_data


def search_new_server(article: Article) -> bool:
    """Shorthand to search for a new server, increasing the bad-article
    counter if none is found. Callers continue to the next article on True."""
    if not article.search_new_server():
        # Increase the bad-article counter if no new server was found
        article.nzf.nzo.increase_bad_articles_counter("bad_articles")
        return False
    return True