#!/usr/bin/python3 -OO
# Copyright 2007-2021 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
sabnzbd.decoder - article decoder
"""
import logging
import hashlib
import queue
from threading import Thread
from typing import Tuple, List, Optional

import sabnzbd
import sabnzbd.cfg as cfg
from sabnzbd.constants import SABYENC_VERSION_REQUIRED
from sabnzbd.nzbstuff import Article
from sabnzbd.misc import match_str

# Check for correct SABYenc version
SABYENC_VERSION = None
try:
    import sabyenc3

    SABYENC_ENABLED = True
    SABYENC_VERSION = sabyenc3.__version__
    # Verify version to at least match minor version
    if SABYENC_VERSION[:3] != SABYENC_VERSION_REQUIRED[:3]:
        raise ImportError
except ImportError:
    SABYENC_ENABLED = False
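
# Note: a version mismatch is treated the same as a missing module; the check
# above deliberately raises ImportError so that a single except clause leaves
# SABYENC_ENABLED = False in either case.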


class CrcError(Exception):
    def __init__(self, needcrc: int, gotcrc: int, data: bytes):
        super().__init__()
        self.needcrc = needcrc
        self.gotcrc = gotcrc
        self.data = data
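
# CrcError (above) carries the decoded but possibly corrupt data together with
# the expected and computed CRC values, so the caller can decide to keep the
# article anyway and leave the repair to par2.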


class BadYenc(Exception):
    def __init__(self):
        super().__init__()


class Decoder:
    """Implement thread-like coordinator for the decoders"""

    def __init__(self):
        logging.debug("Initializing decoders")

        # Initialize queue and servers
        self.decoder_queue = queue.Queue()

        # Initialize decoders
        self.decoder_workers = []
        for i in range(cfg.num_decoders()):
            self.decoder_workers.append(DecoderWorker(self.decoder_queue))

    def start(self):
        for decoder_worker in self.decoder_workers:
            decoder_worker.start()

    def is_alive(self) -> bool:
        # Check all workers
        for decoder_worker in self.decoder_workers:
            if not decoder_worker.is_alive():
                return False
        return True

    def stop(self):
        # Put multiple to stop all decoders
        for _ in self.decoder_workers:
            self.decoder_queue.put((None, None))

    def join(self):
        # Wait for all decoders to finish
        for decoder_worker in self.decoder_workers:
            try:
                decoder_worker.join()
            except:
                pass

    def process(self, article: Article, raw_data: List[bytes]):
        # We use reported article-size, just like sabyenc does
        sabnzbd.ArticleCache.reserve_space(article.bytes)
        self.decoder_queue.put((article, raw_data))

    def queue_full(self) -> bool:
        # Check if the queue size exceeds the limits
        return self.decoder_queue.qsize() >= sabnzbd.ArticleCache.decoder_cache_article_limit
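
# Producer/consumer layout: Decoder.process() reserves cache space and puts
# (article, raw_data) tuples on the shared queue; each DecoderWorker thread
# pulls from that queue, decodes the article and hands the result to the
# ArticleCache and NzbQueue. Decoder.stop() pushes one (None, None) sentinel
# per worker to shut them all down.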


class DecoderWorker(Thread):
    """The actual workhorse that handles decoding!"""

    def __init__(self, decoder_queue):
        super().__init__()
        logging.debug("Initializing decoder %s", self.name)

        self.decoder_queue: queue.Queue[Tuple[Optional[Article], Optional[List[bytes]]]] = decoder_queue

    def run(self):
        while 1:
            # Set Article and NzbObject objects to None so references from this
            # thread do not keep the parent objects alive (see #1628)
            decoded_data = raw_data = article = nzo = None

            article, raw_data = self.decoder_queue.get()
            if not article:
                logging.info("Shutting down decoder %s", self.name)
                break

            nzo = article.nzf.nzo
            art_id = article.article

            # Free space in the decoder-queue
            sabnzbd.ArticleCache.free_reserved_space(article.bytes)

            # Keeping track
            article_success = False

            try:
                if nzo.precheck:
                    raise BadYenc

                if sabnzbd.LOG_ALL:
                    logging.debug("Decoding %s", art_id)

                decoded_data = decode(article, raw_data)
                article_success = True

            except MemoryError:
                logging.warning(T("Decoder failure: Out of memory"))
                logging.info("Decoder-Queue: %d", self.decoder_queue.qsize())
                logging.info("Cache: %d, %d, %d", *sabnzbd.ArticleCache.cache_info())
                logging.info("Traceback: ", exc_info=True)
                sabnzbd.Downloader.pause()

                # This article should be fetched again
                sabnzbd.NzbQueue.reset_try_lists(article)
                continue

            except CrcError as crc_error:
                logging.info("CRC Error in %s", art_id)

                # Continue to the next one if we found new server
                if search_new_server(article):
                    continue

                # Store data, maybe par2 can still fix it
                decoded_data = crc_error.data

            except (BadYenc, ValueError):
                # Handles precheck and badly formed articles
                if nzo.precheck and raw_data and raw_data[0].startswith(b"223 "):
                    # STAT was used, so we only get a status code
                    article_success = True
                else:
                    # Examine headers (for precheck) or body (for download)
                    # Look for DMCA clues (while skipping "X-" headers)
                    # Detect potential UUencode
                    for line in raw_data:
                        lline = line.lower()
                        if b"message-id:" in lline:
                            article_success = True
                        if not lline.startswith(b"x-") and match_str(
                            lline, (b"dmca", b"removed", b"cancel", b"blocked")
                        ):
                            article_success = False
                            logging.info("Article removed from server (%s)", art_id)
                            break
                        if lline.find(b"\nbegin ") >= 0:
                            logme = T("UUencode detected, only yEnc encoding is supported [%s]") % nzo.final_name
                            logging.error(logme)
                            nzo.fail_msg = logme
                            sabnzbd.NzbQueue.end_job(nzo)
                            break

                # Pre-check, proper article found so just register
                if nzo.precheck and article_success and sabnzbd.LOG_ALL:
                    logging.debug("Server %s has article %s", article.fetcher, art_id)
                elif not article_success:
                    # If not pre-check, this must be a bad article
                    if not nzo.precheck:
                        logging.info("Badly formed yEnc article in %s", art_id, exc_info=True)

                    # Continue to the next one if we found new server
                    if search_new_server(article):
                        continue

            except:
                logging.warning(T("Unknown Error while decoding %s"), art_id)
                logging.info("Traceback: ", exc_info=True)

                # Continue to the next one if we found new server
                if search_new_server(article):
                    continue

            if decoded_data:
                # If the data needs to be written to disk due to full cache, this will be slow
                # Causing the decoder-queue to fill up and delay the downloader
                sabnzbd.ArticleCache.save_article(article, decoded_data)

            sabnzbd.NzbQueue.register_article(article, article_success)
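
# decode() below is a thin wrapper around the sabyenc3 C extension; on a
# checksum mismatch it raises CrcError, which run() above catches so the
# possibly repairable data is not thrown away.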


def decode(article: Article, raw_data: List[bytes]) -> bytes:
    # Let SABYenc do all the heavy lifting
    decoded_data, yenc_filename, crc, crc_expected, crc_correct = sabyenc3.decode_usenet_chunks(
        raw_data, article.bytes
    )

    # Mark as decoded
    article.decoded = True

    # Assume it is yenc
    article.nzf.type = "yenc"

    # Only set the name if it was found and not obfuscated
    if not article.nzf.filename_checked and yenc_filename:
        # Set the md5-of-16k if this is the first article
        if article.lowest_partnum:
            article.nzf.md5of16k = hashlib.md5(decoded_data[:16384]).digest()

        # Try the rename, even if it's not the first article
        # For example when the first article was missing
        article.nzf.nzo.verify_nzf_filename(article.nzf, yenc_filename)

    # CRC check
    if not crc_correct:
        raise CrcError(crc_expected, crc, decoded_data)

    return decoded_data


def search_new_server(article: Article) -> bool:
    """Shorthand for searching new server or else increasing bad_articles"""
    # Continue to the next one if we found new server
    if not article.search_new_server():
        # Increase bad articles if no new server was found
        article.nzf.nzo.increase_bad_articles_counter("bad_articles")
        return False
    return True
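

# Illustrative sketch only (not part of this module): elsewhere in SABnzbd the
# decoder is driven roughly like this, using only the methods defined above.
# The `article` and `raw_data` values stand in for a fetched article.
#
#   decoder = Decoder()
#   decoder.start()
#   if not decoder.queue_full():
#       decoder.process(article, raw_data)  # reserve cache space and enqueue
#   ...
#   decoder.stop()   # push one sentinel per worker
#   decoder.join()   # wait for the workers to exit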