#!/usr/bin/python3 -OO
# Copyright 2007-2021 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

"""
sabnzbd.urlgrabber - Queue for grabbing NZB files from websites
"""

import os
import sys
import time
import logging
import queue
import urllib.request
import urllib.error
import urllib.parse
from http.client import IncompleteRead, HTTPResponse
from threading import Thread
import base64
from typing import Tuple, Optional

import sabnzbd
from sabnzbd.constants import DEF_TIMEOUT, FUTURE_Q_FOLDER, VALID_NZB_FILES, Status, VALID_ARCHIVES
import sabnzbd.misc as misc
import sabnzbd.filesystem
import sabnzbd.cfg as cfg
import sabnzbd.emailer as emailer
import sabnzbd.notifier as notifier
from sabnzbd.encoding import ubtou, utob
from sabnzbd.nzbstuff import NzbObject

_RATING_FIELDS = (
    "x-rating-id",
    "x-rating-url",
    "x-rating-host",
    "x-rating-video",
    "x-rating-videocnt",
    "x-rating-audio",
    "x-rating-audiocnt",
    "x-rating-voteup",
    "x-rating-votedown",
    "x-rating-spam",
    "x-rating-confirmed-spam",
    "x-rating-passworded",
    "x-rating-confirmed-passworded",
)
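# The rating fields above are copied verbatim into nzo_info by URLGrabber.run()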


class URLGrabber(Thread):
    def __init__(self):
        super().__init__()
        self.queue: queue.Queue[Tuple[Optional[str], Optional[NzbObject]]] = queue.Queue()
        for url_nzo_tup in sabnzbd.NzbQueue.get_urls():
            self.queue.put(url_nzo_tup)
        self.shutdown = False

    def add(self, url: str, future_nzo: NzbObject, when: Optional[int] = None):
        """Add a URL to the URLGrabber queue; 'when' is seconds from now"""
        if future_nzo and when:
            # Always increase counter
            future_nzo.url_tries += 1

            # Too many tries? Cancel
            if future_nzo.url_tries > cfg.max_url_retries():
                self.fail_to_history(future_nzo, url, T("Maximum retries"))
                return

            future_nzo.url_wait = time.time() + when

        self.queue.put((url, future_nzo))
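
    # Illustrative usage only (hypothetical URL; "grabber" stands for the running
    # URLGrabber instance): schedule a retry roughly five minutes from now, which
    # also counts the attempt against cfg.max_url_retries():
    #   grabber.add("https://indexer.example/getnzb?id=1234", future_nzo, when=300)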

    def stop(self):
        self.shutdown = True
        self.queue.put((None, None))

    def run(self):
        self.shutdown = False
        while not self.shutdown:
            # Clear the NzbObject reference on every pass, so a reference held
            # by this thread cannot keep the object alive later on (see #1628)
            future_nzo = None
            url, future_nzo = self.queue.get()

            if not url:
                # Stop signal; go test self.shutdown
                continue

            if future_nzo:
                # Re-queue when it is too early and the job is still active
                if future_nzo.url_wait and future_nzo.url_wait > time.time():
                    self.add(url, future_nzo)
                    time.sleep(1.0)
                    continue
                # Paused
                if future_nzo.status == Status.PAUSED:
                    self.add(url, future_nzo)
                    time.sleep(1.0)
                    continue

            url = url.replace(" ", "")

            try:
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except AttributeError:
                        deleted = True
                    if deleted:
                        logging.debug("Dropping URL %s, job entry missing", url)
                        continue

                filename = None
                category = None
                nzo_info = {}
                wait = 0
                retry = True
                fetch_request = None
logging.info("Grabbing URL %s", url)
|
|
|
|
try:
|
|
|
|
fetch_request = _build_request(url)
|
|
|
|
except Exception as e:
|
|
|
|
# Cannot list exceptions here, because of unpredictability over platforms
|
|
|
|
error0 = str(sys.exc_info()[0]).lower()
|
|
|
|
error1 = str(sys.exc_info()[1]).lower()
|
|
|
|
logging.debug('Error "%s" trying to get the url %s', error1, url)
|
|
|
|
if "certificate_verify_failed" in error1 or "certificateerror" in error0:
|
|
|
|
msg = T("Server %s uses an untrusted HTTPS certificate") % ""
|
|
|
|
msg += " - https://sabnzbd.org/certificate-errors"
|
|
|
|
retry = False
|
|
|
|
elif "nodename nor servname provided" in error1:
|
|
|
|
msg = T("Server name does not resolve")
|
|
|
|
retry = False
|
|
|
|
elif "401" in error1 or "unauthorized" in error1:
|
|
|
|
msg = T("Unauthorized access")
|
|
|
|
retry = False
|
|
|
|
elif "404" in error1:
|
|
|
|
msg = T("File not on server")
|
|
|
|
retry = False
|
|
|
|
elif hasattr(e, "headers") and "retry-after" in e.headers:
|
|
|
|
# Catch if the server send retry (e.headers is case-INsensitive)
|
|
|
|
wait = misc.int_conv(e.headers["retry-after"])
|
|
|
|
|
|
|
|

                if fetch_request:
                    for hdr in fetch_request.headers:
                        try:
                            item = hdr.lower()
                            value = fetch_request.headers[hdr]
                        except Exception:
                            continue
                        if item in ("category_id", "x-dnzb-category"):
                            category = value
                        elif item in ("x-dnzb-moreinfo",):
                            nzo_info["more_info"] = value
                        elif item in ("x-dnzb-name",):
                            filename = value
                            if not filename.endswith(".nzb"):
                                filename += ".nzb"
                        elif item == "x-dnzb-propername":
                            nzo_info["propername"] = value
                        elif item == "x-dnzb-episodename":
                            nzo_info["episodename"] = value
                        elif item == "x-dnzb-year":
                            nzo_info["year"] = value
                        elif item == "x-dnzb-failure":
                            nzo_info["failure"] = value
                        elif item == "x-dnzb-details":
                            nzo_info["details"] = value
                        elif item == "x-dnzb-password":
                            nzo_info["password"] = value
                        elif item == "retry-after":
                            wait = misc.int_conv(value)

                        # Rating fields
                        if item in _RATING_FIELDS:
                            nzo_info[item] = value

                        # Get the filename from the Content-Disposition header
                        if not filename and "filename=" in value:
                            filename = value[value.index("filename=") + 9 :].strip(";").strip('"')
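
                # Illustrative example of the slice above (hypothetical header):
                #   Content-Disposition: attachment; filename="My.Show.S01E01.nzb"
                # yields filename == "My.Show.S01E01.nzb" after any trailing ';'
                # and surrounding quotes are stripped.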

                if wait:
                    # For sites that have a rate-limiting attribute
                    msg = ""
                    retry = True
                    fetch_request = None
                elif retry:
                    fetch_request, msg, retry, wait, data = _analyse(fetch_request, future_nzo)

                if not fetch_request:
                    if retry:
                        logging.info("Retry URL %s", url)
                        self.add(url, future_nzo, wait)
                    else:
                        self.fail_to_history(future_nzo, url, msg)
                    continue

                if not filename:
                    filename = os.path.basename(urllib.parse.unquote(url))

                # The URL may have been redirected; maybe the redirect has a better filename?
                # Only use it when the original URL gave no valid extension
                if (
                    url != fetch_request.geturl()
                    and sabnzbd.filesystem.get_ext(filename) not in VALID_NZB_FILES + VALID_ARCHIVES
                ):
                    filename = os.path.basename(urllib.parse.unquote(fetch_request.geturl()))
                elif "&nzbname=" in filename:
                    # Sometimes the filename contains the full URL, duh!
                    filename = filename[filename.find("&nzbname=") + 9 :]

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == "*") and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # Process the data
                if not data:
                    try:
                        data = fetch_request.read()
                    except (IncompleteRead, IOError):
                        self.fail_to_history(future_nzo, url, T("Server could not complete request"))
                        fetch_request.close()
                        continue
                fetch_request.close()
if b"<nzb" in data and sabnzbd.filesystem.get_ext(filename) != ".nzb":
|
|
|
|
filename += ".nzb"
|
|
|
|
|
|
|
|
# Sanitize filename first (also removing forbidden Windows-names)
|
|
|
|
filename = sabnzbd.filesystem.sanitize_filename(filename)
|
|
|
|
|
|
|
|
# If no filename, make one
|
|
|
|
if not filename:
|
|
|
|
filename = sabnzbd.get_new_id("url", os.path.join(cfg.admin_dir.get_path(), FUTURE_Q_FOLDER))
|
|
|
|
|
|
|
|
# Write data to temp file
|
|
|
|
path = os.path.join(cfg.admin_dir.get_path(), FUTURE_Q_FOLDER, filename)
|
|
|
|
with open(path, "wb") as temp_nzb:
|
|
|
|
temp_nzb.write(data)
|
|
|
|
|
|
|
|

                # Check whether it is an NZB file or an archive
                if sabnzbd.filesystem.get_ext(filename) in VALID_ARCHIVES + VALID_NZB_FILES:
                    res, _ = sabnzbd.add_nzbfile(
                        path,
                        pp=pp,
                        script=script,
                        cat=cat,
                        priority=priority,
                        nzbname=nzbname,
                        nzo_info=nzo_info,
                        url=future_nzo.url,
                        keep=False,
                        password=future_nzo.password,
                        nzo_id=future_nzo.nzo_id,
                    )
                    # -2==Error/retry, -1==Error, 0==OK, 1==Empty
                    if res == -2:
                        logging.info("Incomplete NZB, retry after 5 min %s", url)
                        self.add(url, future_nzo, when=300)
                    elif res == -1:
                        # Error already thrown
                        self.fail_to_history(future_nzo, url)
                    elif res == 1:
                        # No NZB files inside the archive
                        self.fail_to_history(future_nzo, url, T("Empty NZB file %s") % filename)
                else:
                    logging.info("Unknown filetype when fetching NZB, retry after 30s %s", url)
                    self.add(url, future_nzo, 30)

                # Always clean up what we wrote to disk
                try:
                    sabnzbd.filesystem.remove_file(path)
                except Exception:
                    pass
            except Exception:
                logging.error(T("URLGRABBER CRASHED"), exc_info=True)
                logging.debug("URLGRABBER Traceback: ", exc_info=True)

    @staticmethod
    def fail_to_history(nzo: NzbObject, url: str, msg="", content=False):
        """Create a History entry for a failed URL fetch
        msg: message to be logged
        content: report in the history that the cause is a bad NZB file
        """
        # Remove the "Trying to fetch" part
        if url:
            nzo.filename = url
            nzo.final_name = url.strip()

        if content:
            # Bad content
            msg = T("Unusable NZB file")
        else:
            # Failed fetch
            msg = T("URL Fetching failed; %s") % msg

        # Mark as failed
        nzo.set_unpack_info("Source", msg)
        nzo.fail_msg = msg

        notifier.send_notification(T("URL Fetching failed; %s") % "", "%s\n%s" % (msg, url), "failed", nzo.cat)
        if cfg.email_endjob() > 0:
            emailer.badfetch_mail(msg, url)

        # Parse the category to make sure the script is set correctly after a grab
        nzo.cat, _, nzo.script, _ = misc.cat_to_opts(nzo.cat, script=nzo.script)

        # Add to history and run the script if desired
        sabnzbd.NzbQueue.remove(nzo.nzo_id)
        sabnzbd.PostProcessor.process(nzo)


def _build_request(url: str) -> HTTPResponse:
    # Detect basic auth
    # Adapted from python-feedparser
    user_passwd = None
    u = urllib.parse.urlparse(url)
    if u.username is not None or u.password is not None:
        if u.username and u.password:
            user_passwd = "%s:%s" % (u.username, u.password)
        # Rebuild the URL without the credentials
        host_port = u.hostname
        if u.port:
            host_port += ":" + str(u.port)
        url = urllib.parse.urlunparse(u._replace(netloc=host_port))

    # Start the request
    req = urllib.request.Request(url)

    # Add headers
    req.add_header("User-Agent", "SABnzbd/%s" % sabnzbd.__version__)
    req.add_header("Accept-encoding", "gzip")
    if user_passwd:
        req.add_header("Authorization", "Basic " + ubtou(base64.b64encode(utob(user_passwd))).strip())
    return urllib.request.urlopen(req)
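
# Illustrative only (hypothetical URL and credentials): for
#   _build_request("https://user:secret@indexer.example/getnzb?id=1234")
# the credentials are removed from the netloc and sent as a header instead:
#   Authorization: Basic dXNlcjpzZWNyZXQ=
# alongside the SABnzbd User-Agent and "Accept-encoding: gzip" headers.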


def _analyse(fetch_request: HTTPResponse, future_nzo: NzbObject):
    """Analyze the response of the indexer
    returns fetch_request|None, error-message|None, retry, wait-seconds, data
    """
    data = None
    if not fetch_request or fetch_request.getcode() != 200:
        if fetch_request:
            msg = fetch_request.msg
        else:
            msg = ""

        # Increase the wait time in steps for standard errors
        when = DEF_TIMEOUT * (future_nzo.url_tries + 1)
        logging.debug("No usable response from indexer, retry after %s sec", when)
        return None, msg, True, when, data

    return fetch_request, fetch_request.msg, False, 0, data
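
# Illustrative only: outcomes of _analyse() for a job on its first attempt
# (future_nzo.url_tries == 0):
#   HTTP 200 response              -> (fetch_request, "OK", False, 0, None)
#   no response or non-200 status  -> (None, msg or "", True, DEF_TIMEOUT, None)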