# encoding:utf-8
# ---------------
# functions are placed here to remove cyclic import issues from placement in helpers
#
import codecs
import getpass
import io
import logging
import os
import re
import socket
import stat
import tempfile
import traceback
# noinspection PyPep8Naming
import encodingKludge as ek
from exceptions_helper import ex
from _23 import filter_list, html_unescape, urlparse, urlunparse
from six import iteritems, string_types, text_type
from lib.cachecontrol import CacheControl, caches
from cfscrape import CloudflareScraper
import requests

# noinspection PyUnreachableCode
if False:
    # noinspection PyUnresolvedReferences
    from typing import Any, AnyStr, Dict, NoReturn, Iterable, Iterator, List, Optional, Tuple, Union
    from lxml_etree import etree

# Mapping error status codes to official W3C names
http_error_code = {
    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    307: 'Temporary Redirect',
    308: 'Permanent Redirect',
    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    429: 'Too Many Requests',
    431: 'Request Header Fields Too Large',
    444: 'No Response',
    451: 'Unavailable For Legal Reasons',
    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    511: 'Network Authentication Required'}

logger = logging.getLogger('sg_helper')
logger.addHandler(logging.NullHandler())

USER_AGENT = ''
CACHE_DIR = None
PROXY_SETTING = None
NOTIFIERS = None


# try to convert to int, if it fails the default will be returned
def try_int(s, s_default=0):
    try:
        return int(s)
    except (BaseException, Exception):
        return s_default
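

# Illustrative use of try_int (not part of the original module; kept as comments
# so nothing runs at import time):
#
#   try_int('42')                # -> 42
#   try_int('7.5')               # -> 0, int('7.5') raises ValueError so the default is returned
#   try_int(None, s_default=-1)  # -> -1, the caller supplied default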


def _maybe_request_url(e, def_url=''):
    return hasattr(e, 'request') and hasattr(e.request, 'url') and ' ' + e.request.url or def_url


def clean_data(data):
    """Cleans up strings, lists, dicts returned

    Issues corrected:
    - Replaces &amp; with &
    - Trailing whitespace
    - Decode html entities
    :param data: data
    :type data: List or Dict or AnyStr
    :return:
    :rtype: List or Dict or AnyStr
    """

    if isinstance(data, list):
        return [clean_data(d) for d in data]
    if isinstance(data, dict):
        return {k: clean_data(v) for k, v in iteritems(data)}
    if isinstance(data, string_types):
        return html_unescape(data).strip().replace(u'&amp;', u'&')
    return data
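

# Example of the recursive clean up above (illustrative comments only):
#
#   clean_data(' Team &amp; Crew ')             # -> u'Team & Crew'
#   clean_data({'cast': ['A &amp; B ', 'C ']})  # -> {'cast': [u'A & B', u'C']}
# Lists and dicts are rebuilt with every string member unescaped and stripped.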


def get_system_temp_dir():
    """
    :return: Returns the [system temp dir]/SickGear-u501 (or SickGear-myuser)
    :rtype: AnyStr
    """
    if hasattr(os, 'getuid'):
        uid = 'u%d' % (os.getuid())
    else:
        # For Windows
        try:
            uid = getpass.getuser()
        except ImportError:
            return ek.ek(os.path.join, tempfile.gettempdir(), 'SickGear')

    return ek.ek(os.path.join, tempfile.gettempdir(), 'SickGear-%s' % uid)
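

# Example results (illustrative only; actual values depend on platform and account):
#
#   get_system_temp_dir()  # POSIX   -> '/tmp/SickGear-u1000'
#   get_system_temp_dir()  # Windows -> 'C:\\Users\\me\\AppData\\Local\\Temp\\SickGear-me'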


def proxy_setting(setting, request_url, force=False):
    """
    Returns a tuple of a) the proxy_setting address value, or a PAC is fetched and parsed if proxy_setting
    starts with "PAC:" (case-insensitive), and b) True/False whether "PAC" is found in the proxy_setting.

    The PAC data parser is crude, javascript is not eval'd. The first "PROXY URL" found is extracted with a list
    of "url_a_part.url_remaining", "url_b_part.url_remaining", "url_n_part.url_remaining" and so on.
    Also, PAC data items are escaped for matching therefore regular expression items will not match a request_url.

    If force is True or request_url contains a PAC parsed data item then the PAC proxy address is returned else False.
    None is returned in the event of an error fetching PAC data.

    """

    # check for "PAC" usage
    match = re.search(r'^\s*PAC:\s*(.*)', setting, re.I)
    if not match:
        return setting, False
    pac_url = match.group(1)

    # prevent a recursive test with existing proxy setting when fetching PAC url
    global PROXY_SETTING
    proxy_setting_backup = PROXY_SETTING
    PROXY_SETTING = ''

    resp = ''
    try:
        resp = get_url(pac_url)
    except (BaseException, Exception):
        pass
    PROXY_SETTING = proxy_setting_backup

    if not resp:
        return None, False

    proxy_address = None
    request_url_match = False
    parsed_url = urlparse(request_url)
    netloc = parsed_url.netloc
    for pac_data in re.finditer(r"""(?:[^'"]*['"])([^.]+\.[^'"]*)(?:['"])""", resp, re.I):
        data = re.search(r"""PROXY\s+([^'"]+)""", pac_data.group(1), re.I)
        if data:
            if force:
                return data.group(1), True
            proxy_address = (proxy_address, data.group(1))[None is proxy_address]
        elif re.search(re.escape(pac_data.group(1)), netloc, re.I):
            request_url_match = True
            if None is not proxy_address:
                break

    if None is proxy_address:
        return None, True

    return (False, proxy_address)[request_url_match], True
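

# Illustrative return values (comments only; the addresses below are made-up examples):
#
#   proxy_setting('192.168.0.1:8118', 'http://example.com/')
#       -> ('192.168.0.1:8118', False)   # non PAC setting is passed straight through
#   proxy_setting('PAC:http://192.168.0.1/proxy.pac', 'http://example.com/')
#       -> ('10.0.0.1:3128', True)       # request_url matched a PAC entry (or force=True)
#       -> (False, True)                 # PAC parsed but request_url matched no PAC entry
#       -> (None, True)                  # PAC fetched but no "PROXY host:port" item found
#       -> (None, False)                 # PAC url could not be fetched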


def get_url(url,  # type: AnyStr
            post_data=None,  # type: Optional
            params=None,  # type: Optional
            headers=None,  # type: Optional[Dict]
            timeout=30,  # type: int
            session=None,  # type: Optional[requests.Session]
            parse_json=False,  # type: bool
            raise_status_code=False,  # type: bool
            raise_exceptions=False,  # type: bool
            as_binary=False,  # type: bool
            encoding=None,  # type: Optional[AnyStr]
            **kwargs
            ):
    # type: (...) -> Optional[Union[AnyStr, bool, bytes, Dict, Tuple[Union[Dict, List], requests.Session]]]
    """
    Either
    1) Returns a byte-string retrieved from the url provider.
    2) Returns True/False for success after using kwarg 'savename' set to a file pathname.
    3) Returns Tuple response, session if success after setting kwargs 'resp_sess' True.
    4) JSON Dict if parse_json=True.

    :param url: url
    :param post_data: post data
    :param params:
    :param headers: headers to add
    :param timeout: timeout
    :param session: optional session object
    :param parse_json: return JSON Dict
    :param raise_status_code: raise exception for status codes
    :param raise_exceptions: raise exceptions
    :param as_binary: return bytes instead of text
    :param encoding: overwrite encoding return header if as_binary is False
    :param kwargs:
    :return:
    """

    response_attr = ('text', 'content')[as_binary]

    # selectively mute some errors
    mute = filter_list(lambda x: kwargs.pop(x, False), [
        'mute_connect_err', 'mute_read_timeout', 'mute_connect_timeout', 'mute_http_error'])

    # reuse or instantiate request session
    resp_sess = kwargs.pop('resp_sess', None)
    if None is session:
        session = CloudflareScraper.create_scraper()
        session.headers.update({'User-Agent': USER_AGENT})

    # download and save file or simply fetch url
    savename = kwargs.pop('savename', None)
    if savename:
        # session streaming
        session.stream = True

    if not kwargs.pop('nocache', False):
        cache_dir = CACHE_DIR or get_system_temp_dir()
        session = CacheControl(sess=session, cache=caches.FileCache(ek.ek(os.path.join, cache_dir, 'sessions')))

    provider = kwargs.pop('provider', None)

    # handle legacy uses of `json` param
    if kwargs.get('json'):
        parse_json = kwargs.pop('json')

    # session master headers
    req_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                   'Accept-Encoding': 'gzip,deflate'}
    if headers:
        req_headers.update(headers)
    if hasattr(session, 'reserved') and 'headers' in session.reserved:
        req_headers.update(session.reserved['headers'] or {})
    session.headers.update(req_headers)

    # session parameters
    session.params = params

    # session ssl verify
    session.verify = False

    # don't trust os environments (auth, proxies, ...)
    session.trust_env = False

    response = None
    try:
        # sanitise url
        parsed = list(urlparse(url))
        parsed[2] = re.sub('/{2,}', '/', parsed[2])  # replace two or more / with one
        url = urlunparse(parsed)

        # session proxies
        if PROXY_SETTING:
            (proxy_address, pac_found) = proxy_setting(PROXY_SETTING, url)
            msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
            if None is proxy_address:
                logger.debug('Proxy error, aborted the request using %s' % msg)
                return
            elif proxy_address:
                logger.debug('Using %s' % msg)
                session.proxies = {'http': proxy_address, 'https': proxy_address}

        # decide if we get or post data to server
        if post_data or 'post_json' in kwargs:
            if True is post_data:
                post_data = None

            if post_data:
                kwargs.setdefault('data', post_data)

            if 'post_json' in kwargs:
                kwargs.setdefault('json', kwargs.pop('post_json'))

            response = session.post(url, timeout=timeout, **kwargs)
        else:
            response = session.get(url, timeout=timeout, **kwargs)
            if response.ok and not response.content and 'url=' in response.headers.get('Refresh', '').lower():
                url = response.headers.get('Refresh').lower().split('url=')[1].strip('/')
                if not url.startswith('http'):
                    parsed[2] = '/%s' % url
                    url = urlunparse(parsed)
                response = session.get(url, timeout=timeout, **kwargs)

        # if encoding is not in header try to use best guess
        # ignore downloads with savename
        if not savename and not as_binary:
            if encoding:
                response.encoding = encoding
            elif not response.encoding or 'charset' not in response.headers.get('Content-Type', ''):
                response.encoding = response.apparent_encoding

        # noinspection PyProtectedMember
        if provider and provider._has_signature(response.text):
            return getattr(response, response_attr)

        if raise_status_code:
            response.raise_for_status()

        if not response.ok:
            http_err_text = 'CloudFlare Ray ID' in response.text and \
                            'CloudFlare reports, "Website is offline"; ' or ''
            if response.status_code in http_error_code:
                http_err_text += http_error_code[response.status_code]
            elif response.status_code in range(520, 527):
                http_err_text += 'Origin server connection failure'
            else:
                http_err_text = 'Custom HTTP error code'
            if 'mute_http_error' not in mute:
                logger.debug(u'Response not ok. %s: %s from requested url %s'
                             % (response.status_code, http_err_text, url))
            return

    except requests.exceptions.HTTPError as e:
        if raise_status_code:
            response.raise_for_status()
        logger.warning(u'HTTP error %s while loading URL%s' % (
            e.errno, _maybe_request_url(e)))
        return
    except requests.exceptions.ConnectionError as e:
        if 'mute_connect_err' not in mute:
            logger.warning(u'Connection error msg:%s while loading URL%s' % (
                ex(e), _maybe_request_url(e)))
        if raise_exceptions:
            raise e
        return
    except requests.exceptions.ReadTimeout as e:
        if 'mute_read_timeout' not in mute:
            logger.warning(u'Read timed out msg:%s while loading URL%s' % (
                ex(e), _maybe_request_url(e)))
        if raise_exceptions:
            raise e
        return
    except (requests.exceptions.Timeout, socket.timeout) as e:
        if 'mute_connect_timeout' not in mute:
            logger.warning(u'Connection timed out msg:%s while loading URL %s' % (
                ex(e), _maybe_request_url(e, url)))
        if raise_exceptions:
            raise e
        return
    except (BaseException, Exception) as e:
        if ex(e):
            logger.warning(u'Exception caught while loading URL %s\r\nDetail... %s\r\n%s'
                           % (url, ex(e), traceback.format_exc()))
        else:
            logger.warning(u'Unknown exception while loading URL %s\r\nDetail... %s'
                           % (url, traceback.format_exc()))
        if raise_exceptions:
            raise e
        return

    if parse_json:
        try:
            data_json = response.json()
            if resp_sess:
                return ({}, data_json)[isinstance(data_json, (dict, list))], session
            return ({}, data_json)[isinstance(data_json, (dict, list))]
        except (TypeError, Exception) as e:
            logger.warning(u'JSON data issue from URL %s\r\nDetail... %s' % (url, ex(e)))
            if raise_exceptions:
                raise e
            return None

    if savename:
        try:
            write_file(savename, response, raw=True, raise_exceptions=raise_exceptions)
        except (BaseException, Exception) as e:
            if raise_exceptions:
                raise e
            return
        return True

    if resp_sess:
        return getattr(response, response_attr), session

    return getattr(response, response_attr)
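

# Typical call patterns (illustrative comments; example.com and my_session are placeholders):
#
#   html = get_url('http://example.com/page')                           # text, or None on failure
#   data = get_url('http://example.com/api', parse_json=True)           # dict/list, {} if JSON is not a dict/list
#   saved = get_url('http://example.com/a.zip', savename='/tmp/a.zip')  # True when streamed to disk via write_file
#   text, sess = get_url('http://example.com/', session=my_session, resp_sess=True)  # also returns the session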


def file_bit_filter(mode):
    for bit in [stat.S_IXUSR, stat.S_IXGRP, stat.S_IXOTH, stat.S_ISUID, stat.S_ISGID]:
        if mode & bit:
            mode -= bit

    return mode
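

# Example (comments only): execute and setuid/setgid bits are stripped from a mode,
# e.g. when applying a directory's permissions to a plain file:
#
#   file_bit_filter(0o755)   # -> 0o644
#   file_bit_filter(0o4755)  # -> 0o644 (setuid removed as well)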


def remove_file_failed(filename):
    """
    delete given file

    :param filename: filename
    :type filename: AnyStr
    """
    try:
        ek.ek(os.remove, filename)
    except (BaseException, Exception):
        pass


def chmod_as_parent(child_path):
    """

    :param child_path: path
    :type child_path: AnyStr
    :return:
    :rtype: None
    """
    if os.name in ('nt', 'ce'):
        return

    parent_path = ek.ek(os.path.dirname, child_path)

    if not parent_path:
        logger.debug(u'No parent path provided in %s, unable to get permissions from it' % child_path)
        return

    parent_path_stat = ek.ek(os.stat, parent_path)
    parent_mode = stat.S_IMODE(parent_path_stat[stat.ST_MODE])

    child_path_stat = ek.ek(os.stat, child_path)
    child_path_mode = stat.S_IMODE(child_path_stat[stat.ST_MODE])

    if ek.ek(os.path.isfile, child_path):
        child_mode = file_bit_filter(parent_mode)
    else:
        child_mode = parent_mode

    if child_path_mode == child_mode:
        return

    child_path_owner = child_path_stat.st_uid
    user_id = os.geteuid()  # only available on UNIX

    if 0 != user_id and user_id != child_path_owner:
        logger.debug(u'Not running as root or owner of %s, not trying to set permissions' % child_path)
        return

    try:
        ek.ek(os.chmod, child_path, child_mode)
        logger.debug(u'Setting permissions for %s to %o as parent directory has %o'
                     % (child_path, child_mode, parent_mode))
    except OSError:
        logger.error(u'Failed to set permission for %s to %o' % (child_path, child_mode))
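

# Behaviour sketch (comments only, hypothetical paths): with /data/tv at mode 0o775,
# chmod_as_parent('/data/tv/show.nfo') sets the file to 0o664 (execute bits filtered),
# while a sub directory would be set to 0o775. It is a no-op on Windows, or when not
# running as root or as the owner of the child path.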


def make_dirs(path, syno=False):
    """
    Creates any folders that are missing and assigns them the permissions of their
    parents
    :param path: path
    :type path: AnyStr
    :param syno: whether to trigger a syno library update for path
    :type syno: bool
    :return: success
    :rtype: bool
    """
    if not ek.ek(os.path.isdir, path):
        # Windows, create all missing folders
        if os.name in ('nt', 'ce'):
            try:
                logger.debug(u'Path %s doesn\'t exist, creating it' % path)
                ek.ek(os.makedirs, path)
            except (OSError, IOError) as e:
                logger.error(u'Failed creating %s : %s' % (path, ex(e)))
                return False

        # not Windows, create all missing folders and set permissions
        else:
            sofar = ''
            folder_list = path.split(os.path.sep)

            # look through each sub folder and make sure they all exist
            for cur_folder in folder_list:
                sofar += cur_folder + os.path.sep

                # if it exists then just keep walking down the line
                if ek.ek(os.path.isdir, sofar):
                    continue

                try:
                    logger.debug(u'Path %s doesn\'t exist, creating it' % sofar)
                    ek.ek(os.mkdir, sofar)
                    # use normpath to remove end separator, otherwise checks permissions against itself
                    chmod_as_parent(ek.ek(os.path.normpath, sofar))
                    if syno:
                        # do the library update for synoindex
                        NOTIFIERS.NotifierFactory().get('SYNOINDEX').addFolder(sofar)
                except (OSError, IOError) as e:
                    logger.error(u'Failed creating %s : %s' % (sofar, ex(e)))
                    return False

    return True
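

# Usage sketch (comments only; the path is a made-up example):
#
#   if make_dirs('/data/tv/Show Name/Season 1'):
#       ...  # each missing folder was created and chmod'd to match its parent
#   make_dirs(path, syno=True)  # also asks the SYNOINDEX notifier to index each created folder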


def write_file(filepath,  # type: AnyStr
               data,  # type: Union[AnyStr, etree.Element, requests.Response]
               raw=False,  # type: bool
               xmltree=False,  # type: bool
               utf8=False,  # type: bool
               raise_exceptions=False  # type: bool
               ):  # type: (...) -> bool
    """

    :param filepath: filepath
    :param data: data to write
    :param raw: write binary or text
    :param xmltree: use xml tree
    :param utf8: use UTF8
    :param raise_exceptions: raise exceptions
    :return: success
    """
    result = False

    if make_dirs(ek.ek(os.path.dirname, filepath)):
        try:
            if raw:
                with ek.ek(io.FileIO, filepath, 'wb') as fh:
                    for chunk in data.iter_content(chunk_size=1024):
                        if chunk:
                            fh.write(chunk)
                            fh.flush()
                    ek.ek(os.fsync, fh.fileno())
            else:
                w_mode = 'w'
                if utf8:
                    w_mode = 'a'
                    with ek.ek(io.FileIO, filepath, 'wb') as fh:
                        fh.write(codecs.BOM_UTF8)

                if xmltree:
                    with ek.ek(io.FileIO, filepath, w_mode) as fh:
                        if utf8:
                            data.write(fh, encoding='utf-8')
                        else:
                            data.write(fh)
                else:
                    if isinstance(data, text_type):
                        with ek.ek(io.open, filepath, w_mode, encoding='utf-8') as fh:
                            fh.write(data)
                    else:
                        with ek.ek(io.FileIO, filepath, w_mode) as fh:
                            fh.write(data)

            chmod_as_parent(filepath)

            result = True
        except (EnvironmentError, IOError) as e:
            logger.error('Unable to write file %s : %s' % (filepath, ex(e)))
            if raise_exceptions:
                raise e

    return result
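

# Usage sketches (comments only; file names and the etree_doc variable are made-up examples):
#
#   write_file('/tmp/show.nfo', u'<episodedetails/>', utf8=True)  # text write with utf-8 BOM
#   write_file('/tmp/poster.jpg', response, raw=True)             # stream a requests.Response to disk
#   write_file('/tmp/tvshow.nfo', etree_doc, xmltree=True)        # serialise an lxml ElementTree
# Missing parent folders are created via make_dirs() and True/False is returned for success.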


def long_path(path):
    # type: (AnyStr) -> AnyStr
    """add long path prefix for Windows"""
    if 'nt' == os.name and 260 < len(path) and not path.startswith('\\\\?\\') and ek.ek(os.path.isabs, path):
        return '\\\\?\\' + path
    return path
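

# Example (comments only): on Windows an over-long absolute path gains the extended-length
# prefix so file system calls are not limited to MAX_PATH (260) characters:
#
#   long_path(r'C:\very\deep\path\file.mkv')  # -> '\\?\C:\very\deep\path\file.mkv' when len(path) > 260
#   long_path('/posix/path')                  # unchanged on non-Windows systems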