# encoding:utf-8
# ---------------
# functions are placed here to remove cyclic import issues from placement in helpers
#
import codecs
import getpass
import io
import logging
import os
import re
import socket
import stat
import tempfile
import traceback

# noinspection PyPep8Naming
import encodingKludge as ek
from exceptions_helper import ex

from _23 import filter_list, html_unescape, urlparse, urlunparse
from six import iteritems, string_types, text_type

from lib.cachecontrol import CacheControl, caches
from cfscrape import CloudflareScraper
import requests

# noinspection PyUnreachableCode
if False:
    # noinspection PyUnresolvedReferences
    from typing import Any, AnyStr, Dict, NoReturn, Iterable, Iterator, List, Optional, Tuple, Union
    from lxml_etree import etree

# Mapping error status codes to official W3C names
http_error_code = {
    300: 'Multiple Choices', 301: 'Moved Permanently', 302: 'Found', 303: 'See Other',
    304: 'Not Modified', 305: 'Use Proxy', 307: 'Temporary Redirect', 308: 'Permanent Redirect',
    400: 'Bad Request', 401: 'Unauthorized', 402: 'Payment Required', 403: 'Forbidden',
    404: 'Not Found', 405: 'Method Not Allowed', 406: 'Not Acceptable', 407: 'Proxy Authentication Required',
    408: 'Request Timeout', 409: 'Conflict', 410: 'Gone', 411: 'Length Required',
    412: 'Precondition Failed', 413: 'Request Entity Too Large', 414: 'Request-URI Too Long',
    415: 'Unsupported Media Type', 416: 'Requested Range Not Satisfiable', 417: 'Expectation Failed',
    429: 'Too Many Requests', 431: 'Request Header Fields Too Large', 444: 'No Response',
    451: 'Unavailable For Legal Reasons',
    500: 'Internal Server Error', 501: 'Not Implemented', 502: 'Bad Gateway', 503: 'Service Unavailable',
    504: 'Gateway Timeout', 505: 'HTTP Version Not Supported', 511: 'Network Authentication Required'}

logger = logging.getLogger('sg_helper')
logger.addHandler(logging.NullHandler())

USER_AGENT = ''
CACHE_DIR = None
PROXY_SETTING = None
NOTIFIERS = None


# try to convert to int, if it fails the default will be returned
def try_int(s, s_default=0):
    try:
        return int(s)
    except (BaseException, Exception):
        return s_default


def _maybe_request_url(e, def_url=''):
    return hasattr(e, 'request') and hasattr(e.request, 'url') and ' ' + e.request.url or def_url


def clean_data(data):
    """Cleans up strings, lists, dicts returned

    Issues corrected:
    - Replaces &amp; with &
    - Trailing whitespace
    - Decode html entities

    :param data: data
    :type data: List or Dict or AnyStr
    :return:
    :rtype: List or Dict or AnyStr
    """
    if isinstance(data, list):
        return [clean_data(d) for d in data]
    if isinstance(data, dict):
        return {k: clean_data(v) for k, v in iteritems(data)}
    if isinstance(data, string_types):
        return html_unescape(data).strip().replace(u'&amp;', u'&')
    return data


def get_system_temp_dir():
    """
    :return: Returns the [system temp dir]/tvdb_api-u501 (or tvdb_api-myuser)
    :rtype: AnyStr
    """
    if hasattr(os, 'getuid'):
        uid = 'u%d' % (os.getuid())
    else:
        # For Windows
        try:
            uid = getpass.getuser()
        except ImportError:
            return ek.ek(os.path.join, tempfile.gettempdir(), 'SickGear')

    return ek.ek(os.path.join, tempfile.gettempdir(), 'SickGear-%s' % uid)
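
# Illustrative sketch (the values below are hypothetical, not from the codebase):
# clean_data() recurses into lists and dicts, so nested scraper output can be
# cleaned in a single call, e.g.
#
#     clean_data({'show': ' A &amp; B ', 'eps': ['One ', 'Two']})
#     # -> {'show': 'A & B', 'eps': ['One', 'Two']}
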
def proxy_setting(setting, request_url, force=False):
    """
    Returns a tuple of
    a) proxy_setting address value or a PAC is fetched and parsed if proxy_setting
       starts with "PAC:" (case-insensitive) and
    b) True/False if "PAC" is found in the proxy_setting.

    The PAC data parser is crude, javascript is not eval'd. The first "PROXY URL" found is extracted with a list
    of "url_a_part.url_remaining", "url_b_part.url_remaining", "url_n_part.url_remaining" and so on.
    Also, PAC data items are escaped for matching therefore regular expression items will not match a request_url.

    If force is True or request_url contains a PAC parsed data item then the PAC proxy address is returned,
    else False. None is returned in the event of an error fetching PAC data.
    """
    # check for "PAC" usage
    match = re.search(r'^\s*PAC:\s*(.*)', setting, re.I)
    if not match:
        return setting, False
    pac_url = match.group(1)

    # prevent a recursive test with existing proxy setting when fetching PAC url
    global PROXY_SETTING
    proxy_setting_backup = PROXY_SETTING
    PROXY_SETTING = ''

    resp = ''
    try:
        resp = get_url(pac_url)
    except (BaseException, Exception):
        pass
    PROXY_SETTING = proxy_setting_backup

    if not resp:
        return None, False

    proxy_address = None
    request_url_match = False
    parsed_url = urlparse(request_url)
    netloc = parsed_url.netloc
    for pac_data in re.finditer(r"""(?:[^'"]*['"])([^.]+\.[^'"]*)(?:['"])""", resp, re.I):
        data = re.search(r"""PROXY\s+([^'"]+)""", pac_data.group(1), re.I)
        if data:
            if force:
                return data.group(1), True
            proxy_address = (proxy_address, data.group(1))[None is proxy_address]
        elif re.search(re.escape(pac_data.group(1)), netloc, re.I):
            request_url_match = True
            if None is not proxy_address:
                break

    if None is proxy_address:
        return None, True

    return (False, proxy_address)[request_url_match], True
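
# Illustrative calls (hypothetical addresses/URLs): a plain proxy setting passes
# straight through, while a "PAC:" prefixed setting fetches and crudely parses
# the PAC file before deciding whether the request_url should be proxied.
#
#     proxy_setting('http://10.0.0.1:8118', 'https://example.org/api')
#     # -> ('http://10.0.0.1:8118', False)
#
#     proxy_setting('PAC:http://lan.example/proxy.pac', 'https://example.org/api', force=True)
#     # -> (first proxy address found in the PAC data, True), or (None, False) on fetch error
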
def get_url(url,  # type: AnyStr
            post_data=None,  # type: Optional
            params=None,  # type: Optional
            headers=None,  # type: Optional[Dict]
            timeout=30,  # type: int
            session=None,  # type: Optional[requests.Session]
            parse_json=False,  # type: bool
            raise_status_code=False,  # type: bool
            raise_exceptions=False,  # type: bool
            as_binary=False,  # type: bool
            encoding=None,  # type: Optional[AnyStr]
            **kwargs
            ):
    # type: (...) -> Optional[Union[AnyStr, bool, bytes, Dict, Tuple[Union[Dict, List], requests.Session]]]
    """
    Either
    1) Returns a byte-string retrieved from the url provider.
    2) Returns True/False if success after using kwargs 'savename' set to file pathname.
    3) Returns Tuple response, session if success after setting kwargs 'resp_sess' True.
    4) Returns JSON Dict if parse_json=True.

    :param url: url
    :param post_data: post data
    :param params:
    :param headers: headers to add
    :param timeout: timeout
    :param session: optional session object
    :param parse_json: return JSON Dict
    :param raise_status_code: raise exception for status codes
    :param raise_exceptions: raise exceptions
    :param as_binary: return bytes instead of text
    :param encoding: overwrite encoding return header if as_binary is False
    :param kwargs:
    :return:
    """
    response_attr = ('text', 'content')[as_binary]

    # selectively mute some errors
    mute = filter_list(lambda x: kwargs.pop(x, False), [
        'mute_connect_err', 'mute_read_timeout', 'mute_connect_timeout', 'mute_http_error'])

    # reuse or instantiate request session
    resp_sess = kwargs.pop('resp_sess', None)
    if None is session:
        session = CloudflareScraper.create_scraper()
        session.headers.update({'User-Agent': USER_AGENT})

    # download and save file or simply fetch url
    savename = kwargs.pop('savename', None)
    if savename:
        # session streaming
        session.stream = True

    if not kwargs.pop('nocache', False):
        cache_dir = CACHE_DIR or get_system_temp_dir()
        session = CacheControl(sess=session, cache=caches.FileCache(ek.ek(os.path.join, cache_dir, 'sessions')))

    provider = kwargs.pop('provider', None)

    # handle legacy uses of `json` param
    if kwargs.get('json'):
        parse_json = kwargs.pop('json')

    # session master headers
    req_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                   'Accept-Encoding': 'gzip,deflate'}
    if headers:
        req_headers.update(headers)
    if hasattr(session, 'reserved') and 'headers' in session.reserved:
        req_headers.update(session.reserved['headers'] or {})
    session.headers.update(req_headers)

    # session parameters
    session.params = params

    # session ssl verify
    session.verify = False

    # don't trust os environments (auth, proxies, ...)
    session.trust_env = False

    response = None
    try:
        # sanitise url
        parsed = list(urlparse(url))
        parsed[2] = re.sub('/{2,}', '/', parsed[2])  # replace two or more / with one
        url = urlunparse(parsed)

        # session proxies
        if PROXY_SETTING:
            (proxy_address, pac_found) = proxy_setting(PROXY_SETTING, url)
            msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
            if None is proxy_address:
                logger.debug('Proxy error, aborted the request using %s' % msg)
                return
            elif proxy_address:
                logger.debug('Using %s' % msg)
                session.proxies = {'http': proxy_address, 'https': proxy_address}

        # decide if we get or post data to server
        if post_data or 'post_json' in kwargs:
            if True is post_data:
                post_data = None

            if post_data:
                kwargs.setdefault('data', post_data)

            if 'post_json' in kwargs:
                kwargs.setdefault('json', kwargs.pop('post_json'))

            response = session.post(url, timeout=timeout, **kwargs)
        else:
            response = session.get(url, timeout=timeout, **kwargs)
            if response.ok and not response.content and 'url=' in response.headers.get('Refresh', '').lower():
                url = response.headers.get('Refresh').lower().split('url=')[1].strip('/')
                if not url.startswith('http'):
                    parsed[2] = '/%s' % url
                    url = urlunparse(parsed)
                response = session.get(url, timeout=timeout, **kwargs)

        # if encoding is not in header try to use best guess
        # ignore downloads with savename
        if not savename and not as_binary:
            if encoding:
                response.encoding = encoding
            elif not response.encoding or 'charset' not in response.headers.get('Content-Type', ''):
                response.encoding = response.apparent_encoding

        # noinspection PyProtectedMember
        if provider and provider._has_signature(response.text):
            return getattr(response, response_attr)

        if raise_status_code:
            response.raise_for_status()

        if not response.ok:
            http_err_text = 'CloudFlare Ray ID' in response.text and \
                            'CloudFlare reports, "Website is offline"; ' or ''
            if response.status_code in http_error_code:
                http_err_text += http_error_code[response.status_code]
            elif response.status_code in range(520, 527):
                http_err_text += 'Origin server connection failure'
            else:
                http_err_text = 'Custom HTTP error code'
            if 'mute_http_error' not in mute:
                logger.debug(u'Response not ok. %s: %s from requested url %s'
                             % (response.status_code, http_err_text, url))
            return

    except requests.exceptions.HTTPError as e:
        if raise_status_code:
            response.raise_for_status()
        logger.warning(u'HTTP error %s while loading URL%s' % (
            e.errno, _maybe_request_url(e)))
        return
    except requests.exceptions.ConnectionError as e:
        if 'mute_connect_err' not in mute:
            logger.warning(u'Connection error msg:%s while loading URL%s' % (
                ex(e), _maybe_request_url(e)))
        if raise_exceptions:
            raise e
        return
    except requests.exceptions.ReadTimeout as e:
        if 'mute_read_timeout' not in mute:
            logger.warning(u'Read timed out msg:%s while loading URL%s' % (
                ex(e), _maybe_request_url(e)))
        if raise_exceptions:
            raise e
        return
    except (requests.exceptions.Timeout, socket.timeout) as e:
        if 'mute_connect_timeout' not in mute:
            logger.warning(u'Connection timed out msg:%s while loading URL %s' % (
                ex(e), _maybe_request_url(e, url)))
        if raise_exceptions:
            raise e
        return
    except (BaseException, Exception) as e:
        if ex(e):
            logger.warning(u'Exception caught while loading URL %s\r\nDetail... %s\r\n%s'
                           % (url, ex(e), traceback.format_exc()))
        else:
            logger.warning(u'Unknown exception while loading URL %s\r\nDetail... %s'
                           % (url, traceback.format_exc()))
        if raise_exceptions:
            raise e
        return

    if parse_json:
        try:
            data_json = response.json()
            if resp_sess:
                return ({}, data_json)[isinstance(data_json, (dict, list))], session
            return ({}, data_json)[isinstance(data_json, (dict, list))]
        except (TypeError, Exception) as e:
            logger.warning(u'JSON data issue from URL %s\r\nDetail... %s' % (url, ex(e)))
            if raise_exceptions:
                raise e
            return None

    if savename:
        try:
            write_file(savename, response, raw=True, raise_exceptions=raise_exceptions)
        except (BaseException, Exception) as e:
            if raise_exceptions:
                raise e
            return
        return True

    if resp_sess:
        return getattr(response, response_attr), session

    return getattr(response, response_attr)
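
# Illustrative calls (hypothetical URLs/paths, return notes abridged):
#
#     html = get_url('https://example.org/page')                               # text, or None on error
#     data = get_url('https://example.org/api.json', parse_json=True)          # dict/list on success
#     ok = get_url('https://example.org/file.zip', savename='/tmp/file.zip')   # True after saving to disk
#     text, sess = get_url('https://example.org/page', resp_sess=True)         # also return session for reuse
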
def file_bit_filter(mode):
    for bit in [stat.S_IXUSR, stat.S_IXGRP, stat.S_IXOTH, stat.S_ISUID, stat.S_ISGID]:
        if mode & bit:
            mode -= bit

    return mode


def remove_file_failed(filename):
    """
    delete given file

    :param filename: filename
    :type filename: AnyStr
    """
    try:
        ek.ek(os.remove, filename)
    except (BaseException, Exception):
        pass
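
# Example (hypothetical mode values): file_bit_filter() strips the execute and
# setuid/setgid bits so a file never inherits them from its parent directory
# mode, e.g.
#
#     file_bit_filter(0o755)   # -> 0o644
#     file_bit_filter(0o4755)  # -> 0o644
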
def chmod_as_parent(child_path):
    """
    :param child_path: path
    :type child_path: AnyStr
    :return:
    :rtype: None
    """
    if os.name in ('nt', 'ce'):
        return

    parent_path = ek.ek(os.path.dirname, child_path)

    if not parent_path:
        logger.debug(u'No parent path provided in %s, unable to get permissions from it' % child_path)
        return

    parent_path_stat = ek.ek(os.stat, parent_path)
    parent_mode = stat.S_IMODE(parent_path_stat[stat.ST_MODE])

    child_path_stat = ek.ek(os.stat, child_path)
    child_path_mode = stat.S_IMODE(child_path_stat[stat.ST_MODE])

    if ek.ek(os.path.isfile, child_path):
        child_mode = file_bit_filter(parent_mode)
    else:
        child_mode = parent_mode

    if child_path_mode == child_mode:
        return

    child_path_owner = child_path_stat.st_uid
    user_id = os.geteuid()  # only available on UNIX

    if 0 != user_id and user_id != child_path_owner:
        logger.debug(u'Not running as root or owner of %s, not trying to set permissions' % child_path)
        return

    try:
        ek.ek(os.chmod, child_path, child_mode)
        logger.debug(u'Setting permissions for %s to %o as parent directory has %o'
                     % (child_path, child_mode, parent_mode))
    except OSError:
        logger.error(u'Failed to set permission for %s to %o' % (child_path, child_mode))


def make_dirs(path, syno=False):
    """
    Creates any folders that are missing and assigns them the permissions of their parents

    :param path: path
    :type path: AnyStr
    :param syno: whether to trigger a syno library update for path
    :type syno: bool
    :return: success
    :rtype: bool
    """
    if not ek.ek(os.path.isdir, path):
        # Windows, create all missing folders
        if os.name in ('nt', 'ce'):
            try:
                logger.debug(u'Path %s doesn\'t exist, creating it' % path)
                ek.ek(os.makedirs, path)
            except (OSError, IOError) as e:
                logger.error(u'Failed creating %s : %s' % (path, ex(e)))
                return False

        # not Windows, create all missing folders and set permissions
        else:
            sofar = ''
            folder_list = path.split(os.path.sep)

            # look through each sub folder and make sure they all exist
            for cur_folder in folder_list:
                sofar += cur_folder + os.path.sep

                # if it exists then just keep walking down the line
                if ek.ek(os.path.isdir, sofar):
                    continue

                try:
                    logger.debug(u'Path %s doesn\'t exist, creating it' % sofar)
                    ek.ek(os.mkdir, sofar)
                    # use normpath to remove end separator, otherwise checks permissions against itself
                    chmod_as_parent(ek.ek(os.path.normpath, sofar))
                    if syno:
                        # do the library update for synoindex
                        NOTIFIERS.NotifierFactory().get('SYNOINDEX').addFolder(sofar)
                except (OSError, IOError) as e:
                    logger.error(u'Failed creating %s : %s' % (sofar, ex(e)))
                    return False

    return True
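
# Illustrative use (hypothetical path): folders are created one segment at a
# time so chmod_as_parent() can copy permissions from the level above each new
# folder.
#
#     if make_dirs('/media/shows/Example Show/Season 1'):
#         pass  # safe to write files into the new folder here
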
def write_file(filepath,  # type: AnyStr
               data,  # type: Union[AnyStr, etree.Element, requests.Response]
               raw=False,  # type: bool
               xmltree=False,  # type: bool
               utf8=False,  # type: bool
               raise_exceptions=False  # type: bool
               ):
    # type: (...) -> bool
    """
    :param filepath: filepath
    :param data: data to write
    :param raw: write binary or text
    :param xmltree: use xml tree
    :param utf8: use UTF8
    :param raise_exceptions: raise exceptions
    :return: success
    """
    result = False

    if make_dirs(ek.ek(os.path.dirname, filepath)):
        try:
            if raw:
                with ek.ek(io.FileIO, filepath, 'wb') as fh:
                    for chunk in data.iter_content(chunk_size=1024):
                        if chunk:
                            fh.write(chunk)
                            fh.flush()
                    ek.ek(os.fsync, fh.fileno())
            else:
                w_mode = 'w'
                if utf8:
                    w_mode = 'a'
                    with ek.ek(io.FileIO, filepath, 'wb') as fh:
                        fh.write(codecs.BOM_UTF8)

                if xmltree:
                    with ek.ek(io.FileIO, filepath, w_mode) as fh:
                        if utf8:
                            data.write(fh, encoding='utf-8')
                        else:
                            data.write(fh)
                else:
                    if isinstance(data, text_type):
                        with ek.ek(io.open, filepath, w_mode, encoding='utf-8') as fh:
                            fh.write(data)
                    else:
                        with ek.ek(io.FileIO, filepath, w_mode) as fh:
                            fh.write(data)

            chmod_as_parent(filepath)

            result = True
        except (EnvironmentError, IOError) as e:
            logger.error('Unable to write file %s : %s' % (filepath, ex(e)))
            if raise_exceptions:
                raise e

    return result


def long_path(path):
    # type: (AnyStr) -> AnyStr
    """add long path prefix for Windows"""
    if 'nt' == os.name and 260 < len(path) and not path.startswith('\\\\?\\') and ek.ek(os.path.isabs, path):
        return '\\\\?\\' + path
    return path
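
# Illustrative use (hypothetical path): long_path() is a no-op except on
# Windows with absolute paths over 260 chars, so it is safe to wrap any
# filesystem path handed to write_file().
#
#     write_file(long_path(r'C:\shows\tree\info.txt'), u'text data', utf8=True)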