# encoding:utf-8
# ---------------
# functions are placed here to remove cyclic import issues from placement in helpers
#
import codecs
import datetime
import getpass
import hashlib
import io
import logging
import os
import re
import socket
import stat
import tempfile
import threading
import traceback

# noinspection PyPep8Naming
import encodingKludge as ek
from exceptions_helper import ex, ConnectionSkipException

from _23 import decode_bytes, filter_list, html_unescape, urlparse, urlsplit, urlunparse
from six import integer_types, iteritems, iterkeys, itervalues, PY2, string_types, text_type

from lib.cachecontrol import CacheControl, caches
from cfscrape import CloudflareScraper
import requests

# noinspection PyUnreachableCode
if False:
    # noinspection PyUnresolvedReferences
    from typing import Any, AnyStr, Dict, NoReturn, Iterable, Iterator, List, Optional, Tuple, Union
    from lxml_etree import etree

# Mapping error status codes to official W3C names
http_error_code = {
    300: 'Multiple Choices', 301: 'Moved Permanently', 302: 'Found', 303: 'See Other', 304: 'Not Modified',
    305: 'Use Proxy', 307: 'Temporary Redirect', 308: 'Permanent Redirect',
    400: 'Bad Request', 401: 'Unauthorized', 402: 'Payment Required', 403: 'Forbidden', 404: 'Not Found',
    405: 'Method Not Allowed', 406: 'Not Acceptable', 407: 'Proxy Authentication Required', 408: 'Request Timeout',
    409: 'Conflict', 410: 'Gone', 411: 'Length Required', 412: 'Precondition Failed',
    413: 'Request Entity Too Large', 414: 'Request-URI Too Long', 415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable', 417: 'Expectation Failed', 429: 'Too Many Requests',
    431: 'Request Header Fields Too Large', 444: 'No Response', 451: 'Unavailable For Legal Reasons',
    500: 'Internal Server Error', 501: 'Not Implemented', 502: 'Bad Gateway', 503: 'Service Unavailable',
    504: 'Gateway Timeout', 505: 'HTTP Version Not Supported', 511: 'Network Authentication Required'}

logger = logging.getLogger('sg.helper')
logger.addHandler(logging.NullHandler())

USER_AGENT = ''
CACHE_DIR = None
DATA_DIR = None
PROXY_SETTING = None
NOTIFIERS = None

db = None


class ConnectionFailTypes(object):
    http = 1
    connection = 2
    connection_timeout = 3
    timeout = 4
    other = 5
    limit = 6
    nodata = 7

    names = {http: 'http', timeout: 'timeout', connection: 'connection',
             connection_timeout: 'connection_timeout', nodata: 'nodata', other: 'other', limit: 'limit'}

    def __init__(self):
        pass


class ConnectionFail(object):
    def __init__(self, fail_type=ConnectionFailTypes.other, code=None, fail_time=None):
        self.code = code
        self.fail_type = fail_type
        self.fail_time = (datetime.datetime.now(), fail_time)[isinstance(fail_time, datetime.datetime)]


class ConnectionFailDict(object):
    def __init__(self):
        self.domain_list = {}  # type: Dict[AnyStr, ConnectionFailList]
        self.lock = threading.RLock()
        self.load_from_db()

    def load_from_db(self):
        if None is not db:
            with self.lock:
                my_db = db.DBConnection('cache.db')
                if my_db.hasTable('connection_fails'):
                    domains = my_db.select('SELECT DISTINCT domain_url from connection_fails')
                    for domain in domains:
                        self.domain_list[domain['domain_url']] = ConnectionFailList(domain['domain_url'])

    @staticmethod
    def get_domain(url):
        # type: (AnyStr) -> Optional[AnyStr]
        try:
            return urlsplit(url).hostname.lower()
        except (BaseException, Exception):
            pass

    def add_failure(self, url, fail_type):
        # type: (AnyStr, ConnectionFail) -> None
        host = self.get_domain(url)
        if None is not host:
            with self.lock:
                self.domain_list.setdefault(host, ConnectionFailList(host)).add_fail(fail_type)
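
    # Illustrative usage sketch (not in the original source): callers record a fail
    # against a URL's domain, then consult should_skip() before the next request, e.g.
    #   DOMAIN_FAILURES.add_failure('https://example.com/api', ConnectionFail())
    #   if DOMAIN_FAILURES.should_skip('https://example.com/api'):
    #       pass  # back off instead of issuing the request
    # ('example.com' is a hypothetical domain used only for illustration)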

    def inc_failure_count(self, url,  # type: AnyStr
                          *args, **kwargs):
        host = self.get_domain(url)
        if None is not host:
            with self.lock:
                if host in self.domain_list:
                    domain = self.domain_list[host]
                    fail_type = ('fail_type' in kwargs and kwargs['fail_type'].fail_type) or \
                                (isinstance(args, tuple) and isinstance(args[0], ConnectionFail) and
                                 args[0].fail_type)
                    # noinspection PyProtectedMember
                    if not isinstance(domain.failure_time, datetime.datetime) or \
                            fail_type != domain._last_fail_type or \
                            domain.fail_newest_delta() > datetime.timedelta(seconds=3):
                        domain.failure_count += 1
                        domain.failure_time = datetime.datetime.now()
                        domain._last_fail_type = fail_type
                        domain.add_fail(*args, **kwargs)
                    else:
                        logger.debug('%s: Not logging same failure within 3 seconds' % url)

    def should_skip(self, url, log_warning=True, use_tmr_limit=True):
        # type: (AnyStr, bool, bool) -> bool
        host = self.get_domain(url)
        if None is not host:
            with self.lock:
                if host in self.domain_list:
                    return self.domain_list[host].should_skip(log_warning=log_warning, use_tmr_limit=use_tmr_limit)
        return False


DOMAIN_FAILURES = ConnectionFailDict()


class ConnectionFailList(object):
    def __init__(self, url):
        # type: (AnyStr) -> None
        self.url = url
        self._fails = []  # type: List[ConnectionFail]
        self.lock = threading.Lock()
        self.clear_old()
        self.load_list()
        self.last_save = datetime.datetime.now()  # type: datetime.datetime
        self._failure_count = 0  # type: int
        self._failure_time = None  # type: Optional[datetime.datetime]
        self._tmr_limit_count = 0  # type: int
        self._tmr_limit_time = None  # type: Optional[datetime.datetime]
        self._tmr_limit_wait = None  # type: Optional[datetime.timedelta]
        self._last_fail_type = None  # type: Optional[ConnectionFail]
        self.has_limit = False  # type: bool
        self.fail_times = {1: (0, 15), 2: (0, 30), 3: (1, 0), 4: (2, 0), 5: (3, 0), 6: (6, 0), 7: (12, 0), 8: (24, 0)}
        self._load_fail_values()
        self.dirty = False  # type: bool

    @property
    def failure_time(self):
        # type: (...) -> Union[None, datetime.datetime]
        return self._failure_time

    @failure_time.setter
    def failure_time(self, value):
        if None is value or isinstance(value, datetime.datetime):
            changed_val = self._failure_time != value
            self._failure_time = value
            if changed_val:
                # noinspection PyCallByClass,PyTypeChecker
                self._save_fail_value('failure_time', (_totimestamp(value), value)[None is value])

    @property
    def tmr_limit_count(self):
        # type: (...) -> int
        return self._tmr_limit_count

    @tmr_limit_count.setter
    def tmr_limit_count(self, value):
        changed_val = self._tmr_limit_count != value
        self._tmr_limit_count = value
        if changed_val:
            self._save_fail_value('tmr_limit_count', value)

    def tmr_limit_update(self, period, unit, desc):
        # type: (Optional[AnyStr], Optional[AnyStr], AnyStr) -> None
        self.tmr_limit_time = datetime.datetime.now()
        self.tmr_limit_count += 1
        limit_set = False
        if None not in (period, unit):
            limit_set = True
            if unit in ('s', 'sec', 'secs', 'seconds', 'second'):
                self.tmr_limit_wait = datetime.timedelta(seconds=try_int(period))
            elif unit in ('m', 'min', 'mins', 'minutes', 'minute'):
                self.tmr_limit_wait = datetime.timedelta(minutes=try_int(period))
            elif unit in ('h', 'hr', 'hrs', 'hours', 'hour'):
                self.tmr_limit_wait = datetime.timedelta(hours=try_int(period))
            elif unit in ('d', 'days', 'day'):
                self.tmr_limit_wait = datetime.timedelta(days=try_int(period))
            else:
                limit_set = False
        if not limit_set:
            time_index = self.fail_time_index(base_limit=0)
            self.tmr_limit_wait = self.wait_time(time_index)
        logger.warning('Request limit reached. Waiting for %s until next retry. Message: %s' % (
            self.tmr_limit_wait, desc or 'none found'))
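
    # Example (illustrative): a provider replying "retry in 30 mins" maps to
    #   self.tmr_limit_update('30', 'm', 'slow down')   # waits 30 minutes
    # while an unparsable period falls back to the fail_times schedule via
    #   self.tmr_limit_update(None, None, 'API limit')  # wait from fail_time_index()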

    @property
    def tmr_limit_time(self):
        # type: (...) -> Union[None, datetime.datetime]
        return self._tmr_limit_time

    @tmr_limit_time.setter
    def tmr_limit_time(self, value):
        if None is value or isinstance(value, datetime.datetime):
            changed_val = self._tmr_limit_time != value
            self._tmr_limit_time = value
            if changed_val:
                # noinspection PyCallByClass,PyTypeChecker
                self._save_fail_value('tmr_limit_time', (_totimestamp(value), value)[None is value])

    @property
    def last_fail(self):
        # type: (...) -> Optional[int]
        try:
            return sorted(self.fails, key=lambda x: x.fail_time, reverse=True)[0].fail_type
        except (BaseException, Exception):
            pass

    @property
    def failure_count(self):
        # type: (...) -> int
        return self._failure_count

    @failure_count.setter
    def failure_count(self, value):
        changed_val = self._failure_count != value
        self._failure_count = value
        if changed_val:
            self._save_fail_value('failure_count', value)

    def is_waiting(self):
        # type: (...) -> bool
        return self.fail_newest_delta() < self.wait_time()

    @property
    def max_index(self):
        # type: (...) -> int
        return len(self.fail_times)

    @property
    def tmr_limit_wait(self):
        # type: (...) -> Optional[datetime.timedelta]
        return self._tmr_limit_wait

    @tmr_limit_wait.setter
    def tmr_limit_wait(self, value):
        # `fails` is a list property, so check against list (guards early init before _fails exists)
        if isinstance(getattr(self, 'fails', None), list) and isinstance(value, datetime.timedelta):
            self.add_fail(ConnectionFail(fail_type=ConnectionFailTypes.limit))
        changed_val = self._tmr_limit_wait != value
        self._tmr_limit_wait = value
        if changed_val:
            if None is value:
                self._save_fail_value('tmr_limit_wait', value)
            elif isinstance(value, datetime.timedelta):
                self._save_fail_value('tmr_limit_wait', value.total_seconds())

    def fail_time_index(self, base_limit=2):
        # type: (int) -> int
        i = self.failure_count - base_limit
        return (i, self.max_index)[i >= self.max_index]

    def valid_tmr_time(self):
        # type: (...) -> bool
        return isinstance(self.tmr_limit_wait, datetime.timedelta) and \
            isinstance(self.tmr_limit_time, datetime.datetime)

    def wait_time(self, time_index=None):
        # type: (Optional[int]) -> datetime.timedelta
        """
        Return a suitable wait time, selected by parameter, or based on the current failure count

        :param time_index: A key value index into the fail_times dict, or selects using failure count if None
        :return: Time
        """
        if None is time_index:
            time_index = self.fail_time_index()
        return datetime.timedelta(hours=self.fail_times[time_index][0], minutes=self.fail_times[time_index][1])

    def fail_newest_delta(self):
        # type: (...) -> datetime.timedelta
        """
        Return how long since most recent failure

        :return: Period since most recent failure on record
        """
        try:
            return datetime.datetime.now() - self.failure_time
        except (BaseException, Exception):
            return datetime.timedelta(days=1000)
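
    # The fail_times schedule maps consecutive failures to (hours, minutes) of
    # cool-down; e.g. failure_count = 5 with the default base_limit = 2 gives
    # fail_time_index() == 3, so wait_time() returns datetime.timedelta(hours=1).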

    @property
    def get_next_try_time(self):
        # type: (...) -> Optional[datetime.timedelta]
        n = None
        h = datetime.timedelta(seconds=0)
        f = datetime.timedelta(seconds=0)
        if self.valid_tmr_time():
            h = self.tmr_limit_time + self.tmr_limit_wait - datetime.datetime.now()
        if 3 <= self.failure_count and isinstance(self.failure_time, datetime.datetime) and self.is_waiting():
            f = self.failure_time + self.wait_time() - datetime.datetime.now()
        if datetime.timedelta(seconds=0) < max((h, f)):
            n = max((h, f))
        return n

    def retry_next(self):
        if self.valid_tmr_time():
            self.tmr_limit_time = datetime.datetime.now() - self.tmr_limit_wait
        if 3 <= self.failure_count and isinstance(self.failure_time, datetime.datetime) and self.is_waiting():
            self.failure_time = datetime.datetime.now() - self.wait_time()

    @staticmethod
    def fmt_delta(delta):
        # type: (Union[datetime.datetime, datetime.timedelta]) -> AnyStr
        return str(delta).rsplit('.')[0]

    def should_skip(self, log_warning=True, use_tmr_limit=True):
        # type: (bool, bool) -> bool
        """
        Determine if a subsequent server request should be skipped. The result of this logic is based on most
        recent server connection activity including exhausted request limits, and counts of connect failures
        that determine a "cool down" period before reconnection attempts are recommended (by returning False).

        :param log_warning: Output to log if True (default) otherwise set False for no output.
        :param use_tmr_limit: Setting this to False will ignore a tmr limit being reached and will instead return False.
        :return: True for any known issue that would prevent a subsequent server connection, otherwise False.
        """
        if self.valid_tmr_time():
            time_left = self.tmr_limit_time + self.tmr_limit_wait - datetime.datetime.now()
            if time_left > datetime.timedelta(seconds=0):
                if log_warning:
                    logger.warning('%s: Too many requests reached at %s, waiting for %s' % (
                        self.url, self.fmt_delta(self.tmr_limit_time), self.fmt_delta(time_left)))
                return use_tmr_limit
            else:
                self.tmr_limit_time = None
                self.tmr_limit_wait = None
        if 3 <= self.failure_count:
            if None is self.failure_time:
                self.failure_time = datetime.datetime.now()
            if self.is_waiting():
                if log_warning:
                    time_left = self.wait_time() - self.fail_newest_delta()
                    logger.warning('Failed %s times, skipping domain %s for %s, '
                                   'last failure at %s with fail type: %s' %
                                   (self.failure_count, self.url, self.fmt_delta(time_left),
                                    self.fmt_delta(self.failure_time),
                                    ConnectionFailTypes.names.get(
                                        self.last_fail, ConnectionFailTypes.names[ConnectionFailTypes.other])))
                return True
        return False

    @property
    def fails(self):
        # type: (...) -> List
        return self._fails
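
    # fails_sorted below groups fails into hourly buckets; each returned row is a
    # dict shaped roughly like this (illustrative values, per-type keys come from
    # ConnectionFailTypes.names):
    #   {'date': '2024-01-01', 'date_time': ..., 'timestamp': ..., 'multirow': False,
    #    'http': {'count': 2, 'code': {500: 2}}, 'timeout': {'count': 0}, ...}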

    @property
    def fails_sorted(self):
        # type: (...) -> List
        fail_dict = {}
        b_d = {'count': 0}
        for e in self._fails:
            fail_date = e.fail_time.date()
            fail_hour = e.fail_time.time().hour
            date_time = datetime.datetime.combine(fail_date, datetime.time(hour=fail_hour))
            if ConnectionFailTypes.names[e.fail_type] not in fail_dict.get(date_time, {}):
                default = {'date': str(fail_date), 'date_time': date_time,
                           'timestamp': try_int(_totimestamp(e.fail_time)), 'multirow': False}
                for et in itervalues(ConnectionFailTypes.names):
                    default[et] = b_d.copy()
                fail_dict.setdefault(date_time, default)[ConnectionFailTypes.names[e.fail_type]]['count'] = 1
            else:
                fail_dict[date_time][ConnectionFailTypes.names[e.fail_type]]['count'] += 1
            if ConnectionFailTypes.http == e.fail_type:
                if e.code in fail_dict[date_time].get(ConnectionFailTypes.names[e.fail_type],
                                                      {'code': {}}).get('code', {}):
                    fail_dict[date_time][ConnectionFailTypes.names[e.fail_type]]['code'][e.code] += 1
                else:
                    fail_dict[date_time][ConnectionFailTypes.names[e.fail_type]].setdefault('code', {})[e.code] = 1

        row_count = {}
        for (k, v) in iteritems(fail_dict):
            row_count.setdefault(v.get('date'), 0)
            if v.get('date') in row_count:
                row_count[v.get('date')] += 1
        for (k, v) in iteritems(fail_dict):
            if 1 < row_count.get(v.get('date')):
                fail_dict[k]['multirow'] = True

        fail_list = sorted([fail_dict[k] for k in iterkeys(fail_dict)], key=lambda y: y.get('date_time'), reverse=True)

        totals = {}
        for fail_date in set([fail.get('date') for fail in fail_list]):
            daytotals = {}
            for et in itervalues(ConnectionFailTypes.names):
                daytotals.update({et: sum([x.get(et).get('count') for x in fail_list if fail_date == x.get('date')])})
            totals.update({fail_date: daytotals})
        for (fail_date, total) in iteritems(totals):
            for i, item in enumerate(fail_list):
                if fail_date == item.get('date'):
                    if item.get('multirow'):
                        fail_list[i:i] = [item.copy()]
                        for et in itervalues(ConnectionFailTypes.names):
                            fail_list[i][et] = {'count': total[et]}
                            if et == ConnectionFailTypes.names[ConnectionFailTypes.http]:
                                fail_list[i][et]['code'] = {}
                    break

        return fail_list

    def add_fail(self,
                 fail  # type: ConnectionFail
                 ):
        if isinstance(fail, ConnectionFail):
            with self.lock:
                self.dirty = True
                self._fails.append(fail)
                logger.debug('Adding fail.%s for %s' % (ConnectionFailTypes.names.get(
                    fail.fail_type, ConnectionFailTypes.names[ConnectionFailTypes.other]), self.url))
            self.save_list()

    def _load_fail_values(self):
        if None is not DATA_DIR:
            my_db = db.DBConnection('cache.db')
            if my_db.hasTable('connection_fails_count'):
                r = my_db.select('SELECT * FROM connection_fails_count WHERE domain_url = ?', [self.url])
                if r:
                    self._failure_count = try_int(r[0]['failure_count'], 0)
                    if r[0]['failure_time']:
                        self._failure_time = datetime.datetime.fromtimestamp(r[0]['failure_time'])
                    else:
                        self._failure_time = None
                    self._tmr_limit_count = try_int(r[0]['tmr_limit_count'], 0)
                    if r[0]['tmr_limit_time']:
                        self._tmr_limit_time = datetime.datetime.fromtimestamp(r[0]['tmr_limit_time'])
                    else:
                        self._tmr_limit_time = None
                    if r[0]['tmr_limit_wait']:
                        self._tmr_limit_wait = datetime.timedelta(seconds=try_int(r[0]['tmr_limit_wait'], 0))
                    else:
                        self._tmr_limit_wait = None
            self._last_fail_type = self.last_fail
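
    # _save_fail_value below persists a single column of this domain's
    # connection_fails_count row; the UPDATE-then-REPLACE pair acts as a
    # crude upsert for SQLite.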

    def _save_fail_value(self, field, value):
        my_db = db.DBConnection('cache.db')
        if my_db.hasTable('connection_fails_count'):
            r = my_db.action('UPDATE connection_fails_count SET %s = ? WHERE domain_url = ?' % field,
                             [value, self.url])
            if 0 == r.rowcount:
                my_db.action('REPLACE INTO connection_fails_count (domain_url, %s) VALUES (?,?)' % field,
                             [self.url, value])

    def save_list(self):
        if self.dirty:
            self.clear_old()
            if None is not db:
                with self.lock:
                    try:
                        my_db = db.DBConnection('cache.db')
                        cl = []
                        for f in self._fails:
                            cl.append(['INSERT OR IGNORE INTO connection_fails (domain_url, fail_type, fail_code, '
                                       'fail_time) VALUES (?,?,?,?)',
                                       [self.url, f.fail_type, f.code, _totimestamp(f.fail_time)]])
                        self.dirty = False
                        if cl:
                            my_db.mass_action(cl)
                    except (BaseException, Exception):
                        pass
                self.last_save = datetime.datetime.now()

    def load_list(self):
        if None is not db:
            with self.lock:
                try:
                    my_db = db.DBConnection('cache.db')
                    if my_db.hasTable('connection_fails'):
                        results = my_db.select('SELECT * FROM connection_fails WHERE domain_url = ?', [self.url])
                        self._fails = []
                        for r in results:
                            try:
                                self._fails.append(ConnectionFail(
                                    fail_type=try_int(r['fail_type']), code=try_int(r['fail_code']),
                                    fail_time=datetime.datetime.fromtimestamp(try_int(r['fail_time']))))
                            except (BaseException, Exception):
                                continue
                except (BaseException, Exception):
                    pass

    def clear_old(self):
        if None is not db:
            with self.lock:
                try:
                    my_db = db.DBConnection('cache.db')
                    if my_db.hasTable('connection_fails'):
                        # noinspection PyCallByClass,PyTypeChecker
                        time_limit = _totimestamp(datetime.datetime.now() - datetime.timedelta(days=28))
                        my_db.action('DELETE FROM connection_fails WHERE fail_time < ?', [time_limit])
                except (BaseException, Exception):
                    pass


def _totimestamp(dt=None):
    # type: (Optional[datetime.datetime]) -> integer_types
    """
    This function should only be used in this module due to its 1970s+ limitation as that's all we need here
    and sgdatatime can't be used at this module level
    """
    try:
        if PY2:
            import time
            return int(time.mktime(dt.timetuple()))
        return int(datetime.datetime.timestamp(dt))
    except (BaseException, Exception):
        return 0
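

# Example (illustrative): _totimestamp truncates to whole epoch seconds, so
#   _totimestamp(datetime.datetime(2020, 1, 1))  # -> 1577836800 on a UTC host
# and invalid input (e.g. None) falls through to the fallback value 0.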


def _log_failure_url(url, post_data=None, post_json=None):
    # type: (AnyStr, Optional[AnyStr], Optional[AnyStr]) -> None
    if DOMAIN_FAILURES.should_skip(url, log_warning=False):
        post = []
        if post_data:
            post += [' .. Post params: [%s]' % '&'.join([post_data])]
        if post_json:
            post += [' .. Json params: [%s]' % '&'.join([post_json])]
        logger.warning('Failure URL: %s%s' % (url, ''.join(post)))


# try to convert to int, if it fails the default will be returned
def try_int(s, s_default=0):
    try:
        return int(s)
    except (BaseException, Exception):
        return s_default


def _maybe_request_url(e, def_url=''):
    return hasattr(e, 'request') and hasattr(e.request, 'url') and ' ' + e.request.url or def_url


def clean_data(data):
    """Cleans up strings, lists, dicts returned

    Issues corrected:
    - Replaces &amp; with &
    - Trailing whitespace
    - Decode html entities

    :param data: data
    :type data: List or Dict or AnyStr
    :return:
    :rtype: List or Dict or AnyStr
    """
    if isinstance(data, list):
        return [clean_data(d) for d in data]
    if isinstance(data, dict):
        return {k: clean_data(v) for k, v in iteritems(data)}
    if isinstance(data, string_types):
        return html_unescape(data).strip().replace(u'&amp;', u'&')
    return data


def get_system_temp_dir():
    """
    :return: Returns the [system temp dir]/SickGear-u501 (or SickGear-myuser)
    :rtype: AnyStr
    """
    if hasattr(os, 'getuid'):
        uid = 'u%d' % (os.getuid())
    else:
        # For Windows
        try:
            uid = getpass.getuser()
        except ImportError:
            return ek.ek(os.path.join, tempfile.gettempdir(), 'SickGear')
    return ek.ek(os.path.join, tempfile.gettempdir(), 'SickGear-%s' % uid)
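

# Example (illustrative): on a POSIX host running as uid 501, get_system_temp_dir()
# returns something like '/tmp/SickGear-u501'; on Windows the username is used
# instead, falling back to plain 'SickGear' when no username can be determined.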
""" # check for "PAC" usage match = re.search(r'^\s*PAC:\s*(.*)', setting, re.I) if not match: return setting, False pac_url = match.group(1) # prevent a recursive test with existing proxy setting when fetching PAC url global PROXY_SETTING proxy_setting_backup = PROXY_SETTING PROXY_SETTING = '' resp = '' try: resp = get_url(pac_url) except (BaseException, Exception): pass PROXY_SETTING = proxy_setting_backup if not resp: return None, False proxy_address = None request_url_match = False parsed_url = urlparse(request_url) netloc = parsed_url.netloc for pac_data in re.finditer(r"""(?:[^'"]*['"])([^.]+\.[^'"]*)(?:['"])""", resp, re.I): data = re.search(r"""PROXY\s+([^'"]+)""", pac_data.group(1), re.I) if data: if force: return data.group(1), True # noinspection PyUnresolvedReferences proxy_address = (proxy_address, data.group(1))[None is proxy_address] elif re.search(re.escape(pac_data.group(1)), netloc, re.I): request_url_match = True if None is not proxy_address: break if None is proxy_address: return None, True return (False, proxy_address)[request_url_match], True def get_url(url, # type: AnyStr post_data=None, # type: Optional params=None, # type: Optional headers=None, # type: Optional[Dict] timeout=30, # type: int session=None, # type: Optional[requests.Session] parse_json=False, # type: bool raise_status_code=False, # type: bool raise_exceptions=False, # type: bool as_binary=False, # type: bool encoding=None, # type: Optional[AnyStr] failure_monitor=True, # type: bool use_tmr_limit=True, # type: bool raise_skip_exception=False, # type: bool exclude_client_http_codes=True, # type: bool exclude_http_codes=(404, 429), # type: Tuple[integer_types] exclude_no_data=True, # type: bool **kwargs): # type: (...) -> Optional[Union[AnyStr, bool, bytes, Dict, Tuple[Union[Dict, List], requests.Session]]] """ Return data from a URI with a possible check for authentication prior to the data fetch. Raised errors and no data in responses are tracked for making future logic decisions. Returned data is either: 1) a byte-string retrieved from the URL provider. 2) a boolean if successfully used kwargs 'savefile' set to file pathname. 3) JSON dict if parse_json is True, and `Requests::session` when kwargs 'resp_sess' True. 4) `Requests::response`, and `Requests::session` when kwargs 'resp_sess' is True. 


def get_url(url,  # type: AnyStr
            post_data=None,  # type: Optional
            params=None,  # type: Optional
            headers=None,  # type: Optional[Dict]
            timeout=30,  # type: int
            session=None,  # type: Optional[requests.Session]
            parse_json=False,  # type: bool
            raise_status_code=False,  # type: bool
            raise_exceptions=False,  # type: bool
            as_binary=False,  # type: bool
            encoding=None,  # type: Optional[AnyStr]
            failure_monitor=True,  # type: bool
            use_tmr_limit=True,  # type: bool
            raise_skip_exception=False,  # type: bool
            exclude_client_http_codes=True,  # type: bool
            exclude_http_codes=(404, 429),  # type: Tuple[integer_types]
            exclude_no_data=True,  # type: bool
            **kwargs):
    # type: (...) -> Optional[Union[AnyStr, bool, bytes, Dict, Tuple[Union[Dict, List], requests.Session]]]
    """
    Return data from a URI with a possible check for authentication prior to the data fetch.
    Raised errors and no data in responses are tracked for making future logic decisions.

    Returned data is either:
        1) a byte-string retrieved from the URL provider.
        2) a boolean if successfully used kwargs 'savename' set to file pathname.
        3) JSON dict if parse_json is True, and `Requests::session` when kwargs 'resp_sess' True.
        4) `Requests::response`, and `Requests::session` when kwargs 'resp_sess' is True.

    :param url: address to request fetch data from
    :param post_data: post data
    :param params: query parameters to pass through to Requests
    :param headers: headers to add
    :param timeout: timeout
    :param session: optional session object
    :param parse_json: return JSON Dict
    :param raise_status_code: raise exception for status codes
    :param raise_exceptions: raise exceptions
    :param as_binary: return bytes instead of text
    :param encoding: overwrite encoding return header if as_binary is False
    :param failure_monitor: if True, will enable failure monitor for this request
    :param use_tmr_limit: an API limit can be +ve before a fetch, but unwanted, set False to short should_skip
    :param raise_skip_exception: if True, will raise ConnectionSkipException if this request should be skipped
    :param exclude_client_http_codes: if True, exclude client http codes 4XX from failure monitor
    :param exclude_http_codes: http codes to exclude from failure monitor, default: (404, 429)
    :param exclude_no_data: exclude no data as failure
    :param kwargs: keyword params to passthru to Requests
    :return: None or data fetched from address
    """
    domain = None
    if failure_monitor:
        domain = DOMAIN_FAILURES.get_domain(url)
        if domain not in DOMAIN_FAILURES.domain_list:
            DOMAIN_FAILURES.domain_list[domain] = ConnectionFailList(domain)
        if DOMAIN_FAILURES.should_skip(url, use_tmr_limit=use_tmr_limit):
            if raise_skip_exception:
                raise ConnectionSkipException
            return

    response_attr = ('text', 'content')[as_binary]

    # selectively mute some errors
    mute = filter_list(lambda x: kwargs.pop(x, False), [
        'mute_connect_err', 'mute_read_timeout', 'mute_connect_timeout', 'mute_http_error'])

    # reuse or instantiate request session
    resp_sess = kwargs.pop('resp_sess', None)
    if None is session:
        session = CloudflareScraper.create_scraper()
        session.headers.update({'User-Agent': USER_AGENT})

    # download and save file or simply fetch url
    savename = kwargs.pop('savename', None)
    if savename:
        # session streaming
        session.stream = True

    if not kwargs.pop('nocache', False):
        cache_dir = CACHE_DIR or get_system_temp_dir()
        session = CacheControl(sess=session, cache=caches.FileCache(ek.ek(os.path.join, cache_dir, 'sessions')))

    provider = kwargs.pop('provider', None)

    # handle legacy uses of `json` param
    if kwargs.get('json'):
        parse_json = kwargs.pop('json')
    post_json = kwargs.pop('post_json', None)

    # session master headers
    req_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                   'Accept-Encoding': 'gzip,deflate'}
    if headers:
        req_headers.update(headers)
    if hasattr(session, 'reserved') and 'headers' in session.reserved:
        req_headers.update(session.reserved['headers'] or {})
    session.headers.update(req_headers)

    # session parameters
    session.params = params

    # session ssl verify
    session.verify = False
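
    # verify=False trades TLS MITM protection for fewer handshake failures on
    # self-signed hosts; trust_env=False below stops Requests from silently
    # adopting OS-level proxy and auth environment settings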

    # don't trust os environments (auth, proxies, ...)
    session.trust_env = False

    result = response = raised = connection_fail_params = log_failure_url = None
    try:
        # sanitise url
        parsed = list(urlparse(url))
        parsed[2] = re.sub('/{2,}', '/', parsed[2])  # replace two or more / with one
        url = urlunparse(parsed)

        # session proxies
        if PROXY_SETTING:
            (proxy_address, pac_found) = proxy_setting(PROXY_SETTING, url)
            msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
            if None is proxy_address:
                logger.debug('Proxy error, aborted the request using %s' % msg)
                return
            elif proxy_address:
                logger.debug('Using %s' % msg)
                session.proxies = {'http': proxy_address, 'https': proxy_address}

        # decide if we get or post data to server
        if post_data or post_json:
            if True is post_data:
                post_data = None

            if post_data:
                kwargs.setdefault('data', post_data)

            if post_json:
                kwargs.setdefault('json', post_json)

            response = session.post(url, timeout=timeout, **kwargs)
        else:
            response = session.get(url, timeout=timeout, **kwargs)
            if response.ok and not response.content and 'url=' in response.headers.get('Refresh', '').lower():
                url = response.headers.get('Refresh').lower().split('url=')[1].strip('/')
                if not url.startswith('http'):
                    parsed[2] = '/%s' % url
                    url = urlunparse(parsed)
                response = session.get(url, timeout=timeout, **kwargs)

        # if encoding is not in header try to use best guess
        # ignore downloads with savename
        if not savename and not as_binary:
            if encoding:
                response.encoding = encoding
            elif not response.encoding or 'charset' not in response.headers.get('Content-Type', ''):
                response.encoding = response.apparent_encoding

        # noinspection PyProtectedMember
        if provider and provider._has_signature(response.text):
            result = getattr(response, response_attr)
        else:
            if raise_status_code:
                response.raise_for_status()

            if not response.ok:
                http_err_text = 'CloudFlare Ray ID' in response.text and \
                                'CloudFlare reports, "Website is offline"; ' or ''
                if response.status_code in http_error_code:
                    http_err_text += http_error_code[response.status_code]
                elif response.status_code in range(520, 527):
                    http_err_text += 'Origin server connection failure'
                else:
                    http_err_text = 'Custom HTTP error code'
                if 'mute_http_error' not in mute:
                    logger.debug(u'Response not ok. %s: %s from requested url %s'
                                 % (response.status_code, http_err_text, url))

    except requests.exceptions.HTTPError as e:
        raised = e
        is_client_error = 400 <= e.response.status_code < 500
        if failure_monitor and e.response.status_code not in exclude_http_codes and \
                not (exclude_client_http_codes and is_client_error):
            connection_fail_params = dict(fail_type=ConnectionFailTypes.http, code=e.response.status_code)
        if not raise_status_code:
            logger.warning(u'HTTP error %s while loading URL%s' % (e.errno, _maybe_request_url(e)))
    except requests.exceptions.ConnectionError as e:
        raised = e
        if 'mute_connect_err' not in mute:
            logger.warning(u'Connection error msg:%s while loading URL%s' % (ex(e), _maybe_request_url(e)))
        if failure_monitor:
            connection_fail_params = dict(fail_type=ConnectionFailTypes.connection)
    except requests.exceptions.ReadTimeout as e:
        raised = e
        if 'mute_read_timeout' not in mute:
            logger.warning(u'Read timed out msg:%s while loading URL%s' % (ex(e), _maybe_request_url(e)))
        if failure_monitor:
            connection_fail_params = dict(fail_type=ConnectionFailTypes.timeout)
    except (requests.exceptions.Timeout, socket.timeout) as e:
        raised = e
        if 'mute_connect_timeout' not in mute:
            logger.warning(u'Connection timed out msg:%s while loading URL %s' % (ex(e), _maybe_request_url(e, url)))
        if failure_monitor:
            connection_fail_params = dict(fail_type=ConnectionFailTypes.connection_timeout)
    except (BaseException, Exception) as e:
        raised = e
        logger.warning((u'Exception caught while loading URL {0}\r\nDetail... %s\r\n{1}' % ex(e),
                        u'Unknown exception while loading URL {0}\r\nDetail... {1}')[not ex(e)]
                       .format(url, traceback.format_exc()))
        if failure_monitor:
            connection_fail_params = dict(fail_type=ConnectionFailTypes.other)
            log_failure_url = True
    finally:
        if None is not connection_fail_params:
            DOMAIN_FAILURES.inc_failure_count(url, ConnectionFail(**connection_fail_params))
            save_failure(url, domain, log_failure_url, post_data, post_json)

    if isinstance(raised, Exception):
        if raise_exceptions or raise_status_code:
            raise raised
        return

    if None is result and None is not response and response.ok:
        if parse_json:
            try:
                data_json = response.json()
                result = ({}, data_json)[isinstance(data_json, (dict, list))]
                if resp_sess:
                    result = result, session
            except (TypeError, Exception) as e:
                raised = e
                logger.warning(u'JSON data issue from URL %s\r\nDetail... %s' % (url, ex(e)))

        elif savename:
            try:
                write_file(savename, response, raw=True, raise_exceptions=raise_exceptions)
                result = True
            except (BaseException, Exception) as e:
                raised = e

        else:
            result = getattr(response, response_attr)
            if resp_sess:
                result = result, session

    if raise_exceptions and isinstance(raised, Exception):
        raise raised

    if failure_monitor:
        if result and not isinstance(result, tuple) \
                or isinstance(result, tuple) and result[0]:
            domain = DOMAIN_FAILURES.get_domain(url)
            if 0 != DOMAIN_FAILURES.domain_list[domain].failure_count:
                logger.info('Unblocking: %s' % domain)
                DOMAIN_FAILURES.domain_list[domain].failure_count = 0
                DOMAIN_FAILURES.domain_list[domain].failure_time = None
            save_failure(url, domain, False, post_data, post_json)
        elif not exclude_no_data:
            DOMAIN_FAILURES.inc_failure_count(url, ConnectionFail(fail_type=ConnectionFailTypes.nodata))
            save_failure(url, domain, True, post_data, post_json)

    return result


def save_failure(url, domain, log_failure_url, post_data, post_json):
    DOMAIN_FAILURES.domain_list[domain].save_list()
    if log_failure_url:
        _log_failure_url(url, post_data, post_json)


def file_bit_filter(mode):
    for bit in [stat.S_IXUSR, stat.S_IXGRP, stat.S_IXOTH, stat.S_ISUID, stat.S_ISGID]:
        if mode & bit:
            mode -= bit
    return mode


def remove_file_failed(filename):
    """
    delete given file

    :param filename: filename
    :type filename: AnyStr
    """
    try:
        ek.ek(os.remove, filename)
    except (BaseException, Exception):
        pass


def chmod_as_parent(child_path):
    """
    :param child_path: path
    :type child_path: AnyStr
    :return:
    :rtype: None
    """
    if os.name in ('nt', 'ce'):
        return

    parent_path = ek.ek(os.path.dirname, child_path)

    if not parent_path:
        logger.debug(u'No parent path provided in %s, unable to get permissions from it' % child_path)
        return

    parent_path_stat = ek.ek(os.stat, parent_path)
    parent_mode = stat.S_IMODE(parent_path_stat[stat.ST_MODE])

    child_path_stat = ek.ek(os.stat, child_path)
    child_path_mode = stat.S_IMODE(child_path_stat[stat.ST_MODE])

    if ek.ek(os.path.isfile, child_path):
        child_mode = file_bit_filter(parent_mode)
    else:
        child_mode = parent_mode

    if child_path_mode == child_mode:
        return

    child_path_owner = child_path_stat.st_uid
    user_id = os.geteuid()  # only available on UNIX

    if 0 != user_id and user_id != child_path_owner:
        logger.debug(u'Not running as root or owner of %s, not trying to set permissions' % child_path)
        return

    try:
        ek.ek(os.chmod, child_path, child_mode)
        logger.debug(u'Setting permissions for %s to %o as parent directory has %o'
                     % (child_path, child_mode, parent_mode))
    except OSError:
        logger.error(u'Failed to set permission for %s to %o' % (child_path, child_mode))


def make_dirs(path, syno=False):
    """
    Creates any folders that are missing and assigns them the permissions of their parents

    :param path: path
    :type path: AnyStr
    :param syno: whether to trigger a syno library update for path
    :type syno: bool
    :return: success
    :rtype: bool
    """
    if not ek.ek(os.path.isdir, path):
        # Windows, create all missing folders
        if os.name in ('nt', 'ce'):
            try:
                logger.debug(u'Path %s doesn\'t exist, creating it' % path)
                ek.ek(os.makedirs, path)
            except (OSError, IOError) as e:
                logger.error(u'Failed creating %s : %s' % (path, ex(e)))
                return False

        # not Windows, create all missing folders and set permissions
        else:
            sofar = ''
            folder_list = path.split(os.path.sep)

            # look through each sub folder and make sure they all exist
            for cur_folder in folder_list:
                sofar += cur_folder + os.path.sep

                # if it exists then just keep walking down the line
                if ek.ek(os.path.isdir, sofar):
                    continue
                try:
                    logger.debug(u'Path %s doesn\'t exist, creating it' % sofar)
                    ek.ek(os.mkdir, sofar)
                    # use normpath to remove end separator, otherwise checks permissions against itself
                    chmod_as_parent(ek.ek(os.path.normpath, sofar))
                    if syno:
                        # do the library update for synoindex
                        NOTIFIERS.NotifierFactory().get('SYNOINDEX').addFolder(sofar)
                except (OSError, IOError) as e:
                    logger.error(u'Failed creating %s : %s' % (sofar, ex(e)))
                    return False

    return True


def write_file(filepath,  # type: AnyStr
               data,  # type: Union[AnyStr, etree.Element, requests.Response]
               raw=False,  # type: bool
               xmltree=False,  # type: bool
               utf8=False,  # type: bool
               raise_exceptions=False  # type: bool
               ):
    # type: (...) -> bool
    """
    :param filepath: filepath
    :param data: data to write
    :param raw: write binary or text
    :param xmltree: use xml tree
    :param utf8: use UTF8
    :param raise_exceptions: raise exceptions
    :return: success
    """
    result = False

    if make_dirs(ek.ek(os.path.dirname, filepath)):
        try:
            if raw:
                with ek.ek(io.FileIO, filepath, 'wb') as fh:
                    for chunk in data.iter_content(chunk_size=1024):
                        if chunk:
                            fh.write(chunk)
                            fh.flush()
                    ek.ek(os.fsync, fh.fileno())
            else:
                w_mode = 'w'
                if utf8:
                    w_mode = 'a'
                    with ek.ek(io.FileIO, filepath, 'wb') as fh:
                        fh.write(codecs.BOM_UTF8)

                if xmltree:
                    with ek.ek(io.FileIO, filepath, w_mode) as fh:
                        if utf8:
                            data.write(fh, encoding='utf-8')
                        else:
                            data.write(fh)
                else:
                    if isinstance(data, text_type):
                        with ek.ek(io.open, filepath, w_mode, encoding='utf-8') as fh:
                            fh.write(data)
                    else:
                        with ek.ek(io.FileIO, filepath, w_mode) as fh:
                            fh.write(data)

            chmod_as_parent(filepath)

            result = True
        except (EnvironmentError, IOError) as e:
            logger.error('Unable to write file %s : %s' % (filepath, ex(e)))
            if raise_exceptions:
                raise e

    return result


def long_path(path):
    # type: (AnyStr) -> AnyStr
    """add long path prefix for Windows"""
    if 'nt' == os.name and 260 < len(path) and not path.startswith('\\\\?\\') and ek.ek(os.path.isabs, path):
        return '\\\\?\\' + path
    return path


def md5_for_text(text):
    """
    :param text: text to hash
    :type text: AnyStr
    :return:
    :rtype: AnyStr or None
    """
    result = None
    try:
        md5 = hashlib.md5()
        md5.update(decode_bytes(str(text)))
        raw_md5 = md5.hexdigest()
        result = raw_md5[17:] + raw_md5[9:17] + raw_md5[0:9]
    except (BaseException, Exception):
        pass
    return result
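

# Illustrative smoke test (not part of the library API); runs only when this file
# is executed directly and assumes the repo-local imports above (_23, six,
# encodingKludge) resolve as they do inside SickGear.
if __name__ == '__main__':
    print(try_int('123'))        # -> 123
    print(try_int('abc', -1))    # -> -1
    print(clean_data(['  x  ', {'k': 'a &amp; b'}]))  # -> ['x', {'k': 'a & b'}]
    print(md5_for_text('example'))  # rotated md5 hex digest, or None on failure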