# coding=utf-8
#
# This file is part of SickGear.
#
# SickGear is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SickGear is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickGear. If not, see <http://www.gnu.org/licenses/>.

import random
import re
import time
import traceback

from . import generic
from .. import logger
from ..config import naming_ep_type
from ..helpers import try_int

from bs4_parser import BS4Parser
from dateutil.parser import parse

from _23 import unidecode, unquote_plus
from six import iteritems


class TVChaosUKProvider(generic.TorrentProvider):

    def __init__(self):
        generic.TorrentProvider.__init__(self, 'TVChaosUK')

        self.url_base = 'https://www.tvchaosuk.com/'
        self.urls = {'config_provider_home_uri': self.url_base,
                     'login_action': self.url_base + 'login.php',
                     'search': self.url_base + 'browse.php'}

        self.url = self.urls['config_provider_home_uri']

        self.username, self.password, self.freeleech, self.minseed, self.minleech, self.use_after_get_data = 6 * [None]
        self.search_fallback = True

    def _authorised(self, **kwargs):

        return super(TVChaosUKProvider, self)._authorised(
            logged_in=(lambda y=None: self.has_all_cookies(pre='c_secure_')))

    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}
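
        # pre-compiled regexes that find the detail link, download link, and freeleech flag in result rows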
        rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in
                   iteritems({'info': 'detail', 'get': 'download', 'fl': 'free'})])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_string = re.sub(r'(?i)[^a-z0-9\s]', '%', unquote_plus(search_string))
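
                # the site search is a POST; non-alphanumerics were replaced above with '%' wildcards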
                kwargs = dict(post_data={'keywords': search_string, 'do': 'quick_sort', 'page': '0',
                                         'category': '0', 'search_type': 't_name', 'sort': 'added',
                                         'order': 'desc', 'daysprune': '-1'})
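
                # fetch results, retrying up to three more times with randomised delays if the table is missing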
                vals = [i for i in range(5, 16)]
                random.SystemRandom().shuffle(vals)
                attempts = html = soup = tbl = None
                fetch = 'failed fetch'
                for attempts, s in enumerate((0, vals[0], vals[5], vals[10])):
                    time.sleep(s)
                    html = self.get_url(self.urls['search'], **kwargs)
                    if self.should_skip():
                        return results
                    if html:
                        try:
                            soup = BS4Parser(html).soup
                            tbl = soup.find('table', id='sortabletable')
                            if tbl:
                                fetch = 'data fetched'
                                break
                        except (BaseException, Exception):
                            pass
                if attempts:
                    logger.log('%s %s after %s attempts' % (mode, fetch, attempts + 1))

                cnt = len(items[mode])
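
                # parse the result table rows into (title, url, seeders, size) items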
                try:
                    if not html or self._has_no_results(html) or not tbl:
                        raise generic.HaltParseException

                    tbl_rows = tbl.find_all('tr')
                    get_detail = True
                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    head = None
                    for tr in tbl_rows[1:]:
                        cells = tr.find_all('td')
                        if 6 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(tr)
                            seeders, leechers, size = [try_int(n, n) for n in [
                                cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]]
                            if self._reject_item(seeders, leechers, self.freeleech and (
                                    None is cells[1].find('img', title=rc['fl']))):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = (tr.find('div', class_='tooltip-content').get_text() or info.get_text()).strip()
                            title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            download_url = self._link(tr.find('a', href=rc['get'])['href'])
                        except (BaseException, Exception):
                            continue
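
                        # a title ending '...' is truncated, so fetch the full name from its detail page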
                        if get_detail and title.endswith('...'):
                            try:
                                with BS4Parser(self.get_url('%s%s' % (
                                        self.urls['config_provider_home_uri'], info['href'].lstrip('/').replace(
                                            self.urls['config_provider_home_uri'], '')))) as soup_detail:
                                    title = soup_detail.find(
                                        'td', class_='thead', attrs={'colspan': '3'}).get_text().strip()
                                    title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            except IndexError:
                                continue
                            except (BaseException, Exception):
                                get_detail = False
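
                        # convert the scraped title into zero or more parsable release names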
                        try:
                            titles = self.regulate_title(title, mode, search_string)
                            if download_url and titles:
                                for title in titles:
                                    items[mode].append((title, download_url, seeders, self._bytesizer(size)))
                        except (BaseException, Exception):
                            pass

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                if soup:
                    soup.clear(True)
                    del soup

                self._log_search(mode, len(items[mode]) - cnt,
                                 ('search string: ' + search_string.replace('%', '%%'), self.name)['Cache' == mode])

                if mode in 'Season' and len(items[mode]):
                    break

            results = self._sort_seeding(mode, results + items[mode])

        return results

    @staticmethod
    def regulate_title(title, mode='-', search_string=''):

        # normalise abnormal naming patterns, e.g. 2019/20 -> 2019
        title = re.sub(r'((?:19|20)\d\d)/20(\d\d)?', r'\1', title)
        # s<x> ep<y> -> s<x>e<y>
        title = re.sub(r'(?i)s(\d\d+)[\W]*?e+(?:p|pisode)*(\d\d+)', r'S\1E\2', title)

        # strip 'special'/'extra(s)' wording that follows the series number, then rename 'Series' to 'Season'
        has_series = re.findall(r'(?i)(.*?series[^\d]*?\d+)(.*)', title)
        if has_series:
            rc_xtras = re.compile(r'(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)')
            has_special = rc_xtras.findall(has_series[0][1])
            if has_special:
                title = has_series[0][0] + rc_xtras.sub(list(set(
                    list(has_special[0][0]) + list(has_special[0][2])))[0], has_series[0][1])
            title = re.sub('(?i)series', r'Season', title)
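
        # protect year tokens, then rewrite 'x of y' part numbering as SxxExx or a season pack label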
        years = re.findall(r'((?:19|20)\d\d)', title)
        title = re.sub(r'(19|20)\d\d', r'{{yr}}', title)
        title_parts = re.findall(
            r'(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?' +
            r'(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$', title)
        sxe_build = None
        if len(title_parts):
            new_parts = [try_int(part, part) for part in title_parts[0]]
            if not new_parts[1]:
                new_parts[1] = 1
            new_parts[2] = ('E%02d', ' Pack %d')[any([re.search('(?i)season|series', title),
                                                      mode in 'Season'])] % new_parts[2]
            sxe_build = 'S%02d%s' % tuple(new_parts[1:3])
            title = '%s`%s`%s' % (new_parts[0], sxe_build, new_parts[-1])

        # restore the year tokens protected above
        for yr in years:
            # noinspection RegExpRedundantEscape
            title = re.sub(r'\{\{yr\}\}', yr, title, count=1)
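
        # convert written air dates, e.g. '14th July 2019', into ISO style yyyy-mm-dd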
        date_re = r'(?i)([(\s.]*)((?:\d+[\s.]*(?:st|nd|rd|th)?[\s.])?)([adfjmnos]\w{2,}[\s.]+)((?:19|20)\d\d)([)\s.]*)'
        dated = re.findall(date_re, title)
        dnew = None
        for d in dated:
            try:
                dout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
                dnew = dout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(dout)]
                title = title.replace(''.join(d), '%s%s%s' % (('', ' ')[1 < len(d[0])], dnew, ('', ' ')[1 < len(d[4])]))
            except (BaseException, Exception):
                pass

        if dated:
            add_pad = re.findall(r'((?:19|20)\d\d[-]\d\d[-]\d\d)([\w\W])', title)
            if any(add_pad) and add_pad[0][1] not in [' ', '.']:
                title = title.replace(''.join(
                    add_pad[0]), '%s %s' % (add_pad[0][0], add_pad[0][1]))
            title = re.sub(r'(?sim)(.*?)(?:Episode|Season).\d+.(.*)', r'\1\2', title)
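
        # split quality, source, codec, and container tags from the name tail, normalise them (adding
        # hdtv/x264 defaults where absent), and rebuild the title with a '-nogrp' release group suffix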
        t = ['']
        bl = r'[*\[({]+\s*'
        br = r'\s*[})\]*]+'
        title = re.sub(r'(?i)(.*?)(%sproper%s)(.*)' % (bl, br), r'\1\3\2', title)
        for r in (r'\s+-\s+', r'(?:19|20)\d\d(?:\-\d\d\-\d\d)?', r'S\d\d+(?:E\d\d+)?'):
            m = re.findall('(.*%s)(.*)' % r, title)
            if any(m) and len(m[0][0]) > len(t[0]):
                t = m[0]
        t = ([title], t)[any(t)]
        tags = [re.findall(x, t[-1], flags=re.X) for x in
                (r'(?i)%sProper%s|\bProper\b$' % (bl, br),
                 r'(?i)(?:\d{3,4}(?:[pi]|hd)|hd(?:tv)?\s*\d{3,4}(?:[pi])?)',
                 '''
                 (?i)(hr.ws.pdtv|blu.?ray|hddvd|
                 pdtv|hdtv|dsr|tvrip|web.?(?:dl|rip)|dvd.?rip|b[r|d]rip|mpeg-?2)
                 ''', '''
                 (?i)([hx].?26[45]|divx|xvid)
                 ''', '''
                 (?i)(avi|mkv|mp4|sub(?:b?ed|pack|s))
                 ''')]
        title = ('%s`%s' % (
            re.sub('|'.join(['|'.join([re.escape(y) for y in x]) for x in tags if x]).strip('|'), '', t[-1]),
            re.sub(r'(?i)(?:hd(?:tv)?\s*)?(\d{3,4})(?:hd|p)?', r'\1p',
                   '`'.join(['`'.join(x) for x in tags[:-1]]).rstrip('`')) +
            ('', '`hdtv')[not any(tags[2])] + ('', '`x264')[not any(tags[3])]))
        title = re.sub(r'([hx]26[45])p', r'\1', title)
        for r in [(r'(?i)(?:\W(?:Series|Season))?\W(Repack)\W', r'`\1`'),
                  (r'(?i)%s(Proper)%s' % (bl, br), r'`\1`'), (r'%s\s*%s' % (bl, br), '`')]:
            title = re.sub(r[0], r[1], title)
        title = re.sub(r'[][]', '', title)
        title = '%s%s-nogrp' % (('', t[0])[1 < len(t)], title)
        for r in [(r'\s+[-]?\s+|\s+`|`\s+', '`'), ('`+', ' ')]:
            title = re.sub(r[0], r[1], title)

        titles = []
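
        # when both the title and the search string carry a date, reject the result if the dates differ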
        if dnew:
            snew = None
            dated_s = re.findall(date_re, search_string)
            for d in dated_s:
                try:
                    sout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
                    snew = sout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(sout)]
                except (BaseException, Exception):
                    pass
            if snew and dnew and snew != dnew:
                return titles

        try:
            sxxexx_r = r'(?i)S\d\d+E\d\d+'
            if dnew and re.search(sxxexx_r, title):
                titles += [re.sub(sxxexx_r, dnew, re.sub(r'[_.\-\s]?%s' % dnew, '', title))]
        except (BaseException, Exception):
            pass
        titles += [title]
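
        # finally, tokenise the sxe/date parts, split the show name at any ':' divider, and dot-join the parts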
        result = []
        for cur_item in titles:
            sxe_find = r'(?i)%s' % (sxe_build, r'S\d\d+E\d\d+|season\s*\d+')[not sxe_build]
            sxe = re.findall(sxe_find, cur_item) or ''
            if sxe:
                sxe = sxe[0]
                cur_item = re.sub(sxe, r'{{sxe}}', cur_item)
            dated = dnew and re.findall(dnew, cur_item) or ''
            if dated:
                dated = dated[0]
                cur_item = re.sub(dated, r'{{dated}}', cur_item)
            parts = []
            pre_post = re.findall(r'(.*?){{.*}}[.]*(.*)', cur_item)
            item = re.sub(r'{{(sxe|dated)}}[.]*', '', cur_item)
            end = [item]
            if pre_post and (sxe or dated):
                divider = ':'
                tail = re.findall(r'(?i)^([^%s]+)(.*)' % divider, item)[0]
                if tail[1]:  # show name divider found
                    parts = [tail[0].strip()]
                    end = [tail[1].lstrip('%s ' % divider)]
                else:
                    parts = [pre_post[0][0]]
                    end = [pre_post[0][1]]
            parts += ([sxe], [])[not sxe] + ([dated], [])[not dated] + end
            result += [re.sub(r'(\s\.|\.\s|\s+)', '.', ' '.join(parts))]

        return result

    def after_get_data(self, result):
        # optionally auto submit a "Say thanks!" to the uploader of a snatched item
        if self.use_after_get_data:
            tid = None
            try:
                tid = re.findall(r'id=(\d+)$', result.url)[0]
            except IndexError:
                pass
            if tid:
                response = self.get_url(self.url_base + 'takethanks.php', post_data={'torrentid': tid})
                if not self.should_skip():
                    msg = '' if not response else ' err=%s' % re.sub('</?error>', '', response)
                    if not re.search('(?i)remove[^>]+?thank', msg):
                        logger.log('Failed to "Say thanks!" to uploader of id=%s%s' % (tid, msg), logger.DEBUG)

    def _season_strings(self, ep_obj, **kwargs):

        return self.show_name_wildcard(
            generic.TorrentProvider._season_strings(
                self, ep_obj, scene=False, prefix='%', sp_detail=(
                    lambda e: [(('', 'Series %(seasonnumber)d%%')[1 < try_int(e.get('seasonnumber'))]
                                + '%(episodenumber)dof') % e, 'Series %(seasonnumber)d' % e])))

    def _episode_strings(self, ep_obj, **kwargs):

        return self.show_name_wildcard(
            super(TVChaosUKProvider, self)._episode_strings(
                ep_obj, scene=False, prefix='%', date_detail=(
                    lambda date: ['%s %s%% %s'.lstrip('0') % x for x in
                                  [((d[-1], '%s' % m, y), (d, m, y)) + (((d, mf, y),), ())[m == mf]
                                   for (d, m, mf, y) in [(date.strftime(x) for x in ('%d', '%b', '%B', '%Y'))]][0]]),
                ep_detail=(lambda e: [naming_ep_type[2] % e] + (
                    [], ['%(episodenumber)dof' % e])[1 == try_int(e.get('seasonnumber'))]), **kwargs))

    @staticmethod
    def show_name_wildcard(search_items):
        # insert a wildcard between the show name and the rest of each search string
        for d in search_items:
            for k, v in iteritems(d):
                for i, val in enumerate(v):
                    v[i] = val.replace(' %', '% %', 1)
        return search_items

    @staticmethod
    def ui_string(key):

        return ('tvchaosuk_tip' == key
                and 'releases are often "Air by date release names" - edit search settings of show if required'
                or 'tvchaosuk_use_after_get_data' == key and 'Send "Say thanks!"'
                or 'tvchaosuk_use_after_get_data_tip' == key and 'to each release that is snatched'
                or '')


provider = TVChaosUKProvider()