# coding=utf-8
#
# This file is part of SickGear.
#
# SickGear is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SickGear is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickGear. If not, see <http://www.gnu.org/licenses/>.

import random
import re
import time
import traceback

from . import generic
from .. import logger
from ..config import naming_ep_type
from ..helpers import try_int
from bs4_parser import BS4Parser
from dateutil.parser import parse

from _23 import unidecode, unquote_plus
from six import iteritems


class TVChaosUKProvider(generic.TorrentProvider):

    def __init__(self):
        generic.TorrentProvider.__init__(self, 'TVChaosUK')

        self.url_base = 'https://www.tvchaosuk.com/'
        self.urls = {'config_provider_home_uri': self.url_base,
                     'login_action': self.url_base + 'login.php',
                     'search': self.url_base + 'browse.php'}

        self.url = self.urls['config_provider_home_uri']

        self.username, self.password, self.freeleech, self.minseed, self.minleech, self.use_after_get_data = 6 * [None]
        self.search_fallback = True

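    # the base class handles the login POST; a session is deemed logged in
    # here when every cookie prefixed 'c_secure_' is present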
    def _authorised(self, **kwargs):

        return super(TVChaosUKProvider, self)._authorised(
            logged_in=(lambda y=None: self.has_all_cookies(pre='c_secure_')))

    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

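        # matchers for each result row: the detail page anchor, the download
        # anchor, and the image that appears to flag freeleech items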
        rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in
                   iteritems({'info': 'detail', 'get': 'download', 'fl': 'free'})])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
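                # '%' appears to act as a wildcard in the site search, so swap
                # all punctuation for it to match loosely named releases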
                search_string = re.sub(r'(?i)[^a-z0-9\s]', '%', unquote_plus(search_string))

                kwargs = dict(post_data={'keywords': search_string, 'do': 'quick_sort', 'page': '0',
                                         'category': '0', 'search_type': 't_name', 'sort': 'added',
                                         'order': 'desc', 'daysprune': '-1'})

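                # retry the search with randomised pauses; the first attempt is
                # immediate, then up to three more, each sleeping 5..15 secs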
                vals = [i for i in range(5, 16)]
                random.SystemRandom().shuffle(vals)
                attempts = html = soup = tbl = None
                fetch = 'failed fetch'
                for attempts, s in enumerate((0, vals[0], vals[5], vals[10])):
                    time.sleep(s)
                    html = self.get_url(self.urls['search'], **kwargs)
                    if self.should_skip():
                        return results
                    if html:
                        try:
                            soup = BS4Parser(html).soup
                            tbl = soup.find('table', id='sortabletable')
                            if tbl:
                                fetch = 'data fetched'
                                break
                        except (BaseException, Exception):
                            pass
                if attempts:
                    logger.log('%s %s after %s attempts' % (mode, fetch, attempts+1))

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html) or not tbl:
                        raise generic.HaltParseException

                    tbl_rows = tbl.find_all('tr')
                    get_detail = True

                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    head = None
                    for tr in tbl_rows[1:]:
                        cells = tr.find_all('td')
                        if 6 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(tr)
                            seeders, leechers, size = [try_int(n, n) for n in [
                                cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]]
                            if self._reject_item(seeders, leechers, self.freeleech and (
                                    None is cells[1].find('img', title=rc['fl']))):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = (tr.find('div', class_='tooltip-content').get_text() or info.get_text()).strip()
                            title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            download_url = self._link(tr.find('a', href=rc['get'])['href'])
                        except (BaseException, Exception):
                            continue

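                        # the result list truncates long names, so fetch the
                        # full title from the detail page; disable these extra
                        # fetches for the rest of the page on any hard failure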
                        if get_detail and title.endswith('...'):
                            try:
                                with BS4Parser(self.get_url('%s%s' % (
                                        self.urls['config_provider_home_uri'], info['href'].lstrip('/').replace(
                                            self.urls['config_provider_home_uri'], '')))) as soup_detail:
                                    title = soup_detail.find(
                                        'td', class_='thead', attrs={'colspan': '3'}).get_text().strip()
                                    title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            except IndexError:
                                continue
                            except (BaseException, Exception):
                                get_detail = False

                        try:
                            titles = self.regulate_title(title, mode, search_string)
                            if download_url and titles:
                                for title in titles:
                                    items[mode].append((title, download_url, seeders, self._bytesizer(size)))
                        except (BaseException, Exception):
                            pass

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                if soup:
                    soup.clear(True)
                    del soup

                self._log_search(mode, len(items[mode]) - cnt,
                                 ('search string: ' + search_string.replace('%', '%%'), self.name)['Cache' == mode])

                if mode in 'Season' and len(items[mode]):
                    break

            results = self._sort_seeding(mode, results + items[mode])

        return results

    @staticmethod
    def regulate_title(title, mode='-', search_string=''):

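        # the site lists free-form descriptive names rather than scene named
        # releases, so rework each title into parsable release name candidates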
        # normalise abnormal naming patterns e.g. 2019/20 -> 2019
        title = re.sub(r'((?:19|20)\d\d)/20(\d\d)?', r'\1', title)
        # s<x> ep<y> -> s<x>e<y>
        title = re.sub(r'(?i)s(\d\d+)[\W]*?e+(?:p|pisode)*(\d\d+)', r'S\1E\2', title)

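        # collapse any 'special/extras' wording found after 'Series x' naming,
        # then rename the 'Series' prefix to 'Season'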
        has_series = re.findall(r'(?i)(.*?series[^\d]*?\d+)(.*)', title)
        if has_series:
            rc_xtras = re.compile(r'(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)')
            has_special = rc_xtras.findall(has_series[0][1])
            if has_special:
                title = has_series[0][0] + rc_xtras.sub(list(set(
                    list(has_special[0][0]) + list(has_special[0][2])))[0], has_series[0][1])
            title = re.sub('(?i)series', r'Season', title)

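        # shield years behind a '{{yr}}' placeholder so that an 'x of y'
        # episode count can be parsed into SxxEyy (or a 'Pack' label for
        # season searches), then restore the years afterwards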
        years = re.findall(r'((?:19|20)\d\d)', title)
        title = re.sub(r'(19|20)\d\d', r'{{yr}}', title)
        title_parts = re.findall(
            r'(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?' +
            r'(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$', title)
        sxe_build = None
        if len(title_parts):
            new_parts = [try_int(part, part) for part in title_parts[0]]
            if not new_parts[1]:
                new_parts[1] = 1
            new_parts[2] = ('E%02d', ' Pack %d')[any([re.search('(?i)season|series', title),
                                                      mode in 'Season'])] % new_parts[2]
            sxe_build = 'S%02d%s' % tuple(new_parts[1:3])
            title = '%s`%s`%s' % (new_parts[0], sxe_build, new_parts[-1])
        for yr in years:
            # noinspection RegExpRedundantEscape
            title = re.sub(r'\{\{yr\}\}', yr, title, count=1)

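        # normalise a textual airdate such as '5th May 2015' to yyyy-mm-dd
        # (trimmed to yyyy-mm when no day was given)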
        date_re = r'(?i)([(\s.]*)((?:\d+[\s.]*(?:st|nd|rd|th)?[\s.])?)([adfjmnos]\w{2,}[\s.]+)((?:19|20)\d\d)([)\s.]*)'
        dated = re.findall(date_re, title)
        dnew = None
        for d in dated:
            try:
                dout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
                dnew = dout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(dout)]
                title = title.replace(''.join(d), '%s%s%s' % (('', ' ')[1 < len(d[0])], dnew, ('', ' ')[1 < len(d[4])]))
            except (BaseException, Exception):
                pass
        if dated:
            add_pad = re.findall(r'((?:19|20)\d\d[-]\d\d[-]\d\d)([\w\W])', title)
            if any(add_pad) and add_pad[0][1] not in [' ', '.']:
                title = title.replace(''.join(
                    add_pad[0]), '%s %s' % (add_pad[0][0], add_pad[0][1]))
            title = re.sub(r'(?sim)(.*?)(?:Episode|Season).\d+.(.*)', r'\1\2', title)

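        # split the title at the rightmost of ' - ', a date, or an SxxEyy tag,
        # so that trailing quality tags can be processed apart from the name;
        # bl/br match runs of opening/closing brackets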
        t = ['']
        bl = r'[*\[({]+\s*'
        br = r'\s*[})\]*]+'
        title = re.sub('(.*?)((?i)%sproper%s)(.*)' % (bl, br), r'\1\3\2', title)
        for r in (r'\s+-\s+', r'(?:19|20)\d\d(?:\-\d\d\-\d\d)?', r'S\d\d+(?:E\d\d+)?'):
            m = re.findall('(.*%s)(.*)' % r, title)
            if any(m) and len(m[0][0]) > len(t[0]):
                t = m[0]
        t = ([title], t)[any(t)]

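        # pull proper/resolution/source/codec/container tags from the tail,
        # normalise resolution to 'xxxp' form, and assume hdtv + x264 defaults
        # when no source or codec is stated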
        tags = [re.findall(x, t[-1], flags=re.X) for x in
                (r'(?i)%sProper%s|\bProper\b$' % (bl, br),
                 r'(?i)(?:\d{3,4}(?:[pi]|hd)|hd(?:tv)?\s*\d{3,4}(?:[pi])?)',
                 '''
                 (?i)(hr.ws.pdtv|blu.?ray|hddvd|
                 pdtv|hdtv|dsr|tvrip|web.?(?:dl|rip)|dvd.?rip|b[r|d]rip|mpeg-?2)
                 ''', '''
                 (?i)([hx].?26[45]|divx|xvid)
                 ''', '''
                 (?i)(avi|mkv|mp4|sub(?:b?ed|pack|s))
                 ''')]
        title = ('%s`%s' % (
            re.sub('|'.join(['|'.join([re.escape(y) for y in x]) for x in tags if x]).strip('|'), '', t[-1]),
            re.sub(r'(?i)(?:hd(?:tv)?\s*)?(\d{3,4})(?:hd|p)?', r'\1p',
                   '`'.join(['`'.join(x) for x in tags[:-1]]).rstrip('`')) +
            ('', '`hdtv')[not any(tags[2])] + ('', '`x264')[not any(tags[3])]))
        title = re.sub(r'([hx]26[45])p', r'\1', title)
        for r in [(r'(?i)(?:\W(?:Series|Season))?\W(Repack)\W', r'`\1`'),
                  ('(?i)%s(Proper)%s' % (bl, br), r'`\1`'), (r'%s\s*%s' % (bl, br), '`')]:
            title = re.sub(r[0], r[1], title)

        title = re.sub(r'[][]', '', title)
        title = '%s%s-nogrp' % (('', t[0])[1 < len(t)], title)
        for r in [(r'\s+[-]?\s+|\s+`|`\s+', '`'), ('`+', ' ')]:
            title = re.sub(r[0], r[1], title)

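        # if both the title and the search string contain an airdate, discard
        # this result when the two dates disagree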
        titles = []
        if dnew:
            snew = None
            dated_s = re.findall(date_re, search_string)
            for d in dated_s:
                try:
                    sout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
                    snew = sout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(sout)]
                except (BaseException, Exception):
                    pass

            if snew and dnew and snew != dnew:
                return titles

        try:
            sxxexx_r = r'(?i)S\d\d+E\d\d+'
            if dnew and re.search(sxxexx_r, title):
                titles += [re.sub(sxxexx_r, dnew, re.sub(r'[_.\-\s]?%s' % dnew, '', title))]
        except (BaseException, Exception):
            pass

        titles += [title]

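        # reassemble each candidate as 'name SxxEyy/date rest' and convert the
        # whitespace separators into dots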
        result = []
        for cur_item in titles:
            sxe_find = r'(?i)%s' % (sxe_build, r'S\d\d+E\d\d+|season\s*\d+')[not sxe_build]
            sxe = re.findall(sxe_find, cur_item) or ''
            if sxe:
                sxe = sxe[0]
                cur_item = re.sub(sxe, r'{{sxe}}', cur_item)
            dated = dnew and re.findall(dnew, cur_item) or ''
            if dated:
                dated = dated[0]
                cur_item = re.sub(dated, r'{{dated}}', cur_item)

            parts = []
            pre_post = re.findall(r'(.*?){{.*}}[.]*(.*)', cur_item)
            item = re.sub(r'{{(sxe|dated)}}[.]*', '', cur_item)
            end = [item]
            if pre_post and (sxe or dated):
                divider = ':'
                tail = re.findall(r'(?i)^([^%s]+)(.*)' % divider, item)[0]
                if tail[1]:  # show name divider found
                    parts = [tail[0].strip()]
                    end = [tail[1].lstrip('%s ' % divider)]
                else:
                    parts = [pre_post[0][0]]
                    end = [pre_post[0][1]]

            parts += ([sxe], [])[not sxe] + ([dated], [])[not dated] + end
            result += [re.sub(r'(\s\.|\.\s|\s+)', '.', ' '.join(parts))]

        return result

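    # optionally send a "Say thanks!" for a snatched item by posting its
    # torrent id to takethanks.php (enabled by the use_after_get_data setting)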
    def after_get_data(self, result):
        if self.use_after_get_data:
            tid = None
            try:
                tid = re.findall(r'id=(\d+)$', result.url)[0]
            except IndexError:
                pass
            if tid:
                response = self.get_url(self.url_base + 'takethanks.php', post_data={'torrentid': tid})
                if not self.should_skip():
                    msg = '' if not response else ' err=%s' % re.sub('</?error>', '', response)
                    if not re.search('(?i)remove[^>]+?thank', msg):
                        logger.log('Failed to "Say thanks!" to uploader of id=%s%s' % (tid, msg), logger.DEBUG)

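    # search strings use the '%' wildcard plus this site's 'Series x' and
    # 'x of y' naming instead of scene style SxxEyy tokens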
    def _season_strings(self, ep_obj, **kwargs):

        return self.show_name_wildcard(
            generic.TorrentProvider._season_strings(
                self, ep_obj, scene=False, prefix='%', sp_detail=(
                    lambda e: [(('', 'Series %(seasonnumber)d%%')[1 < try_int(e.get('seasonnumber'))]
                               + '%(episodenumber)dof') % e, 'Series %(seasonnumber)d' % e])))

    def _episode_strings(self, ep_obj, **kwargs):

        return self.show_name_wildcard(
            super(TVChaosUKProvider, self)._episode_strings(
                ep_obj, scene=False, prefix='%', date_detail=(
                    lambda date: ['%s %s%% %s'.lstrip('0') % x for x in
                                  [((d[-1], '%s' % m, y), (d, m, y)) + (((d, mf, y),), ())[m == mf]
                                   for (d, m, mf, y) in [(date.strftime(x) for x in ('%d', '%b', '%B', '%Y'))]][0]]),
                ep_detail=(lambda e: [naming_ep_type[2] % e] + (
                    [], ['%(episodenumber)dof' % e])[1 == try_int(e.get('seasonnumber'))]), **kwargs))

    @staticmethod
    def show_name_wildcard(search_items):
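        # insert a wildcard into the show name itself ('name %' -> 'name% %')
        # so that, presumably, names with trailing punctuation still match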
        for d in search_items:
            for k, v in d.items():
                for i, val in enumerate(v):
                    v[i] = val.replace(' %', '% %', 1)
        return search_items

    @staticmethod
    def ui_string(key):

        return ('tvchaosuk_tip' == key
                and 'releases are often "Air by date release names" - edit search settings of show if required'
                or 'tvchaosuk_use_after_get_data' == key and 'Send "Say thanks!"'
                or 'tvchaosuk_use_after_get_data_tip' == key and 'to each release that is snatched'
                or '')


provider = TVChaosUKProvider()