# coding=utf-8
#
# This file is part of SickGear.
#
# SickGear is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SickGear is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickGear. If not, see <http://www.gnu.org/licenses/>.

import random
import re
import time
import traceback

from . import generic
from .. import logger
from ..config import naming_ep_type
from ..helpers import try_int
from bs4_parser import BS4Parser
from dateutil.parser import parse

from _23 import unidecode, unquote_plus
from six import iteritems


class TVChaosUKProvider(generic.TorrentProvider):

    def __init__(self):
        generic.TorrentProvider.__init__(self, 'TVChaosUK')

        self.url_base = 'https://www.tvchaosuk.com/'
        self.urls = {'config_provider_home_uri': self.url_base,
                     'login_action': self.url_base + 'login.php',
                     'search': self.url_base + 'browse.php'}

        self.url = self.urls['config_provider_home_uri']

        self.username, self.password, self.freeleech, self.minseed, self.minleech, self.use_after_get_data = 6 * [None]
        self.search_fallback = True

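    # the base class handles the login POST; a session is deemed logged in
    # here when every cookie prefixed 'c_secure_' is present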
    def _authorised(self, **kwargs):

        return super(TVChaosUKProvider, self)._authorised(
            logged_in=(lambda y=None: self.has_all_cookies(pre='c_secure_')))

    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

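        # matchers for each result row: the detail page anchor, the download
        # anchor, and the image that appears to flag freeleech items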
        rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in
                   iteritems({'info': 'detail', 'get': 'download', 'fl': 'free'})])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
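                # '%' appears to act as a wildcard in the site search, so swap
                # all punctuation for it to match loosely named releases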
                search_string = re.sub(r'(?i)[^a-z0-9\s]', '%', unquote_plus(search_string))

                kwargs = dict(post_data={'keywords': search_string, 'do': 'quick_sort', 'page': '0',
                                         'category': '0', 'search_type': 't_name', 'sort': 'added',
                                         'order': 'desc', 'daysprune': '-1'})

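                # retry the search with randomised pauses; the first attempt is
                # immediate, then up to three more, each sleeping 5..15 secs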
                vals = [i for i in range(5, 16)]
                random.SystemRandom().shuffle(vals)
                attempts = html = soup = tbl = None
                fetch = 'failed fetch'
                for attempts, s in enumerate((0, vals[0], vals[5], vals[10])):
                    time.sleep(s)
                    html = self.get_url(self.urls['search'], **kwargs)
                    if self.should_skip():
                        return results
                    if html:
                        try:
                            soup = BS4Parser(html).soup
                            tbl = soup.find('table', id='sortabletable')
                            if tbl:
                                fetch = 'data fetched'
                                break
                        except (BaseException, Exception):
                            pass
                if attempts:
                    logger.log('%s %s after %s attempts' % (mode, fetch, attempts+1))

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html) or not tbl:
                        raise generic.HaltParseException

                    tbl_rows = tbl.find_all('tr')
                    get_detail = True

                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    head = None
                    for tr in tbl_rows[1:]:
                        cells = tr.find_all('td')
                        if 6 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(tr)
                            seeders, leechers, size = [try_int(n, n) for n in [
                                cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]]
                            if self._reject_item(seeders, leechers, self.freeleech and (
                                    None is cells[1].find('img', title=rc['fl']))):
                                continue

                            info = tr.find('a', href=rc['info'])
                            title = (tr.find('div', class_='tooltip-content').get_text() or info.get_text()).strip()
                            title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            download_url = self._link(tr.find('a', href=rc['get'])['href'])
                        except (BaseException, Exception):
                            continue

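                        # the result list truncates long names, so fetch the
                        # full title from the detail page; disable these extra
                        # fetches for the rest of the page on any hard failure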
                        if get_detail and title.endswith('...'):
                            try:
                                with BS4Parser(self.get_url('%s%s' % (
                                        self.urls['config_provider_home_uri'], info['href'].lstrip('/').replace(
                                            self.urls['config_provider_home_uri'], '')))) as soup_detail:
                                    title = soup_detail.find(
                                        'td', class_='thead', attrs={'colspan': '3'}).get_text().strip()
                                    title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                            except IndexError:
                                continue
                            except (BaseException, Exception):
                                get_detail = False

                        try:
                            titles = self.regulate_title(title, mode, search_string)
                            if download_url and titles:
                                for title in titles:
                                    items[mode].append((title, download_url, seeders, self._bytesizer(size)))
                        except (BaseException, Exception):
                            pass

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                if soup:
                    soup.clear(True)
                    del soup

                self._log_search(mode, len(items[mode]) - cnt,
                                 ('search string: ' + search_string.replace('%', '%%'), self.name)['Cache' == mode])

                if mode in 'Season' and len(items[mode]):
                    break

            results = self._sort_seeding(mode, results + items[mode])

        return results

    @staticmethod
    def regulate_title(title, mode='-', search_string=''):

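        # the site lists free-form descriptive names rather than scene named
        # releases, so rework each title into parsable release name candidates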
        # normalise abnormal naming patterns e.g. 2019/20 -> 2019
        title = re.sub(r'((?:19|20)\d\d)/20(\d\d)?', r'\1', title)
        # s<x> ep<y> -> s<x>e<y>
        title = re.sub(r'(?i)s(\d\d+)[\W]*?e+(?:p|pisode)*(\d\d+)', r'S\1E\2', title)

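        # collapse any 'special/extras' wording found after 'Series x' naming,
        # then rename the 'Series' prefix to 'Season'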
        has_series = re.findall(r'(?i)(.*?series[^\d]*?\d+)(.*)', title)
        if has_series:
            rc_xtras = re.compile(r'(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)')
            has_special = rc_xtras.findall(has_series[0][1])
            if has_special:
                title = has_series[0][0] + rc_xtras.sub(list(set(
                    list(has_special[0][0]) + list(has_special[0][2])))[0], has_series[0][1])
            title = re.sub('(?i)series', r'Season', title)

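        # shield years behind a '{{yr}}' placeholder so that an 'x of y'
        # episode count can be parsed into SxxEyy (or a 'Pack' label for
        # season searches), then restore the years afterwards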
        years = re.findall(r'((?:19|20)\d\d)', title)
        title = re.sub(r'(19|20)\d\d', r'{{yr}}', title)
        title_parts = re.findall(
            r'(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?' +
            r'(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$', title)
        sxe_build = None
        if len(title_parts):
            new_parts = [try_int(part, part) for part in title_parts[0]]
            if not new_parts[1]:
                new_parts[1] = 1
            new_parts[2] = ('E%02d', ' Pack %d')[any([re.search('(?i)season|series', title),
                                                      mode in 'Season'])] % new_parts[2]
            sxe_build = 'S%02d%s' % tuple(new_parts[1:3])
            title = '%s`%s`%s' % (new_parts[0], sxe_build, new_parts[-1])
        for yr in years:
            # noinspection RegExpRedundantEscape
            title = re.sub(r'\{\{yr\}\}', yr, title, count=1)

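        # normalise a textual airdate such as '5th May 2015' to yyyy-mm-dd
        # (trimmed to yyyy-mm when no day was given)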
        date_re = r'(?i)([(\s.]*)((?:\d+[\s.]*(?:st|nd|rd|th)?[\s.])?)([adfjmnos]\w{2,}[\s.]+)((?:19|20)\d\d)([)\s.]*)'
        dated = re.findall(date_re, title)
        dnew = None
        for d in dated:
            try:
                dout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
                dnew = dout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(dout)]
                title = title.replace(''.join(d), '%s%s%s' % (('', ' ')[1 < len(d[0])], dnew, ('', ' ')[1 < len(d[4])]))
            except (BaseException, Exception):
                pass
        if dated:
            add_pad = re.findall(r'((?:19|20)\d\d[-]\d\d[-]\d\d)([\w\W])', title)
            if any(add_pad) and add_pad[0][1] not in [' ', '.']:
                title = title.replace(''.join(
                    add_pad[0]), '%s %s' % (add_pad[0][0], add_pad[0][1]))
            title = re.sub(r'(?sim)(.*?)(?:Episode|Season).\d+.(.*)', r'\1\2', title)

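        # split the title at the rightmost of ' - ', a date, or an SxxEyy tag,
        # so that trailing quality tags can be processed apart from the name;
        # bl/br match runs of opening/closing brackets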
        t = ['']
        bl = r'[*\[({]+\s*'
        br = r'\s*[})\]*]+'
        title = re.sub('(.*?)((?i)%sproper%s)(.*)' % (bl, br), r'\1\3\2', title)
        for r in (r'\s+-\s+', r'(?:19|20)\d\d(?:\-\d\d\-\d\d)?', r'S\d\d+(?:E\d\d+)?'):
            m = re.findall('(.*%s)(.*)' % r, title)
            if any(m) and len(m[0][0]) > len(t[0]):
                t = m[0]
        t = ([title], t)[any(t)]

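        # pull proper/resolution/source/codec/container tags from the tail,
        # normalise resolution to 'xxxp' form, and assume hdtv + x264 defaults
        # when no source or codec is stated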
        tags = [re.findall(x, t[-1], flags=re.X) for x in
                (r'(?i)%sProper%s|\bProper\b$' % (bl, br),
                 r'(?i)(?:\d{3,4}(?:[pi]|hd)|hd(?:tv)?\s*\d{3,4}(?:[pi])?)',
                 '''
                 (?i)(hr.ws.pdtv|blu.?ray|hddvd|
                 pdtv|hdtv|dsr|tvrip|web.?(?:dl|rip)|dvd.?rip|b[r|d]rip|mpeg-?2)
                 ''', '''
                 (?i)([hx].?26[45]|divx|xvid)
                 ''', '''
                 (?i)(avi|mkv|mp4|sub(?:b?ed|pack|s))
                 ''')]
        title = ('%s`%s' % (
            re.sub('|'.join(['|'.join([re.escape(y) for y in x]) for x in tags if x]).strip('|'), '', t[-1]),
            re.sub(r'(?i)(?:hd(?:tv)?\s*)?(\d{3,4})(?:hd|p)?', r'\1p',
                   '`'.join(['`'.join(x) for x in tags[:-1]]).rstrip('`')) +
            ('', '`hdtv')[not any(tags[2])] + ('', '`x264')[not any(tags[3])]))
        title = re.sub(r'([hx]26[45])p', r'\1', title)
        for r in [(r'(?i)(?:\W(?:Series|Season))?\W(Repack)\W', r'`\1`'),
                  ('(?i)%s(Proper)%s' % (bl, br), r'`\1`'), (r'%s\s*%s' % (bl, br), '`')]:
            title = re.sub(r[0], r[1], title)

        title = re.sub(r'[][]', '', title)
        title = '%s%s-nogrp' % (('', t[0])[1 < len(t)], title)
        for r in [(r'\s+[-]?\s+|\s+`|`\s+', '`'), ('`+', ' ')]:
            title = re.sub(r[0], r[1], title)

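        # if both the title and the search string contain an airdate, discard
        # this result when the two dates disagree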
        titles = []
        if dnew:
            snew = None
            dated_s = re.findall(date_re, search_string)
            for d in dated_s:
                try:
                    sout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
                    snew = sout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(sout)]
                except (BaseException, Exception):
                    pass

            if snew and dnew and snew != dnew:
                return titles

        try:
            sxxexx_r = r'(?i)S\d\d+E\d\d+'
            if dnew and re.search(sxxexx_r, title):
                titles += [re.sub(sxxexx_r, dnew, re.sub(r'[_.\-\s]?%s' % dnew, '', title))]
        except (BaseException, Exception):
            pass

        titles += [title]

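        # reassemble each candidate as 'name SxxEyy/date rest' and convert the
        # whitespace separators into dots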
        result = []
        for cur_item in titles:
            sxe_find = r'(?i)%s' % (sxe_build, r'S\d\d+E\d\d+|season\s*\d+')[not sxe_build]
            sxe = re.findall(sxe_find, cur_item) or ''
            if sxe:
                sxe = sxe[0]
                cur_item = re.sub(sxe, r'{{sxe}}', cur_item)
            dated = dnew and re.findall(dnew, cur_item) or ''
            if dated:
                dated = dated[0]
                cur_item = re.sub(dated, r'{{dated}}', cur_item)

            parts = []
            pre_post = re.findall(r'(.*?){{.*}}[.]*(.*)', cur_item)
            item = re.sub(r'{{(sxe|dated)}}[.]*', '', cur_item)
            end = [item]
            if pre_post and (sxe or dated):
                divider = ':'
                tail = re.findall(r'(?i)^([^%s]+)(.*)' % divider, item)[0]
                if tail[1]:  # show name divider found
                    parts = [tail[0].strip()]
                    end = [tail[1].lstrip('%s ' % divider)]
                else:
                    parts = [pre_post[0][0]]
                    end = [pre_post[0][1]]

            parts += ([sxe], [])[not sxe] + ([dated], [])[not dated] + end
            result += [re.sub(r'(\s\.|\.\s|\s+)', '.', ' '.join(parts))]

        return result

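    # optionally send a "Say thanks!" for a snatched item by posting its
    # torrent id to takethanks.php (enabled by the use_after_get_data setting)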
    def after_get_data(self, result):
        if self.use_after_get_data:
            tid = None
            try:
                tid = re.findall(r'id=(\d+)$', result.url)[0]
            except IndexError:
                pass
            if tid:
                response = self.get_url(self.url_base + 'takethanks.php', post_data={'torrentid': tid})
                if not self.should_skip():
                    msg = '' if not response else ' err=%s' % re.sub('</?error>', '', response)
                    if not re.search('(?i)remove[^>]+?thank', msg):
                        logger.log('Failed to "Say thanks!" to uploader of id=%s%s' % (tid, msg), logger.DEBUG)

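    # search strings use the '%' wildcard plus this site's 'Series x' and
    # 'x of y' naming instead of scene style SxxEyy tokens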
    def _season_strings(self, ep_obj, **kwargs):

        return self.show_name_wildcard(
            generic.TorrentProvider._season_strings(
                self, ep_obj, scene=False, prefix='%', sp_detail=(
                    lambda e: [(('', 'Series %(seasonnumber)d%%')[1 < try_int(e.get('seasonnumber'))]
                               + '%(episodenumber)dof') % e, 'Series %(seasonnumber)d' % e])))

    def _episode_strings(self, ep_obj, **kwargs):

        return self.show_name_wildcard(
            super(TVChaosUKProvider, self)._episode_strings(
                ep_obj, scene=False, prefix='%', date_detail=(
                    lambda date: ['%s %s%% %s'.lstrip('0') % x for x in
                                  [((d[-1], '%s' % m, y), (d, m, y)) + (((d, mf, y),), ())[m == mf]
                                   for (d, m, mf, y) in [(date.strftime(x) for x in ('%d', '%b', '%B', '%Y'))]][0]]),
                ep_detail=(lambda e: [naming_ep_type[2] % e] + (
                    [], ['%(episodenumber)dof' % e])[1 == try_int(e.get('seasonnumber'))]), **kwargs))

    @staticmethod
    def show_name_wildcard(search_items):
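        # insert a wildcard into the show name itself ('name %' -> 'name% %')
        # so that, presumably, names with trailing punctuation still match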
        for d in search_items:
            for k, v in d.items():
                for i, val in enumerate(v):
                    v[i] = val.replace(' %', '% %', 1)
        return search_items

    @staticmethod
    def ui_string(key):

        return ('tvchaosuk_tip' == key
                and 'releases are often "Air by date release names" - edit search settings of show if required'
                or 'tvchaosuk_use_after_get_data' == key and 'Send "Say thanks!"'
                or 'tvchaosuk_use_after_get_data_tip' == key and 'to each release that is snatched'
                or '')


provider = TVChaosUKProvider()