diff --git a/CHANGES.md b/CHANGES.md
index 74fea8d..4c263eb 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -18,6 +18,13 @@
+### 0.20.1 (2019-08-02 20:45:00 UTC)
+
+* Change ensure TVDb statuses display as "Continuing" on home page where applicable
+* Change improve handling an enabled Emby server that becomes unreachable
+* Change improve performance of parsing provider search results
+
+
 ### 0.20.0 (2019-07-15 21:25:00 UTC)
 
 * Change if episode name is not known at point of rename, then use 'tba'
diff --git a/gui/slick/interfaces/default/home.tmpl b/gui/slick/interfaces/default/home.tmpl
index f0259f5..17a2a82 100644
--- a/gui/slick/interfaces/default/home.tmpl
+++ b/gui/slick/interfaces/default/home.tmpl
@@ -131,7 +131,7 @@
 #set $display_name = (re.sub('^((?:A(?!\s+to)n?)|The)\s(\w)', r'\1 \2', $curShow.name), $curShow.name)[$sg_var('SORT_ARTICLE')]
 #set $poster_id += 1
 #if None is not $display_status
-    #if re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
+    #if re.search(r'(?i)(?:(?:new|returning)\s*series|upcoming)', $curShow.status)
        #set $display_status = 'Continuing'
     #else if re.search(r'(?i)(?:nded)', $curShow.status)
        #set $display_status = 'Ended'
@@ -452,7 +452,7 @@
 #set $display_status = $curShow.status
 #if None is not $display_status
-    #if re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
+    #if re.search(r'(?i)(?:(?:new|returning)\s*series|upcoming)', $curShow.status)
        #set $display_status = 'Continuing'
     #else if re.search(r'(?i)(?:nded)', $curShow.status)
        #set $display_status = 'Ended'
diff --git a/sickbeard/bs4_parser.py b/sickbeard/bs4_parser.py
index 76735e3..66c625f 100644
--- a/sickbeard/bs4_parser.py
+++ b/sickbeard/bs4_parser.py
@@ -1,4 +1,4 @@
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, SoupStrainer
 import re
 
@@ -11,6 +11,17 @@ class BS4Parser:
             if 'features' in k and isinstance(v, list):
                 v = [item for item in v if item in ['html5lib', 'html.parser', 'html', 'lxml', 'xml']][0]
 
+            elif 'parse_only' in k:
+                if isinstance(v, dict):
+                    (parse_key, filter_dict), = kwargs[k].items()
+                    v = SoupStrainer(parse_key, filter_dict)
+                else:
+                    v = SoupStrainer(v)
+
+            elif 'preclean' in k and v:
+                args = (re.sub(r'(?si)(<!--.*?-->|<style.*?</style>)', '', args[0]),) + args[1:]
+                continue
+
             kwargs_new[k] = v
 
         tag, attr = [x in kwargs_new and kwargs_new.pop(x) or y for (x, y) in [('tag', 'table'), ('attr', '')]]
diff --git a/sickbeard/helpers.py b/sickbeard/helpers.py
index 1295bc5..377f60e 100644
--- a/sickbeard/helpers.py
+++ b/sickbeard/helpers.py
@@ -1067,20 +1067,19 @@ def touch_file(fname, atime=None):
     return False
 
 
-def _getTempDir():
-    """Returns the [system temp dir]/tvdb_api-u501 (or
-    tvdb_api-myuser)
+def get_system_temp_dir():
+    """Returns the [system temp dir]/tvdb_api-u501 (or tvdb_api-myuser)
     """
     if hasattr(os, 'getuid'):
-        uid = "u%d" % (os.getuid())
+        uid = 'u%d' % (os.getuid())
     else:
         # For Windows
         try:
             uid = getpass.getuser()
         except ImportError:
-            return ek.ek(os.path.join, tempfile.gettempdir(), "SickGear")
+            return ek.ek(os.path.join, tempfile.gettempdir(), 'SickGear')
 
-    return ek.ek(os.path.join, tempfile.gettempdir(), "SickGear-%s" % (uid))
+    return ek.ek(os.path.join, tempfile.gettempdir(), 'SickGear-%s' % uid)
 
 
 def proxy_setting(proxy_setting, request_url, force=False):
@@ -1164,7 +1163,7 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N
         session.stream = True
 
     if not kwargs.pop('nocache', False):
-        cache_dir = sickbeard.CACHE_DIR or _getTempDir()
+        cache_dir = sickbeard.CACHE_DIR or get_system_temp_dir()
         session = 
CacheControl(sess=session, cache=caches.FileCache(ek.ek(os.path.join, cache_dir, 'sessions'))) provider = kwargs.pop('provider', None) diff --git a/sickbeard/providers/alpharatio.py b/sickbeard/providers/alpharatio.py index 627b048..8064eea 100644 --- a/sickbeard/providers/alpharatio.py +++ b/sickbeard/providers/alpharatio.py @@ -73,15 +73,14 @@ class AlphaRatioProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find(id='torrent_table') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'id': 'torrent_table'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -105,7 +104,7 @@ class AlphaRatioProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/bithdtv.py b/sickbeard/providers/bithdtv.py index 9bfe514..05c86fb 100644 --- a/sickbeard/providers/bithdtv.py +++ b/sickbeard/providers/bithdtv.py @@ -81,15 +81,14 @@ class BitHDTVProvider(generic.TorrentProvider): html = '\s*([^<]*)\1 len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue @@ -112,7 +111,7 @@ class BitHDTVProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/blutopia.py b/sickbeard/providers/blutopia.py index ce4f2b7..1162d82 100644 --- a/sickbeard/providers/blutopia.py +++ b/sickbeard/providers/blutopia.py @@ -88,7 +88,7 @@ class BlutopiaProvider(generic.TorrentProvider): + (not all([x not in filters for x in 'free', 'double']) and ['freedouble'] or []) + (not all([x not in filters for x in 'feat', 'double']) and ['featdouble'] or [])) )[non_marked] - rc['filter'] = re.compile('(?i)^(%s)$' % '|'.join( + rc['filter'] = re.compile(r'(?i)^(%s)$' % '|'.join( ['%s' % f for f in filters if (f in self.may_filter and self.may_filter[f][1]) or f])) log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join( [f in self.may_filter and self.may_filter[f][0] or f for f in filters])) @@ -114,7 +114,7 @@ class BlutopiaProvider(generic.TorrentProvider): try: from lib import simplejson as json resp_json = json.loads(resp) - except (StandardError, Exception): + except (BaseException, Exception): pass cnt = len(items[mode]) @@ -125,15 +125,14 @@ class BlutopiaProvider(generic.TorrentProvider): html = '%s' % \ (resp if None is self.resp else self.resp.replace('', '%s' % ''.join(resp_json.get('result', [])))) - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', class_='table') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'class': (lambda at: at and 'table' in at)})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -166,7 +165,7 @@ class BlutopiaProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + search_url) diff --git a/sickbeard/providers/btn.py b/sickbeard/providers/btn.py index 6eb9948..d585f74 100644 --- a/sickbeard/providers/btn.py +++ b/sickbeard/providers/btn.py @@ -102,8 +102,8 @@ class BTNProvider(generic.TorrentProvider): (''.join(random.sample('abcdefghijklmnopqrstuvwxyz0123456789', 8)), self.api_key, json.dumps(param_dct), items_per_page, offset)) + response, error_text = None, None try: - response, error_text = None, None if api_up and self.api_key: self.session.headers['Content-Type'] = 'application/json-rpc' response = self.get_url(self.url_api, post_data=json_rpc(params), json=True) @@ -211,19 +211,19 @@ class BTNProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find(id='torrent_table') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html) as soup: + tbl = soup.find(id='torrent_table') + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { - 'cats': '(?i)cat\[(?:%s)\]' % self._categories_string(mode, template='', delimiter='|'), + 'cats': r'(?i)cat\[(?:%s)\]' % self._categories_string(mode, template='', delimiter='|'), 'get': 'download'}.items()) head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -246,7 +246,7 @@ class BTNProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(results) - cnt, search_url) diff --git a/sickbeard/providers/dh.py b/sickbeard/providers/dh.py index 61b7eee..4a855a9 100644 --- a/sickbeard/providers/dh.py +++ b/sickbeard/providers/dh.py @@ -45,7 +45,7 @@ class DHProvider(generic.TorrentProvider): def _authorised(self, **kwargs): return super(DHProvider, self)._authorised( - logged_in=(lambda y=None: (None is y or re.search('(?i)rss\slink', y)) and self.has_all_cookies() and + logged_in=(lambda y=None: (None is y or re.search(r'(?i)rss\slink', y)) and self.has_all_cookies() and self.session.cookies['uid'] in self.digest and self.session.cookies['pass'] in self.digest), failed_msg=(lambda y=None: u'Invalid cookie details for %s. Check settings')) @@ -73,15 +73,14 @@ class DHProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', attrs={'cellpadding': 0}) - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'cellpadding': 0})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue @@ -102,7 +101,7 @@ class DHProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, self.session.response.get('url')) diff --git a/sickbeard/providers/ettv.py b/sickbeard/providers/ettv.py index c3b1532..40271ec 100644 --- a/sickbeard/providers/ettv.py +++ b/sickbeard/providers/ettv.py @@ -72,15 +72,14 @@ class ETTVProvider(generic.TorrentProvider): try: if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', class_='table') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'class': (lambda at: at and 'table' in at)})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if not len(torrent_rows): + if not len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue @@ -103,7 +102,7 @@ class ETTVProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/eztv.py b/sickbeard/providers/eztv.py index b5a3b4a..5b07738 100644 --- a/sickbeard/providers/eztv.py +++ b/sickbeard/providers/eztv.py @@ -34,9 +34,9 @@ class EztvProvider(generic.TorrentProvider): self.url_home = ['https://eztv.ag/'] + \ ['https://%s/' % base64.b64decode(x) for x in [''.join(x) for x in [ - [re.sub('[v\sz]+', '', x[::-1]) for x in [ + [re.sub(r'[v\sz]+', '', x[::-1]) for x in [ '0vp XZ', 'uvEj d', 'i5 Wzd', 'j9 vGb', 'kV2v a', '0zdvnL', '==vg Z']], - [re.sub('[f\sT]+', '', x[::-1]) for x in [ + [re.sub(r'[f\sT]+', '', x[::-1]) for x in [ '0TpfXZ', 'ufTEjd', 'i5WTTd', 'j9f Gb', 'kV f2a', 'z1mTTL']], ]]] self.url_vars = {'search': 'search/%s', 'browse': 'page_%s'} @@ -74,19 +74,19 @@ class EztvProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.findAll('table', attrs={'class': ['table', 'forum_header_border']})[-1] - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') - for tr in torrent_rows: + with BS4Parser(html) as soup: + tbl = soup.findAll('table', attrs={'class': ['table', 'forum_header_border']})[-1] + tbl_rows = [] if not tbl else tbl.find_all('tr') + for tr in tbl_rows: if 5 > len(tr.find_all('td')): tr.decompose() - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') try: head = head if None is not head else self._header_row(tr) @@ -105,7 +105,7 @@ class EztvProvider(generic.TorrentProvider): except (generic.HaltParseException, IndexError): pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/fano.py b/sickbeard/providers/fano.py index e5f4562..6c388ac 100644 --- a/sickbeard/providers/fano.py +++ b/sickbeard/providers/fano.py @@ -89,15 +89,14 @@ class FanoProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', id='line') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'id': 'line'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if (5 > len(cells) or (any(self.filter) @@ -121,7 +120,7 @@ class FanoProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + search_url) diff --git a/sickbeard/providers/filelist.py b/sickbeard/providers/filelist.py index 71f68e3..03db275 100644 --- a/sickbeard/providers/filelist.py +++ b/sickbeard/providers/filelist.py @@ -70,13 +70,13 @@ class FLProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_rows = soup.find_all('div', 'torrentrow') + with BS4Parser(html) as soup: + tbl_rows = soup.find_all('div', 'torrentrow') - if not len(torrent_rows): + if not len(tbl_rows): raise generic.HaltParseException - for tr in torrent_rows: + for tr in tbl_rows: cells = tr.select('span[style*="cell"]') if 6 > len(cells): continue @@ -96,7 +96,7 @@ class FLProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, self.session.response.get('url')) diff --git a/sickbeard/providers/funfile.py b/sickbeard/providers/funfile.py index 1ddcedc..c92ee20 100644 --- a/sickbeard/providers/funfile.py +++ b/sickbeard/providers/funfile.py @@ -74,15 +74,15 @@ class FunFileProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('td', class_='colhead').find_parent('table') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html) as soup: + tbl = soup.find('td', class_='colhead').find_parent('table') + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') info = tr.find('a', href=rc['info']) if 5 > len(cells) or not info: @@ -105,7 +105,7 @@ class FunFileProvider(generic.TorrentProvider): except (generic.HaltParseException, AttributeError): pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/generic.py b/sickbeard/providers/generic.py index 6fbd880..e162334 100644 --- a/sickbeard/providers/generic.py +++ b/sickbeard/providers/generic.py @@ -181,9 +181,9 @@ class ProviderFailList(object): self._fails.append(ProviderFail( fail_type=helpers.tryInt(r['fail_type']), code=helpers.tryInt(r['fail_code']), fail_time=datetime.datetime.fromtimestamp(helpers.tryInt(r['fail_time'])))) - except (StandardError, Exception): + except (BaseException, Exception): continue - except (StandardError, Exception): + except (BaseException, Exception): pass def clear_old(self): @@ -193,7 +193,7 @@ class ProviderFailList(object): if my_db.hasTable('provider_fails'): time_limit = sbdatetime.totimestamp(datetime.datetime.now() - datetime.timedelta(days=28)) my_db.action('DELETE FROM provider_fails WHERE fail_time < ?', [time_limit]) - except (StandardError, Exception): + except (BaseException, Exception): pass @@ -285,7 +285,7 @@ class GenericProvider(object): def last_fail(self): try: return sorted(self.fails.fails, key=lambda x: x.fail_time, reverse=True)[0].fail_type - except (StandardError, Exception): + except (BaseException, Exception): return None @property @@ -541,7 +541,7 @@ class GenericProvider(object): self.inc_failure_count(ProviderFail(fail_type=ProviderFailTypes.timeout)) except (requests.exceptions.Timeout, socket.timeout): self.inc_failure_count(ProviderFail(fail_type=ProviderFailTypes.connection_timeout)) - except (StandardError, Exception) as e: + except (BaseException, Exception) as e: log_failure_url = True self.inc_failure_count(ProviderFail(fail_type=ProviderFailTypes.other)) @@ -564,7 +564,7 @@ class GenericProvider(object): @staticmethod def make_id(name): - return re.sub('[^\w\d_]', '_', name.strip().lower()) + return re.sub(r'[^\w\d_]', '_', name.strip().lower()) def image_name(self, *default_name): @@ -640,11 +640,11 @@ class GenericProvider(object): try: btih = None try: - btih = re.findall('urn:btih:([\w]{32,40})', result.url)[0] + btih = re.findall(r'urn:btih:([\w]{32,40})', result.url)[0] if 32 == len(btih): from base64 import b16encode, b32decode btih = b16encode(b32decode(btih)) - except (StandardError, Exception): + except (BaseException, Exception): pass if not btih or not re.search('(?i)[0-9a-f]{32,40}', btih): @@ -655,7 +655,7 @@ class GenericProvider(object): urls = ['http%s://%s/torrent/%s.torrent' % (u + (btih.upper(),)) for u in (('s', 'itorrents.org'), ('s', 'torrage.info'), ('', 'reflektor.karmorra.info'), ('', 'thetorrent.org'))] - except (StandardError, Exception): + except (BaseException, Exception): link_type = 'torrent' urls = [result.url] @@ -670,7 +670,7 @@ class GenericProvider(object): ref_state = 'Referer' in self.session.headers and self.session.headers['Referer'] saved = False for url in urls: - cache_dir = sickbeard.CACHE_DIR or helpers._getTempDir() + cache_dir = sickbeard.CACHE_DIR or helpers.get_system_temp_dir() base_name = '%s.%s' % (re.sub('.%s$' % self.providerType, '', helpers.sanitizeFileName(result.name)), self.providerType) final_file = ek.ek(os.path.join, final_dir, base_name) @@ -715,7 +715,7 @@ class GenericProvider(object): if 'blackhole' == sickbeard.TORRENT_METHOD: logger.log('Tip: If your client fails to load magnet in files, ' + 'change blackhole to a client connection method in search settings') - except (StandardError, Exception): + except (BaseException, Exception): 
logger.log(u'Failed to save magnet link to file, %s' % final_file) elif not saved: if 'torrent' == link_type and result.provider.get_id() in sickbeard.PROVIDER_HOMES: @@ -746,13 +746,14 @@ class GenericProvider(object): try: stream = FileInputStream(file_name) parser = guessParser(stream) - except (StandardError, Exception): + except (BaseException, Exception): pass result = parser and 'application/x-bittorrent' == parser.mime_type try: + # noinspection PyProtectedMember stream._input.close() - except (StandardError, Exception): + except (BaseException, Exception): pass return result @@ -794,7 +795,7 @@ class GenericProvider(object): try: title, url = isinstance(item, tuple) and (item[0], item[1]) or \ (item.get('title', None), item.get('link', None)) - except (StandardError, Exception): + except (BaseException, Exception): pass title = title and re.sub(r'\s+', '.', u'%s' % title) @@ -834,8 +835,8 @@ class GenericProvider(object): ((any([cell.get_text()]) and any([rc[x].search(cell.get_text()) for x in rc.keys()]) and cell.get_text()) or (cell.attrs.get('id') and any([rc[x].search(cell['id']) for x in rc.keys()]) and cell['id']) or (cell.attrs.get('title') and any([rc[x].search(cell['title']) for x in rc.keys()]) and cell['title']) - or next(iter(set(filter(lambda z: any([z]), [ - next(iter(set(filter(lambda y: any([y]), [ + or next(iter(set(filter(lambda rz: any([rz]), [ + next(iter(set(filter(lambda ry: any([ry]), [ cell.find(tag, **p) for p in [{attr: rc[x]} for x in rc.keys()]]))), {}).get(attr) for (tag, attr) in [ ('img', 'title'), ('img', 'src'), ('i', 'title'), ('i', 'class'), @@ -873,7 +874,7 @@ class GenericProvider(object): if 32 == len(btih): btih = b16encode(b32decode(btih)).lower() btih = re.search('(?i)[0-9a-f]{32,40}', btih) and btih or None - except (StandardError, Exception): + except (BaseException, Exception): btih = None return (btih and 'magnet:?xt=urn:btih:%s&dn=%s&tr=%s' % (btih, quote_plus(name or btih), '&tr='.join( [quote_plus(tr) for tr in @@ -1151,14 +1152,14 @@ class GenericProvider(object): str1, thing, str3 = (('', '%s item' % mode.lower(), ''), (' usable', 'proper', ' found'))['Propers' == mode] logger.log((u'%s %s in response%s from %s' % (('No' + str1, count)[0 < count], ( '%s%s%s%s' % (('', 'freeleech ')[getattr(self, 'freeleech', False)], thing, maybe_plural(count), str3)), - ('', ' (rejects: %s)' % rejects)[bool(rejects)], re.sub('(\s)\s+', r'\1', url))).replace('%%', '%')) + ('', ' (rejects: %s)' % rejects)[bool(rejects)], re.sub(r'(\s)\s+', r'\1', url))).replace('%%', '%')) def check_auth_cookie(self): if hasattr(self, 'cookies'): cookies = self.cookies - if not (cookies and re.match('^(?:\w+=[^;\s]+[;\s]*)+$', cookies)): + if not (cookies and re.match(r'^(?:\w+=[^;\s]+[;\s]*)+$', cookies)): return False cj = requests.utils.add_dict_to_cookiejar(self.session.cookies, @@ -1196,13 +1197,13 @@ class GenericProvider(object): def _bytesizer(size_dim=''): try: - value = float('.'.join(re.findall('(?i)(\d+)(?:[.,](\d+))?', size_dim)[0])) + value = float('.'.join(re.findall(r'(?i)(\d+)(?:[.,](\d+))?', size_dim)[0])) except TypeError: return size_dim except IndexError: return None try: - value *= 1024 ** ['b', 'k', 'm', 'g', 't'].index(re.findall('(t|g|m|k)[i]?b', size_dim.lower())[0]) + value *= 1024 ** ['b', 'k', 'm', 'g', 't'].index(re.findall('([tgmk])[i]?b', size_dim.lower())[0]) except IndexError: pass return long(math.ceil(value)) @@ -1520,9 +1521,9 @@ class TorrentProvider(GenericProvider): for x in obf.keys(): if 
self.__module__.endswith(self._decode(bytearray(b64decode(x)), c)): - for u in obf[x]: + for ux in obf[x]: urls += [self._decode(bytearray( - b64decode(''.join([re.sub('[\s%s]+' % u[0], '', x[::-1]) for x in u[1:]]))), c)] + b64decode(''.join([re.sub(r'[\s%s]+' % ux[0], '', x[::-1]) for x in ux[1:]]))), c)] url_exclude = url_exclude or [] if url_exclude: urls = urls[1:] @@ -1532,16 +1533,17 @@ class TorrentProvider(GenericProvider): setattr(sickbeard, seen_attr, list(set(getattr(sickbeard, seen_attr, []) + [self.__module__]))) if not urls: - urls = filter(lambda u: 'http' in u, getattr(self, 'url_home', [])) + urls = filter(lambda uh: 'http' in uh, getattr(self, 'url_home', [])) return urls + # noinspection DuplicatedCode @staticmethod def _decode(data, c): try: result = ''.join(chr(int(str( bytearray((8 * c)[i] ^ x for i, x in enumerate(data))[i:i + 2]), 16)) for i in range(0, len(data), 2)) - except (StandardError, Exception): + except (BaseException, Exception): result = '|' return result @@ -1591,6 +1593,9 @@ class TorrentProvider(GenericProvider): sickbeard.save_config() return cur_url + seen_attr = 'PROVIDER_SEEN' + setattr(sickbeard, seen_attr, filter(lambda u: self.__module__ not in u, getattr(sickbeard, seen_attr, []))) + self.failure_count = 3 * bool(failure_count) if self.should_skip(): return None @@ -1617,13 +1622,13 @@ class TorrentProvider(GenericProvider): def _authorised(self, logged_in=None, post_params=None, failed_msg=None, url=None, timeout=30, **kwargs): maxed_out = (lambda y: re.search(r'(?i)[1-3]((<[^>]+>)|\W)*' + - '(attempts|tries|remain)[\W\w]{,40}?(remain|left|attempt)', y)) + r'(attempts|tries|remain)[\W\w]{,40}?(remain|left|attempt)', y)) logged_in, failed_msg = [None is not a and a or b for (a, b) in ( (logged_in, (lambda y=None: self.has_all_cookies())), (failed_msg, (lambda y='': maxed_out(y) and u'Urgent abort, running low on login attempts. ' + u'Password flushed to prevent service disruption to %s.' or (re.search(r'(?i)(username|password)((<[^>]+>)|\W)*' + - '(or|and|/|\s)((<[^>]+>)|\W)*(password|incorrect)', y) and + r'(or|and|/|\s)((<[^>]+>)|\W)*(password|incorrect)', y) and u'Invalid username or password for %s. 
Check settings' or u'Failed to authenticate or parse a response from %s, abort provider'))) )] @@ -1794,13 +1799,13 @@ class TorrentProvider(GenericProvider): @staticmethod def _has_no_results(html): - return re.search(r'(?i)<(?:b|div|h\d|p|span|strong|td)[^>]*>\s*(?:' + - 'your\ssearch.*?did\snot\smatch|' + - '(?:nothing|0\s+torrents)\sfound|' + - '(?:sorry,\s)?no\s(?:results|torrents)\s(found|here|match)|' + - 'no\s(?:match|results|torrents)!*|' - '[^<]*?there\sare\sno\sresults|' + - '[^<]*?no\shits\.\sTry\sadding' + + return re.search(r'(?i)<(?:b|div|font|h\d|p|span|strong|td)[^>]*>\s*(?:' + + r'your\ssearch.*?did\snot\smatch|' + + r'(?:nothing|0\s+torrents)\sfound|' + + r'(?:sorry,\s)?no\s(?:results|torrents)\s(found|here|match)|' + + r'no\s(?:match|results|torrents)!*|' + r'[^<]*?there\sare\sno\sresults|' + + r'[^<]*?no\shits\.\sTry\sadding' + ')', html) def _cache_data(self, **kwargs): diff --git a/sickbeard/providers/grabtheinfo.py b/sickbeard/providers/grabtheinfo.py index 5eeae12..a4791d8 100644 --- a/sickbeard/providers/grabtheinfo.py +++ b/sickbeard/providers/grabtheinfo.py @@ -81,24 +81,24 @@ class GrabTheInfoProvider(generic.TorrentProvider): html = html.replace('', '') html = re.sub(r'()[^<]*', r'\1', html) html = re.sub(r'( (len(torrent_rows) - shows_found): + if not shows_found or 2 > (len(tbl_rows) - shows_found): raise generic.HaltParseException head = None - for tr in torrent_rows[1 + shows_found:]: + for tr in tbl_rows[1 + shows_found:]: cells = tr.find_all('td') if 4 > len(cells): continue try: - head = head if None is not head else self._header_row(torrent_rows[shows_found]) + head = head if None is not head else self._header_row(tbl_rows[shows_found]) seeders, leechers, size = [tryInt(n, n) for n in [ cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._reject_item(seeders, leechers): @@ -115,7 +115,7 @@ class GrabTheInfoProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/hdme.py b/sickbeard/providers/hdme.py index fb994ab..c123eb6 100644 --- a/sickbeard/providers/hdme.py +++ b/sickbeard/providers/hdme.py @@ -75,15 +75,14 @@ class HDMEProvider(generic.TorrentProvider): html = re.sub(r'(?s)]+font[^>]+>', '', html) html = re.sub(r'(?s)(]+>(?!<[ab]).*?)(?:(?:)+)', r'\1', html) html = re.sub(r'(?m)^
', r'', html) - with BS4Parser(html, features=['html5lib', 'permissive'], attr='id="parse"') as soup: - torrent_table = soup.find('table', id='parse') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'id': 'parse'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -105,7 +104,7 @@ class HDMEProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/hdspace.py b/sickbeard/providers/hdspace.py index 606cc9f..b347d16 100644 --- a/sickbeard/providers/hdspace.py +++ b/sickbeard/providers/hdspace.py @@ -62,7 +62,7 @@ class HDSpaceProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { - 'info': 'torrent-details', 'get': 'download', 'peers': 'page=peers', 'nodots': '[\.\s]+'}.items()) + 'info': 'torrent-details', 'get': 'download', 'peers': 'page=peers', 'nodots': r'[\.\s]+'}.items()) log = '' if self.filter: non_marked = 'f0' in self.filter @@ -89,17 +89,17 @@ class HDSpaceProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive'], - attr='width="100%"\Wclass="lista"') as soup: - torrent_table = soup.find_all('table', class_='lista')[-1] - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, attr=r'width="100%"\Wclass="lista"') as soup: + tbl = soup.find_all('table', class_='lista')[-1] + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') + # noinspection PyUnboundLocalVariable if (6 > len(cells) or tr.find('td', class_='header') or (any(self.filter) and ((non_marked and tr.find('img', src=rc['filter'])) @@ -127,7 +127,7 @@ class HDSpaceProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + search_url) diff --git a/sickbeard/providers/hdtorrents.py b/sickbeard/providers/hdtorrents.py index c0ce782..39c5832 100644 --- a/sickbeard/providers/hdtorrents.py +++ b/sickbeard/providers/hdtorrents.py @@ -92,23 +92,24 @@ class HDTorrentsProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - html = re.sub('(?ims)]+display:\s*none;.*?', '', html) + html = re.sub(r'(?ims)]+display:\s*none;.*?', '', html) html = re.sub('(?im)href=([^\\"][^>]+)>', r'href="\1">', html) html = (html.replace('"/>', '" />') .replace('"title="', '" title="') .replace('', '')) html = re.sub('(?im)]+)', r'\1 len(cells) or any(self.filter) and ((non_marked and tr.find('img', src=rc['filter'])) or (not non_marked and not tr.find('img', src=rc['filter'])))): @@ -129,7 +130,7 @@ class HDTorrentsProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + search_url) diff --git a/sickbeard/providers/horriblesubs.py b/sickbeard/providers/horriblesubs.py index e07be49..de1bee5 100644 --- a/sickbeard/providers/horriblesubs.py +++ b/sickbeard/providers/horriblesubs.py @@ -75,7 +75,7 @@ class HorribleSubsProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser('%s' % html, features=['html5lib', 'permissive']) as soup: + with BS4Parser('%s' % html) as soup: for link in soup.find_all('a'): try: variants = map(lambda t: t.get_text().replace('SD', '480p'), @@ -91,7 +91,7 @@ class HorribleSubsProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) @@ -110,7 +110,7 @@ class HorribleSubsProvider(generic.TorrentProvider): html = self.get_url(url) if self.should_skip(): return result - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: + with BS4Parser(html) as soup: re_showid = re.compile(r'(?i)hs_showid\s*=\s*(\d+)') try: hs_id = re_showid.findall( @@ -120,7 +120,7 @@ class HorribleSubsProvider(generic.TorrentProvider): html = self.get_url(self.urls['get_data'] % hs_id) if self.should_skip(): return result - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: + with BS4Parser(html) as soup: try: result = sorted(map(lambda t: t.get('href'), soup.find(id=re.findall(r'.*#(\d+-\d+\w)$', url)[0]) diff --git a/sickbeard/providers/immortalseed.py b/sickbeard/providers/immortalseed.py index e85835d..a8e34a3 100644 --- a/sickbeard/providers/immortalseed.py +++ b/sickbeard/providers/immortalseed.py @@ -48,7 +48,7 @@ class ImmortalSeedProvider(generic.TorrentProvider): def _check_auth(self, **kwargs): try: secret_key = 'secret_key=' + re.split('secret_key\s*=\s*([0-9a-zA-Z]+)', self.api_key)[1] - except (StandardError, Exception): + except (BaseException, Exception): raise sickbeard.exceptions.AuthException('Invalid secret key for %s in Media Providers/Options' % self.name) if secret_key != self.api_key: @@ -90,7 +90,7 @@ class ImmortalSeedProvider(generic.TorrentProvider): continue title = rc['title'].sub(r'\1', item.title.strip()) download_url = self._link(rc['get'].findall(getattr(item, 'link', ''))[0]) - except (StandardError, Exception): + except (BaseException, Exception): continue if download_url and title: diff --git a/sickbeard/providers/iptorrents.py b/sickbeard/providers/iptorrents.py index fd089d9..38f6f35 100644 --- a/sickbeard/providers/iptorrents.py +++ b/sickbeard/providers/iptorrents.py @@ -33,9 +33,9 @@ class IPTorrentsProvider(generic.TorrentProvider): self.url_home = (['https://iptorrents.com/'] + [base64.b64decode(x) for x in [''.join(x) for x in [ - [re.sub('(?i)[q\s1]+', '', x[::-1]) for x in [ + [re.sub(r'(?i)[q\s1]+', '', x[::-1]) for x in [ 'c0RHa', 'vo1QD', 'hJ2L', 'GdhdXe', 'vdnLoN', 'J21cptmc', '5yZulmcv', '02bj', '=iq=']], - [re.sub('(?i)[q\seg]+', '', x[::-1]) for x in [ + [re.sub(r'(?i)[q\seg]+', '', x[::-1]) for x in [ 'RqHEa', 'LvEoDc0', 'Zvex2', 'LuF2', 'NXdu Vn', 'XZwQxeWY1', 'Yu42bzJ', 'tgG92']], ]]]) @@ -87,15 +87,15 @@ class IPTorrentsProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find(id='torrents') or soup.find('table', class_='torrents') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html) as soup: + tbl = soup.find(id='torrents') or soup.find('table', class_='torrents') + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -119,7 +119,7 @@ class IPTorrentsProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/limetorrents.py b/sickbeard/providers/limetorrents.py index 9581ed1..321f064 100644 --- a/sickbeard/providers/limetorrents.py +++ b/sickbeard/providers/limetorrents.py @@ -34,9 +34,9 @@ class LimeTorrentsProvider(generic.TorrentProvider): self.url_home = ['https://www.limetorrents.cc/'] + \ ['https://%s/' % base64.b64decode(x) for x in [''.join(x) for x in [ - [re.sub('[F\sp]+', '', x[::-1]) for x in [ + [re.sub(r'[F\sp]+', '', x[::-1]) for x in [ 'XZFtlpGb', 'lJn pcvR', 'nFLpzRnb', 'v xpmYuV', 'CZlt F2Y', '=F QXYs5']], - [re.sub('[K\sP]+', '', x[::-1]) for x in [ + [re.sub(r'[K\sP]+', '', x[::-1]) for x in [ 'XZKtPlGb', 'lJncPPvR', 'nKLzRnKb', 'vxm Y uV', 'CZlPt2PY', '==wYK2P5']], ]]] @@ -76,18 +76,17 @@ class LimeTorrentsProvider(generic.TorrentProvider): try: if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find_all('table', class_='table2') - torrent_rows = [] if not torrent_table else [ - t.select('tr[bgcolor]') for t in torrent_table if + with BS4Parser(html, parse_only=dict(table={'class': (lambda at: at and 'table2' in at)})) as tbl: + tbl_rows = [] if not tbl else [ + t.select('tr[bgcolor]') for t in tbl if all([x in ' '.join(x.get_text() for x in t.find_all('th')).lower() for x in ['torrent', 'size']])] - if not len(torrent_rows): + if not len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[0]: # 0 = all rows + for tr in tbl_rows[0]: # 0 = all rows cells = tr.find_all('td') if 5 > len(cells): continue @@ -110,7 +109,7 @@ class LimeTorrentsProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/magnetdl.py b/sickbeard/providers/magnetdl.py index 276afa6..85806c5 100644 --- a/sickbeard/providers/magnetdl.py +++ b/sickbeard/providers/magnetdl.py @@ -68,15 +68,14 @@ class MagnetDLProvider(generic.TorrentProvider): if 'Cache' == mode: html = re.sub(r'(?mis)^\s*?]+?id="pages">.*?\s*?\r?\n', '', html) - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', attrs={'class': 'download'}) - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'class': 'download'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -98,7 +97,7 @@ class MagnetDLProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/milkie.py b/sickbeard/providers/milkie.py index 77faf08..6737856 100644 --- a/sickbeard/providers/milkie.py +++ b/sickbeard/providers/milkie.py @@ -35,7 +35,7 @@ class MilkieProvider(generic.TorrentProvider): self.urls = {'config_provider_home_uri': self.url_base, 'login': self.api + 'auth/sessions', 'auth': self.api + 'auth', 'get': self.api + 'torrents/%s/torrent?key=%s', - 'search': self.api + 'torrents?pi=0&ps=100&query=%s&categories=2&mode=release'} + 'search': self.api + 'torrents?pi=0&ps=100&query=%s&categories=2&mode=release&t.o=native'} self.username, self.email, self.password, self.minseed, self.minleech, self._token, self._dkey = 7 * [None] diff --git a/sickbeard/providers/morethan.py b/sickbeard/providers/morethan.py index 4faf94e..517f4d5 100644 --- a/sickbeard/providers/morethan.py +++ b/sickbeard/providers/morethan.py @@ -73,17 +73,15 @@ class MoreThanProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', class_='torrent_table') - torrent_rows = [] - if torrent_table: - torrent_rows = torrent_table.find_all('tr') + parse_only = dict(table={'class': (lambda at: at and 'torrent_table' in at)}) + with BS4Parser(html, parse_only=parse_only, preclean=True) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells) or tr.find('img', alt=rc['nuked']): continue @@ -107,7 +105,7 @@ class MoreThanProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/ncore.py b/sickbeard/providers/ncore.py index c581109..d5ec744 100644 --- a/sickbeard/providers/ncore.py +++ b/sickbeard/providers/ncore.py @@ -76,15 +76,15 @@ class NcoreProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('div', class_=rc['list']) - torrent_rows = [] if not torrent_table else torrent_table.find_all('div', class_='box_torrent') + parse_only = dict(div={'class': (lambda at: at and rc['list'].search(at))}) + with BS4Parser(html, parse_only=parse_only) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('div', class_='box_torrent') key = rc['key'].findall(html)[0] - if not len(torrent_rows): + if not len(tbl_rows): raise generic.HaltParseException - for tr in torrent_rows: + for tr in tbl_rows: try: seeders, leechers, size = [tryInt(n, n) for n in [ tr.find('div', class_=x).get_text().strip() @@ -103,7 +103,7 @@ class NcoreProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/nebulance.py b/sickbeard/providers/nebulance.py index 3fbd50c..f38a971 100644 --- a/sickbeard/providers/nebulance.py +++ b/sickbeard/providers/nebulance.py @@ -66,7 +66,7 @@ class NebulanceProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} - rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'nodots': '[\.\s]+'}.items()) + rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'nodots': r'[\.\s]+'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string @@ -92,11 +92,11 @@ class NebulanceProvider(generic.TorrentProvider): try: title_parts = group_name.split('[') - maybe_res = re.findall('((?:72|108|216)0\w)', title_parts[1]) + maybe_res = re.findall(r'((?:72|108|216)0\w)', title_parts[1]) maybe_ext = re.findall('(?i)(%s)' % '|'.join(common.mediaExtensions), title_parts[1]) detail = title_parts[1].split('/') detail[1] = detail[1].strip().lower().replace('mkv', 'x264') - title = '%s.%s' % (BS4Parser(title_parts[0].strip(), 'html.parser').soup.string, '.'.join( + title = '%s.%s' % (BS4Parser(title_parts[0].strip()).soup.string, '.'.join( (maybe_res and [maybe_res[0]] or []) + [detail[0].strip(), detail[1], maybe_ext and maybe_ext[0].lower() or 'mkv'])) except (IndexError, KeyError): @@ -106,7 +106,7 @@ class NebulanceProvider(generic.TorrentProvider): if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) @@ -121,10 +121,10 @@ class NebulanceProvider(generic.TorrentProvider): return t_param t = [''] - bl = '[*\[({]+\s*' - br = '\s*[})\]*]+' + bl = r'[*\[({]+\s*' + br = r'\s*[})\]*]+' title = re.sub('(.*?)((?i)%sproper%s)(.*)' % (bl, br), r'\1\3\2', item['groupName']) - for r in '\s+-\s+', '(?:19|20)\d\d(?:\-\d\d\-\d\d)?', 'S\d\d+(?:E\d\d+)?': + for r in r'\s+-\s+', r'(?:19|20)\d\d(?:\-\d\d\-\d\d)?', r'S\d\d+(?:E\d\d+)?': m = re.findall('(.*%s)(.*)' % r, title) if any(m) and len(m[0][0]) > len(t[0]): t = m[0] @@ -133,7 +133,7 @@ class NebulanceProvider(generic.TorrentProvider): tag_str = '_'.join(item['tags']) tags = [re.findall(x, tag_str, flags=re.X) for x in ('(?i)%sProper%s|\bProper\b$' % (bl, br), - '(?i)\d{3,4}(?:[pi]|hd)', + r'(?i)\d{3,4}(?:[pi]|hd)', ''' (?i)(hr.ws.pdtv|blu.?ray|hddvd| pdtv|hdtv|dsr|tvrip|web.?(?:dl|rip)|dvd.?rip|b[r|d]rip|mpeg-?2) @@ -145,17 +145,17 @@ class NebulanceProvider(generic.TorrentProvider): title = ('%s`%s' % ( re.sub('|'.join(['|'.join([re.escape(y) for y in x]) for x in tags if x]).strip('|'), '', t[-1]), - re.sub('(?i)(\d{3,4})hd', r'\1p', '`'.join(['`'.join(x) for x in tags[:-1]]).rstrip('`')) + + re.sub(r'(?i)(\d{3,4})hd', r'\1p', '`'.join(['`'.join(x) for x in tags[:-1]]).rstrip('`')) + ('', '`hdtv')[not any(tags[2])] + ('', '`x264')[not any(tags[3])])) - for r in [('(?i)(?:\W(?:Series|Season))?\W(Repack)\W', r'`\1`'), - ('(?i)%s(Proper)%s' % (bl, br), r'`\1`'), ('%s\s*%s' % (bl, br), '`')]: + for r in [(r'(?i)(?:\W(?:Series|Season))?\W(Repack)\W', r'`\1`'), + ('(?i)%s(Proper)%s' % (bl, br), r'`\1`'), (r'%s\s*%s' % (bl, br), '`')]: title = re.sub(r[0], r[1], title) grp = filter(lambda rn: '.release' in rn.lower(), item['tags']) title = '%s%s-%s' % (('', t[0])[1 < len(t)], title, (any(grp) and grp[0] or 'nogrp').upper().replace('.RELEASE', '')) - for r in [('\s+[-]?\s+|\s+`|`\s+', '`'), ('`+', '.')]: + for r in [(r'\s+[-]?\s+|\s+`|`\s+', '`'), ('`+', '.')]: title = re.sub(r[0], r[1], title) title += + any(tags[4]) and ('.%s' % tags[4][0]) or '' diff --git a/sickbeard/providers/newznab.py b/sickbeard/providers/newznab.py index c3c985d..d59b6db 100755 --- a/sickbeard/providers/newznab.py +++ b/sickbeard/providers/newznab.py @@ -182,7 +182,7 @@ class NewznabProvider(generic.NZBProvider): res = my_db.select('SELECT' + ' "datetime" FROM "lastrecentsearch" WHERE "name"=?', [self.get_id()]) if res: self._last_recent_search = datetime.datetime.fromtimestamp(int(res[0]['datetime'])) - except (StandardError, Exception): + except (BaseException, Exception): pass return self._last_recent_search @@ -192,7 +192,7 @@ class NewznabProvider(generic.NZBProvider): my_db = db.DBConnection('cache.db') my_db.action('INSERT OR REPLACE INTO "lastrecentsearch" (name, datetime) VALUES (?,?)', [self.get_id(), sbdatetime.totimestamp(value, default=0)]) - except (StandardError, Exception): + except (BaseException, Exception): pass self._last_recent_search = value @@ -284,7 +284,7 @@ class NewznabProvider(generic.NZBProvider): for s, v in NewznabConstants.catSearchStrings.iteritems(): if None is not re.search(s, cat_name, re.IGNORECASE): cats.setdefault(v, []).append(cat_id) - except (StandardError, Exception): + except (BaseException, Exception): continue elif category.get('name', '').upper() in ['XXX', 'OTHER', 'MISC']: for subcat in category.findall('subcat'): @@ -292,9 +292,9 @@ class NewznabProvider(generic.NZBProvider): if None is not re.search(r'^Anime$', 
subcat.attrib['name'], re.IGNORECASE): cats.setdefault(NewznabConstants.CAT_ANIME, []).append(subcat.attrib['id']) break - except (StandardError, Exception): + except (BaseException, Exception): continue - except (StandardError, Exception): + except (BaseException, Exception): logger.log('Error parsing result for [%s]' % self.name, logger.DEBUG) if not caps and self._caps and not all_cats and self._caps_all_cats and not cats and self._caps_cats: @@ -505,7 +505,7 @@ class NewznabProvider(generic.NZBProvider): title = re.sub(pattern, repl, title) parts = re.findall('(.*(?:(?:h.?|x)26[45]|vp9|av1|hevc|xvid|divx)[^-]*)(.*)', title, re.I)[0] title = '%s-%s' % (parts[0], remove_non_release_groups(parts[1].split('-')[1])) - except (StandardError, Exception): + except (BaseException, Exception): pass return title, url @@ -668,11 +668,11 @@ class NewznabProvider(generic.NZBProvider): p = parser.parse(p, fuzzy=True) try: p = p.astimezone(sb_timezone) - except (StandardError, Exception): + except (BaseException, Exception): pass if isinstance(p, datetime.datetime): parsed_date = p.replace(tzinfo=None) - except (StandardError, Exception): + except (BaseException, Exception): pass return parsed_date @@ -688,7 +688,7 @@ class NewznabProvider(generic.NZBProvider): parsed_size = helpers.tryInt(attr.get('value'), -1) elif 'guid' == attr.get('name', ''): uid = attr.get('value') - except (StandardError, Exception): + except (BaseException, Exception): pass return parsed_size, uid @@ -804,7 +804,7 @@ class NewznabProvider(generic.NZBProvider): try: parsed_xml, n_spaces = self.cache.parse_and_get_ns(data) items = parsed_xml.findall('channel/item') - except (StandardError, Exception): + except (BaseException, Exception): logger.log('Error trying to load %s RSS feed' % self.name, logger.WARNING) break @@ -992,7 +992,7 @@ class NewznabCache(tvcache.TVCache): items = None else: (items, n_spaces) = self.provider.cache_data(needed=needed) - except (StandardError, Exception): + except (BaseException, Exception): items = None if items: diff --git a/sickbeard/providers/nyaa.py b/sickbeard/providers/nyaa.py index c5576a3..a52d8b1 100644 --- a/sickbeard/providers/nyaa.py +++ b/sickbeard/providers/nyaa.py @@ -61,15 +61,15 @@ class NyaaProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', class_='torrent-list') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + parse_only = dict(table={'class': (lambda at: at and 'torrent-list' in at)}) + with BS4Parser(html, parse_only=parse_only) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -90,7 +90,7 @@ class NyaaProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/omgwtfnzbs.py b/sickbeard/providers/omgwtfnzbs.py index 747e5c3..66850f0 100644 --- a/sickbeard/providers/omgwtfnzbs.py +++ b/sickbeard/providers/omgwtfnzbs.py @@ -228,16 +228,14 @@ class OmgwtfnzbsProvider(generic.NZBProvider): if not html: raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', attrs={'id': 'table_table'}) - torrent_rows = [] - if torrent_table: - torrent_rows = torrent_table.find('tbody').find_all('tr') + with BS4Parser(html) as soup: + tbl = soup.find('table', attrs={'id': 'table_table'}) + tbl_rows = [] if not tbl else tbl.find('tbody').find_all('tr') - if 1 > len(torrent_rows): + if 1 > len(tbl_rows): raise generic.HaltParseException - for tr in torrent_rows: + for tr in tbl_rows: try: if tr.find('img', src=rc['nuked']) or not tr.find('a', href=rc['cat']): continue @@ -255,7 +253,7 @@ class OmgwtfnzbsProvider(generic.NZBProvider): except generic.HaltParseException: time.sleep(1.1) pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) mode = (mode, search_mode)['Propers' == search_mode] @@ -276,7 +274,7 @@ class OmgwtfnzbsProvider(generic.NZBProvider): title, url = self._title_and_url(item) try: result_date = datetime.fromtimestamp(int(item['usenetage'])) - except (StandardError, Exception): + except (BaseException, Exception): result_date = None if result_date: @@ -293,7 +291,7 @@ class OmgwtfnzbsProvider(generic.NZBProvider): api_key = self._check_auth() if not api_key.startswith('cookie:'): return api_key - except (StandardError, Exception): + except (BaseException, Exception): return None self.cookies = re.sub(r'(?i)([\s\']+|cookie\s*:)', '', api_key) diff --git a/sickbeard/providers/pisexy.py b/sickbeard/providers/pisexy.py index 7d9c6f6..3fcbc79 100644 --- a/sickbeard/providers/pisexy.py +++ b/sickbeard/providers/pisexy.py @@ -51,8 +51,8 @@ class PiSexyProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { - 'get': 'info.php\?id', 'cats': 'cat=(?:0|50[12])', 'filter': 'free', - 'title': r'Download\s*([^\s]+).*', 'seeders': r'(^\d+)', 'leechers': r'(\d+)$'}.items()) + 'get': r'info.php\?id', 'cats': 'cat=(?:0|50[12])', 'filter': 'free', + 'title': r'Download\s([^"\']+)', 'seeders': r'(^\d+)', 'leechers': r'(\d+)$'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string @@ -67,15 +67,14 @@ class PiSexyProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', 'listor') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'class': 'listor'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -89,7 +88,14 @@ class PiSexyProvider(generic.TorrentProvider): continue info = 
tr.find('a', href=rc['get']) - title = (rc['title'].sub(r'\1', info.attrs.get('title', '')) or info.get_text()).strip() + tag = tr.find('a', alt=rc['title']) or tr.find('a', title=rc['title']) + title = tag and rc['title'].findall(str(tag)) + title = title and title[0] + if not isinstance(title, basestring) or 10 > len(title): + title = (rc['title'].sub(r'\1', info.attrs.get('title', '')) + or info.get_text()).strip() + if (10 > len(title)) or (4 > len(re.sub(r'[^.\-\s]', '', title))): + continue size = cells[head['size']].get_text().strip() download_url = self._link(info['href']) except (AttributeError, TypeError, ValueError, KeyError, IndexError): @@ -100,7 +106,7 @@ class PiSexyProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) @@ -116,7 +122,7 @@ class PiSexyProvider(generic.TorrentProvider): return result try: - result = self._link(re.findall('(?i)"([^"]*?download\.php[^"]+?&(?!pimp)[^"]*)"', html)[0]) + result = self._link(re.findall(r'(?i)"([^"]*?download\.php[^"]+?&(?!pimp)[^"]*)"', html)[0]) except IndexError: logger.log('Failed no torrent in response', logger.DEBUG) return result diff --git a/sickbeard/providers/potuk.py b/sickbeard/providers/potuk.py index 9bda91a..96f955d 100644 --- a/sickbeard/providers/potuk.py +++ b/sickbeard/providers/potuk.py @@ -61,7 +61,7 @@ class PotUKProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} - opts = re.findall('(?sim)forumchoice\[\][^<]+(.*?)', self.resp)[0] + opts = re.findall(r'(?sim)forumchoice\[\][^<]+(.*?)', self.resp)[0] cat_opts = re.findall(r'(?mis)]*?value=[\'"](\d+)[^>]*>(.*?)', opts) include = [] tv = False @@ -102,20 +102,19 @@ class PotUKProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', id='threadslist') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'id': 'threadslist'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: if 6 > len(tr.find_all('td')) or not tr.select('img[alt*="ttach"]'): continue try: link = tr.select('td[id^="td_threadtitle"]')[0].select('a[id*="title"]')[0] title = link.get_text().strip() - download_url = self.urls['get_data'] % re.findall('t=(\d+)', link['href'])[0] + download_url = self.urls['get_data'] % re.findall(r't=(\d+)', link['href'])[0] except (AttributeError, TypeError, ValueError, IndexError): continue @@ -124,7 +123,7 @@ class PotUKProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search( @@ -141,7 +140,7 @@ class PotUKProvider(generic.TorrentProvider): return result try: - result = self._link(re.findall('(?i)"(attachment\.php[^"]+?)"', html)[0]) + result = self._link(re.findall(r'(?i)"(attachment\.php[^"]+?)"', html)[0]) except IndexError: logger.log('Failed no torrent in response', logger.DEBUG) return result diff --git a/sickbeard/providers/privatehd.py b/sickbeard/providers/privatehd.py index fdd229d..6b404c7 100644 --- a/sickbeard/providers/privatehd.py +++ b/sickbeard/providers/privatehd.py @@ -65,7 +65,7 @@ class PrivateHDProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) - for (k, v) in {'info': '.*?details\s*-\s*', 'get': 'download'}.items()) + for (k, v) in {'info': r'.*?details\s*-\s*', 'get': 'download'}.items()) log = '' if self.filter: non_marked = 'f0' in self.filter @@ -103,15 +103,14 @@ class PrivateHDProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', class_='table') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'class': (lambda at: at and 'table' in at)})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells) or (self.confirmed and tr.find('i', title=re.compile('(?i)unverified'))): continue @@ -119,6 +118,7 @@ class PrivateHDProvider(generic.TorrentProvider): marked = ','.join([x.attrs.get('title', '').lower() for x in tr.find_all( 'i', attrs={'class': ['fa-star', 'fa-diamond', 'fa-star-half-o']})]) munged = ''.join(filter(marked.__contains__, ['free', 'half', 'double'])) + # noinspection PyUnboundLocalVariable if ((non_marked and rc['filter'].search(munged)) or (not non_marked and not rc['filter'].search(munged))): continue @@ -139,7 +139,7 @@ class PrivateHDProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + search_url) diff --git a/sickbeard/providers/ptf.py b/sickbeard/providers/ptf.py index 96adce8..a8f290d 100644 --- a/sickbeard/providers/ptf.py +++ b/sickbeard/providers/ptf.py @@ -99,15 +99,14 @@ class PTFProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', id='tortable') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'id': 'tortable'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue @@ -115,8 +114,9 @@ class PTFProvider(generic.TorrentProvider): marker = '' try: marker = tr.select('a[href^="browse"] .tip')[0].get_text().strip() - except (StandardError, Exception): + except (BaseException, Exception): pass + # noinspection PyUnboundLocalVariable if ((non_marked and rc['filter'].search(marker)) or (not non_marked and not rc['filter'].search(marker))): continue @@ -141,7 +141,7 @@ class PTFProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, log + self.session.response.get('url')) diff --git a/sickbeard/providers/revtt.py b/sickbeard/providers/revtt.py index cba92a8..ffe0717 100644 --- a/sickbeard/providers/revtt.py +++ b/sickbeard/providers/revtt.py @@ -71,15 +71,14 @@ class RevTTProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', id='torrents-table') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'id': 'torrents-table'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -102,7 +101,7 @@ class RevTTProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, self.session.response.get('url')) diff --git a/sickbeard/providers/rsstorrent.py b/sickbeard/providers/rsstorrent.py index d43eab9..a387185 100644 --- a/sickbeard/providers/rsstorrent.py +++ b/sickbeard/providers/rsstorrent.py @@ -65,7 +65,7 @@ class TorrentRssProvider(generic.TorrentProvider): for cur_attempt in attempt_list: try: url = cur_attempt() - except (StandardError, Exception): + except (BaseException, Exception): continue if title and url: @@ -93,7 +93,7 @@ class TorrentRssProvider(generic.TorrentProvider): if 32 == len(btih): from base64 import b16encode, b32decode btih = b16encode(b32decode(btih)) - except (StandardError, Exception): + except (BaseException, Exception): pass if re.search('(?i)[0-9a-f]{32,40}', btih): break @@ -105,7 +105,7 @@ class TorrentRssProvider(generic.TorrentProvider): try: bdecode(torrent_file) break - except (StandardError, Exception): + except (BaseException, Exception): pass else: return False, '%s fetched RSS feed data: %s' % \ diff --git a/sickbeard/providers/scenehd.py b/sickbeard/providers/scenehd.py index 287a786..e943bbe 100644 --- a/sickbeard/providers/scenehd.py +++ b/sickbeard/providers/scenehd.py @@ -69,15 +69,15 @@ class SceneHDProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive'], attr='cellpadding="5"') as soup: - torrent_table = soup.find('table', class_='browse') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, attr='cellpadding="5"') as soup: + tbl = soup.find('table', class_='browse') + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -87,7 +87,7 @@ class SceneHDProvider(generic.TorrentProvider): seeders, leechers, size = [n for n in [ cells[head[x]].get_text().strip() for x in 'leech', 'leech', 'size']] seeders, leechers, size = [tryInt(n, n) for n in - list(re.findall('^(\d+)[^\d]+?(\d+)', leechers)[0]) + list(re.findall(r'^(\d+)[^\d]+?(\d+)', leechers)[0]) + re.findall('^[^\n\t]+', size)] if self._reject_item(seeders, leechers, self.freeleech and (not tr.find('a', class_=rc['filter'])), @@ -105,7 +105,7 @@ class SceneHDProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/scenetime.py b/sickbeard/providers/scenetime.py index b175bc6..4215d65 100644 --- a/sickbeard/providers/scenetime.py +++ b/sickbeard/providers/scenetime.py @@ -64,7 +64,7 @@ class SceneTimeProvider(generic.TorrentProvider): for mode in search_params.keys(): rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { - 'info': 'detail', 'get': '.*id=(\d+).*', 'fl': '\[freeleech\]', + 'info': 'detail', 'get': r'.*id=(\d+).*', 'fl': r'\[freeleech\]', 'cats': 'cat=(?:%s)' % self._categories_string(mode=mode, template='', delimiter='|')}.items()) for search_string in search_params[mode]: @@ -82,15 +82,15 @@ class SceneTimeProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', attrs={'cellpadding': 5}) - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html) as soup: + tbl = soup.find('table', attrs={'cellpadding': 5}) + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue @@ -115,7 +115,7 @@ class SceneTimeProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/shazbat.py b/sickbeard/providers/shazbat.py index aebd8f9..e9641fb 100644 --- a/sickbeard/providers/shazbat.py +++ b/sickbeard/providers/shazbat.py @@ -60,7 +60,7 @@ class ShazbatProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} - rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'show_id': '"show\?id=(\d+)[^>]+>([^<]+)<\/a>', + rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'show_id': r'"show\?id=(\d+)[^>]+>([^<]+)<\/a>', 'get': 'load_torrent'}.items()) search_types = sorted([x for x in search_params.items()], key=lambda tup: tup[0], reverse=True) maybe_only = search_types[0][0] @@ -98,14 +98,14 @@ class ShazbatProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_rows = soup.tbody.find_all('tr') or soup.table.find_all('tr') or [] + with BS4Parser(html) as soup: + tbl_rows = soup.tbody.find_all('tr') or soup.table.find_all('tr') or [] - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[0:]: + for tr in tbl_rows[0:]: cells = tr.find_all('td') if 4 > len(cells): continue @@ -113,11 +113,11 @@ class ShazbatProvider(generic.TorrentProvider): head = head if None is not head else self._header_row(tr) stats = cells[head['leech']].get_text().strip() seeders, leechers = [(tryInt(x[0], 0), tryInt(x[1], 0)) for x in - re.findall('(?::(\d+))(?:\W*[/]\W*:(\d+))?', stats) if x[0]][0] + re.findall(r'(?::(\d+))(?:\W*[/]\W*:(\d+))?', stats) if x[0]][0] if self._reject_item(seeders, leechers): continue sizes = [(tryInt(x[0], x[0]), tryInt(x[1], False)) for x in - 
re.findall('([\d.]+\w+)?(?:\s*[(\[](\d+)[)\]])?', stats) if x[0]][0] + re.findall(r'([\d.]+\w+)?(?:\s*[(\[](\d+)[)\]])?', stats) if x[0]][0] size = sizes[(0, 1)[1 < len(sizes)]] for element in [x for x in cells[2].contents[::-1] if unicode(x).strip()]: @@ -134,7 +134,7 @@ class ShazbatProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/showrss.py b/sickbeard/providers/showrss.py index 0e0978e..9e14859 100644 --- a/sickbeard/providers/showrss.py +++ b/sickbeard/providers/showrss.py @@ -94,13 +94,13 @@ class ShowRSSProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_rows = soup.select('ul.user-timeline > li') + with BS4Parser(html) as soup: + tbl_rows = soup.select('ul.user-timeline > li') - if not len(torrent_rows): + if not len(tbl_rows): raise generic.HaltParseException - for tr in torrent_rows: + for tr in tbl_rows: try: anchor = tr.find('a', href=rc['get']) title = self.regulate_title(anchor) @@ -113,7 +113,7 @@ class ShowRSSProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/skytorrents.py b/sickbeard/providers/skytorrents.py index 3004014..cb5c582 100644 --- a/sickbeard/providers/skytorrents.py +++ b/sickbeard/providers/skytorrents.py @@ -45,7 +45,7 @@ class SkytorrentsProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { - 'info': '(^(info|torrent)/|/[\w+]{40,}\s*$)', 'get': '^magnet:'}.items()) + 'info': r'(^(info|torrent)/|/[\w+]{40,}\s*$)', 'get': '^magnet:'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: @@ -64,15 +64,15 @@ class SkytorrentsProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', attrs={'class': ['table', 'is-striped']}) - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + parse_only = dict(table={'class': (lambda at: at and 'is-striped' in at)}) + with BS4Parser(html, parse_only=parse_only, preclean=True) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -83,10 +83,10 @@ class SkytorrentsProvider(generic.TorrentProvider): if self._reject_item(seeders, leechers): continue - info = tr.select( - '[alt*="magnet"], [title*="magnet"], [alt*="torrent"], [title*="torrent"]')[0] \ + info = tr.select_one( + '[alt*="magnet"], [title*="magnet"], [alt*="torrent"], [title*="torrent"]') \ or tr.find('a', href=rc['info']) - title = re.sub('\s(using|use|magnet|link)', '', ( + title = re.sub(r'\s(using|use|magnet|link)', '', ( info.attrs.get('title') or info.attrs.get('alt') 
or info.get_text())).strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError): @@ -97,7 +97,7 @@ class SkytorrentsProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/snowfl.py b/sickbeard/providers/snowfl.py index 949f6c6..0ba98b6 100644 --- a/sickbeard/providers/snowfl.py +++ b/sickbeard/providers/snowfl.py @@ -116,7 +116,7 @@ class SnowflProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/speedcd.py b/sickbeard/providers/speedcd.py index a770c1c..4a88b3c 100644 --- a/sickbeard/providers/speedcd.py +++ b/sickbeard/providers/speedcd.py @@ -17,6 +17,7 @@ import re import time +import urlparse from urllib import quote, unquote from . import generic @@ -29,19 +30,16 @@ import sickbeard class SpeedCDProvider(generic.TorrentProvider): def __init__(self): - generic.TorrentProvider.__init__(self, 'SpeedCD', update_freq=7*60) + generic.TorrentProvider.__init__(self, 'SpeedCD', update_freq=4*60) - self.url_base = 'https://speed.cd/' - self.urls = {'config_provider_home_uri': self.url_base, - 'login': self.url_base + 'rss.php', - 'login_action': None, - 'do_login': self.url_base, - 'search': self.url_base + 'V3/API/API.php'} + self.url_home = ['https://speed.cd/'] - self.categories = {'Season': [41, 53], 'Episode': [2, 49, 50, 55], 'anime': [30]} - self.categories['Cache'] = self.categories['Season'] + self.categories['Episode'] + self.url_vars = {'login': 'rss.php', 'search': 'V3/API/'} + self.url_tmpl = dict(config_provider_home_uri='%(home)s', login='%(home)s%(vars)s', do_login='%(home)s', + login_action='', search='%(home)s%(vars)s') - self.url = self.urls['config_provider_home_uri'] + self.categories = {'Season': [41, 53, 57], 'Episode': [2, 49, 50, 55, 57], 'anime': [30]} + self.categories['Cache'] = self.categories['Season'] + self.categories['Episode'] self.username, self.password, self.digest, self.freeleech, self.minseed, self.minleech = 6 * [None] @@ -52,16 +50,16 @@ class SpeedCDProvider(generic.TorrentProvider): self.digest = digest[2] + digest[1] + quote(unquote(digest[0])) params = dict( logged_in=(lambda y='': all( - [self.session.cookies.get_dict(domain='.speed.cd') and - self.session.cookies.clear('.speed.cd') is None or True] + + [self.url and self.session.cookies.get_dict(domain='.' + urlparse.urlparse(self.url).netloc) and + self.session.cookies.clear('.' 
+ urlparse.urlparse(self.url).netloc) is None or True] + ['RSS' in y, 'type="password"' not in y, self.has_all_cookies(['speedian'], 'inSpeed_')] + [(self.session.cookies.get('inSpeed_' + c) or 'sg!no!pw') in self.digest for c in ['speedian']])), failed_msg=(lambda y=None: None), post_params={'login': False}) result = super(SpeedCDProvider, self)._authorised(**params) if not result and not self.failure_count: - if self.digest: - self.get_url('%slogout.php' % self.url_base, skip_auth=True, post_data={'submit.x': 24, 'submit.y': 11}) + if self.url and self.digest: + self.get_url('%slogout.php' % self.url, skip_auth=True, post_data={'submit.x': 24, 'submit.y': 11}) self.digest = '' params = dict( logged_in=(lambda y='': all( @@ -72,13 +70,13 @@ class SpeedCDProvider(generic.TorrentProvider): or (self.session.cookies.get('inSpeed_speedian') or 'sg!no!pw') in self.digest])), failed_msg=(lambda y='': ( re.search(r'(?i)(username|password)((<[^>]+>)|\W)*' + - '(or|and|/|\s)((<[^>]+>)|\W)*(password|incorrect)', y) and + r'(or|and|/|\s)((<[^>]+>)|\W)*(password|incorrect)', y) and u'Invalid username or password for %s. Check settings' or u'Failed to authenticate or parse a response from %s, abort provider')), post_params={'form_tmpl': True}) self.urls['login_action'] = self.urls.get('do_login') session = super(SpeedCDProvider, self)._authorised(session=None, resp_sess=True, **params) - self.urls['login_action'] = None + self.urls['login_action'] = '' if session: self.digest = 'inSpeed_speedian=%s' % session.cookies.get('inSpeed_speedian') sickbeard.save_config() @@ -97,10 +95,11 @@ class SpeedCDProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { - 'info': '/t/', 'get': 'download', 'fl': '\[freeleech\]'}.items()) + 'info': '/t/', 'get': 'download', 'fl': r'\[freeleech\]'}.items()) for mode in search_params.keys(): - rc['cats'] = re.compile('(?i)(cat|c\[\])=(?:%s)' % self._categories_string(mode, template='', delimiter='|')) + rc['cats'] = re.compile(r'(?i)(cat|c\[\])=(?:%s)' + % self._categories_string(mode, template='', delimiter='|')) for search_string in search_params[mode]: post_data = dict((x.split('=') for x in self._categories_string(mode).split('&')), search=search_string.replace('.', ' ').replace('^@^', '.'), @@ -116,15 +115,14 @@ class SpeedCDProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', attrs={'cellspacing': 0}) or soup.find('table') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only='table') as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue @@ -146,7 +144,7 @@ class SpeedCDProvider(generic.TorrentProvider): if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) - except (StandardError, Exception): + except (BaseException, Exception): time.sleep(1.1) self._log_search(mode, len(items[mode]) - cnt, diff --git a/sickbeard/providers/thepiratebay.py b/sickbeard/providers/thepiratebay.py index 7650552..35ef94b 100644 --- a/sickbeard/providers/thepiratebay.py +++ b/sickbeard/providers/thepiratebay.py @@ -39,15 
+39,17 @@ class ThePirateBayProvider(generic.TorrentProvider): self.url_home = ['https://thepiratebay.se/'] + \ ['https://%s/' % base64.b64decode(x) for x in [''.join(x) for x in [ - [re.sub('[h\sI]+', '', x[::-1]) for x in [ + [re.sub(r'[h\sI]+', '', x[::-1]) for x in [ 'm IY', '5 F', 'HhIc', 'vI J', 'HIhe', 'uI k', '2 d', 'uh l']], - [re.sub('[N\sQ]+', '', x[::-1]) for x in [ + [re.sub(r'[N\sQ]+', '', x[::-1]) for x in [ 'lN Gc', 'X Yy', 'c lNR', 'vNJNH', 'kQNHe', 'GQdQu', 'wNN9']], ]]] - self.url_vars = {'search': 'search/%s/0/7/200', 'browse': 'tv/latest/'} - self.url_tmpl = {'config_provider_home_uri': '%(home)s', 'search': '%(home)s%(vars)s', - 'browse': '%(home)s%(vars)s'} + self.url_vars = {'search': 'search/%s/0/7/200', 'browse': 'tv/latest/', + 'search2': 'search.php?q=%s&video=on&category=0&page=0&orderby=99', 'browse2': '?load=/recent'} + self.url_tmpl = {'config_provider_home_uri': '%(home)s', + 'search': '%(home)s%(vars)s', 'search2': '%(home)s%(vars)s', + 'browse': '%(home)s%(vars)s', 'browse2': '%(home)s%(vars)s'} self.proper_search_terms = None @@ -132,7 +134,6 @@ class ThePirateBayProvider(generic.TorrentProvider): return super(ThePirateBayProvider, self)._episode_strings( ep_obj, date_or=True, - ep_detail=lambda x: '%s*|%s*' % (config.naming_ep_type[2] % x, config.naming_ep_type[0] % x), ep_detail_anime=lambda x: '%02i' % x, **kwargs) def _search_provider(self, search_params, search_mode='eponly', epcount=0, **kwargs): @@ -145,33 +146,39 @@ class ThePirateBayProvider(generic.TorrentProvider): rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': 'detail', 'get': 'download[^"]+magnet', 'tid': r'.*/(\d{5,}).*', - 'verify': '(?:helper|moderator|trusted|vip)', 'size': 'size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+)'}.items()) + 'verify': '(?:helper|moderator|trusted|vip)', 'size': r'size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+)'}.items()) for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string - search_url = self.urls['browse'] if 'Cache' == mode \ - else self.urls['search'] % (urllib.quote(search_string)) - html = self.get_url(search_url) - if self.should_skip(): - return results + s_mode = 'browse' if 'Cache' == mode else 'search' + for i in ('', '2'): + search_url = self.urls['%s%s' % (s_mode, i)] + if 'Cache' != mode: + search_url = search_url % urllib.quote(search_string) + html = self.get_url(search_url) + if self.should_skip(): + return results + + if html and not self._has_no_results(html): + break + cnt = len(items[mode]) try: if not html or self._has_no_results(html): self._url = None raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive'], attr='id="searchResult"') as soup: - torrent_table = soup.find(id='searchResult') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'id': 'searchResult'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_table.find_all('tr')[1:]: + for tr in tbl.find_all('tr')[1:]: cells = tr.find_all('td') if 3 > len(cells): continue @@ -202,14 +209,14 @@ class ThePirateBayProvider(generic.TorrentProvider): size = None try: size = rc['size'].findall(tr.find_all(class_='detDesc')[0].get_text())[0] - except (StandardError, Exception): + except (BaseException, Exception): pass items[mode].append((title, 
download_magnet, seeders, self._bytesizer(size))) except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/tokyotoshokan.py b/sickbeard/providers/tokyotoshokan.py index 5fc3ef5..eb7c45c 100644 --- a/sickbeard/providers/tokyotoshokan.py +++ b/sickbeard/providers/tokyotoshokan.py @@ -59,13 +59,12 @@ class TokyoToshokanProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', class_='listing') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') - if torrent_rows: - a = (0, 1)[None is not torrent_rows[0].find('td', class_='centertext')] + with BS4Parser(html, parse_only=dict(table={'class': (lambda at: at and 'listing' in at)})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') + if tbl_rows: + a = (0, 1)[None is not tbl_rows[0].find('td', class_='centertext')] - for top, bottom in zip(torrent_rows[a::2], torrent_rows[a+1::2]): + for top, bottom in zip(tbl_rows[a::2], tbl_rows[a+1::2]): try: bottom_text = bottom.get_text() or '' stats = rc['stats'].findall(bottom_text) @@ -86,7 +85,7 @@ class TokyoToshokanProvider(generic.TorrentProvider): if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) - except (StandardError, Exception): + except (BaseException, Exception): time.sleep(1.1) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/torlock.py b/sickbeard/providers/torlock.py index d11de26..8fea9ef 100644 --- a/sickbeard/providers/torlock.py +++ b/sickbeard/providers/torlock.py @@ -34,9 +34,9 @@ class TorLockProvider(generic.TorrentProvider): self.url_home = ['https://www.torlock.com/'] + \ ['https://%s/' % base64.b64decode(x) for x in [''.join(x) for x in [ - [re.sub('[g\sF]+', '', x[::-1]) for x in [ + [re.sub(r'[g\sF]+', '', x[::-1]) for x in [ 'y9FFGd', 'j9FgGb', '15 Fya', 'sF Jmb', 'rN 2Fb', 'uQW FZ', '0Vmg Y']], - [re.sub('[O\si]+', '', x[::-1]) for x in [ + [re.sub(r'[O\si]+', '', x[::-1]) for x in [ 'byO9Gid', 'y aji9G', '02O bj1', 'vJ Hicu', 'cz 5OCe', 'QZij FG', '= =']], ]]] @@ -61,7 +61,7 @@ class TorLockProvider(generic.TorrentProvider): items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { - 'info': 'torrent.?(\d+)', 'versrc': r'ver\.', 'verified': 'Verified'}.iteritems()) + 'info': r'torrent.?(\d+)', 'versrc': r'ver\.', 'verified': 'Verified'}.iteritems()) for mode in search_params.keys(): for search_string in search_params[mode]: @@ -79,21 +79,21 @@ class TorLockProvider(generic.TorrentProvider): try: if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: + with BS4Parser(html.replace('thead', 'tr')) as soup: - torrent_table = soup.find( + tbl = soup.find( 'div', class_=('panel panel-default', 'table-responsive')['Cache' == mode]) - if None is torrent_table: + if None is tbl: raise generic.HaltParseException - torrent_table = torrent_table.find( + tbl = tbl.find( 'table', class_='table table-striped table-bordered table-hover table-condensed') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + 
tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -117,7 +117,7 @@ class TorLockProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/torrentday.py b/sickbeard/providers/torrentday.py index ceceabe..2c86d87 100644 --- a/sickbeard/providers/torrentday.py +++ b/sickbeard/providers/torrentday.py @@ -86,15 +86,15 @@ class TorrentDayProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive'], tag='table', attr='torrentTable') as soup: - torrent_table = soup.find('table', id='torrentTable') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, tag='table', attr='torrentTable') as soup: + tbl = soup.find('table', id='torrentTable') + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue @@ -118,7 +118,7 @@ class TorrentDayProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): time.sleep(1.1) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/torrenting.py b/sickbeard/providers/torrenting.py index fbfb81e..a607e7a 100644 --- a/sickbeard/providers/torrenting.py +++ b/sickbeard/providers/torrenting.py @@ -77,15 +77,14 @@ class TorrentingProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', id='torrentsTable') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html, parse_only=dict(table={'id': 'torrentsTable'})) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue @@ -107,7 +106,7 @@ class TorrentingProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/torrentleech.py b/sickbeard/providers/torrentleech.py index eaddc9d..1c54823 100644 --- a/sickbeard/providers/torrentleech.py +++ b/sickbeard/providers/torrentleech.py @@ -70,15 +70,15 @@ class TorrentLeechProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find(id='torrenttable') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + with BS4Parser(html) as soup: + tbl = soup.find(id='torrenttable') + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue @@ -101,7 +101,7 @@ class TorrentLeechProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/providers/tvchaosuk.py b/sickbeard/providers/tvchaosuk.py index 5ebf6e0..1e132ff 100644 --- a/sickbeard/providers/tvchaosuk.py +++ b/sickbeard/providers/tvchaosuk.py @@ -26,7 +26,7 @@ from sickbeard import logger from sickbeard.bs4_parser import BS4Parser from sickbeard.config import naming_ep_type from sickbeard.helpers import tryInt -from bs4 import BeautifulSoup +from sickbeard.bs4_parser import BS4Parser from dateutil.parser import parse from lib.unidecode import unidecode @@ -71,7 +71,7 @@ class TVChaosUKProvider(generic.TorrentProvider): vals = [i for i in range(5, 16)] random.SystemRandom().shuffle(vals) - attempts = html = soup = torrent_table = None + attempts = html = soup = tbl = None fetch = 'failed fetch' for attempts, s in enumerate((0, vals[0], vals[5], vals[10])): time.sleep(s) @@ -79,27 +79,30 @@ class TVChaosUKProvider(generic.TorrentProvider): if self.should_skip(): return results if html: - soup = BeautifulSoup(html, 'html.parser') - torrent_table = soup.find('table', id='sortabletable') - if torrent_table: - fetch = 'data fetched' - break + try: + soup = BS4Parser(html).soup + tbl = soup.find('table', id='sortabletable') + if tbl: + fetch = 'data fetched' + break + except(BaseException, Exception): + pass if attempts: logger.log('%s %s after %s attempts' % (mode, fetch, attempts+1)) cnt = len(items[mode]) try: - if not html or self._has_no_results(html) or not torrent_table: + if not html or self._has_no_results(html) or not tbl: raise generic.HaltParseException - torrent_rows = torrent_table.find_all('tr') + tbl_rows = tbl.find_all('tr') get_detail = True - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue @@ -115,21 +118,20 @@ class TVChaosUKProvider(generic.TorrentProvider): title = (tr.find('div', class_='tooltip-content').get_text() or info.get_text()).strip() title = re.findall('(?m)(^[^\r\n]+)', title)[0] download_url = self._link(tr.find('a', href=rc['get'])['href']) - except (StandardError, Exception): + except (BaseException, Exception): continue if get_detail and title.endswith('...'): try: with 
BS4Parser(self.get_url('%s%s' % ( self.urls['config_provider_home_uri'], info['href'].lstrip('/').replace( - self.urls['config_provider_home_uri'], ''))), - 'html.parser') as soup_detail: + self.urls['config_provider_home_uri'], '')))) as soup_detail: title = soup_detail.find( 'td', class_='thead', attrs={'colspan': '3'}).get_text().strip() title = re.findall('(?m)(^[^\r\n]+)', title)[0] except IndexError: continue - except (StandardError, Exception): + except (BaseException, Exception): get_detail = False try: @@ -137,12 +139,12 @@ class TVChaosUKProvider(generic.TorrentProvider): if download_url and titles: for title in titles: items[mode].append((title, download_url, seeders, self._bytesizer(size))) - except (StandardError, Exception): + except (BaseException, Exception): pass except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) if soup: @@ -195,7 +197,7 @@ class TVChaosUKProvider(generic.TorrentProvider): dout = parse(''.join(d[1:4])).strftime('%Y-%m-%d') dnew = dout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(dout)] title = title.replace(''.join(d), '%s%s%s' % (('', ' ')[1 < len(d[0])], dnew, ('', ' ')[1 < len(d[4])])) - except (StandardError, Exception): + except (BaseException, Exception): pass if dated: add_pad = re.findall(r'((?:19|20)\d\d[-]\d\d[-]\d\d)([\w\W])', title) @@ -246,7 +248,7 @@ class TVChaosUKProvider(generic.TorrentProvider): try: sout = parse(''.join(d[1:4])).strftime('%Y-%m-%d') snew = sout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(sout)] - except (StandardError, Exception): + except (BaseException, Exception): pass if snew and dnew and snew != dnew: @@ -256,7 +258,7 @@ class TVChaosUKProvider(generic.TorrentProvider): sxxexx_r = r'(?i)S\d\d+E\d\d+' if dnew and re.search(sxxexx_r, title): titles += [re.sub(sxxexx_r, dnew, re.sub(r'[_.\-\s]?%s' % dnew, '', title))] - except (StandardError, Exception): + except (BaseException, Exception): pass titles += [title] diff --git a/sickbeard/providers/wop.py b/sickbeard/providers/wop.py index 60adcf5..952c915 100644 --- a/sickbeard/providers/wop.py +++ b/sickbeard/providers/wop.py @@ -47,7 +47,7 @@ class WOPProvider(generic.TorrentProvider): return super(WOPProvider, self)._authorised( logged_in=(lambda y=None: all( - [(None is y or re.search('(?i)rss\slink', y)), self.has_all_cookies()] + + [(None is y or re.search(r'(?i)rss\slink', y)), self.has_all_cookies()] + [(self.session.cookies.get(x) or 'sg!no!pw') in self.digest for x in ['hashv']])), failed_msg=(lambda y=None: u'Invalid cookie details for %s. 
Check settings')) @@ -79,15 +79,15 @@ class WOPProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, features=['html5lib', 'permissive']) as soup: - torrent_table = soup.find('table', class_='yenitorrenttable') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + parse_only = dict(table={'class': (lambda at: at and 'yenitorrenttable' in at)}) + with BS4Parser(html, tag='table', attr='yenitorrenttable', parse_only=parse_only) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue @@ -110,7 +110,7 @@ class WOPProvider(generic.TorrentProvider): except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) @@ -130,7 +130,7 @@ class WOPProvider(generic.TorrentProvider): @staticmethod def _search_params(search_params): - return [dict((k, ['*%s*' % re.sub('[.\s]', '*', v) for v in v]) for k, v in d.items()) for d in search_params] + return [dict((k, ['*%s*' % re.sub(r'[.\s]', '*', v) for v in v]) for k, v in d.items()) for d in search_params] @staticmethod def ui_string(key): diff --git a/sickbeard/providers/xspeeds.py b/sickbeard/providers/xspeeds.py index 6a9dce4..b0a1540 100644 --- a/sickbeard/providers/xspeeds.py +++ b/sickbeard/providers/xspeeds.py @@ -66,7 +66,7 @@ class XspeedsProvider(generic.TorrentProvider): return results for search_string in search_params[mode]: search_string = search_string.replace(u'£', '%') - search_string = re.sub('[\s\.]+', '%', search_string) + search_string = re.sub(r'[\s.]+', '%', search_string) search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string kwargs = dict(post_data={'keywords': search_string, 'do': 'quick_sort', 'page': '0', @@ -82,16 +82,16 @@ class XspeedsProvider(generic.TorrentProvider): if not html or self._has_no_results(html): raise generic.HaltParseException - with BS4Parser(html, 'html.parser') as soup: - torrent_table = soup.find('table', id='sortabletable') - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') + parse_only = dict(table={'id': (lambda at: at and 'sortabletable' in at)}) + with BS4Parser(html, parse_only=parse_only) as tbl: + tbl_rows = [] if not tbl else tbl.find_all('tr') get_detail = True - if 2 > len(torrent_rows): + if 2 > len(tbl_rows): raise generic.HaltParseException head = None - for tr in torrent_rows[1:]: + for tr in tbl_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue @@ -107,21 +107,20 @@ class XspeedsProvider(generic.TorrentProvider): title = (tr.find('div', class_='tooltip-content').get_text() or info.get_text()).strip() title = re.findall('(?m)(^[^\r\n]+)', title)[0] download_url = self._link(tr.find('a', href=rc['get'])['href']) - except (StandardError, Exception): + except (BaseException, Exception): continue if get_detail and title.endswith('...'): try: with BS4Parser(self.get_url('%s%s' % ( self.urls['config_provider_home_uri'], info['href'].lstrip('/').replace( - self.urls['config_provider_home_uri'], ''))), - 'html.parser') as soup_detail: + self.urls['config_provider_home_uri'], '')))) as soup_detail: title = soup_detail.find( 
'td', class_='thead', attrs={'colspan': '3'}).get_text().strip() title = re.findall('(?m)(^[^\r\n]+)', title)[0] except IndexError: continue - except (StandardError, Exception): + except (BaseException, Exception): get_detail = False title = self.regulate_title(title) @@ -130,7 +129,7 @@ except generic.HaltParseException: pass - except (StandardError, Exception): + except (BaseException, Exception): logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, @@ -155,7 +154,7 @@ form = re.findall('(?is).*(<form.*?</form>)', html)[0] save_url = self._link(re.findall('(?i)action="([^"]+?)"', form)[0]) tags = re.findall(r'(?is)(<input[^>]*?name=[\'"][^\'"]+[^>]*)', form) - except (StandardError, Exception): + except (BaseException, Exception): return None, None cats, params = [], {} @@ -165,7 +164,7 @@ if 'cat' == name[0:3] and 'checkbox' == itype.lower(): if any(checked): try: - cats += [re.findall('(\d+)[^\d]*$', name)[0]] + cats += [re.findall(r'(\d+)[^\d]*$', name)[0]] except IndexError: pass elif 'hidden' == itype.lower() or 'nothing' in name or \ @@ -175,7 +174,7 @@ for select in selects: name, values, index = None, None, 0 try: - name = re.findall('(?is)
Traceback: %s' % traceback.format_exc(), logger.ERROR) self._log_search(mode, len(items[mode]) - cnt, search_url) diff --git a/sickbeard/search.py b/sickbeard/search.py index fc09163..472d8f0 100644 --- a/sickbeard/search.py +++ b/sickbeard/search.py @@ -884,7 +884,7 @@ def search_providers(show, episodes, manual_search=False, torrent_only=False, tr if 'blackhole' != sickbeard.TORRENT_METHOD: best_result.content = None else: - cache_file = ek.ek(os.path.join, sickbeard.CACHE_DIR or helpers._getTempDir(), + cache_file = ek.ek(os.path.join, sickbeard.CACHE_DIR or helpers.get_system_temp_dir(), '%s.torrent' % (helpers.sanitizeFileName(best_result.name))) if not helpers.download_file(best_result.url, cache_file, session=best_result.provider.session): continue diff --git a/sickbeard/webserve.py b/sickbeard/webserve.py index d5517fe..a142595 100644 --- a/sickbeard/webserve.py +++ b/sickbeard/webserve.py @@ -5583,7 +5583,7 @@ class History(MainHandler): users = sickbeard.helpers.getURL(base_url, headers=headers, params=dict(format='json'), timeout=10, json=True) - for user_id in [u.get('Id') for u in users if u.get('Id')]: + for user_id in users and [u.get('Id') for u in users if u.get('Id')] or []: user_url = '%s/%s' % (base_url, user_id) user = sickbeard.helpers.getURL(user_url, headers=headers, params=dict(format='json'), timeout=10, json=True) diff --git a/sickgear.py b/sickgear.py index ffc7023..a9982b6 100755 --- a/sickgear.py +++ b/sickgear.py @@ -37,6 +37,7 @@ warnings.filterwarnings('ignore', module=r'.*Cheetah.*') warnings.filterwarnings('ignore', module=r'.*connectionpool.*', message='.*certificate verification.*') warnings.filterwarnings('ignore', module=r'.*ssl_.*', message='.*SSLContext object.*') warnings.filterwarnings('ignore', module=r'.*zoneinfo.*', message='.*file or directory.*') +warnings.filterwarnings('ignore', module=r'.*bs4_parser.*', message='.*No parser was explicitly specified.*') if not (2, 7, 9) <= sys.version_info < (3, 0): print('Python %s.%s.%s detected.' % sys.version_info[:3])
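
The `parse_only=dict(...)` argument threaded through the provider hunks above is what delivers the "improve performance of parsing provider search results" change: the dict is turned into a bs4 `SoupStrainer`, so only the wanted table is built into a tree instead of the whole results page. A minimal sketch of the underlying bs4 pattern (assumes beautifulsoup4; the sample page is invented for illustration):

    # SoupStrainer restricts parsing to matching elements, skipping the bulk
    # of the page markup (assumes beautifulsoup4 with the html.parser builder).
    from bs4 import BeautifulSoup, SoupStrainer

    html = ('<html><body><div>banner, menus, footer and other bulk markup</div>'
            '<table id="torrent_table"><tr><td>only row</td></tr></table>'
            '</body></html>')

    # Building a tree for the whole page, then searching it:
    soup = BeautifulSoup(html, 'html.parser')
    tbl = soup.find('table', attrs={'id': 'torrent_table'})

    # Building a tree for the one wanted table only -- less work per page,
    # which is where the search-result parsing time goes:
    strainer = SoupStrainer('table', {'id': 'torrent_table'})
    tbl = BeautifulSoup(html, 'html.parser', parse_only=strainer)

    print(len(tbl.find_all('tr')))  # -> 1
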
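Several hunks (skytorrents, privatehd, tokyotoshokan, zooqle, xspeeds) match the `class` attribute with a callable, e.g. `{'class': (lambda at: at and 'is-striped' in at)}`, rather than an exact string. bs4 accepts a function as an attribute filter; `'is-striped' in at` matches whether bs4 hands it a single class token or the full class string, and `at and ...` guards tags with no class attribute. A short sketch (assumes beautifulsoup4; sample markup invented):

    # Matching a multi-class tag with a callable attribute filter.
    from bs4 import BeautifulSoup

    html = ('<table class="table is-striped"><tr><td>hit</td></tr></table>'
            '<table class="other"></table>')
    soup = BeautifulSoup(html, 'html.parser')

    # The callable receives the attribute value (None when absent).
    tbl = soup.find('table', attrs={'class': lambda at: at and 'is-striped' in at})
    print(tbl.td.get_text())  # -> hit
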
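The skytorrents hunk also swaps `tr.select(...)[0]` for `tr.select_one(...)`: on no match, `select_one` returns `None` instead of raising `IndexError`, so the `or` fallback to `tr.find(...)` can actually run. A small sketch (assumes beautifulsoup4; the row markup is invented):

    # select()[0] raises IndexError on no match; select_one() returns None,
    # which lets an `or` fallback chain work.
    from bs4 import BeautifulSoup

    tr = BeautifulSoup('<tr><td><a href="/torrent/abc" title="get file">x</a>'
                       '</td></tr>', 'html.parser')

    info = (tr.select_one('[alt*="magnet"], [title*="magnet"]')
            or tr.find('a', href=True))  # falls back when the selector misses
    print(info['href'])  # -> /torrent/abc
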
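The blanket `except (StandardError, Exception)` to `except (BaseException, Exception)` substitution repeated across these files is a Python 3 forward-compatibility fix: `StandardError` exists only in Python 2, so the old tuple is a `NameError` on 3.x before any exception is even raised. A sketch of the behaviour the new tuple preserves:

    # (BaseException, Exception) parses and behaves the same on 2.7 and 3.x.
    # Note BaseException also covers KeyboardInterrupt/SystemExit, so the
    # catch is intentionally very broad, as in the parser loops above.
    def safe_parse(html):
        try:
            return html.upper()
        except (BaseException, Exception):
            return None

    print(safe_parse('ok'))  # -> OK
    print(safe_parse(None))  # -> None (the AttributeError is swallowed)
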
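The webserve.py hunk is the "improve handling an enabled Emby server that becomes unreachable" change: `getURL(..., json=True)` returns `None` on a failed request, and the old comprehension then raised `TypeError` when iterating it. The `users and [...] or []` guard keeps the loop safe. A sketch with an invented stub standing in for the HTTP helper:

    # Guarding iteration when an HTTP helper returns None on failure;
    # fetch_users() is a hypothetical stand-in for sickbeard.helpers.getURL.
    def fetch_users(reachable):
        return [{'Id': 'abc'}, {'Name': 'no id'}] if reachable else None

    for reachable in (True, False):
        users = fetch_users(reachable)
        user_ids = users and [u.get('Id') for u in users if u.get('Id')] or []
        print(user_ids)  # -> ['abc'], then [] when the server is unreachable
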