@@ -17,18 +17,15 @@
 from __future__ import with_statement, division
 
-import os
 import re
 import traceback
 
 from . import generic
-from .. import logger, show_name_helpers
-from ..common import mediaExtensions, Quality
+from .. import logger
 from ..helpers import try_int
-from ..name_parser.parser import InvalidNameException, InvalidShowException, NameParser
 from bs4_parser import BS4Parser
 
-from _23 import b64decodestring, filter_list, quote, unidecode
+from _23 import b64decodestring, unidecode
 from six import iteritems
@@ -37,7 +34,7 @@ class ThePirateBayProvider(generic.TorrentProvider):
 
     def __init__(self):
        generic.TorrentProvider.__init__(self, 'The Pirate Bay')
 
-        self.url_home = ['https://thepiratebay.se/'] + \
+        self.url_home = ['https://thepiratebay.org/'] + \
                        ['https://%s/' % b64decodestring(x) for x in [''.join(x) for x in [
                            [re.sub(r'[h\sI]+', '', x[::-1]) for x in [
                                'm IY', '5 F', 'HhIc', 'vI J', 'HIhe', 'uI k', '2 d', 'uh l']],
@@ -45,11 +42,11 @@ class ThePirateBayProvider(generic.TorrentProvider):
                                'lN Gc', 'X Yy', 'c lNR', 'vNJNH', 'kQNHe', 'GQdQu', 'wNN9']],
                        ]]]
-        self.url_vars = {'search': 'search/%s/0/7/200', 'browse': 'tv/latest/',
-                         'search2': 'search.php?q=%s&video=on&category=0&page=0&orderby=99', 'browse2': '?load=/recent'}
+        self.url_vars = {'search': '/s/?q=%s&video=on&page=0&orderby=',
+                         'search2': 'search.php?q=%s&video=on&search=Pirate+Search&page=0&orderby='}
         self.url_tmpl = {'config_provider_home_uri': '%(home)s',
-                         'search': '%(home)s%(vars)s', 'search2': '%(home)s%(vars)s',
-                         'browse': '%(home)s%(vars)s', 'browse2': '%(home)s%(vars)s'}
+                         'search': '%(home)s%(vars)s', 'search2': '%(home)s%(vars)s'}
+        self.urls = {'api': 'https://apibay.org/q.php?q=%s'}
 
         self.proper_search_terms = None
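
The url_home entries above are lightly obfuscated: each tuple holds base64 fragments that are reversed and stripped of noise characters before decoding to a mirror hostname. A minimal sketch of that pipeline, using the stdlib base64 module in place of the project's b64decodestring shim from _23 (an assumption made only to keep the sketch self-contained):

    import base64
    import re

    # the first tuple of fragments from the url_home list above
    chunks = ['m IY', '5 F', 'HhIc', 'vI J', 'HIhe', 'uI k', '2 d', 'uh l']

    # reverse each fragment, strip the noise characters [h\sI], then rejoin
    b64 = ''.join(re.sub(r'[h\sI]+', '', x[::-1]) for x in chunks)

    # the reassembled string is plain base64 for one mirror hostname
    print('https://%s/' % base64.b64decode(b64).decode('utf-8'))
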
@@ -60,63 +57,6 @@ class ThePirateBayProvider(generic.TorrentProvider):
     def _has_signature(data=None):
         return data and re.search(r'Pirate\sBay', data[33:7632:])
 
-    def _find_season_quality(self, title, torrent_id, ep_number):
-        """ Return the modified title of a Season Torrent with the quality found inspecting torrent file list """
-
-        if not self.url:
-            return False
-
-        quality = Quality.UNKNOWN
-        file_name = None
-
-        data = self.get_url('%sajax_details_filelist.php?id=%s' % (self.url, torrent_id))
-        if self.should_skip() or not data:
-            return None
-
-        files_list = re.findall('<td.+>(.*?)</td>', data)
-
-        if not files_list:
-            logger.log(u'Unable to get the torrent file list for ' + title, logger.ERROR)
-
-        video_files = filter_list(lambda x: x.rpartition('.')[2].lower() in mediaExtensions, files_list)
-
-        # Filtering SingleEpisode/MultiSeason Torrent
-        if ep_number > len(video_files) or float(ep_number * 1.1) < len(video_files):
-            logger.log(u'Result %s has episode %s and total episodes retrieved in torrent are %s'
-                       % (title, str(ep_number), str(len(video_files))), logger.DEBUG)
-            logger.log(u'Result %s seems to be a single episode or multiseason torrent, skipping result...'
-                       % title, logger.DEBUG)
-            return None
-
-        if Quality.UNKNOWN != Quality.sceneQuality(title):
-            return title
-
-        for file_name in video_files:
-            quality = Quality.sceneQuality(os.path.basename(file_name))
-            if Quality.UNKNOWN != quality:
-                break
-
-        if None is not file_name and Quality.UNKNOWN == quality:
-            quality = Quality.assumeQuality(os.path.basename(file_name))
-
-        if Quality.UNKNOWN == quality:
-            logger.log(u'Unable to obtain a Season Quality for ' + title, logger.DEBUG)
-            return None
-
-        try:
-            my_parser = NameParser(show_obj=self.show_obj, indexer_lookup=False)
-            parse_result = my_parser.parse(file_name)
-        except (InvalidNameException, InvalidShowException):
-            return None
-
-        logger.log(u'Season quality for %s is %s' % (title, Quality.qualityStrings[quality]), logger.DEBUG)
-
-        if parse_result.series_name and parse_result.season_number:
-            title = '%s S%02d %s' % (parse_result.series_name,
-                                     int(parse_result.season_number),
-                                     self._reverse_quality(quality))
-
-        return title
-
     def _season_strings(self, ep_obj, **kwargs):
 
         if ep_obj.show_obj.air_by_date or ep_obj.show_obj.sports:
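
The deleted _find_season_quality relied on the site's ajax_details_filelist.php endpoint to enumerate a torrent's files; its core plausibility test was that a season pack should carry roughly one video file per episode. A standalone sketch of that test (looks_like_season_pack and its default extension list are illustrative, not part of the codebase):

    def looks_like_season_pack(file_names, ep_number, media_extensions=('mkv', 'avi', 'mp4')):
        # count files whose extension marks them as video, as the old
        # filter over mediaExtensions did
        video_files = [f for f in file_names if f.rpartition('.')[2].lower() in media_extensions]
        # fewer files than episodes suggests a single episode; more than
        # ~110% of the episode count suggests a multi-season bundle
        return ep_number <= len(video_files) <= ep_number * 1.1
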
@@ -145,75 +85,162 @@ class ThePirateBayProvider(generic.TorrentProvider):
         items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}
 
         rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({
-            'info': 'detail', 'get': 'download[^"]+magnet', 'tid': r'.*/(\d{5,}).*',
+            'info': 'detail|descript', 'get': 'magnet',
             'verify': '(?:helper|moderator|trusted|vip)', 'size': r'size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+)'})])
         for mode in search_params:
             for search_string in search_params[mode]:
                 search_string = unidecode(search_string)
 
-                s_mode = 'browse' if 'Cache' == mode else 'search'
-                for i in ('', '2'):
-                    search_url = self.urls['%s%s' % (s_mode, i)]
-                    if 'Cache' != mode:
-                        search_url = search_url % quote(search_string)
-
-                    html = self.get_url(search_url)
-                    if self.should_skip():
-                        return results
-
-                    if html and not self._has_no_results(html):
-                        break
+                if 'Cache' != mode:
+                    search_url = self.urls['api'] % search_string
+                    pages = [self.get_url(search_url, parse_json=True)]
+                else:
+                    urls = [self.urls['api'] % 'category:%s' % cur_cat for cur_cat in (205, 208)]
+                    search_url = ', '.join(urls)
+                    pages = [self.get_url(cur_url, parse_json=True) for cur_url in urls]
+
+                seen_not_found = False
+                if any(pages):
+                    cnt = len(items[mode])
+                    for cur_page in pages:
+                        for cur_item in cur_page or []:
+                            title, total_found = [cur_item.get(k) for k in ('name', 'total_found')]
+                            if 1 == try_int(total_found):
+                                seen_not_found = True
+                                continue
+                            seeders, leechers, size = [try_int(n, n) for n in [
+                                cur_item.get(k) for k in ('seeders', 'leechers', 'size')]]
+                            if not self._reject_item(seeders, leechers):
+                                status, info_hash = [cur_item.get(k) for k in ('status', 'info_hash')]
+                                if self.confirmed and not rc['verify'].search(status):
+                                    logger.log(u'Skipping untrusted non-verified result: ' + title, logger.DEBUG)
+                                    continue
+                                download_magnet = info_hash if '&tr=' in info_hash \
+                                    else self._dhtless_magnet(info_hash, title)
+                                if title and download_magnet:
+                                    items[mode].append((title, download_magnet, seeders, self._bytesizer(size)))
+
+                    if len(items[mode]):
+                        self._log_search(mode, len(items[mode]) - cnt, search_url)
+                        continue
+
+                if seen_not_found and not len(items[mode]):
+                    continue
+
+                html = self.get_url(self.urls['config_provider_home_uri'])
+                if self.should_skip() or not html:
+                    return results
+
+                body = re.sub(r'(?sim).*?(<body.*?)<foot.*', r'\1</body>', html)
+                with BS4Parser(body) as soup:
+                    if 'Cache' != mode:
+                        search_url = None
+                        if 'action="/s/' in body:
+                            search_url = self.urls['search'] % search_string
+                        elif 'action="/search.php' in body:
+                            search_url = self.urls['search2'] % search_string
+                        if search_url:
+                            try:
+                                pages = [self.get_url(search_url, proxy_browser=True)]
+                            except ValueError:
+                                pass
+                    else:
+                        try:
+                            html = self.get_url(self._link(soup.find('a', title="Browse Torrents")['href']))
+                            if html:
+                                js = re.findall(r'check\sthat\s+(\w+.js)\s', html)
+                                if js:
+                                    js_file = re.findall('<script[^"]+?"([^"]*?%s[^"]*?).*?</script>' % js[0], html)
+                                    if js_file:
+                                        html = self.get_url(self._link(js_file[0]))
+                            if html:  # could be none from previous get_url for js
+                                # html or js can be source for parsing cat|browse links
+                                urls = re.findall(
+                                    '(?i)<a[^>]+?href="([^>]+?(?:cat|browse)[^>]+?)"[^>]+?>[^>]*?tv shows<', html)
+                                search_url = ', '.join([self._link(cur_url) for cur_url in urls])
+                                pages = [self.get_url(self._link(cur_url), proxy_browser=True) for cur_url in urls]
+                        except ValueError:
+                            pass
+
+                if not any(pages):
+                    return results
+
+                list_type = None
+                head = None
+                rows = ''
+                if len(pages) and '<thead' in pages[0]:
+                    list_type = 0
+                    headers = 'seed|leech|size'
+                    for cur_html in pages:
+                        try:
+                            with BS4Parser(cur_html, parse_only=dict(table={'id': 'searchResult'})) as tbl:
+                                rows += ''.join([_r.prettify() for _r in tbl.select('tr')[1:]])
+                                if not head:
+                                    header = [re.sub(r'(?i).*?(?:order\sby\s)?(%s)(?:ers)?.*?' % headers, r'\1',
+                                                     '' if not x else x.get('title', '').lower()) for x in
+                                              [t.select_one('[title]') for t in
+                                               tbl.find('tr', class_='header').find_all('th')]]
+                                    head = dict((k, header.index(k) - len(header)) for k in headers.split('|'))
+                        except (BaseException, Exception):
+                            pass
+                    html = ('', '<table><tr data="header-placeholder"></tr>%s</table>' % rows)[all([head, rows])]
+                elif len(pages) and '<ol' in pages[0]:
+                    list_type = 1
+                    headers = 'seed|leech|size'
+                    for cur_html in pages:
+                        try:
+                            with BS4Parser(cur_html, parse_only=dict(ol={'id': 'torrents'})) as tbl:
+                                rows += ''.join([_r.prettify() for _r in tbl.find_all('li', class_='list-entry')])
+                                if not head:
+                                    header = [re.sub(
+                                        '(?i).*(?:item-(%s)).*' % headers, r'\1', ' '.join(t.get('class', '')))
+                                        for t in tbl.find('li', class_='list-header').find_all('span')]
+                                    head = dict((k, header.index(k) - len(header)) for k in headers.split('|'))
+                        except (BaseException, Exception):
+                            pass
+                    html = ('', '<ol><li data="header-placeholder"></li>%s</ol>' % rows)[all([head, rows])]
+
+                html = '<!DOCTYPE html><html><head></head><body id="tpb_results">%s</body></html>' % html
 
                 cnt = len(items[mode])
                 try:
-                    if not html or self._has_no_results(html):
+                    if None is list_type or not html or self._has_no_results(html):
                         self._url = None
                         raise generic.HaltParseException
 
-                    with BS4Parser(html, parse_only=dict(table={'id': 'searchResult'})) as tbl:
-                        tbl_rows = [] if not tbl else tbl.find_all('tr')
+                    with BS4Parser(html, parse_only=dict(body={'id': 'tpb_results'})) as tbl:
+                        row_type = ('li', 'tr')[not list_type]
+                        tbl_rows = [] if not tbl else tbl.find_all(row_type)
 
                         if 2 > len(tbl_rows):
                             raise generic.HaltParseException
 
-                        head = None
-                        for tr in tbl.find_all('tr')[1:]:
-                            cells = tr.find_all('td')
+                        for tr in tbl.find_all(row_type)[1:]:
+                            cells = tr.find_all(('span', 'td')[not list_type])
                             if 3 > len(cells):
                                 continue
                             try:
                                 head = head if None is not head else self._header_row(tr)
-                                seeders, leechers = [try_int(cells[head[x]].get_text().strip())
-                                                     for x in ('seed', 'leech')]
+                                seeders, leechers, size = [try_int(n, n) for n in [
+                                    cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]]
                                 if self._reject_item(seeders, leechers):
                                     continue
 
-                                info = tr.find('a', title=rc['info'])
+                                info = tr.find('a', title=rc['info']) or tr.find('a', href=rc['info'])
                                 title = info.get_text().strip().replace('_', '.')
-                                tid = rc['tid'].sub(r'\1', str(info['href']))
-                                download_magnet = tr.find('a', title=rc['get'])['href']
+                                download_magnet = (tr.find('a', title=rc['get'])
+                                                   or tr.find('a', href=rc['get']))['href']
                             except (AttributeError, TypeError, ValueError):
                                 continue
 
-                            if self.confirmed and not tr.find('img', title=rc['verify']):
+                            if self.confirmed and not (
+                                    tr.find('img', title=rc['verify']) or tr.find('img', alt=rc['verify'])
+                                    or tr.find('img', src=rc['verify'])):
                                 logger.log(u'Skipping untrusted non-verified result: ' + title, logger.DEBUG)
                                 continue
 
-                            # Check number video files = episode in season and
-                            # find the real Quality for full season torrent analyzing files in torrent
-                            if 'Season' == mode and 'sponly' == search_mode:
-                                ep_number = int(epcount // len(set(show_name_helpers.allPossibleShowNames(
-                                    self.show_obj))))
-                                title = self._find_season_quality(title, tid, ep_number)
-
                             if title and download_magnet:
-                                size = None
-                                try:
-                                    size = rc['size'].findall(tr.find_all(class_='detDesc')[0].get_text())[0]
-                                except (BaseException, Exception):
-                                    pass
-
                                 items[mode].append((title, download_magnet, seeders, self._bytesizer(size)))
 
                 except generic.HaltParseException:
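
For context on the new primary path: apibay's q.php returns a JSON list of result dicts, and the code above falls back to _dhtless_magnet when the info_hash field carries no tracker parameters. A minimal sketch of that flow, assuming the response fields the handler reads (name, info_hash, seeders, total_found); make_magnet and its tracker URL are illustrative stand-ins for the internal helper, not the project's API:

    import requests
    from urllib.parse import quote

    def make_magnet(info_hash, title):
        # magnet with an explicit tracker, for clients that cannot rely on DHT alone
        return 'magnet:?xt=urn:btih:%s&dn=%s&tr=%s' % (
            info_hash, quote(title), quote('udp://tracker.opentrackr.org:1337/announce'))

    resp = requests.get('https://apibay.org/q.php', params={'q': 'show name s01e01'})
    for item in resp.json():
        # apibay signals "no results" with a single placeholder row, which the
        # handler above detects via total_found == 1
        if 1 == int(item.get('total_found', 0)):
            break
        print(item['name'], item['seeders'], make_magnet(item['info_hash'], item['name']))
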