From f5842f315707837b7aaa531f09168a5351aeb0a7 Mon Sep 17 00:00:00 2001
From: Ofir123
Date: Mon, 4 Jan 2016 01:11:01 +0200
Subject: [PATCH] Improved subscenter search algorithm.

Resolve the URL title through the site's search endpoint instead of
slugifying the title locally, restrict the service to Hebrew, and merge
results that share a subtitle id. Download links are built from `server`,
because this change removes the `server_url` class attribute.
---
 libs/subliminal/services/subscenter.py | 122 ++++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 41 deletions(-)

diff --git a/libs/subliminal/services/subscenter.py b/libs/subliminal/services/subscenter.py
index 6eeafa2..229d19a 100644
--- a/libs/subliminal/services/subscenter.py
+++ b/libs/subliminal/services/subscenter.py
@@ -15,33 +15,63 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with subliminal.  If not, see <http://www.gnu.org/licenses/>.
-import logging
-import re
-import json
 from . import ServiceBase
+from ..cache import cachedmethod
 from ..exceptions import ServiceError
 from ..language import language_set
 from ..subtitles import get_subtitle_path, ResultSubtitle
 from ..videos import Episode, Movie
 from ..utils import to_unicode, get_keywords
-
+from bs4 import BeautifulSoup
+import bisect
+import json
+import logging
 
 logger = logging.getLogger(__name__)
 
 
 class Subscenter(ServiceBase):
-    server_url = 'http://subscenter.cinemast.com/he/'
+    server = 'http://subscenter.cinemast.com/he/'
     api_based = False
-    languages = language_set(['he', 'en'])
+    languages = language_set(['he'])
     videos = [Episode, Movie]
     require_video = False
-    required_features = ['permissive']
 
-    @staticmethod
-    def slugify(string):
-        new_string = string.replace(' ', '-').replace("'", '').replace(':', '').lower()
-        # We remove multiple spaces by using this regular expression.
-        return re.sub('-+', '-', new_string)
+    @cachedmethod
+    def _search_url_title(self, title, kind):
+        """Search the URL title for the given `title`.
+
+        :param str title: title to search for.
+        :param str kind: kind of the title, ``movie`` or ``series``.
+        :return: the URL version of the title.
+        :rtype: str or None
+
+        """
+        # make the search
+        logger.info('Searching title name for %r', title)
+        r = self.session.get(self.server + 'subtitle/search/', params={'q': title}, allow_redirects=False, timeout=10)
+        r.raise_for_status()
+
+        # if redirected, get the url title from the Location header
+        if r.is_redirect:
+            parts = r.headers['Location'].split('/')
+
+            # check kind
+            if parts[-3] == kind:
+                return parts[-2]
+
+            return None
+
+        # otherwise, get the first valid suggestion
+        soup = BeautifulSoup(r.content, ['lxml', 'html.parser'])
+        suggestions = soup.select('#processes div.generalWindowTop a')
+        logger.debug('Found %d suggestions', len(suggestions))
+        for suggestion in suggestions:
+            parts = suggestion.attrs['href'].split('/')
+
+            # check kind
+            if parts[-3] == kind:
+                return parts[-2]
 
     def list_checked(self, video, languages):
         series = None
@@ -56,45 +86,55 @@ class Subscenter(ServiceBase):
                           episode, title)
 
     def query(self, filepath, languages=None, keywords=None, series=None, season=None, episode=None, title=None):
-        logger.debug(u'Getting subtitles for %s season %d episode %d with languages %r' % (series, season, episode, languages))
-        # Converts the title to Subscenter format by replacing whitespaces and removing specific chars.
+        logger.debug(u'Getting subtitles for {0} season {1} episode {2} with languages {3}'.format(
+            series, season, episode, languages))
+        # Set the correct parameters depending on the kind.
         if series and season and episode:
-            # Search for a TV show.
-            kind = 'episode'
-            slugified_series = self.slugify(series)
-            url = self.server_url + 'cinemast/data/series/sb/' + slugified_series + '/' + str(season) + '/' + \
-                  str(episode) + '/'
+            url_series = self._search_url_title(series, 'series')
+            url = self.server + 'cinemast/data/series/sb/{}/{}/{}/'.format(url_series, season, episode)
         elif title:
-            # Search for a movie.
-            kind = 'movie'
-            slugified_title = self.slugify(title)
-            url = self.server_url + 'cinemast/data/movie/sb/' + slugified_title + '/'
+            url_title = self._search_url_title(title, 'movie')
+            url = self.server + 'cinemast/data/movie/sb/{}/'.format(url_title)
         else:
             raise ServiceError('One or more parameters are missing')
-        logger.debug('Searching subtitles %r', {'title': title, 'season': season, 'episode': episode})
+        logger.debug('Searching subtitles for title {0}, season {1}, episode {2}'.format(title, season, episode))
         response = self.session.get(url)
         if response.status_code != 200:
-            raise ServiceError('Request failed with status code %d' % response.status_code)
-
-        subtitles = []
+            raise ServiceError('Request failed with status code {0}'.format(response.status_code))
+        # Loop over results.
+        subtitles = {}
         response_json = json.loads(response.content)
-        for lang, lang_json in response_json.items():
-            lang_obj = self.get_language(lang)
-            if lang_obj in self.languages and lang_obj in languages:
-                for group_data in lang_json.values():
-                    for quality in group_data.values():
-                        for sub in quality.values():
-                            release = sub.get('subtitle_version')
-                            sub_path = get_subtitle_path(filepath, lang_obj, self.config.multi)
-                            link = self.server_url + 'subtitle/download/' + lang + '/' + str(sub.get('id')) + \
-                                   '/?v=' + release + '&key=' + str(sub.get('key'))
-                            subtitles.append(ResultSubtitle(sub_path, lang_obj, self.__class__.__name__.lower(),
-                                                            link, release=to_unicode(release)))
-        return subtitles
+        for language_code, language_data in response_json.items():
+            language_object = self.get_language(language_code)
+            if language_object in self.languages and language_object in languages:
+                for quality_data in language_data.values():
+                    for quality, subtitles_data in quality_data.items():
+                        for subtitle_item in subtitles_data.values():
+                            # Read the item.
+                            subtitle_id = subtitle_item['id']
+                            subtitle_key = subtitle_item['key']
+                            downloaded = subtitle_item['downloaded']
+                            release = subtitle_item['subtitle_version']
+                            subtitle_path = get_subtitle_path(filepath, language_object, self.config.multi)
+                            download_link = self.server + 'subtitle/download/{0}/{1}/?v={2}&key={3}'.format(
+                                language_code, subtitle_id, release, subtitle_key)
+                            # Add the release and increment downloaded count if we already have the subtitle.
+                            if subtitle_id in subtitles:
+                                logger.debug('Found additional release {0} for subtitle {1}'.format(
+                                    release, subtitle_id))
+                                bisect.insort_left(subtitles[subtitle_id].releases, release)  # Deterministic order.
+                                subtitles[subtitle_id].downloaded += downloaded
+                                continue
+                            # Otherwise create it.
+                            subtitle = ResultSubtitle(subtitle_path, language_object, self.__class__.__name__.lower(),
+                                                      download_link, release=to_unicode(release))
+                            logger.debug('Found subtitle %r', subtitle)
+                            subtitles[subtitle_id] = subtitle
+        return subtitles.values()
 
     def download(self, subtitle):
         self.download_zip_file(subtitle.link, subtitle.path)
         return subtitle
 
 
-Service = Subscenter
\ No newline at end of file
+Service = Subscenter