def getShow(self, identifier):
    """Look up the raw show object for a TVRage show id.

    Returns whatever ``self.tvrage[int(identifier)]`` produces, or None when
    that lookup raises ``tvrage_error`` or ``IOError`` (the failure is logged
    together with the traceback).
    """
    try:
        log.debug('Getting show: %s', identifier)
        return self.tvrage[int(identifier)]
    except (tvrage_exceptions.tvrage_error, IOError):
        # Best effort: report the problem and let the caller see "no show".
        log.error('Failed to getShowInfo for show id "%s": %s', (identifier, traceback.format_exc()))

    return None
def getSeasonInfo(self, identifiers = None, params = {}):
    """Either return a list of all seasons or a single season by number.

    identifiers -- dict of show identifiers; only the 'tvrage' entry is used
                   and it must be convertible to an int.
    params      -- optional dict; 'season_number' selects a single season.
                   It is only read, never modified.

    Returns None when the identifiers are missing or unusable, False when the
    show could not be loaded from TVRage, otherwise the parsed season (when
    'season_number' matched) or a list of parsed seasons. Non-empty results
    are served from / stored in the provider cache.
    """
    if not identifiers:
        # Raise exception instead? Invocation is clearly wrong!
        return None
    if 'tvrage' not in identifiers:
        # TVRage identifier unavailable, but invocation was valid.
        return None

    season_number = params.get('season_number', None)
    identifier = tryInt(identifiers['tvrage'], None)
    if identifier is None:
        # Raise exception instead? Invocation is clearly wrong!
        return None

    cache_key = 'tvrage.cache.%s.%s' % (identifier, season_number)
    log.debug('Getting TVRage SeasonInfo: %s', cache_key)
    result = self.getCache(cache_key) or {}
    if result:
        return result

    # Bind the name before the lookup: the error message below refers to it,
    # and without this the handler itself failed with UnboundLocalError
    # instead of logging and returning False.
    show = None
    try:
        show = self.tvrage[int(identifier)]
    except (tvrage_exceptions.tvrage_error, IOError):
        log.error('Failed parsing TVRage SeasonInfo for "%s" id "%s": %s', (show, identifier, traceback.format_exc()))
        return False

    result = []
    for number, season in show.items():
        if season_number is None:
            # No specific season requested: collect every season.
            result.append(self._parseSeason(show, number, season))
        elif number == season_number:
            # Specific season requested: return just that one.
            result = self._parseSeason(show, number, season)
            break

    self.setCache(cache_key, result)
    return result
+ If episode_identifer contains an episode number to search for + """ + if not identifiers: + # Raise exception instead? Invocation is clearly wrong! + return None + if 'tvrage' not in identifiers: + # TVRage identifier unavailable, but invocation was valid. + return None + + season_number = params.get('season_number', None) + episode_identifiers = params.get('episode_identifiers', None) + identifier = tryInt(identifiers['tvrage'], None) + if season_number is None: + # Raise exception instead? Invocation is clearly wrong! + return False + if identifier is None: + # season_identifier might contain the 'show id : season number' + # since there is no tvrage id for season and we need a reference to + # both the show id and season number. + try: + identifier, season_number = season_number.split(':') + season_number = int(season_number) + identifier = tryInt(identifier, None) + except: + pass + + if identifier is None: + # Raise exception instead? Invocation is clearly wrong! + return None + + episode_identifier = None + if episode_identifiers: + if 'tvrage' in episode_identifiers: + episode_identifier = tryInt(episode_identifiers['tvrage'], None) + if episode_identifier is None: + return None + + cache_key = 'tvrage.cache.%s.%s.%s' % (identifier, episode_identifier, season_number) + log.debug('Getting TVRage EpisodeInfo: %s', cache_key) + result = self.getCache(cache_key) or {} + if result: + return result + + try: + show = self.tvrage[int(identifier)] + except (tvrage_exceptions.tvrage_error, IOError), e: + log.error('Failed parsing TVRage EpisodeInfo for "%s" id "%s": %s', (show, identifier, traceback.format_exc())) + return False + + result = [] + for number, season in show.items(): + if season_number is not None and number != season_number: + continue + + for episode in season.values(): + if episode_identifier is not None and episode['id'] == toUnicode(episode_identifier): + result = self._parseEpisode(episode) + self.setCache(cache_key, result) + return result + 
def _parseShow(self, show):
    """Translate a tvrage_api show (or a plain mapping of show fields) into
    the show-info dict used by the rest of the provider.

    Entries whose value is falsy are dropped from the result. Season and
    episode data is attached only when ``show`` is exactly a ``Show``.
    """
    # NOTE: tvrage_api mimics tvdb_api, but some information is unavailable.

    # NOTE: the show object only allows direct access via show['id'], not
    # show.get('id'); anything exposing ``search`` is read by subscription.
    def get(name):
        if hasattr(show, 'search'):
            return show[name]
        return show.get(name)

    genres = splitString(get('genre'), '|')
    if (get('classification') or '') == 'Talk Shows':
        # "Talk Show" is a genre on TheTVDB.com, as these types of shows,
        # e.g. "The Tonight Show Starring Jimmy Fallon", often use
        # different naming schemes, it might be useful to the searcher if
        # it is added here.
        genres.append('Talk Show')

    year = None
    if get('firstaired') is not None:
        try:
            year = datetime.strptime(get('firstaired'), '%Y-%m-%d').year
        except:
            # Any problem reading or parsing the date leaves the year unset.
            year = None

    raw = {
        'identifiers': {
            'tvrage': tryInt(get('id')),
        },
        'type': 'show',
        'titles': [get('seriesname')],
        'images': {
            'poster': [],
            'backdrop': [],
            'poster_original': [],
            'backdrop_original': [],
        },
        'year': year,
        'genres': genres,
        'network': get('network'),
        'air_day': (get('airs_dayofweek') or '').lower(),
        'air_time': self.parseTime(get('airs_time')),
        'firstaired': get('firstaired'),
        'runtime': tryInt(get('runtime')),
        'status': get('status'),
    }

    # Keep only the fields that actually carry a value.
    show_data = {}
    for field in raw:
        if raw[field]:
            show_data[field] = raw[field]

    # Only load season info when available
    if type(show) == Show:
        seasons = show_data['seasons'] = {}

        for season_nr in show:
            season_info = self._parseSeason(show, season_nr, show[season_nr])
            season_info['episodes'] = {}

            for episode_nr in show[season_nr]:
                parsed = self._parseEpisode(show[season_nr][episode_nr])
                season_info['episodes'][episode_nr] = parsed

            seasons[season_nr] = season_info

    return show_data
def _parseEpisode(self, episode):
    """Translate one tvrage_api episode mapping into an episode-info dict.

    ``episode['id']`` is read by subscription; every other field is read with
    ``episode.get`` and may be absent. Entries whose value is falsy are left
    out of the result.
    """
    title = episode.get('episodename', None)
    poster = episode.get('filename', [])
    first_aired = episode.get('firstaired', None)

    raw = {
        'number': tryInt(episode.get('episodenumber', None)),
        'absolute_number': tryInt(episode.get('absolute_number', None)),
        'identifiers': {
            'tvrage': tryInt(episode['id'])
        },
        'type': 'episode',
        'titles': [title] if title else [],
        'images': {
            'poster': [poster] if poster else [],
        },
        'released': first_aired,
        'firstaired': first_aired,
        'language': episode.get('language', None),
    }

    # Drop everything that carries no value.
    episode_data = {}
    for field in raw:
        if raw[field]:
            episode_data[field] = raw[field]
    return episode_data
def log():
    """Return the logger named "tvrage_api" used throughout this module."""
    logger = logging.getLogger("tvrage_api")
    return logger
class ShowContainer(dict):
    """Simple dict that holds a series of Show instances.

    Keys are remembered in assignment order. At most once per
    ``_GC_INTERVAL`` seconds, an assignment evicts everything except the
    ``_MAX_SHOWS`` most recently assigned keys.
    """

    _MAX_SHOWS = 100   # number of most recently assigned shows to keep
    _GC_INTERVAL = 20  # minimum number of seconds between two evictions

    def __init__(self):
        dict.__init__(self)
        self._stack = []
        self._lastgc = time.time()

    def __setitem__(self, key, value):
        # Re-assigning a key refreshes its position instead of recording it a
        # second time. Duplicates made eviction delete the same key twice
        # (KeyError) or throw away a show that had just been refreshed.
        if key in self._stack:
            self._stack.remove(key)
        self._stack.append(key)

        # keep only the latest results
        if time.time() - self._lastgc > self._GC_INTERVAL:
            for stale in self._stack[:-self._MAX_SHOWS]:
                # pop() tolerates keys that were already removed elsewhere.
                dict.pop(self, stale, None)

            self._stack = self._stack[-self._MAX_SHOWS:]

            self._lastgc = time.time()

        super(ShowContainer, self).__setitem__(key, value)
def search(self, term=None, key=None):
    """
    Search all episodes in show. Can search all data, or a specific key (for
    example, episodename).

    Every season stored in this show is asked for its matches through its own
    ``search(term=..., key=...)``; the matches are concatenated in the order
    the seasons are visited.

    Always returns a list (can be empty). Each item is whatever the season
    search returned for a match, so search_results[0]['episodename'] reads
    the episode name of the first match.
    """
    matches = []
    for season in self.values():
        found = season.search(term=term, key=key)
        if len(found) == 0:
            # Nothing in this season, move on to the next one.
            continue
        matches.extend(found)

    return matches
class Episode(dict):
    """Dict of the attributes of a single episode.

    Missing items raise ``tvrage_attributenotfound`` instead of KeyError, and
    stored items can also be read as attributes.
    """

    def __init__(self, season=None):
        """The season attribute points to the parent season
        """
        self.season = season

    def __repr__(self):
        seasno = int(self.get(u'seasonnumber', 0))
        epno = int(self.get(u'episodenumber', 0))
        epname = self.get(u'episodename')
        # The format strings were empty, so formatting the arguments into
        # them raised TypeError whenever an episode was printed.
        if epname is not None:
            return "<Episode %02dx%02d - %s>" % (seasno, epno, epname)
        else:
            return "<Episode %02dx%02d>" % (seasno, epno)

    def __getattr__(self, key):
        # Only reached for names that are not real attributes.
        if key in self:
            return self[key]
        raise AttributeError(key)

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))

    def search(self, term=None, key=None):
        """Search episode data for term, if it matches, return the Episode (self).
        The key parameter can be used to limit the search to a specific element,
        for example, episodename.

        Both the term and the key are matched case-insensitively. Returns
        None when nothing matches; raises TypeError when no term is given.

        This is primarily for use by Show.search and Season.search.
        """
        if term is None:
            raise TypeError("must supply string to search for (contents)")

        to_text = type(u"")  # unicode on Python 2, str on Python 3
        needle = to_text(term).lower()
        # Stored keys are compared lower-cased, so the requested key has to
        # be lower-cased as well or a mixed-case key could never match.
        wanted_key = None if key is None else to_text(key).lower()

        for cur_key, cur_value in self.items():
            if wanted_key is not None and to_text(cur_key).lower() != wanted_key:
                # Do not search this key
                continue
            if needle in to_text(cur_value).lower():
                return self

        return None
If true, stores in + tvrage_api folder under your systems TEMP_DIR, if set to + str/unicode instance it will use this as the cache + location. If False, disables caching. Can also be passed + an arbitrary Python object, which is used as a urllib2 + opener, which should be created by urllib2.build_opener + + forceConnect (bool): + If true it will always try to connect to tvrage.com even if we + recently timed out. By default it will wait one minute before + trying again, and any requests within that one minute window will + return an exception immediately. + """ + + self.shows = ShowContainer() # Holds all Show classes + self.corrections = {} # Holds show-name to show_id mapping + + self.config = {} + + if apikey is not None: + self.config['apikey'] = apikey + else: + self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt" # tvdb_api's API key + + self.config['debug_enabled'] = debug # show debugging messages + + self.config['custom_ui'] = custom_ui + + self.config['proxy'] = proxy + + if cache is True: + self.config['cache_enabled'] = True + self.config['cache_location'] = self._getTempDir() + self.urlopener = urllib2.build_opener( + CacheHandler(self.config['cache_location']) + ) + + elif cache is False: + self.config['cache_enabled'] = False + self.urlopener = urllib2.build_opener() # default opener with no caching + + elif isinstance(cache, basestring): + self.config['cache_enabled'] = True + self.config['cache_location'] = cache + self.urlopener = urllib2.build_opener( + CacheHandler(self.config['cache_location']) + ) + + elif isinstance(cache, urllib2.OpenerDirector): + # If passed something from urllib2.build_opener, use that + log().debug("Using %r as urlopener" % cache) + self.config['cache_enabled'] = True + self.urlopener = cache + + else: + raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache))) + + if self.config['debug_enabled']: + warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. 
" + "To enable debug messages, use the following code before importing: " + "import logging; logging.basicConfig(level=logging.DEBUG)") + logging.basicConfig(level=logging.DEBUG) + + + # List of language from http://tvrage.com/api/0629B785CE550C8D/languages.xml + # Hard-coded here as it is realtively static, and saves another HTTP request, as + # recommended on http://tvrage.com/wiki/index.php/API:languages.xml + self.config['valid_languages'] = [ + "da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr", + "ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv", "no" + ] + + # tvrage.com should be based around numeric language codes, + # but to link to a series like http://tvrage.com/?tab=series&id=79349&lid=16 + # requires the language ID, thus this mapping is required (mainly + # for usage in tvrage_ui - internally tvrage_api will use the language abbreviations) + self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27, + 'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9, + 'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11, + 'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30} + + if language is None: + self.config['language'] = 'en' + else: + if language not in self.config['valid_languages']: + raise ValueError("Invalid language %s, options are: %s" % ( + language, self.config['valid_languages'] + )) + else: + self.config['language'] = language + + # The following url_ configs are based of the + # http://tvrage.com/wiki/index.php/Programmers_API + + self.config['base_url'] = "http://services.tvrage.com" + + self.config['url_getSeries'] = u"%(base_url)s/feeds/search.php?show=%%s" % self.config + + self.config['url_epInfo'] = u"%(base_url)s/myfeeds/episode_list.php?key=%(apikey)s&sid=%%s" % self.config + + self.config['url_seriesInfo'] = u"%(base_url)s/myfeeds/showinfo.php?key=%(apikey)s&sid=%%s" % self.config + + self.config['url_updtes_all'] = u"%(base_url)s/myfeeds/currentshows.php" % 
self.config + + def _getTempDir(self): + """Returns the [system temp dir]/tvrage_api-u501 (or + tvrage_api-myuser) + """ + if hasattr(os, 'getuid'): + uid = "u%d" % (os.getuid()) + else: + # For Windows + try: + uid = getpass.getuser() + except ImportError: + return os.path.join(tempfile.gettempdir(), "tvrage_api") + + return os.path.join(tempfile.gettempdir(), "tvrage_api-%s" % (uid)) + + @retry(tvrage_error) + def _loadUrl(self, url): + global lastTimeout + try: + log().debug("Retrieving URL %s" % url) + resp = self.urlopener.open(url) + if 'x-local-cache' in resp.headers: + log().debug("URL %s was cached in %s" % ( + url, + resp.headers['x-local-cache']) + ) + except (IOError, urllib2.URLError), errormsg: + if not str(errormsg).startswith('HTTP Error'): + lastTimeout = datetime.datetime.now() + raise tvrage_error("Could not connect to server: %s" % (errormsg)) + + + # handle gzipped content, + # http://dbr.lighthouseapp.com/projects/13342/tickets/72-gzipped-data-patch + if 'gzip' in resp.headers.get("Content-Encoding", ''): + if gzip: + stream = StringIO.StringIO(resp.read()) + gz = gzip.GzipFile(fileobj=stream) + return gz.read() + + raise tvrage_error("Received gzip data from thetvdb.com, but could not correctly handle it") + + def remap_keys(path, key, value): + name_map = { + 'showid': 'id', + 'showname': 'seriesname', + 'name': 'seriesname', + 'summary': 'overview', + 'started': 'firstaired', + 'genres': 'genre', + 'airtime': 'airs_time', + 'airday': 'airs_dayofweek', + 'image': 'fanart', + 'epnum': 'absolute_number', + 'title': 'episodename', + 'airdate': 'firstaired', + 'screencap': 'filename', + 'seasonnum': 'episodenumber' + } + + status_map = { + 'returning series': 'Continuing', + 'canceled/ended': 'Ended', + 'tbd/on the bubble': 'Continuing', + 'in development': 'Continuing', + 'new series': 'Continuing', + 'never aired': 'Ended', + 'final season': 'Continuing', + 'on hiatus': 'Continuing', + 'pilot ordered': 'Continuing', + 'pilot rejected': 
'Ended', + 'canceled': 'Ended', + 'ended': 'Ended', + '': 'Unknown', + } + + try: + key = name_map[key.lower()] + except (ValueError, TypeError, KeyError): + key = key.lower() + + # clean up value and do type changes + if value: + if isinstance(value, dict): + if key == 'status': + try: + value = status_map[str(value).lower()] + if not value: + raise + except: + value = 'Unknown' + + if key == 'network': + value = value['#text'] + + if key == 'genre': + value = value['genre'] + if not value: + value = [] + if not isinstance(value, list): + value = [value] + value = filter(None, value) + value = '|' + '|'.join(value) + '|' + + try: + if key == 'firstaired' and value in "0000-00-00": + new_value = str(dt.date.fromordinal(1)) + new_value = re.sub("([-]0{2}){1,}", "", new_value) + fixDate = parse(new_value, fuzzy=True).date() + value = fixDate.strftime("%Y-%m-%d") + elif key == 'firstaired': + value = parse(value, fuzzy=True).date() + value = value.strftime("%Y-%m-%d") + except: + pass + + return (key, value) + + try: + return xmltodict.parse(resp.read(), postprocessor=remap_keys) + except: + return dict([(u'data', None)]) + + def _getetsrc(self, url): + """Loads a URL using caching, returns an ElementTree of the source + """ + + try: + return self._loadUrl(url).values()[0] + except Exception, e: + raise tvrage_error(e) + + def _setItem(self, sid, seas, ep, attrib, value): + """Creates a new episode, creating Show(), Season() and + Episode()s as required. Called by _getShowData to populate show + + Since the nice-to-use tvrage[1][24]['name] interface + makes it impossible to do tvrage[1][24]['name] = "name" + and still be capable of checking if an episode exists + so we can raise tvrage_shownotfound, we have a slightly + less pretty method of setting items.. but since the API + is supposed to be read-only, this is the best way to + do it! 
+ The problem is that calling tvrage[1][24]['episodename'] = "name" + calls __getitem__ on tvrage[1], there is no way to check if + tvrage.__dict__ should have a key "1" before we auto-create it + """ + if sid not in self.shows: + self.shows[sid] = Show() + if seas not in self.shows[sid]: + self.shows[sid][seas] = Season(show=self.shows[sid]) + if ep not in self.shows[sid][seas]: + self.shows[sid][seas][ep] = Episode(season=self.shows[sid][seas]) + self.shows[sid][seas][ep][attrib] = value + + def _setShowData(self, sid, key, value): + """Sets self.shows[sid] to a new Show instance, or sets the data + """ + if sid not in self.shows: + self.shows[sid] = Show() + self.shows[sid].data[key] = value + + def _cleanData(self, data): + """Cleans up strings returned by tvrage.com + + Issues corrected: + - Replaces & with & + - Trailing whitespace + """ + + if isinstance(data, basestring): + data = data.replace(u"&", u"&") + data = data.strip() + + return data + + def search(self, series): + """This searches tvrage.com for the series name + and returns the result list + """ + series = series.encode("utf-8") + log().debug("Searching for show %s" % series) + + return self._getetsrc(self.config['url_getSeries'] % (series)).values()[0] + + def _getSeries(self, series): + """This searches tvrage.com for the series name, + If a custom_ui UI is configured, it uses this to select the correct + series. If not, and interactive == True, ConsoleUI is used, if not + BaseUI is used to select the first result. 
+ """ + allSeries = self.search(series) + if not allSeries: + log().debug('Series result returned zero') + raise tvrage_shownotfound("Show search returned zero results (cannot find show on TVRAGE)") + + if not isinstance(allSeries, list): + allSeries = [allSeries] + + if self.config['custom_ui'] is not None: + log().debug("Using custom UI %s" % (repr(self.config['custom_ui']))) + CustomUI = self.config['custom_ui'] + ui = CustomUI(config=self.config) + else: + log().debug('Auto-selecting first search result using BaseUI') + ui = BaseUI(config=self.config) + + return ui.selectSeries(allSeries) + + def _getShowData(self, sid, getEpInfo=False): + """Takes a series ID, gets the epInfo URL and parses the TVRAGE + XML file into the shows dict in layout: + shows[series_id][season_number][episode_number] + """ + + # Parse show information + log().debug('Getting all series data for %s' % (sid)) + seriesInfoEt = self._getetsrc(self.config['url_seriesInfo'] % (sid)) + + if not seriesInfoEt: + log().debug('Series result returned zero') + raise tvrage_error("Series result returned zero") + + # get series data + for k, v in seriesInfoEt.items(): + if v is not None: + v = self._cleanData(v) + + self._setShowData(sid, k, v) + + # get episode data + if getEpInfo: + # Parse episode data + log().debug('Getting all episodes of %s' % (sid)) + epsEt = self._getetsrc(self.config['url_epInfo'] % (sid)) + + if not epsEt: + log().debug('Series results incomplete') + raise tvrage_showincomplete( + "Show search returned incomplete results (cannot find complete show on TVRAGE)") + + if 'episodelist' not in epsEt: + return False + + seasons = epsEt['episodelist']['season'] + if not isinstance(seasons, list): + seasons = [seasons] + + for season in seasons: + seas_no = int(season['@no']) + + episodes = season['episode'] + if not isinstance(episodes, list): + episodes = [episodes] + + for episode in episodes: + ep_no = int(episode['episodenumber']) + + for k, v in episode.items(): + k = k.lower() 
def __getitem__(self, key):
    """Handles tvrage_instance['seriesname'] calls.
    The dict index should be the show id

    An integer key is treated as a show id: the show is loaded (including
    episodes) unless already present, and the stored show is returned.
    Any other key is lower-cased and used as a search term; the data of each
    selected series is stored as show data and the list of selected series is
    returned.
    """
    if isinstance(key, (int, long)):
        # Item is integer, treat as show id
        if key not in self.shows:
            self._getShowData(key, True)
        return self.shows[key]

    term = str(key).lower()
    self.config['searchterm'] = term

    selected_series = self._getSeries(term)
    if isinstance(selected_series, dict):
        # A single hit comes back as a bare dict.
        selected_series = [selected_series]

    for show in selected_series:
        for name, value in show.items():
            self._setShowData(show['id'], name, value)

    return selected_series
def calculate_cache_path(cache_location, url):
    """Returns the paths [cache_location]/[hash_of_url].headers and
    [cache_location]/[hash_of_url].body as a (header, body) tuple.

    Only the paths are computed; nothing is checked or created on disk.
    """
    if isinstance(url, type(u"")):
        # md5 works on bytes. A text URL containing non-ASCII characters
        # could not be hashed at all; encoding as UTF-8 gives plain ASCII
        # URLs the same digest as before.
        url = url.encode("utf-8")
    thumb = md5(url).hexdigest()
    header = os.path.join(cache_location, thumb + ".headers")
    body = os.path.join(cache_location, thumb + ".body")
    return header, body
@locked_function
def delete_from_cache(cache_location, url):
    """Deletes a response in cache.

    Removes the header and body files belonging to url when they exist.
    Returns True when removing failed and False otherwise.
    """
    hpath, bpath = calculate_cache_path(cache_location, url)
    try:
        if os.path.exists(hpath):
            os.remove(hpath)
        if os.path.exists(bpath):
            os.remove(bpath)
    except (IOError, OSError):
        # os.remove reports failures as OSError, which on Python 2 is not an
        # IOError, so catching IOError alone let every failure escape.
        return True
    else:
        return False
class CachedResponse(StringIO.StringIO):
    """An urllib2.response-like object for cached responses.

    To determine if a response is cached or coming directly from
    the network, check the x-local-cache header rather than the object type.
    """

    @locked_function
    def __init__(self, cache_location, url, set_cache_header=True):
        """Load the body and headers cached for `url` from `cache_location`.

        When `set_cache_header` is true, an ``x-local-cache`` header holding
        the body file path is appended to the stored headers.
        Raises IOError if either cache file cannot be opened.
        """
        self.cache_location = cache_location
        hpath, bpath = calculate_cache_path(cache_location, url)

        # Context managers close the handles promptly, even if a read fails.
        with open(bpath, "rb") as body_file:
            StringIO.StringIO.__init__(self, body_file.read())

        self.url = url
        self.code = 200
        self.msg = "OK"
        with open(hpath, "rb") as header_file:
            headerbuf = header_file.read()
        if set_cache_header:
            headerbuf += "x-local-cache: %s\r\n" % (bpath)
        self.headers = httplib.HTTPMessage(StringIO.StringIO(headerbuf))

    def info(self):
        """Returns headers
        """
        return self.headers

    def geturl(self):
        """Returns original URL
        """
        return self.url

    @locked_function
    def recache(self):
        """Fetch the URL again, store it in the cache and reload this object."""
        new_request = urllib2.urlopen(self.url)
        try:
            # NOTE(review): the entry is stored under new_request.url but
            # reloaded below via self.url; these differ after a redirect.
            # Confirm which key is intended.
            store_in_cache(
                self.cache_location,
                new_request.url,
                new_request
            )
        finally:
            # Release the network connection once the body has been copied.
            new_request.close()
        CachedResponse.__init__(self, self.cache_location, self.url, True)

    @locked_function
    def delete_cache(self):
        """Remove the cache files backing this response."""
        delete_from_cache(
            self.cache_location,
            self.url
        )
# The base class is exported as well, so that star-importers can catch every
# tvrage_api exception with a single ``except tvrage_exception`` clause.
__all__ = ["tvrage_exception", "tvrage_error", "tvrage_userabort",
           "tvrage_shownotfound", "tvrage_showincomplete",
           "tvrage_seasonnotfound", "tvrage_episodenotfound",
           "tvrage_attributenotfound"]


class tvrage_exception(Exception):
    """Any exception generated by tvrage_api
    """
    pass


class tvrage_error(tvrage_exception):
    """An error with tvrage.com (Cannot connect, for example)
    """
    pass


class tvrage_userabort(tvrage_exception):
    """User aborted the interactive selection (via
    the q command, ^c etc)
    """
    pass


class tvrage_shownotfound(tvrage_exception):
    """Show cannot be found on tvrage.com (nonexistent show)
    """
    pass


class tvrage_showincomplete(tvrage_exception):
    """Show found but incomplete on tvrage.com (incomplete show)
    """
    pass


class tvrage_seasonnotfound(tvrage_exception):
    """Season cannot be found on tvrage.com
    """
    pass


class tvrage_episodenotfound(tvrage_exception):
    """Episode cannot be found on tvrage.com
    """
    pass


class tvrage_attributenotfound(tvrage_exception):
    """Raised if an episode does not have the requested
    attribute (such as an episode name)
    """
    pass
class BaseUI:
    """Default non-interactive UI, which auto-selects first results
    """
    def __init__(self, config, log = None):
        """Store `config` and set up `self.log`.

        `log` is deprecated: its value is ignored, and passing anything
        other than None only triggers a warning.
        """
        self.config = config
        if log is not None:
            # The original message repeated the word "use" across the line
            # break ("instead use\nuse import logging").
            warnings.warn("the UI's log parameter is deprecated, instead use\n"
                "import logging; logging.getLogger('ui').info('blah')\n"
                "The self.log attribute will be removed in the next version")
        self.log = logging.getLogger(__name__)

    def selectSeries(self, allSeries):
        """Return the first entry of `allSeries` (IndexError if it is empty)."""
        return allSeries[0]
def startElement(self, full_name, attrs):
    """Handle an opening tag (installed as the parser's StartElementHandler).

    Records the element on `self.path`. Once the path is deeper than
    `self.item_depth`, it also saves the current item/data pair on
    `self.stack` and starts a fresh item seeded with the prefixed
    attributes (or None when attributes are disabled or absent).
    """
    tag = self._build_name(full_name)
    attr_map = self._attrs_to_dict(attrs)
    self.path.append((tag, attr_map or None))
    if len(self.path) <= self.item_depth:
        # Still above the depth at which items are collected.
        return
    self.stack.append((self.item, self.data))
    prefixed = None
    if self.xml_attribs:
        prefixed = self.dict_constructor(
            (self.attr_prefix + attr_name, attr_value)
            for attr_name, attr_value in attr_map.items())
    self.item = prefixed or None
    self.data = None
def push_data(self, item, key, data):
    """Store `data` under `key` in `item` and return the container.

    If a postprocessor is configured it may rewrite the (key, data) pair,
    or return None to drop the value, in which case `item` is returned
    untouched. A missing `item` is created with `self.dict_constructor`.
    Repeated keys are collected into a list.
    """
    postprocess = self.postprocessor
    if postprocess is not None:
        processed = postprocess(self.path, key, data)
        if processed is None:
            return item
        key, data = processed
    target = self.dict_constructor() if item is None else item
    try:
        existing = target[key]
    except KeyError:
        # First value seen for this key.
        target[key] = data
    else:
        if isinstance(existing, list):
            existing.append(data)
        else:
            target[key] = [existing, data]
    return target
def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t'):
    """Write the element(s) `key` holding `value` to `content_handler`.

    `value` may be a single node or a list/tuple of nodes; each node is
    emitted as its own `key` element. Within a node, keys equal to
    `cdata_key` become character data, keys starting with `attr_prefix`
    become attributes and all other keys are emitted recursively as child
    elements. `preprocessor`, if given, may rewrite the (key, value) pair or
    return None to skip it. Raises ValueError for several root nodes.
    """
    if preprocessor is not None:
        processed = preprocessor(key, value)
        if processed is None:
            return
        key, value = processed
    nodes = value if isinstance(value, (list, tuple)) else [value]
    if depth == 0 and len(nodes) > 1:
        raise ValueError('document with multiple roots')
    for node in nodes:
        # Normalise every node to a mapping before splitting it up.
        if node is None:
            node = OrderedDict()
        elif not isinstance(node, dict):
            node = _unicode(node)
        if isinstance(node, _basestring):
            node = OrderedDict(((cdata_key, node),))
        text = None
        attributes = OrderedDict()
        child_pairs = []
        for item_key, item_value in node.items():
            if item_key == cdata_key:
                text = item_value
            elif item_key.startswith(attr_prefix):
                attributes[item_key[len(attr_prefix):]] = item_value
            else:
                child_pairs.append((item_key, item_value))
        # Line breaks inside an element are only added when it has children.
        break_inside = pretty and child_pairs
        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attributes))
        if break_inside:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in child_pairs:
            _emit(child_key, child_value, content_handler,
                  attr_prefix=attr_prefix,
                  cdata_key=cdata_key,
                  depth=depth + 1,
                  preprocessor=preprocessor,
                  pretty=pretty,
                  newl=newl,
                  indent=indent)
        if text is not None:
            content_handler.characters(text)
        if break_inside:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)
if __name__ == '__main__':  # pragma: no cover
    # Command-line streaming mode: read XML from stdin and marshal every
    # item found at the depth given as the single argument to stdout.
    import sys
    import marshal

    # marshal.dump and expat's ParseFile both need binary streams. On
    # Python 3 the standard streams are text wrappers exposing the raw
    # stream as ``.buffer``; on Python 2 they are already byte streams.
    stdin = getattr(sys.stdin, 'buffer', sys.stdin)
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)

    (item_depth,) = sys.argv[1:]
    item_depth = int(item_depth)

    def handle_item(path, item):
        """Dump one (path, item) pair; returning True keeps parsing going."""
        marshal.dump((path, item), stdout)
        return True

    try:
        root = parse(stdin,
                     item_depth=item_depth,
                     item_callback=handle_item,
                     dict_constructor=dict)
        if item_depth == 0:
            # Non-streaming mode: the whole document is returned at once.
            handle_item([], root)
    except KeyboardInterrupt:
        pass