Browse Source

Merge pull request #4941 from k0ekk0ek/tvrage_info_provider

[TV][Provider] Added TVRage info provider
pull/5156/head
Ruud Burger 10 years ago
parent
commit
f8b90905d0
  1. 285
      couchpotato/core/media/show/providers/info/tvrage.py
  2. 4
      libs/tvrage_api/__init__.py
  3. 721
      libs/tvrage_api/tvrage_api.py
  4. 251
      libs/tvrage_api/tvrage_cache.py
  5. 64
      libs/tvrage_api/tvrage_exceptions.py
  6. 31
      libs/tvrage_api/tvrage_ui.py
  7. 359
      libs/xmltodict.py

285
couchpotato/core/media/show/providers/info/tvrage.py

@ -0,0 +1,285 @@
from datetime import datetime
import os
import traceback
from couchpotato import Env
from couchpotato.core.event import addEvent
from couchpotato.core.helpers.encoding import simplifyString, toUnicode
from couchpotato.core.helpers.variable import splitString, tryInt, tryFloat
from couchpotato.core.logger import CPLog
from couchpotato.core.media.show.providers.base import ShowProvider
from tvrage_api import tvrage_api
from tvrage_api import tvrage_exceptions
from tvrage_api.tvrage_api import Show
log = CPLog(__name__)

autoload = 'TVRage'


class TVRage(ShowProvider):
    """Show, season and episode info provider backed by the tvrage_api library.

    Registers handlers for the 'show.info', 'season.info' and 'episode.info'
    events. Every handler expects an ``identifiers`` dict and only acts when
    it contains a 'tvrage' key.
    """

    def __init__(self):
        # Search is handled by Trakt exclusively as search functionality has
        # been removed from TheTVDB provider as well.
        addEvent('show.info', self.getShowInfo, priority = 3)
        addEvent('season.info', self.getSeasonInfo, priority = 3)
        addEvent('episode.info', self.getEpisodeInfo, priority = 3)

        self.tvrage_api_parms = {
            'apikey': self.conf('api_key'),
            'language': 'en',
            'cache': os.path.join(Env.get('cache_dir'), 'tvrage_api')
        }
        self._setup()

    def _setup(self):
        """Create the tvrage_api client and remember its valid languages."""
        self.tvrage = tvrage_api.TVRage(**self.tvrage_api_parms)
        self.valid_languages = self.tvrage.config['valid_languages']

    def getShow(self, identifier):
        """Return the tvrage_api show for ``identifier``, or None on failure."""
        show = None
        try:
            log.debug('Getting show: %s', identifier)
            show = self.tvrage[int(identifier)]
        except (tvrage_exceptions.tvrage_error, IOError):
            log.error('Failed to getShowInfo for show id "%s": %s', (identifier, traceback.format_exc()))
        return show

    def getShowInfo(self, identifiers = None):
        """Return parsed show info for ``identifiers['tvrage']``.

        Returns None when no usable TVRage identifier is given, the cached or
        freshly parsed show dict otherwise, and an empty list when the show
        could not be retrieved.
        """
        if not identifiers:
            # Raise exception instead? Invocation is clearly wrong!
            return None
        if 'tvrage' not in identifiers:
            # TVRage identifier unavailable, but invocation was valid.
            return None

        identifier = tryInt(identifiers['tvrage'], None)
        if identifier is None:
            # Raise exception instead? Invocation is clearly wrong!
            return None

        cache_key = 'tvrage.cache.show.%s' % identifier
        result = self.getCache(cache_key) or []
        if not result:
            show = self.getShow(identifier)
            if show is not None:
                result = self._parseShow(show)
                self.setCache(cache_key, result)
        return result

    def getSeasonInfo(self, identifiers = None, params = None):
        """Either return a list of all seasons or a single season by number.

        ``identifiers['tvrage']`` is the show id; ``params['season_number']``
        optionally selects one season. Returns None for an unusable
        invocation and False when the show could not be retrieved.
        """
        params = params or {}
        if not identifiers:
            # Raise exception instead? Invocation is clearly wrong!
            return None
        if 'tvrage' not in identifiers:
            # TVRage identifier unavailable, but invocation was valid.
            return None

        season_number = params.get('season_number', None)
        identifier = tryInt(identifiers['tvrage'], None)
        if identifier is None:
            # Raise exception instead? Invocation is clearly wrong!
            return None

        cache_key = 'tvrage.cache.%s.%s' % (identifier, season_number)
        log.debug('Getting TVRage SeasonInfo: %s', cache_key)
        result = self.getCache(cache_key) or {}
        if result:
            return result

        try:
            show = self.tvrage[int(identifier)]
        except (tvrage_exceptions.tvrage_error, IOError):
            # ``show`` is not bound when the lookup itself failed, so it must
            # not be part of the log message.
            log.error('Failed parsing TVRage SeasonInfo for id "%s": %s', (identifier, traceback.format_exc()))
            return False

        result = []
        for number, season in show.items():
            if season_number is None:
                result.append(self._parseSeason(show, number, season))
            elif number == season_number:
                result = self._parseSeason(show, number, season)
                break

        self.setCache(cache_key, result)
        return result

    def getEpisodeInfo(self, identifiers = None, params = None):
        """Either return a list of all episodes or a single episode.

        ``params['season_number']`` is required. When
        ``params['episode_identifiers']`` carries a 'tvrage' id, the matching
        episode is returned as a single dict. Returns None or False for an
        unusable invocation or a failed lookup.
        """
        params = params or {}
        if not identifiers:
            # Raise exception instead? Invocation is clearly wrong!
            return None
        if 'tvrage' not in identifiers:
            # TVRage identifier unavailable, but invocation was valid.
            return None

        season_number = params.get('season_number', None)
        episode_identifiers = params.get('episode_identifiers', None)
        identifier = tryInt(identifiers['tvrage'], None)

        if season_number is None:
            # Raise exception instead? Invocation is clearly wrong!
            return False

        if identifier is None:
            # season_identifier might contain the 'show id : season number'
            # since there is no tvrage id for season and we need a reference to
            # both the show id and season number.
            try:
                show_part, season_part = season_number.split(':')
                parsed_season = int(season_part)
            except (AttributeError, ValueError):
                # Not a 'show:season' string; identifier stays None.
                pass
            else:
                # Only commit once both halves parsed, so a half-parsed value
                # never leaks into the lookup below.
                identifier = tryInt(show_part, None)
                season_number = parsed_season

        if identifier is None:
            # Raise exception instead? Invocation is clearly wrong!
            return None

        episode_identifier = None
        if episode_identifiers:
            if 'tvrage' in episode_identifiers:
                episode_identifier = tryInt(episode_identifiers['tvrage'], None)
            if episode_identifier is None:
                return None

        cache_key = 'tvrage.cache.%s.%s.%s' % (identifier, episode_identifier, season_number)
        log.debug('Getting TVRage EpisodeInfo: %s', cache_key)
        result = self.getCache(cache_key) or {}
        if result:
            return result

        try:
            show = self.tvrage[int(identifier)]
        except (tvrage_exceptions.tvrage_error, IOError):
            # ``show`` is not bound when the lookup itself failed, so it must
            # not be part of the log message.
            log.error('Failed parsing TVRage EpisodeInfo for id "%s": %s', (identifier, traceback.format_exc()))
            return False

        result = []
        for number, season in show.items():
            if number != season_number:
                continue

            for episode in season.values():
                if episode_identifier is not None and episode['id'] == toUnicode(episode_identifier):
                    result = self._parseEpisode(episode)
                    self.setCache(cache_key, result)
                    return result
                else:
                    # NOTE(review): non-matching episodes are collected even
                    # when a specific episode was requested, so a miss returns
                    # the whole season -- confirm this is intended.
                    result.append(self._parseEpisode(episode))

        self.setCache(cache_key, result)
        return result

    def _parseShow(self, show):
        """Convert a tvrage_api show (or plain dict) into a show info dict."""
        #
        # NOTE: tvrage_api mimics tvdb_api, but some information is unavailable
        #

        #
        # NOTE: show object only allows direct access via
        #       show['id'], not show.get('id')
        #
        def get(name):
            if not hasattr(show, 'search'):
                return show.get(name)
            try:
                return show[name]
            except tvrage_exceptions.tvrage_attributenotfound:
                # Show objects raise for attributes TVRage did not supply.
                return None

        genres = splitString(get('genre') or '', '|')
        classification = get('classification') or ''
        if classification == 'Talk Shows':
            # "Talk Show" is a genre on TheTVDB.com, as these types of shows,
            # e.g. "The Tonight Show Starring Jimmy Fallon", often use
            # different naming schemes, it might be useful to the searcher if
            # it is added here.
            genres.append('Talk Show')

        year = None
        first_aired = get('firstaired')
        if first_aired is not None:
            try:
                year = datetime.strptime(first_aired, '%Y-%m-%d').year
            except (TypeError, ValueError):
                year = None

        show_data = {
            'identifiers': {
                'tvrage': tryInt(get('id')),
            },
            'type': 'show',
            'titles': [get('seriesname')],
            'images': {
                'poster': [],
                'backdrop': [],
                'poster_original': [],
                'backdrop_original': [],
            },
            'year': year,
            'genres': genres,
            'network': get('network'),
            'air_day': (get('airs_dayofweek') or '').lower(),
            'air_time': self.parseTime(get('airs_time')),
            'firstaired': first_aired,
            'runtime': tryInt(get('runtime')),
            'status': get('status'),
        }
        # Drop every entry whose value is falsy.
        show_data = dict((k, v) for k, v in show_data.iteritems() if v)

        # Only load season info when available
        if isinstance(show, Show):
            # Parse season and episode data
            show_data['seasons'] = {}
            for season_nr in show:
                season = self._parseSeason(show, season_nr, show[season_nr])
                season['episodes'] = {}
                for episode_nr in show[season_nr]:
                    season['episodes'][episode_nr] = self._parseEpisode(show[season_nr][episode_nr])
                show_data['seasons'][season_nr] = season

        return show_data

    def _parseSeason(self, show, number, season):
        """Return the season info dict for season ``number``."""
        season_data = {
            'number': tryInt(number),
        }
        # Falsy values are dropped, which includes a season number of 0.
        season_data = dict((k, v) for k, v in season_data.iteritems() if v)
        return season_data

    def _parseEpisode(self, episode):
        """Convert a tvrage_api episode into an episode info dict."""
        def get(name, default = None):
            return episode.get(name, default)

        poster = get('filename', [])
        episode_data = {
            'number': tryInt(get('episodenumber')),
            'absolute_number': tryInt(get('absolute_number')),
            'identifiers': {
                'tvrage': tryInt(episode['id'])
            },
            'type': 'episode',
            'titles': [get('episodename')] if get('episodename') else [],
            'images': {
                'poster': [poster] if poster else [],
            },
            'released': get('firstaired'),
            'firstaired': get('firstaired'),
            'language': get('language'),
        }
        # Drop every entry whose value is falsy.
        episode_data = dict((k, v) for k, v in episode_data.iteritems() if v)
        return episode_data

    def parseTime(self, time):
        """Return ``time`` unchanged."""
        return time

4
libs/tvrage_api/__init__.py

@ -0,0 +1,4 @@
# Package metadata for the bundled tvrage_api library.
__version__ = '1.0'
__author__ = 'echel0n'
__license__ = 'BSD'

721
libs/tvrage_api/tvrage_api.py

@ -0,0 +1,721 @@
#!/usr/bin/env python2
# encoding:utf-8
# author:echel0n
# project:tvrage_api
#repository:http://github.com/echel0n/tvrage_api (copied from SickRage, modified to use urllib2)
#license:unlicense (http://unlicense.org/)
"""
Modified from http://github.com/dbr/tvrage_api
Simple-to-use Python interface to The TVRage's API (tvrage.com)
"""
from functools import wraps
import traceback
__author__ = "echel0n"
__version__ = "1.0"
import os
import re
import time
import urllib
import urllib2
import getpass
import tempfile
import warnings
import logging
import datetime as dt
import xmltodict
try:
import xml.etree.cElementTree as ElementTree
except ImportError:
import xml.etree.ElementTree as ElementTree
from dateutil.parser import parse
from tvrage_cache import CacheHandler
from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound, tvrage_showincomplete,
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
def log():
    """Return the logger shared by the whole tvrage_api module."""
    logger = logging.getLogger("tvrage_api")
    return logger
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    """
    def deco_retry(f):
        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck as e:
                    msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        # Single-argument call form prints the same text under
                        # both the print statement and the print function.
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Final attempt: any exception now propagates to the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
class ShowContainer(dict):
    """Simple dict that holds a series of Show instances.

    Keys are recorded in insertion order. At most once every 20 seconds a
    write prunes the container down to the 100 most recently inserted keys.
    """

    def __init__(self):
        dict.__init__(self)
        self._stack = []  # keys in insertion order, oldest first
        self._lastgc = time.time()  # time of the last pruning pass

    def __setitem__(self, key, value):
        self._stack.append(key)

        # keep only the 100 latest results
        if time.time() - self._lastgc > 20:
            recent = self._stack[-100:]
            recent_keys = set(recent)
            for old_key in self._stack[:-100]:
                # A key can be on the stack more than once: skip it when it is
                # also among the recent ones, and tolerate it already being
                # gone instead of raising KeyError.
                if old_key not in recent_keys:
                    self.pop(old_key, None)

            self._stack = recent
            self._lastgc = time.time()

        super(ShowContainer, self).__setitem__(key, value)
class Show(dict):
    """Holds a dict of seasons, and show data.

    Seasons are stored as the dict's own items; show-level attributes are
    kept in the separate ``data`` dict. Both are reachable through item and
    attribute access.
    """

    def __init__(self):
        dict.__init__(self)
        self.data = {}

    def __repr__(self):
        return "<Show %s (containing %s seasons)>" % (
            self.data.get(u'seriesname', 'instance'),
            len(self)
        )

    def __getattr__(self, key):
        if key in self:
            # Key is a season, return it
            return self[key]

        # Read ``data`` through __dict__: going through normal attribute
        # access would re-enter __getattr__ forever while ``data`` is not set
        # yet (e.g. on an instance created without __init__).
        data = self.__dict__.get('data', {})
        if key in data:
            # Non-numeric request is for show-data
            return data[key]

        raise AttributeError(key)

    def __getitem__(self, key):
        if key in self:
            # Key is a season, return it
            return dict.__getitem__(self, key)

        if key in self.data:
            # Non-numeric request is for show-data
            return dict.__getitem__(self.data, key)

        # Data wasn't found, raise appropriate error
        is_numeric = isinstance(key, int) or (hasattr(key, 'isdigit') and key.isdigit())
        if is_numeric:
            # Season number x was not found
            raise tvrage_seasonnotfound("Could not find season %s" % (repr(key)))
        else:
            # If it's not numeric, it must be an attribute name, which
            # doesn't exist, so attribute error.
            raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))

    def airedOn(self, date):
        """Return the episodes whose 'firstaired' contains ``str(date)``.

        Raises tvrage_episodenotfound when nothing matches.
        """
        ret = self.search(str(date), 'firstaired')
        if len(ret) == 0:
            raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
        return ret

    def search(self, term=None, key=None):
        """
        Search all episodes in show. Can search all data, or a specific key (for
        example, episodename)

        Always returns an array (can be empty). First index contains the first
        match, and so on.

        Each array index is an Episode() instance, so doing
        search_results[0]['episodename'] will retrieve the episode name of the
        first match.

        Search terms are converted to lower case (unicode) strings.
        """
        results = []
        for cur_season in self.values():
            results.extend(cur_season.search(term=term, key=key))
        return results
class Season(dict):
    """Dict of the episodes of one season, keyed by episode number."""

    def __init__(self, show=None):
        """The show attribute points to the parent show
        """
        self.show = show

    def __repr__(self):
        episode_count = len(self.keys())
        return "<Season instance (containing %s episodes)>" % (episode_count,)

    def __getattr__(self, episode_number):
        if episode_number not in self:
            raise AttributeError
        return self[episode_number]

    def __getitem__(self, episode_number):
        if episode_number in self:
            return dict.__getitem__(self, episode_number)
        raise tvrage_episodenotfound("Could not find episode %s" % (repr(episode_number)))

    def search(self, term=None, key=None):
        """Search all episodes in season, returns a list of matching Episode
        instances.
        """
        found = []
        for episode in self.values():
            hit = episode.search(term=term, key=key)
            if hit is None:
                continue
            found.append(hit)
        return found
class Episode(dict):
    """Dict of the attributes of a single episode."""

    def __init__(self, season=None):
        """The season attribute points to the parent season
        """
        self.season = season

    def __repr__(self):
        seasno = int(self.get(u'seasonnumber', 0))
        epno = int(self.get(u'episodenumber', 0))
        epname = self.get(u'episodename')
        if epname is not None:
            return "<Episode %02dx%02d - %s>" % (seasno, epno, epname)
        else:
            return "<Episode %02dx%02d>" % (seasno, epno)

    def __getattr__(self, key):
        if key in self:
            return self[key]
        raise AttributeError(key)

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))

    def search(self, term=None, key=None):
        """Search episode data for term, if it matches, return the Episode (self).
        The key parameter can be used to limit the search to a specific element,
        for example, episodename.

        Both the term and the key are compared case-insensitively. Returns
        None when nothing matches.

        This is primarily for use by Show.search and Season.search.
        """
        if term is None:
            raise TypeError("must supply string to search for (contents)")

        # Normalise once, outside the loop.
        term = unicode(term).lower()
        if key is not None:
            # Stored keys are lower-cased before comparison, so the requested
            # key has to be lower-cased too or it could never match.
            key = unicode(key).lower()

        for cur_key, cur_value in self.items():
            cur_key, cur_value = unicode(cur_key).lower(), unicode(cur_value).lower()
            if key is not None and cur_key != key:
                # Do not search this key
                continue
            if term in cur_value:
                return self

        return None
class TVRage:
    """Create easy-to-use interface to name of season/episode name"""

    def __init__(self,
                 interactive=False,
                 select_first=False,
                 debug=False,
                 cache=True,
                 banners=False,
                 actors=False,
                 custom_ui=None,
                 language=None,
                 search_all_languages=False,
                 apikey=None,
                 forceConnect=False,
                 useZip=False,
                 dvdorder=False,
                 proxy=None):
        """
        cache (True/False/str/unicode/urllib2 opener):
            Retrieved XML is persisted to disc. If true, stores in
            tvrage_api folder under your systems TEMP_DIR, if set to
            str/unicode instance it will use this as the cache
            location. If False, disables caching.  Can also be passed
            an arbitrary Python object, which is used as a urllib2
            opener, which should be created by urllib2.build_opener

        forceConnect (bool):
            If true it will always try to connect to tvrage.com even if we
            recently timed out. By default it will wait one minute before
            trying again, and any requests within that one minute window will
            return an exception immediately.

        Raises ValueError for an unsupported ``cache`` value or an unknown
        ``language``.
        """
        self.shows = ShowContainer()  # Holds all Show classes
        self.corrections = {}  # Holds show-name to show_id mapping

        self.config = {}

        if apikey is not None:
            self.config['apikey'] = apikey
        else:
            self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt"  # tvdb_api's API key

        self.config['debug_enabled'] = debug  # show debugging messages

        self.config['custom_ui'] = custom_ui

        self.config['proxy'] = proxy

        if cache is True:
            self.config['cache_enabled'] = True
            self.config['cache_location'] = self._getTempDir()
            self.urlopener = urllib2.build_opener(
                CacheHandler(self.config['cache_location'])
            )
        elif cache is False:
            self.config['cache_enabled'] = False
            self.urlopener = urllib2.build_opener()  # default opener with no caching
        elif isinstance(cache, basestring):
            self.config['cache_enabled'] = True
            self.config['cache_location'] = cache
            self.urlopener = urllib2.build_opener(
                CacheHandler(self.config['cache_location'])
            )
        elif isinstance(cache, urllib2.OpenerDirector):
            # If passed something from urllib2.build_opener, use that
            log().debug("Using %r as urlopener" % cache)
            self.config['cache_enabled'] = True
            self.urlopener = cache
        else:
            raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))

        if self.config['debug_enabled']:
            warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. "
                          "To enable debug messages, use the following code before importing: "
                          "import logging; logging.basicConfig(level=logging.DEBUG)")
            logging.basicConfig(level=logging.DEBUG)

        # List of language from http://tvrage.com/api/0629B785CE550C8D/languages.xml
        # Hard-coded here as it is relatively static, and saves another HTTP request, as
        # recommended on http://tvrage.com/wiki/index.php/API:languages.xml
        self.config['valid_languages'] = [
            "da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
            "ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv", "no"
        ]

        # tvrage.com should be based around numeric language codes,
        # but to link to a series like http://tvrage.com/?tab=series&id=79349&lid=16
        # requires the language ID, thus this mapping is required (mainly
        # for usage in tvrage_ui - internally tvrage_api will use the language abbreviations)
        self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
                                         'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
                                         'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
                                         'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}

        if language is None:
            self.config['language'] = 'en'
        else:
            if language not in self.config['valid_languages']:
                raise ValueError("Invalid language %s, options are: %s" % (
                    language, self.config['valid_languages']
                ))
            else:
                self.config['language'] = language

        # The following url_ configs are based of the
        # http://tvrage.com/wiki/index.php/Programmers_API
        self.config['base_url'] = "http://services.tvrage.com"

        self.config['url_getSeries'] = u"%(base_url)s/feeds/search.php?show=%%s" % self.config

        self.config['url_epInfo'] = u"%(base_url)s/myfeeds/episode_list.php?key=%(apikey)s&sid=%%s" % self.config

        self.config['url_seriesInfo'] = u"%(base_url)s/myfeeds/showinfo.php?key=%(apikey)s&sid=%%s" % self.config

        self.config['url_updtes_all'] = u"%(base_url)s/myfeeds/currentshows.php" % self.config

    def _getTempDir(self):
        """Returns the [system temp dir]/tvrage_api-u501 (or
        tvrage_api-myuser)
        """
        if hasattr(os, 'getuid'):
            uid = "u%d" % (os.getuid())
        else:
            # For Windows
            try:
                uid = getpass.getuser()
            except ImportError:
                return os.path.join(tempfile.gettempdir(), "tvrage_api")

        return os.path.join(tempfile.gettempdir(), "tvrage_api-%s" % (uid))

    @retry(tvrage_error)
    def _loadUrl(self, url):
        """Fetch ``url`` and return the response parsed into a dict.

        Connection failures raise tvrage_error, which the ``retry`` decorator
        uses to repeat the request. A body that cannot be parsed yields
        ``{u'data': None}``.
        """
        # The module imports none of these, so bring them in locally.
        import gzip
        import io
        import zlib

        global lastTimeout
        try:
            log().debug("Retrieving URL %s" % url)
            resp = self.urlopener.open(url)
            if 'x-local-cache' in resp.headers:
                log().debug("URL %s was cached in %s" % (
                    url,
                    resp.headers['x-local-cache'])
                )
        except (IOError, urllib2.URLError) as errormsg:
            if not str(errormsg).startswith('HTTP Error'):
                # The module imports datetime as ``dt``.
                lastTimeout = dt.datetime.now()
            raise tvrage_error("Could not connect to server: %s" % (errormsg))

        raw = resp.read()

        # handle gzipped content,
        # http://dbr.lighthouseapp.com/projects/13342/tickets/72-gzipped-data-patch
        if 'gzip' in resp.headers.get("Content-Encoding", ''):
            try:
                # Decompress and fall through to the parser: callers expect
                # the parsed dict, not the raw document.
                raw = gzip.GzipFile(fileobj=io.BytesIO(raw)).read()
            except (IOError, EOFError, zlib.error):
                raise tvrage_error("Received gzip data from tvrage.com, but could not correctly handle it")

        def remap_keys(path, key, value):
            """xmltodict postprocessor: rename keys and normalise values."""
            name_map = {
                'showid': 'id',
                'showname': 'seriesname',
                'name': 'seriesname',
                'summary': 'overview',
                'started': 'firstaired',
                'genres': 'genre',
                'airtime': 'airs_time',
                'airday': 'airs_dayofweek',
                'image': 'fanart',
                'epnum': 'absolute_number',
                'title': 'episodename',
                'airdate': 'firstaired',
                'screencap': 'filename',
                'seasonnum': 'episodenumber'
            }

            status_map = {
                'returning series': 'Continuing',
                'canceled/ended': 'Ended',
                'tbd/on the bubble': 'Continuing',
                'in development': 'Continuing',
                'new series': 'Continuing',
                'never aired': 'Ended',
                'final season': 'Continuing',
                'on hiatus': 'Continuing',
                'pilot ordered': 'Continuing',
                'pilot rejected': 'Ended',
                'canceled': 'Ended',
                'ended': 'Ended',
                '': 'Unknown',
            }

            try:
                key = name_map[key.lower()]
            except (ValueError, TypeError, KeyError):
                key = key.lower()

            # clean up value and do type changes
            if value:
                if isinstance(value, dict):
                    if key == 'status':
                        # NOTE(review): the status mapping only runs for dict
                        # values, so plain-string statuses pass through
                        # unmapped -- confirm this is intended.
                        value = status_map.get(str(value).lower()) or 'Unknown'

                    if key == 'network':
                        value = value['#text']

                    if key == 'genre':
                        value = value['genre']
                        if not value:
                            value = []
                        if not isinstance(value, list):
                            value = [value]
                        value = filter(None, value)
                        value = '|' + '|'.join(value) + '|'

                try:
                    if key == 'firstaired' and value in "0000-00-00":
                        new_value = str(dt.date.fromordinal(1))
                        new_value = re.sub("([-]0{2}){1,}", "", new_value)
                        fixDate = parse(new_value, fuzzy=True).date()
                        value = fixDate.strftime("%Y-%m-%d")
                    elif key == 'firstaired':
                        value = parse(value, fuzzy=True).date()
                        value = value.strftime("%Y-%m-%d")
                except Exception:
                    # Best effort: keep the value as received when it cannot
                    # be normalised into a date.
                    pass

            return (key, value)

        try:
            return xmltodict.parse(raw, postprocessor=remap_keys)
        except Exception:
            return {u'data': None}

    def _getetsrc(self, url):
        """Loads a URL using caching and returns the first value of the
        parsed response. Any failure is re-raised as tvrage_error.
        """
        try:
            return self._loadUrl(url).values()[0]
        except Exception as e:
            raise tvrage_error(e)

    def _setItem(self, sid, seas, ep, attrib, value):
        """Creates a new episode, creating Show(), Season() and
        Episode()s as required. Called by _getShowData to populate show

        Since the nice-to-use tvrage[1][24]['name] interface
        makes it impossible to do tvrage[1][24]['name] = "name"
        and still be capable of checking if an episode exists
        so we can raise tvrage_shownotfound, we have a slightly
        less pretty method of setting items.. but since the API
        is supposed to be read-only, this is the best way to
        do it!
        The problem is that calling tvrage[1][24]['episodename'] = "name"
        calls __getitem__ on tvrage[1], there is no way to check if
        tvrage.__dict__ should have a key "1" before we auto-create it
        """
        if sid not in self.shows:
            self.shows[sid] = Show()
        if seas not in self.shows[sid]:
            self.shows[sid][seas] = Season(show=self.shows[sid])
        if ep not in self.shows[sid][seas]:
            self.shows[sid][seas][ep] = Episode(season=self.shows[sid][seas])
        self.shows[sid][seas][ep][attrib] = value

    def _setShowData(self, sid, key, value):
        """Sets self.shows[sid] to a new Show instance, or sets the data
        """
        if sid not in self.shows:
            self.shows[sid] = Show()
        self.shows[sid].data[key] = value

    def _cleanData(self, data):
        """Cleans up strings returned by tvrage.com

        Issues corrected:
        - Replaces &amp; with &
        - Trailing whitespace
        """
        if isinstance(data, basestring):
            data = data.replace(u"&amp;", u"&")
            data = data.strip()
        return data

    def search(self, series):
        """This searches tvrage.com for the series name
        and returns the result list
        """
        series = series.encode("utf-8")
        log().debug("Searching for show %s" % series)
        # Quote the name so spaces, '&' and non-ASCII bytes cannot corrupt
        # the query string.
        url = self.config['url_getSeries'] % (urllib.quote(series))
        return self._getetsrc(url).values()[0]

    def _getSeries(self, series):
        """This searches tvrage.com for the series name,
        If a custom_ui UI is configured, it uses this to select the correct
        series. Otherwise BaseUI is used to select the first result.

        Raises tvrage_shownotfound when the search returns nothing.
        """
        allSeries = self.search(series)
        if not allSeries:
            log().debug('Series result returned zero')
            raise tvrage_shownotfound("Show search returned zero results (cannot find show on TVRAGE)")

        if not isinstance(allSeries, list):
            allSeries = [allSeries]

        if self.config['custom_ui'] is not None:
            log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
            CustomUI = self.config['custom_ui']
            ui = CustomUI(config=self.config)
        else:
            log().debug('Auto-selecting first search result using BaseUI')
            ui = BaseUI(config=self.config)

        return ui.selectSeries(allSeries)

    def _getShowData(self, sid, getEpInfo=False):
        """Takes a series ID, gets the epInfo URL and parses the TVRAGE
        XML file into the shows dict in layout:
        shows[series_id][season_number][episode_number]

        Returns True on success and False when episode data was requested
        but the response has no episode list.
        """
        # Parse show information
        log().debug('Getting all series data for %s' % (sid))
        seriesInfoEt = self._getetsrc(self.config['url_seriesInfo'] % (sid))

        if not seriesInfoEt:
            log().debug('Series result returned zero')
            raise tvrage_error("Series result returned zero")

        # get series data
        for k, v in seriesInfoEt.items():
            if v is not None:
                v = self._cleanData(v)

            self._setShowData(sid, k, v)

        # get episode data
        if getEpInfo:
            # Parse episode data
            log().debug('Getting all episodes of %s' % (sid))
            epsEt = self._getetsrc(self.config['url_epInfo'] % (sid))

            if not epsEt:
                log().debug('Series results incomplete')
                raise tvrage_showincomplete(
                    "Show search returned incomplete results (cannot find complete show on TVRAGE)")

            if 'episodelist' not in epsEt:
                return False

            # A single season or episode is not wrapped in a list by the
            # parser, so normalise both levels.
            seasons = epsEt['episodelist']['season']
            if not isinstance(seasons, list):
                seasons = [seasons]

            for season in seasons:
                seas_no = int(season['@no'])
                episodes = season['episode']
                if not isinstance(episodes, list):
                    episodes = [episodes]

                for episode in episodes:
                    ep_no = int(episode['episodenumber'])
                    for k, v in episode.items():
                        k = k.lower()

                        if v is not None:
                            if k == 'link':
                                # The last path segment of the link is stored
                                # as the episode id.
                                v = v.rsplit('/', 1)[1]
                                k = 'id'
                            else:
                                v = self._cleanData(v)

                        self._setItem(sid, seas_no, ep_no, k, v)

        return True

    def _nameToSid(self, name):
        """Takes show name, returns the correct series ID (if the show has
        already been grabbed), or grabs all episodes and returns
        the correct SID.
        """
        if name in self.corrections:
            log().debug('Correcting %s to %s' % (name, self.corrections[name]))
            return self.corrections[name]
        else:
            log().debug('Getting show %s' % (name))
            selected_series = self._getSeries(name)
            if isinstance(selected_series, dict):
                selected_series = [selected_series]
            sids = list(int(x['id']) for x in selected_series if self._getShowData(int(x['id'])))
            self.corrections.update(dict((x['seriesname'], int(x['id'])) for x in selected_series))
            return sids

    def __getitem__(self, key):
        """Handles tvrage_instance['seriesname'] calls.
        The dict index should be the show id

        An integer key returns the Show, loading it with its episodes on
        first access. Any other key is treated as a series name and returns
        the selected search results.
        """
        if isinstance(key, (int, long)):
            # Item is integer, treat as show id
            if key not in self.shows:
                self._getShowData(key, True)
            return self.shows[key]

        key = str(key).lower()
        self.config['searchterm'] = key
        selected_series = self._getSeries(key)
        if isinstance(selected_series, dict):
            selected_series = [selected_series]

        for show in selected_series:
            for k, v in show.items():
                self._setShowData(show['id'], k, v)

        return selected_series

    def __repr__(self):
        return str(self.shows)
def main():
    """Simple example of using tvrage_api - it looks a show up by name
    and prints one of its episode names.
    """
    import logging

    logging.basicConfig(level=logging.DEBUG)

    tvrage_instance = TVRage(cache=False)

    # A lookup by name returns the selected search results, not a Show.
    # NOTE(review): assumes the result list is non-empty -- confirm against
    # the UI class's selectSeries().
    results = tvrage_instance['Lost']
    first_match = results[0]
    print(first_match['seriesname'])

    # Episode data is only loaded for lookups by integer show id.
    show = tvrage_instance[int(first_match['id'])]
    print(show[1][4]['episodename'])


if __name__ == '__main__':
    main()

251
libs/tvrage_api/tvrage_cache.py

@ -0,0 +1,251 @@
#!/usr/bin/env python2
#encoding:utf-8
#author:echel0n
#project:tvrage_api
#repository:http://github.com/echel0n/tvrage_api
#license:unlicense (http://unlicense.org/)
"""
urllib2 caching handler
Modified from http://code.activestate.com/recipes/491261/
"""
from __future__ import with_statement
__author__ = "echel0n"
__version__ = "1.0"
import os
import time
import errno
import httplib
import urllib2
import StringIO
from hashlib import md5
from threading import RLock
# Re-entrant lock shared by every cache operation in this module.
cache_lock = RLock()


def locked_function(origfunc):
    """Decorator to execute function under lock"""
    from functools import wraps

    @wraps(origfunc)  # keep the wrapped function's name and docstring
    def wrapped(*args, **kwargs):
        with cache_lock:
            return origfunc(*args, **kwargs)

    return wrapped
def calculate_cache_path(cache_location, url):
    """Return the cache file paths for ``url`` as a (headers, body) tuple.

    Both paths are [cache_location]/[md5 hex digest of url] with a
    ".headers" and ".body" suffix respectively.
    """
    if not isinstance(url, bytes):
        # md5 needs bytes; encoding explicitly keeps the digest unchanged for
        # ASCII URLs and stops non-ASCII unicode URLs from raising.
        url = url.encode("utf-8")
    thumb = md5(url).hexdigest()
    header = os.path.join(cache_location, thumb + ".headers")
    body = os.path.join(cache_location, thumb + ".body")
    return header, body
def check_cache_time(path, max_age):
    """Tell whether ``path`` was modified within the last ``max_age`` seconds.

    Returns False when the file is missing or too old, True when it is
    still fresh.
    """
    if not os.path.isfile(path):
        return False

    modified_at = os.stat(path).st_mtime
    oldest_allowed = time.time() - max_age
    # Fresh means: not older than the cut-off.
    return not modified_at < oldest_allowed
@locked_function
def exists_in_cache(cache_location, url, max_age):
    """Returns if header AND body cache file exist (and are up-to-date)"""
    header_path, body_path = calculate_cache_path(cache_location, url)

    both_present = os.path.exists(header_path) and os.path.exists(body_path)
    if not both_present:
        # File does not exist
        return False

    return (
        check_cache_time(header_path, max_age)
        and check_cache_time(body_path, max_age)
    )
@locked_function
def store_in_cache(cache_location, url, response):
    """Tries to store response in cache.

    Writes the response headers and body to the two cache files for ``url``.
    Returns True when an IOError interrupted the write and False when both
    files were written; callers in this module pass the value on as the
    ``set_cache_header`` flag.
    """
    hpath, bpath = calculate_cache_path(cache_location, url)
    try:
        # ``with`` closes each file even when a write fails part-way.
        with open(hpath, "wb") as header_file:
            header_file.write(str(response.info()))

        with open(bpath, "wb") as body_file:
            body_file.write(response.read())
    except IOError:
        return True
    else:
        return False
@locked_function
def delete_from_cache(cache_location, url):
    """Deletes a response in cache.

    Removes the header and body cache files for ``url``; a file that is
    already gone is not an error. Returns True when a removal failed and
    False otherwise.
    """
    failed = False
    for path in calculate_cache_path(cache_location, url):
        try:
            os.remove(path)
        except (IOError, OSError) as e:
            # os.remove reports failures as OSError. A missing file simply
            # means there was nothing to delete.
            if e.errno != errno.ENOENT:
                failed = True
    return failed
class CacheHandler(urllib2.BaseHandler):
    """urllib2 handler that keeps responses in a persistent on-disk cache.

    A later GET request for the same URL is answered from the stored copy,
    saving time, resources and bandwidth.
    """

    @locked_function
    def __init__(self, cache_location, max_age = 21600):
        """Remember the cache directory and create it when it is missing."""
        self.max_age = max_age
        self.cache_location = cache_location
        if os.path.exists(self.cache_location):
            return

        try:
            os.mkdir(self.cache_location)
        except OSError as e:
            created_meanwhile = (
                e.errno == errno.EEXIST and os.path.isdir(self.cache_location)
            )
            if not created_meanwhile:
                # Our target dir is already a file, or different error,
                # relay the error!
                raise
            # Otherwise another process beat us to creating this dir,
            # that's OK.

    def default_open(self, request):
        """Answer a GET request from the cache when a fresh copy is stored.

        Returns None to let the next handler deal with the request.
        """
        if request.get_method() != "GET":
            return None  # let the next handler try to handle the request

        url = request.get_full_url()
        if not exists_in_cache(self.cache_location, url, self.max_age):
            return None

        return CachedResponse(
            self.cache_location,
            url,
            set_cache_header = True
        )

    def http_response(self, request, response):
        """Store a GET response whose status code starts with 2 and hand
        back a CachedResponse for it; other responses pass through untouched.
        """
        cacheable = (
            request.get_method() == "GET"
            and str(response.code).startswith("2")
        )
        if not cacheable:
            return response

        if 'x-local-cache' in response.info():
            set_cache_header = True
        else:
            # Response is not cached
            set_cache_header = store_in_cache(
                self.cache_location,
                request.get_full_url(),
                response
            )

        return CachedResponse(
            self.cache_location,
            request.get_full_url(),
            set_cache_header = set_cache_header
        )
class CachedResponse(StringIO.StringIO):
    """An urllib2.response-like object for cached responses.

    To determine if a response is cached or coming directly from
    the network, check the x-local-cache header rather than the object type.
    """

    @locked_function
    def __init__(self, cache_location, url, set_cache_header=True):
        """Load the cached body and headers stored for ``url``.

        cache_location -- cache directory the file paths are derived from
        url -- URL the cached response belongs to
        set_cache_header -- when true, an ``x-local-cache`` header carrying
            the body file path is appended to the stored headers
        """
        self.cache_location = cache_location
        hpath, bpath = calculate_cache_path(cache_location, url)
        # Context managers close both files deterministically instead of
        # leaving that to garbage collection.
        with open(bpath, "rb") as body_file:
            StringIO.StringIO.__init__(self, body_file.read())
        self.url = url
        self.code = 200
        self.msg = "OK"
        with open(hpath, "rb") as header_file:
            headerbuf = header_file.read()
        if set_cache_header:
            headerbuf += "x-local-cache: %s\r\n" % (bpath)
        self.headers = httplib.HTTPMessage(StringIO.StringIO(headerbuf))

    def info(self):
        """Returns headers
        """
        return self.headers

    def geturl(self):
        """Returns original URL
        """
        return self.url

    @locked_function
    def recache(self):
        """Fetch the URL again, store the new response and reload it."""
        new_request = urllib2.urlopen(self.url)
        # The return value of store_in_cache is not needed: the reload
        # below always sets the cache header.
        store_in_cache(
            self.cache_location,
            new_request.url,
            new_request
        )
        CachedResponse.__init__(self, self.cache_location, self.url, True)

    @locked_function
    def delete_cache(self):
        """Drop the cached files that belong to this response's URL."""
        delete_from_cache(
            self.cache_location,
            self.url
        )
if __name__ == "__main__":
def main():
"""Quick test/example of CacheHandler"""
opener = urllib2.build_opener(CacheHandler("/tmp/"))
response = opener.open("http://google.com")
print response.headers
print "Response:", response.read()
response.recache()
print response.headers
print "After recache:", response.read()
# Test usage in threads
from threading import Thread
class CacheThreadTest(Thread):
lastdata = None
def run(self):
req = opener.open("http://google.com")
newdata = req.read()
if self.lastdata is None:
self.lastdata = newdata
assert self.lastdata == newdata, "Data was not consistent, uhoh"
req.recache()
threads = [CacheThreadTest() for x in range(50)]
print "Starting threads"
[t.start() for t in threads]
print "..done"
print "Joining threads"
[t.join() for t in threads]
print "..done"
main()

64
libs/tvrage_api/tvrage_exceptions.py

@ -0,0 +1,64 @@
#!/usr/bin/env python2
# encoding:utf-8
#author:echel0n
#project:tvrage_api
#repository:http://github.com/echel0n/tvrage_api
#license:unlicense (http://unlicense.org/)
"""Custom exceptions used or raised by tvrage_api"""
__author__ = "echel0n"
__version__ = "1.0"

# Names exported by a star-import of this module. The shared base class
# is listed as well so that star-import users can catch every library error
# with a single except clause.
__all__ = ["tvrage_exception", "tvrage_error", "tvrage_userabort",
           "tvrage_shownotfound", "tvrage_showincomplete",
           "tvrage_seasonnotfound", "tvrage_episodenotfound",
           "tvrage_attributenotfound"]


class tvrage_exception(Exception):
    """Any exception generated by tvrage_api
    """
    pass


class tvrage_error(tvrage_exception):
    """An error with tvrage.com (Cannot connect, for example)
    """
    pass


class tvrage_userabort(tvrage_exception):
    """User aborted the interactive selection (via
    the q command, ^c etc)
    """
    pass


class tvrage_shownotfound(tvrage_exception):
    """Show cannot be found on tvrage.com (non-existent show)
    """
    pass


class tvrage_showincomplete(tvrage_exception):
    """Show found but incomplete on tvrage.com (incomplete show)
    """
    pass


class tvrage_seasonnotfound(tvrage_exception):
    """Season cannot be found on tvrage.com
    """
    pass


class tvrage_episodenotfound(tvrage_exception):
    """Episode cannot be found on tvrage.com
    """
    pass


class tvrage_attributenotfound(tvrage_exception):
    """Raised if an episode does not have the requested
    attribute (such as an episode name)
    """
    pass

31
libs/tvrage_api/tvrage_ui.py

@ -0,0 +1,31 @@
#!/usr/bin/env python2
#encoding:utf-8
#author:echel0n
#project:tvrage_api
#repository:http://github.com/echel0n/tvrage_api
#license:unlicense (http://unlicense.org/)
"""Contains included user interface for TVRage show selection"""
__author__ = "echel0n"
__version__ = "1.0"
import logging
import warnings
def log():
    """Return the logger registered under this module's name."""
    module_logger = logging.getLogger(__name__)
    return module_logger
class BaseUI:
    """Default non-interactive UI, which auto-selects first results
    """
    def __init__(self, config, log = None):
        """Store the configuration and attach a module-level logger.

        config -- configuration object, kept on ``self.config``
        log -- deprecated and otherwise ignored; passing anything other
            than None only triggers a warning
        """
        self.config = config
        if log is not None:
            # stacklevel=2 attributes the warning to the code that passed
            # the deprecated argument rather than to this constructor.
            warnings.warn("the UI's log parameter is deprecated, instead use\n"
                "import logging; logging.getLogger('ui').info('blah')\n"
                "The self.log attribute will be removed in the next version",
                stacklevel = 2)
        self.log = logging.getLogger(__name__)

    def selectSeries(self, allSeries):
        """Return the first entry of ``allSeries``.

        Indexing fails (IndexError for a list) when ``allSeries`` is empty.
        """
        return allSeries[0]

359
libs/xmltodict.py

@ -0,0 +1,359 @@
#!/usr/bin/env python
"Makes working with XML feel like you are working with JSON"
from xml.parsers import expat
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
try: # pragma no cover
from cStringIO import StringIO
except ImportError: # pragma no cover
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
try: # pragma no cover
from collections import OrderedDict
except ImportError: # pragma no cover
try:
from ordereddict import OrderedDict
except ImportError:
OrderedDict = dict
try: # pragma no cover
_basestring = basestring
except NameError: # pragma no cover
_basestring = str
try: # pragma no cover
_unicode = unicode
except NameError: # pragma no cover
_unicode = str
__author__ = 'Martin Blech'
__version__ = '0.9.0'
__license__ = 'MIT'
class ParsingInterrupted(Exception):
    """Raised to stop parsing when the item callback returns a false value."""
class _DictSAXHandler(object):
def __init__(self,
item_depth=0,
item_callback=lambda *args: True,
xml_attribs=True,
attr_prefix='@',
cdata_key='#text',
force_cdata=False,
cdata_separator='',
postprocessor=None,
dict_constructor=OrderedDict,
strip_whitespace=True,
namespace_separator=':',
namespaces=None):
self.path = []
self.stack = []
self.data = None
self.item = None
self.item_depth = item_depth
self.xml_attribs = xml_attribs
self.item_callback = item_callback
self.attr_prefix = attr_prefix
self.cdata_key = cdata_key
self.force_cdata = force_cdata
self.cdata_separator = cdata_separator
self.postprocessor = postprocessor
self.dict_constructor = dict_constructor
self.strip_whitespace = strip_whitespace
self.namespace_separator = namespace_separator
self.namespaces = namespaces
def _build_name(self, full_name):
if not self.namespaces:
return full_name
i = full_name.rfind(self.namespace_separator)
if i == -1:
return full_name
namespace, name = full_name[:i], full_name[i+1:]
short_namespace = self.namespaces.get(namespace, namespace)
if not short_namespace:
return name
else:
return self.namespace_separator.join((short_namespace, name))
def _attrs_to_dict(self, attrs):
if isinstance(attrs, dict):
return attrs
return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))
def startElement(self, full_name, attrs):
name = self._build_name(full_name)
attrs = self._attrs_to_dict(attrs)
self.path.append((name, attrs or None))
if len(self.path) > self.item_depth:
self.stack.append((self.item, self.data))
if self.xml_attribs:
attrs = self.dict_constructor(
(self.attr_prefix+key, value)
for (key, value) in attrs.items())
else:
attrs = None
self.item = attrs or None
self.data = None
def endElement(self, full_name):
name = self._build_name(full_name)
if len(self.path) == self.item_depth:
item = self.item
if item is None:
item = self.data
should_continue = self.item_callback(self.path, item)
if not should_continue:
raise ParsingInterrupted()
if len(self.stack):
item, data = self.item, self.data
self.item, self.data = self.stack.pop()
if self.strip_whitespace and data is not None:
data = data.strip() or None
if data and self.force_cdata and item is None:
item = self.dict_constructor()
if item is not None:
if data:
self.push_data(item, self.cdata_key, data)
self.item = self.push_data(self.item, name, item)
else:
self.item = self.push_data(self.item, name, data)
else:
self.item = self.data = None
self.path.pop()
def characters(self, data):
if not self.data:
self.data = data
else:
self.data += self.cdata_separator + data
def push_data(self, item, key, data):
if self.postprocessor is not None:
result = self.postprocessor(self.path, key, data)
if result is None:
return item
key, data = result
if item is None:
item = self.dict_constructor()
try:
value = item[key]
if isinstance(value, list):
value.append(data)
else:
item[key] = [value, data]
except KeyError:
item[key] = data
return item
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', **kwargs):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can either be a `string` or a file-like object. Anything
    with a `read` attribute is treated as a file-like object.

    `encoding` is passed to the expat parser; unicode input is encoded with
    it first (`utf-8` when not given). Unless `process_namespaces` is true,
    the parser is created without a namespace separator. Remaining keyword
    arguments are forwarded to the internal handler.

    If `xml_attribs` is `True`, element attributes are put in the dictionary
    among regular child elements, using `@` as a prefix to avoid collisions. If
    set to `False`, they are just ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    If `item_depth` is `0`, the function returns a dictionary for the root
    element (default behavior). Otherwise, it calls `item_callback` every time
    an item at the specified depth is found and returns `None` in the end
    (streaming mode).

    The callback function receives two parameters: the `path` from the document
    root to the item (name-attribs pairs), and the `item` (dict). If the
    callback's return value is false-ish, parsing will be stopped with the
    :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print 'path:%s item:%s' % (path, item)
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    The optional argument `postprocessor` is a function that takes `path`,
    `key` and `value` as positional arguments and returns a new `(key, value)`
    pair where both `key` and `value` may have changed. Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

    You can pass an alternate version of `expat` (such as `defusedexpat`) by
    using the `expat` parameter. E.g:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        OrderedDict([(u'a', u'hello')])
    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        if not encoding:
            encoding = 'utf-8'
        xml_input = xml_input.encode(encoding)
    if not process_namespaces:
        namespace_separator = None
    parser = expat.ParserCreate(
        encoding,
        namespace_separator
    )
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    parser.buffer_text = True
    # Choose the entry point up front. Catching TypeError/AttributeError
    # around ParseFile would also swallow those errors when a user callback
    # raises them, and the retry with Parse would hide the real failure.
    if hasattr(xml_input, 'read'):
        parser.ParseFile(xml_input)
    else:
        parser.Parse(xml_input, True)
    return handler.item
def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t'):
    """Write the element(s) described by ``key``/``value`` to a SAX handler.

    key -- element name
    value -- a dict (attributes, text and children), a scalar (text only),
        None (empty element), or a list/tuple of those for repeated elements
    content_handler -- receives the startElement / characters / endElement
        / ignorableWhitespace calls
    attr_prefix -- keys starting with this prefix become attributes
    cdata_key -- key whose value becomes the element's character data
    depth -- nesting level, used for indentation and the single-root check
    preprocessor -- optional ``(key, value)`` hook; may return a new pair,
        or None to skip the element entirely
    pretty, newl, indent -- pretty-printing switch and its whitespace

    Raises ValueError when more than one root element is supplied.
    """
    if preprocessor is not None:
        result = preprocessor(key, value)
        if result is None:
            return
        key, value = result
    if not isinstance(value, (list, tuple)):
        value = [value]
    if depth == 0 and len(value) > 1:
        raise ValueError('document with multiple roots')
    for v in value:
        if v is None:
            v = OrderedDict()
        elif not isinstance(v, dict):
            v = _unicode(v)
        if isinstance(v, _basestring):
            v = OrderedDict(((cdata_key, v),))
        cdata = None
        attrs = OrderedDict()
        children = []
        for ik, iv in v.items():
            if ik == cdata_key:
                cdata = iv
                continue
            if ik.startswith(attr_prefix):
                # The XML generator escapes attribute values with string
                # methods, so non-string values (numbers, booleans, ...)
                # are converted the same way scalar element values are.
                if not isinstance(iv, _basestring):
                    iv = _unicode(iv)
                attrs[ik[len(attr_prefix):]] = iv
                continue
            children.append((ik, iv))
        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attrs))
        if pretty and children:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in children:
            _emit(child_key, child_value, content_handler,
                  attr_prefix, cdata_key, depth+1, preprocessor,
                  pretty, newl, indent)
        if cdata is not None:
            content_handler.characters(cdata)
        if pretty and children:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)
def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
            **kwargs):
    """Serialize `input_dict` as an XML document (the inverse of `parse`).

    `input_dict` must hold exactly one key, the root element. When `output`
    (a file-like object) is given the XML is written to it and nothing is
    returned; otherwise the document is returned as a string.

    Keys starting with `attr_prefix` (default=`'@'`) become attributes of
    the element, and the key equal to `cdata_key` (default=`'#text'`)
    supplies its character data.

    With `full_document` set to a false value the document start/end events
    are not emitted.

    Pass `pretty=True` for pretty-printed output. Lines then end with `'\n'`
    and are indented with `'\t'` unless `newl` / `indent` say otherwise.
    """
    ((key, value),) = input_dict.items()
    return_text = output is None
    if return_text:
        output = StringIO()
    generator = XMLGenerator(output, encoding)
    if full_document:
        generator.startDocument()
    _emit(key, value, generator, **kwargs)
    if full_document:
        generator.endDocument()
    if not return_text:
        return None
    text = output.getvalue()
    try:  # pragma no cover
        text = text.decode(encoding)
    except AttributeError:  # pragma no cover
        pass
    return text
if __name__ == '__main__':  # pragma: no cover
    # Command-line mode: parse XML from stdin and marshal every item found
    # at the depth given as the only argument to stdout.
    import marshal
    import sys

    (depth_arg,) = sys.argv[1:]
    item_depth = int(depth_arg)

    def handle_item(path, item):
        marshal.dump((path, item), sys.stdout)
        return True

    try:
        root = parse(sys.stdin,
                     item_depth=item_depth,
                     item_callback=handle_item,
                     dict_constructor=dict)
        if item_depth == 0:
            # Non-streaming mode: the whole document is the only item
            handle_item([], root)
    except KeyboardInterrupt:
        pass
Loading…
Cancel
Save