7 changed files with 1715 additions and 0 deletions
@ -0,0 +1,285 @@ |
|||
from datetime import datetime |
|||
import os |
|||
import traceback |
|||
|
|||
from couchpotato import Env |
|||
|
|||
from couchpotato.core.event import addEvent |
|||
from couchpotato.core.helpers.encoding import simplifyString, toUnicode |
|||
from couchpotato.core.helpers.variable import splitString, tryInt, tryFloat |
|||
from couchpotato.core.logger import CPLog |
|||
from couchpotato.core.media.show.providers.base import ShowProvider |
|||
from tvrage_api import tvrage_api |
|||
from tvrage_api import tvrage_exceptions |
|||
from tvrage_api.tvrage_api import Show |
|||
|
|||
log = CPLog(__name__) |
|||
|
|||
autoload = 'TVRage' |
|||
|
|||
|
|||
class TVRage(ShowProvider): |
|||
|
|||
def __init__(self):
    """Hook the TVRage info handlers into the event system and build the client.

    Search is handled by Trakt exclusively (search functionality has been
    removed from the TheTVDB provider as well), so only the ``*.info``
    events are registered here.
    """
    info_handlers = (
        ('show.info', self.getShowInfo),
        ('season.info', self.getSeasonInfo),
        ('episode.info', self.getEpisodeInfo),
    )
    for event_name, handler in info_handlers:
        addEvent(event_name, handler, priority = 3)

    # Keyword arguments handed to tvrage_api.TVRage() by _setup().
    api_key = self.conf('api_key')
    cache_path = os.path.join(Env.get('cache_dir'), 'tvrage_api')
    self.tvrage_api_parms = {
        'apikey': api_key,
        'language': 'en',
        'cache': cache_path,
    }
    self._setup()
|||
|
|||
def _setup(self):
    """Create the tvrage_api client and remember its list of valid languages."""
    client = tvrage_api.TVRage(**self.tvrage_api_parms)
    self.tvrage = client
    self.valid_languages = client.config['valid_languages']
|||
|
|||
def getShow(self, identifier):
    """Fetch a show from TVRage by numeric id.

    Returns the object produced by ``self.tvrage[int(identifier)]`` or
    None when the lookup raises a tvrage_error or IOError (the failure is
    logged together with the traceback).
    """
    try:
        log.debug('Getting show: %s', identifier)
        return self.tvrage[int(identifier)]
    except (tvrage_exceptions.tvrage_error, IOError):
        log.error('Failed to getShowInfo for show id "%s": %s', (identifier, traceback.format_exc()))

    return None
|||
|
|||
def getShowInfo(self, identifiers = None):
    """Return parsed show data for the show named by identifiers['tvrage'].

    Returns None when no identifiers are given, when there is no 'tvrage'
    entry or when that entry is not an integer; an empty list when the
    show could not be fetched; otherwise the (cached) result of
    _parseShow().
    """
    # Missing identifiers mean a wrong invocation; a missing 'tvrage' key
    # just means this provider has nothing to contribute.
    if not identifiers or 'tvrage' not in identifiers:
        return None

    identifier = tryInt(identifiers['tvrage'], None)
    if identifier is None:
        return None

    cache_key = 'tvrage.cache.show.%s' % identifier
    cached = self.getCache(cache_key) or []
    if cached:
        return cached

    show = self.getShow(identifier)
    if show is None:
        return cached

    parsed = self._parseShow(show)
    self.setCache(cache_key, parsed)
    return parsed
|||
|
|||
def getSeasonInfo(self, identifiers = None, params = None):
    """Either return a list of all seasons or a single season by number.

    identifiers: dict holding the show id under the 'tvrage' key.
    params: optional dict; 'season_number' selects one season.

    Returns None when the identifiers are missing or unusable, False when
    the TVRage lookup fails, a list of parsed seasons when no season
    number was requested, or the parsed season that matches (an empty
    list when no season matches).
    """
    params = params or {}

    # Missing identifiers mean a wrong invocation; a missing 'tvrage' key
    # just means this provider has nothing to contribute.
    if not identifiers or 'tvrage' not in identifiers:
        return None

    season_number = params.get('season_number', None)
    identifier = tryInt(identifiers['tvrage'], None)
    if identifier is None:
        return None

    cache_key = 'tvrage.cache.%s.%s' % (identifier, season_number)
    log.debug('Getting TVRage SeasonInfo: %s', cache_key)
    cached = self.getCache(cache_key)
    if cached:
        return cached

    try:
        show = self.tvrage[int(identifier)]
    except (tvrage_exceptions.tvrage_error, IOError):
        # 'show' is not bound when the lookup itself failed, so only the
        # identifier can be reported here.
        log.error('Failed parsing TVRage SeasonInfo for id "%s": %s', (identifier, traceback.format_exc()))
        return False

    result = []
    for number, season in show.items():
        if season_number is None:
            result.append(self._parseSeason(show, number, season))
        elif number == season_number:
            result = self._parseSeason(show, number, season)
            break

    self.setCache(cache_key, result)
    return result
|||
|
|||
def getEpisodeInfo(self, identifiers = None, params = None):
    """Either return a list of all episodes of a season or a single episode.

    identifiers: dict holding the show id under the 'tvrage' key.
    params: dict with 'season_number' (required) and optionally
        'episode_identifiers', a dict whose 'tvrage' entry selects one
        episode by id.

    Returns None when the identifiers are missing or unusable, False when
    no season number was given or the TVRage lookup fails, the parsed
    episode when the requested episode id is found, otherwise the list of
    parsed episodes of the season.
    """
    params = params or {}

    # Missing identifiers mean a wrong invocation; a missing 'tvrage' key
    # just means this provider has nothing to contribute.
    if not identifiers or 'tvrage' not in identifiers:
        return None

    season_number = params.get('season_number', None)
    episode_identifiers = params.get('episode_identifiers', None)
    identifier = tryInt(identifiers['tvrage'], None)
    if season_number is None:
        return False

    if identifier is None:
        # season_number might contain 'show id : season number', since
        # there is no tvrage id for a season and both values are needed.
        try:
            show_part, season_part = season_number.split(':')
            season_number = int(season_part)
            identifier = tryInt(show_part, None)
        except (AttributeError, ValueError):
            # Not a 'show:season' string; never continue with a
            # half-parsed identifier.
            identifier = None

    if identifier is None:
        return None

    episode_identifier = None
    if episode_identifiers:
        if 'tvrage' in episode_identifiers:
            episode_identifier = tryInt(episode_identifiers['tvrage'], None)
        if episode_identifier is None:
            return None

    cache_key = 'tvrage.cache.%s.%s.%s' % (identifier, episode_identifier, season_number)
    log.debug('Getting TVRage EpisodeInfo: %s', cache_key)
    cached = self.getCache(cache_key)
    if cached:
        return cached

    try:
        show = self.tvrage[int(identifier)]
    except (tvrage_exceptions.tvrage_error, IOError):
        # 'show' is not bound when the lookup itself failed, so only the
        # identifier can be reported here.
        log.error('Failed parsing TVRage EpisodeInfo for id "%s": %s', (identifier, traceback.format_exc()))
        return False

    result = []
    for number, season in show.items():
        if number != season_number:
            continue

        for episode in season.values():
            if episode_identifier is not None and episode['id'] == toUnicode(episode_identifier):
                # Requested episode found: return it alone.
                result = self._parseEpisode(episode)
                self.setCache(cache_key, result)
                return result
            else:
                result.append(self._parseEpisode(episode))

    self.setCache(cache_key, result)
    return result
|||
|
|||
def _parseShow(self, show):
    """Convert a tvrage_api show (or a search-result mapping) into show data.

    tvrage_api mimics tvdb_api, but some information is unavailable, so
    the image lists are always empty. Entries with a falsy value are
    dropped from the result. When *show* is a tvrage_api ``Show`` the
    parsed seasons and their episodes are added under 'seasons'.
    """
    def get(name):
        # Search results are plain mappings offering .get(); Show objects
        # (recognised by their search() method) only allow show['id'] and
        # raise tvrage_attributenotfound for a missing field.
        if not hasattr(show, 'search'):
            return show.get(name)
        try:
            return show[name]
        except tvrage_exceptions.tvrage_attributenotfound:
            return None

    genre = get('genre')
    genres = splitString(genre, '|') if genre else []
    classification = get('classification') or ''
    if classification == 'Talk Shows':
        # "Talk Show" is a genre on TheTVDB.com, as these types of shows,
        # e.g. "The Tonight Show Starring Jimmy Fallon", often use
        # different naming schemes, it might be useful to the searcher if
        # it is added here.
        genres.append('Talk Show')

    first_aired = get('firstaired')
    year = None
    if first_aired is not None:
        try:
            year = datetime.strptime(first_aired, '%Y-%m-%d').year
        except (TypeError, ValueError):
            year = None

    show_data = {
        'identifiers': {
            'tvrage': tryInt(get('id')),
        },
        'type': 'show',
        'titles': [get('seriesname')],
        'images': {
            'poster': [],
            'backdrop': [],
            'poster_original': [],
            'backdrop_original': [],
        },
        'year': year,
        'genres': genres,
        'network': get('network'),
        'air_day': (get('airs_dayofweek') or '').lower(),
        'air_time': self.parseTime(get('airs_time')),
        'firstaired': first_aired,
        'runtime': tryInt(get('runtime')),
        'status': get('status'),
    }

    show_data = dict((k, v) for k, v in show_data.iteritems() if v)

    # Only load season info when available
    if isinstance(show, Show):
        show_data['seasons'] = {}

        for season_nr in show:
            season = self._parseSeason(show, season_nr, show[season_nr])
            season['episodes'] = {}

            for episode_nr in show[season_nr]:
                season['episodes'][episode_nr] = self._parseEpisode(show[season_nr][episode_nr])

            show_data['seasons'][season_nr] = season

    return show_data
|||
|
|||
def _parseSeason(self, show, number, season):
    """Build the season data dict for season *number*.

    *show* and *season* are accepted but not used. The result holds
    'number' whenever *number* converts to an integer, including season 0,
    which a plain truthiness filter would silently drop.
    """
    season_data = {
        'number': tryInt(number, None),
    }

    # Drop only values that are really missing, not a legitimate 0.
    season_data = dict((k, v) for k, v in season_data.iteritems() if v is not None)
    return season_data
|||
|
|||
def _parseEpisode(self, episode):
    """Convert a tvrage_api episode mapping into episode data.

    Reads 'episodenumber', 'absolute_number', 'id', 'episodename',
    'filename', 'firstaired' and 'language' from *episode* ('id' must be
    present, the others are optional) and returns a dict from which every
    entry with a falsy value has been removed.
    """
    title = episode.get('episodename', None)
    screencap = episode.get('filename', [])
    first_aired = episode.get('firstaired', None)

    fields = {
        'number': tryInt(episode.get('episodenumber', None)),
        'absolute_number': tryInt(episode.get('absolute_number', None)),
        'identifiers': {
            'tvrage': tryInt(episode['id'])
        },
        'type': 'episode',
        'titles': [title] if title else [],
        'images': {
            'poster': [screencap] if screencap else [],
        },
        'released': first_aired,
        'firstaired': first_aired,
        'language': episode.get('language', None),
    }

    return dict((name, value) for name, value in fields.iteritems() if value)
|||
|
|||
def parseTime(self, time):
    """Return *time* exactly as given; no parsing is performed."""
    unchanged = time
    return unchanged
@ -0,0 +1,4 @@ |
|||
|
|||
__version__ = '1.0' |
|||
__author__ = 'echel0n' |
|||
__license__ = 'BSD' |
@ -0,0 +1,721 @@ |
|||
# !/usr/bin/env python2 |
|||
# encoding:utf-8 |
|||
# author:echel0n |
|||
# project:tvrage_api |
|||
#repository:http://github.com/echel0n/tvrage_api (copied from SickRage, modified to use urllib2) |
|||
#license:unlicense (http://unlicense.org/) |
|||
|
|||
""" |
|||
Modified from http://github.com/dbr/tvrage_api |
|||
Simple-to-use Python interface to The TVRage's API (tvrage.com) |
|||
""" |
|||
from functools import wraps |
|||
import traceback |
|||
|
|||
__author__ = "echel0n" |
|||
__version__ = "1.0" |
|||
|
|||
import os |
|||
import re |
|||
import time |
|||
import urllib |
|||
import urllib2 |
|||
import getpass |
|||
import tempfile |
|||
import warnings |
|||
import logging |
|||
import datetime as dt |
|||
import xmltodict |
|||
|
|||
try: |
|||
import xml.etree.cElementTree as ElementTree |
|||
except ImportError: |
|||
import xml.etree.ElementTree as ElementTree |
|||
|
|||
from dateutil.parser import parse |
|||
from tvrage_cache import CacheHandler |
|||
|
|||
from tvrage_ui import BaseUI |
|||
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound, tvrage_showincomplete, |
|||
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound) |
|||
|
|||
|
|||
def log():
    """Return the logger named ``tvrage_api``."""
    logger = logging.getLogger("tvrage_api")
    return logger
|||
|
|||
|
|||
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    """

    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            # The first ``tries - 1`` attempts swallow the exception and
            # wait; the final attempt below lets it propagate.
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck as e:
                    msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
|||
|
|||
|
|||
class ShowContainer(dict):
    """Simple dict that holds a series of Show instances.

    Every stored key is recorded in ``_stack``. When an item is set and
    the previous clean-up was more than 20 seconds ago, only the keys of
    the 100 most recent stores are kept and older ones are evicted.
    """

    def __init__(self):
        self._stack = []
        self._lastgc = time.time()

    def __setitem__(self, key, value):
        self._stack.append(key)

        # keep only the 100 latest results
        if time.time() - self._lastgc > 20:
            recent = self._stack[-100:]
            still_wanted = set(recent)
            for stale in self._stack[:-100]:
                # A key may have been stored more than once: never evict
                # one that is still in the recent window, and tolerate
                # entries that were already removed.
                if stale not in still_wanted:
                    self.pop(stale, None)

            self._stack = recent
            self._lastgc = time.time()

        super(ShowContainer, self).__setitem__(key, value)
|||
|
|||
|
|||
class Show(dict):
    """Holds a dict of seasons, and show data.

    Seasons are stored as ordinary dict items; show-level fields live in
    the ``data`` dict and can be read through item or attribute access.
    """

    def __init__(self):
        dict.__init__(self)
        self.data = {}

    def __repr__(self):
        return "<Show %s (containing %s seasons)>" % (
            self.data.get(u'seriesname', 'instance'),
            len(self)
        )

    def __getattr__(self, key):
        if key in self:
            # Key is a season, return it
            return self[key]

        # Read ``data`` through __dict__: on an instance created without
        # __init__ (copy/pickle reconstruction) ``self.data`` would call
        # __getattr__ again and recurse without bound.
        data = self.__dict__.get('data', {})
        if key in data:
            # Non-numeric request is for show-data
            return data[key]

        raise AttributeError(key)

    def __getitem__(self, key):
        if key in self:
            # Key is a season, return it
            return dict.__getitem__(self, key)

        if key in self.data:
            # Non-numeric request is for show-data
            return dict.__getitem__(self.data, key)

        # Data wasn't found, raise appropriate error
        if isinstance(key, int) or key.isdigit():
            # Season number x was not found
            raise tvrage_seasonnotfound("Could not find season %s" % (repr(key)))
        else:
            # If it's not numeric, it must be an attribute name, which
            # doesn't exist, so attribute error.
            raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))

    def airedOn(self, date):
        """Return the episodes whose 'firstaired' contains ``str(date)``.

        Raises tvrage_episodenotfound when nothing matches.
        """
        ret = self.search(str(date), 'firstaired')
        if len(ret) == 0:
            raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
        return ret

    def search(self, term=None, key=None):
        """
        Search all episodes in show. Can search all data, or a specific key (for
        example, episodename)

        Always returns an array (can be empty). First index contains the first
        match, and so on.

        Each array index is an Episode() instance, so doing
        search_results[0]['episodename'] will retrieve the episode name of the
        first match.

        Search terms are converted to lower case (unicode) strings.
        """
        results = []
        for cur_season in self.values():
            searchresult = cur_season.search(term=term, key=key)
            if len(searchresult) != 0:
                results.extend(searchresult)

        return results
|||
|
|||
|
|||
class Season(dict):
    """Dict of episodes keyed by episode number, linked to its parent show."""

    def __init__(self, show=None):
        """The show attribute points to the parent show
        """
        self.show = show

    def __repr__(self):
        episode_count = len(self.keys())
        return "<Season instance (containing %s episodes)>" % (episode_count)

    def __getattr__(self, episode_number):
        if episode_number not in self:
            raise AttributeError
        return self[episode_number]

    def __getitem__(self, episode_number):
        if episode_number in self:
            return dict.__getitem__(self, episode_number)
        raise tvrage_episodenotfound("Could not find episode %s" % (repr(episode_number)))

    def search(self, term=None, key=None):
        """Search all episodes in season, returns a list of matching Episode
        instances.
        """
        hits = (ep.search(term=term, key=key) for ep in self.values())
        return [hit for hit in hits if hit is not None]
|||
|
|||
|
|||
class Episode(dict):
    """Dict of episode fields, linked to its parent season."""

    def __init__(self, season=None):
        """The season attribute points to the parent season
        """
        self.season = season

    def __repr__(self):
        numbers = (
            int(self.get(u'seasonnumber', 0)),
            int(self.get(u'episodenumber', 0)),
        )
        epname = self.get(u'episodename')
        if epname is None:
            return "<Episode %02dx%02d>" % numbers
        return "<Episode %02dx%02d - %s>" % (numbers + (epname,))

    def __getattr__(self, key):
        if key not in self:
            raise AttributeError
        return self[key]

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))

    def search(self, term=None, key=None):
        """Search episode data for term, if it matches, return the Episode (self).
        The key parameter can be used to limit the search to a specific element,
        for example, episodename.

        This is primarily for use by Show.search and Season.search.
        """
        if term is None:
            raise TypeError("must supply string to search for (contents)")

        needle = unicode(term).lower()
        for raw_key, raw_value in self.items():
            field = unicode(raw_key).lower()
            text = unicode(raw_value).lower()
            # Only look at the requested key when one was given
            if key is None or field == key:
                if text.find(needle) > -1:
                    return self
|||
|
|||
|
|||
class TVRage: |
|||
"""Create easy-to-use interface to name of season/episode name""" |
|||
|
|||
def __init__(self,
             interactive=False,
             select_first=False,
             debug=False,
             cache=True,
             banners=False,
             actors=False,
             custom_ui=None,
             language=None,
             search_all_languages=False,
             apikey=None,
             forceConnect=False,
             useZip=False,
             dvdorder=False,
             proxy=None):
    """
    cache (True/False/str/unicode/urllib2 opener):
        Retrieved XML is persisted to disc. If True, stores in the
        tvrage_api folder under your system's TEMP_DIR; if set to a
        str/unicode instance it will use this as the cache
        location. If False, disables caching. Can also be passed
        an arbitrary Python object, which is used as a urllib2
        opener, which should be created by urllib2.build_opener

    forceConnect (bool):
        If true it will always try to connect to tvrage.com even if we
        recently timed out. By default it will wait one minute before
        trying again, and any requests within that one minute window will
        return an exception immediately.

    Only debug, cache, custom_ui, language, apikey and proxy are read by
    this constructor; the remaining arguments are accepted and ignored
    here.
    """
    self.shows = ShowContainer()  # Holds all Show classes
    self.corrections = {}  # Holds show-name to show_id mapping

    # Fall back to the built-in key ("tvdb_api's API key" according to the
    # original comment) when the caller does not supply one.
    self.config = {
        'apikey': apikey if apikey is not None else "Uhewg1Rr0o62fvZvUIZt",
        'debug_enabled': debug,  # show debugging messages
        'custom_ui': custom_ui,
        'proxy': proxy,
    }

    if cache is False:
        self.config['cache_enabled'] = False
        self.urlopener = urllib2.build_opener()  # default opener with no caching
    elif cache is True or isinstance(cache, basestring):
        # True means "default temp location", a string names the location
        self.config['cache_enabled'] = True
        self.config['cache_location'] = self._getTempDir() if cache is True else cache
        self.urlopener = urllib2.build_opener(
            CacheHandler(self.config['cache_location'])
        )
    elif isinstance(cache, urllib2.OpenerDirector):
        # If passed something from urllib2.build_opener, use that
        log().debug("Using %r as urlopener" % cache)
        self.config['cache_enabled'] = True
        self.urlopener = cache
    else:
        raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))

    if self.config['debug_enabled']:
        warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. "
                      "To enable debug messages, use the following code before importing: "
                      "import logging; logging.basicConfig(level=logging.DEBUG)")
        logging.basicConfig(level=logging.DEBUG)

    # List of language from http://tvrage.com/api/0629B785CE550C8D/languages.xml
    # Hard-coded here as it is relatively static, and saves another HTTP request, as
    # recommended on http://tvrage.com/wiki/index.php/API:languages.xml
    self.config['valid_languages'] = [
        "da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
        "ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv", "no"
    ]

    # tvrage.com should be based around numeric language codes,
    # but to link to a series like http://tvrage.com/?tab=series&id=79349&lid=16
    # requires the language ID, thus this mapping is required (mainly
    # for usage in tvrage_ui - internally tvrage_api will use the language abbreviations)
    self.config['langabbv_to_id'] = {
        'el': 20, 'en': 7, 'zh': 27, 'it': 15, 'cs': 28, 'es': 16,
        'ru': 22, 'nl': 13, 'pt': 26, 'no': 9, 'tr': 21, 'pl': 18,
        'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11, 'hu': 19,
        'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30,
    }

    if language is None:
        self.config['language'] = 'en'
    elif language in self.config['valid_languages']:
        self.config['language'] = language
    else:
        raise ValueError("Invalid language %s, options are: %s" % (
            language, self.config['valid_languages']
        ))

    # The following url_ configs are based of the
    # http://tvrage.com/wiki/index.php/Programmers_API
    self.config['base_url'] = "http://services.tvrage.com"

    # Each template is filled from self.config now; the remaining %s is
    # the slot for the show name / show id.
    url_templates = (
        ('url_getSeries', u"%(base_url)s/feeds/search.php?show=%%s"),
        ('url_epInfo', u"%(base_url)s/myfeeds/episode_list.php?key=%(apikey)s&sid=%%s"),
        ('url_seriesInfo', u"%(base_url)s/myfeeds/showinfo.php?key=%(apikey)s&sid=%%s"),
        ('url_updtes_all', u"%(base_url)s/myfeeds/currentshows.php"),
    )
    for config_key, template in url_templates:
        self.config[config_key] = template % self.config
|||
|
|||
def _getTempDir(self): |
|||
"""Returns the [system temp dir]/tvrage_api-u501 (or |
|||
tvrage_api-myuser) |
|||
""" |
|||
if hasattr(os, 'getuid'): |
|||
uid = "u%d" % (os.getuid()) |
|||
else: |
|||
# For Windows |
|||
try: |
|||
uid = getpass.getuser() |
|||
except ImportError: |
|||
return os.path.join(tempfile.gettempdir(), "tvrage_api") |
|||
|
|||
return os.path.join(tempfile.gettempdir(), "tvrage_api-%s" % (uid)) |
|||
|
|||
@retry(tvrage_error)
def _loadUrl(self, url):
    """Fetch *url* and return the XML response as a dict (via xmltodict).

    Keys are lower-cased and renamed to tvdb_api style names. Returns
    ``{u'data': None}`` when the body cannot be parsed. Raises
    tvrage_error when the server cannot be reached or a gzip body cannot
    be decoded (which makes the @retry decorator try again).
    """
    import gzip
    import zlib
    from io import BytesIO

    global lastTimeout
    try:
        log().debug("Retrieving URL %s" % url)
        resp = self.urlopener.open(url)
        if 'x-local-cache' in resp.headers:
            log().debug("URL %s was cached in %s" % (
                url,
                resp.headers['x-local-cache'])
            )
    except (IOError, urllib2.URLError) as errormsg:
        if not str(errormsg).startswith('HTTP Error'):
            # The datetime module is imported as ``dt`` in this file.
            lastTimeout = dt.datetime.now()
        raise tvrage_error("Could not connect to server: %s" % (errormsg))

    body = resp.read()

    # handle gzipped content,
    # http://dbr.lighthouseapp.com/projects/13342/tickets/72-gzipped-data-patch
    if 'gzip' in resp.headers.get("Content-Encoding", ''):
        try:
            body = gzip.GzipFile(fileobj=BytesIO(body)).read()
        except (IOError, EOFError, zlib.error):
            raise tvrage_error("Received gzip data from tvrage.com, but could not correctly handle it")

    name_map = {
        'showid': 'id',
        'showname': 'seriesname',
        'name': 'seriesname',
        'summary': 'overview',
        'started': 'firstaired',
        'genres': 'genre',
        'airtime': 'airs_time',
        'airday': 'airs_dayofweek',
        'image': 'fanart',
        'epnum': 'absolute_number',
        'title': 'episodename',
        'airdate': 'firstaired',
        'screencap': 'filename',
        'seasonnum': 'episodenumber'
    }

    status_map = {
        'returning series': 'Continuing',
        'canceled/ended': 'Ended',
        'tbd/on the bubble': 'Continuing',
        'in development': 'Continuing',
        'new series': 'Continuing',
        'never aired': 'Ended',
        'final season': 'Continuing',
        'on hiatus': 'Continuing',
        'pilot ordered': 'Continuing',
        'pilot rejected': 'Ended',
        'canceled': 'Ended',
        'ended': 'Ended',
        '': 'Unknown',
    }

    def remap_keys(path, key, value):
        # xmltodict postprocessor: rename the key, then normalise the value.
        try:
            key = name_map[key.lower()]
        except (ValueError, TypeError, KeyError):
            key = key.lower()

        # clean up value and do type changes
        if value:
            if isinstance(value, dict):
                # NOTE(review): the status lookup only runs for dict
                # values, as in the original statement order - confirm
                # whether plain-string statuses were meant to be mapped.
                if key == 'status':
                    value = status_map.get(str(value).lower()) or 'Unknown'

                if key == 'network':
                    value = value['#text']

                if key == 'genre':
                    value = value['genre']
                    if not value:
                        value = []
                    if not isinstance(value, list):
                        value = [value]
                    value = filter(None, value)
                    value = '|' + '|'.join(value) + '|'

            try:
                if key == 'firstaired' and value in "0000-00-00":
                    new_value = str(dt.date.fromordinal(1))
                    new_value = re.sub("([-]0{2}){1,}", "", new_value)
                    fixDate = parse(new_value, fuzzy=True).date()
                    value = fixDate.strftime("%Y-%m-%d")
                elif key == 'firstaired':
                    value = parse(value, fuzzy=True).date()
                    value = value.strftime("%Y-%m-%d")
            except Exception:
                # Best effort: keep the raw value when the date cannot be
                # normalised.
                pass

        return (key, value)

    try:
        return xmltodict.parse(body, postprocessor=remap_keys)
    except Exception:
        return {u'data': None}
|||
|
|||
def _getetsrc(self, url): |
|||
"""Loads a URL using caching, returns an ElementTree of the source |
|||
""" |
|||
|
|||
try: |
|||
return self._loadUrl(url).values()[0] |
|||
except Exception, e: |
|||
raise tvrage_error(e) |
|||
|
|||
def _setItem(self, sid, seas, ep, attrib, value): |
|||
"""Creates a new episode, creating Show(), Season() and |
|||
Episode()s as required. Called by _getShowData to populate show |
|||
|
|||
Since the nice-to-use tvrage[1][24]['name] interface |
|||
makes it impossible to do tvrage[1][24]['name] = "name" |
|||
and still be capable of checking if an episode exists |
|||
so we can raise tvrage_shownotfound, we have a slightly |
|||
less pretty method of setting items.. but since the API |
|||
is supposed to be read-only, this is the best way to |
|||
do it! |
|||
The problem is that calling tvrage[1][24]['episodename'] = "name" |
|||
calls __getitem__ on tvrage[1], there is no way to check if |
|||
tvrage.__dict__ should have a key "1" before we auto-create it |
|||
""" |
|||
if sid not in self.shows: |
|||
self.shows[sid] = Show() |
|||
if seas not in self.shows[sid]: |
|||
self.shows[sid][seas] = Season(show=self.shows[sid]) |
|||
if ep not in self.shows[sid][seas]: |
|||
self.shows[sid][seas][ep] = Episode(season=self.shows[sid][seas]) |
|||
self.shows[sid][seas][ep][attrib] = value |
|||
|
|||
def _setShowData(self, sid, key, value): |
|||
"""Sets self.shows[sid] to a new Show instance, or sets the data |
|||
""" |
|||
if sid not in self.shows: |
|||
self.shows[sid] = Show() |
|||
self.shows[sid].data[key] = value |
|||
|
|||
def _cleanData(self, data): |
|||
"""Cleans up strings returned by tvrage.com |
|||
|
|||
Issues corrected: |
|||
- Replaces & with & |
|||
- Trailing whitespace |
|||
""" |
|||
|
|||
if isinstance(data, basestring): |
|||
data = data.replace(u"&", u"&") |
|||
data = data.strip() |
|||
|
|||
return data |
|||
|
|||
def search(self, series):
    """This searches tvrage.com for the series name
    and returns the result list

    The name is UTF-8 encoded and URL-quoted before it is inserted into
    the search URL, so names with spaces or other reserved characters
    produce a valid request.
    """
    # Only unicode needs encoding; calling .encode() on a byte string
    # makes Python 2 decode it as ASCII first, which fails on non-ASCII
    # data.
    if isinstance(series, unicode):
        series = series.encode("utf-8")
    log().debug("Searching for show %s" % series)

    search_url = self.config['url_getSeries'] % (urllib.quote(series))
    return self._getetsrc(search_url).values()[0]
|||
|
|||
def _getSeries(self, series):
    """This searches tvrage.com for the series name and lets a UI object
    pick from the results: the configured custom_ui class when there is
    one, otherwise BaseUI.

    Raises tvrage_shownotfound when the search returns nothing.
    """
    found = self.search(series)
    if not found:
        log().debug('Series result returned zero')
        raise tvrage_shownotfound("Show search returned zero results (cannot find show on TVRAGE)")

    # A single hit is not wrapped in a list; normalise it
    all_series = found if isinstance(found, list) else [found]

    custom_ui = self.config['custom_ui']
    if custom_ui is None:
        log().debug('Auto-selecting first search result using BaseUI')
        ui = BaseUI(config=self.config)
    else:
        log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
        ui = custom_ui(config=self.config)

    return ui.selectSeries(all_series)
|||
|
|||
def _getShowData(self, sid, getEpInfo=False):
    """Takes a series ID, loads the show info (and, when getEpInfo is
    true, the episode list) and stores it in the shows dict in layout:
    shows[series_id][season_number][episode_number]

    Returns True on success and False when the episode feed has no
    'episodelist'. Raises tvrage_error when the show info is empty and
    tvrage_showincomplete when the episode feed is empty.
    """
    # Parse show information
    log().debug('Getting all series data for %s' % (sid))
    series_info = self._getetsrc(self.config['url_seriesInfo'] % (sid))

    if not series_info:
        log().debug('Series result returned zero')
        raise tvrage_error("Series result returned zero")

    # get series data
    for field, raw in series_info.items():
        cleaned = raw if raw is None else self._cleanData(raw)
        self._setShowData(sid, field, cleaned)

    if not getEpInfo:
        return True

    # Parse episode data
    log().debug('Getting all episodes of %s' % (sid))
    episode_feed = self._getetsrc(self.config['url_epInfo'] % (sid))

    if not episode_feed:
        log().debug('Series results incomplete')
        raise tvrage_showincomplete(
            "Show search returned incomplete results (cannot find complete show on TVRAGE)")

    if 'episodelist' not in episode_feed:
        return False

    # A single season or episode is not wrapped in a list; normalise both
    season_list = episode_feed['episodelist']['season']
    if not isinstance(season_list, list):
        season_list = [season_list]

    for season_entry in season_list:
        seas_no = int(season_entry['@no'])

        episode_list = season_entry['episode']
        if not isinstance(episode_list, list):
            episode_list = [episode_list]

        for episode_entry in episode_list:
            ep_no = int(episode_entry['episodenumber'])

            for field, raw in episode_entry.items():
                field = field.lower()

                if raw is not None:
                    if field == 'link':
                        # The last path segment of the link is stored as
                        # the episode id
                        raw = raw.rsplit('/', 1)[1]
                        field = 'id'
                    else:
                        raw = self._cleanData(raw)

                self._setItem(sid, seas_no, ep_no, field, raw)

    return True
|||
|
|||
def _nameToSid(self, name):
    """Takes show name and returns the series ID recorded for it in
    ``corrections`` when it was resolved before; otherwise looks the
    name up, loads the show data of every selected series and returns
    the list of ids that loaded successfully.
    """
    if name in self.corrections:
        log().debug('Correcting %s to %s' % (name, self.corrections[name]))
        return self.corrections[name]

    log().debug('Getting show %s' % (name))
    selected_series = self._getSeries(name)
    if isinstance(selected_series, dict):
        selected_series = [selected_series]

    sids = []
    for entry in selected_series:
        if self._getShowData(int(entry['id'])):
            sids.append(int(entry['id']))

    # Remember every selected series, whether or not its data loaded
    name_to_id = {}
    for entry in selected_series:
        name_to_id[entry['seriesname']] = int(entry['id'])
    self.corrections.update(name_to_id)

    return sids
|||
|
|||
def __getitem__(self, key):
    """Fetch a show by integer ID, or search for shows by name.

    An ``int``/``long`` key is treated as a show ID: the show is loaded
    with ``self._getShowData(key, True)`` if it is not yet in
    ``self.shows`` and ``self.shows[key]`` is returned.

    Any other key is converted with ``str()``, lower-cased, stored in
    ``self.config['searchterm']`` and passed to ``self._getSeries``. Every
    field of every result is recorded through ``self._setShowData`` under
    the result's ``'id'`` value, and the list of results is returned.
    """
    if isinstance(key, (int, long)):
        # Item is integer, treat as show id
        if key not in self.shows:
            self._getShowData(key, True)
        return self.shows[key]

    search_term = str(key).lower()
    self.config['searchterm'] = search_term
    results = self._getSeries(search_term)
    if isinstance(results, dict):
        # A single hit arrives as a bare dict; normalise it to a list.
        results = [results]

    for result in results:
        for field, value in result.items():
            self._setShowData(result['id'], field, value)
    return results
|||
|
|||
def __repr__(self): |
|||
return str(self.shows) |
|||
|
|||
|
|||
def main():
    """Simple example of using tvrage_api.

    Searches for a show by name, loads the first hit by its ID and prints
    the show name and one episode name.
    """
    import logging

    logging.basicConfig(level=logging.DEBUG)

    tvrage_instance = TVRage(cache=False)

    # Indexing with a string runs a search and returns a *list* of result
    # dicts, so the show itself has to be fetched by its integer id.
    results = tvrage_instance['Lost']
    if not results:
        print('No results found')
        return

    show = tvrage_instance[int(results[0]['id'])]
    # NOTE(review): assumes the loaded show supports show['seriesname'] and
    # show[season][episode], as the original example did - confirm against
    # the Show class.
    print(show['seriesname'])
    print(show[1][4]['episodename'])


if __name__ == '__main__':
    main()
@ -0,0 +1,251 @@ |
|||
#!/usr/bin/env python2 |
|||
#encoding:utf-8 |
|||
#author:echel0n |
|||
#project:tvrage_api |
|||
#repository:http://github.com/echel0n/tvrage_api |
|||
#license:unlicense (http://unlicense.org/) |
|||
|
|||
""" |
|||
urllib2 caching handler |
|||
Modified from http://code.activestate.com/recipes/491261/ |
|||
""" |
|||
from __future__ import with_statement |
|||
|
|||
__author__ = "echel0n" |
|||
__version__ = "1.0" |
|||
|
|||
import os |
|||
import time |
|||
import errno |
|||
import httplib |
|||
import urllib2 |
|||
import StringIO |
|||
from hashlib import md5 |
|||
from threading import RLock |
|||
|
|||
cache_lock = RLock()


def locked_function(origfunc):
    """Decorator to execute function under lock.

    The returned wrapper holds the module-wide re-entrant ``cache_lock``
    for the duration of the call, releases it even if ``origfunc`` raises,
    and passes the return value through unchanged. The wrapper keeps the
    name and docstring of ``origfunc``.
    """
    from functools import wraps

    @wraps(origfunc)
    def wrapped(*args, **kwargs):
        with cache_lock:
            return origfunc(*args, **kwargs)
    return wrapped
|||
|
|||
def calculate_cache_path(cache_location, url):
    """Return the ``(headers_path, body_path)`` pair used to cache ``url``.

    Both paths are ``[cache_location]/[md5 hex digest of url]`` with a
    ``.headers`` or ``.body`` suffix. The filesystem is not touched.
    """
    digest = md5(url).hexdigest()
    return tuple(
        os.path.join(cache_location, digest + suffix)
        for suffix in (".headers", ".body")
    )
|||
|
|||
def check_cache_time(path, max_age):
    """Tell whether ``path`` was modified within the last ``max_age`` seconds.

    Returns False when ``path`` is not an existing regular file or its
    modification time is older than ``max_age`` seconds; True otherwise.
    """
    if not os.path.isfile(path):
        return False
    modified = os.stat(path).st_mtime
    oldest_allowed = time.time() - max_age
    # Anything modified before the cut-off is considered stale.
    return not modified < oldest_allowed
|||
|
|||
@locked_function
def exists_in_cache(cache_location, url, max_age):
    """Return True only if both cache files for ``url`` exist and are fresh.

    The header file and the body file must both be present and both pass
    ``check_cache_time`` for ``max_age``; otherwise False is returned.
    """
    hpath, bpath = calculate_cache_path(cache_location, url)
    return (
        os.path.exists(hpath)
        and os.path.exists(bpath)
        and check_cache_time(hpath, max_age)
        and check_cache_time(bpath, max_age)
    )
|||
|
|||
@locked_function
def store_in_cache(cache_location, url, response):
    """Tries to store response in cache.

    Writes ``str(response.info())`` to the header file and
    ``response.read()`` to the body file computed by
    ``calculate_cache_path``. Both files are closed even if writing fails
    part-way.

    Returns True if an ``IOError`` occurred while writing and False when
    both files were written. Note the sense of the flag:
    ``CacheHandler.http_response`` passes it on as ``set_cache_header``.
    """
    hpath, bpath = calculate_cache_path(cache_location, url)
    try:
        with open(hpath, "wb") as header_file:
            header_file.write(str(response.info()))

        with open(bpath, "wb") as body_file:
            body_file.write(response.read())
    except IOError:
        return True
    else:
        return False
|||
|
|||
@locked_function
def delete_from_cache(cache_location, url):
    """Deletes a response in cache.

    Removes the header and body files for ``url`` if they exist.

    Returns True if removing a file failed and False otherwise (the same
    flag convention as ``store_in_cache``).
    """
    hpath, bpath = calculate_cache_path(cache_location, url)
    try:
        for path in (hpath, bpath):
            if os.path.exists(path):
                os.remove(path)
    except (IOError, OSError):
        # os.remove reports failures as OSError, which is a separate
        # class from IOError on Python 2.
        return True
    else:
        return False
|||
|
|||
class CacheHandler(urllib2.BaseHandler):
    """Stores responses in a persistent on-disk cache.

    If a subsequent GET request is made for the same URL, the stored
    response is returned, saving time, resources and bandwidth
    """
    @locked_function
    def __init__(self, cache_location, max_age = 21600):
        """Remember the cache directory and create it if it is missing.

        ``max_age`` is the maximum age passed to ``exists_in_cache`` when
        deciding whether a stored response may be reused.
        """
        self.max_age = max_age
        self.cache_location = cache_location
        if os.path.exists(self.cache_location):
            return
        try:
            os.mkdir(self.cache_location)
        except OSError as e:
            # Another process may have created the directory in the
            # meantime; that is fine. A plain file in the way, or any
            # other error, is relayed to the caller.
            created_elsewhere = (
                e.errno == errno.EEXIST
                and os.path.isdir(self.cache_location)
            )
            if not created_elsewhere:
                raise

    def default_open(self, request):
        """Handles GET requests, if the response is cached it returns it
        """
        if request.get_method() != "GET":
            return None # let the next handler try to handle the request

        url = request.get_full_url()
        if not exists_in_cache(self.cache_location, url, self.max_age):
            return None
        return CachedResponse(
            self.cache_location,
            url,
            set_cache_header = True
        )

    def http_response(self, request, response):
        """Gets a HTTP response, if it was a GET request and the status code
        starts with 2 (200 OK etc) it caches it and returns a CachedResponse
        """
        is_get = request.get_method() == "GET"
        if not (is_get and str(response.code).startswith("2")):
            return response

        url = request.get_full_url()
        if 'x-local-cache' in response.info():
            # Response already came from the local cache
            set_cache_header = True
        else:
            # Response is not cached
            set_cache_header = store_in_cache(
                self.cache_location,
                url,
                response
            )

        return CachedResponse(
            self.cache_location,
            url,
            set_cache_header = set_cache_header
        )
|||
|
|||
class CachedResponse(StringIO.StringIO):
    """An urllib2.response-like object for cached responses.

    To determine if a response is cached or coming directly from
    the network, check the x-local-cache header rather than the object type.
    """

    @locked_function
    def __init__(self, cache_location, url, set_cache_header=True):
        """Load the cached body and headers stored for ``url``.

        When ``set_cache_header`` is true an ``x-local-cache`` header
        holding the body file path is appended to the stored headers.
        Opening the cache files raises ``IOError`` if they are missing.
        """
        self.cache_location = cache_location
        hpath, bpath = calculate_cache_path(cache_location, url)

        # Read through context managers so the handles are always closed.
        with open(bpath, "rb") as body_file:
            StringIO.StringIO.__init__(self, body_file.read())

        self.url = url
        self.code = 200
        self.msg = "OK"
        with open(hpath, "rb") as header_file:
            headerbuf = header_file.read()
        if set_cache_header:
            headerbuf += "x-local-cache: %s\r\n" % (bpath)
        self.headers = httplib.HTTPMessage(StringIO.StringIO(headerbuf))

    def info(self):
        """Returns headers
        """
        return self.headers

    def geturl(self):
        """Returns original URL
        """
        return self.url

    @locked_function
    def recache(self):
        """Fetch ``self.url`` again, store the result and reload this object."""
        fresh_response = urllib2.urlopen(self.url)
        store_in_cache(
            self.cache_location,
            fresh_response.url,
            fresh_response
        )
        CachedResponse.__init__(self, self.cache_location, self.url, True)

    @locked_function
    def delete_cache(self):
        """Remove the cache files stored for ``self.url``."""
        delete_from_cache(
            self.cache_location,
            self.url
        )
|||
|
|||
|
|||
if __name__ == "__main__":
    def main():
        """Quick test/example of CacheHandler"""
        opener = urllib2.build_opener(CacheHandler("/tmp/"))

        response = opener.open("http://google.com")
        print(response.headers)
        print("%s %s" % ("Response:", response.read()))

        response.recache()
        print(response.headers)
        print("%s %s" % ("After recache:", response.read()))

        # Test usage in threads
        from threading import Thread

        class CacheThreadTest(Thread):
            # Assigned on the instance in run(), so every thread compares
            # against the data it fetched itself.
            lastdata = None

            def run(self):
                req = opener.open("http://google.com")
                newdata = req.read()
                if self.lastdata is None:
                    self.lastdata = newdata
                assert self.lastdata == newdata, "Data was not consistent, uhoh"
                req.recache()

        threads = [CacheThreadTest() for x in range(50)]

        print("Starting threads")
        for worker in threads:
            worker.start()
        print("..done")

        print("Joining threads")
        for worker in threads:
            worker.join()
        print("..done")

    main()
@ -0,0 +1,64 @@ |
|||
#!/usr/bin/env python2 |
|||
# encoding:utf-8 |
|||
#author:echel0n |
|||
#project:tvrage_api |
|||
#repository:http://github.com/echel0n/tvrage_api |
|||
#license:unlicense (http://unlicense.org/) |
|||
|
|||
"""Custom exceptions used or raised by tvrage_api""" |
|||
|
|||
__author__ = "echel0n" |
|||
__version__ = "1.0" |
|||
|
|||
# The base class is exported too, so that star-import users can catch
# every tvrage_api exception with a single ``except tvrage_exception``.
__all__ = ["tvrage_exception", "tvrage_error", "tvrage_userabort",
           "tvrage_shownotfound", "tvrage_showincomplete",
           "tvrage_seasonnotfound", "tvrage_episodenotfound",
           "tvrage_attributenotfound"]


class tvrage_exception(Exception):
    """Any exception generated by tvrage_api
    """
    pass


class tvrage_error(tvrage_exception):
    """An error with tvrage.com (Cannot connect, for example)
    """
    pass


class tvrage_userabort(tvrage_exception):
    """User aborted the interactive selection (via
    the q command, ^c etc)
    """
    pass


class tvrage_shownotfound(tvrage_exception):
    """Show cannot be found on tvrage.com (non-existent show)
    """
    pass


class tvrage_showincomplete(tvrage_exception):
    """Show found but incomplete on tvrage.com (incomplete show)
    """
    pass


class tvrage_seasonnotfound(tvrage_exception):
    """Season cannot be found on tvrage.com
    """
    pass


class tvrage_episodenotfound(tvrage_exception):
    """Episode cannot be found on tvrage.com
    """
    pass


class tvrage_attributenotfound(tvrage_exception):
    """Raised if an episode does not have the requested
    attribute (such as an episode name)
    """
    pass
@ -0,0 +1,31 @@ |
|||
#!/usr/bin/env python2 |
|||
#encoding:utf-8 |
|||
#author:echel0n |
|||
#project:tvrage_api |
|||
#repository:http://github.com/echel0n/tvrage_api |
|||
#license:unlicense (http://unlicense.org/) |
|||
|
|||
"""Contains included user interface for TVRage show selection""" |
|||
|
|||
__author__ = "echel0n" |
|||
__version__ = "1.0" |
|||
|
|||
import logging |
|||
import warnings |
|||
|
|||
def log():
    """Return the logger named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
|||
|
|||
class BaseUI:
    """Default non-interactive UI, which auto-selects first results
    """
    def __init__(self, config, log = None):
        """Store ``config``; ``log`` is deprecated and only triggers a warning."""
        self.config = config
        if log is not None:
            # stacklevel=2 attributes the warning to the code that passed
            # the deprecated argument rather than to this module.
            warnings.warn("the UI's log parameter is deprecated, instead use\n"
                          "import logging; logging.getLogger('ui').info('blah')\n"
                          "The self.log attribute will be removed in the next version",
                          stacklevel=2)
        # Always provide the legacy ``log`` attribute so code that still
        # reads it keeps working.
        self.log = logging.getLogger(__name__)

    def selectSeries(self, allSeries):
        """Return the first entry of ``allSeries``."""
        return allSeries[0]
@ -0,0 +1,359 @@ |
|||
#!/usr/bin/env python |
|||
"Makes working with XML feel like you are working with JSON" |
|||
|
|||
from xml.parsers import expat |
|||
from xml.sax.saxutils import XMLGenerator |
|||
from xml.sax.xmlreader import AttributesImpl |
|||
try: # pragma no cover |
|||
from cStringIO import StringIO |
|||
except ImportError: # pragma no cover |
|||
try: |
|||
from StringIO import StringIO |
|||
except ImportError: |
|||
from io import StringIO |
|||
try: # pragma no cover |
|||
from collections import OrderedDict |
|||
except ImportError: # pragma no cover |
|||
try: |
|||
from ordereddict import OrderedDict |
|||
except ImportError: |
|||
OrderedDict = dict |
|||
|
|||
try: # pragma no cover |
|||
_basestring = basestring |
|||
except NameError: # pragma no cover |
|||
_basestring = str |
|||
try: # pragma no cover |
|||
_unicode = unicode |
|||
except NameError: # pragma no cover |
|||
_unicode = str |
|||
|
|||
__author__ = 'Martin Blech' |
|||
__version__ = '0.9.0' |
|||
__license__ = 'MIT' |
|||
|
|||
|
|||
class ParsingInterrupted(Exception):
    """Raised to stop parsing when an item callback returns a false value."""
|||
|
|||
|
|||
class _DictSAXHandler(object): |
|||
def __init__(self, |
|||
item_depth=0, |
|||
item_callback=lambda *args: True, |
|||
xml_attribs=True, |
|||
attr_prefix='@', |
|||
cdata_key='#text', |
|||
force_cdata=False, |
|||
cdata_separator='', |
|||
postprocessor=None, |
|||
dict_constructor=OrderedDict, |
|||
strip_whitespace=True, |
|||
namespace_separator=':', |
|||
namespaces=None): |
|||
self.path = [] |
|||
self.stack = [] |
|||
self.data = None |
|||
self.item = None |
|||
self.item_depth = item_depth |
|||
self.xml_attribs = xml_attribs |
|||
self.item_callback = item_callback |
|||
self.attr_prefix = attr_prefix |
|||
self.cdata_key = cdata_key |
|||
self.force_cdata = force_cdata |
|||
self.cdata_separator = cdata_separator |
|||
self.postprocessor = postprocessor |
|||
self.dict_constructor = dict_constructor |
|||
self.strip_whitespace = strip_whitespace |
|||
self.namespace_separator = namespace_separator |
|||
self.namespaces = namespaces |
|||
|
|||
def _build_name(self, full_name): |
|||
if not self.namespaces: |
|||
return full_name |
|||
i = full_name.rfind(self.namespace_separator) |
|||
if i == -1: |
|||
return full_name |
|||
namespace, name = full_name[:i], full_name[i+1:] |
|||
short_namespace = self.namespaces.get(namespace, namespace) |
|||
if not short_namespace: |
|||
return name |
|||
else: |
|||
return self.namespace_separator.join((short_namespace, name)) |
|||
|
|||
def _attrs_to_dict(self, attrs): |
|||
if isinstance(attrs, dict): |
|||
return attrs |
|||
return self.dict_constructor(zip(attrs[0::2], attrs[1::2])) |
|||
|
|||
def startElement(self, full_name, attrs): |
|||
name = self._build_name(full_name) |
|||
attrs = self._attrs_to_dict(attrs) |
|||
self.path.append((name, attrs or None)) |
|||
if len(self.path) > self.item_depth: |
|||
self.stack.append((self.item, self.data)) |
|||
if self.xml_attribs: |
|||
attrs = self.dict_constructor( |
|||
(self.attr_prefix+key, value) |
|||
for (key, value) in attrs.items()) |
|||
else: |
|||
attrs = None |
|||
self.item = attrs or None |
|||
self.data = None |
|||
|
|||
def endElement(self, full_name): |
|||
name = self._build_name(full_name) |
|||
if len(self.path) == self.item_depth: |
|||
item = self.item |
|||
if item is None: |
|||
item = self.data |
|||
should_continue = self.item_callback(self.path, item) |
|||
if not should_continue: |
|||
raise ParsingInterrupted() |
|||
if len(self.stack): |
|||
item, data = self.item, self.data |
|||
self.item, self.data = self.stack.pop() |
|||
if self.strip_whitespace and data is not None: |
|||
data = data.strip() or None |
|||
if data and self.force_cdata and item is None: |
|||
item = self.dict_constructor() |
|||
if item is not None: |
|||
if data: |
|||
self.push_data(item, self.cdata_key, data) |
|||
self.item = self.push_data(self.item, name, item) |
|||
else: |
|||
self.item = self.push_data(self.item, name, data) |
|||
else: |
|||
self.item = self.data = None |
|||
self.path.pop() |
|||
|
|||
def characters(self, data): |
|||
if not self.data: |
|||
self.data = data |
|||
else: |
|||
self.data += self.cdata_separator + data |
|||
|
|||
def push_data(self, item, key, data): |
|||
if self.postprocessor is not None: |
|||
result = self.postprocessor(self.path, key, data) |
|||
if result is None: |
|||
return item |
|||
key, data = result |
|||
if item is None: |
|||
item = self.dict_constructor() |
|||
try: |
|||
value = item[key] |
|||
if isinstance(value, list): |
|||
value.append(data) |
|||
else: |
|||
item[key] = [value, data] |
|||
except KeyError: |
|||
item[key] = data |
|||
return item |
|||
|
|||
|
|||
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', **kwargs):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can either be a `string` or a file-like object. Anything
    that has a `read` attribute is treated as a file-like object.

    If `xml_attribs` is `True`, element attributes are put in the dictionary
    among regular child elements, using `@` as a prefix to avoid collisions. If
    set to `False`, they are just ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    If `item_depth` is `0`, the function returns a dictionary for the root
    element (default behavior). Otherwise, it calls `item_callback` every time
    an item at the specified depth is found and returns `None` in the end
    (streaming mode).

    The callback function receives two parameters: the `path` from the document
    root to the item (name-attribs pairs), and the `item` (dict). If the
    callback's return value is false-ish, parsing will be stopped with the
    :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print 'path:%s item:%s' % (path, item)
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    The optional argument `postprocessor` is a function that takes `path`,
    `key` and `value` as positional arguments and returns a new `(key, value)`
    pair where both `key` and `value` may have changed. Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

    You can pass an alternate version of `expat` (such as `defusedexpat`) by
    using the `expat` parameter. E.g:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        OrderedDict([(u'a', u'hello')])

    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        if not encoding:
            encoding = 'utf-8'
        xml_input = xml_input.encode(encoding)
    if not process_namespaces:
        namespace_separator = None
    parser = expat.ParserCreate(
        encoding,
        namespace_separator
    )
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    parser.buffer_text = True
    # Choose the entry point up front instead of catching TypeError /
    # AttributeError around ParseFile: those exceptions can also come from
    # item callbacks or postprocessors and must reach the caller rather
    # than trigger a second parse attempt.
    if hasattr(xml_input, 'read'):
        parser.ParseFile(xml_input)
    else:
        parser.Parse(xml_input, True)
    return handler.item
|||
|
|||
|
|||
def _emit(key, value, content_handler, |
|||
attr_prefix='@', |
|||
cdata_key='#text', |
|||
depth=0, |
|||
preprocessor=None, |
|||
pretty=False, |
|||
newl='\n', |
|||
indent='\t'): |
|||
if preprocessor is not None: |
|||
result = preprocessor(key, value) |
|||
if result is None: |
|||
return |
|||
key, value = result |
|||
if not isinstance(value, (list, tuple)): |
|||
value = [value] |
|||
if depth == 0 and len(value) > 1: |
|||
raise ValueError('document with multiple roots') |
|||
for v in value: |
|||
if v is None: |
|||
v = OrderedDict() |
|||
elif not isinstance(v, dict): |
|||
v = _unicode(v) |
|||
if isinstance(v, _basestring): |
|||
v = OrderedDict(((cdata_key, v),)) |
|||
cdata = None |
|||
attrs = OrderedDict() |
|||
children = [] |
|||
for ik, iv in v.items(): |
|||
if ik == cdata_key: |
|||
cdata = iv |
|||
continue |
|||
if ik.startswith(attr_prefix): |
|||
attrs[ik[len(attr_prefix):]] = iv |
|||
continue |
|||
children.append((ik, iv)) |
|||
if pretty: |
|||
content_handler.ignorableWhitespace(depth * indent) |
|||
content_handler.startElement(key, AttributesImpl(attrs)) |
|||
if pretty and children: |
|||
content_handler.ignorableWhitespace(newl) |
|||
for child_key, child_value in children: |
|||
_emit(child_key, child_value, content_handler, |
|||
attr_prefix, cdata_key, depth+1, preprocessor, |
|||
pretty, newl, indent) |
|||
if cdata is not None: |
|||
content_handler.characters(cdata) |
|||
if pretty and children: |
|||
content_handler.ignorableWhitespace(depth * indent) |
|||
content_handler.endElement(key) |
|||
if pretty and depth: |
|||
content_handler.ignorableWhitespace(newl) |
|||
|
|||
|
|||
def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
            **kwargs):
    """Emit an XML document for the given `input_dict` (reverse of `parse`).

    `input_dict` must contain exactly one top-level key, the root element.

    The resulting XML document is returned as a string, but if `output` (a
    file-like object) is specified, it is written there instead and `None`
    is returned.

    Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
    as XML node attributes, whereas keys equal to `cdata_key`
    (default=`'#text'`) are treated as character data.

    The `pretty` parameter (default=`False`) enables pretty-printing. In this
    mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
    can be customized with the `newl` and `indent` parameters.

    """
    ((key, value),) = input_dict.items()
    must_return = output is None
    if must_return:
        output = StringIO()
    content_handler = XMLGenerator(output, encoding)
    if full_document:
        content_handler.startDocument()
    _emit(key, value, content_handler, **kwargs)
    if full_document:
        content_handler.endDocument()
    if not must_return:
        return None
    text = output.getvalue()
    try:  # pragma no cover
        text = text.decode(encoding)
    except AttributeError:  # pragma no cover
        pass
    return text
|||
|
|||
if __name__ == '__main__':  # pragma: no cover
    import marshal
    import sys

    # Exactly one command-line argument is expected: the streaming depth.
    (depth_arg,) = sys.argv[1:]
    item_depth = int(depth_arg)

    def dump_item(path, item):
        """Marshal one ``(path, item)`` pair to stdout and keep parsing."""
        marshal.dump((path, item), sys.stdout)
        return True

    try:
        document = parse(sys.stdin,
                         item_depth=item_depth,
                         item_callback=dump_item,
                         dict_constructor=dict)
        if item_depth == 0:
            # Non-streaming mode: dump the whole document once.
            dump_item([], document)
    except KeyboardInterrupt:
        pass
Loading…
Reference in new issue