Browse Source

Merge pull request #2284 from fuzeman/tv_searcher

[TV][Searcher] TV searching, parsing and Release creation
pull/2290/merge
Joel Kåberg 12 years ago
parent
commit
c1b13cd076
  1. 10
      couchpotato/core/helpers/variable.py
  2. 118
      couchpotato/core/media/_base/searcher/main.py
  3. 119
      couchpotato/core/media/movie/searcher/main.py
  4. 32
      couchpotato/core/media/show/_base/main.py
  5. 263
      couchpotato/core/media/show/searcher/main.py
  6. 10
      couchpotato/core/plugins/quality/main.py
  7. 2
      couchpotato/core/plugins/release/main.py
  8. 44
      couchpotato/core/providers/base.py
  9. 75
      couchpotato/core/providers/torrent/iptorrents/main.py
  10. 2
      couchpotato/core/providers/torrent/sceneaccess/main.py
  11. 161
      libs/caper/__init__.py
  12. 74
      libs/caper/constraint.py
  13. 147
      libs/caper/group.py
  14. 64
      libs/caper/helpers.py
  15. 193
      libs/caper/matcher.py
  16. 75
      libs/caper/objects.py
  17. 0
      libs/caper/parsers/__init__.py
  18. 88
      libs/caper/parsers/anime.py
  19. 136
      libs/caper/parsers/base.py
  20. 148
      libs/caper/parsers/scene.py
  21. 172
      libs/caper/result.py
  22. 72
      libs/caper/step.py
  23. 201
      libs/logr/__init__.py

10
couchpotato/core/helpers/variable.py

@ -1,3 +1,4 @@
import collections
from couchpotato.core.helpers.encoding import simplifyString, toSafeString, ss
from couchpotato.core.logger import CPLog
import hashlib
@ -145,9 +146,9 @@ def getImdb(txt, check_inside = False, multiple = False):
return False
def tryInt(s):
def tryInt(s, default=0):
try: return int(s)
except: return 0
except: return default
def tryFloat(s):
try:
@ -163,6 +164,11 @@ def natsortKey(s):
def natcmp(a, b):
return cmp(natsortKey(a), natsortKey(b))
def toIterable(value):
if isinstance(value, collections.Iterable):
return value
return [value]
def getTitle(library_dict):
try:
try:

118
couchpotato/core/media/_base/searcher/main.py

@ -2,11 +2,12 @@ from couchpotato import get_session
from couchpotato.api import addApiView
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.encoding import simplifyString, toUnicode
from couchpotato.core.helpers.variable import md5, getTitle
from couchpotato.core.helpers.variable import md5, getTitle, splitString
from couchpotato.core.logger import CPLog
from couchpotato.core.media._base.searcher.base import SearcherBase
from couchpotato.core.settings.model import Media, Release, ReleaseInfo
from couchpotato.environment import Env
from sqlalchemy.exc import InterfaceError
from inspect import ismethod, isfunction
import datetime
import re
@ -23,7 +24,10 @@ class Searcher(SearcherBase):
addEvent('searcher.contains_other_quality', self.containsOtherQuality)
addEvent('searcher.correct_year', self.correctYear)
addEvent('searcher.correct_name', self.correctName)
addEvent('searcher.correct_words', self.correctWords)
addEvent('searcher.download', self.download)
addEvent('searcher.search', self.search)
addEvent('searcher.create_releases', self.createReleases)
addApiView('searcher.full_search', self.searchAllView, docs = {
'desc': 'Starts a full search for all media',
@ -130,6 +134,74 @@ class Searcher(SearcherBase):
return False
def search(self, protocols, media, quality):
results = []
search_type = None
if media['type'] == 'movie':
search_type = 'movie'
elif media['type'] in ['show', 'season', 'episode']:
search_type = 'show'
for search_protocol in protocols:
protocol_results = fireEvent('provider.search.%s.%s' % (search_protocol, search_type), media, quality, merge = True)
if protocol_results:
results += protocol_results
sorted_results = sorted(results, key = lambda k: k['score'], reverse = True)
download_preference = self.conf('preferred_method', section = 'searcher')
if download_preference != 'both':
sorted_results = sorted(sorted_results, key = lambda k: k['protocol'][:3], reverse = (download_preference == 'torrent'))
return sorted_results
def createReleases(self, search_results, media, quality_type):
available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
db = get_session()
found_releases = []
for rel in search_results:
nzb_identifier = md5(rel['url'])
found_releases.append(nzb_identifier)
rls = db.query(Release).filter_by(identifier = nzb_identifier).first()
if not rls:
rls = Release(
identifier = nzb_identifier,
media_id = media.get('id'),
quality_id = quality_type.get('quality_id'),
status_id = available_status.get('id')
)
db.add(rls)
else:
[db.delete(old_info) for old_info in rls.info]
rls.last_edit = int(time.time())
db.commit()
for info in rel:
try:
if not isinstance(rel[info], (str, unicode, int, long, float)):
continue
rls_info = ReleaseInfo(
identifier = info,
value = toUnicode(rel[info])
)
rls.info.append(rls_info)
except InterfaceError:
log.debug('Couldn\'t add %s to ReleaseInfo: %s', (info, traceback.format_exc()))
db.commit()
rel['status_id'] = rls.status_id
return found_releases
def getSearchProtocols(self):
download_protocols = fireEvent('download.enabled_protocols', merge = True)
@ -234,5 +306,49 @@ class Searcher(SearcherBase):
return False
def correctWords(self, rel_name, media):
media_title = fireEvent('searcher.get_search_title', media, single = True)
media_words = re.split('\W+', simplifyString(media_title))
rel_name = simplifyString(rel_name)
rel_words = re.split('\W+', rel_name)
# Make sure it has required words
required_words = splitString(self.conf('required_words', section = 'searcher').lower())
try: required_words = list(set(required_words + splitString(media['category']['required'].lower())))
except: pass
req_match = 0
for req_set in required_words:
req = splitString(req_set, '&')
req_match += len(list(set(rel_words) & set(req))) == len(req)
if len(required_words) > 0 and req_match == 0:
log.info2('Wrong: Required word missing: %s', rel_name)
return False
# Ignore releases
ignored_words = splitString(self.conf('ignored_words', section = 'searcher').lower())
try: ignored_words = list(set(ignored_words + splitString(media['category']['ignored'].lower())))
except: pass
ignored_match = 0
for ignored_set in ignored_words:
ignored = splitString(ignored_set, '&')
ignored_match += len(list(set(rel_words) & set(ignored))) == len(ignored)
if len(ignored_words) > 0 and ignored_match:
log.info2("Wrong: '%s' contains 'ignored words'", rel_name)
return False
# Ignore porn stuff
pron_tags = ['xxx', 'sex', 'anal', 'tits', 'fuck', 'porn', 'orgy', 'milf', 'boobs', 'erotica', 'erotic', 'cock', 'dick']
pron_words = list(set(rel_words) & set(pron_tags) - set(media_words))
if pron_words:
log.info('Wrong: %s, probably pr0n', rel_name)
return False
return True
class SearchSetupError(Exception):
pass

119
couchpotato/core/media/movie/searcher/main.py

@ -29,9 +29,10 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
addEvent('movie.searcher.all', self.searchAll)
addEvent('movie.searcher.all_view', self.searchAllView)
addEvent('movie.searcher.single', self.single)
addEvent('movie.searcher.correct_movie', self.correctMovie)
addEvent('movie.searcher.try_next_release', self.tryNextRelease)
addEvent('movie.searcher.could_be_released', self.couldBeReleased)
addEvent('searcher.correct_release', self.correctRelease)
addEvent('searcher.get_search_title', self.getSearchTitle)
addApiView('movie.searcher.try_next', self.tryNextReleaseView, docs = {
'desc': 'Marks the snatched results as ignored and try the next best release',
@ -167,64 +168,18 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
log.info('Search for %s in %s', (default_title, quality_type['quality']['label']))
quality = fireEvent('quality.single', identifier = quality_type['quality']['identifier'], single = True)
results = []
for search_protocol in search_protocols:
protocol_results = fireEvent('provider.search.%s.movie' % search_protocol, movie, quality, merge = True)
if protocol_results:
results += protocol_results
sorted_results = sorted(results, key = lambda k: k['score'], reverse = True)
if len(sorted_results) == 0:
results = fireEvent('searcher.search', search_protocols, movie, quality, single = True)
if len(results) == 0:
log.debug('Nothing found for %s in %s', (default_title, quality_type['quality']['label']))
download_preference = self.conf('preferred_method', section = 'searcher')
if download_preference != 'both':
sorted_results = sorted(sorted_results, key = lambda k: k['protocol'][:3], reverse = (download_preference == 'torrent'))
# Check if movie isn't deleted while searching
if not db.query(Media).filter_by(id = movie.get('id')).first():
break
# Add them to this movie releases list
for nzb in sorted_results:
nzb_identifier = md5(nzb['url'])
found_releases.append(nzb_identifier)
rls = db.query(Release).filter_by(identifier = nzb_identifier).first()
if not rls:
rls = Release(
identifier = nzb_identifier,
movie_id = movie.get('id'),
quality_id = quality_type.get('quality_id'),
status_id = available_status.get('id')
)
db.add(rls)
else:
[db.delete(old_info) for old_info in rls.info]
rls.last_edit = int(time.time())
db.commit()
for info in nzb:
try:
if not isinstance(nzb[info], (str, unicode, int, long, float)):
continue
found_releases += fireEvent('searcher.create_releases', results, movie, quality_type, single = True)
rls_info = ReleaseInfo(
identifier = info,
value = toUnicode(nzb[info])
)
rls.info.append(rls_info)
except InterfaceError:
log.debug('Couldn\'t add %s to ReleaseInfo: %s', (info, traceback.format_exc()))
db.commit()
nzb['status_id'] = rls.status_id
for nzb in sorted_results:
for nzb in results:
if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and nzb.get('age') <= quality_type.get('wait_for', 0):
log.info('Ignored, waiting %s days: %s', (quality_type.get('wait_for'), nzb['name']))
continue
@ -265,7 +220,11 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
return ret
def correctMovie(self, nzb = None, movie = None, quality = None, **kwargs):
def correctRelease(self, nzb = None, media = None, quality = None, **kwargs):
if media.get('type') != 'movie': return
media_title = fireEvent('searcher.get_search_title', media, single = True)
imdb_results = kwargs.get('imdb_results', False)
retention = Env.setting('retention', section = 'nzb')
@ -274,50 +233,14 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
log.info2('Wrong: Outside retention, age is %s, needs %s or lower: %s', (nzb['age'], retention, nzb['name']))
return False
movie_name = getTitle(movie['library'])
movie_words = re.split('\W+', simplifyString(movie_name))
nzb_name = simplifyString(nzb['name'])
nzb_words = re.split('\W+', nzb_name)
# Make sure it has required words
required_words = splitString(self.conf('required_words', section = 'searcher').lower())
try: required_words = list(set(required_words + splitString(movie['category']['required'].lower())))
except: pass
req_match = 0
for req_set in required_words:
req = splitString(req_set, '&')
req_match += len(list(set(nzb_words) & set(req))) == len(req)
if len(required_words) > 0 and req_match == 0:
log.info2('Wrong: Required word missing: %s', nzb['name'])
return False
# Ignore releases
ignored_words = splitString(self.conf('ignored_words', section = 'searcher').lower())
try: ignored_words = list(set(ignored_words + splitString(movie['category']['ignored'].lower())))
except: pass
ignored_match = 0
for ignored_set in ignored_words:
ignored = splitString(ignored_set, '&')
ignored_match += len(list(set(nzb_words) & set(ignored))) == len(ignored)
if len(ignored_words) > 0 and ignored_match:
log.info2("Wrong: '%s' contains 'ignored words'", (nzb['name']))
return False
# Ignore porn stuff
pron_tags = ['xxx', 'sex', 'anal', 'tits', 'fuck', 'porn', 'orgy', 'milf', 'boobs', 'erotica', 'erotic', 'cock', 'dick']
pron_words = list(set(nzb_words) & set(pron_tags) - set(movie_words))
if pron_words:
log.info('Wrong: %s, probably pr0n', (nzb['name']))
# Check for required and ignored words
if not fireEvent('searcher.correct_words', nzb['name'], media, single = True):
return False
preferred_quality = fireEvent('quality.single', identifier = quality['identifier'], single = True)
# Contains lower quality string
if fireEvent('searcher.contains_other_quality', nzb, movie_year = movie['library']['year'], preferred_quality = preferred_quality, single = True):
if fireEvent('searcher.contains_other_quality', nzb, movie_year = media['library']['year'], preferred_quality = preferred_quality, single = True):
log.info2('Wrong: %s, looking for %s', (nzb['name'], quality['label']))
return False
@ -347,23 +270,23 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
return True
# Check if nzb contains imdb link
if getImdb(nzb.get('description', '')) == movie['library']['identifier']:
if getImdb(nzb.get('description', '')) == media['library']['identifier']:
return True
for raw_title in movie['library']['titles']:
for raw_title in media['library']['titles']:
for movie_title in possibleTitles(raw_title['title']):
movie_words = re.split('\W+', simplifyString(movie_title))
if fireEvent('searcher.correct_name', nzb['name'], movie_title, single = True):
# if no IMDB link, at least check year range 1
if len(movie_words) > 2 and fireEvent('searcher.correct_year', nzb['name'], movie['library']['year'], 1, single = True):
if len(movie_words) > 2 and fireEvent('searcher.correct_year', nzb['name'], media['library']['year'], 1, single = True):
return True
# if no IMDB link, at least check year
if len(movie_words) <= 2 and fireEvent('searcher.correct_year', nzb['name'], movie['library']['year'], 0, single = True):
if len(movie_words) <= 2 and fireEvent('searcher.correct_year', nzb['name'], media['library']['year'], 0, single = True):
return True
log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'", (nzb['name'], movie_name, movie['library']['year']))
log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'", (nzb['name'], media_title, media['library']['year']))
return False
def couldBeReleased(self, is_pre_release, dates, year = None):
@ -434,5 +357,9 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
log.error('Failed searching for next release: %s', traceback.format_exc())
return False
def getSearchTitle(self, media):
if media['type'] == 'movie':
return getTitle(media['library'])
class SearchSetupError(Exception):
pass

32
couchpotato/core/media/show/_base/main.py

@ -42,6 +42,12 @@ class ShowBase(MediaBase):
'shows': array, shows found,
}"""}
})
addApiView('show.refresh', self.refresh, docs = {
'desc': 'Refresh a show, season or episode by id',
'params': {
'id': {'desc': 'Show, Season or Episode ID(s) you want to refresh.', 'type': 'int (comma separated)'},
}
})
addApiView('show.add', self.addView, docs = {
'desc': 'Add new movie to the wanted list',
'params': {
@ -53,6 +59,26 @@ class ShowBase(MediaBase):
addEvent('show.add', self.add)
def refresh(self, id = '', **kwargs):
db = get_session()
for x in splitString(id):
media = db.query(Media).filter_by(id = x).first()
if media:
# Get current selected title
default_title = ''
for title in media.library.titles:
if title.default: default_title = title.title
fireEvent('notify.frontend', type = '%s.busy.%s' % (media.type, x), data = True)
fireEventAsync('library.update.%s' % media.type, identifier = media.library.identifier, default_title = default_title, force = True, on_complete = self.createOnComplete(x))
db.expire_all()
return {
'success': True,
}
def search(self, q = '', **kwargs):
cache_key = u'%s/%s' % (__name__, simplifyString(q))
shows = Env.get('cache').get(cache_key)
@ -273,12 +299,12 @@ class ShowBase(MediaBase):
db.expire_all()
return show_dict
def createOnComplete(self, show_id):
def createOnComplete(self, id):
def onComplete():
db = get_session()
show = db.query(Media).filter_by(id = show_id).first()
fireEventAsync('show.searcher.single', show.to_dict(self.default_dict), on_complete = self.createNotifyFront(show_id))
media = db.query(Media).filter_by(id = id).first()
fireEventAsync('show.searcher.single', media.to_dict(self.default_dict), on_complete = self.createNotifyFront(id))
db.expire_all()
return onComplete

263
couchpotato/core/media/show/searcher/main.py

@ -1,5 +1,14 @@
import pprint
import re
from couchpotato import get_session, Env
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.encoding import simplifyString
from couchpotato.core.helpers.variable import getTitle, tryInt, possibleTitles
from couchpotato.core.logger import CPLog
from couchpotato.core.media._base.searcher.main import SearchSetupError
from couchpotato.core.plugins.base import Plugin
from couchpotato.core.settings.model import Media, Library
from caper import Caper
log = CPLog(__name__)
@ -8,5 +17,257 @@ class ShowSearcher(Plugin):
in_progress = False
# TODO come back to this later, think this could be handled better
quality_map = {
'webdl_1080p': {'resolution': ['1080p'], 'source': ['webdl']},
'webdl_720p': {'resolution': ['720p'], 'source': ['webdl']},
'hdtv_720p': {'resolution': ['720p'], 'source': ['hdtv']},
'hdtv_sd': {'resolution': ['480p', None], 'source': ['hdtv']},
}
def __init__(self):
pass
super(ShowSearcher, self).__init__()
addEvent('show.searcher.single', self.single)
addEvent('searcher.correct_release', self.correctRelease)
addEvent('searcher.get_search_title', self.getSearchTitle)
self.caper = Caper()
def _lookupMedia(self, media):
db = get_session()
media_library = db.query(Library).filter_by(id = media['library_id']).first()
show = None
season = None
episode = None
if media['type'] == 'episode':
show = media_library.parent.parent
season = media_library.parent
episode = media_library
if media['type'] == 'season':
show = media_library.parent
season = media_library
if media['type'] == 'show':
show = media_library
return show, season, episode
def single(self, media, search_protocols = None):
pprint.pprint(media)
if media['type'] == 'show':
# TODO handle show searches (scan all seasons)
return
# Find out search type
try:
if not search_protocols:
search_protocols = fireEvent('searcher.protocols', single = True)
except SearchSetupError:
return
done_status = fireEvent('status.get', 'done', single = True)
if not media['profile'] or media['status_id'] == done_status.get('id'):
log.debug('Episode doesn\'t have a profile or already done, assuming in manage tab.')
return
db = get_session()
pre_releases = fireEvent('quality.pre_releases', single = True)
available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
found_releases = []
too_early_to_search = []
default_title = self.getSearchTitle(media['library'])
if not default_title:
log.error('No proper info found for episode, removing it from library to cause it from having more issues.')
#fireEvent('episode.delete', episode['id'], single = True)
return
show, season, episode = self._lookupMedia(media)
if show is None or season is None:
log.error('Unable to find show or season library in database, missing required data for searching')
return
fireEvent('notify.frontend', type = 'show.searcher.started.%s' % media['id'], data = True, message = 'Searching for "%s"' % default_title)
ret = False
for quality_type in media['profile']['types']:
# TODO check air date?
#if not self.conf('always_search') and not self.couldBeReleased(quality_type['quality']['identifier'] in pre_releases, release_dates, movie['library']['year']):
# too_early_to_search.append(quality_type['quality']['identifier'])
# continue
has_better_quality = 0
# See if better quality is available
for release in media['releases']:
if release['quality']['order'] <= quality_type['quality']['order'] and release['status_id'] not in [available_status.get('id'), ignored_status.get('id'), failed_status.get('id')]:
has_better_quality += 1
# Don't search for quality lower then already available.
if has_better_quality is 0:
log.info('Search for %s S%02d%s in %s', (getTitle(show), season.season_number, "E%02d" % episode.episode_number if episode else "", quality_type['quality']['label']))
quality = fireEvent('quality.single', identifier = quality_type['quality']['identifier'], single = True)
results = fireEvent('searcher.search', search_protocols, media, quality, single = True)
if len(results) == 0:
log.debug('Nothing found for %s in %s', (default_title, quality_type['quality']['label']))
# Check if movie isn't deleted while searching
if not db.query(Media).filter_by(id = media.get('id')).first():
break
# Add them to this movie releases list
found_releases += fireEvent('searcher.create_releases', results, media, quality_type, single = True)
log.info('%d results found' % len(results))
def correctRelease(self, release = None, media = None, quality = None, **kwargs):
if media.get('type') not in ['season', 'episode']: return
retention = Env.setting('retention', section = 'nzb')
if release.get('seeders') is None and 0 < retention < release.get('age', 0):
log.info2('Wrong: Outside retention, age is %s, needs %s or lower: %s', (release['age'], retention, release['name']))
return False
# Check for required and ignored words
if not fireEvent('searcher.correct_words', release['name'], media, single = True):
return False
show, season, episode = self._lookupMedia(media)
if show is None or season is None:
log.error('Unable to find show or season library in database, missing required data for searching')
return
release_info = self.caper.parse(release['name'])
if len(release_info.chains) < 1:
log.info2('Wrong: %s, unable to parse release name (no chains)', release['name'])
return False
# TODO look at all chains
chain = release_info.chains[0]
if not self.correctQuality(chain, quality['identifier']):
log.info('Wrong: %s, quality does not match', release['name'])
return False
if not self.correctIdentifier(chain, media):
log.info('Wrong: %s, identifier does not match', release['name'])
return False
if 'show_name' not in chain.info or not len(chain.info['show_name']):
log.info('Wrong: %s, missing show name in parsed result', release['name'])
return False
chain_words = [x.lower() for x in chain.info['show_name']]
chain_title = ' '.join(chain_words)
library_title = None
# Check show titles match
for raw_title in show.titles:
for valid_words in [x.split(' ') for x in possibleTitles(raw_title.title)]:
if not library_title:
library_title = ' '.join(valid_words)
if valid_words == chain_words:
return True
log.info("Wrong: title '%s', undetermined show naming. Looking for '%s (%s)'", (chain_title, library_title, media['library']['year']))
return False
def correctQuality(self, chain, quality_identifier):
if quality_identifier not in self.quality_map:
log.info2('Wrong: unknown preferred quality %s for TV searching', quality_identifier)
return False
if 'video' not in chain.info:
log.info2('Wrong: no video tags found')
return False
video_tags = self.quality_map[quality_identifier]
if not self.chainMatches(chain, 'video', video_tags):
log.info2('Wrong: %s tags not in chain', video_tags)
return False
return True
def correctIdentifier(self, chain, media):
required_id = self.getIdentifier(media['library'], 'season_number', 'episode_number')
if 'identifier' not in chain.info:
return False
# TODO could be handled better?
if len(chain.info['identifier']) != 1:
return False
identifier = chain.info['identifier'][0]
# TODO air by date episodes
release_id = self.getIdentifier(identifier, 'season', 'episode')
if required_id != release_id:
log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(release_id)))
return False
return True
def getIdentifier(self, d, episode_key, season_key):
return (
tryInt(d.get(season_key), None) if season_key in d else None,
tryInt(d.get(episode_key), None) if episode_key in d else None
)
def chainMatches(self, chain, group, tags):
found_tags = []
for match in chain.info[group]:
for ck, cv in match.items():
if ck in tags and self.cleanMatchValue(cv) in tags[ck]:
found_tags.append(ck)
if set(tags.keys()) == set(found_tags):
return True
return set([key for key, value in tags.items() if value]) == set(found_tags)
def cleanMatchValue(self, value):
value = value.lower()
value = value.strip()
for ch in [' ', '-', '.']:
value = value.replace(ch, '')
return value
def getSearchTitle(self, media):
show, season, episode = self._lookupMedia(media)
if show is None:
return None
name = ''
if season is not None:
name = ' S%02d' % season.season_number
if episode is not None:
name += 'E%02d' % episode.episode_number
show_title = getTitle(show)
if not show_title:
return None
return show_title + name

10
couchpotato/core/plugins/quality/main.py

@ -26,7 +26,15 @@ class QualityPlugin(Plugin):
{'identifier': 'r5', 'size': (600, 1000), 'label': 'R5', 'alternative': ['r6'], 'allow': ['dvdr'], 'ext':['avi', 'mpg', 'mpeg']},
{'identifier': 'tc', 'size': (600, 1000), 'label': 'TeleCine', 'alternative': ['telecine'], 'allow': [], 'ext':['avi', 'mpg', 'mpeg']},
{'identifier': 'ts', 'size': (600, 1000), 'label': 'TeleSync', 'alternative': ['telesync', 'hdts'], 'allow': [], 'ext':['avi', 'mpg', 'mpeg']},
{'identifier': 'cam', 'size': (600, 1000), 'label': 'Cam', 'alternative': ['camrip', 'hdcam'], 'allow': [], 'ext':['avi', 'mpg', 'mpeg']}
{'identifier': 'cam', 'size': (600, 1000), 'label': 'Cam', 'alternative': ['camrip', 'hdcam'], 'allow': [], 'ext':['avi', 'mpg', 'mpeg']},
# TODO come back to this later, think this could be handled better
# WEB-DL
{'identifier': 'webdl_1080p', 'hd': True, 'size': (800, 5000), 'label': 'WEB-DL - 1080p', 'width': 1920, 'height': 1080, 'alternative': [], 'allow': [], 'ext':['mkv']},
{'identifier': 'webdl_720p', 'hd': True, 'size': (800, 5000), 'label': 'WEB-DL - 720p', 'width': 1280, 'height': 720, 'alternative': [], 'allow': [], 'ext':['mkv']},
# HDTV
{'identifier': 'hdtv_720p', 'hd': True, 'size': (800, 5000), 'label': 'HDTV - 720p', 'width': 1280, 'height': 720, 'alternative': [], 'allow': [], 'ext':['mkv']},
{'identifier': 'hdtv_sd', 'hd': False, 'size': (100, 1000), 'label': 'HDTV - SD', 'width': 720, 'alternative': [], 'allow': [], 'ext':['mkv', 'mp4', 'avi']},
]
pre_releases = ['cam', 'ts', 'tc', 'r5', 'scr']

2
couchpotato/core/plugins/release/main.py

@ -190,7 +190,7 @@ class Release(Plugin):
if item.get('protocol') != 'torrent_magnet':
item['download'] = provider.loginDownload if provider.urls.get('login') else provider.download
success = fireEvent('searcher.download', data = item, movie = rel.movie.to_dict({
success = fireEvent('searcher.download', data = item, movie = rel.media.to_dict({
'profile': {'types': {'quality': {}}},
'releases': {'status': {}, 'quality': {}},
'library': {'titles': {}, 'files':{}},

44
couchpotato/core/providers/base.py

@ -1,6 +1,6 @@
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.variable import tryFloat, mergeDicts, md5, \
possibleTitles, getTitle
possibleTitles, toIterable
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
from couchpotato.environment import Env
@ -15,7 +15,6 @@ import xml.etree.ElementTree as XMLTree
log = CPLog(__name__)
class MultiProvider(Plugin):
def __init__(self):
@ -102,6 +101,7 @@ class YarrProvider(Provider):
type = 'movie'
cat_ids = {}
cat_ids_structure = None
cat_backup_id = None
sizeGb = ['gb', 'gib']
@ -183,7 +183,7 @@ class YarrProvider(Provider):
return 'try_next'
def search(self, movie, quality):
def search(self, media, quality):
if self.isDisabled():
return []
@ -195,15 +195,15 @@ class YarrProvider(Provider):
# Create result container
imdb_results = hasattr(self, '_search')
results = ResultList(self, movie, quality, imdb_results = imdb_results)
results = ResultList(self, media, quality, imdb_results = imdb_results)
# Do search based on imdb id
if imdb_results:
self._search(movie, quality, results)
self._search(media, quality, results)
# Search possible titles
else:
for title in possibleTitles(getTitle(movie['library'])):
self._searchOnTitle(title, movie, quality, results)
for title in possibleTitles(fireEvent('searcher.get_search_title', media, single = True)):
self._searchOnTitle(title, media, quality, results)
return results
@ -244,9 +244,32 @@ class YarrProvider(Provider):
return 0
def getCatId(self, identifier):
def _discoverCatIdStructure(self):
# Discover cat_ids structure (single or groups)
for group_name, group_cat_ids in self.cat_ids:
if len(group_cat_ids) > 0:
if type(group_cat_ids[0]) is tuple:
self.cat_ids_structure = 'group'
if type(group_cat_ids[0]) is str:
self.cat_ids_structure = 'single'
def getCatId(self, identifier, group = None):
cat_ids = self.cat_ids
if not self.cat_ids_structure:
self._discoverCatIdStructure()
# If cat_ids is in a 'groups' structure, locate the media group
if self.cat_ids_structure == 'group':
if not group:
raise ValueError("group is required on group cat_ids structure")
for group_type, group_cat_ids in cat_ids:
if group in toIterable(group_type):
cat_ids = group_cat_ids
for cats in self.cat_ids:
for cats in cat_ids:
ids, qualities = cats
if identifier in qualities:
return ids
@ -279,8 +302,7 @@ class ResultList(list):
new_result = self.fillResult(result)
is_correct_movie = fireEvent('movie.searcher.correct_movie',
nzb = new_result, movie = self.movie, quality = self.quality,
is_correct_movie = fireEvent('searcher.correct_release', new_result, self.movie, self.quality,
imdb_results = self.kwargs.get('imdb_results', False), single = True)
if is_correct_movie and new_result['id'] not in self.result_ids:

75
couchpotato/core/providers/torrent/iptorrents/main.py

@ -2,42 +2,56 @@ from bs4 import BeautifulSoup
from couchpotato.core.helpers.encoding import tryUrlencode
from couchpotato.core.helpers.variable import tryInt
from couchpotato.core.logger import CPLog
from couchpotato.core.providers.base import MultiProvider
from couchpotato.core.providers.info.base import MovieProvider, ShowProvider
from couchpotato.core.providers.torrent.base import TorrentProvider
import traceback
log = CPLog(__name__)
class IPTorrents(TorrentProvider):
class IPTorrents(MultiProvider):
def getTypes(self):
return [Movie, Show]
class Base(TorrentProvider):
urls = {
'test' : 'http://www.iptorrents.com/',
'base_url' : 'http://www.iptorrents.com',
'login' : 'http://www.iptorrents.com/torrents/',
'login_check': 'http://www.iptorrents.com/inbox.php',
'search' : 'http://www.iptorrents.com/torrents/?l%d=1%s&q=%s&qf=ti&p=%d',
'search' : 'http://www.iptorrents.com/torrents/?l%d=1%%s&q=%s&qf=ti&p=%%d',
}
cat_ids = [
([48], ['720p', '1080p', 'bd50']),
([72], ['cam', 'ts', 'tc', 'r5', 'scr']),
([7], ['dvdrip', 'brrip']),
([6], ['dvdr']),
]
http_time_between_calls = 1 #seconds
cat_backup_id = None
def _searchOnTitle(self, title, movie, quality, results):
def _buildUrl(self, query, quality_identifier, cat_ids_group = None):
cat_id = self.getCatId(quality_identifier, cat_ids_group)[0]
if not cat_id:
log.warning('Unable to find category for quality %s', quality_identifier)
return
return self.urls['search'] % (cat_id, tryUrlencode(query).replace('%', '%%'))
def _searchOnTitle(self, title, media, quality, results):
freeleech = '' if not self.conf('freeleech') else '&free=on'
base_url = self.buildUrl(title, media, quality)
if not base_url: return
pages = 1
current_page = 1
while current_page <= pages and not self.shuttingDown():
url = self.urls['search'] % (self.getCatId(quality['identifier'])[0], freeleech, tryUrlencode('%s %s' % (title.replace(':', ''), movie['library']['year'])), current_page)
data = self.getHTMLData(url, opener = self.login_opener)
data = self.getHTMLData(
base_url % (freeleech, current_page),
opener = self.login_opener
)
if data:
html = BeautifulSoup(data)
@ -101,3 +115,38 @@ class IPTorrents(TorrentProvider):
def loginCheckSuccess(self, output):
return '/logout.php' in output.lower()
class Movie(MovieProvider, Base):
cat_ids = [
([48], ['720p', '1080p', 'bd50']),
([72], ['cam', 'ts', 'tc', 'r5', 'scr']),
([7], ['dvdrip', 'brrip']),
([6], ['dvdr']),
]
def buildUrl(self, title, media, quality):
query = '%s %s' % (title.replace(':', ''), media['library']['year'])
return self._buildUrl(query, quality['identifier'])
class Show(ShowProvider, Base):
cat_ids = [
('season', [
([65], ['hdtv_sd', 'hdtv_720p', 'webdl_720p', 'webdl_1080p']),
]),
('episode', [
([5], ['hdtv_720p', 'webdl_720p', 'webdl_1080p']),
([78], ['hdtv_sd']),
([4, 79], ['hdtv_sd'])
])
]
def buildUrl(self, title, media, quality):
if media['type'] not in ['season', 'episode']:
return
return self._buildUrl(title.replace(':', ''), quality['identifier'], media['type'])

2
couchpotato/core/providers/torrent/sceneaccess/main.py

@ -102,7 +102,7 @@ class Base(TorrentProvider):
loginCheckSuccess = loginSuccess
class Movie(Base, MovieProvider):
class Movie(MovieProvider, Base):
cat_ids = [
([22], ['720p', '1080p']),

161
libs/caper/__init__.py

@ -0,0 +1,161 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper.matcher import FragmentMatcher
from caper.objects import CaperFragment, CaperClosure
from caper.parsers.anime import AnimeParser
from caper.parsers.scene import SceneParser
__version_info__ = ('0', '2', '0')
__version_branch__ = 'master'
__version__ = "%s%s" % (
'.'.join(__version_info__),
'-' + __version_branch__ if __version_branch__ else ''
)
CL_START_CHARS = ['(', '[']
CL_END_CHARS = [')', ']']
STRIP_START_CHARS = ''.join(CL_START_CHARS)
STRIP_END_CHARS = ''.join(CL_END_CHARS)
STRIP_CHARS = ''.join(['_', ' ', '.'])
FRAGMENT_SEPARATORS = ['.', '-', '_', ' ']
CL_START = 0
CL_END = 1
class Caper(object):
def __init__(self):
self.parsers = {
'scene': SceneParser(),
'anime': AnimeParser()
}
def _closure_split(self, name):
"""
:type name: str
:rtype: list of CaperClosure
"""
closures = []
def end_closure(closures, buf):
buf = buf.strip(STRIP_CHARS)
if len(buf) < 1:
return
cur = CaperClosure(buf)
cur.left = closures[len(closures) - 1] if len(closures) > 0 else None
if cur.left:
cur.left.right = cur
closures.append(cur)
state = CL_START
buf = ""
for x, ch in enumerate(name):
if state == CL_START and ch in CL_START_CHARS:
end_closure(closures, buf)
state = CL_END
buf = ""
buf += ch
if state == CL_END and ch in CL_END_CHARS:
end_closure(closures, buf)
state = CL_START
buf = ""
end_closure(closures, buf)
return closures
def _clean_closure(self, closure):
"""
:type closure: str
:rtype: str
"""
return closure.lstrip(STRIP_START_CHARS).rstrip(STRIP_END_CHARS)
def _fragment_split(self, closures):
"""
:type closures: list of CaperClosure
:rtype: list of CaperClosure
"""
cur_position = 0
cur = CaperFragment()
def end_fragment(fragments, cur, cur_position):
cur.position = cur_position
cur.left = fragments[len(fragments) - 1] if len(fragments) > 0 else None
if cur.left:
cur.left_sep = cur.left.right_sep
cur.left.right = cur
cur.right_sep = ch
fragments.append(cur)
for closure in closures:
closure.fragments = []
for x, ch in enumerate(self._clean_closure(closure.value)):
if ch in FRAGMENT_SEPARATORS:
end_fragment(closure.fragments, cur, cur_position)
# Reset
cur = CaperFragment()
cur_position += 1
else:
cur.value += ch
# Finish parsing the last fragment
if cur.value != "":
end_fragment(closure.fragments, cur, cur_position)
# Reset
cur_position = 0
cur = CaperFragment()
return closures
def parse(self, name, parser='scene'):
closures = self._closure_split(name)
closures = self._fragment_split(closures)
# Print closures
for closure in closures:
Logr.debug("closure [%s]", closure.value)
if parser not in self.parsers:
raise ValueError("Unknown parser")
# TODO autodetect the parser type
return self.parsers[parser].run(closures)

74
libs/caper/constraint.py

@ -0,0 +1,74 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class CaptureConstraint(object):
def __init__(self, capture_group, comparisons=None, **kwargs):
"""Capture constraint object
:type capture_group: CaptureGroup
"""
self.capture_group = capture_group
self.comparisons = comparisons if comparisons else []
for key, value in kwargs.items():
key = key.split('__')
if len(key) != 2:
continue
name, method = key
method = '_compare_' + method
if not hasattr(self, method):
continue
self.comparisons.append((name, getattr(self, method), value))
def _compare_eq(self, fragment, name, expected):
if not hasattr(fragment, name):
return None
return 1.0, getattr(fragment, name) == expected
def _compare_re(self, fragment, name, arg):
if name == 'fragment':
group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)
weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, group)
return weight, weight > minimum_weight
elif type(arg).__name__ == 'SRE_Pattern':
return 1.0, arg.match(getattr(fragment, name)) is not None
elif hasattr(fragment, name):
match = self.capture_group.parser.matcher.value_match(getattr(fragment, name), arg, single=True)
return 1.0, match is not None
if not hasattr(fragment, name):
raise ValueError("Unable to find fragment with name '%s'" % name)
else:
raise ValueError("Unexpected argument type")
def execute(self, fragment):
results = []
total_weight = 0
for name, method, argument in self.comparisons:
weight, success = method(fragment, name, argument)
total_weight += weight
results.append(success)
return total_weight / float(len(results)), all(results) if len(results) > 0 else False
def __repr__(self):
return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons)

147
libs/caper/group.py

@ -0,0 +1,147 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper.helpers import clean_dict
from caper.result import CaperFragmentNode
from caper.step import CaptureStep
from caper.constraint import CaptureConstraint
class CaptureGroup(object):
def __init__(self, parser, result):
"""Capture group object
:type parser: caper.parsers.base.Parser
:type result: caper.result.CaperResult
"""
self.parser = parser
self.result = result
#: @type: list of CaptureStep
self.steps = []
#: @type: list of CaptureConstraint
self.constraints = []
def capture_fragment(self, tag, regex=None, func=None, single=True):
Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single)
self.steps.append(CaptureStep(
self, tag,
'fragment',
regex=regex,
func=func,
single=single
))
return self
def capture_closure(self, tag, regex=None, func=None, single=True):
Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)
self.steps.append(CaptureStep(
self, tag,
'closure',
regex=regex,
func=func,
single=single
))
return self
def until(self, **kwargs):
self.constraints.append(CaptureConstraint(self, **kwargs))
return self
def parse_subject(self, parent_head, subject):
parent_node = parent_head[0] if type(parent_head) is list else parent_head
# TODO - if subject is a closure?
nodes = []
# Check constraints
for constraint in self.constraints:
weight, success = constraint.execute(subject)
if success:
Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
parent_node.finished_groups.append(self)
nodes.append(parent_head)
if weight == 1.0:
return nodes
else:
Logr.debug('Branching result')
# Try match subject against the steps available
tag, success, weight, match, num_fragments = (None, None, None, None, None)
for step in self.steps:
tag = step.tag
success, weight, match, num_fragments = step.execute(subject)
if success:
match = clean_dict(match) if type(match) is dict else match
Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (weight, match, num_fragments))
break
Logr.debug('created fragment node with subject.value: "%s"' % subject.value)
result = [CaperFragmentNode(parent_node.closure, subject.take_right(num_fragments), parent_head, tag, weight, match)]
if match and weight < 1.0:
if num_fragments == 1:
result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))
else:
nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))
nodes.append(result[0] if len(result) == 1 else result)
return nodes
def execute(self):
heads_finished = None
while heads_finished is None or not (len(heads_finished) == len(self.result.heads) and all(heads_finished)):
heads_finished = []
heads = self.result.heads
self.result.heads = []
for head in heads:
node = head[0] if type(head) is list else head
Logr.debug("head node: %s" % node)
if self in node.finished_groups:
Logr.debug("head finished for group")
self.result.heads.append(head)
heads_finished.append(True)
continue
next_subject = node.next()
if next_subject:
for node_result in self.parse_subject(head, next_subject):
self.result.heads.append(node_result)
heads_finished.append(self in node.finished_groups or next_subject is None)
if len(self.result.heads) == 0:
self.result.heads = heads
Logr.debug("heads_finished: %s, self.result.heads: %s", heads_finished, self.result.heads)
Logr.debug("group finished")

64
libs/caper/helpers.py

@ -0,0 +1,64 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
def is_list_type(obj, element_type):
if not type(obj) is list:
return False
if len(obj) < 1:
raise ValueError("Unable to determine list element type from empty list")
return type(obj[0]) is element_type
def clean_dict(target, remove=None):
"""Recursively remove items matching a value 'remove' from the dictionary
:type target: dict
"""
if type(target) is not dict:
raise ValueError("Target is required to be a dict")
remove_keys = []
for key in target.keys():
if type(target[key]) is not dict:
if target[key] == remove:
remove_keys.append(key)
else:
clean_dict(target[key], remove)
for key in remove_keys:
target.pop(key)
return target
def xrange_six(start, stop=None, step=None):
if stop is not None and step is not None:
if PY3:
return range(start, stop, step)
else:
return xrange(start, stop, step)
else:
if PY3:
return range(start)
else:
return xrange(start)

193
libs/caper/matcher.py

@ -0,0 +1,193 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pprint
import re
from logr import Logr
from caper.helpers import is_list_type, clean_dict
class FragmentMatcher(object):
def __init__(self, pattern_groups):
self.regex = {}
for group_name, patterns in pattern_groups:
if group_name not in self.regex:
self.regex[group_name] = []
# Transform into weight groups
if type(patterns[0]) is str or type(patterns[0][0]) not in [int, float]:
patterns = [(1.0, patterns)]
for weight, patterns in patterns:
weight_patterns = []
for pattern in patterns:
# Transform into multi-fragment patterns
if type(pattern) is str:
pattern = (pattern,)
if type(pattern) is tuple and len(pattern) == 2:
if type(pattern[0]) is str and is_list_type(pattern[1], str):
pattern = (pattern,)
result = []
for value in pattern:
if type(value) is tuple:
if len(value) == 2:
# Construct OR-list pattern
value = value[0] % '|'.join(value[1])
elif len(value) == 1:
value = value[0]
result.append(re.compile(value, re.IGNORECASE))
weight_patterns.append(tuple(result))
self.regex[group_name].append((weight, weight_patterns))
pprint.pprint(self.regex)
def find_group(self, name):
for group_name, weight_groups in self.regex.items():
if group_name and group_name == name:
return group_name, weight_groups
return None
def parser_match(self, parser, group_name, single=True):
"""
:type parser: caper.parsers.base.Parser
"""
result = None
for group, weight_groups in self.regex.items():
if group_name and group != group_name:
continue
# TODO handle multiple weights
weight, patterns = weight_groups[0]
for pattern in patterns:
fragments = []
pattern_matched = True
pattern_result = {}
for fragment_pattern in pattern:
if not parser.fragment_available():
pattern_matched = False
break
fragment = parser.next_fragment()
fragments.append(fragment)
Logr.debug('[r"%s"].match("%s")', fragment_pattern.pattern, fragment.value)
match = fragment_pattern.match(fragment.value)
if match:
Logr.debug('Pattern "%s" matched', fragment_pattern.pattern)
else:
pattern_matched = False
break
pattern_result.update(clean_dict(match.groupdict()))
if pattern_matched:
if result is None:
result = {}
if group not in result:
result[group] = {}
Logr.debug('Matched on <%s>', ' '.join([f.value for f in fragments]))
result[group].update(pattern_result)
parser.commit()
if single:
return result
else:
parser.rewind()
return result
def value_match(self, value, group_name=None, single=True):
result = None
for group, weight_groups in self.regex.items():
if group_name and group != group_name:
continue
# TODO handle multiple weights
weight, patterns = weight_groups[0]
for pattern in patterns:
match = pattern[0].match(value)
if not match:
continue
if result is None:
result = {}
if group not in result:
result[group] = {}
result[group].update(match.groupdict())
if single:
return result
return result
def fragment_match(self, fragment, group_name=None):
"""Follow a fragment chain to try find a match
:type fragment: caper.objects.CaperFragment
:type group_name: str or None
:return: The weight of the match found between 0.0 and 1.0,
where 1.0 means perfect match and 0.0 means no match
:rtype: (float, dict, int)
"""
group_name, weight_groups = self.find_group(group_name)
for weight, patterns in weight_groups:
for pattern in patterns:
cur_fragment = fragment
success = True
result = {}
# Ignore empty patterns
if len(pattern) < 1:
break
for fragment_pattern in pattern:
if not cur_fragment:
success = False
break
match = fragment_pattern.match(cur_fragment.value)
if match:
result.update(match.groupdict())
else:
success = False
break
cur_fragment = cur_fragment.right if cur_fragment else None
if success:
Logr.debug("Found match with weight %s" % weight)
return float(weight), result, len(pattern)
return 0.0, None, 1

75
libs/caper/objects.py

@ -0,0 +1,75 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from caper.helpers import xrange_six
class CaperClosure(object):
def __init__(self, value):
#: :type: str
self.value = value
#: :type: CaperClosure
self.left = None
#: :type: CaperClosure
self.right = None
#: :type: list of CaperFragment
self.fragments = []
class CaperFragment(object):
def __init__(self):
#: :type: str
self.value = ""
#: :type: CaperFragment
self.left = None
#: :type: str
self.left_sep = None
#: :type: CaperFragment
self.right = None
#: :type: str
self.right_sep = None
#: :type: int
self.position = None
def take(self, direction, count, include_self=True):
if direction not in ['left', 'right']:
raise ValueError('Un-Expected value for "direction", expected "left" or "right".')
result = []
if include_self:
result.append(self)
count -= 1
cur = self
for x in xrange_six(count):
if cur and getattr(cur, direction):
cur = getattr(cur, direction)
result.append(cur)
else:
result.append(None)
cur = None
return result
def take_left(self, count, include_self=True):
return self.take('left', count, include_self)
def take_right(self, count, include_self=True):
return self.take('right', count, include_self)

0
libs/caper/parsers/__init__.py

88
libs/caper/parsers/anime.py

@ -0,0 +1,88 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from caper.parsers.base import Parser
REGEX_GROUP = re.compile(r'(\(|\[)(?P<group>.*?)(\)|\])', re.IGNORECASE)
PATTERN_GROUPS = [
('identifier', [
r'S(?P<season>\d+)E(?P<episode>\d+)',
r'(S(?P<season>\d+))|(E(?P<episode>\d+))',
r'Ep(?P<episode>\d+)',
r'$(?P<absolute>\d+)^',
(r'Episode', r'(?P<episode>\d+)'),
]),
('video', [
(r'(?P<h264_profile>%s)', [
'Hi10P'
]),
(r'.(?P<resolution>%s)', [
'720p',
'1080p',
'960x720',
'1920x1080'
]),
(r'(?P<source>%s)', [
'BD'
]),
]),
('audio', [
(r'(?P<codec>%s)', [
'FLAC'
]),
])
]
class AnimeParser(Parser):
def __init__(self):
super(AnimeParser, self).__init__(PATTERN_GROUPS)
def capture_group(self, fragment):
match = REGEX_GROUP.match(fragment.value)
if not match:
return None
return match.group('group')
def run(self, closures):
"""
:type closures: list of CaperClosure
"""
self.setup(closures)
self.capture_closure('group', func=self.capture_group)\
.execute(once=True)
self.capture_fragment('show_name', single=False)\
.until(value__re='identifier')\
.until(value__re='video')\
.execute()
self.capture_fragment('identifier', regex='identifier') \
.capture_fragment('video', regex='video', single=False) \
.capture_fragment('audio', regex='audio', single=False) \
.execute()
self.result.build()
return self.result

136
libs/caper/parsers/base.py

@ -0,0 +1,136 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper import FragmentMatcher
from caper.group import CaptureGroup
from caper.result import CaperResult, CaperClosureNode
class Parser(object):
def __init__(self, pattern_groups):
self.matcher = FragmentMatcher(pattern_groups)
self.closures = None
#: :type: caper.result.CaperResult
self.result = None
self._match_cache = None
self._fragment_pos = None
self._closure_pos = None
self._history = None
self.reset()
def reset(self):
self.closures = None
self.result = CaperResult()
self._match_cache = {}
self._fragment_pos = -1
self._closure_pos = -1
self._history = []
def setup(self, closures):
"""
:type closures: list of CaperClosure
"""
self.reset()
self.closures = closures
self.result.heads = [CaperClosureNode(closures[0])]
def run(self, closures):
"""
:type closures: list of CaperClosure
"""
raise NotImplementedError()
#
# Closure Methods
#
def next_closure(self):
self._closure_pos += 1
closure = self.closures[self._closure_pos]
self._history.append(('fragment', -1 - self._fragment_pos))
self._fragment_pos = -1
if self._closure_pos != 0:
self._history.append(('closure', 1))
Logr.debug('(next_closure) closure.value: "%s"', closure.value)
return closure
def closure_available(self):
return self._closure_pos + 1 < len(self.closures)
#
# Fragment Methods
#
def next_fragment(self):
closure = self.closures[self._closure_pos]
self._fragment_pos += 1
fragment = closure.fragments[self._fragment_pos]
self._history.append(('fragment', 1))
Logr.debug('(next_fragment) closure.value "%s" - fragment.value: "%s"', closure.value, fragment.value)
return fragment
def fragment_available(self):
if not self.closure_available():
return False
return self._fragment_pos + 1 < len(self.closures[self._closure_pos].fragments)
def rewind(self):
for source, delta in reversed(self._history):
Logr.debug('(rewind) Rewinding step: %s', (source, delta))
if source == 'fragment':
self._fragment_pos -= delta
elif source == 'closure':
self._closure_pos -= delta
else:
raise NotImplementedError()
self.commit()
def commit(self):
Logr.debug('(commit)')
self._history = []
#
# Capture Methods
#
def capture_fragment(self, tag, regex=None, func=None, single=True):
return CaptureGroup(self, self.result).capture_fragment(
tag,
regex=regex,
func=func,
single=single
)
def capture_closure(self, tag, regex=None, func=None, single=True):
return CaptureGroup(self, self.result).capture_closure(
tag,
regex=regex,
func=func,
single=single
)

148
libs/caper/parsers/scene.py

@ -0,0 +1,148 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper.parsers.base import Parser
from caper.result import CaperFragmentNode
PATTERN_GROUPS = [
('identifier', [
(1.0, [
# S01E01-E02
('^S(?P<season>\d+)E(?P<episode_from>\d+)$', '^E(?P<episode_to>\d+)$'),
# S02E13
r'^S(?P<season>\d+)E(?P<episode>\d+)$',
# S01 E13
(r'^(S(?P<season>\d+))$', r'^(E(?P<episode>\d+))$'),
# S02
# E13
r'^((S(?P<season>\d+))|(E(?P<episode>\d+)))$',
# 3x19
r'^(?P<season>\d+)x(?P<episode>\d+)$',
# 2013.09.15
(r'^(?P<year>\d{4})$', r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$'),
# 09.15.2013
(r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$', r'^(?P<year>\d{4})$'),
# TODO - US/UK Date Format Conflict? will only support US format for now..
# 15.09.2013
#(r'^(?P<day>\d{2})$', r'^(?P<month>\d{2})$', r'^(?P<year>\d{4})$'),
# 130915
r'^(?P<year_short>\d{2})(?P<month>\d{2})(?P<day>\d{2})$',
# Season 3 Episode 14
(r'^Se(ason)?$', r'^(?P<season>\d+)$', r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),
# Season 3
(r'^Se(ason)?$', r'^(?P<season>\d+)$'),
# Episode 14
(r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),
# Part.3
# Part.1.and.Part.3
('^Part$', '(?P<part>\d+)'),
]),
(0.8, [
# 100 - 1899, 2100 - 9999 (skips 1900 to 2099 - so we don't get years my mistake)
# TODO - Update this pattern on 31 Dec 2099
r'^(?P<season>([1-9])|(1[0-8])|(2[1-9])|([3-9][0-9]))(?P<episode>\d{2})$'
]),
(0.5, [
# 100 - 9999
r'^(?P<season>([1-9])|([1-9][0-9]))(?P<episode>\d{2})$'
])
]),
('video', [
r'(?P<aspect>FS|WS)',
(r'(?P<resolution>%s)', [
'480p',
'720p',
'1080p'
]),
(r'(?P<source>%s)', [
'HDTV',
'PDTV',
'DSR',
'DVDRiP'
]),
(r'(?P<codec>%s)', [
'x264',
'XViD'
]),
(r'(?P<language>%s)', [
'GERMAN',
'DUTCH',
'FRENCH',
'SWEDiSH',
'DANiSH',
'iTALiAN'
]),
])
]
class SceneParser(Parser):
def __init__(self):
super(SceneParser, self).__init__(PATTERN_GROUPS)
def capture_group(self, fragment):
if fragment.left_sep == '-' and not fragment.right:
return fragment.value
return None
def run(self, closures):
"""
:type closures: list of CaperClosure
"""
self.setup(closures)
self.capture_fragment('show_name', single=False)\
.until(fragment__re='identifier')\
.until(fragment__re='video')\
.execute()
self.capture_fragment('identifier', regex='identifier', single=False)\
.capture_fragment('video', regex='video', single=False)\
.until(left_sep__eq='-', right__eq=None)\
.execute()
self.capture_fragment('group', func=self.capture_group)\
.execute()
self.print_tree(self.result.heads)
self.result.build()
return self.result
def print_tree(self, heads):
for head in heads:
head = head if type(head) is list else [head]
if type(head[0]) is CaperFragmentNode:
for fragment in head[0].fragments:
Logr.debug(fragment.value)
else:
Logr.debug(head[0].closure.value)
for node in head:
Logr.debug('\t' + str(node).ljust(55) + '\t' + str(node.weight) + '\t' + str(node.match))
if len(head) > 0 and head[0].parent:
self.print_tree([head[0].parent])

172
libs/caper/result.py

@ -0,0 +1,172 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from logr import Logr
GROUP_MATCHES = ['identifier']
class CaperNode(object):
def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
"""
:type parent: CaperNode
:type weight: float
"""
#: :type: caper.objects.CaperClosure
self.closure = closure
#: :type: CaperNode
self.parent = parent
#: :type: str
self.tag = tag
#: :type: float
self.weight = weight
#: :type: dict
self.match = match
#: :type: list of CaptureGroup
self.finished_groups = []
def next(self):
raise NotImplementedError()
class CaperClosureNode(CaperNode):
def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
"""
:type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
"""
super(CaperClosureNode, self).__init__(closure, parent, tag, weight, match)
def next(self):
if self.closure and len(self.closure.fragments) > 0:
return self.closure.fragments[0]
return None
class CaperFragmentNode(CaperNode):
def __init__(self, closure, fragments, parent=None, tag=None, weight=None, match=None):
"""
:type closure: caper.objects.CaperClosure
:type fragments: list of caper.objects.CaperFragment
"""
super(CaperFragmentNode, self).__init__(closure, parent, tag, weight, match)
#: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment
self.fragments = fragments
def next(self):
if len(self.fragments) > 0 and self.fragments[-1] and self.fragments[-1].right:
return self.fragments[-1].right
if self.closure.right:
return self.closure.right
return None
class CaperResult(object):
def __init__(self):
#: :type: list of CaperNode
self.heads = []
self.chains = []
def build(self):
max_matched = 0
for head in self.heads:
for chain in self.combine_chain(head):
if chain.num_matched > max_matched:
max_matched = chain.num_matched
self.chains.append(chain)
for chain in self.chains:
chain.weights.append(chain.num_matched / float(max_matched))
chain.finish()
self.chains.sort(key=lambda chain: chain.weight, reverse=True)
for chain in self.chains:
Logr.debug("chain weight: %.02f", chain.weight)
Logr.debug("\tInfo: %s", chain.info)
Logr.debug("\tWeights: %s", chain.weights)
Logr.debug("\tNumber of Fragments Matched: %s", chain.num_matched)
def combine_chain(self, subject, chain=None):
nodes = subject if type(subject) is list else [subject]
if chain is None:
chain = CaperResultChain()
result = []
for x, node in enumerate(nodes):
node_chain = chain if x == len(nodes) - 1 else chain.copy()
if not node.parent:
result.append(node_chain)
continue
# Skip over closure nodes
if type(node) is CaperClosureNode:
result.extend(self.combine_chain(node.parent, node_chain))
# Parse fragment matches
if type(node) is CaperFragmentNode:
node_chain.update(node)
result.extend(self.combine_chain(node.parent, node_chain))
return result
class CaperResultChain(object):
def __init__(self):
#: :type: float
self.weight = None
self.info = {}
self.num_matched = 0
self.weights = []
def update(self, subject):
if subject.weight is None:
return
self.num_matched += len(subject.fragments) if subject.fragments is not None else 0
self.weights.append(subject.weight)
if subject.match:
if subject.tag not in self.info:
self.info[subject.tag] = []
self.info[subject.tag].insert(0, subject.match)
def finish(self):
self.weight = sum(self.weights) / len(self.weights)
def copy(self):
chain = CaperResultChain()
chain.weight = self.weight
chain.info = copy.deepcopy(self.info)
chain.num_matched = self.num_matched
chain.weights = copy.copy(self.weights)
return chain

72
libs/caper/step.py

@ -0,0 +1,72 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
class CaptureStep(object):
REPR_KEYS = ['regex', 'func', 'single']
def __init__(self, capture_group, tag, source, regex=None, func=None, single=None):
#: @type: CaptureGroup
self.capture_group = capture_group
#: @type: str
self.tag = tag
#: @type: str
self.source = source
#: @type: str
self.regex = regex
#: @type: function
self.func = func
#: @type: bool
self.single = single
def _get_next_subject(self, parser):
if self.source == 'fragment':
if not parser.fragment_available():
return None
return parser.next_fragment()
elif self.source == 'closure':
if not parser.closure_available():
return None
return parser.next_closure()
raise NotImplementedError()
def execute(self, fragment):
if self.regex:
weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex)
Logr.debug('(execute) [regex] tag: "%s"', self.tag)
if match:
return True, weight, match, num_fragments
elif self.func:
match = self.func(fragment)
Logr.debug('(execute) [func] %s += "%s"', self.tag, match)
if match:
return True, 1.0, match, 1
else:
Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)
return True, 1.0, fragment.value, 1
return False, None, None, 1
def __repr__(self):
attribute_values = [key + '=' + repr(getattr(self, key))
for key in self.REPR_KEYS
if hasattr(self, key) and getattr(self, key)]
attribute_string = ', ' + ', '.join(attribute_values) if len(attribute_values) > 0 else ''
return "CaptureStep('%s'%s)" % (self.tag, attribute_string)

201
libs/logr/__init__.py

@ -0,0 +1,201 @@
# logr - Simple python logging wrapper
# Packed by Dean Gardiner <gardiner91@gmail.com>
#
# File part of:
# rdio-sock - Rdio WebSocket Library
# Copyright (C) 2013 fzza- <fzzzzzzzza@gmail.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import inspect
import logging
import os
import sys
IGNORE = ()
PY3 = sys.version_info[0] == 3
class Logr(object):
loggers = {}
handler = None
@staticmethod
def configure(level=logging.WARNING, handler=None, formatter=None):
"""Configure Logr
@param handler: Logger message handler
@type handler: logging.Handler or None
@param formatter: Logger message Formatter
@type formatter: logging.Formatter or None
"""
if formatter is None:
formatter = LogrFormatter()
if handler is None:
handler = logging.StreamHandler()
handler.setFormatter(formatter)
handler.setLevel(level)
Logr.handler = handler
@staticmethod
def configure_check():
if Logr.handler is None:
Logr.configure()
@staticmethod
def _get_name_from_path(filename):
try:
return os.path.splitext(os.path.basename(filename))[0]
except TypeError:
return "<unknown>"
@staticmethod
def get_logger_name():
stack = inspect.stack()
for x in xrange_six(len(stack)):
frame = stack[x][0]
name = None
# Try find name of function defined inside a class
if len(frame.f_code.co_varnames) > 0:
self_argument = frame.f_code.co_varnames[0]
if self_argument == 'self' and self_argument in frame.f_locals:
instance = frame.f_locals[self_argument]
class_ = instance.__class__
class_name = class_.__name__
module_name = class_.__module__
if module_name != '__main__':
name = module_name + '.' + class_name
else:
name = class_name
# Try find name of function defined outside of a class
if name is None:
if frame.f_code.co_name in frame.f_globals:
name = frame.f_globals.get('__name__')
if name == '__main__':
name = Logr._get_name_from_path(frame.f_globals.get('__file__'))
name = name
elif frame.f_code.co_name == '<module>':
name = Logr._get_name_from_path(frame.f_globals.get('__file__'))
if name is not None and name not in IGNORE:
return name
return ""
@staticmethod
def get_logger():
"""Get or create logger (if it does not exist)
@rtype: RootLogger
"""
name = Logr.get_logger_name()
if name not in Logr.loggers:
Logr.configure_check()
Logr.loggers[name] = logging.Logger(name)
Logr.loggers[name].addHandler(Logr.handler)
return Logr.loggers[name]
@staticmethod
def debug(msg, *args, **kwargs):
Logr.get_logger().debug(msg, *args, **kwargs)
@staticmethod
def info(msg, *args, **kwargs):
Logr.get_logger().info(msg, *args, **kwargs)
@staticmethod
def warning(msg, *args, **kwargs):
Logr.get_logger().warning(msg, *args, **kwargs)
warn = warning
@staticmethod
def error(msg, *args, **kwargs):
Logr.get_logger().error(msg, *args, **kwargs)
@staticmethod
def exception(msg, *args, **kwargs):
Logr.get_logger().exception(msg, *args, **kwargs)
@staticmethod
def critical(msg, *args, **kwargs):
Logr.get_logger().critical(msg, *args, **kwargs)
fatal = critical
@staticmethod
def log(level, msg, *args, **kwargs):
Logr.get_logger().log(level, msg, *args, **kwargs)
class LogrFormatter(logging.Formatter):
LENGTH_NAME = 32
LENGTH_LEVEL_NAME = 5
def __init__(self, fmt=None, datefmt=None):
if sys.version_info[:2] > (2,6):
super(LogrFormatter, self).__init__(fmt, datefmt)
else:
logging.Formatter.__init__(self, fmt, datefmt)
def usesTime(self):
return True
def format(self, record):
record.message = record.getMessage()
if self.usesTime():
record.asctime = self.formatTime(record, self.datefmt)
s = "%(asctime)s %(name)s %(levelname)s %(message)s" % {
'asctime': record.asctime,
'name': record.name[-self.LENGTH_NAME:].rjust(self.LENGTH_NAME, ' '),
'levelname': record.levelname[:self.LENGTH_LEVEL_NAME].ljust(self.LENGTH_LEVEL_NAME, ' '),
'message': record.message
}
if record.exc_info:
if not record.exc_text:
record.exc_text = self.formatException(record.exc_info)
if record.exc_text:
if s[-1:] != "\n":
s += "\n"
try:
s += record.exc_text
except UnicodeError:
s = s + record.exc_text.decode(sys.getfilesystemencoding(),
'replace')
return s
def xrange_six(start, stop=None, step=None):
if stop is not None and step is not None:
if PY3:
return range(start, stop, step)
else:
return xrange(start, stop, step)
else:
if PY3:
return range(start)
else:
return xrange(start)
Loading…
Cancel
Save