From 60d8934444d7fb994ad978d7ce1bc4a44b369104 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 2 Oct 2013 15:56:22 +1300 Subject: [PATCH 01/21] Created 'searcher.try_download_result' event from section in MovieSearcher.single --- couchpotato/core/media/_base/searcher/main.py | 25 +++++++++++++++++++++++++ couchpotato/core/media/movie/searcher/main.py | 22 +++------------------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py index 4348f41..68efcfa 100644 --- a/couchpotato/core/media/_base/searcher/main.py +++ b/couchpotato/core/media/_base/searcher/main.py @@ -25,6 +25,7 @@ class Searcher(SearcherBase): addEvent('searcher.correct_year', self.correctYear) addEvent('searcher.correct_name', self.correctName) addEvent('searcher.correct_words', self.correctWords) + addEvent('searcher.try_download_result', self.tryDownloadResult) addEvent('searcher.download', self.download) addEvent('searcher.search', self.search) addEvent('searcher.create_releases', self.createReleases) @@ -53,6 +54,30 @@ class Searcher(SearcherBase): progress = fireEvent('searcher.progress', merge = True) return progress + def tryDownloadResult(self, results, media, quality_type, manual = False): + available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True) + + for rel in results: + if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and rel.get('age') <= quality_type.get('wait_for', 0): + log.info('Ignored, waiting %s days: %s', (quality_type.get('wait_for'), rel['name'])) + continue + + if rel['status_id'] in [ignored_status.get('id'), failed_status.get('id')]: + log.info('Ignored: %s', rel['name']) + continue + + if rel['score'] <= 0: + log.info('Ignored, score to low: %s', rel['name']) + continue + + downloaded = fireEvent('searcher.download', data = rel, movie = media, manual = manual, single = True) + if downloaded is True: + return True + elif downloaded != 'try_next': + break + + return False + def download(self, data, movie, manual = False): if not data.get('protocol'): diff --git a/couchpotato/core/media/movie/searcher/main.py b/couchpotato/core/media/movie/searcher/main.py index d6c3367..0f90941 100644 --- a/couchpotato/core/media/movie/searcher/main.py +++ b/couchpotato/core/media/movie/searcher/main.py @@ -177,25 +177,9 @@ class MovieSearcher(SearcherBase, MovieTypeBase): # Add them to this movie releases list found_releases += fireEvent('searcher.create_releases', results, movie, quality_type, single = True) - for nzb in results: - if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and nzb.get('age') <= quality_type.get('wait_for', 0): - log.info('Ignored, waiting %s days: %s', (quality_type.get('wait_for'), nzb['name'])) - continue - - if nzb['status_id'] in [ignored_status.get('id'), failed_status.get('id')]: - log.info('Ignored: %s', nzb['name']) - continue - - if nzb['score'] <= 0: - log.info('Ignored, score to low: %s', nzb['name']) - continue - - downloaded = fireEvent('searcher.download', data = nzb, movie = movie, manual = manual, single = True) - if downloaded is True: - ret = True - break - elif downloaded != 'try_next': - break + # Try find a valid result and download it + if fireEvent('searcher.try_download_result', results, movie, quality_type, manual, single = True): + ret = True # Remove releases that aren't found anymore for release in movie.get('releases', []): From ff63b8a1c5d3dd4f463a1b61cebf5a955a0a529e Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 2 Oct 2013 15:57:08 +1300 Subject: [PATCH 02/21] Added TV release snatching/downloading --- couchpotato/core/media/show/searcher/main.py | 31 +++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index 41ac3ff..319e8f2 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -1,8 +1,5 @@ -import pprint -import re from couchpotato import get_session, Env from couchpotato.core.event import addEvent, fireEvent -from couchpotato.core.helpers.encoding import simplifyString from couchpotato.core.helpers.variable import getTitle, tryInt, possibleTitles from couchpotato.core.logger import CPLog from couchpotato.core.media._base.searcher.main import SearchSetupError @@ -58,9 +55,7 @@ class ShowSearcher(Plugin): return show, season, episode - def single(self, media, search_protocols = None): - pprint.pprint(media) - + def single(self, media, search_protocols = None, manual = False): if media['type'] == 'show': # TODO handle show searches (scan all seasons) return @@ -130,7 +125,29 @@ class ShowSearcher(Plugin): # Add them to this movie releases list found_releases += fireEvent('searcher.create_releases', results, media, quality_type, single = True) - log.info('%d results found' % len(results)) + # Try find a valid result and download it + if fireEvent('searcher.try_download_result', results, media, quality_type, manual, single = True): + ret = True + + # Remove releases that aren't found anymore + for release in media.get('releases', []): + if release.get('status_id') == available_status.get('id') and release.get('identifier') not in found_releases: + fireEvent('release.delete', release.get('id'), single = True) + else: + log.info('Better quality (%s) already available or snatched for %s', (quality_type['quality']['label'], default_title)) + fireEvent('movie.restatus', media['id']) + break + + # Break if CP wants to shut down + if self.shuttingDown() or ret: + break + + if len(too_early_to_search) > 0: + log.info2('Too early to search for %s, %s', (too_early_to_search, default_title)) + + fireEvent('notify.frontend', type = 'show.searcher.ended.%s' % media['id'], data = True) + + return ret def correctRelease(self, release = None, media = None, quality = None, **kwargs): From 32c289fd3da92e654130a0b9c1c4a4e4318f17e8 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 2 Oct 2013 16:22:41 +1300 Subject: [PATCH 03/21] Renamed 'movie' -> 'media' in 'searcher.download' --- couchpotato/core/media/_base/searcher/main.py | 39 ++++++++++++++------------- couchpotato/core/plugins/release/main.py | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py index 68efcfa..ec1c814 100644 --- a/couchpotato/core/media/_base/searcher/main.py +++ b/couchpotato/core/media/_base/searcher/main.py @@ -70,7 +70,7 @@ class Searcher(SearcherBase): log.info('Ignored, score to low: %s', rel['name']) continue - downloaded = fireEvent('searcher.download', data = rel, movie = media, manual = manual, single = True) + downloaded = fireEvent('searcher.download', data = rel, media = media, manual = manual, single = True) if downloaded is True: return True elif downloaded != 'try_next': @@ -78,11 +78,12 @@ class Searcher(SearcherBase): return False - def download(self, data, movie, manual = False): + def download(self, data, media, manual = False): - if not data.get('protocol'): - data['protocol'] = data['type'] - data['type'] = 'movie' + # TODO what is this for? + #if not data.get('protocol'): + # data['protocol'] = data['type'] + # data['type'] = 'movie' # Test to see if any downloaders are enabled for this type downloader_enabled = fireEvent('download.enabled', manual, data, single = True) @@ -91,14 +92,14 @@ class Searcher(SearcherBase): snatched_status = fireEvent('status.get', 'snatched', single = True) - # Download movie to temp + # Download release to temp filedata = None if data.get('download') and (ismethod(data.get('download')) or isfunction(data.get('download'))): filedata = data.get('download')(url = data.get('url'), nzb_id = data.get('id')) if filedata == 'try_next': return filedata - download_result = fireEvent('download', data = data, movie = movie, manual = manual, filedata = filedata, single = True) + download_result = fireEvent('download', data = data, movie = media, manual = manual, filedata = filedata, single = True) log.debug('Downloader result: %s', download_result) if download_result: @@ -122,36 +123,36 @@ class Searcher(SearcherBase): rls.info.append(rls_info) db.commit() - log_movie = '%s (%s) in %s' % (getTitle(movie['library']), movie['library']['year'], rls.quality.label) + log_movie = '%s (%s) in %s' % (getTitle(media['library']), media['library']['year'], rls.quality.label) snatch_message = 'Snatched "%s": %s' % (data.get('name'), log_movie) log.info(snatch_message) - fireEvent('movie.snatched', message = snatch_message, data = rls.to_dict()) + fireEvent('%s.snatched' % data['type'], message = snatch_message, data = rls.to_dict()) - # If renamer isn't used, mark movie done + # If renamer isn't used, mark media done if not renamer_enabled: active_status = fireEvent('status.get', 'active', single = True) done_status = fireEvent('status.get', 'done', single = True) try: - if movie['status_id'] == active_status.get('id'): - for profile_type in movie['profile']['types']: + if media['status_id'] == active_status.get('id'): + for profile_type in media['profile']['types']: if profile_type['quality_id'] == rls.quality.id and profile_type['finish']: - log.info('Renamer disabled, marking movie as finished: %s', log_movie) + log.info('Renamer disabled, marking media as finished: %s', log_movie) # Mark release done rls.status_id = done_status.get('id') rls.last_edit = int(time.time()) db.commit() - # Mark movie done - mvie = db.query(Media).filter_by(id = movie['id']).first() - mvie.status_id = done_status.get('id') - mvie.last_edit = int(time.time()) + # Mark media done + mdia = db.query(Media).filter_by(id = media['id']).first() + mdia.status_id = done_status.get('id') + mdia.last_edit = int(time.time()) db.commit() except: - log.error('Failed marking movie finished, renamer disabled: %s', traceback.format_exc()) + log.error('Failed marking media finished, renamer disabled: %s', traceback.format_exc()) except: - log.error('Failed marking movie finished: %s', traceback.format_exc()) + log.error('Failed marking media finished: %s', traceback.format_exc()) return True diff --git a/couchpotato/core/plugins/release/main.py b/couchpotato/core/plugins/release/main.py index 9238e18..833ceec 100644 --- a/couchpotato/core/plugins/release/main.py +++ b/couchpotato/core/plugins/release/main.py @@ -191,7 +191,7 @@ class Release(Plugin): if item.get('protocol') != 'torrent_magnet': item['download'] = provider.loginDownload if provider.urls.get('login') else provider.download - success = fireEvent('searcher.download', data = item, movie = rel.media.to_dict({ + success = fireEvent('searcher.download', data = item, media = rel.media.to_dict({ 'profile': {'types': {'quality': {}}}, 'releases': {'status': {}, 'quality': {}}, 'library': {'titles': {}, 'files':{}}, From 7ffa5dc7b679e05b4c7670b8301c6c70295f8c58 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 2 Oct 2013 20:06:12 +1300 Subject: [PATCH 04/21] Fixed IPT Show SD cat_ids --- couchpotato/core/providers/torrent/iptorrents/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/couchpotato/core/providers/torrent/iptorrents/main.py b/couchpotato/core/providers/torrent/iptorrents/main.py index e8247c5..79e83d9 100644 --- a/couchpotato/core/providers/torrent/iptorrents/main.py +++ b/couchpotato/core/providers/torrent/iptorrents/main.py @@ -31,6 +31,7 @@ class Base(TorrentProvider): def _buildUrl(self, query, quality_identifier, cat_ids_group = None): + # TODO this should support searching multiple cat_ids under a group cat_id = self.getCatId(quality_identifier, cat_ids_group)[0] if not cat_id: log.warning('Unable to find category for quality %s', quality_identifier) @@ -140,8 +141,7 @@ class Show(ShowProvider, Base): ]), ('episode', [ ([5], ['hdtv_720p', 'webdl_720p', 'webdl_1080p']), - ([78], ['hdtv_sd']), - ([4, 79], ['hdtv_sd']) + ([4, 78, 79], ['hdtv_sd']) ]) ] From fb0719d6770c7c51194db832427c2c73f57ca563 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 2 Oct 2013 20:07:34 +1300 Subject: [PATCH 05/21] TV Searcher now supports xem scene mappings --- couchpotato/core/media/show/searcher/main.py | 58 ++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index 319e8f2..7c1e847 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -81,7 +81,7 @@ class ShowSearcher(Plugin): found_releases = [] too_early_to_search = [] - default_title = self.getSearchTitle(media['library']) + default_title = self.getSearchTitle(media) if not default_title: log.error('No proper info found for episode, removing it from library to cause it from having more issues.') #fireEvent('episode.delete', episode['id'], single = True) @@ -223,7 +223,7 @@ class ShowSearcher(Plugin): return True def correctIdentifier(self, chain, media): - required_id = self.getIdentifier(media['library'], 'season_number', 'episode_number') + required_id = self.getMediaIdentifier(media['library']) if 'identifier' not in chain.info: return False @@ -234,7 +234,7 @@ class ShowSearcher(Plugin): identifier = chain.info['identifier'][0] # TODO air by date episodes - release_id = self.getIdentifier(identifier, 'season', 'episode') + release_id = self.toNumericIdentifier(identifier.get('season'), identifier.get('episode')) if required_id != release_id: log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(release_id))) @@ -242,11 +242,31 @@ class ShowSearcher(Plugin): return True - def getIdentifier(self, d, episode_key, season_key): - return ( - tryInt(d.get(season_key), None) if season_key in d else None, - tryInt(d.get(episode_key), None) if episode_key in d else None - ) + def getMediaIdentifier(self, media_library): + identifier = None, None + + if media_library['type'] == 'episode': + map_episode = media_library['info'].get('map_episode') + + if map_episode and 'scene' in map_episode: + identifier = ( + map_episode['scene'].get('season'), + map_episode['scene'].get('episode') + ) + else: + # TODO xem mapping? + identifier = ( + media_library.get('season_number'), + media_library.get('episode_number') + ) + + if media_library['type'] == 'season': + identifier = media_library.get('season_number'), None + + return self.toNumericIdentifier(*identifier) + + def toNumericIdentifier(self, season, episode): + return tryInt(season, None), tryInt(episode, None) def chainMatches(self, chain, group, tags): found_tags = [] @@ -260,7 +280,7 @@ class ShowSearcher(Plugin): if set(tags.keys()) == set(found_tags): return True - return set([key for key, value in tags.items() if value]) == set(found_tags) + return set([key for key, value in tags.items() if None not in value]) == set(found_tags) def cleanMatchValue(self, value): value = value.lower() @@ -276,15 +296,19 @@ class ShowSearcher(Plugin): if show is None: return None - name = '' - if season is not None: - name = ' S%02d' % season.season_number - - if episode is not None: - name += 'E%02d' % episode.episode_number - + # TODO this misses alternative titles from the database show_title = getTitle(show) if not show_title: return None - return show_title + name + season_num, episode_num = self.getMediaIdentifier(media['library']) + + name = show_title + + if season_num: + name += ' S%02d' % season_num + + if episode_num: + name += 'E%02d' % episode_num + + return name From 2d2b0c90486b548193b71078522ae72651b0518a Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Thu, 3 Oct 2013 00:30:10 +1300 Subject: [PATCH 06/21] IPT provider now searches in multiple categories. --- couchpotato/core/providers/torrent/iptorrents/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/couchpotato/core/providers/torrent/iptorrents/main.py b/couchpotato/core/providers/torrent/iptorrents/main.py index 79e83d9..7d358cb 100644 --- a/couchpotato/core/providers/torrent/iptorrents/main.py +++ b/couchpotato/core/providers/torrent/iptorrents/main.py @@ -23,7 +23,7 @@ class Base(TorrentProvider): 'base_url' : 'http://www.iptorrents.com', 'login' : 'http://www.iptorrents.com/torrents/', 'login_check': 'http://www.iptorrents.com/inbox.php', - 'search' : 'http://www.iptorrents.com/torrents/?l%d=1%%s&q=%s&qf=ti&p=%%d', + 'search' : 'http://www.iptorrents.com/torrents/?%s%%s&q=%s&qf=ti&p=%%d', } http_time_between_calls = 1 #seconds @@ -31,13 +31,13 @@ class Base(TorrentProvider): def _buildUrl(self, query, quality_identifier, cat_ids_group = None): - # TODO this should support searching multiple cat_ids under a group - cat_id = self.getCatId(quality_identifier, cat_ids_group)[0] - if not cat_id: + cat_ids = self.getCatId(quality_identifier, cat_ids_group) + + if not len(cat_ids): log.warning('Unable to find category for quality %s', quality_identifier) return - return self.urls['search'] % (cat_id, tryUrlencode(query).replace('%', '%%')) + return self.urls['search'] % ("&".join(("l%d=" % x) for x in cat_ids), tryUrlencode(query).replace('%', '%%')) def _searchOnTitle(self, title, media, quality, results): From 8d368ecf29dbfe476b08ee6dde918242740ad6f6 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Thu, 3 Oct 2013 00:59:15 +1300 Subject: [PATCH 07/21] 'searcher.correct_release' can now return a float indicating the weight/accuracy which is used to scale the score. Fix to IPT _buildUrl method. --- couchpotato/core/providers/base.py | 14 ++++++++++++-- couchpotato/core/providers/torrent/iptorrents/main.py | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/couchpotato/core/providers/base.py b/couchpotato/core/providers/base.py index 249ae85..2fc1e38 100644 --- a/couchpotato/core/providers/base.py +++ b/couchpotato/core/providers/base.py @@ -302,12 +302,22 @@ class ResultList(list): new_result = self.fillResult(result) - is_correct_movie = fireEvent('searcher.correct_release', new_result, self.movie, self.quality, + is_correct = fireEvent('searcher.correct_release', new_result, self.movie, self.quality, imdb_results = self.kwargs.get('imdb_results', False), single = True) - if is_correct_movie and new_result['id'] not in self.result_ids: + if is_correct and new_result['id'] not in self.result_ids: + is_correct_weight = float(is_correct) + new_result['score'] += fireEvent('score.calculate', new_result, self.movie, single = True) + old_score = new_result['score'] + new_result['score'] = int(old_score * is_correct_weight) + log.info('Found correct release with weight %.02f, old_score(%d) now scaled to score(%d)', ( + is_correct_weight, + old_score, + new_result['score'] + )) + self.found(new_result) self.result_ids.append(result['id']) diff --git a/couchpotato/core/providers/torrent/iptorrents/main.py b/couchpotato/core/providers/torrent/iptorrents/main.py index 7d358cb..55fa815 100644 --- a/couchpotato/core/providers/torrent/iptorrents/main.py +++ b/couchpotato/core/providers/torrent/iptorrents/main.py @@ -33,7 +33,7 @@ class Base(TorrentProvider): cat_ids = self.getCatId(quality_identifier, cat_ids_group) - if not len(cat_ids): + if not cat_ids or not len(cat_ids): log.warning('Unable to find category for quality %s', quality_identifier) return From 0793668e5cd47ab031fd2f5bcb4d53383fea40cf Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Thu, 3 Oct 2013 01:00:46 +1300 Subject: [PATCH 08/21] Chain result weight now returned from TV searcher correctRelease function. --- couchpotato/core/media/show/searcher/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index 7c1e847..48f8dcc 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -200,7 +200,7 @@ class ShowSearcher(Plugin): library_title = ' '.join(valid_words) if valid_words == chain_words: - return True + return chain.weight log.info("Wrong: title '%s', undetermined show naming. Looking for '%s (%s)'", (chain_title, library_title, media['library']['year'])) return False From 529b535d9f697ded7a0911222c13a95245c39f71 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Thu, 3 Oct 2013 01:30:13 +1300 Subject: [PATCH 09/21] Added 'searcher.get_media_searcher_id' event, Cleaned up some 'status.get' calls, Renamed some references of 'nzb' to 'rel'. --- couchpotato/core/media/_base/searcher/main.py | 24 ++++++++---------------- couchpotato/core/media/movie/searcher/main.py | 5 +++++ couchpotato/core/providers/base.py | 1 + 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py index ec1c814..a986f3d 100644 --- a/couchpotato/core/media/_base/searcher/main.py +++ b/couchpotato/core/media/_base/searcher/main.py @@ -55,7 +55,7 @@ class Searcher(SearcherBase): return progress def tryDownloadResult(self, results, media, quality_type, manual = False): - available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True) + ignored_status, failed_status = fireEvent('status.get', ['ignored', 'failed'], single = True) for rel in results: if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and rel.get('age') <= quality_type.get('wait_for', 0): @@ -89,8 +89,7 @@ class Searcher(SearcherBase): downloader_enabled = fireEvent('download.enabled', manual, data, single = True) if downloader_enabled: - - snatched_status = fireEvent('status.get', 'snatched', single = True) + snatched_status, active_status, done_status = fireEvent('status.get', ['snatched', 'active', 'done'], single = True) # Download release to temp filedata = None @@ -110,7 +109,6 @@ class Searcher(SearcherBase): if rls: renamer_enabled = Env.setting('enabled', 'renamer') - done_status = fireEvent('status.get', 'done', single = True) rls.status_id = done_status.get('id') if not renamer_enabled else snatched_status.get('id') # Save download-id info if returned @@ -130,8 +128,6 @@ class Searcher(SearcherBase): # If renamer isn't used, mark media done if not renamer_enabled: - active_status = fireEvent('status.get', 'active', single = True) - done_status = fireEvent('status.get', 'done', single = True) try: if media['status_id'] == active_status.get('id'): for profile_type in media['profile']['types']: @@ -163,14 +159,10 @@ class Searcher(SearcherBase): def search(self, protocols, media, quality): results = [] - search_type = None - if media['type'] == 'movie': - search_type = 'movie' - elif media['type'] in ['show', 'season', 'episode']: - search_type = 'show' + searcher_id = fireEvent('searcher.get_media_searcher_id', media['type'], single = True) for search_protocol in protocols: - protocol_results = fireEvent('provider.search.%s.%s' % (search_protocol, search_type), media, quality, merge = True) + protocol_results = fireEvent('provider.search.%s.%s' % (search_protocol, searcher_id), media, quality, merge = True) if protocol_results: results += protocol_results @@ -191,13 +183,13 @@ class Searcher(SearcherBase): for rel in search_results: - nzb_identifier = md5(rel['url']) - found_releases.append(nzb_identifier) + rel_identifier = md5(rel['url']) + found_releases.append(rel_identifier) - rls = db.query(Release).filter_by(identifier = nzb_identifier).first() + rls = db.query(Release).filter_by(identifier = rel_identifier).first() if not rls: rls = Release( - identifier = nzb_identifier, + identifier = rel_identifier, media_id = media.get('id'), quality_id = quality_type.get('quality_id'), status_id = available_status.get('id') diff --git a/couchpotato/core/media/movie/searcher/main.py b/couchpotato/core/media/movie/searcher/main.py index 0f90941..70b0278 100644 --- a/couchpotato/core/media/movie/searcher/main.py +++ b/couchpotato/core/media/movie/searcher/main.py @@ -31,6 +31,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase): addEvent('movie.searcher.could_be_released', self.couldBeReleased) addEvent('searcher.correct_release', self.correctRelease) addEvent('searcher.get_search_title', self.getSearchTitle) + addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId) addApiView('movie.searcher.try_next', self.tryNextReleaseView, docs = { 'desc': 'Marks the snatched results as ignored and try the next best release', @@ -343,5 +344,9 @@ class MovieSearcher(SearcherBase, MovieTypeBase): if media['type'] == 'movie': return getTitle(media['library']) + def getMediaSearcherId(self, media_type): + if media_type == 'movie': + return 'movie' + class SearchSetupError(Exception): pass diff --git a/couchpotato/core/providers/base.py b/couchpotato/core/providers/base.py index 2fc1e38..66f7182 100644 --- a/couchpotato/core/providers/base.py +++ b/couchpotato/core/providers/base.py @@ -312,6 +312,7 @@ class ResultList(list): old_score = new_result['score'] new_result['score'] = int(old_score * is_correct_weight) + log.info('Found correct release with weight %.02f, old_score(%d) now scaled to score(%d)', ( is_correct_weight, old_score, From 8d24d96804652905939366b1526097a723417281 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Thu, 3 Oct 2013 01:30:42 +1300 Subject: [PATCH 10/21] Implemented 'searcher.get_media_searcher_id' in the TV searcher. --- couchpotato/core/media/show/searcher/main.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index 48f8dcc..12e7d02 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -29,6 +29,7 @@ class ShowSearcher(Plugin): addEvent('show.searcher.single', self.single) addEvent('searcher.correct_release', self.correctRelease) addEvent('searcher.get_search_title', self.getSearchTitle) + addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId) self.caper = Caper() @@ -312,3 +313,7 @@ class ShowSearcher(Plugin): name += 'E%02d' % episode_num return name + + def getMediaSearcherId(self, media_type): + if media_type in ['show', 'season', 'episode']: + return 'show' From e3745b5d74dd5253d56d310db1d3b38d1a1dc4c2 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Fri, 4 Oct 2013 02:39:44 +1300 Subject: [PATCH 11/21] Updated Caper library --- libs/caper/constraint.py | 2 +- libs/caper/group.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/libs/caper/constraint.py b/libs/caper/constraint.py index 8e5ee28..d8f5280 100644 --- a/libs/caper/constraint.py +++ b/libs/caper/constraint.py @@ -38,7 +38,7 @@ class CaptureConstraint(object): def _compare_eq(self, fragment, name, expected): if not hasattr(fragment, name): - return None + return 1.0, False return 1.0, getattr(fragment, name) == expected diff --git a/libs/caper/group.py b/libs/caper/group.py index 3dcb00b..71b9766 100644 --- a/libs/caper/group.py +++ b/libs/caper/group.py @@ -14,8 +14,9 @@ from logr import Logr +from caper import CaperClosure from caper.helpers import clean_dict -from caper.result import CaperFragmentNode +from caper.result import CaperFragmentNode, CaperClosureNode from caper.step import CaptureStep from caper.constraint import CaptureConstraint @@ -70,7 +71,9 @@ class CaptureGroup(object): def parse_subject(self, parent_head, subject): parent_node = parent_head[0] if type(parent_head) is list else parent_head - # TODO - if subject is a closure? + # TODO just jumping into closures for now, will be fixed later + if type(subject) is CaperClosure: + return [CaperClosureNode(subject, parent_head)] nodes = [] From fb5b17005f47603fb27502a8f4d21234ddf75be1 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Fri, 4 Oct 2013 02:43:57 +1300 Subject: [PATCH 12/21] Cleaned up status.get calls in TV searcher --- couchpotato/core/media/show/searcher/main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index 12e7d02..ec9da74 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -68,7 +68,7 @@ class ShowSearcher(Plugin): except SearchSetupError: return - done_status = fireEvent('status.get', 'done', single = True) + done_status, available_status, ignored_status, failed_status = fireEvent('status.get', ['done', 'available', 'ignored', 'failed'], single = True) if not media['profile'] or media['status_id'] == done_status.get('id'): log.debug('Episode doesn\'t have a profile or already done, assuming in manage tab.') @@ -76,8 +76,7 @@ class ShowSearcher(Plugin): db = get_session() - pre_releases = fireEvent('quality.pre_releases', single = True) - available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True) + #pre_releases = fireEvent('quality.pre_releases', single = True) found_releases = [] too_early_to_search = [] From 0a0935d635ce40b91afa6216ffe97267ad1255b5 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Sat, 5 Oct 2013 14:24:08 +1300 Subject: [PATCH 13/21] Fix to Provider getCatId when returning the cet_backup_id --- couchpotato/core/providers/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/couchpotato/core/providers/base.py b/couchpotato/core/providers/base.py index 66f7182..8760710 100644 --- a/couchpotato/core/providers/base.py +++ b/couchpotato/core/providers/base.py @@ -274,7 +274,10 @@ class YarrProvider(Provider): if identifier in qualities: return ids - return [self.cat_backup_id] + if self.cat_backup_id: + return [self.cat_backup_id] + + return [] class ResultList(list): From 6259684487f084d79bee0c896508f15ac47d4bfb Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Sat, 5 Oct 2013 14:26:49 +1300 Subject: [PATCH 14/21] Moved caper matching into a new 'matcher' plugin. --- couchpotato/core/media/show/searcher/main.py | 228 ++++++++++----------------- couchpotato/core/plugins/matcher/__init__.py | 6 + couchpotato/core/plugins/matcher/main.py | 120 ++++++++++++++ 3 files changed, 213 insertions(+), 141 deletions(-) create mode 100644 couchpotato/core/plugins/matcher/__init__.py create mode 100644 couchpotato/core/plugins/matcher/main.py diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index ec9da74..e8c0655 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -5,7 +5,6 @@ from couchpotato.core.logger import CPLog from couchpotato.core.media._base.searcher.main import SearchSetupError from couchpotato.core.plugins.base import Plugin from couchpotato.core.settings.model import Media, Library -from caper import Caper log = CPLog(__name__) @@ -27,34 +26,14 @@ class ShowSearcher(Plugin): super(ShowSearcher, self).__init__() addEvent('show.searcher.single', self.single) - addEvent('searcher.correct_release', self.correctRelease) addEvent('searcher.get_search_title', self.getSearchTitle) - addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId) - - self.caper = Caper() - - def _lookupMedia(self, media): - db = get_session() - - media_library = db.query(Library).filter_by(id = media['library_id']).first() - show = None - season = None - episode = None - - if media['type'] == 'episode': - show = media_library.parent.parent - season = media_library.parent - episode = media_library - - if media['type'] == 'season': - show = media_library.parent - season = media_library - - if media['type'] == 'show': - show = media_library + addEvent('searcher.correct_match', self.correctMatch) + addEvent('searcher.correct_release', self.correctRelease) - return show, season, episode + addEvent('searcher.get_media_identifier', self.getMediaIdentifier) + addEvent('searcher.get_media_root', self.getMediaRoot) + addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId) def single(self, media, search_protocols = None, manual = False): if media['type'] == 'show': @@ -87,7 +66,7 @@ class ShowSearcher(Plugin): #fireEvent('episode.delete', episode['id'], single = True) return - show, season, episode = self._lookupMedia(media) + show, season, episode = self.getMedia(media) if show is None or season is None: log.error('Unable to find show or season library in database, missing required data for searching') return @@ -149,6 +128,28 @@ class ShowSearcher(Plugin): return ret + def getSearchTitle(self, media): + show, season, episode = self.getMedia(media) + if show is None: + return None + + # TODO this misses alternative titles from the database + show_title = getTitle(show) + if not show_title: + return None + + identifier = fireEvent('searcher.get_media_identifier', media['library'], single = True) + + name = show_title + + if identifier['season']: + name += ' S%02d' % identifier['season'] + + if identifier['episode']: + name += 'E%02d' % identifier['episode'] + + return name + def correctRelease(self, release = None, media = None, quality = None, **kwargs): if media.get('type') not in ['season', 'episode']: return @@ -163,156 +164,101 @@ class ShowSearcher(Plugin): if not fireEvent('searcher.correct_words', release['name'], media, single = True): return False - show, season, episode = self._lookupMedia(media) + show, season, episode = self.getMedia(media) if show is None or season is None: log.error('Unable to find show or season library in database, missing required data for searching') return - release_info = self.caper.parse(release['name']) - if len(release_info.chains) < 1: - log.info2('Wrong: %s, unable to parse release name (no chains)', release['name']) - return False - - # TODO look at all chains - chain = release_info.chains[0] - - if not self.correctQuality(chain, quality['identifier']): - log.info('Wrong: %s, quality does not match', release['name']) - return False - - if not self.correctIdentifier(chain, media): - log.info('Wrong: %s, identifier does not match', release['name']) - return False - - if 'show_name' not in chain.info or not len(chain.info['show_name']): - log.info('Wrong: %s, missing show name in parsed result', release['name']) - return False - - chain_words = [x.lower() for x in chain.info['show_name']] - chain_title = ' '.join(chain_words) - - library_title = None - - # Check show titles match - for raw_title in show.titles: - for valid_words in [x.split(' ') for x in possibleTitles(raw_title.title)]: - if not library_title: - library_title = ' '.join(valid_words) + match = fireEvent('matcher.best', release, media, quality, single = True) + if match: + return match.weight - if valid_words == chain_words: - return chain.weight - - log.info("Wrong: title '%s', undetermined show naming. Looking for '%s (%s)'", (chain_title, library_title, media['library']['year'])) return False - def correctQuality(self, chain, quality_identifier): - if quality_identifier not in self.quality_map: - log.info2('Wrong: unknown preferred quality %s for TV searching', quality_identifier) - return False - - if 'video' not in chain.info: - log.info2('Wrong: no video tags found') - return False - - video_tags = self.quality_map[quality_identifier] - - if not self.chainMatches(chain, 'video', video_tags): - log.info2('Wrong: %s tags not in chain', video_tags) - return False - - return True - - def correctIdentifier(self, chain, media): - required_id = self.getMediaIdentifier(media['library']) + def correctMatch(self, chain, release, media, quality): + log.info("Checking if '%s' is valid", release['name']) - if 'identifier' not in chain.info: + if not fireEvent('matcher.correct_quality', chain, quality, self.quality_map, single = True): + log.info('Wrong: %s, quality does not match', release['name']) return False - # TODO could be handled better? - if len(chain.info['identifier']) != 1: + if not fireEvent('matcher.correct_identifier', chain, media): + log.info('Wrong: %s, identifier does not match', release['name']) return False - identifier = chain.info['identifier'][0] - - # TODO air by date episodes - release_id = self.toNumericIdentifier(identifier.get('season'), identifier.get('episode')) - if required_id != release_id: - log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(release_id))) + if not fireEvent('matcher.correct_title', chain, media): + log.info("Wrong: '%s', undetermined naming. Looking for '%s (%s)'", ( + ' '.join(chain.info['show_name']), + 'library_title', + media['library']['year']) + ) return False return True def getMediaIdentifier(self, media_library): - identifier = None, None + if media_library['type'] not in ['show', 'season', 'episode']: + return None + + identifier = { + 'season': None, + 'episode': None + } if media_library['type'] == 'episode': map_episode = media_library['info'].get('map_episode') if map_episode and 'scene' in map_episode: - identifier = ( - map_episode['scene'].get('season'), - map_episode['scene'].get('episode') - ) + identifier['season'] = map_episode['scene'].get('season') + identifier['episode'] = map_episode['scene'].get('episode') else: # TODO xem mapping? - identifier = ( - media_library.get('season_number'), - media_library.get('episode_number') - ) + identifier['season'] = media_library.get('season_number') + identifier['episode'] = media_library.get('episode_number') if media_library['type'] == 'season': - identifier = media_library.get('season_number'), None - - return self.toNumericIdentifier(*identifier) - - def toNumericIdentifier(self, season, episode): - return tryInt(season, None), tryInt(episode, None) - - def chainMatches(self, chain, group, tags): - found_tags = [] + identifier['season'] = media_library.get('season_number') - for match in chain.info[group]: - for ck, cv in match.items(): - if ck in tags and self.cleanMatchValue(cv) in tags[ck]: - found_tags.append(ck) + # Try cast identifier values to integers + identifier['season'] = tryInt(identifier['season'], None) + identifier['episode'] = tryInt(identifier['episode'], None) + return identifier - if set(tags.keys()) == set(found_tags): - return True - - return set([key for key, value in tags.items() if None not in value]) == set(found_tags) - - def cleanMatchValue(self, value): - value = value.lower() - value = value.strip() + def getMediaRoot(self, media): + if media['type'] not in ['show', 'season', 'episode']: + return None - for ch in [' ', '-', '.']: - value = value.replace(ch, '') + show, season, episode = self.getMedia(media) + if show is None or season is None: + log.error('Unable to find show or season library in database, missing required data for searching') + return - return value + return show.to_dict() - def getSearchTitle(self, media): - show, season, episode = self._lookupMedia(media) - if show is None: - return None + def getMediaSearcherId(self, media_type): + if media_type in ['show', 'season', 'episode']: + return 'show' - # TODO this misses alternative titles from the database - show_title = getTitle(show) - if not show_title: - return None + def getMedia(self, media): + db = get_session() - season_num, episode_num = self.getMediaIdentifier(media['library']) + media_library = db.query(Library).filter_by(id = media['library_id']).first() - name = show_title + show = None + season = None + episode = None - if season_num: - name += ' S%02d' % season_num + if media['type'] == 'episode': + show = media_library.parent.parent + season = media_library.parent + episode = media_library - if episode_num: - name += 'E%02d' % episode_num + if media['type'] == 'season': + show = media_library.parent + season = media_library - return name + if media['type'] == 'show': + show = media_library - def getMediaSearcherId(self, media_type): - if media_type in ['show', 'season', 'episode']: - return 'show' + return show, season, episode \ No newline at end of file diff --git a/couchpotato/core/plugins/matcher/__init__.py b/couchpotato/core/plugins/matcher/__init__.py new file mode 100644 index 0000000..46b1a8e --- /dev/null +++ b/couchpotato/core/plugins/matcher/__init__.py @@ -0,0 +1,6 @@ +from .main import Matcher + +def start(): + return Matcher() + +config = [] diff --git a/couchpotato/core/plugins/matcher/main.py b/couchpotato/core/plugins/matcher/main.py new file mode 100644 index 0000000..dda9311 --- /dev/null +++ b/couchpotato/core/plugins/matcher/main.py @@ -0,0 +1,120 @@ +import pprint +from caper import Caper +from couchpotato import CPLog, tryInt +from couchpotato.core.event import addEvent, fireEvent +from couchpotato.core.helpers.variable import possibleTitles +from couchpotato.core.plugins.base import Plugin + +log = CPLog(__name__) + + +class Matcher(Plugin): + def __init__(self): + self.caper = Caper() + + addEvent('matcher.parse', self.parse) + addEvent('matcher.best', self.best) + + addEvent('matcher.correct_title', self.correctTitle) + addEvent('matcher.correct_identifier', self.correctIdentifier) + addEvent('matcher.correct_quality', self.correctQuality) + + def parse(self, release): + return self.caper.parse(release['name']) + + def best(self, release, media, quality): + rel_info = fireEvent('matcher.parse', release, single = True) + + if len(rel_info.chains) < 1: + log.info2('Wrong: %s, unable to parse release name (no chains)', release['name']) + return False + + for chain in rel_info.chains: + if fireEvent('searcher.correct_match', chain, release, media, quality, single = True): + return chain + + return None + + def chainMatches(self, chain, group, tags): + found_tags = [] + + for match in chain.info[group]: + for ck, cv in match.items(): + if ck in tags and self.cleanMatchValue(cv) in tags[ck]: + found_tags.append(ck) + + + if set(tags.keys()) == set(found_tags): + return True + + return set([key for key, value in tags.items() if None not in value]) == set(found_tags) + + def cleanMatchValue(self, value): + value = value.lower() + value = value.strip() + + for ch in [' ', '-', '.']: + value = value.replace(ch, '') + + return value + + def dictIsSubset(self, a, b): + return all([k in b and b[k] == v for k, v in a.items()]) + + def correctIdentifier(self, chain, media): + required_id = fireEvent('searcher.get_media_identifier', media['library'], single = True) + + if 'identifier' not in chain.info: + return False + + # TODO could be handled better? + if len(chain.info['identifier']) != 1: + return False + identifier = chain.info['identifier'][0] + + # TODO air by date episodes + + # TODO this should support identifiers with characters 'a', 'b', etc.. + for k, v in identifier.items(): + identifier[k] = tryInt(v, None) + + if not self.dictIsSubset(required_id, identifier): + log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(identifier))) + return False + + return True + + def correctTitle(self, chain, media): + root_library = fireEvent('searcher.get_media_root', media['library'], single = True) + + if 'show_name' not in chain.info or not len(chain.info['show_name']): + log.info('Wrong: missing show name in parsed result') + return False + + chain_words = [x.lower() for x in chain.info['show_name']] + + # Check show titles match + for title in root_library['info']['titles']: + for valid_words in [x.split(' ') for x in possibleTitles(title)]: + + if valid_words == chain_words: + return True + + return False + + def correctQuality(self, chain, quality, quality_map): + if quality['identifier'] not in quality_map: + log.info2('Wrong: unknown preferred quality %s', quality['identifier']) + return False + + if 'video' not in chain.info: + log.info2('Wrong: no video tags found') + return False + + video_tags = quality_map[quality['identifier']] + + if not self.chainMatches(chain, 'video', video_tags): + log.info2('Wrong: %s tags not in chain', video_tags) + return False + + return True From 07abf7c83da43d5b7f078d4fb9bb317500aed2e3 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Mon, 7 Oct 2013 01:35:18 +1300 Subject: [PATCH 15/21] Updated Caper to version 0.2.2 --- libs/caper/__init__.py | 2 +- libs/caper/matcher.py | 61 +--------------------------------------------- libs/caper/parsers/base.py | 57 ------------------------------------------- libs/caper/step.py | 12 --------- 4 files changed, 2 insertions(+), 130 deletions(-) diff --git a/libs/caper/__init__.py b/libs/caper/__init__.py index 23801ee..1638ec0 100644 --- a/libs/caper/__init__.py +++ b/libs/caper/__init__.py @@ -19,7 +19,7 @@ from caper.parsers.anime import AnimeParser from caper.parsers.scene import SceneParser -__version_info__ = ('0', '2', '0') +__version_info__ = ('0', '2', '2') __version_branch__ = 'master' __version__ = "%s%s" % ( diff --git a/libs/caper/matcher.py b/libs/caper/matcher.py index 24ef69a..23fdcf9 100644 --- a/libs/caper/matcher.py +++ b/libs/caper/matcher.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pprint import re from logr import Logr -from caper.helpers import is_list_type, clean_dict +from caper.helpers import is_list_type class FragmentMatcher(object): @@ -57,8 +56,6 @@ class FragmentMatcher(object): self.regex[group_name].append((weight, weight_patterns)) - pprint.pprint(self.regex) - def find_group(self, name): for group_name, weight_groups in self.regex.items(): if group_name and group_name == name: @@ -66,62 +63,6 @@ class FragmentMatcher(object): return None - def parser_match(self, parser, group_name, single=True): - """ - - :type parser: caper.parsers.base.Parser - """ - result = None - - for group, weight_groups in self.regex.items(): - if group_name and group != group_name: - continue - - # TODO handle multiple weights - weight, patterns = weight_groups[0] - - for pattern in patterns: - fragments = [] - pattern_matched = True - pattern_result = {} - - for fragment_pattern in pattern: - if not parser.fragment_available(): - pattern_matched = False - break - - fragment = parser.next_fragment() - fragments.append(fragment) - - Logr.debug('[r"%s"].match("%s")', fragment_pattern.pattern, fragment.value) - match = fragment_pattern.match(fragment.value) - if match: - Logr.debug('Pattern "%s" matched', fragment_pattern.pattern) - else: - pattern_matched = False - break - - pattern_result.update(clean_dict(match.groupdict())) - - if pattern_matched: - if result is None: - result = {} - - if group not in result: - result[group] = {} - - Logr.debug('Matched on <%s>', ' '.join([f.value for f in fragments])) - - result[group].update(pattern_result) - parser.commit() - - if single: - return result - else: - parser.rewind() - - return result - def value_match(self, value, group_name=None, single=True): result = None diff --git a/libs/caper/parsers/base.py b/libs/caper/parsers/base.py index 70bc733..d0e109e 100644 --- a/libs/caper/parsers/base.py +++ b/libs/caper/parsers/base.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from logr import Logr from caper import FragmentMatcher from caper.group import CaptureGroup from caper.result import CaperResult, CaperClosureNode @@ -60,62 +59,6 @@ class Parser(object): raise NotImplementedError() # - # Closure Methods - # - - def next_closure(self): - self._closure_pos += 1 - closure = self.closures[self._closure_pos] - - self._history.append(('fragment', -1 - self._fragment_pos)) - self._fragment_pos = -1 - - if self._closure_pos != 0: - self._history.append(('closure', 1)) - - Logr.debug('(next_closure) closure.value: "%s"', closure.value) - return closure - - def closure_available(self): - return self._closure_pos + 1 < len(self.closures) - - # - # Fragment Methods - # - - def next_fragment(self): - closure = self.closures[self._closure_pos] - - self._fragment_pos += 1 - fragment = closure.fragments[self._fragment_pos] - - self._history.append(('fragment', 1)) - - Logr.debug('(next_fragment) closure.value "%s" - fragment.value: "%s"', closure.value, fragment.value) - return fragment - - def fragment_available(self): - if not self.closure_available(): - return False - return self._fragment_pos + 1 < len(self.closures[self._closure_pos].fragments) - - def rewind(self): - for source, delta in reversed(self._history): - Logr.debug('(rewind) Rewinding step: %s', (source, delta)) - if source == 'fragment': - self._fragment_pos -= delta - elif source == 'closure': - self._closure_pos -= delta - else: - raise NotImplementedError() - - self.commit() - - def commit(self): - Logr.debug('(commit)') - self._history = [] - - # # Capture Methods # diff --git a/libs/caper/step.py b/libs/caper/step.py index eb94b6f..a82a930 100644 --- a/libs/caper/step.py +++ b/libs/caper/step.py @@ -33,18 +33,6 @@ class CaptureStep(object): #: @type: bool self.single = single - def _get_next_subject(self, parser): - if self.source == 'fragment': - if not parser.fragment_available(): - return None - return parser.next_fragment() - elif self.source == 'closure': - if not parser.closure_available(): - return None - return parser.next_closure() - - raise NotImplementedError() - def execute(self, fragment): if self.regex: weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex) From f23412ea7e1393e13311c96b06c61cd1c873edb3 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Tue, 15 Oct 2013 16:16:26 +1300 Subject: [PATCH 16/21] Added qcond (Query Condenser) v0.1.0 library - https://github.com/fuzeman/QueryCondenser --- libs/qcond/__init__.py | 42 +++++ libs/qcond/compat.py | 23 +++ libs/qcond/helpers.py | 84 ++++++++++ libs/qcond/transformers/__init__.py | 0 libs/qcond/transformers/base.py | 21 +++ libs/qcond/transformers/merge.py | 238 +++++++++++++++++++++++++++ libs/qcond/transformers/slice.py | 280 ++++++++++++++++++++++++++++++++ libs/qcond/transformers/strip_common.py | 26 +++ 8 files changed, 714 insertions(+) create mode 100644 libs/qcond/__init__.py create mode 100644 libs/qcond/compat.py create mode 100644 libs/qcond/helpers.py create mode 100644 libs/qcond/transformers/__init__.py create mode 100644 libs/qcond/transformers/base.py create mode 100644 libs/qcond/transformers/merge.py create mode 100644 libs/qcond/transformers/slice.py create mode 100644 libs/qcond/transformers/strip_common.py diff --git a/libs/qcond/__init__.py b/libs/qcond/__init__.py new file mode 100644 index 0000000..be64e7b --- /dev/null +++ b/libs/qcond/__init__.py @@ -0,0 +1,42 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from qcond.transformers.merge import MergeTransformer +from qcond.transformers.slice import SliceTransformer +from qcond.transformers.strip_common import StripCommonTransformer + + +__version_info__ = ('0', '1', '0') +__version_branch__ = 'master' + +__version__ = "%s%s" % ( + '.'.join(__version_info__), + '-' + __version_branch__ if __version_branch__ else '' +) + + +class QueryCondenser(object): + def __init__(self): + self.transformers = [ + MergeTransformer(), + SliceTransformer(), + StripCommonTransformer() + ] + + def distinct(self, titles): + for transformer in self.transformers: + titles = transformer.run(titles) + + return titles diff --git a/libs/qcond/compat.py b/libs/qcond/compat.py new file mode 100644 index 0000000..f3f0925 --- /dev/null +++ b/libs/qcond/compat.py @@ -0,0 +1,23 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import sys + +PY3 = sys.version_info[0] == 3 + +if PY3: + xrange = range +else: + xrange = xrange diff --git a/libs/qcond/helpers.py b/libs/qcond/helpers.py new file mode 100644 index 0000000..a341b6e --- /dev/null +++ b/libs/qcond/helpers.py @@ -0,0 +1,84 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from difflib import SequenceMatcher +import re +import sys +from logr import Logr +from qcond.compat import xrange + + +PY3 = sys.version_info[0] == 3 + + +def simplify(s): + s = s.lower() + s = re.sub(r"(\w)'(\w)", r"\1\2", s) + return s + + +def strip(s): + return re.sub(r"^(\W*)(.*?)(\W*)$", r"\2", s) + + +def create_matcher(a, b, swap_longest = True, case_sensitive = False): + # Ensure longest string is a + if swap_longest and len(b) > len(a): + a_ = a + a = b + b = a_ + + if not case_sensitive: + a = a.upper() + b = b.upper() + + return SequenceMatcher(None, a, b) + + +def first(function_or_none, sequence): + if PY3: + for item in filter(function_or_none, sequence): + return item + else: + result = filter(function_or_none, sequence) + if len(result): + return result[0] + + return None + +def sorted_append(sequence, item, func): + if not len(sequence): + sequence.insert(0, item) + return + + x = 0 + for x in xrange(len(sequence)): + if func(sequence[x]): + sequence.insert(x, item) + return + + sequence.append(item) + +def itemsMatch(L1, L2): + return len(L1) == len(L2) and sorted(L1) == sorted(L2) + +def distinct(sequence): + result = [] + + for item in sequence: + if item not in result: + result.append(item) + + return result \ No newline at end of file diff --git a/libs/qcond/transformers/__init__.py b/libs/qcond/transformers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/qcond/transformers/base.py b/libs/qcond/transformers/base.py new file mode 100644 index 0000000..7054729 --- /dev/null +++ b/libs/qcond/transformers/base.py @@ -0,0 +1,21 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class Transformer(object): + def __init__(self): + pass + + def run(self, titles): + raise NotImplementedError() diff --git a/libs/qcond/transformers/merge.py b/libs/qcond/transformers/merge.py new file mode 100644 index 0000000..d82f249 --- /dev/null +++ b/libs/qcond/transformers/merge.py @@ -0,0 +1,238 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from operator import itemgetter +from logr import Logr +from qcond.helpers import simplify, strip, first, sorted_append, distinct +from qcond.transformers.base import Transformer +from qcond.compat import xrange + + +class MergeTransformer(Transformer): + def __init__(self): + super(MergeTransformer, self).__init__() + + def run(self, titles): + titles = distinct([simplify(title) for title in titles]) + + Logr.info(str(titles)) + + Logr.debug("------------------------------------------------------------") + + root, tails = self.parse(titles) + + Logr.debug("--------------------------PARSE-----------------------------") + + for node in root: + print_tree(node) + + Logr.debug("--------------------------MERGE-----------------------------") + + self.merge(root) + + Logr.debug("--------------------------FINAL-----------------------------") + + for node in root: + print_tree(node) + + Logr.debug("--------------------------RESULT-----------------------------") + + scores = {} + results = [] + + for tail in tails: + score, value, original_value = tail.full_value() + + if value in scores: + scores[value] += score + else: + results.append((value, original_value)) + scores[value] = score + + Logr.debug("%s %s %s", score, value, original_value) + + sorted_results = sorted(results, key=lambda item: (scores[item[0]], item[1]), reverse = True) + + return [result[0] for result in sorted_results] + + def parse(self, titles): + root = [] + tails = [] + + for title in titles: + Logr.debug(title) + + cur = None + words = title.split(' ') + + for wx in xrange(len(words)): + word = strip(words[wx]) + + if cur is None: + cur = find_node(root, word) + + if cur is None: + cur = DNode(word, None, num_children=len(words) - wx, original_value=title) + root.append(cur) + else: + parent = cur + parent.weight += 1 + + cur = find_node(parent.right, word) + + if cur is None: + Logr.debug("%s %d", word, len(words) - wx) + cur = DNode(word, parent, num_children=len(words) - wx) + sorted_append(parent.right, cur, lambda a: a.num_children < cur.num_children) + else: + cur.weight += 1 + + tails.append(cur) + + return root, tails + + def merge(self, root): + for x in range(len(root)): + Logr.debug(root[x]) + root[x].right = self._merge(root[x].right) + Logr.debug('=================================================================') + + return root + + def get_nodes_right(self, value): + if type(value) is not list: + value = [value] + + nodes = [] + + for node in value: + nodes.append(node) + + for child in self.get_nodes_right(node.right): + nodes.append(child) + + return nodes + + def destroy_nodes_right(self, value): + nodes = self.get_nodes_right(value) + + for node in nodes: + node.value = None + node.dead = True + + def _merge(self, nodes, depth = 0): + Logr.debug(str('\t' * depth) + str(nodes)) + + top = nodes[0] + + # Merge into top + for x in range(len(nodes)): + # Merge extra results into top + if x > 0: + top.value = None + top.weight += nodes[x].weight + self.destroy_nodes_right(top.right) + + if len(nodes[x].right): + top.join_right(nodes[x].right) + + Logr.debug("= %s joined %s", nodes[x], top) + + nodes[x].dead = True + + nodes = [n for n in nodes if not n.dead] + + # Traverse further + for node in nodes: + if len(node.right): + node.right = self._merge(node.right, depth + 1) + + return nodes + + +def print_tree(node, depth = 0): + Logr.debug(str('\t' * depth) + str(node)) + + if len(node.right): + for child in node.right: + print_tree(child, depth + 1) + else: + Logr.debug(node.full_value()[1]) + + +def find_node(node_list, value): + # Try find adjacent node match + for node in node_list: + if node.value == value: + return node + + return None + + +class DNode(object): + def __init__(self, value, parent, right=None, weight=1, num_children=None, original_value=None): + self.value = value + + self.parent = parent + + if right is None: + right = [] + self.right = right + + self.weight = weight + + self.original_value = original_value + self.num_children = num_children + + self.dead = False + + def join_right(self, nodes): + for node in nodes: + duplicate = first(lambda x: x.value == node.value, self.right) + + if duplicate: + duplicate.weight += node.weight + duplicate.join_right(node.right) + else: + node.parent = self + self.right.append(node) + + def full_value(self): + words = [] + total_score = 0 + + cur = self + root = None + + while cur is not None: + if cur.value and not cur.dead: + words.insert(0, cur.value) + total_score += cur.weight + + if cur.parent is None: + root = cur + cur = cur.parent + + return float(total_score) / len(words), ' '.join(words), root.original_value if root else None + + def __repr__(self): + return '<%s value:"%s", weight: %s, num_children: %s%s%s>' % ( + 'DNode', + self.value, + self.weight, + self.num_children, + (', original_value: %s' % self.original_value) if self.original_value else '', + ' REMOVING' if self.dead else '' + ) diff --git a/libs/qcond/transformers/slice.py b/libs/qcond/transformers/slice.py new file mode 100644 index 0000000..864f673 --- /dev/null +++ b/libs/qcond/transformers/slice.py @@ -0,0 +1,280 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from logr import Logr +from qcond.helpers import create_matcher +from qcond.transformers.base import Transformer + + +class SliceTransformer(Transformer): + def __init__(self): + super(SliceTransformer, self).__init__() + + def run(self, titles): + nodes = [] + + # Create a node for each title + for title in titles: + nodes.append(SimNode(title)) + + # Calculate similarities between nodes + for node in nodes: + calculate_sim_links(node, [n for n in nodes if n != node]) + + kill_nodes_above(nodes, 0.90) + + Logr.debug('---------------------------------------------------------------------') + + print_link_tree(nodes) + Logr.debug('%s %s', len(nodes), [n.value for n in nodes]) + + Logr.debug('---------------------------------------------------------------------') + + kill_trailing_nodes(nodes) + + Logr.debug('---------------------------------------------------------------------') + + # Sort remaining nodes by 'num_merges' + nodes = sorted(nodes, key=lambda n: n.num_merges, reverse=True) + + print_link_tree(nodes) + + Logr.debug('---------------------------------------------------------------------') + + Logr.debug('%s %s', len(nodes), [n.value for n in nodes]) + + return [n.value for n in nodes] + + +class SimLink(object): + def __init__(self, similarity, opcodes, stats): + self.similarity = similarity + self.opcodes = opcodes + self.stats = stats + + +class SimNode(object): + def __init__(self, value): + self.value = value + + self.dead = False + self.num_merges = 0 + + self.links = {} # {: } + + +def kill_nodes(nodes, killed_nodes): + # Remove killed nodes from root list + for node in killed_nodes: + if node in nodes: + nodes.remove(node) + + # Remove killed nodes from links + for killed_node in killed_nodes: + for node in nodes: + if killed_node in node.links: + node.links.pop(killed_node) + + +def kill_nodes_above(nodes, above_sim): + killed_nodes = [] + + for node in nodes: + if node.dead: + continue + + Logr.debug(node.value) + + for link_node, link in node.links.items(): + if link_node.dead: + continue + + Logr.debug('\t%0.2f -- %s', link.similarity, link_node.value) + + if link.similarity >= above_sim: + if len(link_node.value) > len(node.value): + Logr.debug('\t\tvery similar, killed this node') + link_node.dead = True + node.num_merges += 1 + killed_nodes.append(link_node) + else: + Logr.debug('\t\tvery similar, killed owner') + node.dead = True + link_node.num_merges += 1 + killed_nodes.append(node) + + kill_nodes(nodes, killed_nodes) + + +def print_link_tree(nodes): + for node in nodes: + Logr.debug(node.value) + Logr.debug('\tnum_merges: %s', node.num_merges) + + if len(node.links): + Logr.debug('\t========== LINKS ==========') + for link_node, link in node.links.items(): + Logr.debug('\t%0.2f -- %s', link.similarity, link_node.value) + + Logr.debug('\t---------------------------') + + +def kill_trailing_nodes(nodes): + killed_nodes = [] + + for node in nodes: + if node.dead: + continue + + Logr.debug(node.value) + + for link_node, link in node.links.items(): + if link_node.dead: + continue + + is_valid = link.stats.get('valid', False) + + has_deletions = False + has_insertions = False + has_replacements = False + + for opcode in link.opcodes: + if opcode[0] == 'delete': + has_deletions = True + if opcode[0] == 'insert': + has_insertions = True + if opcode[0] == 'replace': + has_replacements = True + + equal_perc = link.stats.get('equal', 0) / float(len(node.value)) + insert_perc = link.stats.get('insert', 0) / float(len(node.value)) + + Logr.debug('\t({0:<24}) [{1:02d}:{2:02d} = {3:02d} {4:3.0f}% {5:3.0f}%] -- {6:<45}'.format( + 'd:%s, i:%s, r:%s' % (has_deletions, has_insertions, has_replacements), + len(node.value), len(link_node.value), link.stats.get('equal', 0), + equal_perc * 100, insert_perc * 100, + '"{0}"'.format(link_node.value) + )) + + Logr.debug('\t\t%s', link.stats) + + kill = all([ + is_valid, + equal_perc >= 0.5, + insert_perc < 2, + has_insertions, + not has_deletions, + not has_replacements + ]) + + if kill: + Logr.debug('\t\tkilled this node') + + link_node.dead = True + node.num_merges += 1 + killed_nodes.append(link_node) + + kill_nodes(nodes, killed_nodes) + +stats_print_format = "\t{0:<8} ({1:2d}:{2:2d}) ({3:2d}:{4:2d})" + + +def get_index_values(iterable, a, b): + return ( + iterable[a] if a else None, + iterable[b] if b else None + ) + + +def get_indices(iterable, a, b): + return ( + a if 0 < a < len(iterable) else None, + b if 0 < b < len(iterable) else None + ) + + +def get_opcode_stats(for_node, node, opcodes): + stats = {} + + for tag, i1, i2, j1, j2 in opcodes: + Logr.debug(stats_print_format.format( + tag, i1, i2, j1, j2 + )) + + if tag in ['insert', 'delete']: + ax = None, None + bx = None, None + + if tag == 'insert': + ax = get_indices(for_node.value, i1 - 1, i1) + bx = get_indices(node.value, j1, j2 - 1) + + if tag == 'delete': + ax = get_indices(for_node.value, j1 - 1, j1) + bx = get_indices(node.value, i1, i2 - 1) + + av = get_index_values(for_node.value, *ax) + bv = get_index_values(node.value, *bx) + + Logr.debug( + '\t\t%s %s [%s><%s] <---> %s %s [%s><%s]', + ax, av, av[0], av[1], + bx, bv, bv[0], bv[1] + ) + + head_valid = av[0] in [None, ' '] or bv[0] in [None, ' '] + tail_valid = av[1] in [None, ' '] or bv[1] in [None, ' '] + valid = head_valid and tail_valid + + if 'valid' not in stats or (stats['valid'] and not valid): + stats['valid'] = valid + + Logr.debug('\t\t' + ('VALID' if valid else 'INVALID')) + + if tag not in stats: + stats[tag] = 0 + + stats[tag] += (i2 - i1) or (j2 - j1) + + return stats + + +def calculate_sim_links(for_node, other_nodes): + for node in other_nodes: + if node in for_node.links: + continue + + Logr.debug('calculating similarity between "%s" and "%s"', for_node.value, node.value) + + # Get similarity + similarity_matcher = create_matcher(for_node.value, node.value) + similarity = similarity_matcher.quick_ratio() + + # Get for_node -> node opcodes + a_opcodes_matcher = create_matcher(for_node.value, node.value, swap_longest = False) + a_opcodes = a_opcodes_matcher.get_opcodes() + a_stats = get_opcode_stats(for_node, node, a_opcodes) + + Logr.debug('-' * 100) + + # Get node -> for_node opcodes + b_opcodes_matcher = create_matcher(node.value, for_node.value, swap_longest = False) + b_opcodes = b_opcodes_matcher.get_opcodes() + b_stats = get_opcode_stats(for_node, node, b_opcodes) + + for_node.links[node] = SimLink(similarity, a_opcodes, a_stats) + node.links[for_node] = SimLink(similarity, b_opcodes, b_stats) + + #raw_input('Press ENTER to continue') diff --git a/libs/qcond/transformers/strip_common.py b/libs/qcond/transformers/strip_common.py new file mode 100644 index 0000000..47b8401 --- /dev/null +++ b/libs/qcond/transformers/strip_common.py @@ -0,0 +1,26 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from qcond.transformers.base import Transformer + + +COMMON_WORDS = [ + 'the' +] + + +class StripCommonTransformer(Transformer): + def run(self, titles): + return [title for title in titles if title.lower() not in COMMON_WORDS] From da87e68fad461b36b2ae7b19628cbd3898d1604c Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Tue, 15 Oct 2013 21:50:43 +1300 Subject: [PATCH 17/21] Implemented basic usage of QueryCondenser --- couchpotato/core/media/show/searcher/main.py | 53 ++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index e8c0655..59eb92c 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -1,10 +1,12 @@ from couchpotato import get_session, Env from couchpotato.core.event import addEvent, fireEvent -from couchpotato.core.helpers.variable import getTitle, tryInt, possibleTitles +from couchpotato.core.helpers.variable import getTitle, tryInt from couchpotato.core.logger import CPLog from couchpotato.core.media._base.searcher.main import SearchSetupError from couchpotato.core.plugins.base import Plugin from couchpotato.core.settings.model import Media, Library +from qcond import QueryCondenser +from qcond.helpers import simplify log = CPLog(__name__) @@ -25,6 +27,8 @@ class ShowSearcher(Plugin): def __init__(self): super(ShowSearcher, self).__init__() + self.query_condenser = QueryCondenser() + addEvent('show.searcher.single', self.single) addEvent('searcher.get_search_title', self.getSearchTitle) @@ -34,6 +38,7 @@ class ShowSearcher(Plugin): addEvent('searcher.get_media_identifier', self.getMediaIdentifier) addEvent('searcher.get_media_root', self.getMediaRoot) addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId) + addEvent('searcher.get_media_titles', self.getMediaTitles) def single(self, media, search_protocols = None, manual = False): if media['type'] == 'show': @@ -129,26 +134,54 @@ class ShowSearcher(Plugin): return ret def getSearchTitle(self, media): + if media['type'] not in ['show', 'season', 'episode']: + return + show, season, episode = self.getMedia(media) if show is None: return None - # TODO this misses alternative titles from the database - show_title = getTitle(show) - if not show_title: + titles = [] + + # Add season map_names if they exist + if season is not None and 'map_names' in show.info: + season_names = show.info['map_names'].get(str(season.season_number), {}) + + # Add titles from all locations + # TODO only add name maps from a specific location + for location, names in season_names.items(): + titles += [name for name in names if name not in titles] + + # Add show titles + titles += [title.title for title in show.titles if title.title not in titles] + + # Use QueryCondenser to build a list of optimal search titles + condensed_titles = self.query_condenser.distinct(titles) + + title = None + + # TODO try other titles if searching doesn't return results + + if len(condensed_titles): + # Return the first condensed title if one exists + title = condensed_titles[0] + elif len(titles): + # Fallback to first raw title + title = simplify(titles[0]) + else: return None + # Add the identifier to search title + # TODO supporting other identifier formats identifier = fireEvent('searcher.get_media_identifier', media['library'], single = True) - name = show_title - if identifier['season']: - name += ' S%02d' % identifier['season'] + title += ' S%02d' % identifier['season'] if identifier['episode']: - name += 'E%02d' % identifier['episode'] + title += 'E%02d' % identifier['episode'] - return name + return title def correctRelease(self, release = None, media = None, quality = None, **kwargs): @@ -261,4 +294,4 @@ class ShowSearcher(Plugin): if media['type'] == 'show': show = media_library - return show, season, episode \ No newline at end of file + return show, season, episode From b90218638969ddc5b1f7300af0ff0190d9c15c30 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 16 Oct 2013 14:09:41 +1300 Subject: [PATCH 18/21] Cleaned up usage of helper functions --- couchpotato/core/helpers/variable.py | 3 +++ couchpotato/core/media/show/searcher/main.py | 1 - couchpotato/core/plugins/matcher/main.py | 19 ++++--------------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/couchpotato/core/helpers/variable.py b/couchpotato/core/helpers/variable.py index 15f9936..6296462 100644 --- a/couchpotato/core/helpers/variable.py +++ b/couchpotato/core/helpers/variable.py @@ -211,3 +211,6 @@ def randomString(size = 8, chars = string.ascii_uppercase + string.digits): def splitString(str, split_on = ',', clean = True): list = [x.strip() for x in str.split(split_on)] if str else [] return filter(None, list) if clean else list + +def dictIsSubset(a, b): + return all([k in b and b[k] == v for k, v in a.items()]) diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index 59eb92c..3e7b576 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -38,7 +38,6 @@ class ShowSearcher(Plugin): addEvent('searcher.get_media_identifier', self.getMediaIdentifier) addEvent('searcher.get_media_root', self.getMediaRoot) addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId) - addEvent('searcher.get_media_titles', self.getMediaTitles) def single(self, media, search_protocols = None, manual = False): if media['type'] == 'show': diff --git a/couchpotato/core/plugins/matcher/main.py b/couchpotato/core/plugins/matcher/main.py index dda9311..d9b7600 100644 --- a/couchpotato/core/plugins/matcher/main.py +++ b/couchpotato/core/plugins/matcher/main.py @@ -2,7 +2,8 @@ import pprint from caper import Caper from couchpotato import CPLog, tryInt from couchpotato.core.event import addEvent, fireEvent -from couchpotato.core.helpers.variable import possibleTitles +from couchpotato.core.helpers.encoding import simplifyString +from couchpotato.core.helpers.variable import possibleTitles, dictIsSubset from couchpotato.core.plugins.base import Plugin log = CPLog(__name__) @@ -40,7 +41,7 @@ class Matcher(Plugin): for match in chain.info[group]: for ck, cv in match.items(): - if ck in tags and self.cleanMatchValue(cv) in tags[ck]: + if ck in tags and simplifyString(cv) in tags[ck]: found_tags.append(ck) @@ -49,18 +50,6 @@ class Matcher(Plugin): return set([key for key, value in tags.items() if None not in value]) == set(found_tags) - def cleanMatchValue(self, value): - value = value.lower() - value = value.strip() - - for ch in [' ', '-', '.']: - value = value.replace(ch, '') - - return value - - def dictIsSubset(self, a, b): - return all([k in b and b[k] == v for k, v in a.items()]) - def correctIdentifier(self, chain, media): required_id = fireEvent('searcher.get_media_identifier', media['library'], single = True) @@ -78,7 +67,7 @@ class Matcher(Plugin): for k, v in identifier.items(): identifier[k] = tryInt(v, None) - if not self.dictIsSubset(required_id, identifier): + if not dictIsSubset(required_id, identifier): log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(identifier))) return False From 3fa21560be2917b05302869eee084f1f9a295f8b Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 16 Oct 2013 14:25:56 +1300 Subject: [PATCH 19/21] Moved 'searcher.create_releases' from Searcher to Release. --- couchpotato/core/media/_base/searcher/main.py | 48 ------------------------ couchpotato/core/media/movie/searcher/main.py | 2 +- couchpotato/core/media/show/searcher/main.py | 2 +- couchpotato/core/plugins/release/main.py | 54 +++++++++++++++++++++++++-- 4 files changed, 53 insertions(+), 53 deletions(-) diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py index a986f3d..1f6e9f3 100644 --- a/couchpotato/core/media/_base/searcher/main.py +++ b/couchpotato/core/media/_base/searcher/main.py @@ -7,7 +7,6 @@ from couchpotato.core.logger import CPLog from couchpotato.core.media._base.searcher.base import SearcherBase from couchpotato.core.settings.model import Media, Release, ReleaseInfo from couchpotato.environment import Env -from sqlalchemy.exc import InterfaceError from inspect import ismethod, isfunction import datetime import re @@ -28,7 +27,6 @@ class Searcher(SearcherBase): addEvent('searcher.try_download_result', self.tryDownloadResult) addEvent('searcher.download', self.download) addEvent('searcher.search', self.search) - addEvent('searcher.create_releases', self.createReleases) addApiView('searcher.full_search', self.searchAllView, docs = { 'desc': 'Starts a full search for all media', @@ -174,52 +172,6 @@ class Searcher(SearcherBase): return sorted_results - def createReleases(self, search_results, media, quality_type): - - available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True) - db = get_session() - - found_releases = [] - - for rel in search_results: - - rel_identifier = md5(rel['url']) - found_releases.append(rel_identifier) - - rls = db.query(Release).filter_by(identifier = rel_identifier).first() - if not rls: - rls = Release( - identifier = rel_identifier, - media_id = media.get('id'), - quality_id = quality_type.get('quality_id'), - status_id = available_status.get('id') - ) - db.add(rls) - else: - [db.delete(old_info) for old_info in rls.info] - rls.last_edit = int(time.time()) - - db.commit() - - for info in rel: - try: - if not isinstance(rel[info], (str, unicode, int, long, float)): - continue - - rls_info = ReleaseInfo( - identifier = info, - value = toUnicode(rel[info]) - ) - rls.info.append(rls_info) - except InterfaceError: - log.debug('Couldn\'t add %s to ReleaseInfo: %s', (info, traceback.format_exc())) - - db.commit() - - rel['status_id'] = rls.status_id - - return found_releases - def getSearchProtocols(self): download_protocols = fireEvent('download.enabled_protocols', merge = True) diff --git a/couchpotato/core/media/movie/searcher/main.py b/couchpotato/core/media/movie/searcher/main.py index 70b0278..0020c86 100644 --- a/couchpotato/core/media/movie/searcher/main.py +++ b/couchpotato/core/media/movie/searcher/main.py @@ -176,7 +176,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase): break # Add them to this movie releases list - found_releases += fireEvent('searcher.create_releases', results, movie, quality_type, single = True) + found_releases += fireEvent('release.create_from_search', results, movie, quality_type, single = True) # Try find a valid result and download it if fireEvent('searcher.try_download_result', results, movie, quality_type, manual, single = True): diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index 3e7b576..1cf1718 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -106,7 +106,7 @@ class ShowSearcher(Plugin): break # Add them to this movie releases list - found_releases += fireEvent('searcher.create_releases', results, media, quality_type, single = True) + found_releases += fireEvent('release.create_from_search', results, media, quality_type, single = True) # Try find a valid result and download it if fireEvent('searcher.try_download_result', results, media, quality_type, manual, single = True): diff --git a/couchpotato/core/plugins/release/main.py b/couchpotato/core/plugins/release/main.py index 833ceec..aa7ed50 100644 --- a/couchpotato/core/plugins/release/main.py +++ b/couchpotato/core/plugins/release/main.py @@ -1,11 +1,12 @@ -from couchpotato import get_session +from couchpotato import get_session, md5 from couchpotato.api import addApiView from couchpotato.core.event import fireEvent, addEvent -from couchpotato.core.helpers.encoding import ss +from couchpotato.core.helpers.encoding import ss, toUnicode from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from couchpotato.core.plugins.scanner.main import Scanner -from couchpotato.core.settings.model import File, Release as Relea, Media +from couchpotato.core.settings.model import File, Release as Relea, Media, ReleaseInfo +from sqlalchemy.exc import InterfaceError from sqlalchemy.orm import joinedload_all from sqlalchemy.sql.expression import and_, or_ import os @@ -45,6 +46,7 @@ class Release(Plugin): } }) + addEvent('release.create_from_search', self.createFromSearch) addEvent('release.for_movie', self.forMovie) addEvent('release.delete', self.delete) addEvent('release.clean', self.clean) @@ -213,6 +215,52 @@ class Release(Plugin): 'success': False } + def createFromSearch(self, search_results, media, quality_type): + + available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True) + db = get_session() + + found_releases = [] + + for rel in search_results: + + rel_identifier = md5(rel['url']) + found_releases.append(rel_identifier) + + rls = db.query(Relea).filter_by(identifier = rel_identifier).first() + if not rls: + rls = Relea( + identifier = rel_identifier, + media_id = media.get('id'), + quality_id = quality_type.get('quality_id'), + status_id = available_status.get('id') + ) + db.add(rls) + else: + [db.delete(old_info) for old_info in rls.info] + rls.last_edit = int(time.time()) + + db.commit() + + for info in rel: + try: + if not isinstance(rel[info], (str, unicode, int, long, float)): + continue + + rls_info = ReleaseInfo( + identifier = info, + value = toUnicode(rel[info]) + ) + rls.info.append(rls_info) + except InterfaceError: + log.debug('Couldn\'t add %s to ReleaseInfo: %s', (info, traceback.format_exc())) + + db.commit() + + rel['status_id'] = rls.status_id + + return found_releases + def forMovie(self, id = None): db = get_session() From 46d4d34da76cba0069ba45f873a4359e974691f8 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 16 Oct 2013 14:48:06 +1300 Subject: [PATCH 20/21] Minor cleanup to Searcher and Matcher --- couchpotato/core/media/_base/searcher/main.py | 1 + couchpotato/core/plugins/matcher/main.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py index 1f6e9f3..0ea8b23 100644 --- a/couchpotato/core/media/_base/searcher/main.py +++ b/couchpotato/core/media/_base/searcher/main.py @@ -157,6 +157,7 @@ class Searcher(SearcherBase): def search(self, protocols, media, quality): results = [] + # TODO could this be handled better? (removing the need for 'searcher.get_media_searcher_id') searcher_id = fireEvent('searcher.get_media_searcher_id', media['type'], single = True) for search_protocol in protocols: diff --git a/couchpotato/core/plugins/matcher/main.py b/couchpotato/core/plugins/matcher/main.py index d9b7600..64237e2 100644 --- a/couchpotato/core/plugins/matcher/main.py +++ b/couchpotato/core/plugins/matcher/main.py @@ -1,4 +1,3 @@ -import pprint from caper import Caper from couchpotato import CPLog, tryInt from couchpotato.core.event import addEvent, fireEvent @@ -36,7 +35,7 @@ class Matcher(Plugin): return None - def chainMatches(self, chain, group, tags): + def chainMatch(self, chain, group, tags): found_tags = [] for match in chain.info[group]: @@ -83,6 +82,7 @@ class Matcher(Plugin): chain_words = [x.lower() for x in chain.info['show_name']] # Check show titles match + # TODO check xem names for title in root_library['info']['titles']: for valid_words in [x.split(' ') for x in possibleTitles(title)]: @@ -102,7 +102,7 @@ class Matcher(Plugin): video_tags = quality_map[quality['identifier']] - if not self.chainMatches(chain, 'video', video_tags): + if not self.chainMatch(chain, 'video', video_tags): log.info2('Wrong: %s tags not in chain', video_tags) return False From 180576f2b774e13a7fba8fa90d2f0960d242367c Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Wed, 16 Oct 2013 14:53:22 +1300 Subject: [PATCH 21/21] Minor change to ShowSearcher.correctMatch logging --- couchpotato/core/media/show/searcher/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py index 1cf1718..3aa1294 100644 --- a/couchpotato/core/media/show/searcher/main.py +++ b/couchpotato/core/media/show/searcher/main.py @@ -219,11 +219,7 @@ class ShowSearcher(Plugin): return False if not fireEvent('matcher.correct_title', chain, media): - log.info("Wrong: '%s', undetermined naming. Looking for '%s (%s)'", ( - ' '.join(chain.info['show_name']), - 'library_title', - media['library']['year']) - ) + log.info("Wrong: '%s', undetermined naming.", (' '.join(chain.info['show_name']))) return False return True