From 60d8934444d7fb994ad978d7ce1bc4a44b369104 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 2 Oct 2013 15:56:22 +1300
Subject: [PATCH 01/21] Extracted 'searcher.try_download_result' event from
 the result download loop in MovieSearcher.single

---
 couchpotato/core/media/_base/searcher/main.py | 25 +++++++++++++++++++++++++
 couchpotato/core/media/movie/searcher/main.py | 22 +++-------------------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py
index 4348f41..68efcfa 100644
--- a/couchpotato/core/media/_base/searcher/main.py
+++ b/couchpotato/core/media/_base/searcher/main.py
@@ -25,6 +25,7 @@ class Searcher(SearcherBase):
         addEvent('searcher.correct_year', self.correctYear)
         addEvent('searcher.correct_name', self.correctName)
         addEvent('searcher.correct_words', self.correctWords)
+        addEvent('searcher.try_download_result', self.tryDownloadResult)
         addEvent('searcher.download', self.download)
         addEvent('searcher.search', self.search)
         addEvent('searcher.create_releases', self.createReleases)
@@ -53,6 +54,30 @@ class Searcher(SearcherBase):
         progress = fireEvent('searcher.progress', merge = True)
         return progress
 
+    def tryDownloadResult(self, results, media, quality_type, manual = False):
+        available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
+
+        for rel in results:
+            if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and rel.get('age') <= quality_type.get('wait_for', 0):
+                log.info('Ignored, waiting %s days: %s', (quality_type.get('wait_for'), rel['name']))
+                continue
+
+            if rel['status_id'] in [ignored_status.get('id'), failed_status.get('id')]:
+                log.info('Ignored: %s', rel['name'])
+                continue
+
+            if rel['score'] <= 0:
+                log.info('Ignored, score to low: %s', rel['name'])
+                continue
+
+            downloaded = fireEvent('searcher.download', data = rel, movie = media, manual = manual, single = True)
+            if downloaded is True:
+                return True
+            elif downloaded != 'try_next':
+                break
+
+        return False
+
     def download(self, data, movie, manual = False):
 
         if not data.get('protocol'):
diff --git a/couchpotato/core/media/movie/searcher/main.py b/couchpotato/core/media/movie/searcher/main.py
index d6c3367..0f90941 100644
--- a/couchpotato/core/media/movie/searcher/main.py
+++ b/couchpotato/core/media/movie/searcher/main.py
@@ -177,25 +177,9 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
                 # Add them to this movie releases list
                 found_releases += fireEvent('searcher.create_releases', results, movie, quality_type, single = True)
 
-                for nzb in results:
-                    if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and nzb.get('age') <= quality_type.get('wait_for', 0):
-                        log.info('Ignored, waiting %s days: %s', (quality_type.get('wait_for'), nzb['name']))
-                        continue
-
-                    if nzb['status_id'] in [ignored_status.get('id'), failed_status.get('id')]:
-                        log.info('Ignored: %s', nzb['name'])
-                        continue
-
-                    if nzb['score'] <= 0:
-                        log.info('Ignored, score to low: %s', nzb['name'])
-                        continue
-
-                    downloaded = fireEvent('searcher.download', data = nzb, movie = movie, manual = manual, single = True)
-                    if downloaded is True:
-                        ret = True
-                        break
-                    elif downloaded != 'try_next':
-                        break
+                # Try find a valid result and download it
+                if fireEvent('searcher.try_download_result', results, movie, quality_type, manual, single = True):
+                    ret = True
 
                 # Remove releases that aren't found anymore
                 for release in movie.get('releases', []):

From ff63b8a1c5d3dd4f463a1b61cebf5a955a0a529e Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 2 Oct 2013 15:57:08 +1300
Subject: [PATCH 02/21] Added TV release snatching/downloading

---
 couchpotato/core/media/show/searcher/main.py | 31 +++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index 41ac3ff..319e8f2 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -1,8 +1,5 @@
-import pprint
-import re
 from couchpotato import get_session, Env
 from couchpotato.core.event import addEvent, fireEvent
-from couchpotato.core.helpers.encoding import simplifyString
 from couchpotato.core.helpers.variable import getTitle, tryInt, possibleTitles
 from couchpotato.core.logger import CPLog
 from couchpotato.core.media._base.searcher.main import SearchSetupError
@@ -58,9 +55,7 @@ class ShowSearcher(Plugin):
 
         return show, season, episode
 
-    def single(self, media, search_protocols = None):
-        pprint.pprint(media)
-
+    def single(self, media, search_protocols = None, manual = False):
         if media['type'] == 'show':
             # TODO handle show searches (scan all seasons)
             return
@@ -130,7 +125,29 @@ class ShowSearcher(Plugin):
                 # Add them to this movie releases list
                 found_releases += fireEvent('searcher.create_releases', results, media, quality_type, single = True)
 
-                log.info('%d results found' % len(results))
+                # Try find a valid result and download it
+                if fireEvent('searcher.try_download_result', results, media, quality_type, manual, single = True):
+                    ret = True
+
+                # Remove releases that aren't found anymore
+                for release in media.get('releases', []):
+                    if release.get('status_id') == available_status.get('id') and release.get('identifier') not in found_releases:
+                        fireEvent('release.delete', release.get('id'), single = True)
+            else:
+                log.info('Better quality (%s) already available or snatched for %s', (quality_type['quality']['label'], default_title))
+                fireEvent('movie.restatus', media['id'])
+                break
+
+            # Break if CP wants to shut down
+            if self.shuttingDown() or ret:
+                break
+
+        if len(too_early_to_search) > 0:
+            log.info2('Too early to search for %s, %s', (too_early_to_search, default_title))
+
+        fireEvent('notify.frontend', type = 'show.searcher.ended.%s' % media['id'], data = True)
+
+        return ret
 
     def correctRelease(self, release = None, media = None, quality = None, **kwargs):
 

From 32c289fd3da92e654130a0b9c1c4a4e4318f17e8 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 2 Oct 2013 16:22:41 +1300
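Subject: [PATCH 03/21] Renamed 'movie' -> 'media' in 'searcher.download'

Also comments out the legacy data['type'] -> data['protocol'] fallback and
fires '<data type>.snatched' instead of the hard-coded 'movie.snatched'.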
---
 couchpotato/core/media/_base/searcher/main.py | 39 ++++++++++++++-------------
 couchpotato/core/plugins/release/main.py      |  2 +-
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py
index 68efcfa..ec1c814 100644
--- a/couchpotato/core/media/_base/searcher/main.py
+++ b/couchpotato/core/media/_base/searcher/main.py
@@ -70,7 +70,7 @@ class Searcher(SearcherBase):
                 log.info('Ignored, score to low: %s', rel['name'])
                 continue
 
-            downloaded = fireEvent('searcher.download', data = rel, movie = media, manual = manual, single = True)
+            downloaded = fireEvent('searcher.download', data = rel, media = media, manual = manual, single = True)
             if downloaded is True:
                 return True
             elif downloaded != 'try_next':
@@ -78,11 +78,12 @@ class Searcher(SearcherBase):
 
         return False
 
-    def download(self, data, movie, manual = False):
+    def download(self, data, media, manual = False):
 
-        if not data.get('protocol'):
-            data['protocol'] = data['type']
-            data['type'] = 'movie'
+        # TODO what is this for?
+        #if not data.get('protocol'):
+        #    data['protocol'] = data['type']
+        #    data['type'] = 'movie'
 
         # Test to see if any downloaders are enabled for this type
         downloader_enabled = fireEvent('download.enabled', manual, data, single = True)
@@ -91,14 +92,14 @@ class Searcher(SearcherBase):
 
             snatched_status = fireEvent('status.get', 'snatched', single = True)
 
-            # Download movie to temp
+            # Download release to temp
             filedata = None
             if data.get('download') and (ismethod(data.get('download')) or isfunction(data.get('download'))):
                 filedata = data.get('download')(url = data.get('url'), nzb_id = data.get('id'))
                 if filedata == 'try_next':
                     return filedata
 
-            download_result = fireEvent('download', data = data, movie = movie, manual = manual, filedata = filedata, single = True)
+            download_result = fireEvent('download', data = data, movie = media, manual = manual, filedata = filedata, single = True)
             log.debug('Downloader result: %s', download_result)
 
             if download_result:
@@ -122,36 +123,36 @@ class Searcher(SearcherBase):
                                 rls.info.append(rls_info)
                         db.commit()
 
-                        log_movie = '%s (%s) in %s' % (getTitle(movie['library']), movie['library']['year'], rls.quality.label)
+                        log_movie = '%s (%s) in %s' % (getTitle(media['library']), media['library']['year'], rls.quality.label)
                         snatch_message = 'Snatched "%s": %s' % (data.get('name'), log_movie)
                         log.info(snatch_message)
-                        fireEvent('movie.snatched', message = snatch_message, data = rls.to_dict())
+                        fireEvent('%s.snatched' % data['type'], message = snatch_message, data = rls.to_dict())
 
-                        # If renamer isn't used, mark movie done
+                        # If renamer isn't used, mark media done
                         if not renamer_enabled:
                             active_status = fireEvent('status.get', 'active', single = True)
                             done_status = fireEvent('status.get', 'done', single = True)
                             try:
-                                if movie['status_id'] == active_status.get('id'):
-                                    for profile_type in movie['profile']['types']:
+                                if media['status_id'] == active_status.get('id'):
+                                    for profile_type in media['profile']['types']:
                                         if profile_type['quality_id'] == rls.quality.id and profile_type['finish']:
-                                            log.info('Renamer disabled, marking movie as finished: %s', log_movie)
+                                            log.info('Renamer disabled, marking media as finished: %s', log_movie)
 
                                             # Mark release done
                                             rls.status_id = done_status.get('id')
                                             rls.last_edit = int(time.time())
                                             db.commit()
 
-                                            # Mark movie done
-                                            mvie = db.query(Media).filter_by(id = movie['id']).first()
-                                            mvie.status_id = done_status.get('id')
-                                            mvie.last_edit = int(time.time())
+                                            # Mark media done
+                                            mdia = db.query(Media).filter_by(id = media['id']).first()
+                                            mdia.status_id = done_status.get('id')
+                                            mdia.last_edit = int(time.time())
                                             db.commit()
                             except:
-                                log.error('Failed marking movie finished, renamer disabled: %s', traceback.format_exc())
+                                log.error('Failed marking media finished, renamer disabled: %s', traceback.format_exc())
 
                 except:
-                    log.error('Failed marking movie finished: %s', traceback.format_exc())
+                    log.error('Failed marking media finished: %s', traceback.format_exc())
 
                 return True
 
diff --git a/couchpotato/core/plugins/release/main.py b/couchpotato/core/plugins/release/main.py
index 9238e18..833ceec 100644
--- a/couchpotato/core/plugins/release/main.py
+++ b/couchpotato/core/plugins/release/main.py
@@ -191,7 +191,7 @@ class Release(Plugin):
             if item.get('protocol') != 'torrent_magnet':
                 item['download'] = provider.loginDownload if provider.urls.get('login') else provider.download
 
-            success = fireEvent('searcher.download', data = item, movie = rel.media.to_dict({
+            success = fireEvent('searcher.download', data = item, media = rel.media.to_dict({
                 'profile': {'types': {'quality': {}}},
                 'releases': {'status': {}, 'quality': {}},
                 'library': {'titles': {}, 'files':{}},

From 7ffa5dc7b679e05b4c7670b8301c6c70295f8c58 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 2 Oct 2013 20:06:12 +1300
Subject: [PATCH 04/21] Fixed IPT Show SD cat_ids

---
 couchpotato/core/providers/torrent/iptorrents/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/couchpotato/core/providers/torrent/iptorrents/main.py b/couchpotato/core/providers/torrent/iptorrents/main.py
index e8247c5..79e83d9 100644
--- a/couchpotato/core/providers/torrent/iptorrents/main.py
+++ b/couchpotato/core/providers/torrent/iptorrents/main.py
@@ -31,6 +31,7 @@ class Base(TorrentProvider):
 
     def _buildUrl(self, query, quality_identifier, cat_ids_group = None):
 
+        # TODO this should support searching multiple cat_ids under a group
         cat_id = self.getCatId(quality_identifier, cat_ids_group)[0]
         if not cat_id:
             log.warning('Unable to find category for quality %s', quality_identifier)
@@ -140,8 +141,7 @@ class Show(ShowProvider, Base):
         ]),
         ('episode', [
             ([5], ['hdtv_720p', 'webdl_720p', 'webdl_1080p']),
-            ([78], ['hdtv_sd']),
-            ([4, 79], ['hdtv_sd'])
+            ([4, 78, 79], ['hdtv_sd'])
         ])
     ]
 

From fb0719d6770c7c51194db832427c2c73f57ca563 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 2 Oct 2013 20:07:34 +1300
Subject: [PATCH 05/21] TV Searcher now supports xem scene mappings

---
 couchpotato/core/media/show/searcher/main.py | 58 ++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index 319e8f2..7c1e847 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -81,7 +81,7 @@ class ShowSearcher(Plugin):
         found_releases = []
         too_early_to_search = []
 
-        default_title = self.getSearchTitle(media['library'])
+        default_title = self.getSearchTitle(media)
         if not default_title:
             log.error('No proper info found for episode, removing it from library to cause it from having more issues.')
             #fireEvent('episode.delete', episode['id'], single = True)
@@ -223,7 +223,7 @@ class ShowSearcher(Plugin):
         return True
 
     def correctIdentifier(self, chain, media):
-        required_id = self.getIdentifier(media['library'], 'season_number', 'episode_number')
+        required_id = self.getMediaIdentifier(media['library'])
 
         if 'identifier' not in chain.info:
             return False
@@ -234,7 +234,7 @@ class ShowSearcher(Plugin):
         identifier = chain.info['identifier'][0]
 
         # TODO air by date episodes
-        release_id = self.getIdentifier(identifier, 'season', 'episode')
+        release_id = self.toNumericIdentifier(identifier.get('season'), identifier.get('episode'))
 
         if required_id != release_id:
             log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(release_id)))
@@ -242,11 +242,31 @@ class ShowSearcher(Plugin):
 
         return True
 
-    def getIdentifier(self, d, episode_key, season_key):
-        return (
-            tryInt(d.get(season_key), None) if season_key in d else None,
-            tryInt(d.get(episode_key), None) if episode_key in d else None
-        )
+    def getMediaIdentifier(self, media_library):
+        identifier = None, None
+
+        if media_library['type'] == 'episode':
+            map_episode = media_library['info'].get('map_episode')
+
+            if map_episode and 'scene' in map_episode:
+                identifier = (
+                    map_episode['scene'].get('season'),
+                    map_episode['scene'].get('episode')
+                )
+            else:
+                # TODO xem mapping?
+                identifier = (
+                    media_library.get('season_number'),
+                    media_library.get('episode_number')
+                )
+
+        if media_library['type'] == 'season':
+            identifier = media_library.get('season_number'), None
+
+        return self.toNumericIdentifier(*identifier)
+
+    def toNumericIdentifier(self, season, episode):
+        return tryInt(season, None), tryInt(episode, None)
 
     def chainMatches(self, chain, group, tags):
         found_tags = []
@@ -260,7 +280,7 @@ class ShowSearcher(Plugin):
         if set(tags.keys()) == set(found_tags):
             return True
 
-        return set([key for key, value in tags.items() if value]) == set(found_tags)
+        return set([key for key, value in tags.items() if None not in value]) == set(found_tags)
 
     def cleanMatchValue(self, value):
         value = value.lower()
@@ -276,15 +296,19 @@ class ShowSearcher(Plugin):
         if show is None:
             return None
 
-        name = ''
-        if season is not None:
-            name = ' S%02d' % season.season_number
-
-            if episode is not None:
-                name += 'E%02d' % episode.episode_number
-
+        # TODO this misses alternative titles from the database
         show_title = getTitle(show)
         if not show_title:
             return None
 
-        return show_title + name
+        season_num, episode_num = self.getMediaIdentifier(media['library'])
+
+        name = show_title
+
+        if season_num:
+            name += ' S%02d' % season_num
+
+            if episode_num:
+                name += 'E%02d' % episode_num
+
+        return name

From 2d2b0c90486b548193b71078522ae72651b0518a Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Thu, 3 Oct 2013 00:30:10 +1300
Subject: [PATCH 06/21] IPT provider now searches in multiple categories.

---
 couchpotato/core/providers/torrent/iptorrents/main.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/couchpotato/core/providers/torrent/iptorrents/main.py b/couchpotato/core/providers/torrent/iptorrents/main.py
index 79e83d9..7d358cb 100644
--- a/couchpotato/core/providers/torrent/iptorrents/main.py
+++ b/couchpotato/core/providers/torrent/iptorrents/main.py
@@ -23,7 +23,7 @@ class Base(TorrentProvider):
         'base_url' : 'http://www.iptorrents.com',
         'login' : 'http://www.iptorrents.com/torrents/',
         'login_check': 'http://www.iptorrents.com/inbox.php',
-        'search' : 'http://www.iptorrents.com/torrents/?l%d=1%%s&q=%s&qf=ti&p=%%d',
+        'search' : 'http://www.iptorrents.com/torrents/?%s%%s&q=%s&qf=ti&p=%%d',
     }
 
     http_time_between_calls = 1 #seconds
@@ -31,13 +31,13 @@ class Base(TorrentProvider):
 
     def _buildUrl(self, query, quality_identifier, cat_ids_group = None):
 
-        # TODO this should support searching multiple cat_ids under a group
-        cat_id = self.getCatId(quality_identifier, cat_ids_group)[0]
-        if not cat_id:
+        cat_ids = self.getCatId(quality_identifier, cat_ids_group)
+
+        if not len(cat_ids):
             log.warning('Unable to find category for quality %s', quality_identifier)
             return
 
-        return self.urls['search'] % (cat_id, tryUrlencode(query).replace('%', '%%'))
+        return self.urls['search'] % ("&".join(("l%d=" % x) for x in cat_ids), tryUrlencode(query).replace('%', '%%'))
 
     def _searchOnTitle(self, title, media, quality, results):
 

From 8d368ecf29dbfe476b08ee6dde918242740ad6f6 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Thu, 3 Oct 2013 00:59:15 +1300
Subject: [PATCH 07/21] 'searcher.correct_release' can now return a float
 indicating the weight/accuracy which is used to scale the score. Guard IPT
 _buildUrl against a falsy (e.g. None) cat_ids result.

---
 couchpotato/core/providers/base.py                    | 14 ++++++++++++--
 couchpotato/core/providers/torrent/iptorrents/main.py |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/couchpotato/core/providers/base.py b/couchpotato/core/providers/base.py
index 249ae85..2fc1e38 100644
--- a/couchpotato/core/providers/base.py
+++ b/couchpotato/core/providers/base.py
@@ -302,12 +302,22 @@ class ResultList(list):
 
         new_result = self.fillResult(result)
 
-        is_correct_movie = fireEvent('searcher.correct_release', new_result, self.movie, self.quality,
+        is_correct = fireEvent('searcher.correct_release', new_result, self.movie, self.quality,
                                      imdb_results = self.kwargs.get('imdb_results', False), single = True)
 
-        if is_correct_movie and new_result['id'] not in self.result_ids:
+        if is_correct and new_result['id'] not in self.result_ids:
+            is_correct_weight = float(is_correct)
+
             new_result['score'] += fireEvent('score.calculate', new_result, self.movie, single = True)
 
+            old_score = new_result['score']
+            new_result['score'] = int(old_score * is_correct_weight)
+            log.info('Found correct release with weight %.02f, old_score(%d) now scaled to score(%d)', (
+                is_correct_weight,
+                old_score,
+                new_result['score']
+            ))
+
             self.found(new_result)
             self.result_ids.append(result['id'])
 
diff --git a/couchpotato/core/providers/torrent/iptorrents/main.py b/couchpotato/core/providers/torrent/iptorrents/main.py
index 7d358cb..55fa815 100644
--- a/couchpotato/core/providers/torrent/iptorrents/main.py
+++ b/couchpotato/core/providers/torrent/iptorrents/main.py
@@ -33,7 +33,7 @@ class Base(TorrentProvider):
 
         cat_ids = self.getCatId(quality_identifier, cat_ids_group)
 
-        if not len(cat_ids):
+        if not cat_ids or not len(cat_ids):
             log.warning('Unable to find category for quality %s', quality_identifier)
             return
 

From 0793668e5cd47ab031fd2f5bcb4d53383fea40cf Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Thu, 3 Oct 2013 01:00:46 +1300
Subject: [PATCH 08/21] Chain result weight now returned from TV searcher
 correctRelease function.

---
 couchpotato/core/media/show/searcher/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index 7c1e847..48f8dcc 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -200,7 +200,7 @@ class ShowSearcher(Plugin):
                     library_title = ' '.join(valid_words)
 
                 if valid_words == chain_words:
-                    return True
+                    return chain.weight
 
         log.info("Wrong: title '%s', undetermined show naming. Looking for '%s (%s)'", (chain_title, library_title, media['library']['year']))
         return False

From 529b535d9f697ded7a0911222c13a95245c39f71 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Thu, 3 Oct 2013 01:30:13 +1300
Subject: [PATCH 09/21] Added 'searcher.get_media_searcher_id' event, Cleaned
 up some 'status.get' calls, Renamed some references of 'nzb' to 'rel'.

---
 couchpotato/core/media/_base/searcher/main.py | 24 ++++++++----------------
 couchpotato/core/media/movie/searcher/main.py |  5 +++++
 couchpotato/core/providers/base.py            |  1 +
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py
index ec1c814..a986f3d 100644
--- a/couchpotato/core/media/_base/searcher/main.py
+++ b/couchpotato/core/media/_base/searcher/main.py
@@ -55,7 +55,7 @@ class Searcher(SearcherBase):
         return progress
 
     def tryDownloadResult(self, results, media, quality_type, manual = False):
-        available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
+        ignored_status, failed_status = fireEvent('status.get', ['ignored', 'failed'], single = True)
 
         for rel in results:
             if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and rel.get('age') <= quality_type.get('wait_for', 0):
@@ -89,8 +89,7 @@ class Searcher(SearcherBase):
         downloader_enabled = fireEvent('download.enabled', manual, data, single = True)
 
         if downloader_enabled:
-
-            snatched_status = fireEvent('status.get', 'snatched', single = True)
+            snatched_status, active_status, done_status = fireEvent('status.get', ['snatched', 'active', 'done'], single = True)
 
             # Download release to temp
             filedata = None
@@ -110,7 +109,6 @@ class Searcher(SearcherBase):
                     if rls:
                         renamer_enabled = Env.setting('enabled', 'renamer')
 
-                        done_status = fireEvent('status.get', 'done', single = True)
                         rls.status_id = done_status.get('id') if not renamer_enabled else snatched_status.get('id')
 
                         # Save download-id info if returned
@@ -130,8 +128,6 @@ class Searcher(SearcherBase):
 
                         # If renamer isn't used, mark media done
                         if not renamer_enabled:
-                            active_status = fireEvent('status.get', 'active', single = True)
-                            done_status = fireEvent('status.get', 'done', single = True)
                             try:
                                 if media['status_id'] == active_status.get('id'):
                                     for profile_type in media['profile']['types']:
@@ -163,14 +159,10 @@ class Searcher(SearcherBase):
     def search(self, protocols, media, quality):
         results = []
 
-        search_type = None
-        if media['type'] == 'movie':
-            search_type = 'movie'
-        elif media['type'] in ['show', 'season', 'episode']:
-            search_type = 'show'
+        searcher_id = fireEvent('searcher.get_media_searcher_id', media['type'], single = True)
 
         for search_protocol in protocols:
-            protocol_results = fireEvent('provider.search.%s.%s' % (search_protocol, search_type), media, quality, merge = True)
+            protocol_results = fireEvent('provider.search.%s.%s' % (search_protocol, searcher_id), media, quality, merge = True)
             if protocol_results:
                 results += protocol_results
 
@@ -191,13 +183,13 @@ class Searcher(SearcherBase):
 
         for rel in search_results:
 
-            nzb_identifier = md5(rel['url'])
-            found_releases.append(nzb_identifier)
+            rel_identifier = md5(rel['url'])
+            found_releases.append(rel_identifier)
 
-            rls = db.query(Release).filter_by(identifier = nzb_identifier).first()
+            rls = db.query(Release).filter_by(identifier = rel_identifier).first()
             if not rls:
                 rls = Release(
-                    identifier = nzb_identifier,
+                    identifier = rel_identifier,
                     media_id = media.get('id'),
                     quality_id = quality_type.get('quality_id'),
                     status_id = available_status.get('id')
diff --git a/couchpotato/core/media/movie/searcher/main.py b/couchpotato/core/media/movie/searcher/main.py
index 0f90941..70b0278 100644
--- a/couchpotato/core/media/movie/searcher/main.py
+++ b/couchpotato/core/media/movie/searcher/main.py
@@ -31,6 +31,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
         addEvent('movie.searcher.could_be_released', self.couldBeReleased)
         addEvent('searcher.correct_release', self.correctRelease)
         addEvent('searcher.get_search_title', self.getSearchTitle)
+        addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId)
 
         addApiView('movie.searcher.try_next', self.tryNextReleaseView, docs = {
             'desc': 'Marks the snatched results as ignored and try the next best release',
@@ -343,5 +344,9 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
         if media['type'] == 'movie':
             return getTitle(media['library'])
 
+    def getMediaSearcherId(self, media_type):
+        if media_type == 'movie':
+            return 'movie'
+
 class SearchSetupError(Exception):
     pass
diff --git a/couchpotato/core/providers/base.py b/couchpotato/core/providers/base.py
index 2fc1e38..66f7182 100644
--- a/couchpotato/core/providers/base.py
+++ b/couchpotato/core/providers/base.py
@@ -312,6 +312,7 @@ class ResultList(list):
 
             old_score = new_result['score']
             new_result['score'] = int(old_score * is_correct_weight)
+
             log.info('Found correct release with weight %.02f, old_score(%d) now scaled to score(%d)', (
                 is_correct_weight,
                 old_score,

From 8d24d96804652905939366b1526097a723417281 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Thu, 3 Oct 2013 01:30:42 +1300
Subject: [PATCH 10/21] Implemented 'searcher.get_media_searcher_id' in the TV
 searcher.

---
 couchpotato/core/media/show/searcher/main.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index 48f8dcc..12e7d02 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -29,6 +29,7 @@ class ShowSearcher(Plugin):
         addEvent('show.searcher.single', self.single)
         addEvent('searcher.correct_release', self.correctRelease)
         addEvent('searcher.get_search_title', self.getSearchTitle)
+        addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId)
 
         self.caper = Caper()
 
@@ -312,3 +313,7 @@ class ShowSearcher(Plugin):
                 name += 'E%02d' % episode_num
 
         return name
+
+    def getMediaSearcherId(self, media_type):
+        if media_type in ['show', 'season', 'episode']:
+            return 'show'

From e3745b5d74dd5253d56d310db1d3b38d1a1dc4c2 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Fri, 4 Oct 2013 02:39:44 +1300
Subject: [PATCH 11/21] Updated Caper library

---
 libs/caper/constraint.py | 2 +-
 libs/caper/group.py      | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/libs/caper/constraint.py b/libs/caper/constraint.py
index 8e5ee28..d8f5280 100644
--- a/libs/caper/constraint.py
+++ b/libs/caper/constraint.py
@@ -38,7 +38,7 @@ class CaptureConstraint(object):
 
     def _compare_eq(self, fragment, name, expected):
         if not hasattr(fragment, name):
-            return None
+            return 1.0, False
 
         return 1.0, getattr(fragment, name) == expected
 
diff --git a/libs/caper/group.py b/libs/caper/group.py
index 3dcb00b..71b9766 100644
--- a/libs/caper/group.py
+++ b/libs/caper/group.py
@@ -14,8 +14,9 @@
 
 
 from logr import Logr
+from caper import CaperClosure
 from caper.helpers import clean_dict
-from caper.result import CaperFragmentNode
+from caper.result import CaperFragmentNode, CaperClosureNode
 from caper.step import CaptureStep
 from caper.constraint import CaptureConstraint
 
@@ -70,7 +71,9 @@ class CaptureGroup(object):
     def parse_subject(self, parent_head, subject):
         parent_node = parent_head[0] if type(parent_head) is list else parent_head
 
-        # TODO - if subject is a closure?
+        # TODO just jumping into closures for now, will be fixed later
+        if type(subject) is CaperClosure:
+            return [CaperClosureNode(subject, parent_head)]
 
         nodes = []
 

From fb5b17005f47603fb27502a8f4d21234ddf75be1 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Fri, 4 Oct 2013 02:43:57 +1300
Subject: [PATCH 12/21] Cleaned up status.get calls in TV searcher

---
 couchpotato/core/media/show/searcher/main.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index 12e7d02..ec9da74 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -68,7 +68,7 @@ class ShowSearcher(Plugin):
         except SearchSetupError:
             return
 
-        done_status = fireEvent('status.get', 'done', single = True)
+        done_status, available_status, ignored_status, failed_status = fireEvent('status.get', ['done', 'available', 'ignored', 'failed'], single = True)
 
         if not media['profile'] or media['status_id'] == done_status.get('id'):
             log.debug('Episode doesn\'t have a profile or already done, assuming in manage tab.')
@@ -76,8 +76,7 @@ class ShowSearcher(Plugin):
 
         db = get_session()
 
-        pre_releases = fireEvent('quality.pre_releases', single = True)
-        available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
+        #pre_releases = fireEvent('quality.pre_releases', single = True)
 
         found_releases = []
         too_early_to_search = []

From 0a0935d635ce40b91afa6216ffe97267ad1255b5 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Sat, 5 Oct 2013 14:24:08 +1300
Subject: [PATCH 13/21] Fix to Provider getCatId when returning the
 cat_backup_id

---
 couchpotato/core/providers/base.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/couchpotato/core/providers/base.py b/couchpotato/core/providers/base.py
index 66f7182..8760710 100644
--- a/couchpotato/core/providers/base.py
+++ b/couchpotato/core/providers/base.py
@@ -274,7 +274,10 @@ class YarrProvider(Provider):
             if identifier in qualities:
                 return ids
 
-        return [self.cat_backup_id]
+        if self.cat_backup_id:
+            return [self.cat_backup_id]
+
+        return []
 
 
 class ResultList(list):

From 6259684487f084d79bee0c896508f15ac47d4bfb Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Sat, 5 Oct 2013 14:26:49 +1300
Subject: [PATCH 14/21] Moved caper matching into a new 'matcher' plugin.

---
 couchpotato/core/media/show/searcher/main.py | 228 ++++++++++-----------------
 couchpotato/core/plugins/matcher/__init__.py |   6 +
 couchpotato/core/plugins/matcher/main.py     | 120 ++++++++++++++
 3 files changed, 213 insertions(+), 141 deletions(-)
 create mode 100644 couchpotato/core/plugins/matcher/__init__.py
 create mode 100644 couchpotato/core/plugins/matcher/main.py

diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index ec9da74..e8c0655 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -5,7 +5,6 @@ from couchpotato.core.logger import CPLog
 from couchpotato.core.media._base.searcher.main import SearchSetupError
 from couchpotato.core.plugins.base import Plugin
 from couchpotato.core.settings.model import Media, Library
-from caper import Caper
 
 log = CPLog(__name__)
 
@@ -27,34 +26,14 @@ class ShowSearcher(Plugin):
         super(ShowSearcher, self).__init__()
 
         addEvent('show.searcher.single', self.single)
-        addEvent('searcher.correct_release', self.correctRelease)
         addEvent('searcher.get_search_title', self.getSearchTitle)
-        addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId)
-
-        self.caper = Caper()
-
-    def _lookupMedia(self, media):
-        db = get_session()
-
-        media_library = db.query(Library).filter_by(id = media['library_id']).first()
 
-        show = None
-        season = None
-        episode = None
-
-        if media['type'] == 'episode':
-            show = media_library.parent.parent
-            season = media_library.parent
-            episode = media_library
-
-        if media['type'] == 'season':
-            show = media_library.parent
-            season = media_library
-
-        if media['type'] == 'show':
-            show = media_library
+        addEvent('searcher.correct_match', self.correctMatch)
+        addEvent('searcher.correct_release', self.correctRelease)
 
-        return show, season, episode
+        addEvent('searcher.get_media_identifier', self.getMediaIdentifier)
+        addEvent('searcher.get_media_root', self.getMediaRoot)
+        addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId)
 
     def single(self, media, search_protocols = None, manual = False):
         if media['type'] == 'show':
@@ -87,7 +66,7 @@ class ShowSearcher(Plugin):
             #fireEvent('episode.delete', episode['id'], single = True)
             return
 
-        show, season, episode = self._lookupMedia(media)
+        show, season, episode = self.getMedia(media)
         if show is None or season is None:
             log.error('Unable to find show or season library in database, missing required data for searching')
             return
@@ -149,6 +128,28 @@ class ShowSearcher(Plugin):
 
         return ret
 
+    def getSearchTitle(self, media):
+        show, season, episode = self.getMedia(media)
+        if show is None:
+            return None
+
+        # TODO this misses alternative titles from the database
+        show_title = getTitle(show)
+        if not show_title:
+            return None
+
+        identifier = fireEvent('searcher.get_media_identifier', media['library'], single = True)
+
+        name = show_title
+
+        if identifier['season']:
+            name += ' S%02d' % identifier['season']
+
+            if identifier['episode']:
+                name += 'E%02d' % identifier['episode']
+
+        return name
+
     def correctRelease(self, release = None, media = None, quality = None, **kwargs):
 
         if media.get('type') not in ['season', 'episode']: return
@@ -163,156 +164,101 @@ class ShowSearcher(Plugin):
         if not fireEvent('searcher.correct_words', release['name'], media, single = True):
             return False
 
-        show, season, episode = self._lookupMedia(media)
+        show, season, episode = self.getMedia(media)
         if show is None or season is None:
             log.error('Unable to find show or season library in database, missing required data for searching')
             return
 
-        release_info = self.caper.parse(release['name'])
-        if len(release_info.chains) < 1:
-            log.info2('Wrong: %s, unable to parse release name (no chains)', release['name'])
-            return False
-
-        # TODO look at all chains
-        chain = release_info.chains[0]
-
-        if not self.correctQuality(chain, quality['identifier']):
-            log.info('Wrong: %s, quality does not match', release['name'])
-            return False
-
-        if not self.correctIdentifier(chain, media):
-            log.info('Wrong: %s, identifier does not match', release['name'])
-            return False
-
-        if 'show_name' not in chain.info or not len(chain.info['show_name']):
-            log.info('Wrong: %s, missing show name in parsed result', release['name'])
-            return False
-
-        chain_words = [x.lower() for x in chain.info['show_name']]
-        chain_title = ' '.join(chain_words)
-
-        library_title = None
-
-        # Check show titles match
-        for raw_title in show.titles:
-            for valid_words in [x.split(' ') for x in possibleTitles(raw_title.title)]:
-                if not library_title:
-                    library_title = ' '.join(valid_words)
+        match = fireEvent('matcher.best', release, media, quality, single = True)
+        if match:
+            return match.weight
 
-                if valid_words == chain_words:
-                    return chain.weight
-
-        log.info("Wrong: title '%s', undetermined show naming. Looking for '%s (%s)'", (chain_title, library_title, media['library']['year']))
         return False
 
-    def correctQuality(self, chain, quality_identifier):
-        if quality_identifier not in self.quality_map:
-            log.info2('Wrong: unknown preferred quality %s for TV searching', quality_identifier)
-            return False
-
-        if 'video' not in chain.info:
-            log.info2('Wrong: no video tags found')
-            return False
-
-        video_tags = self.quality_map[quality_identifier]
-
-        if not self.chainMatches(chain, 'video', video_tags):
-            log.info2('Wrong: %s tags not in chain', video_tags)
-            return False
-
-        return True
-
-    def correctIdentifier(self, chain, media):
-        required_id = self.getMediaIdentifier(media['library'])
+    def correctMatch(self, chain, release, media, quality):
+        log.info("Checking if '%s' is valid", release['name'])
 
-        if 'identifier' not in chain.info:
+        if not fireEvent('matcher.correct_quality', chain, quality, self.quality_map, single = True):
+            log.info('Wrong: %s, quality does not match', release['name'])
             return False
 
-        # TODO could be handled better?
-        if len(chain.info['identifier']) != 1:
+        if not fireEvent('matcher.correct_identifier', chain, media):
+            log.info('Wrong: %s, identifier does not match', release['name'])
             return False
-        identifier = chain.info['identifier'][0]
-
-        # TODO air by date episodes
-        release_id = self.toNumericIdentifier(identifier.get('season'), identifier.get('episode'))
 
-        if required_id != release_id:
-            log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(release_id)))
+        if not fireEvent('matcher.correct_title', chain, media):
+            log.info("Wrong: '%s', undetermined naming. Looking for '%s (%s)'", (
+                ' '.join(chain.info['show_name']),
+                'library_title',
+                media['library']['year'])
+            )
             return False
 
         return True
 
     def getMediaIdentifier(self, media_library):
-        identifier = None, None
+        if media_library['type'] not in ['show', 'season', 'episode']:
+            return None
+
+        identifier = {
+            'season': None,
+            'episode': None
+        }
 
         if media_library['type'] == 'episode':
             map_episode = media_library['info'].get('map_episode')
 
             if map_episode and 'scene' in map_episode:
-                identifier = (
-                    map_episode['scene'].get('season'),
-                    map_episode['scene'].get('episode')
-                )
+                identifier['season'] = map_episode['scene'].get('season')
+                identifier['episode'] = map_episode['scene'].get('episode')
             else:
                 # TODO xem mapping?
-                identifier = (
-                    media_library.get('season_number'),
-                    media_library.get('episode_number')
-                )
+                identifier['season'] = media_library.get('season_number')
+                identifier['episode'] = media_library.get('episode_number')
 
         if media_library['type'] == 'season':
-            identifier = media_library.get('season_number'), None
-
-        return self.toNumericIdentifier(*identifier)
-
-    def toNumericIdentifier(self, season, episode):
-        return tryInt(season, None), tryInt(episode, None)
-
-    def chainMatches(self, chain, group, tags):
-        found_tags = []
+            identifier['season'] = media_library.get('season_number')
 
-        for match in chain.info[group]:
-            for ck, cv in match.items():
-                if ck in tags and self.cleanMatchValue(cv) in tags[ck]:
-                    found_tags.append(ck)
+        # Try to cast identifier values to integers
+        identifier['season'] = tryInt(identifier['season'], None)
+        identifier['episode'] = tryInt(identifier['episode'], None)
 
+        return identifier
 
-        if set(tags.keys()) == set(found_tags):
-            return True
-
-        return set([key for key, value in tags.items() if None not in value]) == set(found_tags)
-
-    def cleanMatchValue(self, value):
-        value = value.lower()
-        value = value.strip()
+    def getMediaRoot(self, media):
+        if media['type'] not in ['show', 'season', 'episode']:
+            return None
 
-        for ch in [' ', '-', '.']:
-            value = value.replace(ch, '')
+        show, season, episode = self.getMedia(media)
+        if show is None or season is None:
+            log.error('Unable to find show or season library in database, missing required data for searching')
+            return
 
-        return value
+        return show.to_dict()
 
-    def getSearchTitle(self, media):
-        show, season, episode = self._lookupMedia(media)
-        if show is None:
-            return None
+    def getMediaSearcherId(self, media_type):
+        if media_type in ['show', 'season', 'episode']:
+            return 'show'
 
-        # TODO this misses alternative titles from the database
-        show_title = getTitle(show)
-        if not show_title:
-            return None
+    def getMedia(self, media):
+        db = get_session()
 
-        season_num, episode_num = self.getMediaIdentifier(media['library'])
+        media_library = db.query(Library).filter_by(id = media['library_id']).first()
 
-        name = show_title
+        show = None
+        season = None
+        episode = None
 
-        if season_num:
-            name += ' S%02d' % season_num
+        if media['type'] == 'episode':
+            show = media_library.parent.parent
+            season = media_library.parent
+            episode = media_library
 
-            if episode_num:
-                name += 'E%02d' % episode_num
+        if media['type'] == 'season':
+            show = media_library.parent
+            season = media_library
 
-        return name
+        if media['type'] == 'show':
+            show = media_library
 
-    def getMediaSearcherId(self, media_type):
-        if media_type in ['show', 'season', 'episode']:
-            return 'show'
+        return show, season, episode
\ No newline at end of file
diff --git a/couchpotato/core/plugins/matcher/__init__.py b/couchpotato/core/plugins/matcher/__init__.py
new file mode 100644
index 0000000..46b1a8e
--- /dev/null
+++ b/couchpotato/core/plugins/matcher/__init__.py
@@ -0,0 +1,6 @@
+from .main import Matcher
+
+def start():
+    return Matcher()
+
+config = []
diff --git a/couchpotato/core/plugins/matcher/main.py b/couchpotato/core/plugins/matcher/main.py
new file mode 100644
index 0000000..dda9311
--- /dev/null
+++ b/couchpotato/core/plugins/matcher/main.py
@@ -0,0 +1,120 @@
+import pprint
+from caper import Caper
+from couchpotato import CPLog, tryInt
+from couchpotato.core.event import addEvent, fireEvent
+from couchpotato.core.helpers.variable import possibleTitles
+from couchpotato.core.plugins.base import Plugin
+
+log = CPLog(__name__)
+
+
+class Matcher(Plugin):
+    def __init__(self):
+        self.caper = Caper()
+
+        addEvent('matcher.parse', self.parse)
+        addEvent('matcher.best', self.best)
+
+        addEvent('matcher.correct_title', self.correctTitle)
+        addEvent('matcher.correct_identifier', self.correctIdentifier)
+        addEvent('matcher.correct_quality', self.correctQuality)
+
+    def parse(self, release):
+        return self.caper.parse(release['name'])
+
+    def best(self, release, media, quality):
+        rel_info = fireEvent('matcher.parse', release, single = True)
+
+        if len(rel_info.chains) < 1:
+            log.info2('Wrong: %s, unable to parse release name (no chains)', release['name'])
+            return False
+
+        for chain in rel_info.chains:
+            if fireEvent('searcher.correct_match', chain, release, media, quality, single = True):
+                return chain
+
+        return None
+
+    def chainMatches(self, chain, group, tags):
+        found_tags = []
+
+        for match in chain.info[group]:
+            for ck, cv in match.items():
+                if ck in tags and self.cleanMatchValue(cv) in tags[ck]:
+                    found_tags.append(ck)
+
+
+        if set(tags.keys()) == set(found_tags):
+            return True
+
+        return set([key for key, value in tags.items() if None not in value]) == set(found_tags)
+
+    def cleanMatchValue(self, value):
+        value = value.lower()
+        value = value.strip()
+
+        for ch in [' ', '-', '.']:
+            value = value.replace(ch, '')
+
+        return value
+
+    def dictIsSubset(self, a, b):
+        return all([k in b and b[k] == v for k, v in a.items()])
+
+    def correctIdentifier(self, chain, media):
+        required_id = fireEvent('searcher.get_media_identifier', media['library'], single = True)
+
+        if 'identifier' not in chain.info:
+            return False
+
+        # TODO could be handled better?
+        if len(chain.info['identifier']) != 1:
+            return False
+        identifier = chain.info['identifier'][0]
+
+        # TODO air by date episodes
+
+        # TODO this should support identifiers with characters 'a', 'b', etc.
+        for k, v in identifier.items():
+            identifier[k] = tryInt(v, None)
+
+        if not self.dictIsSubset(required_id, identifier):
+            log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(identifier)))
+            return False
+
+        return True
+
+    def correctTitle(self, chain, media):
+        root_library = fireEvent('searcher.get_media_root', media['library'], single = True)
+
+        if 'show_name' not in chain.info or not len(chain.info['show_name']):
+            log.info('Wrong: missing show name in parsed result')
+            return False
+
+        chain_words = [x.lower() for x in chain.info['show_name']]
+
+        # Check show titles match
+        for title in root_library['info']['titles']:
+            for valid_words in [x.split(' ') for x in possibleTitles(title)]:
+
+                if valid_words == chain_words:
+                    return True
+
+        return False
+
+    def correctQuality(self, chain, quality, quality_map):
+        if quality['identifier'] not in quality_map:
+            log.info2('Wrong: unknown preferred quality %s', quality['identifier'])
+            return False
+
+        if 'video' not in chain.info:
+            log.info2('Wrong: no video tags found')
+            return False
+
+        video_tags = quality_map[quality['identifier']]
+
+        if not self.chainMatches(chain, 'video', video_tags):
+            log.info2('Wrong: %s tags not in chain', video_tags)
+            return False
+
+        return True

From 07abf7c83da43d5b7f078d4fb9bb317500aed2e3 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Mon, 7 Oct 2013 01:35:18 +1300
Subject: [PATCH 15/21] Updated Caper to version 0.2.2

---
 libs/caper/__init__.py     |  2 +-
 libs/caper/matcher.py      | 61 +---------------------------------------------
 libs/caper/parsers/base.py | 57 -------------------------------------------
 libs/caper/step.py         | 12 ---------
 4 files changed, 2 insertions(+), 130 deletions(-)

diff --git a/libs/caper/__init__.py b/libs/caper/__init__.py
index 23801ee..1638ec0 100644
--- a/libs/caper/__init__.py
+++ b/libs/caper/__init__.py
@@ -19,7 +19,7 @@ from caper.parsers.anime import AnimeParser
 from caper.parsers.scene import SceneParser
 
 
-__version_info__ = ('0', '2', '0')
+__version_info__ = ('0', '2', '2')
 __version_branch__ = 'master'
 
 __version__ = "%s%s" % (
diff --git a/libs/caper/matcher.py b/libs/caper/matcher.py
index 24ef69a..23fdcf9 100644
--- a/libs/caper/matcher.py
+++ b/libs/caper/matcher.py
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pprint
 import re
 from logr import Logr
-from caper.helpers import is_list_type, clean_dict
+from caper.helpers import is_list_type
 
 
 class FragmentMatcher(object):
@@ -57,8 +56,6 @@ class FragmentMatcher(object):
 
                 self.regex[group_name].append((weight, weight_patterns))
 
-        pprint.pprint(self.regex)
-
     def find_group(self, name):
         for group_name, weight_groups in self.regex.items():
             if group_name and group_name == name:
@@ -66,62 +63,6 @@ class FragmentMatcher(object):
 
         return None
 
-    def parser_match(self, parser, group_name, single=True):
-        """
-
-        :type parser: caper.parsers.base.Parser
-        """
-        result = None
-
-        for group, weight_groups in self.regex.items():
-            if group_name and group != group_name:
-                continue
-
-            # TODO handle multiple weights
-            weight, patterns = weight_groups[0]
-
-            for pattern in patterns:
-                fragments = []
-                pattern_matched = True
-                pattern_result = {}
-
-                for fragment_pattern in pattern:
-                    if not parser.fragment_available():
-                        pattern_matched = False
-                        break
-
-                    fragment = parser.next_fragment()
-                    fragments.append(fragment)
-
-                    Logr.debug('[r"%s"].match("%s")', fragment_pattern.pattern, fragment.value)
-                    match = fragment_pattern.match(fragment.value)
-                    if match:
-                        Logr.debug('Pattern "%s" matched', fragment_pattern.pattern)
-                    else:
-                        pattern_matched = False
-                        break
-
-                    pattern_result.update(clean_dict(match.groupdict()))
-
-                if pattern_matched:
-                    if result is None:
-                        result = {}
-
-                    if group not in result:
-                        result[group] = {}
-
-                    Logr.debug('Matched on <%s>', ' '.join([f.value for f in fragments]))
-
-                    result[group].update(pattern_result)
-                    parser.commit()
-
-                    if single:
-                        return result
-                else:
-                    parser.rewind()
-
-        return result
-
     def value_match(self, value, group_name=None, single=True):
         result = None
 
diff --git a/libs/caper/parsers/base.py b/libs/caper/parsers/base.py
index 70bc733..d0e109e 100644
--- a/libs/caper/parsers/base.py
+++ b/libs/caper/parsers/base.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from logr import Logr
 from caper import FragmentMatcher
 from caper.group import CaptureGroup
 from caper.result import CaperResult, CaperClosureNode
@@ -60,62 +59,6 @@ class Parser(object):
         raise NotImplementedError()
 
     #
-    # Closure Methods
-    #
-
-    def next_closure(self):
-        self._closure_pos += 1
-        closure = self.closures[self._closure_pos]
-
-        self._history.append(('fragment', -1 - self._fragment_pos))
-        self._fragment_pos = -1
-
-        if self._closure_pos != 0:
-            self._history.append(('closure', 1))
-
-        Logr.debug('(next_closure) closure.value: "%s"', closure.value)
-        return closure
-
-    def closure_available(self):
-        return self._closure_pos + 1 < len(self.closures)
-
-    #
-    # Fragment Methods
-    #
-
-    def next_fragment(self):
-        closure = self.closures[self._closure_pos]
-
-        self._fragment_pos += 1
-        fragment = closure.fragments[self._fragment_pos]
-
-        self._history.append(('fragment', 1))
-
-        Logr.debug('(next_fragment) closure.value "%s" - fragment.value: "%s"', closure.value, fragment.value)
-        return fragment
-
-    def fragment_available(self):
-        if not self.closure_available():
-            return False
-        return self._fragment_pos + 1 < len(self.closures[self._closure_pos].fragments)
-
-    def rewind(self):
-        for source, delta in reversed(self._history):
-            Logr.debug('(rewind) Rewinding step: %s', (source, delta))
-            if source == 'fragment':
-                self._fragment_pos -= delta
-            elif source == 'closure':
-                self._closure_pos -= delta
-            else:
-                raise NotImplementedError()
-
-        self.commit()
-
-    def commit(self):
-        Logr.debug('(commit)')
-        self._history = []
-
-    #
     # Capture Methods
     #
 
diff --git a/libs/caper/step.py b/libs/caper/step.py
index eb94b6f..a82a930 100644
--- a/libs/caper/step.py
+++ b/libs/caper/step.py
@@ -33,18 +33,6 @@ class CaptureStep(object):
         #: @type: bool
         self.single = single
 
-    def _get_next_subject(self, parser):
-        if self.source == 'fragment':
-            if not parser.fragment_available():
-                return None
-            return parser.next_fragment()
-        elif self.source == 'closure':
-            if not parser.closure_available():
-                return None
-            return parser.next_closure()
-
-        raise NotImplementedError()
-
     def execute(self, fragment):
         if self.regex:
             weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex)

From f23412ea7e1393e13311c96b06c61cd1c873edb3 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Tue, 15 Oct 2013 16:16:26 +1300
Subject: [PATCH 16/21] Added qcond (Query Condenser) v0.1.0 library -
 https://github.com/fuzeman/QueryCondenser

---
 libs/qcond/__init__.py                  |  42 +++++
 libs/qcond/compat.py                    |  23 +++
 libs/qcond/helpers.py                   |  84 ++++++++++
 libs/qcond/transformers/__init__.py     |   0
 libs/qcond/transformers/base.py         |  21 +++
 libs/qcond/transformers/merge.py        | 238 +++++++++++++++++++++++++++
 libs/qcond/transformers/slice.py        | 280 ++++++++++++++++++++++++++++++++
 libs/qcond/transformers/strip_common.py |  26 +++
 8 files changed, 714 insertions(+)
 create mode 100644 libs/qcond/__init__.py
 create mode 100644 libs/qcond/compat.py
 create mode 100644 libs/qcond/helpers.py
 create mode 100644 libs/qcond/transformers/__init__.py
 create mode 100644 libs/qcond/transformers/base.py
 create mode 100644 libs/qcond/transformers/merge.py
 create mode 100644 libs/qcond/transformers/slice.py
 create mode 100644 libs/qcond/transformers/strip_common.py

diff --git a/libs/qcond/__init__.py b/libs/qcond/__init__.py
new file mode 100644
index 0000000..be64e7b
--- /dev/null
+++ b/libs/qcond/__init__.py
@@ -0,0 +1,42 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from qcond.transformers.merge import MergeTransformer
+from qcond.transformers.slice import SliceTransformer
+from qcond.transformers.strip_common import StripCommonTransformer
+
+
+__version_info__ = ('0', '1', '0')
+__version_branch__ = 'master'
+
+__version__ = "%s%s" % (
+    '.'.join(__version_info__),
+    '-' + __version_branch__ if __version_branch__ else ''
+)
+
+
+class QueryCondenser(object):
+    def __init__(self):
+        self.transformers = [
+            MergeTransformer(),
+            SliceTransformer(),
+            StripCommonTransformer()
+        ]
+
+    def distinct(self, titles):
+        for transformer in self.transformers:
+            titles = transformer.run(titles)
+
+        return titles
diff --git a/libs/qcond/compat.py b/libs/qcond/compat.py
new file mode 100644
index 0000000..f3f0925
--- /dev/null
+++ b/libs/qcond/compat.py
@@ -0,0 +1,23 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import sys
+
+PY3 = sys.version_info[0] == 3
+
+if PY3:
+    xrange = range
+else:
+    xrange = xrange
diff --git a/libs/qcond/helpers.py b/libs/qcond/helpers.py
new file mode 100644
index 0000000..a341b6e
--- /dev/null
+++ b/libs/qcond/helpers.py
@@ -0,0 +1,84 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from difflib import SequenceMatcher
+import re
+import sys
+from logr import Logr
+from qcond.compat import xrange
+
+
+# True when running under Python 3 (same check as `PY3` in qcond.compat).
+PY3 = sys.version_info[0] == 3
+
+
+def simplify(s):
+    """Lower-case `s` and drop any apostrophe that sits between two word
+    characters (so "don't" becomes "dont")."""
+    lowered = s.lower()
+    return re.sub(r"(\w)'(\w)", r"\1\2", lowered)
+
+
+def strip(s):
+    """Return `s` without its leading and trailing non-word characters."""
+    # Group 2 is the lazily matched middle part between the two \W* runs.
+    edges = re.compile(r"^(\W*)(.*?)(\W*)$")
+    return edges.sub(r"\2", s)
+
+
+def create_matcher(a, b, swap_longest = True, case_sensitive = False):
+    """Build a `SequenceMatcher` comparing `a` against `b`.
+
+    When `swap_longest` is true the longer string is always used as the
+    first sequence. Unless `case_sensitive` is true, both strings are
+    upper-cased before they are handed to the matcher.
+    """
+    first_seq, second_seq = a, b
+
+    if swap_longest and len(second_seq) > len(first_seq):
+        first_seq, second_seq = second_seq, first_seq
+
+    if case_sensitive:
+        return SequenceMatcher(None, first_seq, second_seq)
+
+    return SequenceMatcher(None, first_seq.upper(), second_seq.upper())
+
+
+def first(function_or_none, sequence):
+    """Return the first item of `sequence` accepted by the predicate.
+
+    :param function_or_none: predicate called with each item; when None the
+        item's own truthiness is used (same convention as `filter`).
+    :param sequence: iterable to scan.
+    :return: the first accepted item, or None when nothing is accepted.
+    """
+    # Scan lazily so the search stops at the first hit on both Python 2 and
+    # Python 3; Python 2's `filter` would test every item before returning.
+    for item in sequence:
+        accepted = item if function_or_none is None else function_or_none(item)
+
+        if accepted:
+            return item
+
+    return None
+
+def sorted_append(sequence, item, func):
+    """Insert `item` into `sequence` in place.
+
+    The item goes directly before the first existing element for which
+    `func(element)` is true; when no element qualifies (which includes an
+    empty sequence) it is appended at the end.
+    """
+    for position, existing in enumerate(sequence):
+        if func(existing):
+            sequence.insert(position, item)
+            return
+
+    sequence.append(item)
+
+def itemsMatch(L1, L2):
+    """Return True when both sequences hold the same items, ignoring order."""
+    if len(L1) != len(L2):
+        return False
+
+    return sorted(L1) == sorted(L2)
+
+def distinct(sequence):
+    """Return a new list holding the items of `sequence` in first-seen
+    order, with later duplicates (compared by equality) left out."""
+    unique = []
+
+    for candidate in sequence:
+        if candidate in unique:
+            continue
+
+        unique.append(candidate)
+
+    return unique
\ No newline at end of file
diff --git a/libs/qcond/transformers/__init__.py b/libs/qcond/transformers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/libs/qcond/transformers/base.py b/libs/qcond/transformers/base.py
new file mode 100644
index 0000000..7054729
--- /dev/null
+++ b/libs/qcond/transformers/base.py
@@ -0,0 +1,21 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class Transformer(object):
+    """Base class for title transformers; subclasses implement `run`."""
+
+    def __init__(self):
+        """Nothing to set up in the base class."""
+
+    def run(self, titles):
+        """Transform `titles`; must be overridden by subclasses."""
+        raise NotImplementedError()
diff --git a/libs/qcond/transformers/merge.py b/libs/qcond/transformers/merge.py
new file mode 100644
index 0000000..d82f249
--- /dev/null
+++ b/libs/qcond/transformers/merge.py
@@ -0,0 +1,238 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from operator import itemgetter
+from logr import Logr
+from qcond.helpers import simplify, strip, first, sorted_append, distinct
+from qcond.transformers.base import Transformer
+from qcond.compat import xrange
+
+
+class MergeTransformer(Transformer):
+    """Condenses titles by building a word tree and merging sibling branches.
+
+    Each simplified title is split on spaces into a chain of `DNode`s;
+    sibling nodes are then folded together and the resulting values are
+    returned ordered by their accumulated score.
+    """
+
+    def __init__(self):
+        super(MergeTransformer, self).__init__()
+
+    def run(self, titles):
+        """Return the condensed title values, highest score first.
+
+        :param titles: iterable of title strings.
+        :return: list of condensed title strings.
+        """
+        titles = distinct([simplify(title) for title in titles])
+
+        Logr.info(str(titles))
+
+        Logr.debug("------------------------------------------------------------")
+
+        root, tails = self.parse(titles)
+
+        Logr.debug("--------------------------PARSE-----------------------------")
+
+        for node in root:
+            print_tree(node)
+
+        Logr.debug("--------------------------MERGE-----------------------------")
+
+        self.merge(root)
+
+        Logr.debug("--------------------------FINAL-----------------------------")
+
+        for node in root:
+            print_tree(node)
+
+        Logr.debug("--------------------------RESULT-----------------------------")
+
+        scores = {}
+        results = []
+
+        # Several tails can collapse to the same value after merging; their
+        # scores are summed and the value is only listed once.
+        for tail in tails:
+            score, value, original_value = tail.full_value()
+
+            if value in scores:
+                scores[value] += score
+            else:
+                results.append((value, original_value))
+                scores[value] = score
+
+                Logr.debug("%s %s %s", score, value, original_value)
+
+        sorted_results = sorted(results, key=lambda item: (scores[item[0]], item[1]), reverse = True)
+
+        return [result[0] for result in sorted_results]
+
+    def parse(self, titles):
+        """Build the word tree for `titles`.
+
+        :return: tuple `(root, tails)` where `root` lists the first-word
+            nodes and `tails` holds the last-word node of every title.
+        """
+        root = []
+        tails = []
+
+        for title in titles:
+            Logr.debug(title)
+
+            cur = None
+            words = title.split(' ')
+
+            for wx in xrange(len(words)):
+                word = strip(words[wx])
+
+                if cur is None:
+                    # First word of the title: reuse or create a root node.
+                    cur = find_node(root, word)
+
+                    if cur is None:
+                        cur = DNode(word, None, num_children=len(words) - wx, original_value=title)
+                        root.append(cur)
+                else:
+                    parent = cur
+                    parent.weight += 1
+
+                    cur = find_node(parent.right, word)
+
+                    if cur is None:
+                        Logr.debug("%s %d", word, len(words) - wx)
+                        cur = DNode(word, parent, num_children=len(words) - wx)
+                        # Inserted before the first sibling with fewer
+                        # remaining words than the new node.
+                        sorted_append(parent.right, cur, lambda a: a.num_children < cur.num_children)
+                    else:
+                        cur.weight += 1
+
+            tails.append(cur)
+
+        return root, tails
+
+    def merge(self, root):
+        """Merge the children of every root node in place; returns `root`."""
+        for node in root:
+            Logr.debug(node)
+            node.right = self._merge(node.right)
+            Logr.debug('=================================================================')
+
+        return root
+
+    def get_nodes_right(self, value):
+        """Return the given node(s) followed by all of their descendants.
+
+        `value` may be a single node or a list of nodes.
+        """
+        if not isinstance(value, list):
+            value = [value]
+
+        nodes = []
+
+        for node in value:
+            nodes.append(node)
+
+            for child in self.get_nodes_right(node.right):
+                nodes.append(child)
+
+        return nodes
+
+    def destroy_nodes_right(self, value):
+        """Clear the value of, and mark dead, the node(s) and descendants."""
+        nodes = self.get_nodes_right(value)
+
+        for node in nodes:
+            node.value = None
+            node.dead = True
+
+    def _merge(self, nodes, depth = 0):
+        """Fold all sibling `nodes` into the first one, then recurse.
+
+        :param nodes: list of sibling nodes (may be empty).
+        :param depth: recursion depth, used only to indent the debug log.
+        :return: list of the nodes that are still alive.
+        """
+        Logr.debug(str('\t' * depth) + str(nodes))
+
+        # A node without children (e.g. the root of a single-word title)
+        # has nothing to merge; indexing nodes[0] would raise IndexError.
+        if not nodes:
+            return nodes
+
+        top = nodes[0]
+
+        # Merge every additional sibling into top
+        for extra in nodes[1:]:
+            top.value = None
+            top.weight += extra.weight
+            self.destroy_nodes_right(top.right)
+
+            if len(extra.right):
+                top.join_right(extra.right)
+
+                Logr.debug("= %s joined %s", extra, top)
+
+            extra.dead = True
+
+        nodes = [n for n in nodes if not n.dead]
+
+        # Traverse further
+        for node in nodes:
+            if len(node.right):
+                node.right = self._merge(node.right, depth + 1)
+
+        return nodes
+
+
+def print_tree(node, depth = 0):
+    """Debug-log `node` and its descendants, indented one tab per level.
+
+    A node without children additionally logs its joined full value.
+    """
+    Logr.debug(str('\t' * depth) + str(node))
+
+    if not len(node.right):
+        Logr.debug(node.full_value()[1])
+        return
+
+    for child in node.right:
+        print_tree(child, depth + 1)
+
+
+def find_node(node_list, value):
+    """Return the first node in `node_list` whose `value` equals `value`,
+    or None when there is no such node."""
+    matches = (candidate for candidate in node_list if candidate.value == value)
+    return next(matches, None)
+
+
+class DNode(object):
+    """One word in the tree built by `MergeTransformer.parse`.
+
+    Attributes set here: `value` (the word), `parent` (previous word's node
+    or None), `right` (list of child nodes), `weight`, `num_children`,
+    `original_value` and the `dead` flag.
+    """
+
+    def __init__(self, value, parent, right=None, weight=1, num_children=None, original_value=None):
+        self.value = value
+        self.parent = parent
+
+        # Each node gets its own child list unless one is passed in.
+        self.right = [] if right is None else right
+
+        self.weight = weight
+
+        self.original_value = original_value
+        self.num_children = num_children
+
+        self.dead = False
+
+    def join_right(self, nodes):
+        """Merge `nodes` into this node's children.
+
+        An existing child with the same value absorbs the incoming node's
+        weight and, recursively, its children; any other incoming node is
+        re-parented to this node and appended.
+        """
+        for node in nodes:
+            duplicate = first(lambda x: x.value == node.value, self.right)
+
+            if duplicate:
+                duplicate.weight += node.weight
+                duplicate.join_right(node.right)
+            else:
+                node.parent = self
+                self.right.append(node)
+
+    def full_value(self):
+        """Walk up to the root and summarise the path.
+
+        :return: tuple `(score, value, original_value)` where `value` is the
+            space-joined words of all live nodes with a non-empty value,
+            `score` is their average weight (0.0 when there are no such
+            nodes) and `original_value` comes from the root node.
+        """
+        words = []
+        total_score = 0
+
+        cur = self
+        root = None
+
+        while cur is not None:
+            if cur.value and not cur.dead:
+                words.insert(0, cur.value)
+                total_score += cur.weight
+
+            if cur.parent is None:
+                root = cur
+            cur = cur.parent
+
+        # Blank or punctuation-only titles leave no usable words; report a
+        # zero score instead of dividing by zero.
+        score = float(total_score) / len(words) if words else 0.0
+
+        return score, ' '.join(words), root.original_value if root else None
+
+    def __repr__(self):
+        return '<%s value:"%s", weight: %s, num_children: %s%s%s>' % (
+            'DNode',
+            self.value,
+            self.weight,
+            self.num_children,
+            (', original_value: %s' % self.original_value) if self.original_value else '',
+            ' REMOVING' if self.dead else ''
+        )
diff --git a/libs/qcond/transformers/slice.py b/libs/qcond/transformers/slice.py
new file mode 100644
index 0000000..864f673
--- /dev/null
+++ b/libs/qcond/transformers/slice.py
@@ -0,0 +1,280 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from logr import Logr
+from qcond.helpers import create_matcher
+from qcond.transformers.base import Transformer
+
+
+class SliceTransformer(Transformer):
+    """Removes titles that are near-duplicates or extensions of others,
+    based on pairwise similarity links between the titles."""
+
+    def __init__(self):
+        super(SliceTransformer, self).__init__()
+
+    def run(self, titles):
+        """Return the surviving title values, most merges first."""
+        separator = '---------------------------------------------------------------------'
+
+        # One node per title, then link every node to all the others
+        nodes = [SimNode(title) for title in titles]
+
+        for node in nodes:
+            others = [n for n in nodes if n != node]
+            calculate_sim_links(node, others)
+
+        kill_nodes_above(nodes, 0.90)
+
+        Logr.debug(separator)
+
+        print_link_tree(nodes)
+        Logr.debug('%s %s', len(nodes), [n.value for n in nodes])
+
+        Logr.debug(separator)
+
+        kill_trailing_nodes(nodes)
+
+        Logr.debug(separator)
+
+        # Order the remaining nodes by 'num_merges', highest first
+        nodes.sort(key=lambda n: n.num_merges, reverse=True)
+
+        print_link_tree(nodes)
+
+        Logr.debug(separator)
+
+        values = [n.value for n in nodes]
+        Logr.debug('%s %s', len(nodes), values)
+
+        return values
+
+
+class SimLink(object):
+    """Similarity data stored on one node about another node."""
+
+    def __init__(self, similarity, opcodes, stats):
+        # Opcode list and the stats dict derived from it, plus the ratio.
+        self.opcodes, self.stats = opcodes, stats
+        self.similarity = similarity
+
+
+class SimNode(object):
+    """A title plus its similarity links to the other titles."""
+
+    def __init__(self, value):
+        self.value = value
+
+        # {<other SimNode>: <SimLink>}
+        self.links = {}
+
+        # Bookkeeping used while nodes are killed and merged
+        self.num_merges = 0
+        self.dead = False
+
+
+def kill_nodes(nodes, killed_nodes):
+    """Remove `killed_nodes` from `nodes` (in place) and from the `links`
+    dict of every node that remains in `nodes`."""
+    # First pass: drop the killed nodes from the root list
+    for dead_node in killed_nodes:
+        if dead_node in nodes:
+            nodes.remove(dead_node)
+
+    # Second pass: drop links that point at a killed node
+    for dead_node in killed_nodes:
+        for survivor in nodes:
+            survivor.links.pop(dead_node, None)
+
+
+def kill_nodes_above(nodes, above_sim):
+    """Kill one node of every linked pair whose similarity is at least
+    `above_sim`.
+
+    The node with the longer value is the one that is killed when the linked
+    node is strictly longer; otherwise the owning node is killed. The
+    surviving side has its `num_merges` incremented. Killed nodes are
+    removed from `nodes` via `kill_nodes` at the end.
+    """
+    doomed = []
+
+    for owner in nodes:
+        if owner.dead:
+            continue
+
+        Logr.debug(owner.value)
+
+        for other, link in owner.links.items():
+            if other.dead:
+                continue
+
+            Logr.debug('\t%0.2f -- %s', link.similarity, other.value)
+
+            if not (link.similarity >= above_sim):
+                continue
+
+            if len(other.value) > len(owner.value):
+                message = '\t\tvery similar, killed this node'
+                victim, keeper = other, owner
+            else:
+                message = '\t\tvery similar, killed owner'
+                victim, keeper = owner, other
+
+            Logr.debug(message)
+            victim.dead = True
+            keeper.num_merges += 1
+            doomed.append(victim)
+
+    kill_nodes(nodes, doomed)
+
+
+def print_link_tree(nodes):
+    """Debug-log each node's value, merge count and similarity links."""
+    for node in nodes:
+        Logr.debug(node.value)
+        Logr.debug('\tnum_merges: %s', node.num_merges)
+
+        if not len(node.links):
+            continue
+
+        Logr.debug('\t========== LINKS ==========')
+
+        for link_node, link in node.links.items():
+            Logr.debug('\t%0.2f -- %s', link.similarity, link_node.value)
+
+        Logr.debug('\t---------------------------')
+
+
+def kill_trailing_nodes(nodes):
+    """Kill linked nodes that only add text to another node's value.
+
+    For every live node, a linked node is killed when the link's stats are
+    flagged valid, at least half of the owning node's characters are equal,
+    the inserted length is less than twice the owning node's length, and
+    the opcodes contain insertions but no deletions or replacements. The
+    owning node's `num_merges` is incremented for each kill, and killed
+    nodes are removed from `nodes` via `kill_nodes` at the end.
+    """
+    killed_nodes = []
+
+    for node in nodes:
+        if node.dead:
+            continue
+
+        Logr.debug(node.value)
+
+        value_length = len(node.value)
+
+        for link_node, link in node.links.items():
+            if link_node.dead:
+                continue
+
+            is_valid = link.stats.get('valid', False)
+
+            tags = set(opcode[0] for opcode in link.opcodes)
+            has_deletions = 'delete' in tags
+            has_insertions = 'insert' in tags
+            has_replacements = 'replace' in tags
+
+            if value_length:
+                equal_perc = link.stats.get('equal', 0) / float(value_length)
+                insert_perc = link.stats.get('insert', 0) / float(value_length)
+            else:
+                # An empty value has nothing in common with anything; use
+                # zero ratios rather than dividing by zero.
+                equal_perc = insert_perc = 0.0
+
+            Logr.debug('\t({0:<24}) [{1:02d}:{2:02d} = {3:02d} {4:3.0f}% {5:3.0f}%] -- {6:<45}'.format(
+                'd:%s, i:%s, r:%s' % (has_deletions, has_insertions, has_replacements),
+                len(node.value), len(link_node.value), link.stats.get('equal', 0),
+                equal_perc * 100, insert_perc * 100,
+                '"{0}"'.format(link_node.value)
+            ))
+
+            Logr.debug('\t\t%s', link.stats)
+
+            kill = all([
+                is_valid,
+                equal_perc >= 0.5,
+                insert_perc < 2,
+                has_insertions,
+                not has_deletions,
+                not has_replacements
+            ])
+
+            if kill:
+                Logr.debug('\t\tkilled this node')
+
+                link_node.dead = True
+                node.num_merges += 1
+                killed_nodes.append(link_node)
+
+    kill_nodes(nodes, killed_nodes)
+
+# Debug-log layout for one opcode: the tag followed by (i1:i2) and (j1:j2).
+stats_print_format = "\t{0:<8} ({1:2d}:{2:2d}) ({3:2d}:{4:2d})"
+
+
+def get_index_values(iterable, a, b):
+    """Return `(iterable[a], iterable[b])`, with None in place of an element
+    whose index is falsy (None or 0)."""
+    # NOTE(review): index 0 yields None here because of the truthiness test;
+    # confirm that is intended before relying on it.
+    head = iterable[a] if a else None
+    tail = iterable[b] if b else None
+
+    return head, tail
+
+
+def get_indices(iterable, a, b):
+    """Return `(a, b)` with each index replaced by None unless it lies
+    strictly between 0 and `len(iterable)`."""
+    upper = len(iterable)
+
+    # NOTE(review): index 0 is treated as out of range by this bound check;
+    # confirm that is intended.
+    def checked(index):
+        return index if 0 < index < upper else None
+
+    return checked(a), checked(b)
+
+
+def get_opcode_stats(for_node, node, opcodes):
+    """Summarise a list of `(tag, i1, i2, j1, j2)` opcodes.
+
+    :param for_node: node whose `value` is indexed for the "a" side.
+    :param node: node whose `value` is indexed for the "b" side.
+    :param opcodes: iterable of 5-tuples as unpacked in the loop below.
+    :return: dict mapping every tag seen to its accumulated span length,
+        plus a 'valid' flag when at least one 'insert' or 'delete' opcode
+        was seen. 'valid' ends up True only if every such opcode was
+        judged valid by the head/tail check below.
+    """
+    stats = {}
+
+    for tag, i1, i2, j1, j2 in opcodes:
+        Logr.debug(stats_print_format.format(
+            tag, i1, i2, j1, j2
+        ))
+
+        if tag in ['insert', 'delete']:
+            ax = None, None
+            bx = None, None
+
+            if tag == 'insert':
+                # Positions around the insertion point on the "a" side and
+                # the first/last inserted position on the "b" side.
+                ax = get_indices(for_node.value, i1 - 1, i1)
+                bx = get_indices(node.value, j1, j2 - 1)
+
+            if tag == 'delete':
+                # NOTE(review): here the j indices are applied to
+                # for_node.value and the i indices to node.value, the
+                # reverse of the 'insert' branch -- confirm this is intended.
+                ax = get_indices(for_node.value, j1 - 1, j1)
+                bx = get_indices(node.value, i1, i2 - 1)
+
+            av = get_index_values(for_node.value, *ax)
+            bv = get_index_values(node.value, *bx)
+
+            Logr.debug(
+                '\t\t%s %s [%s><%s] <---> %s %s [%s><%s]',
+                ax, av, av[0], av[1],
+                bx, bv, bv[0], bv[1]
+            )
+
+            # A boundary counts as valid when, on either side, it is a space
+            # or has no character (index rejected by get_indices).
+            head_valid = av[0] in [None, ' '] or bv[0] in [None, ' ']
+            tail_valid = av[1] in [None, ' '] or bv[1] in [None, ' ']
+            valid = head_valid and tail_valid
+
+            # Set on first sight, afterwards only ever downgraded to False.
+            if 'valid' not in stats or (stats['valid'] and not valid):
+                stats['valid'] = valid
+
+            Logr.debug('\t\t' + ('VALID' if valid else 'INVALID'))
+
+        if tag not in stats:
+            stats[tag] = 0
+
+        # Use the "a" span length, falling back to the "b" span when it is 0.
+        stats[tag] += (i2 - i1) or (j2 - j1)
+
+    return stats
+
+
+def calculate_sim_links(for_node, other_nodes):
+    """Create `SimLink`s between `for_node` and each node in `other_nodes`.
+
+    Nodes already present in `for_node.links` are skipped. For every other
+    node a link is stored on both sides: both share the same similarity
+    ratio, while opcodes and stats are computed separately per direction.
+    """
+    for node in other_nodes:
+        if node in for_node.links:
+            continue
+
+        Logr.debug('calculating similarity between "%s" and "%s"', for_node.value, node.value)
+
+        # Get similarity (default matcher: longest value first, upper-cased)
+        similarity_matcher = create_matcher(for_node.value, node.value)
+        similarity = similarity_matcher.quick_ratio()
+
+        # Get for_node -> node opcodes
+        a_opcodes_matcher = create_matcher(for_node.value, node.value, swap_longest = False)
+        a_opcodes = a_opcodes_matcher.get_opcodes()
+        a_stats = get_opcode_stats(for_node, node, a_opcodes)
+
+        Logr.debug('-' * 100)
+
+        # Get node -> for_node opcodes
+        # NOTE(review): the matcher arguments are swapped for this direction
+        # but get_opcode_stats still receives (for_node, node) -- confirm the
+        # argument order is intended.
+        b_opcodes_matcher = create_matcher(node.value, for_node.value, swap_longest = False)
+        b_opcodes = b_opcodes_matcher.get_opcodes()
+        b_stats = get_opcode_stats(for_node, node, b_opcodes)
+
+        for_node.links[node] = SimLink(similarity, a_opcodes, a_stats)
+        node.links[for_node] = SimLink(similarity, b_opcodes, b_stats)
+
diff --git a/libs/qcond/transformers/strip_common.py b/libs/qcond/transformers/strip_common.py
new file mode 100644
index 0000000..47b8401
--- /dev/null
+++ b/libs/qcond/transformers/strip_common.py
@@ -0,0 +1,26 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from qcond.transformers.base import Transformer
+
+
+# Lower-case words; a title equal to one of these is dropped by
+# StripCommonTransformer.
+COMMON_WORDS = [
+    'the'
+]
+
+
+class StripCommonTransformer(Transformer):
+    """Drops titles that consist solely of a word from COMMON_WORDS."""
+
+    def run(self, titles):
+        """Return `titles` without entries whose lower-cased form is listed
+        in COMMON_WORDS; order is preserved."""
+        kept = []
+
+        for title in titles:
+            if title.lower() in COMMON_WORDS:
+                continue
+
+            kept.append(title)
+
+        return kept

From da87e68fad461b36b2ae7b19628cbd3898d1604c Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Tue, 15 Oct 2013 21:50:43 +1300
Subject: [PATCH 17/21] Implemented basic usage of QueryCondenser

---
 couchpotato/core/media/show/searcher/main.py | 53 ++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index e8c0655..59eb92c 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -1,10 +1,12 @@
 from couchpotato import get_session, Env
 from couchpotato.core.event import addEvent, fireEvent
-from couchpotato.core.helpers.variable import getTitle, tryInt, possibleTitles
+from couchpotato.core.helpers.variable import getTitle, tryInt
 from couchpotato.core.logger import CPLog
 from couchpotato.core.media._base.searcher.main import SearchSetupError
 from couchpotato.core.plugins.base import Plugin
 from couchpotato.core.settings.model import Media, Library
+from qcond import QueryCondenser
+from qcond.helpers import simplify
 
 log = CPLog(__name__)
 
@@ -25,6 +27,8 @@ class ShowSearcher(Plugin):
     def __init__(self):
         super(ShowSearcher, self).__init__()
 
+        self.query_condenser = QueryCondenser()
+
         addEvent('show.searcher.single', self.single)
         addEvent('searcher.get_search_title', self.getSearchTitle)
 
@@ -34,6 +38,7 @@ class ShowSearcher(Plugin):
         addEvent('searcher.get_media_identifier', self.getMediaIdentifier)
         addEvent('searcher.get_media_root', self.getMediaRoot)
         addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId)
+        addEvent('searcher.get_media_titles', self.getMediaTitles)
 
     def single(self, media, search_protocols = None, manual = False):
         if media['type'] == 'show':
@@ -129,26 +134,54 @@ class ShowSearcher(Plugin):
         return ret
 
     def getSearchTitle(self, media):
+        if media['type'] not in ['show', 'season', 'episode']:
+            return
+
         show, season, episode = self.getMedia(media)
         if show is None:
             return None
 
-        # TODO this misses alternative titles from the database
-        show_title = getTitle(show)
-        if not show_title:
+        titles = []
+
+        # Add season map_names if they exist
+        if season is not None and 'map_names' in show.info:
+            season_names = show.info['map_names'].get(str(season.season_number), {})
+
+            # Add titles from all locations
+            # TODO only add name maps from a specific location
+            for location, names in season_names.items():
+                titles += [name for name in names if name not in titles]
+
+        # Add show titles
+        titles += [title.title for title in show.titles if title.title not in titles]
+
+        # Use QueryCondenser to build a list of optimal search titles
+        condensed_titles = self.query_condenser.distinct(titles)
+
+        title = None
+
+        # TODO try other titles if searching doesn't return results
+
+        if len(condensed_titles):
+            # Return the first condensed title if one exists
+            title = condensed_titles[0]
+        elif len(titles):
+            # Fallback to first raw title
+            title = simplify(titles[0])
+        else:
             return None
 
+        # Add the identifier to search title
+        # TODO supporting other identifier formats
         identifier = fireEvent('searcher.get_media_identifier', media['library'], single = True)
 
-        name = show_title
-
         if identifier['season']:
-            name += ' S%02d' % identifier['season']
+            title += ' S%02d' % identifier['season']
 
             if identifier['episode']:
-                name += 'E%02d' % identifier['episode']
+                title += 'E%02d' % identifier['episode']
 
-        return name
+        return title
 
     def correctRelease(self, release = None, media = None, quality = None, **kwargs):
 
@@ -261,4 +294,4 @@ class ShowSearcher(Plugin):
         if media['type'] == 'show':
             show = media_library
 
-        return show, season, episode
\ No newline at end of file
+        return show, season, episode

From b90218638969ddc5b1f7300af0ff0190d9c15c30 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 16 Oct 2013 14:09:41 +1300
Subject: [PATCH 18/21] Cleaned up usage of helper functions

---
 couchpotato/core/helpers/variable.py         |  3 +++
 couchpotato/core/media/show/searcher/main.py |  1 -
 couchpotato/core/plugins/matcher/main.py     | 19 ++++---------------
 3 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/couchpotato/core/helpers/variable.py b/couchpotato/core/helpers/variable.py
index 15f9936..6296462 100644
--- a/couchpotato/core/helpers/variable.py
+++ b/couchpotato/core/helpers/variable.py
@@ -211,3 +211,6 @@ def randomString(size = 8, chars = string.ascii_uppercase + string.digits):
 def splitString(str, split_on = ',', clean = True):
     list = [x.strip() for x in str.split(split_on)] if str else []
     return filter(None, list) if clean else list
+
+def dictIsSubset(a, b):
+    """Return True when every key of `a` is present in `b` with an equal value."""
+    for key, expected in a.items():
+        if key not in b:
+            return False
+
+        if not (b[key] == expected):
+            return False
+
+    return True
diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index 59eb92c..3e7b576 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -38,7 +38,6 @@ class ShowSearcher(Plugin):
         addEvent('searcher.get_media_identifier', self.getMediaIdentifier)
         addEvent('searcher.get_media_root', self.getMediaRoot)
         addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId)
-        addEvent('searcher.get_media_titles', self.getMediaTitles)
 
     def single(self, media, search_protocols = None, manual = False):
         if media['type'] == 'show':
diff --git a/couchpotato/core/plugins/matcher/main.py b/couchpotato/core/plugins/matcher/main.py
index dda9311..d9b7600 100644
--- a/couchpotato/core/plugins/matcher/main.py
+++ b/couchpotato/core/plugins/matcher/main.py
@@ -2,7 +2,8 @@ import pprint
 from caper import Caper
 from couchpotato import CPLog, tryInt
 from couchpotato.core.event import addEvent, fireEvent
-from couchpotato.core.helpers.variable import possibleTitles
+from couchpotato.core.helpers.encoding import simplifyString
+from couchpotato.core.helpers.variable import possibleTitles, dictIsSubset
 from couchpotato.core.plugins.base import Plugin
 
 log = CPLog(__name__)
@@ -40,7 +41,7 @@ class Matcher(Plugin):
 
         for match in chain.info[group]:
             for ck, cv in match.items():
-                if ck in tags and self.cleanMatchValue(cv) in tags[ck]:
+                if ck in tags and simplifyString(cv) in tags[ck]:
                     found_tags.append(ck)
 
 
@@ -49,18 +50,6 @@ class Matcher(Plugin):
 
         return set([key for key, value in tags.items() if None not in value]) == set(found_tags)
 
-    def cleanMatchValue(self, value):
-        value = value.lower()
-        value = value.strip()
-
-        for ch in [' ', '-', '.']:
-            value = value.replace(ch, '')
-
-        return value
-
-    def dictIsSubset(self, a, b):
-        return all([k in b and b[k] == v for k, v in a.items()])
-
     def correctIdentifier(self, chain, media):
         required_id = fireEvent('searcher.get_media_identifier', media['library'], single = True)
 
@@ -78,7 +67,7 @@ class Matcher(Plugin):
         for k, v in identifier.items():
             identifier[k] = tryInt(v, None)
 
-        if not self.dictIsSubset(required_id, identifier):
+        if not dictIsSubset(required_id, identifier):
             log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(identifier)))
             return False
 

From 3fa21560be2917b05302869eee084f1f9a295f8b Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 16 Oct 2013 14:25:56 +1300
Subject: [PATCH 19/21] Moved 'searcher.create_releases' from Searcher to
 Release.

---
 couchpotato/core/media/_base/searcher/main.py | 48 ------------------------
 couchpotato/core/media/movie/searcher/main.py |  2 +-
 couchpotato/core/media/show/searcher/main.py  |  2 +-
 couchpotato/core/plugins/release/main.py      | 54 +++++++++++++++++++++++++--
 4 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py
index a986f3d..1f6e9f3 100644
--- a/couchpotato/core/media/_base/searcher/main.py
+++ b/couchpotato/core/media/_base/searcher/main.py
@@ -7,7 +7,6 @@ from couchpotato.core.logger import CPLog
 from couchpotato.core.media._base.searcher.base import SearcherBase
 from couchpotato.core.settings.model import Media, Release, ReleaseInfo
 from couchpotato.environment import Env
-from sqlalchemy.exc import InterfaceError
 from inspect import ismethod, isfunction
 import datetime
 import re
@@ -28,7 +27,6 @@ class Searcher(SearcherBase):
         addEvent('searcher.try_download_result', self.tryDownloadResult)
         addEvent('searcher.download', self.download)
         addEvent('searcher.search', self.search)
-        addEvent('searcher.create_releases', self.createReleases)
 
         addApiView('searcher.full_search', self.searchAllView, docs = {
             'desc': 'Starts a full search for all media',
@@ -174,52 +172,6 @@ class Searcher(SearcherBase):
 
         return sorted_results
 
-    def createReleases(self, search_results, media, quality_type):
-
-        available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
-        db = get_session()
-
-        found_releases = []
-
-        for rel in search_results:
-
-            rel_identifier = md5(rel['url'])
-            found_releases.append(rel_identifier)
-
-            rls = db.query(Release).filter_by(identifier = rel_identifier).first()
-            if not rls:
-                rls = Release(
-                    identifier = rel_identifier,
-                    media_id = media.get('id'),
-                    quality_id = quality_type.get('quality_id'),
-                    status_id = available_status.get('id')
-                )
-                db.add(rls)
-            else:
-                [db.delete(old_info) for old_info in rls.info]
-                rls.last_edit = int(time.time())
-
-            db.commit()
-
-            for info in rel:
-                try:
-                    if not isinstance(rel[info], (str, unicode, int, long, float)):
-                        continue
-
-                    rls_info = ReleaseInfo(
-                        identifier = info,
-                        value = toUnicode(rel[info])
-                    )
-                    rls.info.append(rls_info)
-                except InterfaceError:
-                    log.debug('Couldn\'t add %s to ReleaseInfo: %s', (info, traceback.format_exc()))
-
-            db.commit()
-
-            rel['status_id'] = rls.status_id
-
-        return found_releases
-
     def getSearchProtocols(self):
 
         download_protocols = fireEvent('download.enabled_protocols', merge = True)
diff --git a/couchpotato/core/media/movie/searcher/main.py b/couchpotato/core/media/movie/searcher/main.py
index 70b0278..0020c86 100644
--- a/couchpotato/core/media/movie/searcher/main.py
+++ b/couchpotato/core/media/movie/searcher/main.py
@@ -176,7 +176,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
                     break
 
                 # Add them to this movie releases list
-                found_releases += fireEvent('searcher.create_releases', results, movie, quality_type, single = True)
+                found_releases += fireEvent('release.create_from_search', results, movie, quality_type, single = True)
 
                 # Try find a valid result and download it
                 if fireEvent('searcher.try_download_result', results, movie, quality_type, manual, single = True):
diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index 3e7b576..1cf1718 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -106,7 +106,7 @@ class ShowSearcher(Plugin):
                     break
 
                 # Add them to this movie releases list
-                found_releases += fireEvent('searcher.create_releases', results, media, quality_type, single = True)
+                found_releases += fireEvent('release.create_from_search', results, media, quality_type, single = True)
 
                 # Try find a valid result and download it
                 if fireEvent('searcher.try_download_result', results, media, quality_type, manual, single = True):
diff --git a/couchpotato/core/plugins/release/main.py b/couchpotato/core/plugins/release/main.py
index 833ceec..aa7ed50 100644
--- a/couchpotato/core/plugins/release/main.py
+++ b/couchpotato/core/plugins/release/main.py
@@ -1,11 +1,12 @@
-from couchpotato import get_session
+from couchpotato import get_session, md5
 from couchpotato.api import addApiView
 from couchpotato.core.event import fireEvent, addEvent
-from couchpotato.core.helpers.encoding import ss
+from couchpotato.core.helpers.encoding import ss, toUnicode
 from couchpotato.core.logger import CPLog
 from couchpotato.core.plugins.base import Plugin
 from couchpotato.core.plugins.scanner.main import Scanner
-from couchpotato.core.settings.model import File, Release as Relea, Media
+from couchpotato.core.settings.model import File, Release as Relea, Media, ReleaseInfo
+from sqlalchemy.exc import InterfaceError
 from sqlalchemy.orm import joinedload_all
 from sqlalchemy.sql.expression import and_, or_
 import os
@@ -45,6 +46,7 @@ class Release(Plugin):
             }
         })
 
+        addEvent('release.create_from_search', self.createFromSearch)
         addEvent('release.for_movie', self.forMovie)
         addEvent('release.delete', self.delete)
         addEvent('release.clean', self.clean)
@@ -213,6 +215,52 @@ class Release(Plugin):
             'success': False
         }
 
+    def createFromSearch(self, search_results, media, quality_type):
+
+        available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
+        db = get_session()
+
+        found_releases = []
+
+        for rel in search_results:
+
+            rel_identifier = md5(rel['url'])
+            found_releases.append(rel_identifier)
+
+            rls = db.query(Relea).filter_by(identifier = rel_identifier).first()
+            if not rls:
+                rls = Relea(
+                    identifier = rel_identifier,
+                    media_id = media.get('id'),
+                    quality_id = quality_type.get('quality_id'),
+                    status_id = available_status.get('id')
+                )
+                db.add(rls)
+            else:
+                [db.delete(old_info) for old_info in rls.info]
+                rls.last_edit = int(time.time())
+
+            db.commit()
+
+            for info in rel:
+                try:
+                    if not isinstance(rel[info], (str, unicode, int, long, float)):
+                        continue
+
+                    rls_info = ReleaseInfo(
+                        identifier = info,
+                        value = toUnicode(rel[info])
+                    )
+                    rls.info.append(rls_info)
+                except InterfaceError:
+                    log.debug('Couldn\'t add %s to ReleaseInfo: %s', (info, traceback.format_exc()))
+
+            db.commit()
+
+            rel['status_id'] = rls.status_id
+
+        return found_releases
+
     def forMovie(self, id = None):
 
         db = get_session()

From 46d4d34da76cba0069ba45f873a4359e974691f8 Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 16 Oct 2013 14:48:06 +1300
Subject: [PATCH 20/21] Minor cleanup to Searcher and Matcher

---
 couchpotato/core/media/_base/searcher/main.py | 1 +
 couchpotato/core/plugins/matcher/main.py      | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py
index 1f6e9f3..0ea8b23 100644
--- a/couchpotato/core/media/_base/searcher/main.py
+++ b/couchpotato/core/media/_base/searcher/main.py
@@ -157,6 +157,7 @@ class Searcher(SearcherBase):
     def search(self, protocols, media, quality):
         results = []
 
+        # TODO could this be handled better? (removing the need for 'searcher.get_media_searcher_id')
         searcher_id = fireEvent('searcher.get_media_searcher_id', media['type'], single = True)
 
         for search_protocol in protocols:
diff --git a/couchpotato/core/plugins/matcher/main.py b/couchpotato/core/plugins/matcher/main.py
index d9b7600..64237e2 100644
--- a/couchpotato/core/plugins/matcher/main.py
+++ b/couchpotato/core/plugins/matcher/main.py
@@ -1,4 +1,3 @@
-import pprint
 from caper import Caper
 from couchpotato import CPLog, tryInt
 from couchpotato.core.event import addEvent, fireEvent
@@ -36,7 +35,7 @@ class Matcher(Plugin):
 
         return None
 
-    def chainMatches(self, chain, group, tags):
+    def chainMatch(self, chain, group, tags):
         found_tags = []
 
         for match in chain.info[group]:
@@ -83,6 +82,7 @@ class Matcher(Plugin):
         chain_words = [x.lower() for x in chain.info['show_name']]
 
         # Check show titles match
+        # TODO check xem names
         for title in root_library['info']['titles']:
             for valid_words in [x.split(' ') for x in possibleTitles(title)]:
 
@@ -102,7 +102,7 @@ class Matcher(Plugin):
 
         video_tags = quality_map[quality['identifier']]
 
-        if not self.chainMatches(chain, 'video', video_tags):
+        if not self.chainMatch(chain, 'video', video_tags):
             log.info2('Wrong: %s tags not in chain', video_tags)
             return False
 

From 180576f2b774e13a7fba8fa90d2f0960d242367c Mon Sep 17 00:00:00 2001
From: Dean Gardiner <gardiner91@gmail.com>
Date: Wed, 16 Oct 2013 14:53:22 +1300
Subject: [PATCH 21/21] Minor change to ShowSearcher.correctMatch logging

---
 couchpotato/core/media/show/searcher/main.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/couchpotato/core/media/show/searcher/main.py b/couchpotato/core/media/show/searcher/main.py
index 1cf1718..3aa1294 100644
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@@ -219,11 +219,7 @@ class ShowSearcher(Plugin):
             return False
 
         if not fireEvent('matcher.correct_title', chain, media):
-            log.info("Wrong: '%s', undetermined naming. Looking for '%s (%s)'", (
-                ' '.join(chain.info['show_name']),
-                'library_title',
-                media['library']['year'])
-            )
+            log.info("Wrong: '%s', undetermined naming.", (' '.join(chain.info['show_name'])))
             return False
 
         return True