Merge pull request #2356 from fuzeman/tv_searcher

[TV][Searcher] Release Matching and Snatching
12 years ago · 611c159373
23 changed files with 1122 additions and 374 deletions
--- a/couchpotato/core/helpers/variable.py
+++ b/couchpotato/core/helpers/variable.py
@ -211,3 +211,6 @@ def randomString(size = 8, chars = string.ascii_uppercase + string.digits):
 def splitString(str, split_on = ',', clean = True):
    list = [x.strip() for x in str.split(split_on)] if str else []
    return filter(None, list) if clean else list
+
+def dictIsSubset(a, b):
+    """Return True when every key/value pair of `a` is also present in `b`.
+
+    An empty `a` is a subset of any `b`.
+    """
+    # Generator expression lets all() stop at the first mismatch instead of
+    # evaluating every pair up front
+    return all(k in b and b[k] == v for k, v in a.items())
--- a/couchpotato/core/media/_base/searcher/main.py
+++ b/couchpotato/core/media/_base/searcher/main.py
@ -7,7 +7,6 @@ from couchpotato.core.logger import CPLog
 from couchpotato.core.media._base.searcher.base import SearcherBase
 from couchpotato.core.settings.model import Media, Release, ReleaseInfo
 from couchpotato.environment import Env
-from sqlalchemy.exc import InterfaceError
 from inspect import ismethod, isfunction
 import datetime
 import re
@ -25,9 +24,9 @@ class Searcher(SearcherBase):
        addEvent('searcher.correct_year', self.correctYear)
        addEvent('searcher.correct_name', self.correctName)
        addEvent('searcher.correct_words', self.correctWords)
+        addEvent('searcher.try_download_result', self.tryDownloadResult)
        addEvent('searcher.download', self.download)
        addEvent('searcher.search', self.search)
-        addEvent('searcher.create_releases', self.createReleases)

        addApiView('searcher.full_search', self.searchAllView, docs = {
            'desc': 'Starts a full search for all media',
@ -53,27 +52,51 @@ class Searcher(SearcherBase):
        progress = fireEvent('searcher.progress', merge = True)
        return progress

-    def download(self, data, movie, manual = False):
+    def tryDownloadResult(self, results, media, quality_type, manual = False):
+        """Walk `results` in order and download the first acceptable release.
+
+        A result is skipped when its age is within the `wait_for` days of
+        `quality_type` (only while `finish` is not set), when its status is
+        ignored or failed, or when its score is zero or lower.
+
+        Returns True once 'searcher.download' returns True, otherwise False.
+        """
+        ignored_status, failed_status = fireEvent('status.get', ['ignored', 'failed'], single = True)

-        if not data.get('protocol'):
-            data['protocol'] = data['type']
-            data['type'] = 'movie'
+        for rel in results:
+            # Age within the wait_for window: skip, unless `finish` is set
+            if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and rel.get('age') <= quality_type.get('wait_for', 0):
+                log.info('Ignored, waiting %s days: %s', (quality_type.get('wait_for'), rel['name']))
+                continue
+
+            if rel['status_id'] in [ignored_status.get('id'), failed_status.get('id')]:
+                log.info('Ignored: %s', rel['name'])
+                continue
+
+            if rel['score'] <= 0:
+                log.info('Ignored, score to low: %s', rel['name'])
+                continue
+
+            downloaded = fireEvent('searcher.download', data = rel, media = media, manual = manual, single = True)
+            if downloaded is True:
+                return True
+            elif downloaded != 'try_next':
+                # Any answer other than 'try_next' ends the attempt
+                break
+
+        return False
+
+    def download(self, data, media, manual = False):
+
+        # TODO what is this for?
+        #if not data.get('protocol'):
+        #    data['protocol'] = data['type']
+        #    data['type'] = 'movie'

        # Test to see if any downloaders are enabled for this type
        downloader_enabled = fireEvent('download.enabled', manual, data, single = True)

        if downloader_enabled:
+            snatched_status, active_status, done_status = fireEvent('status.get', ['snatched', 'active', 'done'], single = True)

-            snatched_status = fireEvent('status.get', 'snatched', single = True)
-
-            # Download movie to temp
+            # Download release to temp
            filedata = None
            if data.get('download') and (ismethod(data.get('download')) or isfunction(data.get('download'))):
                filedata = data.get('download')(url = data.get('url'), nzb_id = data.get('id'))
                if filedata == 'try_next':
                    return filedata

-            download_result = fireEvent('download', data = data, movie = movie, manual = manual, filedata = filedata, single = True)
+            download_result = fireEvent('download', data = data, movie = media, manual = manual, filedata = filedata, single = True)
            log.debug('Downloader result: %s', download_result)

            if download_result:
@ -84,7 +107,6 @@ class Searcher(SearcherBase):
                    if rls:
                        renamer_enabled = Env.setting('enabled', 'renamer')

-                        done_status = fireEvent('status.get', 'done', single = True)
                        rls.status_id = done_status.get('id') if not renamer_enabled else snatched_status.get('id')

                        # Save download-id info if returned
@ -97,36 +119,34 @@ class Searcher(SearcherBase):
                                rls.info.append(rls_info)
                        db.commit()

-                        log_movie = '%s (%s) in %s' % (getTitle(movie['library']), movie['library']['year'], rls.quality.label)
+                        log_movie = '%s (%s) in %s' % (getTitle(media['library']), media['library']['year'], rls.quality.label)
                        snatch_message = 'Snatched "%s": %s' % (data.get('name'), log_movie)
                        log.info(snatch_message)
-                        fireEvent('movie.snatched', message = snatch_message, data = rls.to_dict())
+                        fireEvent('%s.snatched' % data['type'], message = snatch_message, data = rls.to_dict())

-                        # If renamer isn't used, mark movie done
+                        # If renamer isn't used, mark media done
                        if not renamer_enabled:
-                            active_status = fireEvent('status.get', 'active', single = True)
-                            done_status = fireEvent('status.get', 'done', single = True)
                            try:
-                                if movie['status_id'] == active_status.get('id'):
-                                    for profile_type in movie['profile']['types']:
+                                if media['status_id'] == active_status.get('id'):
+                                    for profile_type in media['profile']['types']:
                                        if profile_type['quality_id'] == rls.quality.id and profile_type['finish']:
-                                            log.info('Renamer disabled, marking movie as finished: %s', log_movie)
+                                            log.info('Renamer disabled, marking media as finished: %s', log_movie)

                                            # Mark release done
                                            rls.status_id = done_status.get('id')
                                            rls.last_edit = int(time.time())
                                            db.commit()

-                                            # Mark movie done
-                                            mvie = db.query(Media).filter_by(id = movie['id']).first()
-                                            mvie.status_id = done_status.get('id')
-                                            mvie.last_edit = int(time.time())
+                                            # Mark media done
+                                            mdia = db.query(Media).filter_by(id = media['id']).first()
+                                            mdia.status_id = done_status.get('id')
+                                            mdia.last_edit = int(time.time())
                                            db.commit()
                            except:
-                                log.error('Failed marking movie finished, renamer disabled: %s', traceback.format_exc())
+                                log.error('Failed marking media finished, renamer disabled: %s', traceback.format_exc())

                except:
-                    log.error('Failed marking movie finished: %s', traceback.format_exc())
+                    log.error('Failed marking media finished: %s', traceback.format_exc())

                return True

@ -137,14 +157,11 @@ class Searcher(SearcherBase):
    def search(self, protocols, media, quality):
        results = []

-        search_type = None
-        if media['type'] == 'movie':
-            search_type = 'movie'
-        elif media['type'] in ['show', 'season', 'episode']:
-            search_type = 'show'
+        # TODO could this be handled better? (removing the need for 'searcher.get_media_searcher_id')
+        searcher_id = fireEvent('searcher.get_media_searcher_id', media['type'], single = True)

        for search_protocol in protocols:
-            protocol_results = fireEvent('provider.search.%s.%s' % (search_protocol, search_type), media, quality, merge = True)
+            protocol_results = fireEvent('provider.search.%s.%s' % (search_protocol, searcher_id), media, quality, merge = True)
            if protocol_results:
                results += protocol_results

@ -156,52 +173,6 @@ class Searcher(SearcherBase):

        return sorted_results

-    def createReleases(self, search_results, media, quality_type):
-
-        available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
-        db = get_session()
-
-        found_releases = []
-
-        for rel in search_results:
-
-            nzb_identifier = md5(rel['url'])
-            found_releases.append(nzb_identifier)
-
-            rls = db.query(Release).filter_by(identifier = nzb_identifier).first()
-            if not rls:
-                rls = Release(
-                    identifier = nzb_identifier,
-                    media_id = media.get('id'),
-                    quality_id = quality_type.get('quality_id'),
-                    status_id = available_status.get('id')
-                )
-                db.add(rls)
-            else:
-                [db.delete(old_info) for old_info in rls.info]
-                rls.last_edit = int(time.time())
-
-            db.commit()
-
-            for info in rel:
-                try:
-                    if not isinstance(rel[info], (str, unicode, int, long, float)):
-                        continue
-
-                    rls_info = ReleaseInfo(
-                        identifier = info,
-                        value = toUnicode(rel[info])
-                    )
-                    rls.info.append(rls_info)
-                except InterfaceError:
-                    log.debug('Couldn\'t add %s to ReleaseInfo: %s', (info, traceback.format_exc()))
-
-            db.commit()
-
-            rel['status_id'] = rls.status_id
-
-        return found_releases
-
    def getSearchProtocols(self):

        download_protocols = fireEvent('download.enabled_protocols', merge = True)
--- a/couchpotato/core/media/movie/searcher/main.py
+++ b/couchpotato/core/media/movie/searcher/main.py
@ -31,6 +31,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
        addEvent('movie.searcher.could_be_released', self.couldBeReleased)
        addEvent('searcher.correct_release', self.correctRelease)
        addEvent('searcher.get_search_title', self.getSearchTitle)
+        addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId)

        addApiView('movie.searcher.try_next', self.tryNextReleaseView, docs = {
            'desc': 'Marks the snatched results as ignored and try the next best release',
@ -175,27 +176,11 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
                    break

                # Add them to this movie releases list
-                found_releases += fireEvent('searcher.create_releases', results, movie, quality_type, single = True)
+                found_releases += fireEvent('release.create_from_search', results, movie, quality_type, single = True)

-                for nzb in results:
-                    if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and nzb.get('age') <= quality_type.get('wait_for', 0):
-                        log.info('Ignored, waiting %s days: %s', (quality_type.get('wait_for'), nzb['name']))
-                        continue
-
-                    if nzb['status_id'] in [ignored_status.get('id'), failed_status.get('id')]:
-                        log.info('Ignored: %s', nzb['name'])
-                        continue
-
-                    if nzb['score'] <= 0:
-                        log.info('Ignored, score to low: %s', nzb['name'])
-                        continue
-
-                    downloaded = fireEvent('searcher.download', data = nzb, movie = movie, manual = manual, single = True)
-                    if downloaded is True:
-                        ret = True
-                        break
-                    elif downloaded != 'try_next':
-                        break
+                # Try to find a valid result and download it
+                if fireEvent('searcher.try_download_result', results, movie, quality_type, manual, single = True):
+                    ret = True

                # Remove releases that aren't found anymore
                for release in movie.get('releases', []):
@ -359,5 +344,9 @@ class MovieSearcher(SearcherBase, MovieTypeBase):
        if media['type'] == 'movie':
            return getTitle(media['library'])

+    def getMediaSearcherId(self, media_type):
+        """Return the searcher id 'movie' for movie media, None for anything else."""
+        return 'movie' if media_type == 'movie' else None
+
 class SearchSetupError(Exception):
    pass
--- a/couchpotato/core/media/show/searcher/main.py
+++ b/couchpotato/core/media/show/searcher/main.py
@ -1,14 +1,12 @@
-import pprint
-import re
 from couchpotato import get_session, Env
 from couchpotato.core.event import addEvent, fireEvent
-from couchpotato.core.helpers.encoding import simplifyString
-from couchpotato.core.helpers.variable import getTitle, tryInt, possibleTitles
+from couchpotato.core.helpers.variable import getTitle, tryInt
 from couchpotato.core.logger import CPLog
 from couchpotato.core.media._base.searcher.main import SearchSetupError
 from couchpotato.core.plugins.base import Plugin
 from couchpotato.core.settings.model import Media, Library
-from caper import Caper
+from qcond import QueryCondenser
+from qcond.helpers import simplify

 log = CPLog(__name__)

@ -29,38 +27,19 @@ class ShowSearcher(Plugin):
    def __init__(self):
+        """Create the query condenser and register this plugin's event handlers."""
        super(ShowSearcher, self).__init__()

+        self.query_condenser = QueryCondenser()
+
        addEvent('show.searcher.single', self.single)
-        addEvent('searcher.correct_release', self.correctRelease)
        addEvent('searcher.get_search_title', self.getSearchTitle)

-        self.caper = Caper()
-
-    def _lookupMedia(self, media):
-        db = get_session()
-
-        media_library = db.query(Library).filter_by(id = media['library_id']).first()
-
-        show = None
-        season = None
-        episode = None
-
-        if media['type'] == 'episode':
-            show = media_library.parent.parent
-            season = media_library.parent
-            episode = media_library
-
-        if media['type'] == 'season':
-            show = media_library.parent
-            season = media_library
-
-        if media['type'] == 'show':
-            show = media_library
-
-        return show, season, episode
+        addEvent('searcher.correct_match', self.correctMatch)
+        addEvent('searcher.correct_release', self.correctRelease)

-    def single(self, media, search_protocols = None):
-        pprint.pprint(media)
+        addEvent('searcher.get_media_identifier', self.getMediaIdentifier)
+        addEvent('searcher.get_media_root', self.getMediaRoot)
+        addEvent('searcher.get_media_searcher_id', self.getMediaSearcherId)

+    def single(self, media, search_protocols = None, manual = False):
        if media['type'] == 'show':
            # TODO handle show searches (scan all seasons)
            return
@ -72,7 +51,7 @@ class ShowSearcher(Plugin):
        except SearchSetupError:
            return

-        done_status = fireEvent('status.get', 'done', single = True)
+        done_status, available_status, ignored_status, failed_status = fireEvent('status.get', ['done', 'available', 'ignored', 'failed'], single = True)

        if not media['profile'] or media['status_id'] == done_status.get('id'):
            log.debug('Episode doesn\'t have a profile or already done, assuming in manage tab.')
@ -80,19 +59,18 @@ class ShowSearcher(Plugin):

        db = get_session()

-        pre_releases = fireEvent('quality.pre_releases', single = True)
-        available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
+        #pre_releases = fireEvent('quality.pre_releases', single = True)

        found_releases = []
        too_early_to_search = []

-        default_title = self.getSearchTitle(media['library'])
+        default_title = self.getSearchTitle(media)
        if not default_title:
            log.error('No proper info found for episode, removing it from library to cause it from having more issues.')
            #fireEvent('episode.delete', episode['id'], single = True)
            return

-        show, season, episode = self._lookupMedia(media)
+        show, season, episode = self.getMedia(media)
        if show is None or season is None:
            log.error('Unable to find show or season library in database, missing required data for searching')
            return
@ -128,9 +106,81 @@ class ShowSearcher(Plugin):
                    break

                # Add them to this movie releases list
-                found_releases += fireEvent('searcher.create_releases', results, media, quality_type, single = True)
+                found_releases += fireEvent('release.create_from_search', results, media, quality_type, single = True)
+
+                # Try to find a valid result and download it
+                if fireEvent('searcher.try_download_result', results, media, quality_type, manual, single = True):
+                    ret = True
+
+                # Remove releases that aren't found anymore
+                for release in media.get('releases', []):
+                    if release.get('status_id') == available_status.get('id') and release.get('identifier') not in found_releases:
+                        fireEvent('release.delete', release.get('id'), single = True)
+            else:
+                log.info('Better quality (%s) already available or snatched for %s', (quality_type['quality']['label'], default_title))
+                fireEvent('movie.restatus', media['id'])
+                break
+
+            # Break if CP wants to shut down
+            if self.shuttingDown() or ret:
+                break
+
+        if len(too_early_to_search) > 0:
+            log.info2('Too early to search for %s, %s', (too_early_to_search, default_title))
+
+        fireEvent('notify.frontend', type = 'show.searcher.ended.%s' % media['id'], data = True)
+
+        return ret
+
+    def getSearchTitle(self, media):
+        """Build the search title for a show, season or episode.
+
+        Collects the season `map_names` (when present) and the show titles,
+        takes the first title returned by the query condenser (falling back
+        to the simplified first raw title) and appends an ' Sxx' / 'Eyy'
+        identifier. Returns None for other media types, when the show cannot
+        be found, or when there are no titles at all.
+        """
+        if media['type'] not in ['show', 'season', 'episode']:
+            return
+
+        show, season, episode = self.getMedia(media)
+        if show is None:
+            return None
+
+        titles = []
+
+        # Add season map_names if they exist
+        if season is not None and 'map_names' in show.info:
+            season_names = show.info['map_names'].get(str(season.season_number), {})
+
+            # Add titles from all locations
+            # TODO only add name maps from a specific location
+            for location, names in season_names.items():
+                titles += [name for name in names if name not in titles]
+
+        # Add show titles
+        titles += [title.title for title in show.titles if title.title not in titles]
+
+        # Use QueryCondenser to build a list of optimal search titles
+        condensed_titles = self.query_condenser.distinct(titles)
+
+        title = None
+
+        # TODO try other titles if searching doesn't return results
+
+        if len(condensed_titles):
+            # Return the first condensed title if one exists
+            title = condensed_titles[0]
+        elif len(titles):
+            # Fallback to first raw title
+            title = simplify(titles[0])
+        else:
+            return None
+
+        # Add the identifier to search title
+        # TODO supporting other identifier formats
+        identifier = fireEvent('searcher.get_media_identifier', media['library'], single = True)
+
+        # NOTE(review): these are truthiness checks, so a season or episode
+        # number of 0 is left out of the title - confirm that is intended
+        if identifier['season']:
+            title += ' S%02d' % identifier['season']

-                log.info('%d results found' % len(results))
+            if identifier['episode']:
+                title += 'E%02d' % identifier['episode']
+
+        return title

    def correctRelease(self, release = None, media = None, quality = None, **kwargs):

@ -146,128 +196,97 @@ class ShowSearcher(Plugin):
        if not fireEvent('searcher.correct_words', release['name'], media, single = True):
            return False

-        show, season, episode = self._lookupMedia(media)
+        show, season, episode = self.getMedia(media)
        if show is None or season is None:
            log.error('Unable to find show or season library in database, missing required data for searching')
            return

-        release_info = self.caper.parse(release['name'])
-        if len(release_info.chains) < 1:
-            log.info2('Wrong: %s, unable to parse release name (no chains)', release['name'])
-            return False
+        match = fireEvent('matcher.best', release, media, quality, single = True)
+        if match:
+            return match.weight
+
+        return False

-        # TODO look at all chains
-        chain = release_info.chains[0]
+    def correctMatch(self, chain, release, media, quality):
+        """Check one parsed chain of `release` against `media` and `quality`.
+
+        Fires the matcher quality, identifier and title checks in that order
+        and returns False at the first one that fails, True when all pass.
+        """
+        log.info("Checking if '%s' is valid", release['name'])

-        if not self.correctQuality(chain, quality['identifier']):
+        if not fireEvent('matcher.correct_quality', chain, quality, self.quality_map, single = True):
            log.info('Wrong: %s, quality does not match', release['name'])
            return False

-        if not self.correctIdentifier(chain, media):
+        if not fireEvent('matcher.correct_identifier', chain, media, single = True):
            log.info('Wrong: %s, identifier does not match', release['name'])
            return False

-        if 'show_name' not in chain.info or not len(chain.info['show_name']):
-            log.info('Wrong: %s, missing show name in parsed result', release['name'])
-            return False
-
-        chain_words = [x.lower() for x in chain.info['show_name']]
-        chain_title = ' '.join(chain_words)
-
-        library_title = None
-
-        # Check show titles match
-        for raw_title in show.titles:
-            for valid_words in [x.split(' ') for x in possibleTitles(raw_title.title)]:
-                if not library_title:
-                    library_title = ' '.join(valid_words)
-
-                if valid_words == chain_words:
-                    return True
-
-        log.info("Wrong: title '%s', undetermined show naming. Looking for '%s (%s)'", (chain_title, library_title, media['library']['year']))
-        return False
-
-    def correctQuality(self, chain, quality_identifier):
-        if quality_identifier not in self.quality_map:
-            log.info2('Wrong: unknown preferred quality %s for TV searching', quality_identifier)
-            return False
-
-        if 'video' not in chain.info:
-            log.info2('Wrong: no video tags found')
-            return False
-
-        video_tags = self.quality_map[quality_identifier]
-
-        if not self.chainMatches(chain, 'video', video_tags):
-            log.info2('Wrong: %s tags not in chain', video_tags)
+        if not fireEvent('matcher.correct_title', chain, media, single = True):
+            # The title check also fails when the chain has no 'show_name',
+            # so the key must not be indexed blindly while logging
+            show_name = ' '.join(chain.info['show_name']) if 'show_name' in chain.info else ''
+            log.info("Wrong: '%s', undetermined naming.", show_name)
            return False

        return True

-    def correctIdentifier(self, chain, media):
-        required_id = self.getIdentifier(media['library'], 'season_number', 'episode_number')
-
-        if 'identifier' not in chain.info:
-            return False
-
-        # TODO could be handled better?
-        if len(chain.info['identifier']) != 1:
-            return False
-        identifier = chain.info['identifier'][0]
+    def getMediaIdentifier(self, media_library):
+        """Return the season/episode identifier for a show library dict.
+
+        The result is a dict with 'season' and 'episode' keys, each passed
+        through `tryInt(value, None)`. For episodes the 'scene' entry of
+        `info['map_episode']` is used when present, otherwise the library's
+        own season and episode numbers. Returns None for other media types.
+        """
+        if media_library['type'] not in ['show', 'season', 'episode']:
+            return None

-        # TODO air by date episodes
-        release_id = self.getIdentifier(identifier, 'season', 'episode')
+        identifier = {
+            'season': None,
+            'episode': None
+        }

-        if required_id != release_id:
-            log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(release_id)))
-            return False
+        if media_library['type'] == 'episode':
+            map_episode = media_library['info'].get('map_episode')

-        return True
+            if map_episode and 'scene' in map_episode:
+                identifier['season'] = map_episode['scene'].get('season')
+                identifier['episode'] = map_episode['scene'].get('episode')
+            else:
+                # TODO xem mapping?
+                identifier['season'] = media_library.get('season_number')
+                identifier['episode'] = media_library.get('episode_number')

-    def getIdentifier(self, d, episode_key, season_key):
-        return (
-            tryInt(d.get(season_key), None) if season_key in d else None,
-            tryInt(d.get(episode_key), None) if episode_key in d else None
-        )
+        if media_library['type'] == 'season':
+            identifier['season'] = media_library.get('season_number')

-    def chainMatches(self, chain, group, tags):
-        found_tags = []
+        # Try cast identifier values to integers
+        identifier['season'] = tryInt(identifier['season'], None)
+        identifier['episode'] = tryInt(identifier['episode'], None)

-        for match in chain.info[group]:
-            for ck, cv in match.items():
-                if ck in tags and self.cleanMatchValue(cv) in tags[ck]:
-                    found_tags.append(ck)
+        return identifier

+    def getMediaRoot(self, media):
+        """Return the show library of `media` as a dict (via `to_dict()`).
+
+        Returns None for media types other than show, season or episode, and
+        when the show or season library cannot be found.
+        """
+        if media['type'] not in ['show', 'season', 'episode']:
+            return None

-        if set(tags.keys()) == set(found_tags):
-            return True
+        show, season, episode = self.getMedia(media)
+        # NOTE(review): getMedia leaves `season` as None for media of type
+        # 'show', so those take this error path as well - confirm intended
+        if show is None or season is None:
+            log.error('Unable to find show or season library in database, missing required data for searching')
+            return

-        return set([key for key, value in tags.items() if value]) == set(found_tags)
+        return show.to_dict()

-    def cleanMatchValue(self, value):
-        value = value.lower()
-        value = value.strip()
+    def getMediaSearcherId(self, media_type):
+        """Map the show, season and episode media types to the 'show' searcher id."""
+        handled = media_type in ['show', 'season', 'episode']
+        return 'show' if handled else None

-        for ch in [' ', '-', '.']:
-            value = value.replace(ch, '')
+    def getMedia(self, media):
+        """Look up the show, season and episode libraries for `media`.
+
+        Loads the Library row with id `media['library_id']` and walks up its
+        `parent` links according to `media['type']`. Returns a
+        `(show, season, episode)` tuple; levels that do not apply to the
+        media type are None, and all three are None when the row is missing.
+        """
+        db = get_session()

-        return value
+        media_library = db.query(Library).filter_by(id = media['library_id']).first()

-    def getSearchTitle(self, media):
-        show, season, episode = self._lookupMedia(media)
-        if show is None:
-            return None
+        show = None
+        season = None
+        episode = None
+
+        # first() gives None when no Library row matches; callers already
+        # check `show is None`, so hand back the empty result instead of
+        # failing on `.parent`
+        if media_library is None:
+            return show, season, episode

-        name = ''
-        if season is not None:
-            name = ' S%02d' % season.season_number
+        if media['type'] == 'episode':
+            show = media_library.parent.parent
+            season = media_library.parent
+            episode = media_library

-            if episode is not None:
-                name += 'E%02d' % episode.episode_number
+        if media['type'] == 'season':
+            show = media_library.parent
+            season = media_library

-        show_title = getTitle(show)
-        if not show_title:
-            return None
+        if media['type'] == 'show':
+            show = media_library

-        return show_title + name
+        return show, season, episode
--- a/couchpotato/core/plugins/matcher/init.py
+++ b/couchpotato/core/plugins/matcher/init.py
@ -0,0 +1,6 @@
+from .main import Matcher
+
+def start():
+    """Return a new Matcher instance."""
+    return Matcher()
+
+config = []
--- a/couchpotato/core/plugins/matcher/main.py
+++ b/couchpotato/core/plugins/matcher/main.py
@ -0,0 +1,109 @@
+from caper import Caper
+from couchpotato import CPLog, tryInt
+from couchpotato.core.event import addEvent, fireEvent
+from couchpotato.core.helpers.encoding import simplifyString
+from couchpotato.core.helpers.variable import possibleTitles, dictIsSubset
+from couchpotato.core.plugins.base import Plugin
+
+log = CPLog(__name__)
+
+
+class Matcher(Plugin):
+    """Parses release names with Caper and checks the parsed chains against media."""
+
+    def __init__(self):
+        self.caper = Caper()
+
+        addEvent('matcher.parse', self.parse)
+        addEvent('matcher.best', self.best)
+
+        addEvent('matcher.correct_title', self.correctTitle)
+        addEvent('matcher.correct_identifier', self.correctIdentifier)
+        addEvent('matcher.correct_quality', self.correctQuality)
+
+    def parse(self, release):
+        """Parse `release['name']` with Caper and return the parse result."""
+        return self.caper.parse(release['name'])
+
+    def best(self, release, media, quality):
+        """Return the first parsed chain accepted by 'searcher.correct_match'.
+
+        Returns False when the release name yields no chains and None when
+        no chain is accepted, so callers should only test the result for
+        truth.
+        """
+        rel_info = fireEvent('matcher.parse', release, single = True)
+
+        if len(rel_info.chains) < 1:
+            log.info2('Wrong: %s, unable to parse release name (no chains)', release['name'])
+            return False
+
+        for chain in rel_info.chains:
+            if fireEvent('searcher.correct_match', chain, release, media, quality, single = True):
+                return chain
+
+        return None
+
+    def chainMatch(self, chain, group, tags):
+        """Compare the chain's `group` entries with the wanted `tags`.
+
+        `tags` maps a tag key to its accepted values. The match succeeds
+        when the keys found in the chain equal all tag keys, or equal
+        exactly the keys whose accepted values do not include None.
+        """
+        found_tags = set()
+
+        for match in chain.info[group]:
+            for ck, cv in match.items():
+                if ck in tags and simplifyString(cv) in tags[ck]:
+                    found_tags.add(ck)
+
+        if set(tags.keys()) == found_tags:
+            return True
+
+        # Keys that accept None are left out of the required set
+        required_tags = set(key for key, value in tags.items() if None not in value)
+        return required_tags == found_tags
+
+    def correctIdentifier(self, chain, media):
+        """Check that the chain's single identifier covers the media identifier.
+
+        Returns False when no media identifier is available, when the chain
+        has no identifier or more than one, or when the required values are
+        not all present in the chain's identifier.
+        """
+        required_id = fireEvent('searcher.get_media_identifier', media['library'], single = True)
+
+        # Nothing to compare against, dictIsSubset would fail on None
+        if required_id is None:
+            log.info2('Wrong: no identifier available for the media')
+            return False
+
+        if 'identifier' not in chain.info:
+            return False
+
+        # TODO could be handled better?
+        if len(chain.info['identifier']) != 1:
+            return False
+
+        # TODO air by date episodes
+
+        # TODO this should support identifiers with characters 'a', 'b', etc..
+        # Cast into a fresh dict so the parsed chain itself is left untouched
+        identifier = dict((k, tryInt(v, None)) for k, v in chain.info['identifier'][0].items())
+
+        if not dictIsSubset(required_id, identifier):
+            log.info2('Wrong: required identifier %s does not match release identifier %s', (str(required_id), str(identifier)))
+            return False
+
+        return True
+
+    def correctTitle(self, chain, media):
+        """Check that the chain's show name equals one of the root library titles.
+
+        Returns False when the chain has no show name, when no root library
+        is returned for the media, or when none of the possible titles
+        match the lowercased show name words.
+        """
+        root_library = fireEvent('searcher.get_media_root', media['library'], single = True)
+
+        if 'show_name' not in chain.info or not len(chain.info['show_name']):
+            log.info('Wrong: missing show name in parsed result')
+            return False
+
+        # The root lookup can come back empty, there are no titles to check then
+        if not root_library:
+            log.info('Wrong: unable to find root library to check the title against')
+            return False
+
+        chain_words = [x.lower() for x in chain.info['show_name']]
+
+        # Check show titles match
+        # TODO check xem names
+        for title in root_library['info']['titles']:
+            for valid_words in [x.split(' ') for x in possibleTitles(title)]:
+
+                if valid_words == chain_words:
+                    return True
+
+        return False
+
+    def correctQuality(self, chain, quality, quality_map):
+        """Check the chain's video tags against the tags mapped to `quality`.
+
+        `quality_map` maps a quality identifier to the video tags expected
+        for it. Returns False for an unknown quality identifier, a chain
+        without video info, or tags that do not match.
+        """
+        if quality['identifier'] not in quality_map:
+            log.info2('Wrong: unknown preferred quality %s', quality['identifier'])
+            return False
+
+        if 'video' not in chain.info:
+            log.info2('Wrong: no video tags found')
+            return False
+
+        video_tags = quality_map[quality['identifier']]
+
+        if not self.chainMatch(chain, 'video', video_tags):
+            log.info2('Wrong: %s tags not in chain', video_tags)
+            return False
+
+        return True
--- a/couchpotato/core/plugins/release/main.py
+++ b/couchpotato/core/plugins/release/main.py
@ -1,11 +1,12 @@
-from couchpotato import get_session
+from couchpotato import get_session, md5
 from couchpotato.api import addApiView
 from couchpotato.core.event import fireEvent, addEvent
-from couchpotato.core.helpers.encoding import ss
+from couchpotato.core.helpers.encoding import ss, toUnicode
 from couchpotato.core.logger import CPLog
 from couchpotato.core.plugins.base import Plugin
 from couchpotato.core.plugins.scanner.main import Scanner
-from couchpotato.core.settings.model import File, Release as Relea, Media
+from couchpotato.core.settings.model import File, Release as Relea, Media, ReleaseInfo
+from sqlalchemy.exc import InterfaceError
 from sqlalchemy.orm import joinedload_all
 from sqlalchemy.sql.expression import and_, or_
 import os
@ -45,6 +46,7 @@ class Release(Plugin):
            }
        })

+        addEvent('release.create_from_search', self.createFromSearch)
        addEvent('release.for_movie', self.forMovie)
        addEvent('release.delete', self.delete)
        addEvent('release.clean', self.clean)
@ -191,7 +193,7 @@ class Release(Plugin):
            if item.get('protocol') != 'torrent_magnet':
                item['download'] = provider.loginDownload if provider.urls.get('login') else provider.download

-            success = fireEvent('searcher.download', data = item, movie = rel.media.to_dict({
+            success = fireEvent('searcher.download', data = item, media = rel.media.to_dict({
                'profile': {'types': {'quality': {}}},
                'releases': {'status': {}, 'quality': {}},
                'library': {'titles': {}, 'files':{}},
@ -213,6 +215,52 @@ class Release(Plugin):
            'success': False
        }

+    def createFromSearch(self, search_results, media, quality_type):
+
+        available_status, ignored_status, failed_status = fireEvent('status.get', ['available', 'ignored', 'failed'], single = True)
+        db = get_session()
+
+        found_releases = []
+
+        for rel in search_results:
+
+            rel_identifier = md5(rel['url'])
+            found_releases.append(rel_identifier)
+
+            rls = db.query(Relea).filter_by(identifier = rel_identifier).first()
+            if not rls:
+                rls = Relea(
+                    identifier = rel_identifier,
+                    media_id = media.get('id'),
+                    quality_id = quality_type.get('quality_id'),
+                    status_id = available_status.get('id')
+                )
+                db.add(rls)
+            else:
+                [db.delete(old_info) for old_info in rls.info]
+                rls.last_edit = int(time.time())
+
+            db.commit()
+
+            for info in rel:
+                try:
+                    if not isinstance(rel[info], (str, unicode, int, long, float)):
+                        continue
+
+                    rls_info = ReleaseInfo(
+                        identifier = info,
+                        value = toUnicode(rel[info])
+                    )
+                    rls.info.append(rls_info)
+                except InterfaceError:
+                    log.debug('Couldn\'t add %s to ReleaseInfo: %s', (info, traceback.format_exc()))
+
+            db.commit()
+
+            rel['status_id'] = rls.status_id
+
+        return found_releases
+
    def forMovie(self, id = None):

        db = get_session()
--- a/couchpotato/core/providers/base.py
+++ b/couchpotato/core/providers/base.py
@ -274,7 +274,10 @@ class YarrProvider(Provider):
            if identifier in qualities:
                return ids

-        return [self.cat_backup_id]
+        if self.cat_backup_id:
+            return [self.cat_backup_id]
+
+        return []


 class ResultList(list):
@ -302,12 +305,23 @@ class ResultList(list):

        new_result = self.fillResult(result)

-        is_correct_movie = fireEvent('searcher.correct_release', new_result, self.movie, self.quality,
+        is_correct = fireEvent('searcher.correct_release', new_result, self.movie, self.quality,
                                     imdb_results = self.kwargs.get('imdb_results', False), single = True)

-        if is_correct_movie and new_result['id'] not in self.result_ids:
+        if is_correct and new_result['id'] not in self.result_ids:
+            is_correct_weight = float(is_correct)
+
            new_result['score'] += fireEvent('score.calculate', new_result, self.movie, single = True)

+            old_score = new_result['score']
+            new_result['score'] = int(old_score * is_correct_weight)
+
+            log.info('Found correct release with weight %.02f, old_score(%d) now scaled to score(%d)', (
+                is_correct_weight,
+                old_score,
+                new_result['score']
+            ))
+
            self.found(new_result)
            self.result_ids.append(result['id'])

--- a/couchpotato/core/providers/torrent/iptorrents/main.py
+++ b/couchpotato/core/providers/torrent/iptorrents/main.py
@ -23,7 +23,7 @@ class Base(TorrentProvider):
        'base_url' : 'http://www.iptorrents.com',
        'login' : 'http://www.iptorrents.com/torrents/',
        'login_check': 'http://www.iptorrents.com/inbox.php',
-        'search' : 'http://www.iptorrents.com/torrents/?l%d=1%%s&q=%s&qf=ti&p=%%d',
+        'search' : 'http://www.iptorrents.com/torrents/?%s%%s&q=%s&qf=ti&p=%%d',
    }

    http_time_between_calls = 1 #seconds
@ -31,12 +31,13 @@ class Base(TorrentProvider):

    def _buildUrl(self, query, quality_identifier, cat_ids_group = None):

-        cat_id = self.getCatId(quality_identifier, cat_ids_group)[0]
-        if not cat_id:
+        cat_ids = self.getCatId(quality_identifier, cat_ids_group)
+
+        if not cat_ids or not len(cat_ids):
            log.warning('Unable to find category for quality %s', quality_identifier)
            return

-        return self.urls['search'] % (cat_id, tryUrlencode(query).replace('%', '%%'))
+        return self.urls['search'] % ("&".join(("l%d=" % x) for x in cat_ids), tryUrlencode(query).replace('%', '%%'))

    def _searchOnTitle(self, title, media, quality, results):

@ -140,8 +141,7 @@ class Show(ShowProvider, Base):
        ]),
        ('episode', [
            ([5], ['hdtv_720p', 'webdl_720p', 'webdl_1080p']),
-            ([78], ['hdtv_sd']),
-            ([4, 79], ['hdtv_sd'])
+            ([4, 78, 79], ['hdtv_sd'])
        ])
    ]

--- a/libs/caper/init.py
+++ b/libs/caper/init.py
@ -19,7 +19,7 @@ from caper.parsers.anime import AnimeParser
 from caper.parsers.scene import SceneParser


-__version_info__ = ('0', '2', '0')
+__version_info__ = ('0', '2', '2')
 __version_branch__ = 'master'

 __version__ = "%s%s" % (
--- a/libs/caper/constraint.py
+++ b/libs/caper/constraint.py
@ -38,7 +38,7 @@ class CaptureConstraint(object):

    def _compare_eq(self, fragment, name, expected):
        if not hasattr(fragment, name):
-            return None
+            return 1.0, False

        return 1.0, getattr(fragment, name) == expected

--- a/libs/caper/group.py
+++ b/libs/caper/group.py
@ -14,8 +14,9 @@


 from logr import Logr
+from caper import CaperClosure
 from caper.helpers import clean_dict
-from caper.result import CaperFragmentNode
+from caper.result import CaperFragmentNode, CaperClosureNode
 from caper.step import CaptureStep
 from caper.constraint import CaptureConstraint

@ -70,7 +71,9 @@ class CaptureGroup(object):
    def parse_subject(self, parent_head, subject):
        parent_node = parent_head[0] if type(parent_head) is list else parent_head

-        # TODO - if subject is a closure?
+        # TODO just jumping into closures for now, will be fixed later
+        if type(subject) is CaperClosure:
+            return [CaperClosureNode(subject, parent_head)]

        nodes = []

--- a/libs/caper/matcher.py
+++ b/libs/caper/matcher.py
@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import pprint
 import re
 from logr import Logr
-from caper.helpers import is_list_type, clean_dict
+from caper.helpers import is_list_type


 class FragmentMatcher(object):
@ -57,8 +56,6 @@ class FragmentMatcher(object):

                self.regex[group_name].append((weight, weight_patterns))

-        pprint.pprint(self.regex)
-
    def find_group(self, name):
        for group_name, weight_groups in self.regex.items():
            if group_name and group_name == name:
@ -66,62 +63,6 @@ class FragmentMatcher(object):

        return None

-    def parser_match(self, parser, group_name, single=True):
-        """
-
-        :type parser: caper.parsers.base.Parser
-        """
-        result = None
-
-        for group, weight_groups in self.regex.items():
-            if group_name and group != group_name:
-                continue
-
-            # TODO handle multiple weights
-            weight, patterns = weight_groups[0]
-
-            for pattern in patterns:
-                fragments = []
-                pattern_matched = True
-                pattern_result = {}
-
-                for fragment_pattern in pattern:
-                    if not parser.fragment_available():
-                        pattern_matched = False
-                        break
-
-                    fragment = parser.next_fragment()
-                    fragments.append(fragment)
-
-                    Logr.debug('[r"%s"].match("%s")', fragment_pattern.pattern, fragment.value)
-                    match = fragment_pattern.match(fragment.value)
-                    if match:
-                        Logr.debug('Pattern "%s" matched', fragment_pattern.pattern)
-                    else:
-                        pattern_matched = False
-                        break
-
-                    pattern_result.update(clean_dict(match.groupdict()))
-
-                if pattern_matched:
-                    if result is None:
-                        result = {}
-
-                    if group not in result:
-                        result[group] = {}
-
-                    Logr.debug('Matched on <%s>', ' '.join([f.value for f in fragments]))
-
-                    result[group].update(pattern_result)
-                    parser.commit()
-
-                    if single:
-                        return result
-                else:
-                    parser.rewind()
-
-        return result
-
    def value_match(self, value, group_name=None, single=True):
        result = None

--- a/libs/caper/parsers/base.py
+++ b/libs/caper/parsers/base.py
@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from logr import Logr
 from caper import FragmentMatcher
 from caper.group import CaptureGroup
 from caper.result import CaperResult, CaperClosureNode
@ -60,62 +59,6 @@ class Parser(object):
        raise NotImplementedError()

    #
-    # Closure Methods
-    #
-
-    def next_closure(self):
-        self._closure_pos += 1
-        closure = self.closures[self._closure_pos]
-
-        self._history.append(('fragment', -1 - self._fragment_pos))
-        self._fragment_pos = -1
-
-        if self._closure_pos != 0:
-            self._history.append(('closure', 1))
-
-        Logr.debug('(next_closure) closure.value: "%s"', closure.value)
-        return closure
-
-    def closure_available(self):
-        return self._closure_pos + 1 < len(self.closures)
-
-    #
-    # Fragment Methods
-    #
-
-    def next_fragment(self):
-        closure = self.closures[self._closure_pos]
-
-        self._fragment_pos += 1
-        fragment = closure.fragments[self._fragment_pos]
-
-        self._history.append(('fragment', 1))
-
-        Logr.debug('(next_fragment) closure.value "%s" - fragment.value: "%s"', closure.value, fragment.value)
-        return fragment
-
-    def fragment_available(self):
-        if not self.closure_available():
-            return False
-        return self._fragment_pos + 1 < len(self.closures[self._closure_pos].fragments)
-
-    def rewind(self):
-        for source, delta in reversed(self._history):
-            Logr.debug('(rewind) Rewinding step: %s', (source, delta))
-            if source == 'fragment':
-                self._fragment_pos -= delta
-            elif source == 'closure':
-                self._closure_pos -= delta
-            else:
-                raise NotImplementedError()
-
-        self.commit()
-
-    def commit(self):
-        Logr.debug('(commit)')
-        self._history = []
-
-    #
    # Capture Methods
    #

--- a/libs/caper/step.py
+++ b/libs/caper/step.py
@ -33,18 +33,6 @@ class CaptureStep(object):
        #: @type: bool
        self.single = single

-    def _get_next_subject(self, parser):
-        if self.source == 'fragment':
-            if not parser.fragment_available():
-                return None
-            return parser.next_fragment()
-        elif self.source == 'closure':
-            if not parser.closure_available():
-                return None
-            return parser.next_closure()
-
-        raise NotImplementedError()
-
    def execute(self, fragment):
        if self.regex:
            weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex)
--- a/libs/qcond/init.py
+++ b/libs/qcond/init.py
@ -0,0 +1,42 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from qcond.transformers.merge import MergeTransformer
+from qcond.transformers.slice import SliceTransformer
+from qcond.transformers.strip_common import StripCommonTransformer
+
+
+__version_info__ = ('0', '1', '0')
+__version_branch__ = 'master'
+
+__version__ = "%s%s" % (
+    '.'.join(__version_info__),
+    '-' + __version_branch__ if __version_branch__ else ''
+)
+
+
+class QueryCondenser(object):
+    def __init__(self):
+        self.transformers = [
+            MergeTransformer(),
+            SliceTransformer(),
+            StripCommonTransformer()
+        ]
+
+    def distinct(self, titles):
+        for transformer in self.transformers:
+            titles = transformer.run(titles)
+
+        return titles
--- a/libs/qcond/compat.py
+++ b/libs/qcond/compat.py
@ -0,0 +1,23 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import sys
+
+PY3 = sys.version_info[0] == 3
+
+if PY3:
+    xrange = range
+else:
+    xrange = xrange
--- a/libs/qcond/helpers.py
+++ b/libs/qcond/helpers.py
@ -0,0 +1,84 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from difflib import SequenceMatcher
+import re
+import sys
+from logr import Logr
+from qcond.compat import xrange
+
+
+PY3 = sys.version_info[0] == 3
+
+
+def simplify(s):
+    s = s.lower()
+    s = re.sub(r"(\w)'(\w)", r"\1\2", s)
+    return s
+
+
+def strip(s):
+    return re.sub(r"^(\W*)(.*?)(\W*)$", r"\2", s)
+
+
+def create_matcher(a, b, swap_longest = True, case_sensitive = False):
+    # Ensure longest string is a
+    if swap_longest and len(b) > len(a):
+        a_ = a
+        a = b
+        b = a_
+
+    if not case_sensitive:
+        a = a.upper()
+        b = b.upper()
+
+    return SequenceMatcher(None, a, b)
+
+
+def first(function_or_none, sequence):
+    if PY3:
+        for item in filter(function_or_none, sequence):
+            return item
+    else:
+        result = filter(function_or_none, sequence)
+        if len(result):
+            return result[0]
+
+    return None
+
+def sorted_append(sequence, item, func):
+    if not len(sequence):
+        sequence.insert(0, item)
+        return
+
+    x = 0
+    for x in xrange(len(sequence)):
+        if func(sequence[x]):
+            sequence.insert(x, item)
+            return
+
+    sequence.append(item)
+
+def itemsMatch(L1, L2):
+    return len(L1) == len(L2) and sorted(L1) == sorted(L2)
+
+def distinct(sequence):
+    result = []
+
+    for item in sequence:
+        if item not in result:
+            result.append(item)
+
+    return result
--- a/libs/qcond/transformers/init.py
+++ b/libs/qcond/transformers/init.py
--- a/libs/qcond/transformers/base.py
+++ b/libs/qcond/transformers/base.py
@ -0,0 +1,21 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class Transformer(object):
+    def __init__(self):
+        pass
+
+    def run(self, titles):
+        raise NotImplementedError()
--- a/libs/qcond/transformers/merge.py
+++ b/libs/qcond/transformers/merge.py
@ -0,0 +1,238 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from operator import itemgetter
+from logr import Logr
+from qcond.helpers import simplify, strip, first, sorted_append, distinct
+from qcond.transformers.base import Transformer
+from qcond.compat import xrange
+
+
+class MergeTransformer(Transformer):
+    def __init__(self):
+        super(MergeTransformer, self).__init__()
+
+    def run(self, titles):
+        titles = distinct([simplify(title) for title in titles])
+
+        Logr.info(str(titles))
+
+        Logr.debug("------------------------------------------------------------")
+
+        root, tails = self.parse(titles)
+
+        Logr.debug("--------------------------PARSE-----------------------------")
+
+        for node in root:
+            print_tree(node)
+
+        Logr.debug("--------------------------MERGE-----------------------------")
+
+        self.merge(root)
+
+        Logr.debug("--------------------------FINAL-----------------------------")
+
+        for node in root:
+            print_tree(node)
+
+        Logr.debug("--------------------------RESULT-----------------------------")
+
+        scores = {}
+        results = []
+
+        for tail in tails:
+            score, value, original_value = tail.full_value()
+
+            if value in scores:
+                scores[value] += score
+            else:
+                results.append((value, original_value))
+                scores[value] = score
+
+                Logr.debug("%s %s %s", score, value, original_value)
+
+        sorted_results = sorted(results, key=lambda item: (scores[item[0]], item[1]), reverse = True)
+
+        return [result[0] for result in sorted_results]
+
+    def parse(self, titles):
+        root = []
+        tails = []
+
+        for title in titles:
+            Logr.debug(title)
+
+            cur = None
+            words = title.split(' ')
+
+            for wx in xrange(len(words)):
+                word = strip(words[wx])
+
+                if cur is None:
+                    cur = find_node(root, word)
+
+                    if cur is None:
+                        cur = DNode(word, None, num_children=len(words) - wx, original_value=title)
+                        root.append(cur)
+                else:
+                    parent = cur
+                    parent.weight += 1
+
+                    cur = find_node(parent.right, word)
+
+                    if cur is None:
+                        Logr.debug("%s %d", word, len(words) - wx)
+                        cur = DNode(word, parent, num_children=len(words) - wx)
+                        sorted_append(parent.right, cur, lambda a: a.num_children < cur.num_children)
+                    else:
+                        cur.weight += 1
+
+            tails.append(cur)
+
+        return root, tails
+
+    def merge(self, root):
+        for x in range(len(root)):
+            Logr.debug(root[x])
+            root[x].right = self._merge(root[x].right)
+            Logr.debug('=================================================================')
+
+        return root
+
+    def get_nodes_right(self, value):
+        if type(value) is not list:
+            value = [value]
+
+        nodes = []
+
+        for node in value:
+            nodes.append(node)
+
+            for child in self.get_nodes_right(node.right):
+                nodes.append(child)
+
+        return nodes
+
+    def destroy_nodes_right(self, value):
+        nodes = self.get_nodes_right(value)
+
+        for node in nodes:
+            node.value = None
+            node.dead = True
+
+    def _merge(self, nodes, depth = 0):
+        Logr.debug(str('\t' * depth) + str(nodes))
+
+        top = nodes[0]
+
+        # Merge into top
+        for x in range(len(nodes)):
+            # Merge extra results into top
+            if x > 0:
+                top.value = None
+                top.weight += nodes[x].weight
+                self.destroy_nodes_right(top.right)
+
+                if len(nodes[x].right):
+                    top.join_right(nodes[x].right)
+
+                    Logr.debug("= %s joined %s", nodes[x], top)
+
+                nodes[x].dead = True
+
+        nodes = [n for n in nodes if not n.dead]
+
+        # Traverse further
+        for node in nodes:
+            if len(node.right):
+                node.right = self._merge(node.right, depth + 1)
+
+        return nodes
+
+
+def print_tree(node, depth = 0):
+    Logr.debug(str('\t' * depth) + str(node))
+
+    if len(node.right):
+        for child in node.right:
+            print_tree(child, depth + 1)
+    else:
+        Logr.debug(node.full_value()[1])
+
+
+def find_node(node_list, value):
+    # Try find adjacent node match
+    for node in node_list:
+        if node.value == value:
+            return node
+
+    return None
+
+
+class DNode(object):
+    def __init__(self, value, parent, right=None, weight=1, num_children=None, original_value=None):
+        self.value = value
+
+        self.parent = parent
+
+        if right is None:
+            right = []
+        self.right = right
+
+        self.weight = weight
+
+        self.original_value = original_value
+        self.num_children = num_children
+
+        self.dead = False
+
+    def join_right(self, nodes):
+        for node in nodes:
+            duplicate = first(lambda x: x.value == node.value, self.right)
+
+            if duplicate:
+                duplicate.weight += node.weight
+                duplicate.join_right(node.right)
+            else:
+                node.parent = self
+                self.right.append(node)
+
+    def full_value(self):
+        words = []
+        total_score = 0
+
+        cur = self
+        root = None
+
+        while cur is not None:
+            if cur.value and not cur.dead:
+                words.insert(0, cur.value)
+                total_score += cur.weight
+
+            if cur.parent is None:
+                root = cur
+            cur = cur.parent
+
+        return float(total_score) / len(words), ' '.join(words), root.original_value if root else None
+
+    def __repr__(self):
+        return '<%s value:"%s", weight: %s, num_children: %s%s%s>' % (
+            'DNode',
+            self.value,
+            self.weight,
+            self.num_children,
+            (', original_value: %s' % self.original_value) if self.original_value else '',
+            ' REMOVING' if self.dead else ''
+        )
--- a/libs/qcond/transformers/slice.py
+++ b/libs/qcond/transformers/slice.py
@ -0,0 +1,280 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from logr import Logr
+from qcond.helpers import create_matcher
+from qcond.transformers.base import Transformer
+
+
+class SliceTransformer(Transformer):
+    def __init__(self):
+        super(SliceTransformer, self).__init__()
+
+    def run(self, titles):
+        nodes = []
+
+        # Create a node for each title
+        for title in titles:
+            nodes.append(SimNode(title))
+
+        # Calculate similarities between nodes
+        for node in nodes:
+            calculate_sim_links(node, [n for n in nodes if n != node])
+
+        kill_nodes_above(nodes, 0.90)
+
+        Logr.debug('---------------------------------------------------------------------')
+
+        print_link_tree(nodes)
+        Logr.debug('%s %s', len(nodes), [n.value for n in nodes])
+
+        Logr.debug('---------------------------------------------------------------------')
+
+        kill_trailing_nodes(nodes)
+
+        Logr.debug('---------------------------------------------------------------------')
+
+        # Sort remaining nodes by 'num_merges'
+        nodes = sorted(nodes, key=lambda n: n.num_merges, reverse=True)
+
+        print_link_tree(nodes)
+
+        Logr.debug('---------------------------------------------------------------------')
+
+        Logr.debug('%s %s', len(nodes), [n.value for n in nodes])
+
+        return [n.value for n in nodes]
+
+
+class SimLink(object):
+    def __init__(self, similarity, opcodes, stats):
+        self.similarity = similarity
+        self.opcodes = opcodes
+        self.stats = stats
+
+
+class SimNode(object):
+    def __init__(self, value):
+        self.value = value
+
+        self.dead = False
+        self.num_merges = 0
+
+        self.links = {}  # {<other SimNode>: <SimLink>}
+
+
+def kill_nodes(nodes, killed_nodes):
+    # Remove killed nodes from root list
+    for node in killed_nodes:
+        if node in nodes:
+            nodes.remove(node)
+
+    # Remove killed nodes from links
+    for killed_node in killed_nodes:
+        for node in nodes:
+            if killed_node in node.links:
+                node.links.pop(killed_node)
+
+
+def kill_nodes_above(nodes, above_sim):
+    killed_nodes = []
+
+    for node in nodes:
+        if node.dead:
+            continue
+
+        Logr.debug(node.value)
+
+        for link_node, link in node.links.items():
+            if link_node.dead:
+                continue
+
+            Logr.debug('\t%0.2f -- %s', link.similarity, link_node.value)
+
+            if link.similarity >= above_sim:
+                if len(link_node.value) > len(node.value):
+                    Logr.debug('\t\tvery similar, killed this node')
+                    link_node.dead = True
+                    node.num_merges += 1
+                    killed_nodes.append(link_node)
+                else:
+                    Logr.debug('\t\tvery similar, killed owner')
+                    node.dead = True
+                    link_node.num_merges += 1
+                    killed_nodes.append(node)
+
+    kill_nodes(nodes, killed_nodes)
+
+
+def print_link_tree(nodes):
+    for node in nodes:
+        Logr.debug(node.value)
+        Logr.debug('\tnum_merges: %s', node.num_merges)
+
+        if len(node.links):
+            Logr.debug('\t========== LINKS ==========')
+            for link_node, link in node.links.items():
+                Logr.debug('\t%0.2f -- %s', link.similarity, link_node.value)
+
+            Logr.debug('\t---------------------------')
+
+
+def kill_trailing_nodes(nodes):
+    """Kill linked nodes whose link to a live node consists of insertions only.
+
+    For every live node and each of its live linked nodes, the link's opcodes
+    and stats are inspected; the linked node is marked dead (and the node's
+    num_merges incremented) when all conditions in the `kill` list below hold.
+    Killed nodes are removed from `nodes` through kill_nodes() at the end.
+
+    NOTE: the percentages divide by len(node.value), so a node with an empty
+    value that still has a live link raises ZeroDivisionError.
+    """
+    killed_nodes = []
+
+    for node in nodes:
+        if node.dead:
+            continue
+
+        Logr.debug(node.value)
+
+        for link_node, link in node.links.items():
+            if link_node.dead:
+                continue
+
+            # Missing 'valid' (no insert/delete opcode was recorded) counts as invalid
+            is_valid = link.stats.get('valid', False)
+
+            has_deletions = False
+            has_insertions = False
+            has_replacements = False
+
+            # Record which kinds of edit the link's opcodes contain
+            for opcode in link.opcodes:
+                if opcode[0] == 'delete':
+                    has_deletions = True
+                if opcode[0] == 'insert':
+                    has_insertions = True
+                if opcode[0] == 'replace':
+                    has_replacements = True
+
+            # Both ratios are relative to the length of this node's value
+            equal_perc = link.stats.get('equal', 0) / float(len(node.value))
+            insert_perc = link.stats.get('insert', 0) / float(len(node.value))
+
+            Logr.debug('\t({0:<24}) [{1:02d}:{2:02d} = {3:02d} {4:3.0f}% {5:3.0f}%] -- {6:<45}'.format(
+                'd:%s, i:%s, r:%s' % (has_deletions, has_insertions, has_replacements),
+                len(node.value), len(link_node.value), link.stats.get('equal', 0),
+                equal_perc * 100, insert_perc * 100,
+                '"{0}"'.format(link_node.value)
+            ))
+
+            Logr.debug('\t\t%s', link.stats)
+
+            # Kill only when the link is valid, at least half of this node's
+            # length is 'equal', and the only edits are insertions
+            kill = all([
+                is_valid,
+                equal_perc >= 0.5,
+                insert_perc < 2,
+                has_insertions,
+                not has_deletions,
+                not has_replacements
+            ])
+
+            if kill:
+                Logr.debug('\t\tkilled this node')
+
+                link_node.dead = True
+                node.num_merges += 1
+                killed_nodes.append(link_node)
+
+    kill_nodes(nodes, killed_nodes)
+
+# Debug layout for one opcode in get_opcode_stats(): tag, (i1:i2), (j1:j2)
+stats_print_format = "\t{0:<8} ({1:2d}:{2:2d}) ({3:2d}:{4:2d})"
+
+
+def get_index_values(iterable, a, b):
+    return (
+        iterable[a] if a else None,
+        iterable[b] if b else None
+    )
+
+
+def get_indices(iterable, a, b):
+    return (
+        a if 0 < a < len(iterable) else None,
+        b if 0 < b < len(iterable) else None
+    )
+
+
+def get_opcode_stats(for_node, node, opcodes):
+    """Summarise a list of (tag, i1, i2, j1, j2) opcodes into a stats dict.
+
+    The dict holds, per tag, the summed length of its ranges, plus a 'valid'
+    flag when at least one 'insert' or 'delete' opcode was seen. 'valid' stays
+    True only while every such opcode has None or a space next to its edit on
+    both the head and the tail side, in either node's value.
+    """
+    stats = {}
+
+    for tag, i1, i2, j1, j2 in opcodes:
+        Logr.debug(stats_print_format.format(
+            tag, i1, i2, j1, j2
+        ))
+
+        if tag in ['insert', 'delete']:
+            ax = None, None
+            bx = None, None
+
+            if tag == 'insert':
+                ax = get_indices(for_node.value, i1 - 1, i1)
+                bx = get_indices(node.value, j1, j2 - 1)
+
+            # NOTE(review): here the j indices are applied to for_node.value
+            # and the i indices to node.value, the reverse of the 'insert'
+            # branch -- confirm this swap is intended
+            if tag == 'delete':
+                ax = get_indices(for_node.value, j1 - 1, j1)
+                bx = get_indices(node.value, i1, i2 - 1)
+
+            # Characters found at those positions (None where the index is None)
+            av = get_index_values(for_node.value, *ax)
+            bv = get_index_values(node.value, *bx)
+
+            Logr.debug(
+                '\t\t%s %s [%s><%s] <---> %s %s [%s><%s]',
+                ax, av, av[0], av[1],
+                bx, bv, bv[0], bv[1]
+            )
+
+            head_valid = av[0] in [None, ' '] or bv[0] in [None, ' ']
+            tail_valid = av[1] in [None, ' '] or bv[1] in [None, ' ']
+            valid = head_valid and tail_valid
+
+            # Set on first sight; afterwards it can only drop from True to False
+            if 'valid' not in stats or (stats['valid'] and not valid):
+                stats['valid'] = valid
+
+            Logr.debug('\t\t' + ('VALID' if valid else 'INVALID'))
+
+        if tag not in stats:
+            stats[tag] = 0
+
+        # Length of the i range, or of the j range when the i range is empty
+        stats[tag] += (i2 - i1) or (j2 - j1)
+
+    return stats
+
+
+def calculate_sim_links(for_node, other_nodes):
+    """Create the SimLink pair between for_node and every node in other_nodes.
+
+    Nodes already present in for_node.links are skipped. For each remaining
+    node a similarity ratio and the opcodes of both directions are computed,
+    and a SimLink is stored on both nodes.
+    """
+    for node in other_nodes:
+        if node in for_node.links:
+            continue
+
+        Logr.debug('calculating similarity between "%s" and "%s"', for_node.value, node.value)
+
+        # Get similarity
+        similarity_matcher = create_matcher(for_node.value, node.value)
+        similarity = similarity_matcher.quick_ratio()
+
+        # Get for_node -> node opcodes
+        a_opcodes_matcher = create_matcher(for_node.value, node.value, swap_longest = False)
+        a_opcodes = a_opcodes_matcher.get_opcodes()
+        a_stats = get_opcode_stats(for_node, node, a_opcodes)
+
+        Logr.debug('-' * 100)
+
+        # Get node -> for_node opcodes
+        b_opcodes_matcher = create_matcher(node.value, for_node.value, swap_longest = False)
+        b_opcodes = b_opcodes_matcher.get_opcodes()
+        # NOTE(review): b_opcodes come from matching (node, for_node), yet the
+        # stats call passes (for_node, node) as for a_opcodes -- confirm the
+        # argument order is intended
+        b_stats = get_opcode_stats(for_node, node, b_opcodes)
+
+        for_node.links[node] = SimLink(similarity, a_opcodes, a_stats)
+        node.links[for_node] = SimLink(similarity, b_opcodes, b_stats)
+
+        #raw_input('Press ENTER to continue')
--- a/libs/qcond/transformers/strip_common.py
+++ b/libs/qcond/transformers/strip_common.py
@ -0,0 +1,26 @@
+# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from qcond.transformers.base import Transformer
+
+
+# Lower-case titles that StripCommonTransformer removes from its input
+COMMON_WORDS = [
+    'the'
+]
+
+
+class StripCommonTransformer(Transformer):
+    def run(self, titles):
+        return [title for title in titles if title.lower() not in COMMON_WORDS]