From 65570ba479f82155a803ab29ce26bd7b8fab478e Mon Sep 17 00:00:00 2001 From: Ruud Date: Mon, 17 Dec 2012 18:22:12 +0100 Subject: [PATCH] Improve name searching. closes #1137 --- couchpotato/core/helpers/variable.py | 11 +++++++++++ couchpotato/core/plugins/base.py | 4 ++-- couchpotato/core/plugins/searcher/main.py | 22 ++++++++++++---------- couchpotato/core/providers/base.py | 15 ++++++++++++++- couchpotato/core/providers/nzb/ftdworld/main.py | 23 +++++++++++++++-------- couchpotato/core/providers/nzb/nzbclub/main.py | 20 +++++++++++++------- couchpotato/core/providers/nzb/nzbindex/main.py | 23 +++++++++++++++-------- couchpotato/core/providers/nzb/nzbsrus/main.py | 3 +-- couchpotato/core/providers/nzb/omgwtfnzbs/main.py | 18 ++++++++++++------ 9 files changed, 95 insertions(+), 44 deletions(-) diff --git a/couchpotato/core/helpers/variable.py b/couchpotato/core/helpers/variable.py index 28cbc88..1ecb35b 100644 --- a/couchpotato/core/helpers/variable.py +++ b/couchpotato/core/helpers/variable.py @@ -1,3 +1,4 @@ +from couchpotato.core.helpers.encoding import simplifyString, toSafeString from couchpotato.core.logger import CPLog import hashlib import os.path @@ -153,6 +154,16 @@ def getTitle(library_dict): log.error('Could not get title for library item: %s', library_dict) return None +def possibleTitles(raw_title): + + titles = [] + + titles.append(toSafeString(raw_title).lower()) + titles.append(raw_title.lower()) + titles.append(simplifyString(raw_title)) + + return list(set(titles)) + def randomString(size = 8, chars = string.ascii_uppercase + string.digits): return ''.join(random.choice(chars) for x in range(size)) diff --git a/couchpotato/core/plugins/base.py b/couchpotato/core/plugins/base.py index fc2bfdb..2186054 100644 --- a/couchpotato/core/plugins/base.py +++ b/couchpotato/core/plugins/base.py @@ -3,7 +3,7 @@ from couchpotato import addView from couchpotato.core.event import fireEvent, addEvent from couchpotato.core.helpers.encoding import tryUrlencode, simplifyString, ss, \ toSafeString -from couchpotato.core.helpers.variable import getExt +from couchpotato.core.helpers.variable import getExt, md5 from couchpotato.core.logger import CPLog from couchpotato.environment import Env from flask.templating import render_template_string @@ -222,7 +222,7 @@ class Plugin(object): def getCache(self, cache_key, url = None, **kwargs): - cache_key = simplifyString(cache_key) + cache_key = md5(cache_key) cache = Env.get('cache').get(cache_key) if cache: if not Env.get('dev'): log.debug('Getting cache %s', cache_key) diff --git a/couchpotato/core/plugins/searcher/main.py b/couchpotato/core/plugins/searcher/main.py index c8188ac..b7076d4 100644 --- a/couchpotato/core/plugins/searcher/main.py +++ b/couchpotato/core/plugins/searcher/main.py @@ -3,7 +3,8 @@ from couchpotato.api import addApiView from couchpotato.core.event import addEvent, fireEvent, fireEventAsync from couchpotato.core.helpers.encoding import simplifyString, toUnicode from couchpotato.core.helpers.request import jsonified, getParam -from couchpotato.core.helpers.variable import md5, getTitle, splitString +from couchpotato.core.helpers.variable import md5, getTitle, splitString, \ + possibleTitles from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from couchpotato.core.settings.model import Movie, Release, ReleaseInfo @@ -365,17 +366,18 @@ class Searcher(Plugin): if self.checkIMDB([nzb['description']], movie['library']['identifier']): return True - for movie_title in movie['library']['titles']: - movie_words = re.split('\W+', simplifyString(movie_title['title'])) + for raw_title in movie['library']['titles']: + for movie_title in possibleTitles(raw_title['title']): + movie_words = re.split('\W+', simplifyString(movie_title)) - if self.correctName(nzb['name'], movie_title['title']): - # if no IMDB link, at least check year range 1 - if len(movie_words) > 2 and self.correctYear([nzb['name']], movie['library']['year'], 1): - return True + if self.correctName(nzb['name'], movie_title): + # if no IMDB link, at least check year range 1 + if len(movie_words) > 2 and self.correctYear([nzb['name']], movie['library']['year'], 1): + return True - # if no IMDB link, at least check year - if len(movie_words) <= 2 and self.correctYear([nzb['name']], movie['library']['year'], 0): - return True + # if no IMDB link, at least check year + if len(movie_words) <= 2 and self.correctYear([nzb['name']], movie['library']['year'], 0): + return True log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'" % (nzb['name'], movie_name, movie['library']['year'])) return False diff --git a/couchpotato/core/providers/base.py b/couchpotato/core/providers/base.py index c218724..9cbbdb5 100644 --- a/couchpotato/core/providers/base.py +++ b/couchpotato/core/providers/base.py @@ -1,5 +1,6 @@ from couchpotato.core.event import addEvent -from couchpotato.core.helpers.variable import tryFloat +from couchpotato.core.helpers.encoding import simplifyString +from couchpotato.core.helpers.variable import tryFloat, getTitle from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from couchpotato.environment import Env @@ -155,3 +156,15 @@ class YarrProvider(Provider): new['provider_extra'] = ', %s' % new['provider_extra'] log.info('Found: score(%(score)s) on %(provider)s%(provider_extra)s: %(name)s', new) + + def removeDuplicateResults(self, results): + + result_ids = [] + new_results = [] + + for result in results: + if result['id'] not in result_ids: + new_results.append(result) + result_ids.append(result['id']) + + return new_results diff --git a/couchpotato/core/providers/nzb/ftdworld/main.py b/couchpotato/core/providers/nzb/ftdworld/main.py index 5748671..e684e21 100644 --- a/couchpotato/core/providers/nzb/ftdworld/main.py +++ b/couchpotato/core/providers/nzb/ftdworld/main.py @@ -1,8 +1,7 @@ from bs4 import BeautifulSoup from couchpotato.core.event import fireEvent -from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode, \ - simplifyString -from couchpotato.core.helpers.variable import tryInt, getTitle +from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode +from couchpotato.core.helpers.variable import tryInt, possibleTitles, getTitle from couchpotato.core.logger import CPLog from couchpotato.core.providers.nzb.base import NZBProvider from couchpotato.environment import Env @@ -22,7 +21,7 @@ class FTDWorld(NZBProvider): 'login': 'http://ftdworld.net/index.php', } - http_time_between_calls = 1 #seconds + http_time_between_calls = 3 #seconds cat_ids = [ ([4, 11], ['dvdr']), @@ -33,11 +32,19 @@ class FTDWorld(NZBProvider): def search(self, movie, quality): - results = [] if self.isDisabled(): - return results + return [] + + results = [] + for title in possibleTitles(getTitle(movie['library'])): + results.extend(self._search(title, movie, quality)) + + return self.removeDuplicateResults(results) + + def _search(self, title, movie, quality): + results = [] - q = '%s %s' % (simplifyString(getTitle(movie['library'])), movie['library']['year']) + q = '"%s" %s' % (title, movie['library']['year']) params = { 'ctitle': q, @@ -81,7 +88,7 @@ class FTDWorld(NZBProvider): 'download': self.loginDownload, 'detail_url': self.urls['detail'] % nzb_id, 'description': '', - 'score': (tryInt(up.attrs['title'].split(' ')[0]) * 3) - (tryInt(down.attrs['title'].split(' ')[0]) * 3), + 'score': (tryInt(up.attrs['title'].split(' ')[0]) * 3) - (tryInt(down.attrs['title'].split(' ')[0]) * 3) if up else 0, } is_correct_movie = fireEvent('searcher.correct_movie', diff --git a/couchpotato/core/providers/nzb/nzbclub/main.py b/couchpotato/core/providers/nzb/nzbclub/main.py index 9f85348..487b9a6 100644 --- a/couchpotato/core/providers/nzb/nzbclub/main.py +++ b/couchpotato/core/providers/nzb/nzbclub/main.py @@ -1,12 +1,10 @@ from bs4 import BeautifulSoup from couchpotato.core.event import fireEvent -from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode, \ - simplifyString +from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode from couchpotato.core.helpers.rss import RSS -from couchpotato.core.helpers.variable import tryInt, getTitle +from couchpotato.core.helpers.variable import tryInt, getTitle, possibleTitles from couchpotato.core.logger import CPLog from couchpotato.core.providers.nzb.base import NZBProvider -from couchpotato.environment import Env from dateutil.parser import parse import time import xml.etree.ElementTree as XMLTree @@ -24,11 +22,19 @@ class NZBClub(NZBProvider, RSS): def search(self, movie, quality): - results = [] if self.isDisabled(): - return results + return [] + + results = [] + for title in possibleTitles(getTitle(movie['library'])): + results.extend(self._search(title, movie, quality)) + + return self.removeDuplicateResults(results) + + def _search(self, title, movie, quality): + results = [] - q = '"%s %s" %s' % (simplifyString(getTitle(movie['library'])), movie['library']['year'], quality.get('identifier')) + q = '"%s %s" %s' % (title, movie['library']['year'], quality.get('identifier')) params = { 'q': q, diff --git a/couchpotato/core/providers/nzb/nzbindex/main.py b/couchpotato/core/providers/nzb/nzbindex/main.py index 109792e..f4f1d6b 100644 --- a/couchpotato/core/providers/nzb/nzbindex/main.py +++ b/couchpotato/core/providers/nzb/nzbindex/main.py @@ -1,9 +1,8 @@ from bs4 import BeautifulSoup from couchpotato.core.event import fireEvent -from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode, \ - simplifyString +from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode from couchpotato.core.helpers.rss import RSS -from couchpotato.core.helpers.variable import tryInt, getTitle +from couchpotato.core.helpers.variable import tryInt, getTitle, possibleTitles from couchpotato.core.logger import CPLog from couchpotato.core.providers.nzb.base import NZBProvider from couchpotato.environment import Env @@ -27,11 +26,19 @@ class NzbIndex(NZBProvider, RSS): def search(self, movie, quality): - results = [] if self.isDisabled(): - return results + return [] + + results = [] + for title in possibleTitles(getTitle(movie['library'])): + results.extend(self._search(title, movie, quality)) + + return self.removeDuplicateResults(results) - q = '"%s %s" %s' % (simplifyString(getTitle(movie['library'])), movie['library']['year'], quality.get('identifier')) + def _search(self, title, movie, quality): + results = [] + + q = '"%s" %s %s' % (title, movie['library']['year'], quality.get('identifier')) arguments = tryUrlencode({ 'q': q, 'age': Env.setting('retention', 'nzb'), @@ -45,9 +52,9 @@ class NzbIndex(NZBProvider, RSS): }) url = "%s?%s" % (self.urls['api'], arguments) - cache_key = 'nzbindex.%s.%s' % (movie['library']['identifier'], quality.get('identifier')) - + cache_key = 'nzbindex.%s.%s' % (movie['library']['identifier'], q) data = self.getCache(cache_key, url) + if data: try: try: diff --git a/couchpotato/core/providers/nzb/nzbsrus/main.py b/couchpotato/core/providers/nzb/nzbsrus/main.py index b92d677..ee2b223 100644 --- a/couchpotato/core/providers/nzb/nzbsrus/main.py +++ b/couchpotato/core/providers/nzb/nzbsrus/main.py @@ -46,8 +46,7 @@ class Nzbsrus(NZBProvider, RSS): url = "%s&%s&%s" % (self.urls['search'], arguments , cat_id_string) - cache_key = 'nzbsrus_1.%s.%s' % (movie['library'].get('identifier'), cat_id_string) - single_cat = True + cache_key = 'nzbsrus.%s.%s' % (movie['library'].get('identifier'), cat_id_string) data = self.getCache(cache_key, url, cache_timeout = 1800, headers = {'User-Agent': Env.getIdentifier()}) if data: diff --git a/couchpotato/core/providers/nzb/omgwtfnzbs/main.py b/couchpotato/core/providers/nzb/omgwtfnzbs/main.py index a6ea831..533190b 100644 --- a/couchpotato/core/providers/nzb/omgwtfnzbs/main.py +++ b/couchpotato/core/providers/nzb/omgwtfnzbs/main.py @@ -1,9 +1,8 @@ from bs4 import BeautifulSoup from couchpotato.core.event import fireEvent -from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode, \ - simplifyString +from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode from couchpotato.core.helpers.rss import RSS -from couchpotato.core.helpers.variable import tryInt, getTitle +from couchpotato.core.helpers.variable import tryInt, getTitle, possibleTitles from couchpotato.core.logger import CPLog from couchpotato.core.providers.nzb.base import NZBProvider from dateutil.parser import parse @@ -33,12 +32,19 @@ class OMGWTFNZBs(NZBProvider, RSS): def search(self, movie, quality): pre_releases = fireEvent('quality.pre_releases', single = True) + if self.isDisabled() or quality['identifier'] in pre_releases: + return [] results = [] - if self.isDisabled() or quality['identifier'] in pre_releases: - return results + for title in possibleTitles(getTitle(movie['library'])): + results.extend(self._search(title, movie, quality)) + + return self.removeDuplicateResults(results) + + def _search(self, title, movie, quality): + results = [] - q = '%s %s' % (simplifyString(getTitle(movie['library'])), movie['library']['year']) + q = '%s %s' % (title, movie['library']['year']) params = { 'search': q,