Browse Source

Improve name searching. closes #1137

pull/1143/merge
Ruud 13 years ago
parent
commit
65570ba479
  1. 11
      couchpotato/core/helpers/variable.py
  2. 4
      couchpotato/core/plugins/base.py
  3. 22
      couchpotato/core/plugins/searcher/main.py
  4. 15
      couchpotato/core/providers/base.py
  5. 23
      couchpotato/core/providers/nzb/ftdworld/main.py
  6. 20
      couchpotato/core/providers/nzb/nzbclub/main.py
  7. 23
      couchpotato/core/providers/nzb/nzbindex/main.py
  8. 3
      couchpotato/core/providers/nzb/nzbsrus/main.py
  9. 18
      couchpotato/core/providers/nzb/omgwtfnzbs/main.py

11
couchpotato/core/helpers/variable.py

@ -1,3 +1,4 @@
from couchpotato.core.helpers.encoding import simplifyString, toSafeString
from couchpotato.core.logger import CPLog from couchpotato.core.logger import CPLog
import hashlib import hashlib
import os.path import os.path
@ -153,6 +154,16 @@ def getTitle(library_dict):
log.error('Could not get title for library item: %s', library_dict) log.error('Could not get title for library item: %s', library_dict)
return None return None
def possibleTitles(raw_title):
    """Return the distinct search variants of ``raw_title`` in a stable order.

    Three candidates are built from the raw title:

    1. ``toSafeString(raw_title)``, lower-cased
    2. ``raw_title`` itself, lower-cased
    3. ``simplifyString(raw_title)``

    Candidates that compare equal are collapsed so callers do not repeat
    the same query.

    :param raw_title: title string to derive variants from.
    :return: list of unique variants, ordered as listed above.
    """
    candidates = (
        toSafeString(raw_title).lower(),
        raw_title.lower(),
        simplifyString(raw_title),
    )

    # Dedupe by hand instead of list(set(...)): a set has no defined
    # iteration order, and callers iterate over this list to run searches,
    # so the order should be the same on every call.
    titles = []
    for title in candidates:
        if title not in titles:
            titles.append(title)
    return titles
def randomString(size = 8, chars = string.ascii_uppercase + string.digits): def randomString(size = 8, chars = string.ascii_uppercase + string.digits):
return ''.join(random.choice(chars) for x in range(size)) return ''.join(random.choice(chars) for x in range(size))

4
couchpotato/core/plugins/base.py

@ -3,7 +3,7 @@ from couchpotato import addView
from couchpotato.core.event import fireEvent, addEvent from couchpotato.core.event import fireEvent, addEvent
from couchpotato.core.helpers.encoding import tryUrlencode, simplifyString, ss, \ from couchpotato.core.helpers.encoding import tryUrlencode, simplifyString, ss, \
toSafeString toSafeString
from couchpotato.core.helpers.variable import getExt from couchpotato.core.helpers.variable import getExt, md5
from couchpotato.core.logger import CPLog from couchpotato.core.logger import CPLog
from couchpotato.environment import Env from couchpotato.environment import Env
from flask.templating import render_template_string from flask.templating import render_template_string
@ -222,7 +222,7 @@ class Plugin(object):
def getCache(self, cache_key, url = None, **kwargs): def getCache(self, cache_key, url = None, **kwargs):
cache_key = simplifyString(cache_key) cache_key = md5(cache_key)
cache = Env.get('cache').get(cache_key) cache = Env.get('cache').get(cache_key)
if cache: if cache:
if not Env.get('dev'): log.debug('Getting cache %s', cache_key) if not Env.get('dev'): log.debug('Getting cache %s', cache_key)

22
couchpotato/core/plugins/searcher/main.py

@ -3,7 +3,8 @@ from couchpotato.api import addApiView
from couchpotato.core.event import addEvent, fireEvent, fireEventAsync from couchpotato.core.event import addEvent, fireEvent, fireEventAsync
from couchpotato.core.helpers.encoding import simplifyString, toUnicode from couchpotato.core.helpers.encoding import simplifyString, toUnicode
from couchpotato.core.helpers.request import jsonified, getParam from couchpotato.core.helpers.request import jsonified, getParam
from couchpotato.core.helpers.variable import md5, getTitle, splitString from couchpotato.core.helpers.variable import md5, getTitle, splitString, \
possibleTitles
from couchpotato.core.logger import CPLog from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin from couchpotato.core.plugins.base import Plugin
from couchpotato.core.settings.model import Movie, Release, ReleaseInfo from couchpotato.core.settings.model import Movie, Release, ReleaseInfo
@ -365,17 +366,18 @@ class Searcher(Plugin):
if self.checkIMDB([nzb['description']], movie['library']['identifier']): if self.checkIMDB([nzb['description']], movie['library']['identifier']):
return True return True
for movie_title in movie['library']['titles']: for raw_title in movie['library']['titles']:
movie_words = re.split('\W+', simplifyString(movie_title['title'])) for movie_title in possibleTitles(raw_title['title']):
movie_words = re.split('\W+', simplifyString(movie_title))
if self.correctName(nzb['name'], movie_title['title']): if self.correctName(nzb['name'], movie_title):
# if no IMDB link, at least check year range 1 # if no IMDB link, at least check year range 1
if len(movie_words) > 2 and self.correctYear([nzb['name']], movie['library']['year'], 1): if len(movie_words) > 2 and self.correctYear([nzb['name']], movie['library']['year'], 1):
return True return True
# if no IMDB link, at least check year # if no IMDB link, at least check year
if len(movie_words) <= 2 and self.correctYear([nzb['name']], movie['library']['year'], 0): if len(movie_words) <= 2 and self.correctYear([nzb['name']], movie['library']['year'], 0):
return True return True
log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'" % (nzb['name'], movie_name, movie['library']['year'])) log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'" % (nzb['name'], movie_name, movie['library']['year']))
return False return False

15
couchpotato/core/providers/base.py

@ -1,5 +1,6 @@
from couchpotato.core.event import addEvent from couchpotato.core.event import addEvent
from couchpotato.core.helpers.variable import tryFloat from couchpotato.core.helpers.encoding import simplifyString
from couchpotato.core.helpers.variable import tryFloat, getTitle
from couchpotato.core.logger import CPLog from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin from couchpotato.core.plugins.base import Plugin
from couchpotato.environment import Env from couchpotato.environment import Env
@ -155,3 +156,15 @@ class YarrProvider(Provider):
new['provider_extra'] = ', %s' % new['provider_extra'] new['provider_extra'] = ', %s' % new['provider_extra']
log.info('Found: score(%(score)s) on %(provider)s%(provider_extra)s: %(name)s', new) log.info('Found: score(%(score)s) on %(provider)s%(provider_extra)s: %(name)s', new)
def removeDuplicateResults(self, results):
    """Return ``results`` without entries whose ``'id'`` was already seen.

    The first result carrying a given id is kept and later ones are
    dropped; the relative order of the kept results is unchanged. The
    input sequence is not modified.

    :param results: iterable of mappings, each with an ``'id'`` key.
        The id values must be hashable.
    :return: new list holding the first result for every distinct id.
    :raises KeyError: if a result has no ``'id'`` key.
    """
    # A set keeps the membership check constant-time; the original list
    # lookup rescanned every previously seen id for each result.
    seen_ids = set()
    unique_results = []
    for result in results:
        result_id = result['id']
        if result_id in seen_ids:
            continue
        seen_ids.add(result_id)
        unique_results.append(result)
    return unique_results

23
couchpotato/core/providers/nzb/ftdworld/main.py

@ -1,8 +1,7 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from couchpotato.core.event import fireEvent from couchpotato.core.event import fireEvent
from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode, \ from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode
simplifyString from couchpotato.core.helpers.variable import tryInt, possibleTitles, getTitle
from couchpotato.core.helpers.variable import tryInt, getTitle
from couchpotato.core.logger import CPLog from couchpotato.core.logger import CPLog
from couchpotato.core.providers.nzb.base import NZBProvider from couchpotato.core.providers.nzb.base import NZBProvider
from couchpotato.environment import Env from couchpotato.environment import Env
@ -22,7 +21,7 @@ class FTDWorld(NZBProvider):
'login': 'http://ftdworld.net/index.php', 'login': 'http://ftdworld.net/index.php',
} }
http_time_between_calls = 1 #seconds http_time_between_calls = 3 #seconds
cat_ids = [ cat_ids = [
([4, 11], ['dvdr']), ([4, 11], ['dvdr']),
@ -33,11 +32,19 @@ class FTDWorld(NZBProvider):
def search(self, movie, quality): def search(self, movie, quality):
results = []
if self.isDisabled(): if self.isDisabled():
return results return []
results = []
for title in possibleTitles(getTitle(movie['library'])):
results.extend(self._search(title, movie, quality))
return self.removeDuplicateResults(results)
def _search(self, title, movie, quality):
results = []
q = '%s %s' % (simplifyString(getTitle(movie['library'])), movie['library']['year']) q = '"%s" %s' % (title, movie['library']['year'])
params = { params = {
'ctitle': q, 'ctitle': q,
@ -81,7 +88,7 @@ class FTDWorld(NZBProvider):
'download': self.loginDownload, 'download': self.loginDownload,
'detail_url': self.urls['detail'] % nzb_id, 'detail_url': self.urls['detail'] % nzb_id,
'description': '', 'description': '',
'score': (tryInt(up.attrs['title'].split(' ')[0]) * 3) - (tryInt(down.attrs['title'].split(' ')[0]) * 3), 'score': (tryInt(up.attrs['title'].split(' ')[0]) * 3) - (tryInt(down.attrs['title'].split(' ')[0]) * 3) if up else 0,
} }
is_correct_movie = fireEvent('searcher.correct_movie', is_correct_movie = fireEvent('searcher.correct_movie',

20
couchpotato/core/providers/nzb/nzbclub/main.py

@ -1,12 +1,10 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from couchpotato.core.event import fireEvent from couchpotato.core.event import fireEvent
from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode, \ from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode
simplifyString
from couchpotato.core.helpers.rss import RSS from couchpotato.core.helpers.rss import RSS
from couchpotato.core.helpers.variable import tryInt, getTitle from couchpotato.core.helpers.variable import tryInt, getTitle, possibleTitles
from couchpotato.core.logger import CPLog from couchpotato.core.logger import CPLog
from couchpotato.core.providers.nzb.base import NZBProvider from couchpotato.core.providers.nzb.base import NZBProvider
from couchpotato.environment import Env
from dateutil.parser import parse from dateutil.parser import parse
import time import time
import xml.etree.ElementTree as XMLTree import xml.etree.ElementTree as XMLTree
@ -24,11 +22,19 @@ class NZBClub(NZBProvider, RSS):
def search(self, movie, quality): def search(self, movie, quality):
results = []
if self.isDisabled(): if self.isDisabled():
return results return []
results = []
for title in possibleTitles(getTitle(movie['library'])):
results.extend(self._search(title, movie, quality))
return self.removeDuplicateResults(results)
def _search(self, title, movie, quality):
results = []
q = '"%s %s" %s' % (simplifyString(getTitle(movie['library'])), movie['library']['year'], quality.get('identifier')) q = '"%s %s" %s' % (title, movie['library']['year'], quality.get('identifier'))
params = { params = {
'q': q, 'q': q,

23
couchpotato/core/providers/nzb/nzbindex/main.py

@ -1,9 +1,8 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from couchpotato.core.event import fireEvent from couchpotato.core.event import fireEvent
from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode, \ from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode
simplifyString
from couchpotato.core.helpers.rss import RSS from couchpotato.core.helpers.rss import RSS
from couchpotato.core.helpers.variable import tryInt, getTitle from couchpotato.core.helpers.variable import tryInt, getTitle, possibleTitles
from couchpotato.core.logger import CPLog from couchpotato.core.logger import CPLog
from couchpotato.core.providers.nzb.base import NZBProvider from couchpotato.core.providers.nzb.base import NZBProvider
from couchpotato.environment import Env from couchpotato.environment import Env
@ -27,11 +26,19 @@ class NzbIndex(NZBProvider, RSS):
def search(self, movie, quality): def search(self, movie, quality):
results = []
if self.isDisabled(): if self.isDisabled():
return results return []
results = []
for title in possibleTitles(getTitle(movie['library'])):
results.extend(self._search(title, movie, quality))
return self.removeDuplicateResults(results)
q = '"%s %s" %s' % (simplifyString(getTitle(movie['library'])), movie['library']['year'], quality.get('identifier')) def _search(self, title, movie, quality):
results = []
q = '"%s" %s %s' % (title, movie['library']['year'], quality.get('identifier'))
arguments = tryUrlencode({ arguments = tryUrlencode({
'q': q, 'q': q,
'age': Env.setting('retention', 'nzb'), 'age': Env.setting('retention', 'nzb'),
@ -45,9 +52,9 @@ class NzbIndex(NZBProvider, RSS):
}) })
url = "%s?%s" % (self.urls['api'], arguments) url = "%s?%s" % (self.urls['api'], arguments)
cache_key = 'nzbindex.%s.%s' % (movie['library']['identifier'], quality.get('identifier')) cache_key = 'nzbindex.%s.%s' % (movie['library']['identifier'], q)
data = self.getCache(cache_key, url) data = self.getCache(cache_key, url)
if data: if data:
try: try:
try: try:

3
couchpotato/core/providers/nzb/nzbsrus/main.py

@ -46,8 +46,7 @@ class Nzbsrus(NZBProvider, RSS):
url = "%s&%s&%s" % (self.urls['search'], arguments , cat_id_string) url = "%s&%s&%s" % (self.urls['search'], arguments , cat_id_string)
cache_key = 'nzbsrus_1.%s.%s' % (movie['library'].get('identifier'), cat_id_string) cache_key = 'nzbsrus.%s.%s' % (movie['library'].get('identifier'), cat_id_string)
single_cat = True
data = self.getCache(cache_key, url, cache_timeout = 1800, headers = {'User-Agent': Env.getIdentifier()}) data = self.getCache(cache_key, url, cache_timeout = 1800, headers = {'User-Agent': Env.getIdentifier()})
if data: if data:

18
couchpotato/core/providers/nzb/omgwtfnzbs/main.py

@ -1,9 +1,8 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from couchpotato.core.event import fireEvent from couchpotato.core.event import fireEvent
from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode, \ from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode
simplifyString
from couchpotato.core.helpers.rss import RSS from couchpotato.core.helpers.rss import RSS
from couchpotato.core.helpers.variable import tryInt, getTitle from couchpotato.core.helpers.variable import tryInt, getTitle, possibleTitles
from couchpotato.core.logger import CPLog from couchpotato.core.logger import CPLog
from couchpotato.core.providers.nzb.base import NZBProvider from couchpotato.core.providers.nzb.base import NZBProvider
from dateutil.parser import parse from dateutil.parser import parse
@ -33,12 +32,19 @@ class OMGWTFNZBs(NZBProvider, RSS):
def search(self, movie, quality): def search(self, movie, quality):
pre_releases = fireEvent('quality.pre_releases', single = True) pre_releases = fireEvent('quality.pre_releases', single = True)
if self.isDisabled() or quality['identifier'] in pre_releases:
return []
results = [] results = []
if self.isDisabled() or quality['identifier'] in pre_releases: for title in possibleTitles(getTitle(movie['library'])):
return results results.extend(self._search(title, movie, quality))
return self.removeDuplicateResults(results)
def _search(self, title, movie, quality):
results = []
q = '%s %s' % (simplifyString(getTitle(movie['library'])), movie['library']['year']) q = '%s %s' % (title, movie['library']['year'])
params = { params = {
'search': q, 'search': q,

Loading…
Cancel
Save