You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

279 lines
9.0 KiB

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
__version__ = '0.7.dev0'
__all__ = ['Guess', 'Language',
'guess_file_info', 'guess_video_info',
'guess_movie_info', 'guess_episode_info']
# Do python3 detection before importing any other module, to be sure that
# it will then always be available
# with code from http://lucumr.pocoo.org/2011/1/22/forwards-compatible-python/
import sys
if sys.version_info[0] >= 3:
PY3 = True
unicode_text_type = str
native_text_type = str
base_text_type = str
def u(x):
return str(x)
def s(x):
return x
class UnicodeMixin(object):
__str__ = lambda x: x.__unicode__()
import binascii
def to_hex(x):
return binascii.hexlify(x).decode('utf-8')
else:
PY3 = False
__all__ = [ str(s) for s in __all__ ] # fix imports for python2
unicode_text_type = unicode
native_text_type = str
base_text_type = basestring
def u(x):
if isinstance(x, str):
return x.decode('utf-8')
return unicode(x)
def s(x):
if isinstance(x, unicode):
return x.encode('utf-8')
if isinstance(x, list):
return [ s(y) for y in x ]
if isinstance(x, tuple):
return tuple(s(y) for y in x)
if isinstance(x, dict):
return dict((s(key), s(value)) for key, value in x.items())
return x
class UnicodeMixin(object):
__str__ = lambda x: unicode(x).encode('utf-8')
def to_hex(x):
return x.encode('hex')
from guessit.guess import Guess, merge_all
from guessit.language import Language
from guessit.matcher import IterativeMatcher
from guessit.textutils import clean_string
import logging
log = logging.getLogger(__name__)
class NullHandler(logging.Handler):
def emit(self, record):
pass
# let's be a nicely behaving library
h = NullHandler()
log.addHandler(h)
def _guess_filename(filename, filetype):
def find_nodes(tree, props):
"""Yields all nodes containing any of the given props."""
if isinstance(props, base_text_type):
props = [props]
for node in tree.nodes():
if any(prop in node.guess for prop in props):
yield node
def warning(title):
log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string()))
return m
mtree = IterativeMatcher(filename, filetype=filetype)
# if there are multiple possible years found, we assume the first one is
# part of the title, reparse the tree taking this into account
years = set(n.value for n in find_nodes(mtree.match_tree, 'year'))
if len(years) >= 2:
mtree = IterativeMatcher(filename, filetype=filetype,
opts=['skip_first_year'])
m = mtree.matched()
if 'language' not in m and 'subtitleLanguage' not in m:
return m
# if we found some language, make sure we didn't cut a title or sth...
mtree2 = IterativeMatcher(filename, filetype=filetype,
opts=['nolanguage', 'nocountry'])
m2 = mtree2.matched()
if m.get('title') is None:
return m
if m.get('title') != m2.get('title'):
title = next(find_nodes(mtree.match_tree, 'title'))
title2 = next(find_nodes(mtree2.match_tree, 'title'))
langs = list(find_nodes(mtree.match_tree, ['language', 'subtitleLanguage']))
if not langs:
return warning('A weird error happened with language detection')
# find the language that is likely more relevant
for lng in langs:
if lng.value in title2.value:
# if the language was detected as part of a potential title,
# look at this one in particular
lang = lng
break
else:
# pick the first one if we don't have a better choice
lang = langs[0]
# language code are rarely part of a title, and those
# should be handled by the Language exceptions anyway
if len(lang.value) <= 3:
return m
# if filetype is subtitle and the language appears last, just before
# the extension, then it is likely a subtitle language
parts = clean_string(title.root.value).split()
Change core system to improve performance and facilitate multi TV info sources. Change migrate core objects TVShow and TVEpisode and everywhere that these objects affect. Add message to logs and disable ui backlog buttons when no media provider has active and/or scheduled searching enabled. Change views for py3 compat. Change set default runtime of 5 mins if none is given for layout Day by Day. Add OpenSubtitles authentication support to config/Subtitles/Subtitles Plugin. Add &#34;Enforce media hash match&#34; to config/Subtitles Plugin/Opensubtitles for accurate subs if enabled, but if disabled, search failures will fallback to use less reliable subtitle results. Add Apprise 0.8.0 (6aa52c3). Add hachoir_py3 3.0a6 (5b9e05a). Add sgmllib3k 1.0.0 Update soupsieve 1.9.1 (24859cc) to soupsieve_py2 1.9.5 (6a38398) Add soupsieve_py3 2.0.0.dev (69194a2). Add Tornado_py3 Web Server 6.0.3 (ff985fe). Add xmlrpclib_to 0.1.1 (c37db9e). Remove ancient Growl lib 0.1 Remove xmltodict library. Change requirements.txt for Cheetah3 to minimum 3.2.4 Change update sabToSickBeard. Change update autoProcessTV. Change remove Twitter notifier. Update NZBGet Process Media extension, SickGear-NG 1.7 → 2.4 Update Kodi addon 1.0.3 → 1.0.4 Update ADBA for py3. Update Beautiful Soup 4.8.0 (r526) to 4.8.1 (r531). Update Send2Trash 1.3.0 (a568370) to 1.5.0 (66afce7). Update soupsieve 1.9.1 (24859cc) to 1.9.5 (6a38398). Change use GNTP (Growl Notification Transport Protocol) from Apprise. Change add multi host support to Growl notifier. Fix Growl notifier when using empty password. Change update links for Growl notifications. Change deprecate confg/Notifications/Growl password field as these are now stored with host setting. Fix prevent infinite memoryError from a particular jpg data structure. Change subliminal for py3. Change enzyme for py3. Change browser_ua for py3. Change feedparser for py3 (sgmlib is no longer available on py3 as standardlib so added ext lib) Fix Guessit. Fix parse_xml for py3. Fix name parser with multi eps for py3. Fix tvdb_api fixes for py3 (search show). Fix config/media process to only display &#34;pattern is invalid&#34; qtip on &#34;Episode naming&#34; tab if the associated field is actually visible. Also, if the field becomes hidden due to a setting change, hide any previously displayed qtip. Note for Javascript::getelementbyid (or $(&#39;tag[id=&#34;&lt;name&gt;&#34;&#39;)) is required when an id is being searched in the dom due to &#34;:&#34; used in a shows id name. Change download anidb xml files to main cache folder and use adba lib folder as a last resort. Change create get anidb show groups as centralised helper func and consolidate dupe code. Change move anidb related functions to newly renamed anime.py (from blacklistandwhitelist.py). Change str encode hex no longer exits in py3, use codecs.encode(...) instead. Change fix b64decode on py3 returns bytestrings. Change use binary read when downloading log file via browser to prevent any encoding issues. Change add case insensitive ordering to anime black/whitelist. Fix anime groups list not excluding whitelisted stuff. Change add Windows utf8 fix ... see: ytdl-org/youtube-dl#820 Change if no qualities are wanted, exit manual search thread. Fix keepalive for py3 process media. Change add a once a month update of tvinfo show mappings to the daily updater. Change autocorrect ids of new shows by updating from -8 to 31 days of the airdate of episode one. Add next run time to Manage/Show Tasks/Daily show update. Change when fetching imdb data, if imdb id is an episode id then try to find and use real show id. Change delete diskcache db in imdbpie when value error (due to change in Python version). Change during startup, cleanup any _cleaner.pyc/o to prevent issues when switching python versions. Add .pyc cleaner if python version is switched. Change replace deprecated gettz_db_metadata() and gettz. Change rebrand &#34;SickGear PostProcessing script&#34; to &#34;SickGear Process Media extension&#34;. Change improve setup guide to use the NZBGet version to minimise displayed text based on version. Change NZBGet versions prior to v17 now told to upgrade as those version are no longer supported - code has actually exit on start up for some time but docs were outdated. Change comment out code and unused option sg_base_path. Change supported Python version 2.7.9-2.7.18 inclusive expanded to 3.7.1-3.8.1 inclusive. Change pidfile creation under Linux 0o644. Make logger accept lists to output continuously using the log_lock instead of split up by other processes. Fix long path issues with Windows process media.
6 years ago
try:
if (m['type'] in ['moviesubtitle', 'episodesubtitle'] and
parts.index(lang.value) == len(parts) - 2):
return m
except ValueError:
pass
# if the language was in the middle of the other potential title,
# keep the other title (eg: The Italian Job), except if it is at the
# very beginning, in which case we consider it an error
if m2['title'].startswith(lang.value):
return m
elif lang.value in title2.value:
return m2
# if a node is in an explicit group, then the correct title is probably
# the other one
if title.root.node_at(title.node_idx[:2]).is_explicit():
return m2
elif title2.root.node_at(title2.node_idx[:2]).is_explicit():
return m
return warning('Not sure of the title because of the language position')
return m
def guess_file_info(filename, filetype, info=None):
"""info can contain the names of the various plugins, such as 'filename' to
detect filename info, or 'hash_md5' to get the md5 hash of the file.
>>> guess_file_info('tests/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
{'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
"""
result = []
hashers = []
# Force unicode as soon as possible
filename = u(filename)
if info is None:
info = ['filename']
if isinstance(info, base_text_type):
info = [info]
for infotype in info:
if infotype == 'filename':
result.append(_guess_filename(filename, filetype))
elif infotype == 'hash_mpc':
from guessit.hash_mpc import hash_file
try:
result.append(Guess({'hash_mpc': hash_file(filename)},
confidence=1.0))
except Exception as e:
log.warning('Could not compute MPC-style hash because: %s' % e)
elif infotype == 'hash_ed2k':
from guessit.hash_ed2k import hash_file
try:
result.append(Guess({'hash_ed2k': hash_file(filename)},
confidence=1.0))
except Exception as e:
log.warning('Could not compute ed2k hash because: %s' % e)
elif infotype.startswith('hash_'):
import hashlib
hashname = infotype[5:]
try:
hasher = getattr(hashlib, hashname)()
hashers.append((infotype, hasher))
except AttributeError:
log.warning('Could not compute %s hash because it is not available from python\'s hashlib module' % hashname)
else:
log.warning('Invalid infotype: %s' % infotype)
# do all the hashes now, but on a single pass
if hashers:
try:
blocksize = 8192
hasherobjs = dict(hashers).values()
with open(filename, 'rb') as f:
chunk = f.read(blocksize)
while chunk:
for hasher in hasherobjs:
hasher.update(chunk)
chunk = f.read(blocksize)
for infotype, hasher in hashers:
result.append(Guess({infotype: hasher.hexdigest()},
confidence=1.0))
except Exception as e:
log.warning('Could not compute hash because: %s' % e)
result = merge_all(result)
# last minute adjustments
# if country is in the guessed properties, make it part of the filename
if 'series' in result and 'country' in result:
result['series'] += ' (%s)' % result['country'].alpha2.upper()
return result
def guess_video_info(filename, info=None):
return guess_file_info(filename, 'autodetect', info)
def guess_movie_info(filename, info=None):
return guess_file_info(filename, 'movie', info)
def guess_episode_info(filename, info=None):
return guess_file_info(filename, 'episode', info)