You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

133 lines
4.4 KiB

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Version string of this package.
__version__ = '0.3-dev'

# Public names exported by a star-import of this module.
__all__ = [
    'Guess',
    'Language',
    'guess_file_info',
    'guess_video_info',
    'guess_movie_info',
    'guess_episode_info',
]
from guessit.guess import Guess, merge_all
from guessit.language import Language
from guessit.matcher import IterativeMatcher
import logging
# Logger shared by this module; it is looked up under the name "guessit".
log = logging.getLogger("guessit")


class NullHandler(logging.Handler):
    """Logging handler that discards every record it is given."""

    def emit(self, record):
        """Ignore *record*: nothing is formatted and nothing is written."""
        return None


# Be a well-behaved library: attach a do-nothing handler to our own logger
# and leave the decision of where log output goes to the application.
h = NullHandler()
log.addHandler(h)
def guess_file_info(filename, filetype, info=['filename']):
    """Collect information about a file using the requested plugins.

    info can contain the names of the various plugins, such as 'filename' to
    detect filename info, or 'hash_md5' to get the md5 hash of the file.

    Parameters:
      filename -- path of the file to inspect.
      filetype -- forwarded as-is to IterativeMatcher for the 'filename'
                  plugin; not used by the hash plugins.
      info     -- a plugin name or a sequence of plugin names:
                    'filename'    run IterativeMatcher on the filename
                    'hash_mpc'    hash_mpc.hash_file(filename)
                    'hash_ed2k'   hash_ed2k.hash_file(filename)
                    'hash_<name>' the hashlib constructor called <name>
                  Any other name is reported with a warning and skipped.
                  (The default list is never mutated here, so sharing it
                  between calls is harmless.)

    Returns the result of merge_all() applied to the collected guesses.

    Failures while computing a hash are logged as warnings on the module
    logger instead of being raised.

    >>> guess_file_info('test/dummy.srt', 'autodetect', info = ['hash_md5', 'hash_sha1'])
    {'hash_md5': 'e781de9b94ba2753a8e2945b2c0a123d', 'hash_sha1': 'bfd18e2f4e5d59775c2bc14d80f56971891ed620'}
    """
    result = []
    # (infotype, hashlib object) pairs; all of them are fed together in one
    # pass over the file once every requested plugin has been looked at.
    hashers = []

    # Accept a single plugin name as well as a sequence of names.
    if isinstance(info, basestring):
        info = [info]

    for infotype in info:
        if infotype == 'filename':
            m = IterativeMatcher(filename, filetype=filetype)
            result.append(m.matched())

        elif infotype == 'hash_mpc':
            import hash_mpc
            try:
                result.append(Guess({'hash_mpc': hash_mpc.hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute MPC-style hash because: %s', e)

        elif infotype == 'hash_ed2k':
            import hash_ed2k
            try:
                result.append(Guess({'hash_ed2k': hash_ed2k.hash_file(filename)},
                                    confidence=1.0))
            except Exception as e:
                log.warning('Could not compute ed2k hash because: %s', e)

        elif infotype.startswith('hash_'):
            import hashlib

            # The same hash requested twice must not create a second hasher:
            # it would yield a second, redundant guess for the same key.
            if any(name == infotype for name, _ in hashers):
                continue

            hashname = infotype[5:]
            try:
                hasher = getattr(hashlib, hashname)()
                hashers.append((infotype, hasher))
            except (AttributeError, TypeError):
                # AttributeError: hashlib has no attribute of that name.
                # TypeError: the attribute exists but cannot be called
                # without arguments (e.g. 'hash_new' -> hashlib.new).
                log.warning('Could not compute %s hash because it is not available from python\'s hashlib module', hashname)

        else:
            log.warning('Invalid infotype: %s', infotype)

        # For plugins which depend on some optional library, import them
        # like that:
        #
        #     if infotype == 'plugin_name':
        #         try:
        #             import optional_lib
        #         except ImportError:
        #             raise Exception('The plugin module cannot be loaded because the optional_lib lib is missing')
        #
        #         # do some stuff

    # do all the hashes now, but on a single pass
    if hashers:
        try:
            blocksize = 8192
            with open(filename, 'rb') as f:
                # The file is opened in binary mode, so end-of-file is
                # signalled by an empty bytes object.
                for chunk in iter(lambda: f.read(blocksize), b''):
                    for _, hasher in hashers:
                        hasher.update(chunk)

            for infotype, hasher in hashers:
                result.append(Guess({infotype: hasher.hexdigest()},
                                    confidence=1.0))
        except Exception as e:
            log.warning('Could not compute hash because: %s', e)

    return merge_all(result)
def guess_video_info(filename, info=['filename']):
    """Shortcut for guess_file_info() with the file type set to 'autodetect'.

    filename and info are forwarded unchanged.
    """
    guess = guess_file_info(filename, 'autodetect', info)
    return guess
def guess_movie_info(filename, info=['filename']):
    """Shortcut for guess_file_info() with the file type set to 'movie'.

    filename and info are forwarded unchanged.
    """
    guess = guess_file_info(filename, 'movie', info)
    return guess
def guess_episode_info(filename, info=['filename']):
    """Shortcut for guess_file_info() with the file type set to 'episode'.

    filename and info are forwarded unchanged.
    """
    guess = guess_file_info(filename, 'episode', info)
    return guess