diff --git a/libs/guessit/ISO-3166-1_utf8.txt b/libs/guessit/ISO-3166-1_utf8.txt old mode 100644 new mode 100755 diff --git a/libs/guessit/ISO-639-2_utf-8.txt b/libs/guessit/ISO-639-2_utf-8.txt old mode 100644 new mode 100755 diff --git a/libs/guessit/__init__.py b/libs/guessit/__init__.py old mode 100644 new mode 100755 index e19da09..386aa7f --- a/libs/guessit/__init__.py +++ b/libs/guessit/__init__.py @@ -18,8 +18,9 @@ # along with this program. If not, see . # +from __future__ import unicode_literals -__version__ = '0.5.2' +__version__ = '0.6-dev' __all__ = ['Guess', 'Language', 'guess_file_info', 'guess_video_info', 'guess_movie_info', 'guess_episode_info'] @@ -73,6 +74,7 @@ else: from guessit.guess import Guess, merge_all from guessit.language import Language from guessit.matcher import IterativeMatcher +from guessit.textutils import clean_string import logging log = logging.getLogger(__name__) @@ -88,6 +90,86 @@ h = NullHandler() log.addHandler(h) +def _guess_filename(filename, filetype): + mtree = IterativeMatcher(filename, filetype=filetype) + m = mtree.matched() + + if 'language' not in m and 'subtitleLanguage' not in m: + return m + + # if we found some language, make sure we didn't cut a title or sth... + mtree2 = IterativeMatcher(filename, filetype=filetype, + opts=['nolanguage', 'nocountry']) + m2 = mtree2.matched() + + def find_nodes(tree, props): + """Yields all nodes containing any of the given props.""" + if isinstance(props, base_text_type): + props = [props] + for node in tree.nodes(): + if any(prop in node.guess for prop in props): + yield node + + + def warning(title): + log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string())) + return m + + + if m.get('title') != m2.get('title'): + title = next(find_nodes(mtree.match_tree, 'title')) + title2 = next(find_nodes(mtree2.match_tree, 'title')) + + langs = list(find_nodes(mtree.match_tree, ['language', 'subtitleLanguage'])) + if not langs: + return warning('A weird error happened with language detection') + + # find the language that is likely more relevant + for lng in langs: + if lng.value in title2.value: + # if the language was detected as part of a potential title, + # look at this one in particular + lang = lng + break + else: + # pick the first one if we don't have a better choice + lang = langs[0] + + + # language code are rarely part of a title, and those + # should be handled by the Language exceptions anyway + if len(lang.value) <= 3: + return m + + + # if filetype is subtitle and the language appears last, just before + # the extension, then it is likely a subtitle language + parts = clean_string(title.root.value).split() + if (m['type'] in ['moviesubtitle', 'episodesubtitle'] and + parts.index(lang.value) == len(parts) - 2): + return m + + # if the language was in the middle of the other potential title, + # keep the other title (eg: The Italian Job), except if it is at the + # very beginning, in which case we consider it an error + if m2['title'].startswith(lang.value): + return m + elif lang.value in title2.value: + return m2 + + # if a node is in an explicit group, then the correct title is probably + # the other one + if title.root.node_at(title.node_idx[:2]).is_explicit(): + return m2 + elif title2.root.node_at(title2.node_idx[:2]).is_explicit(): + return m + + return warning('Not sure of the title because of the language position') + + + return m + + def guess_file_info(filename, filetype, info=None): """info can contain the names of the various plugins, such as 'filename' to detect filename info, or 'hash_md5' to get the md5 hash of the file. @@ -98,6 +180,9 @@ def guess_file_info(filename, filetype, info=None): result = [] hashers = [] + # Force unicode as soon as possible + filename = u(filename) + if info is None: info = ['filename'] @@ -106,8 +191,7 @@ def guess_file_info(filename, filetype, info=None): for infotype in info: if infotype == 'filename': - m = IterativeMatcher(filename, filetype=filetype) - result.append(m.matched()) + result.append(_guess_filename(filename, filetype)) elif infotype == 'hash_mpc': from guessit.hash_mpc import hash_file @@ -161,7 +245,7 @@ def guess_file_info(filename, filetype, info=None): # last minute adjustments # if country is in the guessed properties, make it part of the filename - if 'country' in result: + if 'series' in result and 'country' in result: result['series'] += ' (%s)' % result['country'].alpha2.upper() diff --git a/libs/guessit/__main__.py b/libs/guessit/__main__.py new file mode 100755 index 0000000..957ec9d --- /dev/null +++ b/libs/guessit/__main__.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2011 Nicolas Wack +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see . +# + +from __future__ import unicode_literals +from __future__ import print_function +from guessit import u +from guessit import slogging, guess_file_info +from optparse import OptionParser +import logging + + +def detect_filename(filename, filetype, info=['filename']): + filename = u(filename) + + print('For:', filename) + print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string()) + + +def run_demo(episodes=True, movies=True): + # NOTE: tests should not be added here but rather in the tests/ folder + # this is just intended as a quick example + if episodes: + testeps = [ 'Series/Californication/Season 2/Californication.2x05.Vaginatown.HDTV.XviD-0TV.[tvu.org.ru].avi', + 'Series/dexter/Dexter.5x02.Hello,.Bandit.ENG.-.sub.FR.HDTV.XviD-AlFleNi-TeaM.[tvu.org.ru].avi', + 'Series/Treme/Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.[tvu.org.ru].avi', + 'Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi', + 'Series/Duckman/Duckman - S1E13 Joking The Chicken (unedited).avi', + 'Series/Simpsons/The_simpsons_s13e18_-_i_am_furious_yellow.mpg', + 'Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.[tvu.org.ru].avi', + 'Series/Dr._Slump_-_002_DVB-Rip_Catalan_by_kelf.avi', + 'Series/Kaamelott/Kaamelott - Livre V - Second Volet - HD 704x396 Xvid 2 pass - Son 5.1 - TntRip by Slurm.avi' + ] + + for f in testeps: + print('-'*80) + detect_filename(f, filetype='episode') + + + if movies: + testmovies = [ 'Movies/Fear and Loathing in Las Vegas (1998)/Fear.and.Loathing.in.Las.Vegas.720p.HDDVD.DTS.x264-ESiR.mkv', + 'Movies/El Dia de la Bestia (1995)/El.dia.de.la.bestia.DVDrip.Spanish.DivX.by.Artik[SEDG].avi', + 'Movies/Blade Runner (1982)/Blade.Runner.(1982).(Director\'s.Cut).CD1.DVDRip.XviD.AC3-WAF.avi', + 'Movies/Dark City (1998)/Dark.City.(1998).DC.BDRip.720p.DTS.X264-CHD.mkv', + 'Movies/Sin City (BluRay) (2005)/Sin.City.2005.BDRip.720p.x264.AC3-SEPTiC.mkv', + 'Movies/Borat (2006)/Borat.(2006).R5.PROPER.REPACK.DVDRip.XviD-PUKKA.avi', # FIXME: PROPER and R5 get overwritten + '[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv', # FIXME: title gets overwritten + 'Battle Royale (2000)/Battle.Royale.(Batoru.Rowaiaru).(2000).(Special.Edition).CD1of2.DVDRiP.XviD-[ZeaL].avi', + 'Movies/Brazil (1985)/Brazil_Criterion_Edition_(1985).CD2.English.srt', + 'Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv', + 'Movies/Toy Story (1995)/Toy Story [HDTV 720p English-Spanish].mkv', + 'Movies/Pirates of the Caribbean: The Curse of the Black Pearl (2003)/Pirates.Of.The.Carribean.DC.2003.iNT.DVDRip.XviD.AC3-NDRT.CD1.avi', + 'Movies/Office Space (1999)/Office.Space.[Dual-DVDRip].[Spanish-English].[XviD-AC3-AC3].[by.Oswald].avi', + 'Movies/The NeverEnding Story (1984)/The.NeverEnding.Story.1.1984.DVDRip.AC3.Xvid-Monteque.avi', + 'Movies/Juno (2007)/Juno KLAXXON.avi', + 'Movies/Chat noir, chat blanc (1998)/Chat noir, Chat blanc - Emir Kusturica (VO - VF - sub FR - Chapters).mkv', + 'Movies/Wild Zero (2000)/Wild.Zero.DVDivX-EPiC.srt', + 'Movies/El Bosque Animado (1987)/El.Bosque.Animado.[Jose.Luis.Cuerda.1987].[Xvid-Dvdrip-720x432].avi', + 'testsmewt_bugs/movies/Baraka_Edition_Collector.avi' + ] + + for f in testmovies: + print('-'*80) + detect_filename(f, filetype = 'movie') + + +def main(): + slogging.setupLogging() + + parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]') + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help = 'display debug output') + parser.add_option('-i', '--info', dest = 'info', default = 'filename', + help = 'the desired information type: filename, hash_mpc or a hash from python\'s ' + 'hashlib module, such as hash_md5, hash_sha1, ...; or a list of any of ' + 'them, comma-separated') + parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect', + help = 'the suggested file type: movie, episode or autodetect') + parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False, + help = 'run a few builtin tests instead of analyzing a file') + + options, args = parser.parse_args() + if options.verbose: + logging.getLogger('guessit').setLevel(logging.DEBUG) + + if options.demo: + run_demo(episodes=True, movies=True) + else: + if args: + for filename in args: + detect_filename(filename, + filetype = options.filetype, + info = options.info.split(',')) + + else: + parser.print_help() + +if __name__ == '__main__': + main() diff --git a/libs/guessit/country.py b/libs/guessit/country.py old mode 100644 new mode 100755 diff --git a/libs/guessit/date.py b/libs/guessit/date.py old mode 100644 new mode 100755 diff --git a/libs/guessit/fileutils.py b/libs/guessit/fileutils.py old mode 100644 new mode 100755 index 2fca6b7..45f07e8 --- a/libs/guessit/fileutils.py +++ b/libs/guessit/fileutils.py @@ -22,6 +22,7 @@ from __future__ import unicode_literals from guessit import s, u import os.path import zipfile +import io def split_path(path): @@ -76,12 +77,12 @@ def file_in_same_dir(ref_file, desired_file): def load_file_in_same_dir(ref_file, filename): """Load a given file. Works even when the file is contained inside a zip.""" - path = split_path(ref_file)[:-1] + [filename] + path = split_path(ref_file)[:-1] + [str(filename)] for i, p in enumerate(path): - if p.endswith('.zip'): + if p[-4:] == '.zip': zfilename = os.path.join(*path[:i + 1]) zfile = zipfile.ZipFile(zfilename) return zfile.read('/'.join(path[i + 1:])) - return u(open(os.path.join(*path)).read()) + return u(io.open(os.path.join(*path), encoding = 'utf-8').read()) diff --git a/libs/guessit/guess.py b/libs/guessit/guess.py old mode 100644 new mode 100755 index 801af55..62385e8 --- a/libs/guessit/guess.py +++ b/libs/guessit/guess.py @@ -253,48 +253,26 @@ def merge_similar_guesses(guesses, prop, choose): merge_similar_guesses(guesses, prop, choose) -def merge_append_guesses(guesses, prop): - """Take a list of guesses and merge those which have the same properties by - appending them in a list. - - DEPRECATED, remove with old guessers - - """ - similar = [guess for guess in guesses if prop in guess] - if not similar: - return - - merged = similar[0] - merged[prop] = [merged[prop]] - # TODO: what to do with global confidence? mean of them all? - - for m in similar[1:]: - for prop2 in m: - if prop == prop2: - merged[prop].append(m[prop]) - else: - if prop2 in m: - log.warning('overwriting property "%s" with value %s' % (prop2, m[prop2])) - merged[prop2] = m[prop2] - # TODO: confidence also - - guesses.remove(m) - - def merge_all(guesses, append=None): """Merge all the guesses in a single result, remove very unlikely values, and return it. You can specify a list of properties that should be appended into a list instead of being merged. - >>> s(merge_all([ Guess({ 'season': 2 }, confidence = 0.6), - ... Guess({ 'episodeNumber': 13 }, confidence = 0.8) ])) + >>> s(merge_all([ Guess({'season': 2}, confidence=0.6), + ... Guess({'episodeNumber': 13}, confidence=0.8) ])) {'season': 2, 'episodeNumber': 13} - >>> s(merge_all([ Guess({ 'episodeNumber': 27 }, confidence = 0.02), - ... Guess({ 'season': 1 }, confidence = 0.2) ])) + >>> s(merge_all([ Guess({'episodeNumber': 27}, confidence=0.02), + ... Guess({'season': 1}, confidence=0.2) ])) {'season': 1} + >>> s(merge_all([ Guess({'other': 'PROPER'}, confidence=0.8), + ... Guess({'releaseGroup': '2HD'}, confidence=0.8) ], + ... append=['other'])) + {'releaseGroup': '2HD', 'other': ['PROPER']} + + """ if not guesses: return Guess() @@ -328,7 +306,13 @@ def merge_all(guesses, append=None): # make sure our appendable properties contain unique values for prop in append: - if prop in result: - result[prop] = list(set(result[prop])) + try: + value = result[prop] + if isinstance(value, list): + result[prop] = list(set(value)) + else: + result[prop] = [ value ] + except KeyError: + pass return result diff --git a/libs/guessit/hash_ed2k.py b/libs/guessit/hash_ed2k.py old mode 100644 new mode 100755 diff --git a/libs/guessit/hash_mpc.py b/libs/guessit/hash_mpc.py old mode 100644 new mode 100755 diff --git a/libs/guessit/language.py b/libs/guessit/language.py old mode 100644 new mode 100755 index ccdd9ca..3b3a86a --- a/libs/guessit/language.py +++ b/libs/guessit/language.py @@ -21,13 +21,14 @@ from __future__ import unicode_literals from guessit import UnicodeMixin, base_text_type, u, s from guessit.fileutils import load_file_in_same_dir +from guessit.textutils import find_words from guessit.country import Country import re import logging __all__ = [ 'is_iso_language', 'is_language', 'lang_set', 'Language', 'ALL_LANGUAGES', 'ALL_LANGUAGES_NAMES', 'UNDETERMINED', - 'search_language' ] + 'search_language', 'guess_language' ] log = logging.getLogger(__name__) @@ -317,7 +318,7 @@ def search_language(string, lang_filter=None): 'is', 'it', 'am', 'mad', 'men', 'man', 'run', 'sin', 'st', 'to', 'no', 'non', 'war', 'min', 'new', 'car', 'day', 'bad', 'bat', 'fan', 'fry', 'cop', 'zen', 'gay', 'fat', 'cherokee', 'got', 'an', 'as', - 'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', + 'cat', 'her', 'be', 'hat', 'sun', 'may', 'my', 'mr', 'rum', 'pi', # french words 'bas', 'de', 'le', 'son', 'vo', 'vf', 'ne', 'ca', 'ce', 'et', 'que', 'mal', 'est', 'vol', 'or', 'mon', 'se', @@ -325,7 +326,7 @@ def search_language(string, lang_filter=None): 'la', 'el', 'del', 'por', 'mar', # other 'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii', - 'vi' + 'vi', 'ben', 'da' ]) sep = r'[](){} \._-+' @@ -334,7 +335,8 @@ def search_language(string, lang_filter=None): slow = ' %s ' % string.lower() confidence = 1.0 # for all of them - for lang in lng_all_names: + + for lang in set(find_words(slow)) & lng_all_names: if lang in lng_common_words: continue @@ -351,7 +353,7 @@ def search_language(string, lang_filter=None): if lang_filter and language not in lang_filter: continue - # only allow those languages that have a 2-letter code, those who + # only allow those languages that have a 2-letter code, those that # don't are too esoteric and probably false matches if language.lang not in lng3_to_lng2: continue @@ -364,9 +366,25 @@ def search_language(string, lang_filter=None): else: # Note: we could either be really confident that we found a # language or assume that full language names are too - # common words + # common words and lower their confidence accordingly confidence = 0.3 # going with the low-confidence route here return language, (pos - 1, end - 1), confidence return None, None, None + + +def guess_language(text): + """Guess the language in which a body of text is written. + + This uses the external guess-language python module, and will fail and return + Language(Undetermined) if it is not installed. + """ + try: + from guess_language import guessLanguage + return Language(guessLanguage(text)) + + except ImportError: + log.error('Cannot detect the language of the given text body, missing dependency: guess-language') + log.error('Please install it from PyPI, by doing eg: pip install guess-language') + return UNDETERMINED diff --git a/libs/guessit/matcher.py b/libs/guessit/matcher.py old mode 100644 new mode 100755 index d3392f6..cc77b81 --- a/libs/guessit/matcher.py +++ b/libs/guessit/matcher.py @@ -19,18 +19,16 @@ # from __future__ import unicode_literals -from guessit import PY3, u +from guessit import PY3, u, base_text_type from guessit.matchtree import MatchTree -from guessit.guess import (merge_similar_guesses, merge_all, - choose_int, choose_string) -import copy +from guessit.textutils import normalize_unicode import logging log = logging.getLogger(__name__) class IterativeMatcher(object): - def __init__(self, filename, filetype='autodetect'): + def __init__(self, filename, filetype='autodetect', opts=None): """An iterative matcher tries to match different patterns that appear in the filename. @@ -76,6 +74,14 @@ class IterativeMatcher(object): raise ValueError("filetype needs to be one of %s" % valid_filetypes) if not PY3 and not isinstance(filename, unicode): log.warning('Given filename to matcher is not unicode...') + filename = filename.decode('utf-8') + + filename = normalize_unicode(filename) + + if opts is None: + opts = [] + elif isinstance(opts, base_text_type): + opts = opts.split() self.match_tree = MatchTree(filename) mtree = self.match_tree @@ -84,7 +90,7 @@ class IterativeMatcher(object): def apply_transfo(transfo_name, *args, **kwargs): transfo = __import__('guessit.transfo.' + transfo_name, globals=globals(), locals=locals(), - fromlist=['process'], level=-1) + fromlist=['process'], level=0) transfo.process(mtree, *args, **kwargs) # 1- first split our path into dirs + basename + ext @@ -115,13 +121,20 @@ class IterativeMatcher(object): 'guess_properties', 'guess_language', 'guess_video_rexps' ] + if 'nolanguage' in opts: + strategy.remove('guess_language') + for name in strategy: apply_transfo(name) # more guessers for both movies and episodes - for name in ['guess_bonus_features', 'guess_year', 'guess_country']: + for name in ['guess_bonus_features', 'guess_year']: apply_transfo(name) + if 'nocountry' not in opts: + apply_transfo('guess_country') + + # split into '-' separated subgroups (with required separator chars # around the dash) apply_transfo('split_on_dash') @@ -139,27 +152,4 @@ class IterativeMatcher(object): log.debug('Found match tree:\n%s' % u(mtree)) def matched(self): - # we need to make a copy here, as the merge functions work in place and - # calling them on the match tree would modify it - - parts = [node.guess for node in self.match_tree.nodes() if node.guess] - parts = copy.deepcopy(parts) - - # 1- try to merge similar information together and give it a higher - # confidence - for int_part in ('year', 'season', 'episodeNumber'): - merge_similar_guesses(parts, int_part, choose_int) - - for string_part in ('title', 'series', 'container', 'format', - 'releaseGroup', 'website', 'audioCodec', - 'videoCodec', 'screenSize', 'episodeFormat', - 'audioChannels'): - merge_similar_guesses(parts, string_part, choose_string) - - # 2- merge the rest, potentially discarding information not properly - # merged before - result = merge_all(parts, - append=['language', 'subtitleLanguage', 'other']) - - log.debug('Final result: ' + result.nice_string()) - return result + return self.match_tree.matched() diff --git a/libs/guessit/matchtree.py b/libs/guessit/matchtree.py old mode 100644 new mode 100755 index 28c8efa..2853c3a --- a/libs/guessit/matchtree.py +++ b/libs/guessit/matchtree.py @@ -22,6 +22,9 @@ from __future__ import unicode_literals from guessit import UnicodeMixin, base_text_type, Guess from guessit.textutils import clean_string, str_fill from guessit.patterns import group_delimiters +from guessit.guess import (merge_similar_guesses, merge_all, + choose_int, choose_string) +import copy import logging log = logging.getLogger(__name__) @@ -257,3 +260,28 @@ class MatchTree(BaseMatchTree): """Return whether the group was explicitly enclosed by parentheses/square brackets/etc.""" return (self.value[0] + self.value[-1]) in group_delimiters + + def matched(self): + # we need to make a copy here, as the merge functions work in place and + # calling them on the match tree would modify it + parts = [node.guess for node in self.nodes() if node.guess] + parts = copy.deepcopy(parts) + + # 1- try to merge similar information together and give it a higher + # confidence + for int_part in ('year', 'season', 'episodeNumber'): + merge_similar_guesses(parts, int_part, choose_int) + + for string_part in ('title', 'series', 'container', 'format', + 'releaseGroup', 'website', 'audioCodec', + 'videoCodec', 'screenSize', 'episodeFormat', + 'audioChannels'): + merge_similar_guesses(parts, string_part, choose_string) + + # 2- merge the rest, potentially discarding information not properly + # merged before + result = merge_all(parts, + append=['language', 'subtitleLanguage', 'other']) + + log.debug('Final result: ' + result.nice_string()) + return result diff --git a/libs/guessit/patterns.py b/libs/guessit/patterns.py index b75ca89..a8a0607 100755 --- a/libs/guessit/patterns.py +++ b/libs/guessit/patterns.py @@ -20,9 +20,10 @@ # from __future__ import unicode_literals +import re -subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa', 'txt' ] +subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ] video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', 'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm', @@ -42,13 +43,13 @@ episode_rexps = [ # ... Season 2 ... (r'saison (?P[0-9]+)', 1.0, (0, 0)), # ... s02e13 ... - (r'[Ss](?P[0-9]{1,2}).{,3}(?P(?:[Ee][0-9]{1,2})+)[^0-9]', 1.0, (0, -1)), + (r'[Ss](?P[0-9]{1,2}).?(?P(?:[Ee-][0-9]{1,2})+)[^0-9]', 1.0, (0, -1)), # ... s03-x02 ... - (r'[Ss](?P[0-9]{1,2}).{,3}(?P(?:[Xx][0-9]{1,2})+)[^0-9]', 1.0, (0, -1)), + (r'[Ss](?P[0-9]{1,2}).?(?P(?:[Xx][0-9]{1,2})+)[^0-9]', 1.0, (0, -1)), # ... 2x13 ... - (r'[^0-9](?P[0-9]{1,2})(?P(?:[xX][0-9]{1,2})+)[^0-9]', 0.8, (1, -1)), + (r'[^0-9](?P[0-9]{1,2}).?(?P(?:[xX][0-9]{1,2})+)[^0-9]', 0.8, (1, -1)), # ... s02 ... #(sep + r's(?P[0-9]{1,2})' + sep, 0.6, (1, -1)), @@ -61,7 +62,7 @@ episode_rexps = [ # ... Season 2 ... ('ep' + sep + r'(?P[0-9]{1,2})[^0-9]', 0.7, (0, -1)), # ... e13 ... for a mini-series without a season number - (r'e(?P[0-9]{1,2})[^0-9]', 0.6, (0, -1)) + (sep + r'e(?P[0-9]{1,2})' + sep, 0.6, (1, -1)) ] @@ -99,92 +100,129 @@ video_rexps = [ # cd number (r'f(?P[0-9]{1,2})', 1.0, (0, 0)) ] -websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com', 'sharethefiles.com' ] +websites = [ 'tvu.org.ru', 'emule-island.com', 'UsaBit.com', 'www.divx-overnet.com', + 'sharethefiles.com' ] -unlikely_series = ['series'] +unlikely_series = [ 'series' ] -properties = { 'format': [ 'DVDRip', 'HD-DVD', 'HDDVD', 'HDDVDRip', 'BluRay', 'Blu-ray', 'BDRip', 'BRRip', - 'HDRip', 'DVD', 'DVDivX', 'HDTV', 'DVB', 'DVBRip', 'PDTV', 'WEBRip', - 'DVDSCR', 'Screener', 'VHS', 'VIDEO_TS', 'WEB-DL', 'WEBDL' ], - 'screenSize': [ '720p', '720', '1080p', '1080' ], +# prop_multi is a dict of { property_name: { canonical_form: [ pattern ] } } +# pattern is a string considered as a regexp, with the addition that dashes are +# replaced with '([ \.-_])?' which matches more types of separators (or none) +# note: simpler patterns need to be at the end of the list to not shadow more +# complete ones, eg: 'AAC' needs to come after 'He-AAC' +# ie: from most specific to less specific +prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ], + 'HD-DVD': [ 'HD-(?:DVD)?-Rip', 'HD-DVD' ], + 'BluRay': [ 'Blu-ray', 'B[DR]Rip' ], + 'HDTV': [ 'HD-TV' ], + 'DVB': [ 'DVB-Rip', 'DVB', 'PD-TV' ], + 'WEBRip': [ 'WEB-Rip' ], + 'Screener': [ 'DVD-SCR', 'Screener' ], + 'VHS': [ 'VHS' ], + 'WEB-DL': [ 'WEB-DL' ] }, - 'videoCodec': [ 'XviD', 'DivX', 'x264', 'h264', 'Rv10' ], + 'screenSize': { '480p': [ '480p?' ], + '720p': [ '720p?' ], + '1080p': [ '1080p?' ] }, - 'audioCodec': [ 'AC3', 'DTS', 'He-AAC', 'AAC-He', 'AAC' ], + 'videoCodec': { 'XviD': [ 'Xvid' ], + 'DivX': [ 'DVDivX', 'DivX' ], + 'h264': [ '[hx]-264' ], + 'Rv10': [ 'Rv10' ] }, - 'audioChannels': [ '5.1' ], + 'audioCodec': { 'AC3': [ 'AC3' ], + 'DTS': [ 'DTS' ], + 'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] }, - 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', '[XCT]', 'iNT', 'PUKKA', - 'CHD', 'ViTE', 'TLF', 'DEiTY', 'FLAiTE', - 'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS', ' FiNaLe', - 'UnSeeN', 'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL', - 'SiNNERS', 'DiRTY', 'REWARD', 'ECI', 'KiNGS', 'CLUE', - 'CtrlHD', 'POD', 'WiKi', 'DIMENSION', 'IMMERSE', 'FQM', - '2HD', 'REPTiLE', 'CTU', 'HALCYON', 'EbP', 'SiTV', 'SAiNTS', - 'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV' ], + 'audioChannels': { '5.1': [ r'5\.1', 'DD5\.1', '5ch' ] }, - 'episodeFormat': [ 'Minisode', 'Minisodes' ], + 'episodeFormat': { 'Minisode': [ 'Minisodes?' ] } - 'other': [ '5ch', 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'iNTERNAL', 'Audiofixed', 'R5', - 'complete', 'classic', # not so sure about these ones, could appear in a title - 'ws', # widescreen - ], } +# prop_single dict of { property_name: [ canonical_form ] } +prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA', + 'CHD', 'ViTE', 'TLF', 'DEiTY', 'FLAiTE', + 'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS', 'FiNaLe', + 'UnSeeN', 'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL', + 'SiNNERS', 'DiRTY', 'REWARD', 'ECI', 'KiNGS', 'CLUE', + 'CtrlHD', 'POD', 'WiKi', 'DIMENSION', 'IMMERSE', 'FQM', + '2HD', 'REPTiLE', 'CTU', 'HALCYON', 'EbP', 'SiTV', + 'SAiNTS', 'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV', + 'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3' ], -def find_properties(filename): + 'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5', + 'complete', 'classic', # not so sure about these ones, could appear in a title + 'ws' ] # widescreen + } + +_dash = '-' +_psep = '[-\. _]?' + +def _to_rexp(prop): + return re.compile(prop.replace(_dash, _psep), re.IGNORECASE) + +# properties_rexps dict of { property_name: { canonical_form: [ rexp ] } } +# containing the rexps compiled from both prop_multi and prop_single +properties_rexps = dict((type, dict((canonical_form, + [ _to_rexp(pattern) for pattern in patterns ]) + for canonical_form, patterns in props.items())) + for type, props in prop_multi.items()) + +properties_rexps.update(dict((type, dict((canonical_form, [ _to_rexp(canonical_form) ]) + for canonical_form in props)) + for type, props in prop_single.items())) + + + +def find_properties(string): result = [] - clow = filename.lower() - for prop, values in properties.items(): - for value in values: - pos = clow.find(value.lower()) - if pos != -1: - end = pos + len(value) - # make sure our word is always surrounded by separators - if ((pos > 0 and clow[pos - 1] not in sep) or - (end < len(clow) and clow[end] not in sep)): + for property_name, props in properties_rexps.items(): + for canonical_form, rexps in props.items(): + for value_rexp in rexps: + match = value_rexp.search(string) + if match: + start, end = match.span() + # make sure our word is always surrounded by separators # note: sep is a regexp, but in this case using it as - # a sequence achieves the same goal - continue + # a char sequence achieves the same goal + if ((start > 0 and string[start-1] not in sep) or + (end < len(string) and string[end] not in sep)): + continue - result.append((prop, value, pos, end)) + result.append((property_name, canonical_form, start, end)) return result -property_synonyms = { 'DVD': [ 'DVDRip', 'VIDEO_TS' ], - 'HD-DVD': [ 'HDDVD', 'HDDVDRip' ], - 'BluRay': [ 'BDRip', 'BRRip', 'Blu-ray' ], - 'WEB-DL': [ 'WEBDL' ], - 'DVB': [ 'DVBRip', 'PDTV' ], - 'Screener': [ 'DVDSCR' ], - 'DivX': [ 'DVDivX' ], - 'h264': [ 'x264' ], - '720p': [ '720' ], - '1080p': [ '1080' ], - 'AAC': [ 'He-AAC', 'AAC-He' ], - 'Special Edition': [ 'Special' ], +property_synonyms = { 'Special Edition': [ 'Special' ], 'Collector Edition': [ 'Collector' ], - 'Criterion Edition': [ 'Criterion' ], - 'Minisode': [ 'Minisodes' ] + 'Criterion Edition': [ 'Criterion' ] } def revert_synonyms(): reverse = {} - for _, values in properties.items(): - for value in values: - reverse[value.lower()] = value - for canonical, synonyms in property_synonyms.items(): for synonym in synonyms: reverse[synonym.lower()] = canonical return reverse + reverse_synonyms = revert_synonyms() def canonical_form(string): return reverse_synonyms.get(string.lower(), string) + + +def compute_canonical_form(property_name, value): + """Return the canonical form of a property given its type if it is a valid + one, None otherwise.""" + for canonical_form, rexps in properties_rexps[property_name].items(): + for rexp in rexps: + if rexp.match(value): + return canonical_form + return None diff --git a/libs/guessit/slogging.py b/libs/guessit/slogging.py old mode 100644 new mode 100755 index f75773c..75e261c --- a/libs/guessit/slogging.py +++ b/libs/guessit/slogging.py @@ -21,6 +21,8 @@ from __future__ import unicode_literals import logging import sys +import os, os.path + GREEN_FONT = "\x1B[0;32m" YELLOW_FONT = "\x1B[0;33m" @@ -29,33 +31,57 @@ RED_FONT = "\x1B[0;31m" RESET_FONT = "\x1B[0m" -def setupLogging(colored=True): +def setupLogging(colored=True, with_time=False, with_thread=False, filename=None): """Set up a nice colored logger as the main application logger.""" class SimpleFormatter(logging.Formatter): - def __init__(self): - self.fmt = '%(levelname)-8s %(module)s:%(funcName)s -- %(message)s' + def __init__(self, with_time, with_thread): + self.fmt = (('%(asctime)s ' if with_time else '') + + '%(levelname)-8s ' + + '[%(name)s:%(funcName)s]' + + ('[%(threadName)s]' if with_thread else '') + + ' -- %(message)s') logging.Formatter.__init__(self, self.fmt) class ColoredFormatter(logging.Formatter): - def __init__(self): - self.fmt = ('%(levelname)-8s ' + - BLUE_FONT + '%(name)s:%(funcName)s' + - RESET_FONT + ' -- %(message)s') + def __init__(self, with_time, with_thread): + self.fmt = (('%(asctime)s ' if with_time else '') + + '-CC-%(levelname)-8s ' + + BLUE_FONT + '[%(name)s:%(funcName)s]' + + RESET_FONT + ('[%(threadName)s]' if with_thread else '') + + ' -- %(message)s') + logging.Formatter.__init__(self, self.fmt) def format(self, record): + modpath = record.name.split('.') + record.mname = modpath[0] + record.mmodule = '.'.join(modpath[1:]) result = logging.Formatter.format(self, record) - if record.levelno in (logging.DEBUG, logging.INFO): - return GREEN_FONT + result + if record.levelno == logging.DEBUG: + color = BLUE_FONT + elif record.levelno == logging.INFO: + color = GREEN_FONT elif record.levelno == logging.WARNING: - return YELLOW_FONT + result + color = YELLOW_FONT else: - return RED_FONT + result + color = RED_FONT - ch = logging.StreamHandler() - if colored and sys.platform != 'win32': - ch.setFormatter(ColoredFormatter()) + result = result.replace('-CC-', color) + return result + + if filename is not None: + # make sure we can write to our log file + logdir = os.path.dirname(filename) + if not os.path.exists(logdir): + os.makedirs(logdir) + ch = logging.FileHandler(filename, mode='w') + ch.setFormatter(SimpleFormatter(with_time, with_thread)) else: - ch.setFormatter(SimpleFormatter()) + ch = logging.StreamHandler() + if colored and sys.platform != 'win32': + ch.setFormatter(ColoredFormatter(with_time, with_thread)) + else: + ch.setFormatter(SimpleFormatter(with_time, with_thread)) + logging.getLogger().addHandler(ch) diff --git a/libs/guessit/textutils.py b/libs/guessit/textutils.py old mode 100644 new mode 100755 index 12043e5..f195e2b --- a/libs/guessit/textutils.py +++ b/libs/guessit/textutils.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # Smewt - A smart collection manager -# Copyright (c) 2008 Nicolas Wack +# Copyright (c) 2008-2012 Nicolas Wack # # Smewt is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,10 +23,13 @@ from guessit import s from guessit.patterns import sep import functools import unicodedata -import copy +import re # string-related functions +def normalize_unicode(s): + return unicodedata.normalize('NFC', s) + def strip_brackets(s): if not s: @@ -55,6 +58,21 @@ def clean_string(s): return result +_words_rexp = re.compile('\w+', re.UNICODE) + +def find_words(s): + return _words_rexp.findall(s.replace('_', ' ')) + + +def reorder_title(title): + ltitle = title.lower() + if ltitle[-4:] == ',the': + return title[-3:] + ' ' + title[:-4] + if ltitle[-5:] == ', the': + return title[-3:] + ' ' + title[:-5] + return title + + def str_replace(string, pos, c): return string[:pos] + c + string[pos+1:] diff --git a/libs/guessit/transfo/__init__.py b/libs/guessit/transfo/__init__.py old mode 100644 new mode 100755 index 67875dc..820690a --- a/libs/guessit/transfo/__init__.py +++ b/libs/guessit/transfo/__init__.py @@ -45,7 +45,7 @@ def format_guess(guess): elif isinstance(value, base_text_type): if prop in ('edition',): value = clean_string(value) - guess[prop] = canonical_form(value) + guess[prop] = canonical_form(value).replace('\\', '') return guess diff --git a/libs/guessit/transfo/guess_bonus_features.py b/libs/guessit/transfo/guess_bonus_features.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/guess_country.py b/libs/guessit/transfo/guess_country.py old mode 100644 new mode 100755 index f95b62c..1d69069 --- a/libs/guessit/transfo/guess_country.py +++ b/libs/guessit/transfo/guess_country.py @@ -19,24 +19,30 @@ # from __future__ import unicode_literals -#from guessit.transfo import SingleNodeGuesser -#from guessit.date import search_year from guessit.country import Country from guessit import Guess import logging log = logging.getLogger(__name__) +# list of common words which could be interpreted as countries, but which +# are far too common to be able to say they represent a country +country_common_words = frozenset([ 'bt', 'bb' ]) def process(mtree): for node in mtree.unidentified_leaves(): - # only keep explicit groups (enclosed in parentheses/brackets) if len(node.node_idx) == 2: - try: - country = Country(node.value[1:-1], strict=True) - if node.value[0] + node.value[-1] not in ['()', '[]', '{}']: - continue - node.guess = Guess(country=country, confidence=1.0) + c = node.value[1:-1].lower() + if c in country_common_words: + continue + + # only keep explicit groups (enclosed in parentheses/brackets) + if node.value[0] + node.value[-1] not in ['()', '[]', '{}']: + continue + try: + country = Country(c, strict=True) except ValueError: - pass + continue + + node.guess = Guess(country=country, confidence=1.0) diff --git a/libs/guessit/transfo/guess_date.py b/libs/guessit/transfo/guess_date.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/guess_episode_info_from_position.py b/libs/guessit/transfo/guess_episode_info_from_position.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/guess_episodes_rexps.py b/libs/guessit/transfo/guess_episodes_rexps.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/guess_filetype.py b/libs/guessit/transfo/guess_filetype.py old mode 100644 new mode 100755 index cdaf114..4d98d01 --- a/libs/guessit/transfo/guess_filetype.py +++ b/libs/guessit/transfo/guess_filetype.py @@ -21,7 +21,7 @@ from __future__ import unicode_literals from guessit import Guess from guessit.patterns import (subtitle_exts, video_exts, episode_rexps, - find_properties, canonical_form) + find_properties, compute_canonical_form) from guessit.date import valid_year from guessit.textutils import clean_string import os.path @@ -89,7 +89,7 @@ def guess_filetype(mtree, filetype): # check whether we are in a 'Movies', 'Tv Shows', ... folder folder_rexps = [ (r'Movies?', upgrade_movie), - (r'Tv ?Shows?', upgrade_episode), + (r'Tv[ _-]?Shows?', upgrade_episode), (r'Series', upgrade_episode) ] for frexp, upgrade_func in folder_rexps: @@ -142,7 +142,7 @@ def guess_filetype(mtree, filetype): upgrade_episode() break - elif canonical_form(value) == 'DVB': + elif compute_canonical_form('format', value) == 'DVB': upgrade_episode() break diff --git a/libs/guessit/transfo/guess_language.py b/libs/guessit/transfo/guess_language.py old mode 100644 new mode 100755 index fe547e6..86c1cf5 --- a/libs/guessit/transfo/guess_language.py +++ b/libs/guessit/transfo/guess_language.py @@ -22,7 +22,7 @@ from __future__ import unicode_literals from guessit import Guess from guessit.transfo import SingleNodeGuesser from guessit.language import search_language -from guessit.textutils import clean_string +from guessit.textutils import clean_string, find_words import logging log = logging.getLogger(__name__) @@ -31,18 +31,13 @@ log = logging.getLogger(__name__) def guess_language(string): language, span, confidence = search_language(string) if language: - # is it a subtitle language? - if 'sub' in clean_string(string[:span[0]]).lower().split(' '): - return (Guess({'subtitleLanguage': language}, - confidence=confidence), - span) - else: - return (Guess({'language': language}, - confidence=confidence), - span) + return (Guess({'language': language}, + confidence=confidence), + span) return None, None def process(mtree): SingleNodeGuesser(guess_language, None, log).process(mtree) + # Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo diff --git a/libs/guessit/transfo/guess_movie_title_from_position.py b/libs/guessit/transfo/guess_movie_title_from_position.py old mode 100644 new mode 100755 index 8b6f5d0..d2e2deb --- a/libs/guessit/transfo/guess_movie_title_from_position.py +++ b/libs/guessit/transfo/guess_movie_title_from_position.py @@ -20,6 +20,7 @@ from __future__ import unicode_literals from guessit import Guess +import unicodedata import logging log = logging.getLogger(__name__) diff --git a/libs/guessit/transfo/guess_properties.py b/libs/guessit/transfo/guess_properties.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/guess_release_group.py b/libs/guessit/transfo/guess_release_group.py old mode 100644 new mode 100755 index 2cee4b0..2ff237d --- a/libs/guessit/transfo/guess_release_group.py +++ b/libs/guessit/transfo/guess_release_group.py @@ -20,49 +20,51 @@ from __future__ import unicode_literals from guessit.transfo import SingleNodeGuesser -from guessit.patterns import properties, canonical_form +from guessit.patterns import prop_multi, compute_canonical_form, _dash, _psep import re import logging log = logging.getLogger(__name__) +def get_patterns(property_name): + return [ p.replace(_dash, _psep) for patterns in prop_multi[property_name].values() for p in patterns ] -CODECS = properties['videoCodec'] -FORMATS = properties['format'] +CODECS = get_patterns('videoCodec') +FORMATS = get_patterns('format') + +GROUP_NAMES = [ r'(?P' + codec + r')-?(?P.*?)[ \.]' + for codec in CODECS ] +GROUP_NAMES += [ r'(?P' + fmt + r')-?(?P.*?)[ \.]' + for fmt in FORMATS ] + +GROUP_NAMES2 = [ r'\.(?P' + codec + r')-(?P.*?)(-(.*?))?[ \.]' + for codec in CODECS ] +GROUP_NAMES2 += [ r'\.(?P' + fmt + r')-(?P.*?)(-(.*?))?[ \.]' + for fmt in FORMATS ] + +GROUP_NAMES = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES ] +GROUP_NAMES2 = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES2 ] def adjust_metadata(md): - codec = canonical_form(md['videoCodec']) - if codec in FORMATS: - md['format'] = codec - del md['videoCodec'] - return md + return dict((property_name, compute_canonical_form(property_name, value) or value) + for property_name, value in md.items()) def guess_release_group(string): - group_names = [ r'\.(Xvid)-(?P.*?)[ \.]', - r'\.(DivX)-(?P.*?)[\. ]', - r'\.(DVDivX)-(?P.*?)[\. ]', - ] - # first try to see whether we have both a known codec and a known release group - group_names = [ r'\.(?P' + codec + r')-(?P.*?)[ \.]' - for codec in (CODECS + FORMATS) ] - - for rexp in group_names: - match = re.search(rexp, string, re.IGNORECASE) + for rexp in GROUP_NAMES: + match = rexp.search(string) if match: metadata = match.groupdict() - if canonical_form(metadata['releaseGroup']) in properties['releaseGroup']: + release_group = compute_canonical_form('releaseGroup', metadata['releaseGroup']) + if release_group: return adjust_metadata(metadata), (match.start(1), match.end(2)) # pick anything as releaseGroup as long as we have a codec in front # this doesn't include a potential dash ('-') ending the release group # eg: [...].X264-HiS@SiLUHD-English.[...] - group_names = [ r'\.(?P' + codec + r')-(?P.*?)(-(.*?))?[ \.]' - for codec in (CODECS + FORMATS) ] - - for rexp in group_names: - match = re.search(rexp, string, re.IGNORECASE) + for rexp in GROUP_NAMES2: + match = rexp.search(string) if match: return adjust_metadata(match.groupdict()), (match.start(1), match.end(2)) diff --git a/libs/guessit/transfo/guess_video_rexps.py b/libs/guessit/transfo/guess_video_rexps.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/guess_weak_episodes_rexps.py b/libs/guessit/transfo/guess_weak_episodes_rexps.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/guess_website.py b/libs/guessit/transfo/guess_website.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/guess_year.py b/libs/guessit/transfo/guess_year.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/post_process.py b/libs/guessit/transfo/post_process.py old mode 100644 new mode 100755 index a2a7a33..5920e3a --- a/libs/guessit/transfo/post_process.py +++ b/libs/guessit/transfo/post_process.py @@ -20,6 +20,7 @@ from __future__ import unicode_literals from guessit.patterns import subtitle_exts +from guessit.textutils import reorder_title, find_words import logging log = logging.getLogger(__name__) @@ -45,6 +46,15 @@ def process(mtree): node == mtree.leaves()[-2]): promote_subtitle() + # - if we find the word 'sub' before the language, and in the same explicit + # group, then upgrade the language + explicit_group = mtree.node_at(node.node_idx[:2]) + group_str = explicit_group.value.lower() + + if ('sub' in find_words(group_str) and + 0 <= group_str.find('sub') < (node.span[0] - explicit_group.span[0])): + promote_subtitle() + # - if a language is in an explicit group just preceded by "st", # it is a subtitle language (eg: '...st[fr-eng]...') try: @@ -60,11 +70,4 @@ def process(mtree): if 'series' not in node.guess: continue - series = node.guess['series'] - lseries = series.lower() - - if lseries[-4:] == ',the': - node.guess['series'] = 'The ' + series[:-4] - - if lseries[-5:] == ', the': - node.guess['series'] = 'The ' + series[:-5] + node.guess['series'] = reorder_title(node.guess['series']) diff --git a/libs/guessit/transfo/split_explicit_groups.py b/libs/guessit/transfo/split_explicit_groups.py old mode 100644 new mode 100755 diff --git a/libs/guessit/transfo/split_on_dash.py b/libs/guessit/transfo/split_on_dash.py old mode 100644 new mode 100755 index b4454dc..031baff --- a/libs/guessit/transfo/split_on_dash.py +++ b/libs/guessit/transfo/split_on_dash.py @@ -38,15 +38,5 @@ def process(mtree): indices.extend([ span[0], span[1] ]) match = pattern.search(node.value, span[1]) - didx = node.value.find('-') - while didx > 0: - if (didx > 10 and - (didx - 1 not in indices and - didx + 2 not in indices)): - - indices.extend([ didx, didx + 1 ]) - - didx = node.value.find('-', didx + 1) - if indices: node.partition(indices) diff --git a/libs/guessit/transfo/split_path_components.py b/libs/guessit/transfo/split_path_components.py old mode 100644 new mode 100755