You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

154 lines
5.3 KiB

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from guessit.patterns import deleted
from guessit.textutils import clean_string
import logging
# Module-level logger, registered under the "guessit.matchtree" name.
log = logging.getLogger("guessit.matchtree")
def tree_to_string(tree):
    """Render a match tree as five aligned lines of text.

    ``tree`` is a sequence of path parts; each path part is a sequence of
    explicit groups; each explicit group is a sequence of
    ``(group, remaining, result)`` triples.  Every triple contributes
    ``len(remaining)`` columns to each of the five output lines:

    - line 1: path part index
    - line 2: explicit group index
    - line 3: group index
    - line 4: the ``remaining`` string itself
    - line 5: a one-letter tag telling what kind of information was matched
      ('t' = title, 'f' = format, 'l' = language, ...), 'x' when the result
      holds none of the known properties, and a blank when there is no
      result at all

    Indices of 10 and above are written as letters starting at 'A' so that
    each index still fits in a single column.  A '/' column is inserted
    after every path part except the last two, and a '.' column between the
    second-to-last and the last part; those separator columns are blank on
    every line but the fourth.

    Example:

        0000000000000000000000000000000000000000000000000000000000000000000000000000000000 111
        0000011111111111112222222222222233333333444444444444444455555555666777777778888888 000
        0000000000000000000000000000000001111112011112222333333401123334000011233340000000 000
        __________________(The.Prestige).______.[____.HP.______.{__-___}.St{__-___}.Chaps].___
        xxxxxttttttttttttt               ffffff  vvvv    xxxxxx  ll lll     xx xxx         ccc
        [XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv

    (note: the last line, showing the filename, is not part of the tree
    representation)
    """
    # Property name -> one-letter tag.  The table is scanned in iteration
    # order and the first property found in a result decides its tag.
    tag_table = {
        'episodeNumber': 'E',
        'season': 'S',
        'extension': 'e',
        'format': 'f',
        'language': 'l',
        'videoCodec': 'v',
        'audioCodec': 'a',
        'website': 'w',
        'container': 'c',
        'series': 'T',
        'title': 't',
        'date': 'd',
        'year': 'y',
        'releaseGroup': 'r',
        'screenSize': 's',
    }

    # One list of string fragments per output line, joined at the very end:
    # path index, explicit group index, group index, remaining text, tag.
    rows = ([], [], [], [], [])

    def as_digit(index):
        # Non-integers (the blank used for separator columns) pass through.
        if not isinstance(index, int):
            return index
        # 10 -> 'A', 11 -> 'B', ... keeps every index one character wide.
        return chr(55 + index) if index >= 10 else str(index)

    def tag_for(guess):
        if guess is None:
            return ' '
        known = (letter for prop, letter in tag_table.items() if prop in guess)
        return next(known, 'x')

    def emit(pidx, eidx, gidx, remaining, tag=None):
        width = len(remaining)
        rows[0].append(as_digit(pidx) * width)
        rows[1].append(as_digit(eidx) * width)
        rows[2].append(as_digit(gidx) * width)
        rows[3].append(remaining)
        rows[4].append(str(tag or ' ') * width)

    for pidx, pathpart in enumerate(tree):
        for eidx, explicit_group in enumerate(pathpart):
            for gidx, entry in enumerate(explicit_group):
                _group, remaining, guess = entry
                emit(pidx, eidx, gidx, remaining, tag_for(guess))

        # Separator column that follows this path part, if any.
        second_to_last = len(tree) - 2
        if pidx < second_to_last:
            emit(' ', ' ', ' ', '/')
        elif pidx == second_to_last:
            emit(' ', ' ', ' ', '.')

    return '\n'.join(''.join(fragments) for fragments in rows)
def iterate_groups(match_tree):
    """Lazily walk every group of a match tree, depth first.

    Yields ``(group_pos, group)`` pairs where:
     - ``group_pos`` is the ``(pidx, eidx, gidx)`` index triple: path part,
       explicit group within that part, group within that explicit group
     - ``group`` is the element stored at that position (the callers in
       this module unpack it as ``(string, remaining, guess)``)
    """
    for path_no, path_part in enumerate(match_tree):
        for explicit_no, explicit_group in enumerate(path_part):
            for group_no, group in enumerate(explicit_group):
                position = (path_no, explicit_no, group_no)
                yield position, group
def find_group(match_tree, prop):
    """Return the positions of all groups whose guess contains ``prop``.

    Groups are visited in ``iterate_groups`` order; a group is selected when
    its guess is truthy and ``prop in guess`` holds.  The result is a list
    of ``(pidx, eidx, gidx)`` positions, empty when nothing matches.
    """
    return [
        gpos
        for gpos, (_string, _remaining, guess) in iterate_groups(match_tree)
        if guess and prop in guess
    ]
def get_group(match_tree, gpos):
    """Return the group stored at position ``gpos`` of ``match_tree``.

    ``gpos`` is a ``(pidx, eidx, gidx)`` triple: path part index, explicit
    group index within that part, and group index within that explicit
    group.
    """
    path_idx, explicit_idx, group_idx = gpos
    explicit_groups = match_tree[path_idx]
    return explicit_groups[explicit_idx][group_idx]
def leftover_valid_groups(match_tree, valid=lambda s: len(s[0]) > 3):
    """Collect the unmatched groups of ``match_tree`` that pass ``valid``.

    For every group without a guess, the group's ``remaining`` string is
    run through ``clean_string`` and paired with the group position.  The
    ``(cleaned_str, group_pos)`` pair is handed to ``valid``; the default
    predicate keeps pairs whose cleaned string is longer than 3 characters.

    Returns the accepted ``(cleaned_str, group_pos)`` pairs as a list, in
    ``iterate_groups`` order.
    """
    # Lazy, so each candidate is cleaned and then validated one at a time.
    candidates = (
        (clean_string(remaining), gpos)
        for gpos, (_group, remaining, guess) in iterate_groups(match_tree)
        if not guess
    )
    return [candidate for candidate in candidates if valid(candidate)]