20 changed files with 2228 additions and 0 deletions
@ -0,0 +1,6 @@ |
|||||
|
from .main import Library


def autoload():
    """Plugin entry point: return the Library instance for the loader."""
    return Library()


# No user-facing configuration for this plugin.
config = []
@ -0,0 +1,13 @@ |
|||||
|
from couchpotato.core.event import addEvent
from couchpotato.core.plugins.base import Plugin


class LibraryBase(Plugin):
    """Base class for media-type specific library plugins."""

    # Media type identifier provided by the subclass (left None here).
    _type = None

    def initType(self):
        """Register this library's type on the 'library.types' event."""
        addEvent('library.types', self.getType)

    def getType(self):
        """Return the media type identifier for this library."""
        return self._type
@ -0,0 +1,18 @@ |
|||||
|
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.media._base.library.base import LibraryBase


class Library(LibraryBase):
    """Exposes the 'library.title' event for looking up a plain title."""

    def __init__(self):
        addEvent('library.title', self.title)

    def title(self, library):
        """Return a bare title for *library* via the 'library.query' event.

        Year, identifier and condensing are all disabled so the result is
        just the plain name.
        """
        return fireEvent(
            'library.query',
            library,

            condense = False,
            include_year = False,
            include_identifier = False,
            single = True
        )
@ -0,0 +1,6 @@ |
|||||
|
from .main import Matcher


def autoload():
    """Plugin entry point: return the Matcher instance for the loader."""
    return Matcher()


# No user-facing configuration for this plugin.
config = []
@ -0,0 +1,84 @@ |
|||||
|
from couchpotato.core.event import addEvent
from couchpotato.core.helpers.encoding import simplifyString
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin


log = CPLog(__name__)


class MatcherBase(Plugin):
    """Base class for media-type specific release-name matchers."""

    # Media type this matcher handles (e.g. 'movie'); set by subclasses.
    type = None

    def __init__(self):
        # Only register the correctness check when a type is declared.
        if self.type:
            addEvent('%s.matcher.correct' % self.type, self.correct)

    def correct(self, chain, release, media, quality):
        """Decide whether a parsed chain matches the wanted media/quality.

        Subclasses must implement this.
        """
        raise NotImplementedError()

    def flattenInfo(self, info):
        """Flatten a chain-info structure.

        Dictionaries are flattened recursively; a list of matches becomes
        either a {key: [values]} dict (dict entries) or a plain list.
        An empty list yields None.
        """
        if isinstance(info, dict):
            # Flatten dictionary of matches (chain info)
            return dict([(key, self.flattenInfo(value)) for key, value in info.items()])

        # Flatten matches
        result = None

        for entry in info:
            if isinstance(entry, dict):
                if result is None:
                    result = {}

                # Collect values per key across all dict matches.
                for key, value in entry.items():
                    result.setdefault(key, []).append(value)
            else:
                if result is None:
                    result = []

                result.append(entry)

        return result

    def constructFromRaw(self, match):
        """Rebuild the raw matched string from a list of match tuples."""
        if not match:
            return None

        pieces = [
            ''.join([fragment for fragment in entry[1:] if fragment])
            for entry in match
        ]

        # Drop the trailing separator character, then strip whitespace.
        return ''.join(pieces)[:-1].strip()

    def simplifyValue(self, value):
        """Normalise a string (or list of strings) for loose comparison.

        :raises ValueError: for unsupported value types.
        """
        if not value:
            return value

        if isinstance(value, basestring):
            return simplifyString(value)

        if isinstance(value, list):
            return [self.simplifyValue(item) for item in value]

        raise ValueError("Unsupported value type")

    def chainMatch(self, chain, group, tags):
        """Return True when every required tag is found in the chain group."""
        info = self.flattenInfo(chain.info[group])

        found_tags = []
        for tag, accepted in tags.items():
            values = [self.simplifyValue(v) for v in info.get(tag, [None])]

            if any([v in accepted for v in values]):
                found_tags.append(tag)

        log.debug('tags found: %s, required: %s' % (found_tags, tags.keys()))

        if set(tags.keys()) == set(found_tags):
            return True

        # NOTE(review): found_tags is always a subset of tags, so this is
        # equivalent to the set-equality check above; kept for parity.
        return all([key in found_tags for key in tags.keys()])
@ -0,0 +1,89 @@ |
|||||
|
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.variable import possibleTitles
from couchpotato.core.logger import CPLog
from couchpotato.core.media._base.matcher.base import MatcherBase
from caper import Caper


log = CPLog(__name__)


class Matcher(MatcherBase):
    """Release-name parsing and matching built on top of Caper."""

    def __init__(self):
        super(Matcher, self).__init__()

        self.caper = Caper()

        addEvent('matcher.parse', self.parse)
        addEvent('matcher.match', self.match)

        addEvent('matcher.flatten_info', self.flattenInfo)
        addEvent('matcher.construct_from_raw', self.constructFromRaw)

        addEvent('matcher.correct_title', self.correctTitle)
        addEvent('matcher.correct_quality', self.correctQuality)

    def parse(self, name, parser='scene'):
        """Parse a release name with the given Caper parser."""
        return self.caper.parse(name, parser)

    def match(self, release, media, quality):
        """Return the first parsed chain matching media/quality, else False."""
        match = fireEvent('matcher.parse', release['name'], single = True)

        if len(match.chains) < 1:
            log.info2('Wrong: %s, unable to parse release name (no chains)', release['name'])
            return False

        for chain in match.chains:
            # Delegate the correctness check to the media-type matcher.
            if fireEvent('%s.matcher.correct' % media['type'], chain, release, media, quality, single = True):
                return chain

        return False

    def correctTitle(self, chain, media):
        """Check the parsed show name against the media's known titles."""
        root_library = media['library']['root_library']

        if 'show_name' not in chain.info or not len(chain.info['show_name']):
            log.info('Wrong: missing show name in parsed result')
            return False

        # Get the lower-case parsed show name from the chain
        chain_words = [word.lower() for word in chain.info['show_name']]

        # Build a list of possible titles of the media we are searching for
        titles = root_library['info']['titles']

        # Add year suffix titles (will result in
        # ['<name_one>', '<name_one> <suffix_one>', '<name_two>', ...])
        suffixes = [None, root_library['info']['year']]

        titles = [
            title + ((' %s' % suffix) if suffix else '')
            for title in titles
            for suffix in suffixes
        ]

        # Check show titles match
        # TODO check xem names
        for title in titles:
            for valid_words in [candidate.split(' ') for candidate in possibleTitles(title)]:

                if valid_words == chain_words:
                    return True

        return False

    def correctQuality(self, chain, quality, quality_map):
        """Check the chain's video tags against the wanted quality."""
        if quality['identifier'] not in quality_map:
            log.info2('Wrong: unknown preferred quality %s', quality['identifier'])
            return False

        if 'video' not in chain.info:
            log.info2('Wrong: no video tags found')
            return False

        video_tags = quality_map[quality['identifier']]

        if not self.chainMatch(chain, 'video', video_tags):
            log.info2('Wrong: %s tags not in chain', video_tags)
            return False

        return True
@ -0,0 +1,195 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from logr import Logr |
||||
|
from caper.matcher import FragmentMatcher |
||||
|
from caper.objects import CaperFragment, CaperClosure |
||||
|
from caper.parsers.anime import AnimeParser |
||||
|
from caper.parsers.scene import SceneParser |
||||
|
from caper.parsers.usenet import UsenetParser |
||||
|
|
||||
|
|
||||
|
__version_info__ = ('0', '3', '1') |
||||
|
__version_branch__ = 'master' |
||||
|
|
||||
|
__version__ = "%s%s" % ( |
||||
|
'.'.join(__version_info__), |
||||
|
'-' + __version_branch__ if __version_branch__ else '' |
||||
|
) |
||||
|
|
||||
|
|
||||
|
CL_START_CHARS = ['(', '[', '<', '>'] |
||||
|
CL_END_CHARS = [')', ']', '<', '>'] |
||||
|
CL_END_STRINGS = [' - '] |
||||
|
|
||||
|
STRIP_START_CHARS = ''.join(CL_START_CHARS) |
||||
|
STRIP_END_CHARS = ''.join(CL_END_CHARS) |
||||
|
STRIP_CHARS = ''.join(['_', ' ', '.']) |
||||
|
|
||||
|
FRAGMENT_SEPARATORS = ['.', '-', '_', ' '] |
||||
|
|
||||
|
|
||||
|
CL_START = 0 |
||||
|
CL_END = 1 |
||||
|
|
||||
|
|
||||
|
class Caper(object):
    """Splits a release name into closures/fragments and runs a parser."""

    def __init__(self, debug=False):
        self.debug = debug

        # Available parser implementations, keyed by parser name.
        self.parsers = {
            'anime': AnimeParser,
            'scene': SceneParser,
            'usenet': UsenetParser
        }

    def _closure_split(self, name):
        """Split *name* into bracket/separator delimited closures.

        :type name: str

        :rtype: list of CaperClosure
        """

        closures = []

        def end_closure(closures, pending):
            # Finish the pending text as a closure, linking neighbours.
            pending = pending.strip(STRIP_CHARS)
            if len(pending) < 2:
                return

            closure = CaperClosure(len(closures), pending)
            closure.left = closures[len(closures) - 1] if len(closures) > 0 else None

            if closure.left:
                closure.left.right = closure

            closures.append(closure)

        state = CL_START
        pending = ""

        for ch in name:
            # Check for start characters
            if state == CL_START and ch in CL_START_CHARS:
                end_closure(closures, pending)

                state = CL_END
                pending = ""

            pending += ch

            if state == CL_END and ch in CL_END_CHARS:
                # End character found, create the closure
                end_closure(closures, pending)

                state = CL_START
                pending = ""
            elif state == CL_START and pending[-3:] in CL_END_STRINGS:
                # End string found, create the closure
                end_closure(closures, pending[:-3])

                state = CL_START
                pending = ""

        end_closure(closures, pending)

        return closures

    def _clean_closure(self, closure):
        """Strip enclosing bracket characters from a closure string.

        :type closure: str

        :rtype: str
        """

        return closure.lstrip(STRIP_START_CHARS).rstrip(STRIP_END_CHARS)

    def _fragment_split(self, closures):
        """Split each closure's value into separator-delimited fragments.

        :type closures: list of CaperClosure

        :rtype: list of CaperClosure
        """

        position = 0
        current = None

        def end_fragment(fragments, current, position):
            # Finish the current fragment, linking it to its left neighbour.
            current.position = position

            current.left = fragments[len(fragments) - 1] if len(fragments) > 0 else None
            if current.left:
                current.left_sep = current.left.right_sep
                current.left.right = current

            # NOTE: `ch` is taken from the enclosing loop — the character
            # that terminated this fragment.
            current.right_sep = ch

            fragments.append(current)

        for closure in closures:
            closure.fragments = []

            separator_buffer = ""

            for ch in self._clean_closure(closure.value):
                if not current:
                    current = CaperFragment(closure)

                if ch in FRAGMENT_SEPARATORS:
                    if current.value:
                        separator_buffer = ""

                    separator_buffer += ch

                    if current.value or not closure.fragments:
                        end_fragment(closure.fragments, current, position)
                    elif len(separator_buffer) > 1:
                        # A run of separators becomes its own fragment.
                        current.value = separator_buffer.strip()

                        if current.value:
                            end_fragment(closure.fragments, current, position)

                        separator_buffer = ""

                    # Reset
                    current = None
                    position += 1
                else:
                    current.value += ch

            # Finish parsing the last fragment
            if current and current.value:
                end_fragment(closure.fragments, current, position)

            # Reset
            position = 0
            current = None

        return closures

    def parse(self, name, parser='scene'):
        """Split *name* and run the selected parser over the closures.

        :raises ValueError: when *parser* is not a known parser name.
        """
        closures = self._closure_split(name)
        closures = self._fragment_split(closures)

        # Print closures
        for closure in closures:
            Logr.debug("closure [%s]", closure.value)

            for fragment in closure.fragments:
                Logr.debug("\tfragment [%s]", fragment.value)

        if parser not in self.parsers:
            raise ValueError("Unknown parser")

        # TODO autodetect the parser type
        return self.parsers[parser](self.debug).run(closures)
@ -0,0 +1,134 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
|
||||
|
class CaptureConstraint(object):
    """A single constraint evaluated against nodes during capturing."""

    def __init__(self, capture_group, constraint_type, comparisons=None, target=None, **kwargs):
        """Capture constraint object

        :type capture_group: CaptureGroup
        """

        self.capture_group = capture_group

        self.constraint_type = constraint_type
        self.target = target

        self.comparisons = comparisons if comparisons else []
        self.kwargs = {}

        # Keyword arguments of the form "<attr>__<op>" become comparison
        # functions (e.g. value__eq -> constraint_match_eq on 'value');
        # anything that doesn't fit is kept as a plain keyword argument.
        for raw_key, value in kwargs.items():
            parts = raw_key.split('__')
            if len(parts) != 2:
                self.kwargs[raw_key] = value
                continue

            attr, op = parts
            handler = 'constraint_match_' + op

            if not hasattr(self, handler):
                self.kwargs[raw_key] = value
                continue

            self.comparisons.append((attr, getattr(self, handler), value))

    def execute(self, parent_node, node, **kwargs):
        """Dispatch to the 'constraint_<type>' handler.

        :raises ValueError: when the constraint type has no handler.
        """
        handler = 'constraint_%s' % self.constraint_type

        if hasattr(self, handler):
            return getattr(self, handler)(parent_node, node, **kwargs)

        raise ValueError('Unknown constraint type "%s"' % self.constraint_type)

    #
    # Node Matching
    #

    def constraint_match(self, parent_node, node):
        """Run every comparison; return (mean weight, all-passed)."""
        outcomes = []
        total_weight = 0

        for attr, method, argument in self.comparisons:
            weight, success = method(node, attr, argument)
            total_weight += weight
            outcomes.append(success)

        return total_weight / (float(len(outcomes)) or 1), all(outcomes) if len(outcomes) > 0 else False

    def constraint_match_eq(self, node, name, expected):
        """Equality comparison against a node attribute."""
        if not hasattr(node, name):
            return 1.0, False

        return 1.0, getattr(node, name) == expected

    def constraint_match_re(self, node, name, arg):
        """Regex / fragment-matcher comparison against a node attribute."""
        # Node match
        if name == 'node':
            group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)

            weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(node, group)
            return weight, weight > minimum_weight

        # Regex match
        # NOTE(review): relies on the Python 2 compiled-pattern type name.
        if type(arg).__name__ == 'SRE_Pattern':
            return 1.0, arg.match(getattr(node, name)) is not None

        # Value match
        if hasattr(node, name):
            match = self.capture_group.parser.matcher.value_match(getattr(node, name), arg, single=True)
            return 1.0, match is not None

        raise ValueError("Unknown constraint match type '%s'" % name)

    #
    # Result
    #

    def constraint_result(self, parent_node, fragment):
        """Pass when a result with the configured tag (and key) was captured."""
        ctag = self.kwargs.get('tag')
        if not ctag:
            return 0, False

        ckey = self.kwargs.get('key')

        for tag, result in parent_node.captured():
            if tag != ctag:
                continue

            if not ckey or ckey in result.keys():
                return 1.0, True

        return 0.0, False

    #
    # Failure
    #

    def constraint_failure(self, parent_node, fragment, match):
        """Pass when the previous step did not match."""
        if not match or not match.success:
            return 1.0, True

        return 0, False

    #
    # Success
    #

    def constraint_success(self, parent_node, fragment, match):
        """Pass when the previous step matched."""
        if match and match.success:
            return 1.0, True

        return 0, False

    def __repr__(self):
        return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons)
@ -0,0 +1,284 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
|
||||
|
from logr import Logr |
||||
|
from caper import CaperClosure, CaperFragment |
||||
|
from caper.helpers import clean_dict |
||||
|
from caper.result import CaperFragmentNode, CaperClosureNode |
||||
|
from caper.step import CaptureStep |
||||
|
from caper.constraint import CaptureConstraint |
||||
|
|
||||
|
|
||||
|
class CaptureGroup(object):
    """Runs capture steps over the result heads until a constraint fires."""

    def __init__(self, parser, result):
        """Capture group object

        :type parser: caper.parsers.base.Parser
        :type result: caper.result.CaperResult
        """

        self.parser = parser
        self.result = result

        #: :type: list of CaptureStep
        self.steps = []

        #: :type: str
        self.step_source = None

        #: :type: list of CaptureConstraint
        self.pre_constraints = []

        #: :type: list of CaptureConstraint
        self.post_constraints = []

    def _add_step(self, tag, source, regex, func, single, **kwargs):
        # Shared implementation of capture_fragment()/capture_closure():
        # steps in one group must all capture from the same subject kind.
        if self.step_source != source:
            if self.step_source is None:
                self.step_source = source
            else:
                raise ValueError("Unable to mix fragment and closure capturing in a group")

        self.steps.append(CaptureStep(
            self, tag,
            source,
            regex=regex,
            func=func,
            single=single,
            **kwargs
        ))

        return self

    def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs):
        """Add a step that captures from fragments."""
        Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single)
        return self._add_step(tag, 'fragment', regex, func, single, **kwargs)

    def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
        """Add a step that captures from closures."""
        Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)
        return self._add_step(tag, 'closure', regex, func, single, **kwargs)

    def until_closure(self, **kwargs):
        """Stop capturing when a closure matches the given constraints."""
        self.pre_constraints.append(CaptureConstraint(self, 'match', target='closure', **kwargs))

        return self

    def until_fragment(self, **kwargs):
        """Stop capturing when a fragment matches the given constraints."""
        self.pre_constraints.append(CaptureConstraint(self, 'match', target='fragment', **kwargs))

        return self

    def until_result(self, **kwargs):
        """Stop capturing once a matching tag has already been captured."""
        self.pre_constraints.append(CaptureConstraint(self, 'result', **kwargs))

        return self

    def until_failure(self, **kwargs):
        """Stop capturing after a step fails to match."""
        self.post_constraints.append(CaptureConstraint(self, 'failure', **kwargs))

        return self

    def until_success(self, **kwargs):
        """Stop capturing after a step matches."""
        self.post_constraints.append(CaptureConstraint(self, 'success', **kwargs))

        return self

    def parse_subject(self, parent_head, subject):
        """Parse one subject (closure or fragment), returning new heads.

        :raises ValueError: for unknown subject types.
        """
        Logr.debug("parse_subject (%s) subject: %s", self.step_source, repr(subject))

        if type(subject) is CaperClosure:
            return self.parse_closure(parent_head, subject)

        if type(subject) is CaperFragment:
            return self.parse_fragment(parent_head, subject)

        # BUG FIX: was raise ValueError('Unknown subject (%s)', subject) —
        # the second argument was never interpolated into the message.
        raise ValueError('Unknown subject (%s)' % subject)

    def parse_fragment(self, parent_head, subject):
        """Match *subject* (a fragment) and build the resulting head nodes."""
        parent_node = parent_head[0] if type(parent_head) is list else parent_head

        nodes, match = self.match(parent_head, parent_node, subject)

        # Capturing broke on constraint, return now
        if not match:
            return nodes

        Logr.debug('created fragment node with subject.value: "%s"' % subject.value)

        result = [CaperFragmentNode(
            parent_node.closure,
            subject.take_right(match.num_fragments),
            parent_head,
            match
        )]

        # Branch if the match was indefinite (weight below 1.0)
        if match.result and match.weight < 1.0:
            if match.num_fragments == 1:
                result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head))
            else:
                nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head))

        nodes.append(result[0] if len(result) == 1 else result)

        return nodes

    def parse_closure(self, parent_head, subject):
        """Match *subject* (a closure) and build the resulting head nodes."""
        parent_node = parent_head[0] if type(parent_head) is list else parent_head

        nodes, match = self.match(parent_head, parent_node, subject)

        # Capturing broke on constraint, return now
        if not match:
            return nodes

        Logr.debug('created closure node with subject.value: "%s"' % subject.value)

        result = [CaperClosureNode(
            subject,
            parent_head,
            match
        )]

        # Branch if the match was indefinite (weight below 1.0)
        if match.result and match.weight < 1.0:
            if match.num_fragments == 1:
                result.append(CaperClosureNode(subject, parent_head))
            else:
                nodes.append(CaperClosureNode(subject, parent_head))

        nodes.append(result[0] if len(result) == 1 else result)

        return nodes

    def match(self, parent_head, parent_node, subject):
        """Run constraints and steps for *subject*; return (nodes, match)."""
        nodes = []

        # Check pre constraints
        broke, definite = self.check_constraints(self.pre_constraints, parent_head, subject)

        if broke:
            nodes.append(parent_head)

            if definite:
                return nodes, None

        # Try match subject against the steps available
        match = None

        for step in self.steps:
            if step.source == 'closure' and type(subject) is not CaperClosure:
                # NOTE(review): this `pass` still lets the step execute on a
                # non-closure subject below — confirm whether `continue` was
                # intended before changing behavior.
                pass
            elif step.source == 'fragment' and type(subject) is CaperClosure:
                Logr.debug('Closure encountered on fragment step, jumping into fragments')
                return [CaperClosureNode(subject, parent_head, None)], None

            match = step.execute(subject)

            if match.success:
                if type(match.result) is dict:
                    match.result = clean_dict(match.result)

                Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (
                    match.weight, match.result, match.num_fragments
                ))

                step.matched = True

                break

        if all([step.single and step.matched for step in self.steps]):
            Logr.debug('All steps completed, group finished')
            parent_node.finished_groups.append(self)
            return nodes, match

        # Check post constraints
        broke, definite = self.check_constraints(self.post_constraints, parent_head, subject, match=match)
        if broke:
            return nodes, None

        return nodes, match

    def check_constraints(self, constraints, parent_head, subject, **kwargs):
        """Evaluate constraints; return (broke, definite)."""
        parent_node = parent_head[0] if type(parent_head) is list else parent_head

        # Check constraints
        for constraint in [c for c in constraints if c.target == subject.__key__ or not c.target]:
            Logr.debug("Testing constraint %s against subject %s", repr(constraint), repr(subject))

            weight, success = constraint.execute(parent_node, subject, **kwargs)

            if success:
                Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
                parent_node.finished_groups.append(self)

                return True, weight == 1.0

        return False, None

    def execute(self):
        """Advance every result head until this group is finished on all of them."""
        heads_finished = None

        while heads_finished is None or not (len(heads_finished) == len(self.result.heads) and all(heads_finished)):
            heads_finished = []

            heads = self.result.heads
            self.result.heads = []

            for head in heads:
                node = head[0] if type(head) is list else head

                if self in node.finished_groups:
                    Logr.debug("head finished for group")
                    self.result.heads.append(head)
                    heads_finished.append(True)
                    continue

                Logr.debug('')

                Logr.debug(node)

                next_subject = node.next()

                Logr.debug('----------[%s] (%s)----------' % (next_subject, repr(next_subject.value) if next_subject else None))

                if next_subject:
                    for node_result in self.parse_subject(head, next_subject):
                        self.result.heads.append(node_result)

                    Logr.debug('Heads: %s', self.result.heads)

                heads_finished.append(self in node.finished_groups or next_subject is None)

            # Keep the previous heads when nothing new was produced.
            if len(self.result.heads) == 0:
                self.result.heads = heads

            Logr.debug("heads_finished: %s, self.result.heads: %s", heads_finished, self.result.heads)

        Logr.debug("group finished")
@ -0,0 +1,80 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
import sys


# Major-version flags used to pick the right builtin (range vs xrange, ...).
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
||||
|
|
||||
|
|
||||
|
def is_list_type(obj, element_type):
    """Return True if *obj* is a list whose elements are of *element_type*.

    Only the first element is inspected.

    :raises ValueError: when *obj* is an empty list (type undeterminable).
    """
    if type(obj) is not list:
        return False

    if len(obj) < 1:
        raise ValueError("Unable to determine list element type from empty list")

    return type(obj[0]) is element_type
||||
|
|
||||
|
|
||||
|
def clean_dict(target, remove=None):
    """Recursively remove items matching a value 'remove' from the dictionary

    Mutates *target* in place and returns it.

    :type target: dict
    :raises ValueError: when *target* is not a dict.
    """
    if type(target) is not dict:
        raise ValueError("Target is required to be a dict")

    # Collect keys first: the dict must not change size while iterating.
    doomed = [
        key for key in target.keys()
        if type(target[key]) is not dict and target[key] == remove
    ]

    # Recurse into nested dictionaries.
    for key in target.keys():
        if type(target[key]) is dict:
            clean_dict(target[key], remove)

    for key in doomed:
        target.pop(key)

    return target
||||
|
|
||||
|
|
||||
|
def update_dict(a, b):
    """Merge dict *b* into dict *a* in place.

    Nested dicts are merged recursively; when a key clashes, list values
    are appended to and scalars are collected into a two-element list.
    Returns None (mutates *a*).
    """
    for key in b:
        incoming = b[key]

        if key not in a:
            a[key] = incoming
        elif isinstance(a[key], dict) and isinstance(incoming, dict):
            update_dict(a[key], incoming)
        elif isinstance(a[key], list):
            a[key].append(incoming)
        else:
            a[key] = [a[key], incoming]
||||
|
|
||||
|
|
||||
|
def xrange_six(start, stop=None, step=None):
    """Version-agnostic lazy range: xrange on Python 2, range on Python 3.

    BUG FIX: the original only honoured *stop* when *step* was also given —
    ``xrange_six(2, 10)`` fell through to ``range(start)`` and ignored the
    stop value. Two-argument calls now behave like ``range(start, stop)``.
    """
    try:
        _range = xrange  # Python 2
    except NameError:
        _range = range   # Python 3

    if stop is None:
        return _range(start)

    if step is None:
        return _range(start, stop)

    return _range(start, stop, step)
||||
|
|
||||
|
|
||||
|
def delta_seconds(td):
    """Return the total length of timedelta *td* as a float of seconds."""
    total_microseconds = td.microseconds + (td.seconds + td.days * 24 * 3600) * 1e6
    return total_microseconds / 1e6
@ -0,0 +1,144 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from caper.helpers import is_list_type, update_dict, delta_seconds |
||||
|
from datetime import datetime |
||||
|
from logr import Logr |
||||
|
import re |
||||
|
|
||||
|
|
||||
|
class FragmentMatcher(object):
    """Compiles weighted regex pattern groups and matches them against
    fragment chains.

    ``self.regex`` maps group_name -> list of (weight, weight_patterns),
    where each entry of weight_patterns is a tuple of compiled regexes that
    must match consecutive fragments.
    """

    def __init__(self, pattern_groups):
        #: :type: dict -- group_name -> list of (weight, patterns) tuples
        self.regex = {}

        self.construct_patterns(pattern_groups)

    def construct_patterns(self, pattern_groups):
        """Normalize and compile *pattern_groups* into ``self.regex``.

        Accepted shapes per group:
          - a flat list of patterns (implicit weight 1.0), or
          - a list of (weight, patterns) tuples.
        Each pattern may be a single string, a tuple of strings (one regex
        per consecutive fragment), or a (template, options) pair that is
        expanded via ``template % '|'.join(options)``.
        """
        compile_start = datetime.now()
        compile_count = 0

        for group_name, patterns in pattern_groups:
            if group_name not in self.regex:
                self.regex[group_name] = []

            # Transform into weight groups
            if type(patterns[0]) is str or type(patterns[0][0]) not in [int, float]:
                patterns = [(1.0, patterns)]

            # NOTE: the loop variable deliberately rebinds 'patterns'; the
            # iterator over the outer list is created first, so this is safe.
            for weight, patterns in patterns:
                weight_patterns = []

                for pattern in patterns:
                    # Transform into multi-fragment patterns
                    if type(pattern) is str:
                        pattern = (pattern,)

                    # A (template, [options]) pair is ONE OR-list pattern,
                    # not two fragment patterns -- wrap it once more.
                    if type(pattern) is tuple and len(pattern) == 2:
                        if type(pattern[0]) is str and is_list_type(pattern[1], str):
                            pattern = (pattern,)

                    result = []
                    for value in pattern:
                        if type(value) is tuple:
                            if len(value) == 2:
                                # Construct OR-list pattern
                                value = value[0] % '|'.join(value[1])
                            elif len(value) == 1:
                                value = value[0]

                        result.append(re.compile(value, re.IGNORECASE))
                        compile_count += 1

                    weight_patterns.append(tuple(result))

                self.regex[group_name].append((weight, weight_patterns))

        Logr.info("Compiled %s patterns in %ss", compile_count, delta_seconds(datetime.now() - compile_start))

    def find_group(self, name):
        """Return (group_name, weight_groups) for *name*, or (None, None)."""
        for group_name, weight_groups in self.regex.items():
            if group_name and group_name == name:
                return group_name, weight_groups

        return None, None

    def value_match(self, value, group_name=None, single=True):
        """Match a plain string against the first weight group of each
        pattern group.

        :param value: raw string to match
        :param group_name: restrict matching to this group, or None for all
        :param single: return immediately after the first successful match
        :return: {group: groupdict} for matches, or None if nothing matched
        """
        result = None

        for group, weight_groups in self.regex.items():
            if group_name and group != group_name:
                continue

            # TODO handle multiple weights
            weight, patterns = weight_groups[0]

            for pattern in patterns:
                # Only the first fragment-regex applies to a plain value
                match = pattern[0].match(value)
                if not match:
                    continue

                if result is None:
                    result = {}
                if group not in result:
                    result[group] = {}

                result[group].update(match.groupdict())

                if single:
                    return result

        return result

    def fragment_match(self, fragment, group_name=None):
        """Follow a fragment chain to try find a match

        NOTE(review): find_group returns (None, None) for an unknown name,
        which would raise TypeError here when iterating weight_groups --
        callers appear expected to pass a valid group name; confirm.

        :type fragment: caper.objects.CaperFragment
        :type group_name: str or None

        :return: The weight of the match found between 0.0 and 1.0,
                 where 1.0 means perfect match and 0.0 means no match
        :rtype: (float, dict, int)
        """

        group_name, weight_groups = self.find_group(group_name)

        for weight, patterns in weight_groups:
            for pattern in patterns:
                cur_fragment = fragment
                success = True
                result = {}

                # Ignore empty patterns
                if len(pattern) < 1:
                    break

                for fragment_pattern in pattern:
                    if not cur_fragment:
                        success = False
                        break

                    match = fragment_pattern.match(cur_fragment.value)
                    if match:
                        update_dict(result, match.groupdict())
                    else:
                        success = False
                        break

                    # Advance to the neighbouring fragment for the next
                    # regex of a multi-fragment pattern
                    cur_fragment = cur_fragment.right if cur_fragment else None

                if success:
                    Logr.debug("Found match with weight %s" % weight)
                    return float(weight), result, len(pattern)

        return 0.0, None, 1
@ -0,0 +1,124 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from caper.helpers import xrange_six |
||||
|
|
||||
|
|
||||
|
class CaperClosure(object):
    """A bracket/space-delimited section of a release name.

    Closures form a doubly-linked list (``left``/``right``) and each one
    owns the fragments parsed out of its text.
    """

    __key__ = 'closure'

    def __init__(self, index, value):
        #: :type: int -- position of this closure within the name
        self.index = index

        #: :type: str -- raw text of this closure
        self.value = value

        #: :type: CaperClosure -- previous closure, or None
        self.left = None
        #: :type: CaperClosure -- next closure, or None
        self.right = None

        #: :type: list of CaperFragment
        self.fragments = []

    def __str__(self):
        # Fix: closing '>' was missing, unlike CaptureMatch's repr format
        return "<CaperClosure value: %s>" % repr(self.value)

    def __repr__(self):
        return self.__str__()
||||
|
|
||||
|
|
||||
|
class CaperFragment(object):
    """A single token within a closure, linked to its neighbours."""

    __key__ = 'fragment'

    def __init__(self, closure=None):
        #: :type: CaperClosure -- owning closure
        self.closure = closure

        #: :type: str -- token text
        self.value = ""

        #: :type: CaperFragment -- previous fragment, or None
        self.left = None
        #: :type: str -- separator between this fragment and `left`
        self.left_sep = None

        #: :type: CaperFragment -- next fragment, or None
        self.right = None
        #: :type: str -- separator between this fragment and `right`
        self.right_sep = None

        #: :type: int
        self.position = None

    def take(self, direction, count, include_self=True):
        """Collect up to *count* fragments walking in *direction*.

        Missing neighbours are padded with None, so the returned list
        always has exactly *count* entries.

        :param direction: 'left' or 'right'
        :param count: number of fragments to return (including self when
                      include_self is True)
        :raises ValueError: for any other direction
        :rtype: list of CaperFragment or None
        """
        if direction not in ['left', 'right']:
            # Fix: corrected "Un-Expected" typo in the error message
            raise ValueError('Unexpected value for "direction", expected "left" or "right".')

        result = []

        if include_self:
            result.append(self)
            count -= 1

        cur = self
        # Built-in range() replaces the xrange_six helper -- identical
        # iteration on both Python 2 and 3 for these small counts.
        for _ in range(count):
            if cur and getattr(cur, direction):
                cur = getattr(cur, direction)
                result.append(cur)
            else:
                result.append(None)
                cur = None

        return result

    def take_left(self, count, include_self=True):
        """Shorthand for ``take('left', ...)``."""
        return self.take('left', count, include_self)

    def take_right(self, count, include_self=True):
        """Shorthand for ``take('right', ...)``."""
        return self.take('right', count, include_self)

    def __str__(self):
        # Fix: closing '>' was missing, unlike CaptureMatch's repr format
        return "<CaperFragment value: %s>" % repr(self.value)

    def __repr__(self):
        return self.__str__()
||||
|
|
||||
|
|
||||
|
class CaptureMatch(object):
    """Outcome of applying a single CaptureStep to a fragment or closure."""

    def __init__(self, tag, step, success=False, weight=None, result=None, num_fragments=1):
        #: :type: str -- capture tag this match belongs to
        self.tag = tag

        #: :type: CaptureStep -- the step that produced this match
        self.step = step

        #: :type: bool -- whether the step matched at all
        self.success = success

        #: :type: float -- confidence weight of the match
        self.weight = weight

        #: :type: dict or str -- captured data
        self.result = result

        #: :type: int -- how many fragments the match consumed
        self.num_fragments = num_fragments

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "<CaperMatch result: %s>" % repr(self.result)
@ -0,0 +1,88 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
import re |
||||
|
from caper.parsers.base import Parser |
||||
|
|
||||
|
|
||||
|
# Matches a bracketed group tag: "(...)" or "[...]"; the inner text is
# captured as 'group'.
REGEX_GROUP = re.compile(r'(\(|\[)(?P<group>.*?)(\)|\])', re.IGNORECASE)


# Pattern groups fed to the FragmentMatcher as (group_name, patterns).
# A (template, [options]) tuple expands to template % 'a|b|...'.
PATTERN_GROUPS = [
    ('identifier', [
        r'S(?P<season>\d+)E(?P<episode>\d+)',
        r'(S(?P<season>\d+))|(E(?P<episode>\d+))',

        r'Ep(?P<episode>\d+)',
        # Fix: anchors were reversed (r'$...^'), which can never match --
        # an absolute episode number must span the whole fragment.
        r'^(?P<absolute>\d+)$',

        (r'Episode', r'(?P<episode>\d+)'),
    ]),
    ('video', [
        (r'(?P<h264_profile>%s)', [
            'Hi10P'
        ]),
        (r'.(?P<resolution>%s)', [
            '720p',
            '1080p',

            '960x720',
            '1920x1080'
        ]),
        (r'(?P<source>%s)', [
            'BD'
        ]),
    ]),
    ('audio', [
        (r'(?P<codec>%s)', [
            'FLAC'
        ]),
    ])
]
||||
|
|
||||
|
|
||||
|
class AnimeParser(Parser):
    """Parser for anime-style release names (e.g. "[Group] Title - 01 [720p]")."""

    # Shared FragmentMatcher, compiled once per process (same caching
    # scheme used by SceneParser and UsenetParser).
    matcher = None

    def __init__(self, debug=False):
        if not AnimeParser.matcher:
            # Local imports keep this fix self-contained; the sibling
            # parsers import these at module level.
            from caper import FragmentMatcher
            from logr import Logr

            # Fix: the raw PATTERN_GROUPS list was previously passed to
            # Parser.__init__, but Parser expects a FragmentMatcher
            # (capture steps call self.parser.matcher.fragment_match).
            # Wrap and cache it, consistent with the other parsers.
            AnimeParser.matcher = FragmentMatcher(PATTERN_GROUPS)
            Logr.info("Fragment matcher for %s created", self.__class__.__name__)

        super(AnimeParser, self).__init__(AnimeParser.matcher, debug)

    def capture_group(self, fragment):
        """Return the bracketed release-group name from *fragment*, or None."""
        match = REGEX_GROUP.match(fragment.value)

        if not match:
            return None

        return match.group('group')

    def run(self, closures):
        """Parse *closures* and return the built CaperResult.

        :type closures: list of CaperClosure
        """
        self.setup(closures)

        # The release group appears once, in its own bracketed closure
        self.capture_closure('group', func=self.capture_group)\
            .execute(once=True)

        # Show name: everything up to the first identifier/video fragment
        self.capture_fragment('show_name', single=False)\
            .until_fragment(value__re='identifier')\
            .until_fragment(value__re='video')\
            .execute()

        self.capture_fragment('identifier', regex='identifier') \
            .capture_fragment('video', regex='video', single=False) \
            .capture_fragment('audio', regex='audio', single=False) \
            .execute()

        self.result.build()
        return self.result
@ -0,0 +1,84 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from caper import FragmentMatcher |
||||
|
from caper.group import CaptureGroup |
||||
|
from caper.result import CaperResult, CaperClosureNode, CaperRootNode |
||||
|
from logr import Logr |
||||
|
|
||||
|
|
||||
|
class Parser(object):
    """Common machinery shared by the concrete name parsers.

    Holds the fragment matcher, the closures currently being parsed and
    the accumulated CaperResult, plus helpers that start capture groups.
    """

    def __init__(self, matcher, debug=False):
        self.debug = debug
        self.matcher = matcher

        self.closures = None
        #: :type: caper.result.CaperResult
        self.result = None

        # Per-run state; initialised by reset() below
        self._match_cache = None
        self._fragment_pos = None
        self._closure_pos = None
        self._history = None

        self.reset()

    def reset(self):
        """Clear all per-run state ahead of parsing a new name."""
        self.closures = None
        self.result = CaperResult()

        self._match_cache = {}
        self._fragment_pos = -1
        self._closure_pos = -1
        self._history = []

    def setup(self, closures):
        """Reset state and seed the result tree with *closures*.

        :type closures: list of CaperClosure
        """
        self.reset()
        self.closures = closures
        self.result.heads = [CaperRootNode(closures[0])]

    def run(self, closures):
        """Parse *closures*; concrete parsers must override this.

        :type closures: list of CaperClosure
        """
        raise NotImplementedError()

    #
    # Capture Methods
    #

    def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs):
        """Start a capture group that matches individual fragments."""
        group = CaptureGroup(self, self.result)
        return group.capture_fragment(tag, regex=regex, func=func, single=single, **kwargs)

    def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
        """Start a capture group that matches whole closures."""
        group = CaptureGroup(self, self.result)
        return group.capture_closure(tag, regex=regex, func=func, single=single, **kwargs)
@ -0,0 +1,230 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from logr import Logr |
||||
|
from caper import FragmentMatcher |
||||
|
from caper.parsers.base import Parser |
||||
|
from caper.result import CaperFragmentNode |
||||
|
|
||||
|
|
||||
|
# Pattern groups handed to the FragmentMatcher as (group_name, patterns).
# 'identifier' uses explicit (weight, patterns) tiers -- ambiguous forms
# (e.g. bare "314" as S03E14) get lower weights. A tuple of regexes must
# match consecutive fragments; a (template, [options]) pair expands to
# template % 'a|b|...'.
PATTERN_GROUPS = [
    ('identifier', [
        (1.0, [
            # S01E01-E02
            ('^S(?P<season>\d+)E(?P<episode_from>\d+)$', '^E(?P<episode_to>\d+)$'),
            # 'S03 E01 to E08' or 'S03 E01 - E09'
            ('^S(?P<season>\d+)$', '^E(?P<episode_from>\d+)$', '^(to|-)$', '^E(?P<episode_to>\d+)$'),
            # 'E01 to E08' or 'E01 - E09'
            ('^E(?P<episode_from>\d+)$', '^(to|-)$', '^E(?P<episode_to>\d+)$'),

            # S01-S03
            ('^S(?P<season_from>\d+)$', '^S(?P<season_to>\d+)$'),

            # S02E13
            r'^S(?P<season>\d+)E(?P<episode>\d+)$',
            # S01 E13
            (r'^(S(?P<season>\d+))$', r'^(E(?P<episode>\d+))$'),
            # S02
            # E13
            r'^((S(?P<season>\d+))|(E(?P<episode>\d+)))$',
            # 3x19
            r'^(?P<season>\d+)x(?P<episode>\d+)$',

            # 2013.09.15
            (r'^(?P<year>\d{4})$', r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$'),
            # 09.15.2013
            (r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$', r'^(?P<year>\d{4})$'),
            # TODO - US/UK Date Format Conflict? will only support US format for now..
            # 15.09.2013
            #(r'^(?P<day>\d{2})$', r'^(?P<month>\d{2})$', r'^(?P<year>\d{4})$'),
            # 130915
            r'^(?P<year_short>\d{2})(?P<month>\d{2})(?P<day>\d{2})$',

            # Season 3 Episode 14
            (r'^Se(ason)?$', r'^(?P<season>\d+)$', r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),
            # Season 3
            (r'^Se(ason)?$', r'^(?P<season>\d+)$'),
            # Episode 14
            (r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),

            # Part.3
            # Part.1.and.Part.3
            ('^Part$', '(?P<part>\d+)'),

            r'(?P<extra>Special)',
            r'(?P<country>NZ|AU|US|UK)'
        ]),
        (0.8, [
            # 100 - 1899, 2100 - 9999 (skips 1900 to 2099 - so we don't get years my mistake)
            # TODO - Update this pattern on 31 Dec 2099
            r'^(?P<season>([1-9])|(1[0-8])|(2[1-9])|([3-9][0-9]))(?P<episode>\d{2})$'
        ]),
        (0.5, [
            # 100 - 9999 (may also match a year, hence the lower weight)
            r'^(?P<season>([1-9])|([1-9][0-9]))(?P<episode>\d{2})$'
        ])
    ]),

    ('video', [
        r'(?P<aspect>FS|WS)',

        (r'(?P<resolution>%s)', [
            '480p',
            '720p',
            '1080p'
        ]),

        #
        # Source
        #

        (r'(?P<source>%s)', [
            'DVDRiP',
            # HDTV
            'HDTV',
            'PDTV',
            'DSR',
            # WEB
            'WEBRip',
            'WEBDL',
            # BluRay
            'BluRay',
            'B(D|R)Rip',
            # DVD
            'DVDR',
            'DVD9',
            'DVD5'
        ]),

        # For multi-fragment 'WEB-DL', 'WEB-Rip', etc... matches
        ('(?P<source>WEB)', '(?P<source>DL|Rip)'),

        #
        # Codec
        #

        (r'(?P<codec>%s)', [
            'x264',
            'XViD',
            'H264',
            'AVC'
        ]),

        # For multi-fragment 'H 264' tags
        ('(?P<codec>H)', '(?P<codec>264)'),
    ]),

    ('dvd', [
        r'D(ISC)?(?P<disc>\d+)',

        r'R(?P<region>[0-8])',

        (r'(?P<encoding>%s)', [
            'PAL',
            'NTSC'
        ]),
    ]),

    ('audio', [
        (r'(?P<codec>%s)', [
            'AC3',
            'TrueHD'
        ]),

        (r'(?P<language>%s)', [
            'GERMAN',
            'DUTCH',
            'FRENCH',
            'SWEDiSH',
            'DANiSH',
            'iTALiAN'
        ]),
    ]),

    ('scene', [
        r'(?P<proper>PROPER|REAL)',
    ])
]
||||
|
|
||||
|
|
||||
|
class SceneParser(Parser):
    """Parser for standard scene-style release names."""

    # Shared FragmentMatcher, compiled once per process
    matcher = None

    def __init__(self, debug=False):
        if not SceneParser.matcher:
            SceneParser.matcher = FragmentMatcher(PATTERN_GROUPS)
            Logr.info("Fragment matcher for %s created", self.__class__.__name__)

        super(SceneParser, self).__init__(SceneParser.matcher, debug)

    def capture_group(self, fragment):
        """Return the release-group name, or None.

        A group tag is only accepted in the final closure, preceded by '-'
        and with nothing after it (e.g. "...x264-GROUP").
        """
        if fragment.closure.index + 1 != len(self.closures):
            return None

        if fragment.left_sep != '-' or fragment.right:
            return None

        return fragment.value

    def run(self, closures):
        """Parse *closures* and return the built CaperResult.

        :type closures: list of CaperClosure
        """

        self.setup(closures)

        # Show name: everything before the first recognised tag
        self.capture_fragment('show_name', single=False)\
            .until_fragment(node__re='identifier')\
            .until_fragment(node__re='video')\
            .until_fragment(node__re='dvd')\
            .until_fragment(node__re='audio')\
            .until_fragment(node__re='scene')\
            .execute()

        # Remaining tags, stopping at the trailing "-GROUP" fragment
        self.capture_fragment('identifier', regex='identifier', single=False)\
            .capture_fragment('video', regex='video', single=False)\
            .capture_fragment('dvd', regex='dvd', single=False)\
            .capture_fragment('audio', regex='audio', single=False)\
            .capture_fragment('scene', regex='scene', single=False)\
            .until_fragment(left_sep__eq='-', right__eq=None)\
            .execute()

        self.capture_fragment('group', func=self.capture_group)\
            .execute()

        self.print_tree(self.result.heads)

        self.result.build()
        return self.result

    def print_tree(self, heads):
        """Debug-log the result tree; no-op unless debug is enabled."""
        if not self.debug:
            return

        for head in heads:
            head = head if type(head) is list else [head]

            if type(head[0]) is CaperFragmentNode:
                for fragment in head[0].fragments:
                    Logr.debug(fragment.value)
            else:
                Logr.debug(head[0].closure.value)

            for node in head:
                # NOTE(review): the 'if node.match else' conditional applies
                # to the WHOLE concatenated string, so unmatched nodes log an
                # empty line -- confirm whether the str(node) prefix was
                # meant to be logged unconditionally.
                Logr.debug('\t' + str(node).ljust(55) + '\t' + (
                    str(node.match.weight) + '\t' + str(node.match.result)
                ) if node.match else '')

            # Recurse towards the root of the tree
            if len(head) > 0 and head[0].parent:
                self.print_tree([head[0].parent])
@ -0,0 +1,115 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from logr import Logr |
||||
|
from caper import FragmentMatcher |
||||
|
from caper.parsers.base import Parser |
||||
|
|
||||
|
|
||||
|
# Pattern groups for usenet subject lines: poster/site tags ('usenet'),
# "current/total" part counters ('part') and quoted-filename/size/yEnc
# details ('detail').
PATTERN_GROUPS = [
    ('usenet', [
        r'\[(?P<group>#[\w\.@]+)\]',
        r'^\[(?P<code>\w+)\]$',
        r'\[(?P<full>FULL)\]',
        r'\[\s?(?P<group>TOWN)\s?\]',
        r'(.*?\s)?[_\W]*(?P<site>www\..*?\.[a-z0-9]+)[_\W]*(.*?\s)?',
        r'(.*?\s)?[_\W]*(?P<site>(www\.)?[-\w]+\.(com|org|info))[_\W]*(.*?\s)?'
    ]),

    ('part', [
        r'.?(?P<current>\d+)/(?P<total>\d+).?'
    ]),

    ('detail', [
        r'[\s-]*\w*?[\s-]*\"(?P<file_name>.*?)\"[\s-]*\w*?[\s-]*(?P<size>[\d,\.]*\s?MB)?[\s-]*(?P<extra>yEnc)?',
        r'(?P<size>[\d,\.]*\s?MB)[\s-]*(?P<extra>yEnc)',
        r'(?P<size>[\d,\.]*\s?MB)|(?P<extra>yEnc)'
    ])
]
||||
|
|
||||
|
|
||||
|
class UsenetParser(Parser):
    """Parser for usenet (NZB) subject lines."""

    # Shared FragmentMatcher, compiled once per process
    matcher = None

    def __init__(self, debug=False):
        if not UsenetParser.matcher:
            UsenetParser.matcher = FragmentMatcher(PATTERN_GROUPS)
            Logr.info("Fragment matcher for %s created", self.__class__.__name__)

        super(UsenetParser, self).__init__(UsenetParser.matcher, debug)

    def run(self, closures):
        """Parse *closures* and return the built CaperResult.

        :type closures: list of CaperClosure
        """
        self.setup(closures)

        # Capture usenet or part info until we get a part or matching fails
        self.capture_closure('usenet', regex='usenet', single=False)\
            .capture_closure('part', regex='part', single=True) \
            .until_result(tag='part') \
            .until_failure()\
            .execute()

        is_town_release, has_part = self.get_state()

        if not is_town_release:
            self.capture_release_name()

        # If we already have the part (TOWN releases), ignore matching part again
        if not is_town_release and not has_part:
            self.capture_fragment('part', regex='part', single=True)\
                .until_closure(node__re='usenet')\
                .until_success()\
                .execute()

        # Capture any leftover details
        self.capture_closure('usenet', regex='usenet', single=False)\
            .capture_closure('detail', regex='detail', single=False)\
            .execute()

        self.result.build()
        return self.result

    def capture_release_name(self):
        """Capture the release-name fragments between detail/usenet tags."""
        # Skim any detail closures that precede the release name
        self.capture_closure('detail', regex='detail', single=False)\
            .until_failure()\
            .execute()

        self.capture_fragment('release_name', single=False, include_separators=True) \
            .until_closure(node__re='usenet') \
            .until_closure(node__re='detail') \
            .until_closure(node__re='part') \
            .until_fragment(value__eq='-')\
            .execute()

        # Capture any detail after the release name
        self.capture_closure('detail', regex='detail', single=False)\
            .until_failure()\
            .execute()

    def get_state(self):
        """Return (is_town_release, has_part) from the first result head."""
        # TODO multiple-chains?
        is_town_release = False
        has_part = False

        for tag, result in self.result.heads[0].captured():
            if tag == 'usenet' and result.get('group') == 'TOWN':
                is_town_release = True

            if tag == 'part':
                has_part = True

        return is_town_release, has_part
@ -0,0 +1,213 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
import copy |
||||
|
from logr import Logr |
||||
|
|
||||
|
|
||||
|
# Tags treated as grouped matches.
# NOTE(review): not referenced in the visible portion of this module --
# confirm usage elsewhere before relying on or removing it.
GROUP_MATCHES = ['identifier']
||||
|
|
||||
|
|
||||
|
class CaperNode(object):
    """Base node of a parse-result chain.

    A node records the closure it came from, an optional CaptureMatch and
    a link to its parent (towards the chain root).
    """

    def __init__(self, closure, parent=None, match=None):
        """
        :type parent: CaperNode
        :type match: CaptureMatch
        """
        #: :type: caper.objects.CaperClosure
        self.closure = closure

        #: :type: CaperNode
        self.parent = parent

        #: :type: CaptureMatch
        self.match = match

        #: :type: list of CaptureGroup
        self.finished_groups = []

    def next(self):
        """Return the next subject to parse; implemented by subclasses."""
        raise NotImplementedError()

    def captured(self):
        """Yield (tag, result) for every match from this node up to the root."""
        node = self
        while node is not None:
            if node.match:
                yield node.match.tag, node.match.result
            node = node.parent
||||
|
|
||||
|
|
||||
|
class CaperRootNode(CaperNode):
    """Root of a parse tree; parsing starts from its closure."""

    def __init__(self, closure):
        """
        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        super(CaperRootNode, self).__init__(closure)

    def next(self):
        # Parsing begins at the root's own closure
        return self.closure
||||
|
|
||||
|
|
||||
|
class CaperClosureNode(CaperNode):
    """Node produced by matching (or skipping) a whole closure."""

    def __init__(self, closure, parent=None, match=None):
        """
        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        super(CaperClosureNode, self).__init__(closure, parent, match)

    def next(self):
        """Return the next closure/fragment to parse, or None when done."""
        if not self.closure:
            return None

        if self.match:
            # Jump to next closure if we have a match
            return self.closure.right
        elif len(self.closure.fragments) > 0:
            # Otherwise parse the fragments
            return self.closure.fragments[0]

        return None

    def __str__(self):
        return "<CaperClosureNode match: %s>" % repr(self.match)

    def __repr__(self):
        return self.__str__()
||||
|
|
||||
|
|
||||
|
class CaperFragmentNode(CaperNode):
    """Node produced by matching one or more consecutive fragments."""

    def __init__(self, closure, fragments, parent=None, match=None):
        """
        :type closure: caper.objects.CaperClosure
        :type fragments: list of caper.objects.CaperFragment
        """
        super(CaperFragmentNode, self).__init__(closure, parent, match)

        #: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment
        self.fragments = fragments

    def next(self):
        """Return the fragment after the consumed span, the next closure,
        or None when there is nothing left to parse."""
        if len(self.fragments) > 0 and self.fragments[-1] and self.fragments[-1].right:
            return self.fragments[-1].right

        if self.closure.right:
            return self.closure.right

        return None

    def __str__(self):
        return "<CaperFragmentNode match: %s>" % repr(self.match)

    def __repr__(self):
        return self.__str__()
||||
|
|
||||
|
|
||||
|
class CaperResult(object):
    """Collects parse-tree heads and combines them into weighted chains."""

    def __init__(self):
        #: :type: list of CaperNode -- leaves of the parse tree
        self.heads = []

        #: :type: list of CaperResultChain -- filled by build()
        self.chains = []

    def build(self):
        """Combine all heads into chains, weight them and sort best-first."""
        max_matched = 0

        for head in self.heads:
            for chain in self.combine_chain(head):
                if chain.num_matched > max_matched:
                    max_matched = chain.num_matched

                self.chains.append(chain)

        # Reward chains that matched more fragments, relative to the best;
        # the 'or' fallbacks guard against dividing by zero
        for chain in self.chains:
            chain.weights.append(chain.num_matched / float(max_matched or chain.num_matched or 1))
            chain.finish()

        self.chains.sort(key=lambda chain: chain.weight, reverse=True)

        for chain in self.chains:
            Logr.debug("chain weight: %.02f", chain.weight)
            Logr.debug("\tInfo: %s", chain.info)

            Logr.debug("\tWeights: %s", chain.weights)
            Logr.debug("\tNumber of Fragments Matched: %s", chain.num_matched)

    def combine_chain(self, subject, chain=None):
        """Walk from *subject* towards the root, building result chains.

        Branch points (list subjects) fork the chain via copy(); the last
        branch reuses the current chain to save one copy.

        :rtype: list of CaperResultChain
        """
        nodes = subject if type(subject) is list else [subject]

        if chain is None:
            chain = CaperResultChain()

        result = []

        for x, node in enumerate(nodes):
            # Reuse the current chain for the final node, copy for the rest
            node_chain = chain if x == len(nodes) - 1 else chain.copy()

            if not node.parent:
                # Reached the root -- this chain is complete
                result.append(node_chain)
                continue

            node_chain.update(node)
            result.extend(self.combine_chain(node.parent, node_chain))

        return result
||||
|
|
||||
|
|
||||
|
class CaperResultChain(object):
    """One candidate interpretation of a name, with an aggregate weight."""

    def __init__(self):
        #: :type: float -- aggregate weight, filled in by finish()
        self.weight = None
        # tag -> list of match results (root-most result first)
        self.info = {}
        # total fragments consumed by successful matches
        self.num_matched = 0

        # individual match weights collected along the chain
        self.weights = []

    def update(self, subject):
        """Fold *subject*'s successful match (if any) into this chain.

        :type subject: CaperFragmentNode
        """
        match = subject.match
        if not match or not match.success:
            return

        # TODO this should support closure nodes
        if type(subject) is CaperFragmentNode:
            self.num_matched += len(subject.fragments) if subject.fragments is not None else 0

        self.weights.append(match.weight)

        if match:
            self.info.setdefault(match.tag, [])
            self.info[match.tag].insert(0, match.result)

    def finish(self):
        """Finalize the chain weight as the mean of all collected weights."""
        self.weight = sum(self.weights) / len(self.weights)

    def copy(self):
        """Return an independent copy of this chain (info deep-copied)."""
        dup = CaperResultChain()

        dup.weight = self.weight
        dup.info = copy.deepcopy(self.info)

        dup.num_matched = self.num_matched
        dup.weights = copy.copy(self.weights)

        return dup
@ -0,0 +1,96 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from caper.objects import CaptureMatch |
||||
|
from logr import Logr |
||||
|
|
||||
|
|
||||
|
class CaptureStep(object): |
||||
|
REPR_KEYS = ['regex', 'func', 'single'] |
||||
|
|
||||
|
def __init__(self, capture_group, tag, source, regex=None, func=None, single=None, **kwargs): |
||||
|
#: @type: CaptureGroup |
||||
|
self.capture_group = capture_group |
||||
|
|
||||
|
#: @type: str |
||||
|
self.tag = tag |
||||
|
#: @type: str |
||||
|
self.source = source |
||||
|
#: @type: str |
||||
|
self.regex = regex |
||||
|
#: @type: function |
||||
|
self.func = func |
||||
|
#: @type: bool |
||||
|
self.single = single |
||||
|
|
||||
|
self.kwargs = kwargs |
||||
|
|
||||
|
self.matched = False |
||||
|
|
||||
|
def execute(self, fragment): |
||||
|
"""Execute step on fragment |
||||
|
|
||||
|
:type fragment: CaperFragment |
||||
|
:rtype : CaptureMatch |
||||
|
""" |
||||
|
|
||||
|
match = CaptureMatch(self.tag, self) |
||||
|
|
||||
|
if self.regex: |
||||
|
weight, result, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex) |
||||
|
Logr.debug('(execute) [regex] tag: "%s"', self.tag) |
||||
|
|
||||
|
if not result: |
||||
|
return match |
||||
|
|
||||
|
# Populate CaptureMatch |
||||
|
match.success = True |
||||
|
match.weight = weight |
||||
|
match.result = result |
||||
|
match.num_fragments = num_fragments |
||||
|
elif self.func: |
||||
|
result = self.func(fragment) |
||||
|
Logr.debug('(execute) [func] %s += "%s"', self.tag, match) |
||||
|
|
||||
|
if not result: |
||||
|
return match |
||||
|
|
||||
|
# Populate CaptureMatch |
||||
|
match.success = True |
||||
|
match.weight = 1.0 |
||||
|
match.result = result |
||||
|
else: |
||||
|
Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value) |
||||
|
|
||||
|
include_separators = self.kwargs.get('include_separators', False) |
||||
|
|
||||
|
# Populate CaptureMatch |
||||
|
match.success = True |
||||
|
match.weight = 1.0 |
||||
|
|
||||
|
if include_separators: |
||||
|
match.result = (fragment.left_sep, fragment.value, fragment.right_sep) |
||||
|
else: |
||||
|
match.result = fragment.value |
||||
|
|
||||
|
return match |
||||
|
|
||||
|
def __repr__(self):
    """Readable representation listing the step's truthy REPR_KEYS attributes."""
    parts = []
    for key in self.REPR_KEYS:
        if hasattr(self, key) and getattr(self, key):
            parts.append('%s=%r' % (key, getattr(self, key)))

    suffix = ''
    if parts:
        suffix = ', ' + ', '.join(parts)

    return "CaptureStep('%s'%s)" % (self.tag, suffix)
@ -0,0 +1,225 @@ |
|||||
|
# logr - Simple python logging wrapper |
||||
|
# Packed by Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# File part of: |
||||
|
# rdio-sock - Rdio WebSocket Library |
||||
|
# Copyright (C) 2013 fzza- <fzzzzzzzza@gmail.com> |
||||
|
|
||||
|
# This program is free software: you can redistribute it and/or modify |
||||
|
# it under the terms of the GNU General Public License as published by |
||||
|
# the Free Software Foundation, either version 3 of the License, or |
||||
|
# (at your option) any later version. |
||||
|
|
||||
|
# This program is distributed in the hope that it will be useful, |
||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
# GNU General Public License for more details. |
||||
|
|
||||
|
# You should have received a copy of the GNU General Public License |
||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
||||
|
|
||||
|
|
||||
|
import inspect |
||||
|
import logging |
||||
|
import os |
||||
|
import sys |
||||
|
|
||||
|
# Logger names that get_logger_name() skips when walking the stack.
IGNORE = ()

# True when running under Python 3 (selects range vs xrange in xrange_six).
PY3 = sys.version_info[0] == 3
|
||||
|
class Logr(object):
    """Static logging facade that lazily creates one logger per call origin.

    With ``trace_origin`` enabled the logger name is derived from the caller's
    stack frame; otherwise the configured ``name`` is used for everything.
    """

    loggers = {}        # cache: logger name -> logging.Logger
    handler = None      # shared handler, set by configure()

    trace_origin = False    # derive logger names from the call stack?
    name = "Logr"           # fallback logger name when trace_origin is off

    @staticmethod
    def configure(level=logging.WARNING, handler=None, formatter=None, trace_origin=False, name="Logr"):
        """Configure Logr

        @param handler: Logger message handler
        @type handler: logging.Handler or None

        @param formatter: Logger message Formatter
        @type formatter: logging.Formatter or None
        """
        if formatter is None:
            formatter = LogrFormatter()

        if handler is None:
            handler = logging.StreamHandler()

        handler.setFormatter(formatter)
        handler.setLevel(level)
        Logr.handler = handler

        Logr.trace_origin = trace_origin
        Logr.name = name

    @staticmethod
    def configure_check():
        # Apply the default configuration on first use if configure() was
        # never called explicitly.
        if Logr.handler is None:
            Logr.configure()

    @staticmethod
    def _get_name_from_path(filename):
        """Return the bare module name for *filename*, or "<unknown>".

        TypeError is raised by basename() when filename is None (e.g. an
        interactive session without __file__).
        """
        try:
            return os.path.splitext(os.path.basename(filename))[0]
        except TypeError:
            return "<unknown>"

    @staticmethod
    def get_frame_class(frame):
        """Best-effort lookup of the class owning *frame*'s function.

        Relies on the 'self'/'cls' first-argument convention; returns None
        when the frame does not look like a method call.
        """
        if not frame.f_code.co_varnames:
            return None

        farg = frame.f_code.co_varnames[0]

        if farg not in frame.f_locals:
            return None

        if farg == 'self':
            return frame.f_locals[farg].__class__

        if farg == 'cls':
            return frame.f_locals[farg]

        return None

    @staticmethod
    def get_logger_name():
        """Resolve the logger name for the current call site.

        Walks up the stack looking for the first frame whose qualified
        name is not in IGNORE; returns "" if none is found.
        """
        if not Logr.trace_origin:
            return Logr.name

        stack = inspect.stack()

        for x in xrange_six(len(stack)):
            frame = stack[x][0]
            name = None

            # Try find name of function defined inside a class
            frame_class = Logr.get_frame_class(frame)

            if frame_class:
                class_name = frame_class.__name__
                module_name = frame_class.__module__

                if module_name != '__main__':
                    name = module_name + '.' + class_name
                else:
                    name = class_name

            # Try find name of function defined outside of a class
            if name is None:
                if frame.f_code.co_name in frame.f_globals:
                    name = frame.f_globals.get('__name__')
                    if name == '__main__':
                        name = Logr._get_name_from_path(frame.f_globals.get('__file__'))
                elif frame.f_code.co_name == '<module>':
                    name = Logr._get_name_from_path(frame.f_globals.get('__file__'))

            if name is not None and name not in IGNORE:
                return name

        return ""

    @staticmethod
    def get_logger():
        """Get or create logger (if it does not exist)

        @rtype: RootLogger
        """
        name = Logr.get_logger_name()
        if name not in Logr.loggers:
            Logr.configure_check()
            # NOTE(review): direct Logger() instantiation bypasses the
            # logging manager/hierarchy — presumably intentional here.
            Logr.loggers[name] = logging.Logger(name)
            Logr.loggers[name].addHandler(Logr.handler)
        return Logr.loggers[name]

    @staticmethod
    def debug(msg, *args, **kwargs):
        Logr.get_logger().debug(msg, *args, **kwargs)

    @staticmethod
    def info(msg, *args, **kwargs):
        Logr.get_logger().info(msg, *args, **kwargs)

    @staticmethod
    def warning(msg, *args, **kwargs):
        Logr.get_logger().warning(msg, *args, **kwargs)

    warn = warning

    @staticmethod
    def error(msg, *args, **kwargs):
        Logr.get_logger().error(msg, *args, **kwargs)

    @staticmethod
    def exception(msg, *args, **kwargs):
        Logr.get_logger().exception(msg, *args, **kwargs)

    @staticmethod
    def critical(msg, *args, **kwargs):
        Logr.get_logger().critical(msg, *args, **kwargs)

    fatal = critical

    @staticmethod
    def log(level, msg, *args, **kwargs):
        Logr.get_logger().log(level, msg, *args, **kwargs)
||||
|
class LogrFormatter(logging.Formatter):
    """Formatter producing fixed-width "time name level message" lines."""

    LENGTH_NAME = 32        # right-justified width of the logger name column
    LENGTH_LEVEL_NAME = 5   # left-justified width of the level column

    def __init__(self, fmt=None, datefmt=None):
        # super() only works here on Python >= 2.7 (new-style Formatter);
        # fall back to the explicit base call on 2.6.
        if sys.version_info[:2] > (2, 6):
            super(LogrFormatter, self).__init__(fmt, datefmt)
        else:
            logging.Formatter.__init__(self, fmt, datefmt)

    def usesTime(self):
        # Always stamp records with asctime.
        return True

    def format(self, record):
        record.message = record.getMessage()
        if self.usesTime():
            record.asctime = self.formatTime(record, self.datefmt)

        # Pad/truncate the name and level into fixed-width columns.
        name_col = record.name[-self.LENGTH_NAME:].rjust(self.LENGTH_NAME, ' ')
        level_col = record.levelname[:self.LENGTH_LEVEL_NAME].ljust(self.LENGTH_LEVEL_NAME, ' ')
        s = ' '.join((record.asctime, name_col, level_col, record.message))

        if record.exc_info:
            if not record.exc_text:
                record.exc_text = self.formatException(record.exc_info)
            if record.exc_text:
                if not s.endswith("\n"):
                    s += "\n"
                try:
                    s += record.exc_text
                except UnicodeError:
                    s = s + record.exc_text.decode(sys.getfilesystemencoding(),
                                                   'replace')
        return s
|
||||
|
def xrange_six(start, stop=None, step=None):
    """Python 2/3 compatible xrange.

    Mirrors range()/xrange() semantics for one, two, or three arguments.

    Bug fix: the original only honoured *stop* when *step* was also given,
    so xrange_six(2, 5) silently returned range(2). Two-argument calls now
    behave like range(start, stop).

    @param start: range start (or the stop value in the one-argument form)
    @param stop: optional exclusive upper bound
    @param step: optional stride (requires stop)
    @return: xrange object on Python 2, range object on Python 3
    """
    try:
        rng = xrange  # Python 2
    except NameError:
        rng = range   # Python 3

    if stop is None:
        return rng(start)
    if step is None:
        return rng(start, stop)
    return rng(start, stop, step)
Loading…
Reference in new issue