From eb151a4c5d8b02dce6d6269a59a4014673e41768 Mon Sep 17 00:00:00 2001 From: Dean Gardiner Date: Fri, 13 Dec 2013 14:02:07 +1300 Subject: [PATCH] Updated Caper to v0.3.1 --- libs/caper/__init__.py | 24 ++++- libs/caper/constraint.py | 109 +++++++++++++++++----- libs/caper/group.py | 212 +++++++++++++++++++++++++++++++++++-------- libs/caper/matcher.py | 2 +- libs/caper/objects.py | 43 +++++++++ libs/caper/parsers/anime.py | 4 +- libs/caper/parsers/base.py | 14 +-- libs/caper/parsers/scene.py | 16 ++-- libs/caper/parsers/usenet.py | 115 +++++++++++++++++++++++ libs/caper/result.py | 93 +++++++++++++------ libs/caper/step.py | 54 +++++++++-- 11 files changed, 568 insertions(+), 118 deletions(-) create mode 100644 libs/caper/parsers/usenet.py diff --git a/libs/caper/__init__.py b/libs/caper/__init__.py index 8b2e61a..95fb6d7 100644 --- a/libs/caper/__init__.py +++ b/libs/caper/__init__.py @@ -17,9 +17,10 @@ from caper.matcher import FragmentMatcher from caper.objects import CaperFragment, CaperClosure from caper.parsers.anime import AnimeParser from caper.parsers.scene import SceneParser +from caper.parsers.usenet import UsenetParser -__version_info__ = ('0', '2', '9') +__version_info__ = ('0', '3', '1') __version_branch__ = 'master' __version__ = "%s%s" % ( @@ -28,8 +29,9 @@ __version__ = "%s%s" % ( ) -CL_START_CHARS = ['(', '['] -CL_END_CHARS = [')', ']'] +CL_START_CHARS = ['(', '[', '<', '>'] +CL_END_CHARS = [')', ']', '<', '>'] +CL_END_STRINGS = [' - '] STRIP_START_CHARS = ''.join(CL_START_CHARS) STRIP_END_CHARS = ''.join(CL_END_CHARS) @@ -47,8 +49,9 @@ class Caper(object): self.debug = debug self.parsers = { + 'anime': AnimeParser, 'scene': SceneParser, - 'anime': AnimeParser + 'usenet': UsenetParser } def _closure_split(self, name): @@ -62,7 +65,7 @@ class Caper(object): def end_closure(closures, buf): buf = buf.strip(STRIP_CHARS) - if len(buf) < 1: + if len(buf) < 2: return cur = CaperClosure(len(closures), buf) @@ -76,6 +79,7 @@ class Caper(object): state = CL_START buf = "" for x, ch in enumerate(name): + # Check for start characters if state == CL_START and ch in CL_START_CHARS: end_closure(closures, buf) @@ -85,10 +89,17 @@ class Caper(object): buf += ch if state == CL_END and ch in CL_END_CHARS: + # End character found, create the closure end_closure(closures, buf) state = CL_START buf = "" + elif state == CL_START and buf[-3:] in CL_END_STRINGS: + # End string found, create the closure + end_closure(closures, buf[:-3]) + + state = CL_START + buf = "" end_closure(closures, buf) @@ -174,6 +185,9 @@ class Caper(object): for closure in closures: Logr.debug("closure [%s]", closure.value) + for fragment in closure.fragments: + Logr.debug("\tfragment [%s]", fragment.value) + if parser not in self.parsers: raise ValueError("Unknown parser") diff --git a/libs/caper/constraint.py b/libs/caper/constraint.py index 96f45c3..e092d33 100644 --- a/libs/caper/constraint.py +++ b/libs/caper/constraint.py @@ -14,7 +14,7 @@ class CaptureConstraint(object): - def __init__(self, capture_group, comparisons=None, **kwargs): + def __init__(self, capture_group, constraint_type, comparisons=None, target=None, **kwargs): """Capture constraint object :type capture_group: CaptureGroup @@ -22,50 +22,113 @@ class CaptureConstraint(object): self.capture_group = capture_group + self.constraint_type = constraint_type + self.target = target + self.comparisons = comparisons if comparisons else [] + self.kwargs = {} - for key, value in kwargs.items(): - key = key.split('__') + for orig_key, value in kwargs.items(): + key = orig_key.split('__') if len(key) != 2: + self.kwargs[orig_key] = value continue name, method = key - method = '_compare_' + method + method = 'constraint_match_' + method if not hasattr(self, method): + self.kwargs[orig_key] = value continue self.comparisons.append((name, getattr(self, method), value)) - def _compare_eq(self, fragment, name, expected): - if not hasattr(fragment, name): - return 1.0, False + def execute(self, parent_node, node, **kwargs): + func_name = 'constraint_%s' % self.constraint_type - return 1.0, getattr(fragment, name) == expected + if hasattr(self, func_name): + return getattr(self, func_name)(parent_node, node, **kwargs) - def _compare_re(self, fragment, name, arg): - if name == 'fragment': - group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0) + raise ValueError('Unknown constraint type "%s"' % self.constraint_type) - weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, group) - return weight, weight > minimum_weight - elif type(arg).__name__ == 'SRE_Pattern': - return 1.0, arg.match(getattr(fragment, name)) is not None - elif hasattr(fragment, name): - match = self.capture_group.parser.matcher.value_match(getattr(fragment, name), arg, single=True) - return 1.0, match is not None - else: - raise ValueError("Unable to find attribute with name '%s'" % name) + # + # Node Matching + # - def execute(self, fragment): + def constraint_match(self, parent_node, node): results = [] total_weight = 0 for name, method, argument in self.comparisons: - weight, success = method(fragment, name, argument) + weight, success = method(node, name, argument) total_weight += weight results.append(success) - return total_weight / float(len(results)), all(results) if len(results) > 0 else False + return total_weight / (float(len(results)) or 1), all(results) if len(results) > 0 else False + + def constraint_match_eq(self, node, name, expected): + if not hasattr(node, name): + return 1.0, False + + return 1.0, getattr(node, name) == expected + + def constraint_match_re(self, node, name, arg): + # Node match + if name == 'node': + group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0) + + weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(node, group) + return weight, weight > minimum_weight + + # Regex match + if type(arg).__name__ == 'SRE_Pattern': + return 1.0, arg.match(getattr(node, name)) is not None + + # Value match + if hasattr(node, name): + match = self.capture_group.parser.matcher.value_match(getattr(node, name), arg, single=True) + return 1.0, match is not None + + raise ValueError("Unknown constraint match type '%s'" % name) + + # + # Result + # + + def constraint_result(self, parent_node, fragment): + ctag = self.kwargs.get('tag') + if not ctag: + return 0, False + + ckey = self.kwargs.get('key') + + for tag, result in parent_node.captured(): + if tag != ctag: + continue + + if not ckey or ckey in result.keys(): + return 1.0, True + + return 0.0, False + + # + # Failure + # + + def constraint_failure(self, parent_node, fragment, match): + if not match or not match.success: + return 1.0, True + + return 0, False + + # + # Success + # + + def constraint_success(self, parent_node, fragment, match): + if match and match.success: + return 1.0, True + + return 0, False def __repr__(self): return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons) diff --git a/libs/caper/group.py b/libs/caper/group.py index 71b9766..8f0399e 100644 --- a/libs/caper/group.py +++ b/libs/caper/group.py @@ -14,7 +14,7 @@ from logr import Logr -from caper import CaperClosure +from caper import CaperClosure, CaperFragment from caper.helpers import clean_dict from caper.result import CaperFragmentNode, CaperClosureNode from caper.step import CaptureStep @@ -34,86 +34,214 @@ class CaptureGroup(object): #: @type: list of CaptureStep self.steps = [] + + #: type: str + self.step_source = None + #: @type: list of CaptureConstraint - self.constraints = [] + self.pre_constraints = [] + + #: :type: list of CaptureConstraint + self.post_constraints = [] - def capture_fragment(self, tag, regex=None, func=None, single=True): + def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs): Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single) + if self.step_source != 'fragment': + if self.step_source is None: + self.step_source = 'fragment' + else: + raise ValueError("Unable to mix fragment and closure capturing in a group") + self.steps.append(CaptureStep( self, tag, 'fragment', regex=regex, func=func, - single=single + single=single, + **kwargs )) return self - def capture_closure(self, tag, regex=None, func=None, single=True): + def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs): Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single) + if self.step_source != 'closure': + if self.step_source is None: + self.step_source = 'closure' + else: + raise ValueError("Unable to mix fragment and closure capturing in a group") + self.steps.append(CaptureStep( self, tag, 'closure', regex=regex, func=func, - single=single + single=single, + **kwargs )) return self - def until(self, **kwargs): - self.constraints.append(CaptureConstraint(self, **kwargs)) + def until_closure(self, **kwargs): + self.pre_constraints.append(CaptureConstraint(self, 'match', target='closure', **kwargs)) + + return self + + def until_fragment(self, **kwargs): + self.pre_constraints.append(CaptureConstraint(self, 'match', target='fragment', **kwargs)) + + return self + + def until_result(self, **kwargs): + self.pre_constraints.append(CaptureConstraint(self, 'result', **kwargs)) + + return self + + def until_failure(self, **kwargs): + self.post_constraints.append(CaptureConstraint(self, 'failure', **kwargs)) + + return self + + def until_success(self, **kwargs): + self.post_constraints.append(CaptureConstraint(self, 'success', **kwargs)) return self def parse_subject(self, parent_head, subject): - parent_node = parent_head[0] if type(parent_head) is list else parent_head + Logr.debug("parse_subject (%s) subject: %s", self.step_source, repr(subject)) - # TODO just jumping into closures for now, will be fixed later if type(subject) is CaperClosure: - return [CaperClosureNode(subject, parent_head)] + return self.parse_closure(parent_head, subject) - nodes = [] + if type(subject) is CaperFragment: + return self.parse_fragment(parent_head, subject) - # Check constraints - for constraint in self.constraints: - weight, success = constraint.execute(subject) - if success: - Logr.debug('capturing broke on "%s" at %s', subject.value, constraint) - parent_node.finished_groups.append(self) - nodes.append(parent_head) + raise ValueError('Unknown subject (%s)', subject) - if weight == 1.0: - return nodes - else: - Logr.debug('Branching result') + def parse_fragment(self, parent_head, subject): + parent_node = parent_head[0] if type(parent_head) is list else parent_head - # Try match subject against the steps available - tag, success, weight, match, num_fragments = (None, None, None, None, None) - for step in self.steps: - tag = step.tag - success, weight, match, num_fragments = step.execute(subject) - if success: - match = clean_dict(match) if type(match) is dict else match - Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (weight, match, num_fragments)) - break + nodes, match = self.match(parent_head, parent_node, subject) + + # Capturing broke on constraint, return now + if not match: + return nodes Logr.debug('created fragment node with subject.value: "%s"' % subject.value) - result = [CaperFragmentNode(parent_node.closure, subject.take_right(num_fragments), parent_head, tag, weight, match)] + result = [CaperFragmentNode( + parent_node.closure, + subject.take_right(match.num_fragments), + parent_head, + match + )] + + # Branch if the match was indefinite (weight below 1.0) + if match.result and match.weight < 1.0: + if match.num_fragments == 1: + result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head)) + else: + nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head)) + + nodes.append(result[0] if len(result) == 1 else result) + + return nodes + + def parse_closure(self, parent_head, subject): + parent_node = parent_head[0] if type(parent_head) is list else parent_head + + nodes, match = self.match(parent_head, parent_node, subject) + + # Capturing broke on constraint, return now + if not match: + return nodes + + Logr.debug('created closure node with subject.value: "%s"' % subject.value) - if match and weight < 1.0: - if num_fragments == 1: - result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None)) + result = [CaperClosureNode( + subject, + parent_head, + match + )] + + # Branch if the match was indefinite (weight below 1.0) + if match.result and match.weight < 1.0: + if match.num_fragments == 1: + result.append(CaperClosureNode(subject, parent_head)) else: - nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None)) + nodes.append(CaperClosureNode(subject, parent_head)) nodes.append(result[0] if len(result) == 1 else result) return nodes + def match(self, parent_head, parent_node, subject): + nodes = [] + + # Check pre constaints + broke, definite = self.check_constraints(self.pre_constraints, parent_head, subject) + + if broke: + nodes.append(parent_head) + + if definite: + return nodes, None + + # Try match subject against the steps available + match = None + + for step in self.steps: + if step.source == 'closure' and type(subject) is not CaperClosure: + pass + elif step.source == 'fragment' and type(subject) is CaperClosure: + Logr.debug('Closure encountered on fragment step, jumping into fragments') + return [CaperClosureNode(subject, parent_head, None)], None + + match = step.execute(subject) + + if match.success: + if type(match.result) is dict: + match.result = clean_dict(match.result) + + Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % ( + match.weight, match.result, match.num_fragments + )) + + step.matched = True + + break + + if all([step.single and step.matched for step in self.steps]): + Logr.debug('All steps completed, group finished') + parent_node.finished_groups.append(self) + return nodes, match + + # Check post constraints + broke, definite = self.check_constraints(self.post_constraints, parent_head, subject, match=match) + if broke: + return nodes, None + + return nodes, match + + def check_constraints(self, constraints, parent_head, subject, **kwargs): + parent_node = parent_head[0] if type(parent_head) is list else parent_head + + # Check constraints + for constraint in [c for c in constraints if c.target == subject.__key__ or not c.target]: + Logr.debug("Testing constraint %s against subject %s", repr(constraint), repr(subject)) + + weight, success = constraint.execute(parent_node, subject, **kwargs) + + if success: + Logr.debug('capturing broke on "%s" at %s', subject.value, constraint) + parent_node.finished_groups.append(self) + + return True, weight == 1.0 + + return False, None + def execute(self): heads_finished = None @@ -126,20 +254,26 @@ class CaptureGroup(object): for head in heads: node = head[0] if type(head) is list else head - Logr.debug("head node: %s" % node) - if self in node.finished_groups: Logr.debug("head finished for group") self.result.heads.append(head) heads_finished.append(True) continue + Logr.debug('') + + Logr.debug(node) + next_subject = node.next() + Logr.debug('----------[%s] (%s)----------' % (next_subject, repr(next_subject.value) if next_subject else None)) + if next_subject: for node_result in self.parse_subject(head, next_subject): self.result.heads.append(node_result) + Logr.debug('Heads: %s', self.result.heads) + heads_finished.append(self in node.finished_groups or next_subject is None) if len(self.result.heads) == 0: diff --git a/libs/caper/matcher.py b/libs/caper/matcher.py index c154cd7..3acf2e6 100644 --- a/libs/caper/matcher.py +++ b/libs/caper/matcher.py @@ -71,7 +71,7 @@ class FragmentMatcher(object): if group_name and group_name == name: return group_name, weight_groups - return None + return None, None def value_match(self, value, group_name=None, single=True): result = None diff --git a/libs/caper/objects.py b/libs/caper/objects.py index 1f82c33..b7d9084 100644 --- a/libs/caper/objects.py +++ b/libs/caper/objects.py @@ -16,6 +16,8 @@ from caper.helpers import xrange_six class CaperClosure(object): + __key__ = 'closure' + def __init__(self, index, value): #: :type: int self.index = index @@ -31,8 +33,16 @@ class CaperClosure(object): #: :type: list of CaperFragment self.fragments = [] + def __str__(self): + return "" % repr(self.result) + + def __repr__(self): + return self.__str__() diff --git a/libs/caper/parsers/anime.py b/libs/caper/parsers/anime.py index 88313a2..86c7091 100644 --- a/libs/caper/parsers/anime.py +++ b/libs/caper/parsers/anime.py @@ -75,8 +75,8 @@ class AnimeParser(Parser): .execute(once=True) self.capture_fragment('show_name', single=False)\ - .until(value__re='identifier')\ - .until(value__re='video')\ + .until_fragment(value__re='identifier')\ + .until_fragment(value__re='video')\ .execute() self.capture_fragment('identifier', regex='identifier') \ diff --git a/libs/caper/parsers/base.py b/libs/caper/parsers/base.py index 6bae537..16bbc19 100644 --- a/libs/caper/parsers/base.py +++ b/libs/caper/parsers/base.py @@ -14,7 +14,7 @@ from caper import FragmentMatcher from caper.group import CaptureGroup -from caper.result import CaperResult, CaperClosureNode +from caper.result import CaperResult, CaperClosureNode, CaperRootNode from logr import Logr @@ -52,7 +52,7 @@ class Parser(object): self.reset() self.closures = closures - self.result.heads = [CaperClosureNode(closures[0])] + self.result.heads = [CaperRootNode(closures[0])] def run(self, closures): """ @@ -65,18 +65,20 @@ class Parser(object): # Capture Methods # - def capture_fragment(self, tag, regex=None, func=None, single=True): + def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs): return CaptureGroup(self, self.result).capture_fragment( tag, regex=regex, func=func, - single=single + single=single, + **kwargs ) - def capture_closure(self, tag, regex=None, func=None, single=True): + def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs): return CaptureGroup(self, self.result).capture_closure( tag, regex=regex, func=func, - single=single + single=single, + **kwargs ) diff --git a/libs/caper/parsers/scene.py b/libs/caper/parsers/scene.py index 0dfe378..cd0a8fd 100644 --- a/libs/caper/parsers/scene.py +++ b/libs/caper/parsers/scene.py @@ -185,11 +185,11 @@ class SceneParser(Parser): self.setup(closures) self.capture_fragment('show_name', single=False)\ - .until(fragment__re='identifier')\ - .until(fragment__re='video')\ - .until(fragment__re='dvd')\ - .until(fragment__re='audio')\ - .until(fragment__re='scene')\ + .until_fragment(node__re='identifier')\ + .until_fragment(node__re='video')\ + .until_fragment(node__re='dvd')\ + .until_fragment(node__re='audio')\ + .until_fragment(node__re='scene')\ .execute() self.capture_fragment('identifier', regex='identifier', single=False)\ @@ -197,7 +197,7 @@ class SceneParser(Parser): .capture_fragment('dvd', regex='dvd', single=False)\ .capture_fragment('audio', regex='audio', single=False)\ .capture_fragment('scene', regex='scene', single=False)\ - .until(left_sep__eq='-', right__eq=None)\ + .until_fragment(left_sep__eq='-', right__eq=None)\ .execute() self.capture_fragment('group', func=self.capture_group)\ @@ -222,7 +222,9 @@ class SceneParser(Parser): Logr.debug(head[0].closure.value) for node in head: - Logr.debug('\t' + str(node).ljust(55) + '\t' + str(node.weight) + '\t' + str(node.match)) + Logr.debug('\t' + str(node).ljust(55) + '\t' + ( + str(node.match.weight) + '\t' + str(node.match.result) + ) if node.match else '') if len(head) > 0 and head[0].parent: self.print_tree([head[0].parent]) diff --git a/libs/caper/parsers/usenet.py b/libs/caper/parsers/usenet.py new file mode 100644 index 0000000..f622d43 --- /dev/null +++ b/libs/caper/parsers/usenet.py @@ -0,0 +1,115 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from logr import Logr +from caper import FragmentMatcher +from caper.parsers.base import Parser + + +PATTERN_GROUPS = [ + ('usenet', [ + r'\[(?P#[\w\.@]+)\]', + r'^\[(?P\w+)\]$', + r'\[(?PFULL)\]', + r'\[\s?(?PTOWN)\s?\]', + r'(.*?\s)?[_\W]*(?Pwww\..*?\.[a-z0-9]+)[_\W]*(.*?\s)?', + r'(.*?\s)?[_\W]*(?P(www\.)?[-\w]+\.(com|org|info))[_\W]*(.*?\s)?' + ]), + + ('part', [ + r'.?(?P\d+)/(?P\d+).?' + ]), + + ('detail', [ + r'[\s-]*\w*?[\s-]*\"(?P.*?)\"[\s-]*\w*?[\s-]*(?P[\d,\.]*\s?MB)?[\s-]*(?PyEnc)?', + r'(?P[\d,\.]*\s?MB)[\s-]*(?PyEnc)', + r'(?P[\d,\.]*\s?MB)|(?PyEnc)' + ]) +] + + +class UsenetParser(Parser): + matcher = None + + def __init__(self, debug=False): + if not UsenetParser.matcher: + UsenetParser.matcher = FragmentMatcher(PATTERN_GROUPS) + Logr.info("Fragment matcher for %s created", self.__class__.__name__) + + super(UsenetParser, self).__init__(UsenetParser.matcher, debug) + + def run(self, closures): + """ + :type closures: list of CaperClosure + """ + + self.setup(closures) + + # Capture usenet or part info until we get a part or matching fails + self.capture_closure('usenet', regex='usenet', single=False)\ + .capture_closure('part', regex='part', single=True) \ + .until_result(tag='part') \ + .until_failure()\ + .execute() + + is_town_release, has_part = self.get_state() + + if not is_town_release: + self.capture_release_name() + + # If we already have the part (TOWN releases), ignore matching part again + if not is_town_release and not has_part: + self.capture_fragment('part', regex='part', single=True)\ + .until_closure(node__re='usenet')\ + .until_success()\ + .execute() + + # Capture any leftover details + self.capture_closure('usenet', regex='usenet', single=False)\ + .capture_closure('detail', regex='detail', single=False)\ + .execute() + + self.result.build() + return self.result + + def capture_release_name(self): + self.capture_closure('detail', regex='detail', single=False)\ + .until_failure()\ + .execute() + + self.capture_fragment('release_name', single=False, include_separators=True) \ + .until_closure(node__re='usenet') \ + .until_closure(node__re='detail') \ + .until_closure(node__re='part') \ + .until_fragment(value__eq='-')\ + .execute() + + # Capture any detail after the release name + self.capture_closure('detail', regex='detail', single=False)\ + .until_failure()\ + .execute() + + def get_state(self): + # TODO multiple-chains? + is_town_release = False + has_part = False + + for tag, result in self.result.heads[0].captured(): + if tag == 'usenet' and result.get('group') == 'TOWN': + is_town_release = True + + if tag == 'part': + has_part = True + + return is_town_release, has_part diff --git a/libs/caper/result.py b/libs/caper/result.py index 24037cd..c9e3423 100644 --- a/libs/caper/result.py +++ b/libs/caper/result.py @@ -20,7 +20,7 @@ GROUP_MATCHES = ['identifier'] class CaperNode(object): - def __init__(self, closure, parent=None, tag=None, weight=None, match=None): + def __init__(self, closure, parent=None, match=None): """ :type parent: CaperNode :type weight: float @@ -28,41 +28,77 @@ class CaperNode(object): #: :type: caper.objects.CaperClosure self.closure = closure + #: :type: CaperNode self.parent = parent - #: :type: str - self.tag = tag - #: :type: float - self.weight = weight - #: :type: dict + + #: :type: CaptureMatch self.match = match + #: :type: list of CaptureGroup self.finished_groups = [] def next(self): raise NotImplementedError() + def captured(self): + cur = self + + if cur.match: + yield cur.match.tag, cur.match.result + + while cur.parent: + cur = cur.parent + + if cur.match: + yield cur.match.tag, cur.match.result + + +class CaperRootNode(CaperNode): + def __init__(self, closure): + """ + :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure + """ + super(CaperRootNode, self).__init__(closure) + + def next(self): + return self.closure + class CaperClosureNode(CaperNode): - def __init__(self, closure, parent=None, tag=None, weight=None, match=None): + def __init__(self, closure, parent=None, match=None): """ :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure """ - super(CaperClosureNode, self).__init__(closure, parent, tag, weight, match) + super(CaperClosureNode, self).__init__(closure, parent, match) def next(self): - if self.closure and len(self.closure.fragments) > 0: + if not self.closure: + return None + + if self.match: + # Jump to next closure if we have a match + return self.closure.right + elif len(self.closure.fragments) > 0: + # Otherwise parse the fragments return self.closure.fragments[0] + return None + def __str__(self): + return "" % repr(self.match) + + def __repr__(self): + return self.__str__() + class CaperFragmentNode(CaperNode): - def __init__(self, closure, fragments, parent=None, tag=None, weight=None, match=None): + def __init__(self, closure, fragments, parent=None, match=None): """ :type closure: caper.objects.CaperClosure :type fragments: list of caper.objects.CaperFragment """ - super(CaperFragmentNode, self).__init__(closure, parent, tag, weight, match) + super(CaperFragmentNode, self).__init__(closure, parent, match) #: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment self.fragments = fragments @@ -76,6 +112,12 @@ class CaperFragmentNode(CaperNode): return None + def __str__(self): + return "" % repr(self.match) + + def __repr__(self): + return self.__str__() + class CaperResult(object): def __init__(self): @@ -122,15 +164,8 @@ class CaperResult(object): result.append(node_chain) continue - # Skip over closure nodes - if type(node) is CaperClosureNode: - result.extend(self.combine_chain(node.parent, node_chain)) - - # Parse fragment matches - if type(node) is CaperFragmentNode: - node_chain.update(node) - - result.extend(self.combine_chain(node.parent, node_chain)) + node_chain.update(node) + result.extend(self.combine_chain(node.parent, node_chain)) return result @@ -145,17 +180,23 @@ class CaperResultChain(object): self.weights = [] def update(self, subject): - if subject.weight is None: + """ + :type subject: CaperFragmentNode + """ + if not subject.match or not subject.match.success: return - self.num_matched += len(subject.fragments) if subject.fragments is not None else 0 - self.weights.append(subject.weight) + # TODO this should support closure nodes + if type(subject) is CaperFragmentNode: + self.num_matched += len(subject.fragments) if subject.fragments is not None else 0 + + self.weights.append(subject.match.weight) if subject.match: - if subject.tag not in self.info: - self.info[subject.tag] = [] + if subject.match.tag not in self.info: + self.info[subject.match.tag] = [] - self.info[subject.tag].insert(0, subject.match) + self.info[subject.match.tag].insert(0, subject.match.result) def finish(self): self.weight = sum(self.weights) / len(self.weights) diff --git a/libs/caper/step.py b/libs/caper/step.py index a82a930..817514b 100644 --- a/libs/caper/step.py +++ b/libs/caper/step.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from caper.objects import CaptureMatch from logr import Logr class CaptureStep(object): REPR_KEYS = ['regex', 'func', 'single'] - def __init__(self, capture_group, tag, source, regex=None, func=None, single=None): + def __init__(self, capture_group, tag, source, regex=None, func=None, single=None, **kwargs): #: @type: CaptureGroup self.capture_group = capture_group @@ -33,22 +34,57 @@ class CaptureStep(object): #: @type: bool self.single = single + self.kwargs = kwargs + + self.matched = False + def execute(self, fragment): + """Execute step on fragment + + :type fragment: CaperFragment + :rtype : CaptureMatch + """ + + match = CaptureMatch(self.tag, self) + if self.regex: - weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex) + weight, result, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex) Logr.debug('(execute) [regex] tag: "%s"', self.tag) - if match: - return True, weight, match, num_fragments + + if not result: + return match + + # Populate CaptureMatch + match.success = True + match.weight = weight + match.result = result + match.num_fragments = num_fragments elif self.func: - match = self.func(fragment) + result = self.func(fragment) Logr.debug('(execute) [func] %s += "%s"', self.tag, match) - if match: - return True, 1.0, match, 1 + + if not result: + return match + + # Populate CaptureMatch + match.success = True + match.weight = 1.0 + match.result = result else: Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value) - return True, 1.0, fragment.value, 1 - return False, None, None, 1 + include_separators = self.kwargs.get('include_separators', False) + + # Populate CaptureMatch + match.success = True + match.weight = 1.0 + + if include_separators: + match.result = (fragment.left_sep, fragment.value, fragment.right_sep) + else: + match.result = fragment.value + + return match def __repr__(self): attribute_values = [key + '=' + repr(getattr(self, key))