Browse Source

Updated Caper to v0.3.1

pull/2599/head
Dean Gardiner 12 years ago
parent
commit
eb151a4c5d
  1. 24
      libs/caper/__init__.py
  2. 109
      libs/caper/constraint.py
  3. 210
      libs/caper/group.py
  4. 2
      libs/caper/matcher.py
  5. 43
      libs/caper/objects.py
  6. 4
      libs/caper/parsers/anime.py
  7. 14
      libs/caper/parsers/base.py
  8. 16
      libs/caper/parsers/scene.py
  9. 115
      libs/caper/parsers/usenet.py
  10. 87
      libs/caper/result.py
  11. 54
      libs/caper/step.py

24
libs/caper/__init__.py

@ -17,9 +17,10 @@ from caper.matcher import FragmentMatcher
from caper.objects import CaperFragment, CaperClosure
from caper.parsers.anime import AnimeParser
from caper.parsers.scene import SceneParser
from caper.parsers.usenet import UsenetParser
__version_info__ = ('0', '2', '9')
__version_info__ = ('0', '3', '1')
__version_branch__ = 'master'
__version__ = "%s%s" % (
@ -28,8 +29,9 @@ __version__ = "%s%s" % (
)
CL_START_CHARS = ['(', '[']
CL_END_CHARS = [')', ']']
CL_START_CHARS = ['(', '[', '<', '>']
CL_END_CHARS = [')', ']', '<', '>']
CL_END_STRINGS = [' - ']
STRIP_START_CHARS = ''.join(CL_START_CHARS)
STRIP_END_CHARS = ''.join(CL_END_CHARS)
@ -47,8 +49,9 @@ class Caper(object):
self.debug = debug
self.parsers = {
'anime': AnimeParser,
'scene': SceneParser,
'anime': AnimeParser
'usenet': UsenetParser
}
def _closure_split(self, name):
@ -62,7 +65,7 @@ class Caper(object):
def end_closure(closures, buf):
buf = buf.strip(STRIP_CHARS)
if len(buf) < 1:
if len(buf) < 2:
return
cur = CaperClosure(len(closures), buf)
@ -76,6 +79,7 @@ class Caper(object):
state = CL_START
buf = ""
for x, ch in enumerate(name):
# Check for start characters
if state == CL_START and ch in CL_START_CHARS:
end_closure(closures, buf)
@ -85,10 +89,17 @@ class Caper(object):
buf += ch
if state == CL_END and ch in CL_END_CHARS:
# End character found, create the closure
end_closure(closures, buf)
state = CL_START
buf = ""
elif state == CL_START and buf[-3:] in CL_END_STRINGS:
# End string found, create the closure
end_closure(closures, buf[:-3])
state = CL_START
buf = ""
end_closure(closures, buf)
@ -174,6 +185,9 @@ class Caper(object):
for closure in closures:
Logr.debug("closure [%s]", closure.value)
for fragment in closure.fragments:
Logr.debug("\tfragment [%s]", fragment.value)
if parser not in self.parsers:
raise ValueError("Unknown parser")

109
libs/caper/constraint.py

@ -14,7 +14,7 @@
class CaptureConstraint(object):
def __init__(self, capture_group, comparisons=None, **kwargs):
def __init__(self, capture_group, constraint_type, comparisons=None, target=None, **kwargs):
"""Capture constraint object
:type capture_group: CaptureGroup
@ -22,50 +22,113 @@ class CaptureConstraint(object):
self.capture_group = capture_group
self.constraint_type = constraint_type
self.target = target
self.comparisons = comparisons if comparisons else []
self.kwargs = {}
for key, value in kwargs.items():
key = key.split('__')
for orig_key, value in kwargs.items():
key = orig_key.split('__')
if len(key) != 2:
self.kwargs[orig_key] = value
continue
name, method = key
method = '_compare_' + method
method = 'constraint_match_' + method
if not hasattr(self, method):
self.kwargs[orig_key] = value
continue
self.comparisons.append((name, getattr(self, method), value))
def _compare_eq(self, fragment, name, expected):
if not hasattr(fragment, name):
return 1.0, False
def execute(self, parent_node, node, **kwargs):
func_name = 'constraint_%s' % self.constraint_type
return 1.0, getattr(fragment, name) == expected
if hasattr(self, func_name):
return getattr(self, func_name)(parent_node, node, **kwargs)
def _compare_re(self, fragment, name, arg):
if name == 'fragment':
group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)
raise ValueError('Unknown constraint type "%s"' % self.constraint_type)
weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, group)
return weight, weight > minimum_weight
elif type(arg).__name__ == 'SRE_Pattern':
return 1.0, arg.match(getattr(fragment, name)) is not None
elif hasattr(fragment, name):
match = self.capture_group.parser.matcher.value_match(getattr(fragment, name), arg, single=True)
return 1.0, match is not None
else:
raise ValueError("Unable to find attribute with name '%s'" % name)
#
# Node Matching
#
def execute(self, fragment):
def constraint_match(self, parent_node, node):
results = []
total_weight = 0
for name, method, argument in self.comparisons:
weight, success = method(fragment, name, argument)
weight, success = method(node, name, argument)
total_weight += weight
results.append(success)
return total_weight / float(len(results)), all(results) if len(results) > 0 else False
return total_weight / (float(len(results)) or 1), all(results) if len(results) > 0 else False
def constraint_match_eq(self, node, name, expected):
    """Compare attribute ``name`` of ``node`` with ``expected``.

    :param node: object whose attribute is inspected
    :param name: attribute name to read from ``node``
    :param expected: value the attribute must equal
    :return: ``(weight, success)``; the weight is always 1.0 and success is
             False when ``node`` has no such attribute
    """
    is_equal = hasattr(node, name) and getattr(node, name) == expected
    return 1.0, is_equal
def constraint_match_re(self, node, name, arg):
    """Match ``node`` (or one of its attributes) against a pattern.

    :param node: subject being tested
    :param name: ``'node'`` to match the node itself against a matcher group,
                 otherwise the name of the attribute to test
    :param arg: for ``'node'``: a group name or a ``(group, minimum_weight)``
                tuple; otherwise a compiled regex or a matcher group name
    :return: ``(weight, success)``
    :raises ValueError: when ``arg`` is not a compiled regex and ``node`` has
                        no attribute called ``name``
    """
    # Imported locally: this module's import block is not part of this hunk
    import re

    # Node match
    if name == 'node':
        group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)

        weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(node, group)
        return weight, weight > minimum_weight

    # Regex match
    # Compare against the real compiled-pattern type rather than the
    # Python 2 only class name 'SRE_Pattern', so Python 3 patterns are
    # recognised as well.
    if isinstance(arg, type(re.compile(''))):
        return 1.0, arg.match(getattr(node, name)) is not None

    # Value match
    if hasattr(node, name):
        match = self.capture_group.parser.matcher.value_match(getattr(node, name), arg, single=True)
        return 1.0, match is not None

    raise ValueError("Unknown constraint match type '%s'" % name)
#
# Result
#
def constraint_result(self, parent_node, fragment):
    """Succeed when a result with the configured tag was already captured.

    Reads ``tag`` (required) and ``key`` (optional) from ``self.kwargs`` and
    scans ``parent_node.captured()`` for a result with that tag. When ``key``
    is given, the result must also contain that key.

    :param parent_node: node whose captured ``(tag, result)`` pairs are scanned
    :param fragment: unused; kept for the common constraint signature
    :return: ``(weight, success)``
    """
    ctag = self.kwargs.get('tag')

    if not ctag:
        return 0, False

    ckey = self.kwargs.get('key')

    for tag, result in parent_node.captured():
        if tag != ctag:
            continue

        if not ckey:
            return 1.0, True

        # Results may be plain strings (raw captures) or None, which have no
        # keys; only mapping-like results can satisfy a key requirement.
        if hasattr(result, 'keys') and ckey in result.keys():
            return 1.0, True

    return 0.0, False
#
# Failure
#
def constraint_failure(self, parent_node, fragment, match):
    """Succeed when the step match is missing or unsuccessful.

    :param parent_node: unused; kept for the common constraint signature
    :param fragment: unused; kept for the common constraint signature
    :param match: match object exposing ``success``, or None
    :return: ``(1.0, True)`` on failure of the match, otherwise ``(0, False)``
    """
    matched = bool(match) and bool(match.success)
    return (0, False) if matched else (1.0, True)
#
# Success
#
def constraint_success(self, parent_node, fragment, match):
    """Succeed when a step match exists and was successful.

    :param parent_node: unused; kept for the common constraint signature
    :param fragment: unused; kept for the common constraint signature
    :param match: match object exposing ``success``, or None
    :return: ``(1.0, True)`` for a successful match, otherwise ``(0, False)``
    """
    if not match:
        return 0, False

    if not match.success:
        return 0, False

    return 1.0, True
def __repr__(self):
return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons)

210
libs/caper/group.py

@ -14,7 +14,7 @@
from logr import Logr
from caper import CaperClosure
from caper import CaperClosure, CaperFragment
from caper.helpers import clean_dict
from caper.result import CaperFragmentNode, CaperClosureNode
from caper.step import CaptureStep
@ -34,86 +34,214 @@ class CaptureGroup(object):
#: @type: list of CaptureStep
self.steps = []
#: type: str
self.step_source = None
#: @type: list of CaptureConstraint
self.constraints = []
self.pre_constraints = []
#: :type: list of CaptureConstraint
self.post_constraints = []
def capture_fragment(self, tag, regex=None, func=None, single=True):
def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs):
Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single)
if self.step_source != 'fragment':
if self.step_source is None:
self.step_source = 'fragment'
else:
raise ValueError("Unable to mix fragment and closure capturing in a group")
self.steps.append(CaptureStep(
self, tag,
'fragment',
regex=regex,
func=func,
single=single
single=single,
**kwargs
))
return self
def capture_closure(self, tag, regex=None, func=None, single=True):
def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)
if self.step_source != 'closure':
if self.step_source is None:
self.step_source = 'closure'
else:
raise ValueError("Unable to mix fragment and closure capturing in a group")
self.steps.append(CaptureStep(
self, tag,
'closure',
regex=regex,
func=func,
single=single
single=single,
**kwargs
))
return self
def until(self, **kwargs):
self.constraints.append(CaptureConstraint(self, **kwargs))
def until_closure(self, **kwargs):
    """Add a pre-constraint of type 'match' targeting closures.

    :param kwargs: comparison arguments forwarded to CaptureConstraint
    :return: this group, so calls can be chained
    """
    constraint = CaptureConstraint(self, 'match', target='closure', **kwargs)
    self.pre_constraints.append(constraint)

    return self
def until_fragment(self, **kwargs):
    """Add a pre-constraint of type 'match' targeting fragments.

    :param kwargs: comparison arguments forwarded to CaptureConstraint
    :return: this group, so calls can be chained
    """
    constraint = CaptureConstraint(self, 'match', target='fragment', **kwargs)
    self.pre_constraints.append(constraint)

    return self
def until_result(self, **kwargs):
    """Add a pre-constraint of type 'result' (no target).

    :param kwargs: arguments forwarded to CaptureConstraint
    :return: this group, so calls can be chained
    """
    constraint = CaptureConstraint(self, 'result', **kwargs)
    self.pre_constraints.append(constraint)

    return self
def until_failure(self, **kwargs):
    """Add a post-constraint of type 'failure' (no target).

    :param kwargs: arguments forwarded to CaptureConstraint
    :return: this group, so calls can be chained
    """
    constraint = CaptureConstraint(self, 'failure', **kwargs)
    self.post_constraints.append(constraint)

    return self
def until_success(self, **kwargs):
    """Add a post-constraint of type 'success' (no target).

    :param kwargs: arguments forwarded to CaptureConstraint
    :return: this group, so calls can be chained
    """
    constraint = CaptureConstraint(self, 'success', **kwargs)
    self.post_constraints.append(constraint)

    return self
def parse_subject(self, parent_head, subject):
parent_node = parent_head[0] if type(parent_head) is list else parent_head
Logr.debug("parse_subject (%s) subject: %s", self.step_source, repr(subject))
# TODO just jumping into closures for now, will be fixed later
if type(subject) is CaperClosure:
return [CaperClosureNode(subject, parent_head)]
return self.parse_closure(parent_head, subject)
nodes = []
if type(subject) is CaperFragment:
return self.parse_fragment(parent_head, subject)
# Check constraints
for constraint in self.constraints:
weight, success = constraint.execute(subject)
if success:
Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
parent_node.finished_groups.append(self)
nodes.append(parent_head)
raise ValueError('Unknown subject (%s)', subject)
if weight == 1.0:
def parse_fragment(self, parent_head, subject):
parent_node = parent_head[0] if type(parent_head) is list else parent_head
nodes, match = self.match(parent_head, parent_node, subject)
# Capturing broke on constraint, return now
if not match:
return nodes
Logr.debug('created fragment node with subject.value: "%s"' % subject.value)
result = [CaperFragmentNode(
parent_node.closure,
subject.take_right(match.num_fragments),
parent_head,
match
)]
# Branch if the match was indefinite (weight below 1.0)
if match.result and match.weight < 1.0:
if match.num_fragments == 1:
result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head))
else:
Logr.debug('Branching result')
nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head))
# Try match subject against the steps available
tag, success, weight, match, num_fragments = (None, None, None, None, None)
for step in self.steps:
tag = step.tag
success, weight, match, num_fragments = step.execute(subject)
if success:
match = clean_dict(match) if type(match) is dict else match
Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (weight, match, num_fragments))
break
nodes.append(result[0] if len(result) == 1 else result)
Logr.debug('created fragment node with subject.value: "%s"' % subject.value)
return nodes
def parse_closure(self, parent_head, subject):
parent_node = parent_head[0] if type(parent_head) is list else parent_head
result = [CaperFragmentNode(parent_node.closure, subject.take_right(num_fragments), parent_head, tag, weight, match)]
nodes, match = self.match(parent_head, parent_node, subject)
if match and weight < 1.0:
if num_fragments == 1:
result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))
# Capturing broke on constraint, return now
if not match:
return nodes
Logr.debug('created closure node with subject.value: "%s"' % subject.value)
result = [CaperClosureNode(
subject,
parent_head,
match
)]
# Branch if the match was indefinite (weight below 1.0)
if match.result and match.weight < 1.0:
if match.num_fragments == 1:
result.append(CaperClosureNode(subject, parent_head))
else:
nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))
nodes.append(CaperClosureNode(subject, parent_head))
nodes.append(result[0] if len(result) == 1 else result)
return nodes
def match(self, parent_head, parent_node, subject):
    """Run the pre-constraints, steps and post-constraints against ``subject``.

    :param parent_head: head being extended (a node, or a list of nodes)
    :param parent_node: node whose ``finished_groups`` is appended to once
                        every step is single and has matched
    :param subject: closure or fragment being tested
    :return: ``(nodes, match)``. ``nodes`` holds heads to carry over unchanged.
             ``match`` is the result of the step that succeeded, otherwise of
             the last step tried; it is None when there are no steps, when
             capturing broke on a constraint, or when a fragment step
             encountered a closure.
    """
    nodes = []

    # Check pre constraints
    broke, definite = self.check_constraints(self.pre_constraints, parent_head, subject)

    if broke:
        # Keep the current head alive as an alternative
        nodes.append(parent_head)

        # A definite break (weight of exactly 1.0) stops matching here
        if definite:
            return nodes, None

    # Try match subject against the steps available
    match = None

    for step in self.steps:
        if step.source == 'closure' and type(subject) is not CaperClosure:
            # No special handling; the step is still executed below
            pass
        elif step.source == 'fragment' and type(subject) is CaperClosure:
            Logr.debug('Closure encountered on fragment step, jumping into fragments')
            return [CaperClosureNode(subject, parent_head, None)], None

        match = step.execute(subject)

        if match.success:
            if type(match.result) is dict:
                match.result = clean_dict(match.result)

            Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (
                match.weight, match.result, match.num_fragments
            ))

            step.matched = True

            # First successful step wins
            break

    if all([step.single and step.matched for step in self.steps]):
        Logr.debug('All steps completed, group finished')
        parent_node.finished_groups.append(self)

        return nodes, match

    # Check post constraints
    broke, definite = self.check_constraints(self.post_constraints, parent_head, subject, match=match)
    if broke:
        return nodes, None

    return nodes, match
def check_constraints(self, constraints, parent_head, subject, **kwargs):
    """Execute the applicable constraints and report whether capturing broke.

    A constraint applies when it has no target or when its target equals
    ``subject.__key__``. On the first constraint that succeeds, this group is
    added to the parent node's ``finished_groups``.

    :param constraints: constraints to test, in order
    :param parent_head: head being extended (a node, or a list of nodes)
    :param subject: closure or fragment being tested
    :param kwargs: extra arguments forwarded to ``constraint.execute``
    :return: ``(True, weight == 1.0)`` when a constraint succeeded,
             otherwise ``(False, None)``
    """
    if type(parent_head) is list:
        parent_node = parent_head[0]
    else:
        parent_node = parent_head

    # Check constraints
    for constraint in constraints:
        # Skip constraints aimed at a different kind of subject
        if constraint.target != subject.__key__ and constraint.target:
            continue

        Logr.debug("Testing constraint %s against subject %s", repr(constraint), repr(subject))

        weight, success = constraint.execute(parent_node, subject, **kwargs)

        if not success:
            continue

        Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
        parent_node.finished_groups.append(self)

        return True, weight == 1.0

    return False, None
def execute(self):
heads_finished = None
@ -126,20 +254,26 @@ class CaptureGroup(object):
for head in heads:
node = head[0] if type(head) is list else head
Logr.debug("head node: %s" % node)
if self in node.finished_groups:
Logr.debug("head finished for group")
self.result.heads.append(head)
heads_finished.append(True)
continue
Logr.debug('')
Logr.debug(node)
next_subject = node.next()
Logr.debug('----------[%s] (%s)----------' % (next_subject, repr(next_subject.value) if next_subject else None))
if next_subject:
for node_result in self.parse_subject(head, next_subject):
self.result.heads.append(node_result)
Logr.debug('Heads: %s', self.result.heads)
heads_finished.append(self in node.finished_groups or next_subject is None)
if len(self.result.heads) == 0:

2
libs/caper/matcher.py

@ -71,7 +71,7 @@ class FragmentMatcher(object):
if group_name and group_name == name:
return group_name, weight_groups
return None
return None, None
def value_match(self, value, group_name=None, single=True):
result = None

43
libs/caper/objects.py

@ -16,6 +16,8 @@ from caper.helpers import xrange_six
class CaperClosure(object):
__key__ = 'closure'
def __init__(self, index, value):
#: :type: int
self.index = index
@ -31,8 +33,16 @@ class CaperClosure(object):
#: :type: list of CaperFragment
self.fragments = []
def __str__(self):
return "<CaperClosure value: %s" % repr(self.value)
def __repr__(self):
return self.__str__()
class CaperFragment(object):
__key__ = 'fragment'
def __init__(self, closure=None):
#: :type: CaperClosure
self.closure = closure
@ -79,3 +89,36 @@ class CaperFragment(object):
def take_right(self, count, include_self=True):
return self.take('right', count, include_self)
def __str__(self):
return "<CaperFragment value: %s" % repr(self.value)
def __repr__(self):
return self.__str__()
class CaptureMatch(object):
    """Outcome of executing one capture step against a subject.

    Instances start out unsuccessful; the step that created them fills in
    ``success``, ``weight``, ``result`` and ``num_fragments`` on a match.
    """

    def __init__(self, tag, step, success=False, weight=None, result=None, num_fragments=1):
        """
        :param tag: tag of the step that produced this match
        :param step: the step that produced this match
        :param success: whether the step matched
        :param weight: weight of the match, if any
        :param result: captured value, if any
        :param num_fragments: number of fragments the match spans
        """
        #: :type: bool
        self.success = success

        #: :type: float
        self.weight = weight

        #: :type: dict or str
        self.result = result

        #: :type: int
        self.num_fragments = num_fragments

        #: :type: str
        self.tag = tag

        #: :type: CaptureStep
        self.step = step

    def __str__(self):
        # Label corrected to the actual class name (was "CaperMatch")
        return "<CaptureMatch result: %s>" % repr(self.result)

    def __repr__(self):
        return self.__str__()

4
libs/caper/parsers/anime.py

@ -75,8 +75,8 @@ class AnimeParser(Parser):
.execute(once=True)
self.capture_fragment('show_name', single=False)\
.until(value__re='identifier')\
.until(value__re='video')\
.until_fragment(value__re='identifier')\
.until_fragment(value__re='video')\
.execute()
self.capture_fragment('identifier', regex='identifier') \

14
libs/caper/parsers/base.py

@ -14,7 +14,7 @@
from caper import FragmentMatcher
from caper.group import CaptureGroup
from caper.result import CaperResult, CaperClosureNode
from caper.result import CaperResult, CaperClosureNode, CaperRootNode
from logr import Logr
@ -52,7 +52,7 @@ class Parser(object):
self.reset()
self.closures = closures
self.result.heads = [CaperClosureNode(closures[0])]
self.result.heads = [CaperRootNode(closures[0])]
def run(self, closures):
"""
@ -65,18 +65,20 @@ class Parser(object):
# Capture Methods
#
def capture_fragment(self, tag, regex=None, func=None, single=True):
def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs):
return CaptureGroup(self, self.result).capture_fragment(
tag,
regex=regex,
func=func,
single=single
single=single,
**kwargs
)
def capture_closure(self, tag, regex=None, func=None, single=True):
def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
return CaptureGroup(self, self.result).capture_closure(
tag,
regex=regex,
func=func,
single=single
single=single,
**kwargs
)

16
libs/caper/parsers/scene.py

@ -185,11 +185,11 @@ class SceneParser(Parser):
self.setup(closures)
self.capture_fragment('show_name', single=False)\
.until(fragment__re='identifier')\
.until(fragment__re='video')\
.until(fragment__re='dvd')\
.until(fragment__re='audio')\
.until(fragment__re='scene')\
.until_fragment(node__re='identifier')\
.until_fragment(node__re='video')\
.until_fragment(node__re='dvd')\
.until_fragment(node__re='audio')\
.until_fragment(node__re='scene')\
.execute()
self.capture_fragment('identifier', regex='identifier', single=False)\
@ -197,7 +197,7 @@ class SceneParser(Parser):
.capture_fragment('dvd', regex='dvd', single=False)\
.capture_fragment('audio', regex='audio', single=False)\
.capture_fragment('scene', regex='scene', single=False)\
.until(left_sep__eq='-', right__eq=None)\
.until_fragment(left_sep__eq='-', right__eq=None)\
.execute()
self.capture_fragment('group', func=self.capture_group)\
@ -222,7 +222,9 @@ class SceneParser(Parser):
Logr.debug(head[0].closure.value)
for node in head:
Logr.debug('\t' + str(node).ljust(55) + '\t' + str(node.weight) + '\t' + str(node.match))
Logr.debug('\t' + str(node).ljust(55) + '\t' + (
str(node.match.weight) + '\t' + str(node.match.result)
) if node.match else '')
if len(head) > 0 and head[0].parent:
self.print_tree([head[0].parent])

115
libs/caper/parsers/usenet.py

@ -0,0 +1,115 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper import FragmentMatcher
from caper.parsers.base import Parser
# Regex groups handed to FragmentMatcher by UsenetParser. The group names
# ('usenet', 'part', 'detail') are the values UsenetParser passes as ``regex=``
# and ``node__re=`` arguments.
PATTERN_GROUPS = [
    ('usenet', [
        # Bracketed token starting with '#', captured as "group"
        r'\[(?P<group>#[\w\.@]+)\]',
        # Value consisting solely of one bracketed word, captured as "code"
        r'^\[(?P<code>\w+)\]$',
        # Literal "[FULL]" marker
        r'\[(?P<full>FULL)\]',
        # "[TOWN]" with optional inner whitespace, captured as "group"
        r'\[\s?(?P<group>TOWN)\s?\]',
        # Site names: "www.<anything>.<suffix>" ...
        r'(.*?\s)?[_\W]*(?P<site>www\..*?\.[a-z0-9]+)[_\W]*(.*?\s)?',
        # ... or a host ending in .com / .org / .info (optional "www.")
        r'(.*?\s)?[_\W]*(?P<site>(www\.)?[-\w]+\.(com|org|info))[_\W]*(.*?\s)?'
    ]),
    ('part', [
        # "<current>/<total>" with at most one surrounding character each side
        r'.?(?P<current>\d+)/(?P<total>\d+).?'
    ]),
    ('detail', [
        # Quoted file name, optionally followed by a size in MB and "yEnc"
        r'[\s-]*\w*?[\s-]*\"(?P<file_name>.*?)\"[\s-]*\w*?[\s-]*(?P<size>[\d,\.]*\s?MB)?[\s-]*(?P<extra>yEnc)?',
        # Size in MB followed by "yEnc"
        r'(?P<size>[\d,\.]*\s?MB)[\s-]*(?P<extra>yEnc)',
        # Either a size in MB or "yEnc" on its own
        r'(?P<size>[\d,\.]*\s?MB)|(?P<extra>yEnc)'
    ])
]
class UsenetParser(Parser):
    """Parser for usenet subject lines, driven by PATTERN_GROUPS."""

    #: FragmentMatcher shared by all instances, created on first construction
    matcher = None

    def __init__(self, debug=False):
        """
        :param debug: passed through to the base Parser
        """
        if not UsenetParser.matcher:
            UsenetParser.matcher = FragmentMatcher(PATTERN_GROUPS)
            Logr.info("Fragment matcher for %s created", self.__class__.__name__)

        super(UsenetParser, self).__init__(UsenetParser.matcher, debug)

    def run(self, closures):
        """Run all capture groups over ``closures`` and build the result.

        :type closures: list of CaperClosure
        :return: the built parser result
        """
        self.setup(closures)

        # Capture usenet or part info until we get a part or matching fails
        self.capture_closure('usenet', regex='usenet', single=False)\
            .capture_closure('part', regex='part', single=True) \
            .until_result(tag='part') \
            .until_failure()\
            .execute()

        is_town_release, has_part = self.get_state()

        if not is_town_release:
            self.capture_release_name()

        # If we already have the part (TOWN releases), ignore matching part again
        if not is_town_release and not has_part:
            self.capture_fragment('part', regex='part', single=True)\
                .until_closure(node__re='usenet')\
                .until_success()\
                .execute()

        # Capture any leftover details
        self.capture_closure('usenet', regex='usenet', single=False)\
            .capture_closure('detail', regex='detail', single=False)\
            .execute()

        self.result.build()
        return self.result

    def capture_release_name(self):
        """Capture leading details, the release name, then trailing details."""
        self.capture_closure('detail', regex='detail', single=False)\
            .until_failure()\
            .execute()

        self.capture_fragment('release_name', single=False, include_separators=True) \
            .until_closure(node__re='usenet') \
            .until_closure(node__re='detail') \
            .until_closure(node__re='part') \
            .until_fragment(value__eq='-')\
            .execute()

        # Capture any detail after the release name
        self.capture_closure('detail', regex='detail', single=False)\
            .until_failure()\
            .execute()

    def get_state(self):
        """Inspect what the first head has captured so far.

        :return: ``(is_town_release, has_part)``; both False when there are
                 no heads
        """
        # TODO multiple-chains?
        is_town_release = False
        has_part = False

        heads = self.result.heads

        if not heads:
            return is_town_release, has_part

        # A head is a list of nodes when a match branched; use its first
        # node, the same way CaptureGroup resolves heads.
        head = heads[0]
        node = head[0] if type(head) is list else head

        for tag, result in node.captured():
            # Results can be plain strings or None; only dicts carry "group"
            if tag == 'usenet' and isinstance(result, dict) and result.get('group') == 'TOWN':
                is_town_release = True

            if tag == 'part':
                has_part = True

        return is_town_release, has_part

87
libs/caper/result.py

@ -20,7 +20,7 @@ GROUP_MATCHES = ['identifier']
class CaperNode(object):
def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
def __init__(self, closure, parent=None, match=None):
"""
:type parent: CaperNode
:type weight: float
@ -28,41 +28,77 @@ class CaperNode(object):
#: :type: caper.objects.CaperClosure
self.closure = closure
#: :type: CaperNode
self.parent = parent
#: :type: str
self.tag = tag
#: :type: float
self.weight = weight
#: :type: dict
#: :type: CaptureMatch
self.match = match
#: :type: list of CaptureGroup
self.finished_groups = []
def next(self):
    """Return the next subject to parse after this node.

    The base node has no notion of a next subject; subclasses override this.

    :raises NotImplementedError: always
    """
    raise NotImplementedError()
def captured(self):
    """Yield ``(tag, result)`` for every matched node from here to the root.

    Starts at this node and follows ``parent`` links, skipping nodes without
    a match. A parent may be a branched head (a list of nodes); in that case
    its first node is followed, the same way CaptureGroup resolves heads.
    """
    cur = self

    while cur:
        if isinstance(cur, list):
            # Non-empty here: an empty list is falsy and ends the loop
            cur = cur[0]
            continue

        if cur.match:
            yield cur.match.tag, cur.match.result

        cur = cur.parent
class CaperRootNode(CaperNode):
    """Starting node of a parse: it has no parent and no match."""

    def __init__(self, closure):
        """
        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        super(CaperRootNode, self).__init__(closure, parent=None, match=None)

    def next(self):
        """Return the closure this root was created with as the next subject."""
        first_subject = self.closure
        return first_subject
class CaperClosureNode(CaperNode):
def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
def __init__(self, closure, parent=None, match=None):
"""
:type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
"""
super(CaperClosureNode, self).__init__(closure, parent, tag, weight, match)
super(CaperClosureNode, self).__init__(closure, parent, match)
def next(self):
if self.closure and len(self.closure.fragments) > 0:
if not self.closure:
return None
if self.match:
# Jump to next closure if we have a match
return self.closure.right
elif len(self.closure.fragments) > 0:
# Otherwise parse the fragments
return self.closure.fragments[0]
return None
def __str__(self):
return "<CaperClosureNode match: %s>" % repr(self.match)
def __repr__(self):
return self.__str__()
class CaperFragmentNode(CaperNode):
def __init__(self, closure, fragments, parent=None, tag=None, weight=None, match=None):
def __init__(self, closure, fragments, parent=None, match=None):
"""
:type closure: caper.objects.CaperClosure
:type fragments: list of caper.objects.CaperFragment
"""
super(CaperFragmentNode, self).__init__(closure, parent, tag, weight, match)
super(CaperFragmentNode, self).__init__(closure, parent, match)
#: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment
self.fragments = fragments
@ -76,6 +112,12 @@ class CaperFragmentNode(CaperNode):
return None
def __str__(self):
return "<CaperFragmentNode match: %s>" % repr(self.match)
def __repr__(self):
return self.__str__()
class CaperResult(object):
def __init__(self):
@ -122,14 +164,7 @@ class CaperResult(object):
result.append(node_chain)
continue
# Skip over closure nodes
if type(node) is CaperClosureNode:
result.extend(self.combine_chain(node.parent, node_chain))
# Parse fragment matches
if type(node) is CaperFragmentNode:
node_chain.update(node)
result.extend(self.combine_chain(node.parent, node_chain))
return result
@ -145,17 +180,23 @@ class CaperResultChain(object):
self.weights = []
def update(self, subject):
if subject.weight is None:
"""
:type subject: CaperFragmentNode
"""
if not subject.match or not subject.match.success:
return
# TODO this should support closure nodes
if type(subject) is CaperFragmentNode:
self.num_matched += len(subject.fragments) if subject.fragments is not None else 0
self.weights.append(subject.weight)
self.weights.append(subject.match.weight)
if subject.match:
if subject.tag not in self.info:
self.info[subject.tag] = []
if subject.match.tag not in self.info:
self.info[subject.match.tag] = []
self.info[subject.tag].insert(0, subject.match)
self.info[subject.match.tag].insert(0, subject.match.result)
def finish(self):
self.weight = sum(self.weights) / len(self.weights)

54
libs/caper/step.py

@ -12,13 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from caper.objects import CaptureMatch
from logr import Logr
class CaptureStep(object):
REPR_KEYS = ['regex', 'func', 'single']
def __init__(self, capture_group, tag, source, regex=None, func=None, single=None):
def __init__(self, capture_group, tag, source, regex=None, func=None, single=None, **kwargs):
#: @type: CaptureGroup
self.capture_group = capture_group
@ -33,22 +34,57 @@ class CaptureStep(object):
#: @type: bool
self.single = single
self.kwargs = kwargs
self.matched = False
def execute(self, fragment):
"""Execute step on fragment
:type fragment: CaperFragment
:rtype : CaptureMatch
"""
match = CaptureMatch(self.tag, self)
if self.regex:
weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex)
weight, result, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex)
Logr.debug('(execute) [regex] tag: "%s"', self.tag)
if match:
return True, weight, match, num_fragments
if not result:
return match
# Populate CaptureMatch
match.success = True
match.weight = weight
match.result = result
match.num_fragments = num_fragments
elif self.func:
match = self.func(fragment)
result = self.func(fragment)
Logr.debug('(execute) [func] %s += "%s"', self.tag, match)
if match:
return True, 1.0, match, 1
if not result:
return match
# Populate CaptureMatch
match.success = True
match.weight = 1.0
match.result = result
else:
Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)
return True, 1.0, fragment.value, 1
return False, None, None, 1
include_separators = self.kwargs.get('include_separators', False)
# Populate CaptureMatch
match.success = True
match.weight = 1.0
if include_separators:
match.result = (fragment.left_sep, fragment.value, fragment.right_sep)
else:
match.result = fragment.value
return match
def __repr__(self):
attribute_values = [key + '=' + repr(getattr(self, key))

Loading…
Cancel
Save