Browse Source

Updated Caper to v0.3.1

pull/2599/head
Dean Gardiner 12 years ago
parent
commit
eb151a4c5d
  1. 24
      libs/caper/__init__.py
  2. 109
      libs/caper/constraint.py
  3. 212
      libs/caper/group.py
  4. 2
      libs/caper/matcher.py
  5. 43
      libs/caper/objects.py
  6. 4
      libs/caper/parsers/anime.py
  7. 14
      libs/caper/parsers/base.py
  8. 16
      libs/caper/parsers/scene.py
  9. 115
      libs/caper/parsers/usenet.py
  10. 93
      libs/caper/result.py
  11. 54
      libs/caper/step.py

24
libs/caper/__init__.py

@ -17,9 +17,10 @@ from caper.matcher import FragmentMatcher
from caper.objects import CaperFragment, CaperClosure from caper.objects import CaperFragment, CaperClosure
from caper.parsers.anime import AnimeParser from caper.parsers.anime import AnimeParser
from caper.parsers.scene import SceneParser from caper.parsers.scene import SceneParser
from caper.parsers.usenet import UsenetParser
__version_info__ = ('0', '2', '9') __version_info__ = ('0', '3', '1')
__version_branch__ = 'master' __version_branch__ = 'master'
__version__ = "%s%s" % ( __version__ = "%s%s" % (
@ -28,8 +29,9 @@ __version__ = "%s%s" % (
) )
CL_START_CHARS = ['(', '['] CL_START_CHARS = ['(', '[', '<', '>']
CL_END_CHARS = [')', ']'] CL_END_CHARS = [')', ']', '<', '>']
CL_END_STRINGS = [' - ']
STRIP_START_CHARS = ''.join(CL_START_CHARS) STRIP_START_CHARS = ''.join(CL_START_CHARS)
STRIP_END_CHARS = ''.join(CL_END_CHARS) STRIP_END_CHARS = ''.join(CL_END_CHARS)
@ -47,8 +49,9 @@ class Caper(object):
self.debug = debug self.debug = debug
self.parsers = { self.parsers = {
'anime': AnimeParser,
'scene': SceneParser, 'scene': SceneParser,
'anime': AnimeParser 'usenet': UsenetParser
} }
def _closure_split(self, name): def _closure_split(self, name):
@ -62,7 +65,7 @@ class Caper(object):
def end_closure(closures, buf): def end_closure(closures, buf):
buf = buf.strip(STRIP_CHARS) buf = buf.strip(STRIP_CHARS)
if len(buf) < 1: if len(buf) < 2:
return return
cur = CaperClosure(len(closures), buf) cur = CaperClosure(len(closures), buf)
@ -76,6 +79,7 @@ class Caper(object):
state = CL_START state = CL_START
buf = "" buf = ""
for x, ch in enumerate(name): for x, ch in enumerate(name):
# Check for start characters
if state == CL_START and ch in CL_START_CHARS: if state == CL_START and ch in CL_START_CHARS:
end_closure(closures, buf) end_closure(closures, buf)
@ -85,10 +89,17 @@ class Caper(object):
buf += ch buf += ch
if state == CL_END and ch in CL_END_CHARS: if state == CL_END and ch in CL_END_CHARS:
# End character found, create the closure
end_closure(closures, buf) end_closure(closures, buf)
state = CL_START state = CL_START
buf = "" buf = ""
elif state == CL_START and buf[-3:] in CL_END_STRINGS:
# End string found, create the closure
end_closure(closures, buf[:-3])
state = CL_START
buf = ""
end_closure(closures, buf) end_closure(closures, buf)
@ -174,6 +185,9 @@ class Caper(object):
for closure in closures: for closure in closures:
Logr.debug("closure [%s]", closure.value) Logr.debug("closure [%s]", closure.value)
for fragment in closure.fragments:
Logr.debug("\tfragment [%s]", fragment.value)
if parser not in self.parsers: if parser not in self.parsers:
raise ValueError("Unknown parser") raise ValueError("Unknown parser")

109
libs/caper/constraint.py

@ -14,7 +14,7 @@
class CaptureConstraint(object): class CaptureConstraint(object):
def __init__(self, capture_group, comparisons=None, **kwargs): def __init__(self, capture_group, constraint_type, comparisons=None, target=None, **kwargs):
"""Capture constraint object """Capture constraint object
:type capture_group: CaptureGroup :type capture_group: CaptureGroup
@ -22,50 +22,113 @@ class CaptureConstraint(object):
self.capture_group = capture_group self.capture_group = capture_group
self.constraint_type = constraint_type
self.target = target
self.comparisons = comparisons if comparisons else [] self.comparisons = comparisons if comparisons else []
self.kwargs = {}
for key, value in kwargs.items(): for orig_key, value in kwargs.items():
key = key.split('__') key = orig_key.split('__')
if len(key) != 2: if len(key) != 2:
self.kwargs[orig_key] = value
continue continue
name, method = key name, method = key
method = '_compare_' + method method = 'constraint_match_' + method
if not hasattr(self, method): if not hasattr(self, method):
self.kwargs[orig_key] = value
continue continue
self.comparisons.append((name, getattr(self, method), value)) self.comparisons.append((name, getattr(self, method), value))
def _compare_eq(self, fragment, name, expected): def execute(self, parent_node, node, **kwargs):
if not hasattr(fragment, name): func_name = 'constraint_%s' % self.constraint_type
return 1.0, False
return 1.0, getattr(fragment, name) == expected if hasattr(self, func_name):
return getattr(self, func_name)(parent_node, node, **kwargs)
def _compare_re(self, fragment, name, arg): raise ValueError('Unknown constraint type "%s"' % self.constraint_type)
if name == 'fragment':
group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)
weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, group) #
return weight, weight > minimum_weight # Node Matching
elif type(arg).__name__ == 'SRE_Pattern': #
return 1.0, arg.match(getattr(fragment, name)) is not None
elif hasattr(fragment, name):
match = self.capture_group.parser.matcher.value_match(getattr(fragment, name), arg, single=True)
return 1.0, match is not None
else:
raise ValueError("Unable to find attribute with name '%s'" % name)
def execute(self, fragment): def constraint_match(self, parent_node, node):
results = [] results = []
total_weight = 0 total_weight = 0
for name, method, argument in self.comparisons: for name, method, argument in self.comparisons:
weight, success = method(fragment, name, argument) weight, success = method(node, name, argument)
total_weight += weight total_weight += weight
results.append(success) results.append(success)
return total_weight / float(len(results)), all(results) if len(results) > 0 else False return total_weight / (float(len(results)) or 1), all(results) if len(results) > 0 else False
def constraint_match_eq(self, node, name, expected):
    """Test whether attribute `name` of `node` equals `expected`.

    :returns: ``(weight, success)`` tuple; the weight is always 1.0 and
        success is False when `node` has no attribute called `name`.
    """
    # Short-circuit keeps the getattr from running on a missing attribute
    is_equal = hasattr(node, name) and getattr(node, name) == expected
    return 1.0, is_equal
def constraint_match_re(self, node, name, arg):
    """Match `node`, or one of its attributes, against a pattern.

    :param node: object being tested
    :param name: ``'node'`` to run the parser's fragment matcher against
        `node` itself, otherwise the name of the attribute to test
    :param arg: pattern group name, ``(group, minimum_weight)`` tuple
        (only used for ``'node'``), or a compiled regular expression
    :returns: ``(weight, success)`` tuple
    :raises ValueError: if `arg` is not a compiled pattern and `node` has
        no attribute called `name`
    """
    import re  # local import: only needed to recognise compiled patterns

    # Node match
    if name == 'node':
        group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)

        weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(node, group)
        return weight, weight > minimum_weight

    # Regex match
    # The compiled pattern class is named 'SRE_Pattern' on older interpreters
    # and 'Pattern' on Python 3.7+, so compare against the real type instead
    # of its name.
    if isinstance(arg, type(re.compile(''))):
        return 1.0, arg.match(getattr(node, name)) is not None

    # Value match
    if hasattr(node, name):
        match = self.capture_group.parser.matcher.value_match(getattr(node, name), arg, single=True)
        return 1.0, match is not None

    raise ValueError("Unknown constraint match type '%s'" % name)
#
# Result
#
def constraint_result(self, parent_node, fragment):
    """Succeed once a result with the configured tag has been captured.

    Reads ``tag`` (required) and ``key`` (optional) from ``self.kwargs``
    and scans the ``(tag, result)`` pairs yielded by
    ``parent_node.captured()``.

    :param parent_node: node whose captured results are inspected
    :param fragment: unused; present to match the constraint signature
    :returns: ``(weight, success)`` tuple, ``(1.0, True)`` when a result
        with the tag (and, if given, the key) exists
    """
    ctag = self.kwargs.get('tag')
    if not ctag:
        return 0.0, False

    ckey = self.kwargs.get('key')

    for tag, result in parent_node.captured():
        if tag != ctag:
            continue

        if not ckey:
            return 1.0, True

        # Results are not always dicts (raw captures store a string or a
        # tuple), so only look the key up when the result can hold one.
        if isinstance(result, dict) and ckey in result:
            return 1.0, True

    return 0.0, False
#
# Failure
#
def constraint_failure(self, parent_node, fragment, match):
    """Succeed when the step produced no successful match.

    :param match: match object for the current subject, or None
    :returns: ``(1.0, True)`` if `match` is missing or unsuccessful,
        otherwise ``(0, False)``
    """
    step_matched = bool(match) and bool(match.success)

    if step_matched:
        return 0, False

    return 1.0, True
#
# Success
#
def constraint_success(self, parent_node, fragment, match):
    """Succeed when the step produced a successful match.

    :param match: match object for the current subject, or None
    :returns: ``(1.0, True)`` if `match` exists and is successful,
        otherwise ``(0, False)``
    """
    step_matched = bool(match) and bool(match.success)
    return (1.0, True) if step_matched else (0, False)
def __repr__(self): def __repr__(self):
return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons) return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons)

212
libs/caper/group.py

@ -14,7 +14,7 @@
from logr import Logr from logr import Logr
from caper import CaperClosure from caper import CaperClosure, CaperFragment
from caper.helpers import clean_dict from caper.helpers import clean_dict
from caper.result import CaperFragmentNode, CaperClosureNode from caper.result import CaperFragmentNode, CaperClosureNode
from caper.step import CaptureStep from caper.step import CaptureStep
@ -34,86 +34,214 @@ class CaptureGroup(object):
#: @type: list of CaptureStep #: @type: list of CaptureStep
self.steps = [] self.steps = []
#: type: str
self.step_source = None
#: @type: list of CaptureConstraint #: @type: list of CaptureConstraint
self.constraints = [] self.pre_constraints = []
#: :type: list of CaptureConstraint
self.post_constraints = []
def capture_fragment(self, tag, regex=None, func=None, single=True): def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs):
Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single) Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single)
if self.step_source != 'fragment':
if self.step_source is None:
self.step_source = 'fragment'
else:
raise ValueError("Unable to mix fragment and closure capturing in a group")
self.steps.append(CaptureStep( self.steps.append(CaptureStep(
self, tag, self, tag,
'fragment', 'fragment',
regex=regex, regex=regex,
func=func, func=func,
single=single single=single,
**kwargs
)) ))
return self return self
def capture_closure(self, tag, regex=None, func=None, single=True): def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single) Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)
if self.step_source != 'closure':
if self.step_source is None:
self.step_source = 'closure'
else:
raise ValueError("Unable to mix fragment and closure capturing in a group")
self.steps.append(CaptureStep( self.steps.append(CaptureStep(
self, tag, self, tag,
'closure', 'closure',
regex=regex, regex=regex,
func=func, func=func,
single=single single=single,
**kwargs
)) ))
return self return self
def until(self, **kwargs): def until_closure(self, **kwargs):
self.constraints.append(CaptureConstraint(self, **kwargs)) self.pre_constraints.append(CaptureConstraint(self, 'match', target='closure', **kwargs))
return self
def until_fragment(self, **kwargs):
    """Register a 'match' pre-constraint that targets fragments.

    Keyword arguments are forwarded to CaptureConstraint. Returns the
    group itself so further calls can be chained.
    """
    constraint = CaptureConstraint(self, 'match', target='fragment', **kwargs)
    self.pre_constraints.append(constraint)
    return self
def until_result(self, **kwargs):
    """Register a 'result' pre-constraint on this group.

    Keyword arguments are forwarded to CaptureConstraint. Returns the
    group itself so further calls can be chained.
    """
    constraint = CaptureConstraint(self, 'result', **kwargs)
    self.pre_constraints.append(constraint)
    return self
def until_failure(self, **kwargs):
    """Register a 'failure' post-constraint on this group.

    Keyword arguments are forwarded to CaptureConstraint. Returns the
    group itself so further calls can be chained.
    """
    constraint = CaptureConstraint(self, 'failure', **kwargs)
    self.post_constraints.append(constraint)
    return self
def until_success(self, **kwargs):
self.post_constraints.append(CaptureConstraint(self, 'success', **kwargs))
return self return self
def parse_subject(self, parent_head, subject): def parse_subject(self, parent_head, subject):
parent_node = parent_head[0] if type(parent_head) is list else parent_head Logr.debug("parse_subject (%s) subject: %s", self.step_source, repr(subject))
# TODO just jumping into closures for now, will be fixed later
if type(subject) is CaperClosure: if type(subject) is CaperClosure:
return [CaperClosureNode(subject, parent_head)] return self.parse_closure(parent_head, subject)
nodes = [] if type(subject) is CaperFragment:
return self.parse_fragment(parent_head, subject)
# Check constraints raise ValueError('Unknown subject (%s)', subject)
for constraint in self.constraints:
weight, success = constraint.execute(subject)
if success:
Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
parent_node.finished_groups.append(self)
nodes.append(parent_head)
if weight == 1.0: def parse_fragment(self, parent_head, subject):
return nodes parent_node = parent_head[0] if type(parent_head) is list else parent_head
else:
Logr.debug('Branching result')
# Try match subject against the steps available nodes, match = self.match(parent_head, parent_node, subject)
tag, success, weight, match, num_fragments = (None, None, None, None, None)
for step in self.steps: # Capturing broke on constraint, return now
tag = step.tag if not match:
success, weight, match, num_fragments = step.execute(subject) return nodes
if success:
match = clean_dict(match) if type(match) is dict else match
Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (weight, match, num_fragments))
break
Logr.debug('created fragment node with subject.value: "%s"' % subject.value) Logr.debug('created fragment node with subject.value: "%s"' % subject.value)
result = [CaperFragmentNode(parent_node.closure, subject.take_right(num_fragments), parent_head, tag, weight, match)] result = [CaperFragmentNode(
parent_node.closure,
subject.take_right(match.num_fragments),
parent_head,
match
)]
# Branch if the match was indefinite (weight below 1.0)
if match.result and match.weight < 1.0:
if match.num_fragments == 1:
result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head))
else:
nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head))
nodes.append(result[0] if len(result) == 1 else result)
return nodes
def parse_closure(self, parent_head, subject):
parent_node = parent_head[0] if type(parent_head) is list else parent_head
nodes, match = self.match(parent_head, parent_node, subject)
# Capturing broke on constraint, return now
if not match:
return nodes
Logr.debug('created closure node with subject.value: "%s"' % subject.value)
if match and weight < 1.0: result = [CaperClosureNode(
if num_fragments == 1: subject,
result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None)) parent_head,
match
)]
# Branch if the match was indefinite (weight below 1.0)
if match.result and match.weight < 1.0:
if match.num_fragments == 1:
result.append(CaperClosureNode(subject, parent_head))
else: else:
nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None)) nodes.append(CaperClosureNode(subject, parent_head))
nodes.append(result[0] if len(result) == 1 else result) nodes.append(result[0] if len(result) == 1 else result)
return nodes return nodes
def match(self, parent_head, parent_node, subject):
# Run the group's constraints and steps against `subject`.
# Returns a (nodes, match) pair. `match` is None when capturing broke on a
# constraint or when a closure is handed to a fragment step.
# NOTE(review): indentation is not visible in this view; the nesting described
# in the comments below is inferred from the statements - confirm against the file.
nodes = []
# Check pre constraints
broke, definite = self.check_constraints(self.pre_constraints, parent_head, subject)
if broke:
# Keep the current head as a result when a pre-constraint stops the capture
nodes.append(parent_head)
if definite:
return nodes, None
# Try match subject against the steps available
match = None
for step in self.steps:
if step.source == 'closure' and type(subject) is not CaperClosure:
pass
elif step.source == 'fragment' and type(subject) is CaperClosure:
Logr.debug('Closure encountered on fragment step, jumping into fragments')
# Wrap the closure in an unmatched closure node
return [CaperClosureNode(subject, parent_head, None)], None
match = step.execute(subject)
if match.success:
# Dict results are passed through clean_dict before use
if type(match.result) is dict:
match.result = clean_dict(match.result)
Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (
match.weight, match.result, match.num_fragments
))
step.matched = True
break
# The group is finished once every step is single-use and has matched
if all([step.single and step.matched for step in self.steps]):
Logr.debug('All steps completed, group finished')
parent_node.finished_groups.append(self)
return nodes, match
# Check post constraints
broke, definite = self.check_constraints(self.post_constraints, parent_head, subject, match=match)
if broke:
return nodes, None
return nodes, match
def check_constraints(self, constraints, parent_head, subject, **kwargs):
    """Run the applicable constraints against `subject`.

    A constraint applies when it has no target or when its target equals
    ``subject.__key__``. The first constraint that succeeds marks this
    group as finished on the parent node and ends the check.

    :param constraints: constraints to test
    :param parent_head: current head, either a node or a list of nodes
    :param subject: object being tested
    :param kwargs: extra arguments forwarded to ``constraint.execute``
    :returns: ``(True, weight == 1.0)`` when a constraint succeeded,
        otherwise ``(False, None)``
    """
    if type(parent_head) is list:
        parent_node = parent_head[0]
    else:
        parent_node = parent_head

    # Check constraints
    applicable = [
        candidate for candidate in constraints
        if candidate.target == subject.__key__ or not candidate.target
    ]

    for constraint in applicable:
        Logr.debug("Testing constraint %s against subject %s", repr(constraint), repr(subject))

        weight, success = constraint.execute(parent_node, subject, **kwargs)
        if not success:
            continue

        Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
        parent_node.finished_groups.append(self)
        return True, weight == 1.0

    return False, None
def execute(self): def execute(self):
heads_finished = None heads_finished = None
@ -126,20 +254,26 @@ class CaptureGroup(object):
for head in heads: for head in heads:
node = head[0] if type(head) is list else head node = head[0] if type(head) is list else head
Logr.debug("head node: %s" % node)
if self in node.finished_groups: if self in node.finished_groups:
Logr.debug("head finished for group") Logr.debug("head finished for group")
self.result.heads.append(head) self.result.heads.append(head)
heads_finished.append(True) heads_finished.append(True)
continue continue
Logr.debug('')
Logr.debug(node)
next_subject = node.next() next_subject = node.next()
Logr.debug('----------[%s] (%s)----------' % (next_subject, repr(next_subject.value) if next_subject else None))
if next_subject: if next_subject:
for node_result in self.parse_subject(head, next_subject): for node_result in self.parse_subject(head, next_subject):
self.result.heads.append(node_result) self.result.heads.append(node_result)
Logr.debug('Heads: %s', self.result.heads)
heads_finished.append(self in node.finished_groups or next_subject is None) heads_finished.append(self in node.finished_groups or next_subject is None)
if len(self.result.heads) == 0: if len(self.result.heads) == 0:

2
libs/caper/matcher.py

@ -71,7 +71,7 @@ class FragmentMatcher(object):
if group_name and group_name == name: if group_name and group_name == name:
return group_name, weight_groups return group_name, weight_groups
return None return None, None
def value_match(self, value, group_name=None, single=True): def value_match(self, value, group_name=None, single=True):
result = None result = None

43
libs/caper/objects.py

@ -16,6 +16,8 @@ from caper.helpers import xrange_six
class CaperClosure(object): class CaperClosure(object):
__key__ = 'closure'
def __init__(self, index, value): def __init__(self, index, value):
#: :type: int #: :type: int
self.index = index self.index = index
@ -31,8 +33,16 @@ class CaperClosure(object):
#: :type: list of CaperFragment #: :type: list of CaperFragment
self.fragments = [] self.fragments = []
def __str__(self):
return "<CaperClosure value: %s" % repr(self.value)
def __repr__(self):
return self.__str__()
class CaperFragment(object): class CaperFragment(object):
__key__ = 'fragment'
def __init__(self, closure=None): def __init__(self, closure=None):
#: :type: CaperClosure #: :type: CaperClosure
self.closure = closure self.closure = closure
@ -79,3 +89,36 @@ class CaperFragment(object):
def take_right(self, count, include_self=True): def take_right(self, count, include_self=True):
return self.take('right', count, include_self) return self.take('right', count, include_self)
def __str__(self):
return "<CaperFragment value: %s" % repr(self.value)
def __repr__(self):
return self.__str__()
class CaptureMatch(object):
    """Outcome of running one capture step against a subject."""

    def __init__(self, tag, step, success=False, weight=None, result=None, num_fragments=1):
        """Store the step identity and the outcome fields.

        :param tag: tag of the step that produced this match
        :param step: the step object itself
        :param success: whether the step matched
        :param weight: weight of the match, None until populated
        :param result: captured value
        :param num_fragments: number of fragments the match covers
        """
        #: :type: str
        self.tag = tag

        #: :type: CaptureStep
        self.step = step

        #: :type: bool
        self.success = success

        #: :type: float
        self.weight = weight

        #: :type: dict or str
        self.result = result

        #: :type: int
        self.num_fragments = num_fragments

    def __str__(self):
        """Return a short debug representation showing the result."""
        return "<CaperMatch result: {0}>".format(repr(self.result))

    def __repr__(self):
        """Use the same text as ``str()`` for the debug representation."""
        return str(self)

4
libs/caper/parsers/anime.py

@ -75,8 +75,8 @@ class AnimeParser(Parser):
.execute(once=True) .execute(once=True)
self.capture_fragment('show_name', single=False)\ self.capture_fragment('show_name', single=False)\
.until(value__re='identifier')\ .until_fragment(value__re='identifier')\
.until(value__re='video')\ .until_fragment(value__re='video')\
.execute() .execute()
self.capture_fragment('identifier', regex='identifier') \ self.capture_fragment('identifier', regex='identifier') \

14
libs/caper/parsers/base.py

@ -14,7 +14,7 @@
from caper import FragmentMatcher from caper import FragmentMatcher
from caper.group import CaptureGroup from caper.group import CaptureGroup
from caper.result import CaperResult, CaperClosureNode from caper.result import CaperResult, CaperClosureNode, CaperRootNode
from logr import Logr from logr import Logr
@ -52,7 +52,7 @@ class Parser(object):
self.reset() self.reset()
self.closures = closures self.closures = closures
self.result.heads = [CaperClosureNode(closures[0])] self.result.heads = [CaperRootNode(closures[0])]
def run(self, closures): def run(self, closures):
""" """
@ -65,18 +65,20 @@ class Parser(object):
# Capture Methods # Capture Methods
# #
def capture_fragment(self, tag, regex=None, func=None, single=True): def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs):
return CaptureGroup(self, self.result).capture_fragment( return CaptureGroup(self, self.result).capture_fragment(
tag, tag,
regex=regex, regex=regex,
func=func, func=func,
single=single single=single,
**kwargs
) )
def capture_closure(self, tag, regex=None, func=None, single=True): def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
return CaptureGroup(self, self.result).capture_closure( return CaptureGroup(self, self.result).capture_closure(
tag, tag,
regex=regex, regex=regex,
func=func, func=func,
single=single single=single,
**kwargs
) )

16
libs/caper/parsers/scene.py

@ -185,11 +185,11 @@ class SceneParser(Parser):
self.setup(closures) self.setup(closures)
self.capture_fragment('show_name', single=False)\ self.capture_fragment('show_name', single=False)\
.until(fragment__re='identifier')\ .until_fragment(node__re='identifier')\
.until(fragment__re='video')\ .until_fragment(node__re='video')\
.until(fragment__re='dvd')\ .until_fragment(node__re='dvd')\
.until(fragment__re='audio')\ .until_fragment(node__re='audio')\
.until(fragment__re='scene')\ .until_fragment(node__re='scene')\
.execute() .execute()
self.capture_fragment('identifier', regex='identifier', single=False)\ self.capture_fragment('identifier', regex='identifier', single=False)\
@ -197,7 +197,7 @@ class SceneParser(Parser):
.capture_fragment('dvd', regex='dvd', single=False)\ .capture_fragment('dvd', regex='dvd', single=False)\
.capture_fragment('audio', regex='audio', single=False)\ .capture_fragment('audio', regex='audio', single=False)\
.capture_fragment('scene', regex='scene', single=False)\ .capture_fragment('scene', regex='scene', single=False)\
.until(left_sep__eq='-', right__eq=None)\ .until_fragment(left_sep__eq='-', right__eq=None)\
.execute() .execute()
self.capture_fragment('group', func=self.capture_group)\ self.capture_fragment('group', func=self.capture_group)\
@ -222,7 +222,9 @@ class SceneParser(Parser):
Logr.debug(head[0].closure.value) Logr.debug(head[0].closure.value)
for node in head: for node in head:
Logr.debug('\t' + str(node).ljust(55) + '\t' + str(node.weight) + '\t' + str(node.match)) Logr.debug('\t' + str(node).ljust(55) + '\t' + (
str(node.match.weight) + '\t' + str(node.match.result)
) if node.match else '')
if len(head) > 0 and head[0].parent: if len(head) > 0 and head[0].parent:
self.print_tree([head[0].parent]) self.print_tree([head[0].parent])

115
libs/caper/parsers/usenet.py

@ -0,0 +1,115 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper import FragmentMatcher
from caper.parsers.base import Parser
# Regex pattern groups handed to FragmentMatcher by UsenetParser.
# Each entry pairs a group name with a list of patterns; the named regex groups
# (?P<...>) give the keys under which values are captured.
PATTERN_GROUPS = [
# 'usenet': bracketed tags and site names
('usenet', [
# bracketed token starting with '#', captured as `group`
r'\[(?P<group>#[\w\.@]+)\]',
# whole value is a single bracketed word, captured as `code`
r'^\[(?P<code>\w+)\]$',
# literal "[FULL]" marker
r'\[(?P<full>FULL)\]',
# literal "[TOWN]" marker (optional inner whitespace), captured as `group`
r'\[\s?(?P<group>TOWN)\s?\]',
# site name starting with "www.", captured as `site`
r'(.*?\s)?[_\W]*(?P<site>www\..*?\.[a-z0-9]+)[_\W]*(.*?\s)?',
# site name ending in .com/.org/.info with optional "www." prefix
r'(.*?\s)?[_\W]*(?P<site>(www\.)?[-\w]+\.(com|org|info))[_\W]*(.*?\s)?'
]),
# 'part': "<current>/<total>" counters
('part', [
r'.?(?P<current>\d+)/(?P<total>\d+).?'
]),
# 'detail': quoted file name, size in MB and the "yEnc" marker
('detail', [
r'[\s-]*\w*?[\s-]*\"(?P<file_name>.*?)\"[\s-]*\w*?[\s-]*(?P<size>[\d,\.]*\s?MB)?[\s-]*(?P<extra>yEnc)?',
r'(?P<size>[\d,\.]*\s?MB)[\s-]*(?P<extra>yEnc)',
r'(?P<size>[\d,\.]*\s?MB)|(?P<extra>yEnc)'
])
]
class UsenetParser(Parser):
    """Parser for usenet subject lines, built on the PATTERN_GROUPS regexes."""

    # FragmentMatcher shared by all instances; created on first construction
    matcher = None

    def __init__(self, debug=False):
        """Create the shared matcher if needed and initialise the parser.

        :param debug: forwarded to the base Parser constructor
        """
        if not UsenetParser.matcher:
            UsenetParser.matcher = FragmentMatcher(PATTERN_GROUPS)
            Logr.info("Fragment matcher for %s created", self.__class__.__name__)

        super(UsenetParser, self).__init__(UsenetParser.matcher, debug)

    def run(self, closures):
        """Run the capture groups over `closures` and return the result.

        :type closures: list of CaperClosure
        """

        self.setup(closures)

        # Capture usenet or part info until we get a part or matching fails
        self.capture_closure('usenet', regex='usenet', single=False)\
            .capture_closure('part', regex='part', single=True) \
            .until_result(tag='part') \
            .until_failure()\
            .execute()

        is_town_release, has_part = self.get_state()

        if not is_town_release:
            self.capture_release_name()

        # If we already have the part (TOWN releases), ignore matching part again
        if not is_town_release and not has_part:
            self.capture_fragment('part', regex='part', single=True)\
                .until_closure(node__re='usenet')\
                .until_success()\
                .execute()

        # Capture any leftover details
        self.capture_closure('usenet', regex='usenet', single=False)\
            .capture_closure('detail', regex='detail', single=False)\
            .execute()

        self.result.build()
        return self.result

    def capture_release_name(self):
        """Capture the release name and any detail closures around it."""
        self.capture_closure('detail', regex='detail', single=False)\
            .until_failure()\
            .execute()

        self.capture_fragment('release_name', single=False, include_separators=True) \
            .until_closure(node__re='usenet') \
            .until_closure(node__re='detail') \
            .until_closure(node__re='part') \
            .until_fragment(value__eq='-')\
            .execute()

        # Capture any detail after the release name
        self.capture_closure('detail', regex='detail', single=False)\
            .until_failure()\
            .execute()

    def get_state(self):
        """Inspect what the first head has captured so far.

        :returns: ``(is_town_release, has_part)`` tuple of booleans
        """
        # TODO multiple-chains?
        is_town_release = False
        has_part = False

        # A head may be a list of branch nodes rather than a single node;
        # use the first node in that case, as the capture groups do.
        head = self.result.heads[0]
        node = head[0] if type(head) is list else head

        for tag, result in node.captured():
            if tag == 'usenet' and result.get('group') == 'TOWN':
                is_town_release = True

            if tag == 'part':
                has_part = True

        return is_town_release, has_part

93
libs/caper/result.py

@ -20,7 +20,7 @@ GROUP_MATCHES = ['identifier']
class CaperNode(object): class CaperNode(object):
def __init__(self, closure, parent=None, tag=None, weight=None, match=None): def __init__(self, closure, parent=None, match=None):
""" """
:type parent: CaperNode :type parent: CaperNode
:type weight: float :type weight: float
@ -28,41 +28,77 @@ class CaperNode(object):
#: :type: caper.objects.CaperClosure #: :type: caper.objects.CaperClosure
self.closure = closure self.closure = closure
#: :type: CaperNode #: :type: CaperNode
self.parent = parent self.parent = parent
#: :type: str
self.tag = tag #: :type: CaptureMatch
#: :type: float
self.weight = weight
#: :type: dict
self.match = match self.match = match
#: :type: list of CaptureGroup #: :type: list of CaptureGroup
self.finished_groups = [] self.finished_groups = []
def next(self): def next(self):
raise NotImplementedError() raise NotImplementedError()
def captured(self):
    """Yield ``(tag, result)`` for this node and each ancestor with a match.

    Walks from this node up through ``parent`` links. Nodes without a
    match are skipped.
    """
    cur = self

    while True:
        if cur.match:
            yield cur.match.tag, cur.match.result

        parent = cur.parent

        # Nodes are created with the parent *head*, which can be a list of
        # branch nodes; follow the first one, as the capture groups do.
        if type(parent) is list:
            parent = parent[0] if parent else None

        if not parent:
            return

        cur = parent
class CaperRootNode(CaperNode):
    """Root of a result tree; it has no parent and no match."""

    def __init__(self, closure):
        """Initialise the node with only a closure.

        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        super(CaperRootNode, self).__init__(closure)

    def next(self):
        """Return the closure this root node was created with."""
        start = self.closure
        return start
class CaperClosureNode(CaperNode): class CaperClosureNode(CaperNode):
def __init__(self, closure, parent=None, tag=None, weight=None, match=None): def __init__(self, closure, parent=None, match=None):
""" """
:type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
""" """
super(CaperClosureNode, self).__init__(closure, parent, tag, weight, match) super(CaperClosureNode, self).__init__(closure, parent, match)
def next(self): def next(self):
if self.closure and len(self.closure.fragments) > 0: if not self.closure:
return None
if self.match:
# Jump to next closure if we have a match
return self.closure.right
elif len(self.closure.fragments) > 0:
# Otherwise parse the fragments
return self.closure.fragments[0] return self.closure.fragments[0]
return None return None
def __str__(self):
return "<CaperClosureNode match: %s>" % repr(self.match)
def __repr__(self):
return self.__str__()
class CaperFragmentNode(CaperNode): class CaperFragmentNode(CaperNode):
def __init__(self, closure, fragments, parent=None, tag=None, weight=None, match=None): def __init__(self, closure, fragments, parent=None, match=None):
""" """
:type closure: caper.objects.CaperClosure :type closure: caper.objects.CaperClosure
:type fragments: list of caper.objects.CaperFragment :type fragments: list of caper.objects.CaperFragment
""" """
super(CaperFragmentNode, self).__init__(closure, parent, tag, weight, match) super(CaperFragmentNode, self).__init__(closure, parent, match)
#: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment #: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment
self.fragments = fragments self.fragments = fragments
@ -76,6 +112,12 @@ class CaperFragmentNode(CaperNode):
return None return None
def __str__(self):
return "<CaperFragmentNode match: %s>" % repr(self.match)
def __repr__(self):
return self.__str__()
class CaperResult(object): class CaperResult(object):
def __init__(self): def __init__(self):
@ -122,15 +164,8 @@ class CaperResult(object):
result.append(node_chain) result.append(node_chain)
continue continue
# Skip over closure nodes node_chain.update(node)
if type(node) is CaperClosureNode: result.extend(self.combine_chain(node.parent, node_chain))
result.extend(self.combine_chain(node.parent, node_chain))
# Parse fragment matches
if type(node) is CaperFragmentNode:
node_chain.update(node)
result.extend(self.combine_chain(node.parent, node_chain))
return result return result
@ -145,17 +180,23 @@ class CaperResultChain(object):
self.weights = [] self.weights = []
def update(self, subject): def update(self, subject):
if subject.weight is None: """
:type subject: CaperFragmentNode
"""
if not subject.match or not subject.match.success:
return return
self.num_matched += len(subject.fragments) if subject.fragments is not None else 0 # TODO this should support closure nodes
self.weights.append(subject.weight) if type(subject) is CaperFragmentNode:
self.num_matched += len(subject.fragments) if subject.fragments is not None else 0
self.weights.append(subject.match.weight)
if subject.match: if subject.match:
if subject.tag not in self.info: if subject.match.tag not in self.info:
self.info[subject.tag] = [] self.info[subject.match.tag] = []
self.info[subject.tag].insert(0, subject.match) self.info[subject.match.tag].insert(0, subject.match.result)
def finish(self): def finish(self):
self.weight = sum(self.weights) / len(self.weights) self.weight = sum(self.weights) / len(self.weights)

54
libs/caper/step.py

@ -12,13 +12,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from caper.objects import CaptureMatch
from logr import Logr from logr import Logr
class CaptureStep(object): class CaptureStep(object):
REPR_KEYS = ['regex', 'func', 'single'] REPR_KEYS = ['regex', 'func', 'single']
def __init__(self, capture_group, tag, source, regex=None, func=None, single=None): def __init__(self, capture_group, tag, source, regex=None, func=None, single=None, **kwargs):
#: @type: CaptureGroup #: @type: CaptureGroup
self.capture_group = capture_group self.capture_group = capture_group
@ -33,22 +34,57 @@ class CaptureStep(object):
#: @type: bool #: @type: bool
self.single = single self.single = single
self.kwargs = kwargs
self.matched = False
def execute(self, fragment): def execute(self, fragment):
"""Execute step on fragment
:type fragment: CaperFragment
:rtype : CaptureMatch
"""
match = CaptureMatch(self.tag, self)
if self.regex: if self.regex:
weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex) weight, result, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex)
Logr.debug('(execute) [regex] tag: "%s"', self.tag) Logr.debug('(execute) [regex] tag: "%s"', self.tag)
if match:
return True, weight, match, num_fragments if not result:
return match
# Populate CaptureMatch
match.success = True
match.weight = weight
match.result = result
match.num_fragments = num_fragments
elif self.func: elif self.func:
match = self.func(fragment) result = self.func(fragment)
Logr.debug('(execute) [func] %s += "%s"', self.tag, match) Logr.debug('(execute) [func] %s += "%s"', self.tag, match)
if match:
return True, 1.0, match, 1 if not result:
return match
# Populate CaptureMatch
match.success = True
match.weight = 1.0
match.result = result
else: else:
Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value) Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)
return True, 1.0, fragment.value, 1
return False, None, None, 1 include_separators = self.kwargs.get('include_separators', False)
# Populate CaptureMatch
match.success = True
match.weight = 1.0
if include_separators:
match.result = (fragment.left_sep, fragment.value, fragment.right_sep)
else:
match.result = fragment.value
return match
def __repr__(self): def __repr__(self):
attribute_values = [key + '=' + repr(getattr(self, key)) attribute_values = [key + '=' + repr(getattr(self, key))

Loading…
Cancel
Save