23 changed files with 2076 additions and 130 deletions
@ -0,0 +1,161 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from logr import Logr |
||||
|
from caper.matcher import FragmentMatcher |
||||
|
from caper.objects import CaperFragment, CaperClosure |
||||
|
from caper.parsers.anime import AnimeParser |
||||
|
from caper.parsers.scene import SceneParser |
||||
|
|
||||
|
|
||||
|
# Package version, kept as string components so they can be joined directly.
__version_info__ = ('0', '2', '0')
__version_branch__ = 'master'

# "<major>.<minor>.<patch>", followed by "-<branch>" when a branch name is set.
__version__ = '.'.join(__version_info__)
if __version_branch__:
    __version__ = "%s-%s" % (__version__, __version_branch__)


# Characters that open / close a closure (a bracketed section of a name).
CL_START_CHARS = ['(', '[']
CL_END_CHARS = [')', ']']

# Character sets passed to str.strip / lstrip / rstrip while cleaning values.
STRIP_START_CHARS = ''.join(CL_START_CHARS)
STRIP_END_CHARS = ''.join(CL_END_CHARS)
STRIP_CHARS = '_ .'

# Characters that separate one fragment from the next inside a closure.
FRAGMENT_SEPARATORS = ['.', '-', '_', ' ']


# States of the closure splitter: waiting for an opening / a closing character.
CL_START, CL_END = 0, 1
||||
|
|
||||
|
|
||||
|
class Caper(object):
    """Split a name into closures and fragments and run a parser over them.

    A closure is either a bracketed/parenthesised section of the name or the
    text found between such sections. A fragment is one separator-delimited
    token inside a closure.
    """

    def __init__(self):
        # Parsers selectable by name through `parse`
        self.parsers = {
            'scene': SceneParser(),
            'anime': AnimeParser()
        }

    def _closure_split(self, name):
        """Split `name` into closures linked to their left/right neighbours.

        :type name: str

        :rtype: list of CaperClosure
        """

        closures = []

        def end_closure(buf):
            # Trim surrounding separator characters, drop buffers left empty
            buf = buf.strip(STRIP_CHARS)
            if not buf:
                return

            cur = CaperClosure(buf)
            cur.left = closures[-1] if closures else None

            if cur.left:
                cur.left.right = cur

            closures.append(cur)

        state = CL_START
        buf = ""
        for ch in name:
            if state == CL_START and ch in CL_START_CHARS:
                # An opening character finishes the text collected so far
                end_closure(buf)

                state = CL_END
                buf = ""

            # The opening/closing characters stay part of the closure value
            buf += ch

            if state == CL_END and ch in CL_END_CHARS:
                end_closure(buf)

                state = CL_START
                buf = ""

        # Whatever remains after the last closing character
        end_closure(buf)

        return closures

    def _clean_closure(self, closure):
        """Strip opening characters from the left and closing ones from the right.

        :type closure: str

        :rtype: str
        """

        return closure.lstrip(STRIP_START_CHARS).rstrip(STRIP_END_CHARS)

    def _fragment_split(self, closures):
        """Fill `fragments` on every closure by splitting its cleaned value.

        Each fragment is linked to its neighbours and records the separator
        found on either side. The last fragment of a closure has no separator
        after it, so its `right_sep` is None.

        :type closures: list of CaperClosure

        :rtype: list of CaperClosure
        """

        def end_fragment(fragments, fragment, position, separator):
            fragment.position = position

            fragment.left = fragments[-1] if fragments else None
            if fragment.left:
                fragment.left_sep = fragment.left.right_sep
                fragment.left.right = fragment

            # The separator is passed in explicitly rather than read from the
            # enclosing loop variable, which would hand the final fragment its
            # own last character instead of a separator.
            fragment.right_sep = separator

            fragments.append(fragment)

        for closure in closures:
            closure.fragments = []

            # Position and the fragment being built restart for every closure
            cur_position = 0
            cur = CaperFragment()

            for ch in self._clean_closure(closure.value):
                if ch in FRAGMENT_SEPARATORS:
                    end_fragment(closure.fragments, cur, cur_position, ch)

                    # Reset
                    cur = CaperFragment()
                    cur_position += 1
                else:
                    cur.value += ch

            # Finish parsing the last fragment
            if cur.value != "":
                end_fragment(closure.fragments, cur, cur_position, None)

        return closures

    def parse(self, name, parser='scene'):
        """Split `name` and return the result of running the chosen parser.

        :type name: str
        :param parser: key into `self.parsers` ('scene' or 'anime')
        :type parser: str

        :raises ValueError: if `parser` is not a registered parser name
        """
        closures = self._closure_split(name)
        closures = self._fragment_split(closures)

        # Print closures
        for closure in closures:
            Logr.debug("closure [%s]", closure.value)

        if parser not in self.parsers:
            raise ValueError("Unknown parser")

        # TODO autodetect the parser type
        return self.parsers[parser].run(closures)
@ -0,0 +1,74 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
|
||||
|
class CaptureConstraint(object):
    """A set of comparisons evaluated against a fragment.

    Comparisons are given as keyword arguments of the form
    ``<attribute>__<method>=<argument>`` (for example ``value__re='video'``),
    where ``<method>`` selects one of the ``_compare_<method>`` methods.
    """

    def __init__(self, capture_group, comparisons=None, **kwargs):
        """Capture constraint object

        :type capture_group: CaptureGroup
        :param comparisons: optional list of (name, method, argument) tuples
        :param kwargs: ``<attribute>__<method>=<argument>`` comparisons; keys
                       that do not split into exactly two parts, or that name
                       an unknown method, are ignored
        """

        self.capture_group = capture_group

        # Copy, so the appends below never mutate a list owned by the caller
        self.comparisons = list(comparisons) if comparisons else []

        for key, value in kwargs.items():
            parts = key.split('__')
            if len(parts) != 2:
                continue
            name, method = parts

            compare = getattr(self, '_compare_' + method, None)
            if compare is None:
                continue

            self.comparisons.append((name, compare, value))

    def _compare_eq(self, fragment, name, expected):
        """Compare attribute `name` of `fragment` with `expected`.

        :return: (weight, success); (0.0, False) when the attribute is missing
        :rtype: (float, bool)
        """
        if not hasattr(fragment, name):
            # Always return a pair, `execute` unpacks the result
            return 0.0, False

        return 1.0, getattr(fragment, name) == expected

    def _compare_re(self, fragment, name, arg):
        """Match `fragment` (or one of its attributes) against a pattern.

        :param name: 'fragment' to match the fragment chain against a pattern
                     group, otherwise the attribute of `fragment` to test
        :param arg: for 'fragment' a group name or (group name, minimum weight)
                    tuple; otherwise a compiled pattern or a group name
        :return: (weight, success)
        :rtype: (float, bool)
        :raises ValueError: if `fragment` has no attribute called `name`
        """
        if name == 'fragment':
            group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)

            weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, group)
            return weight, weight > minimum_weight
        elif hasattr(arg, 'match') and hasattr(arg, 'pattern'):
            # Compiled pattern, detected by its interface because the type name
            # differs between Python versions
            return 1.0, arg.match(getattr(fragment, name)) is not None
        elif hasattr(fragment, name):
            match = self.capture_group.parser.matcher.value_match(getattr(fragment, name), arg, single=True)
            return 1.0, match is not None

        raise ValueError("Unable to find fragment with name '%s'" % name)

    def execute(self, fragment):
        """Run every comparison against `fragment`.

        :return: (average weight, True when all comparisons succeeded);
                 (0.0, False) when there are no comparisons
        :rtype: (float, bool)
        """
        if not self.comparisons:
            # Nothing to average, avoid dividing by zero
            return 0.0, False

        results = []
        total_weight = 0

        for name, method, argument in self.comparisons:
            weight, success = method(fragment, name, argument)
            total_weight += weight
            results.append(success)

        return total_weight / float(len(results)), all(results)

    def __repr__(self):
        return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons)
@ -0,0 +1,147 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
|
||||
|
from logr import Logr |
||||
|
from caper.helpers import clean_dict |
||||
|
from caper.result import CaperFragmentNode |
||||
|
from caper.step import CaptureStep |
||||
|
from caper.constraint import CaptureConstraint |
||||
|
|
||||
|
|
||||
|
class CaptureGroup(object):
    """A chain of capture steps and constraints run against the result heads.

    Steps are added with `capture_fragment` / `capture_closure`, constraints
    with `until`; all three return the group so calls can be chained before
    `execute` is called.
    """

    def __init__(self, parser, result):
        """Capture group object

        :type parser: caper.parsers.base.Parser
        :type result: caper.result.CaperResult
        """

        self.parser = parser
        self.result = result

        #: @type: list of CaptureStep
        self.steps = []
        #: @type: list of CaptureConstraint
        self.constraints = []

    def capture_fragment(self, tag, regex=None, func=None, single=True):
        """Append a 'fragment' capture step and return this group."""
        Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single)

        self.steps.append(CaptureStep(
            self, tag,
            'fragment',
            regex=regex,
            func=func,
            single=single
        ))

        return self

    def capture_closure(self, tag, regex=None, func=None, single=True):
        """Append a 'closure' capture step and return this group."""
        Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)

        self.steps.append(CaptureStep(
            self, tag,
            'closure',
            regex=regex,
            func=func,
            single=single
        ))

        return self

    def until(self, **kwargs):
        """Append a constraint built from `kwargs` and return this group."""
        self.constraints.append(CaptureConstraint(self, **kwargs))

        return self

    def parse_subject(self, parent_head, subject):
        """Build the head entries that follow `parent_head` for `subject`.

        :param parent_head: a node, or a list whose first item is the node
        :param subject: the object returned by the head node's `next()`
        :return: list of new heads (each a node or a list of nodes)
        :rtype: list
        """
        parent_node = parent_head[0] if type(parent_head) is list else parent_head

        # TODO - if subject is a closure?

        nodes = []

        # Check constraints
        for constraint in self.constraints:
            weight, success = constraint.execute(subject)
            if success:
                Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
                parent_node.finished_groups.append(self)
                nodes.append(parent_head)

                if weight == 1.0:
                    # Full-weight constraint match: stop here, nothing is captured
                    return nodes
                else:
                    # Partial-weight match: keep the finished head and carry on
                    # capturing below as well
                    Logr.debug('Branching result')

        # Try match subject against the steps available
        tag, success, weight, match, num_fragments = (None, None, None, None, None)
        for step in self.steps:
            tag = step.tag
            success, weight, match, num_fragments = step.execute(subject)
            if success:
                match = clean_dict(match) if type(match) is dict else match
                Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (weight, match, num_fragments))
                break

        Logr.debug('created fragment node with subject.value: "%s"' % subject.value)

        result = [CaperFragmentNode(parent_node.closure, subject.take_right(num_fragments), parent_head, tag, weight, match)]

        # A match below full weight also gets an untagged alternative node for
        # the subject alone
        if match and weight < 1.0:
            if num_fragments == 1:
                result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))
            else:
                nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))

        nodes.append(result[0] if len(result) == 1 else result)

        return nodes

    def execute(self, once=False):
        """Advance the result heads until every head is finished for this group.

        :param once: when True, make a single pass over the current heads and
                     stop, whether or not every head has finished
        :type once: bool
        """
        heads_finished = None

        while heads_finished is None or not (len(heads_finished) == len(self.result.heads) and all(heads_finished)):
            heads_finished = []

            heads = self.result.heads
            self.result.heads = []

            for head in heads:
                node = head[0] if type(head) is list else head

                Logr.debug("head node: %s" % node)

                if self in node.finished_groups:
                    Logr.debug("head finished for group")
                    self.result.heads.append(head)
                    heads_finished.append(True)
                    continue

                next_subject = node.next()

                if next_subject:
                    for node_result in self.parse_subject(head, next_subject):
                        self.result.heads.append(node_result)

                heads_finished.append(self in node.finished_groups or next_subject is None)

            # No head produced a successor, keep the previous heads
            if len(self.result.heads) == 0:
                self.result.heads = heads

            Logr.debug("heads_finished: %s, self.result.heads: %s", heads_finished, self.result.heads)

            if once:
                break

        Logr.debug("group finished")
@ -0,0 +1,64 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
import sys |
||||
|
|
||||
|
|
||||
|
# True when the running interpreter's major version is 2.
PY2 = sys.version_info[0] == 2
# True when the running interpreter's major version is 3.
PY3 = sys.version_info[0] == 3
||||
|
|
||||
|
|
||||
|
def is_list_type(obj, element_type):
    """Tell whether `obj` is a list whose first element is an `element_type`.

    Only the first element is inspected, and both checks use exact type
    identity (subclasses do not count).

    :return: False when `obj` is not exactly a list
    :rtype: bool
    :raises ValueError: if `obj` is an empty list
    """
    if type(obj) is list:
        if not obj:
            raise ValueError("Unable to determine list element type from empty list")

        first = obj[0]
        return type(first) is element_type

    return False
||||
|
|
||||
|
|
||||
|
def clean_dict(target, remove=None):
    """Recursively drop every item equal to `remove` from `target`, in place.

    Nested values that are exactly of type dict are cleaned the same way and
    are themselves never removed.

    :type target: dict
    :param remove: value to delete wherever it appears
    :return: the same `target` object
    :rtype: dict
    :raises ValueError: if `target` is not exactly a dict
    """
    if type(target) is not dict:
        raise ValueError("Target is required to be a dict")

    # Collect first, delete afterwards: the dict must not change size while
    # it is being iterated
    doomed = []
    for key, value in target.items():
        if type(value) is dict:
            clean_dict(value, remove)
        elif value == remove:
            doomed.append(key)

    for key in doomed:
        del target[key]

    return target
||||
|
|
||||
|
|
||||
|
def xrange_six(start, stop=None, step=None):
    """Return a lazy integer range on both Python 2 and Python 3.

    Mirrors the built-in range signatures:

    * ``xrange_six(n)``                 -> 0 .. n - 1
    * ``xrange_six(start, stop)``       -> start .. stop - 1
    * ``xrange_six(start, stop, step)`` -> start, start + step, ...

    A `step` given without a `stop` is ignored.
    """
    try:
        range_type = xrange  # Python 2, the lazy variant
    except NameError:
        range_type = range  # Python 3, range is already lazy

    if stop is None:
        return range_type(start)

    if step is None:
        # Two-argument form: `stop` must be honoured, not dropped
        return range_type(start, stop)

    return range_type(start, stop, step)
@ -0,0 +1,193 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
import pprint |
||||
|
import re |
||||
|
from logr import Logr |
||||
|
from caper.helpers import is_list_type, clean_dict |
||||
|
|
||||
|
|
||||
|
class FragmentMatcher(object):
    """Compile named pattern groups and match values or fragments against them.

    `self.regex` maps a group name to a list of ``(weight, patterns)`` tuples,
    where every pattern is a tuple of compiled regular expressions, one per
    consecutive fragment.
    """

    def __init__(self, pattern_groups):
        """
        :param pattern_groups: iterable of ``(group_name, patterns)``. `patterns`
            is either a list of patterns (given weight 1.0) or a list of
            ``(weight, [pattern, ...])`` tuples. A pattern is a regex string,
            a tuple of regex strings (one per fragment), or a
            ``(template, [alternatives])`` pair whose alternatives are joined
            with ``|`` and substituted into the template.
        """
        self.regex = {}

        for group_name, patterns in pattern_groups:
            if group_name not in self.regex:
                self.regex[group_name] = []

            # Transform into weight groups
            if type(patterns[0]) is str or type(patterns[0][0]) not in [int, float]:
                patterns = [(1.0, patterns)]

            for weight, weighted_patterns in patterns:
                compiled = [self._compile_pattern(pattern) for pattern in weighted_patterns]
                self.regex[group_name].append((weight, compiled))

        # Debug dump goes to the logger instead of being printed to stdout
        Logr.debug('regex groups: %s', pprint.pformat(self.regex))

    @staticmethod
    def _compile_pattern(pattern):
        """Normalise one pattern definition into a tuple of compiled regexes."""
        # Transform into multi-fragment patterns
        if type(pattern) is str:
            pattern = (pattern,)

        if type(pattern) is tuple and len(pattern) == 2:
            # A (template, [alternatives]) pair describes a single fragment
            if type(pattern[0]) is str and is_list_type(pattern[1], str):
                pattern = (pattern,)

        result = []
        for value in pattern:
            if type(value) is tuple:
                if len(value) == 2:
                    # Construct OR-list pattern
                    value = value[0] % '|'.join(value[1])
                elif len(value) == 1:
                    value = value[0]

            result.append(re.compile(value, re.IGNORECASE))

        return tuple(result)

    def find_group(self, name):
        """Return ``(name, weight_groups)`` for group `name`, or None if unknown."""
        if name and name in self.regex:
            return name, self.regex[name]

        return None

    def parser_match(self, parser, group_name, single=True):
        """Match upcoming parser fragments against a group's patterns.

        Only the first weight group of each group is used. The parser is
        committed after a pattern matches and rewound after one fails.

        :type parser: caper.parsers.base.Parser
        :param group_name: restrict matching to this group; falsy for all groups
        :param single: return as soon as the first pattern matches
        :return: ``{group: {name: value}}`` or None when nothing matched
        """
        result = None

        for group, weight_groups in self.regex.items():
            if group_name and group != group_name:
                continue

            # TODO handle multiple weights
            weight, patterns = weight_groups[0]

            for pattern in patterns:
                fragments = []
                pattern_matched = True
                pattern_result = {}

                for fragment_pattern in pattern:
                    if not parser.fragment_available():
                        pattern_matched = False
                        break

                    fragment = parser.next_fragment()
                    fragments.append(fragment)

                    Logr.debug('[r"%s"].match("%s")', fragment_pattern.pattern, fragment.value)
                    match = fragment_pattern.match(fragment.value)
                    if match:
                        Logr.debug('Pattern "%s" matched', fragment_pattern.pattern)
                    else:
                        pattern_matched = False
                        break

                    pattern_result.update(clean_dict(match.groupdict()))

                if pattern_matched:
                    if result is None:
                        result = {}

                    if group not in result:
                        result[group] = {}

                    Logr.debug('Matched on <%s>', ' '.join([f.value for f in fragments]))

                    result[group].update(pattern_result)
                    parser.commit()

                    if single:
                        return result
                else:
                    parser.rewind()

        return result

    def value_match(self, value, group_name=None, single=True):
        """Match a single string against the first regex of each pattern.

        Only the first weight group of each group is used.

        :type value: str
        :param group_name: restrict matching to this group; falsy for all groups
        :param single: return as soon as the first pattern matches
        :return: ``{group: {name: value}}`` or None when nothing matched
        """
        result = None

        for group, weight_groups in self.regex.items():
            if group_name and group != group_name:
                continue

            # TODO handle multiple weights
            weight, patterns = weight_groups[0]

            for pattern in patterns:
                match = pattern[0].match(value)
                if not match:
                    continue

                if result is None:
                    result = {}
                if group not in result:
                    result[group] = {}

                result[group].update(match.groupdict())

                if single:
                    return result

        return result

    def fragment_match(self, fragment, group_name=None):
        """Follow a fragment chain to try find a match

        :type fragment: caper.objects.CaperFragment
        :type group_name: str or None

        :return: The weight of the match found between 0.0 and 1.0,
                 where 1.0 means perfect match and 0.0 means no match;
                 followed by the matched groups and the number of fragments
                 the pattern covers. An unknown group is reported as no match.
        :rtype: (float, dict, int)
        """

        found = self.find_group(group_name)
        if found is None:
            # Unknown group: report "no match" instead of failing to unpack
            return 0.0, None, 1

        group_name, weight_groups = found

        for weight, patterns in weight_groups:
            for pattern in patterns:
                cur_fragment = fragment
                success = True
                result = {}

                # Ignore empty patterns (skip only this one, keep trying the rest)
                if len(pattern) < 1:
                    continue

                for fragment_pattern in pattern:
                    if not cur_fragment:
                        # Ran out of fragments before the pattern was complete
                        success = False
                        break

                    match = fragment_pattern.match(cur_fragment.value)
                    if match:
                        result.update(match.groupdict())
                    else:
                        success = False
                        break

                    cur_fragment = cur_fragment.right if cur_fragment else None

                if success:
                    Logr.debug("Found match with weight %s" % weight)
                    return float(weight), result, len(pattern)

        return 0.0, None, 1
@ -0,0 +1,75 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from caper.helpers import xrange_six |
||||
|
|
||||
|
|
||||
|
class CaperClosure(object):
    """One closure of a name, linked to the closures on either side of it."""

    def __init__(self, value):
        #: :type: str
        self.value = value

        # Neighbouring closures, both unset until the closure is linked
        #: :type: CaperClosure
        self.left = None
        #: :type: CaperClosure
        self.right = None

        #: :type: list of CaperFragment
        self.fragments = list()
||||
|
|
||||
|
|
||||
|
class CaperFragment(object):
    """One token of a closure, linked to the fragments on either side of it."""

    def __init__(self):
        #: :type: str
        self.value = ""

        # Left neighbour and the separator between it and this fragment
        #: :type: CaperFragment
        self.left = None
        #: :type: str
        self.left_sep = None

        # Right neighbour and the separator between this fragment and it
        #: :type: CaperFragment
        self.right = None
        #: :type: str
        self.right_sep = None

        #: :type: int
        self.position = None

    def take(self, direction, count, include_self=True):
        """Collect `count` fragments walking the chain in `direction`.

        :param direction: 'left' or 'right'
        :param count: number of entries to return; includes this fragment
                      when `include_self` is set
        :param include_self: start the result with this fragment
        :return: list of exactly `count` entries for a positive count, padded
                 with None once the chain runs out
        :rtype: list of CaperFragment
        :raises ValueError: if `direction` is neither 'left' nor 'right'
        """
        if direction not in ['left', 'right']:
            raise ValueError('Un-Expected value for "direction", expected "left" or "right".')

        taken = [self] if include_self else []
        remaining = count - 1 if include_self else count

        node = self
        while remaining > 0:
            neighbour = getattr(node, direction) if node else None

            if neighbour:
                node = neighbour
                taken.append(node)
            else:
                # End of the chain: pad, and keep padding from here on
                node = None
                taken.append(None)

            remaining -= 1

        return taken

    def take_left(self, count, include_self=True):
        """Shortcut for `take` walking towards the left."""
        return self.take('left', count, include_self)

    def take_right(self, count, include_self=True):
        """Shortcut for `take` walking towards the right."""
        return self.take('right', count, include_self)
@ -0,0 +1,88 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
import re |
||||
|
from caper.parsers.base import Parser |
||||
|
|
||||
|
|
||||
|
# Matches a "(...)" or "[...]" section at the start of a string and captures
# the text between the brackets as "group".
REGEX_GROUP = re.compile(r'(\(|\[)(?P<group>.*?)(\)|\])', re.IGNORECASE)


# Pattern groups AnimeParser passes to the base Parser constructor.
# Each entry is (group_name, [pattern, ...]); a pattern here is a regex string,
# a tuple of regex strings, or a (template, [alternatives]) pair.
PATTERN_GROUPS = [
    ('identifier', [
        r'S(?P<season>\d+)E(?P<episode>\d+)',
        r'(S(?P<season>\d+))|(E(?P<episode>\d+))',

        r'Ep(?P<episode>\d+)',
        # NOTE(review): "$" comes before the digits and "^" after them, so this
        # pattern looks like it can never match - the anchors appear to be
        # reversed. Confirm the intent before changing it, since a working
        # pattern would make any all-digit value count as an identifier.
        r'$(?P<absolute>\d+)^',

        (r'Episode', r'(?P<episode>\d+)'),
    ]),
    ('video', [
        (r'(?P<h264_profile>%s)', [
            'Hi10P'
        ]),
        # NOTE(review): the leading "." requires one arbitrary character before
        # the resolution - confirm this is intended.
        (r'.(?P<resolution>%s)', [
            '720p',
            '1080p',

            '960x720',
            '1920x1080'
        ]),
        (r'(?P<source>%s)', [
            'BD'
        ]),
    ]),
    ('audio', [
        (r'(?P<codec>%s)', [
            'FLAC'
        ]),
    ])
]
||||
|
|
||||
|
|
||||
|
class AnimeParser(Parser):
    """Parser built on the anime pattern groups defined in this module."""

    def __init__(self):
        super(AnimeParser, self).__init__(PATTERN_GROUPS)

    def capture_group(self, fragment):
        """Return the bracketed text at the start of `fragment.value`, or None."""
        found = REGEX_GROUP.match(fragment.value)

        return found.group('group') if found else None

    def run(self, closures):
        """Run the capture groups over `closures` and return the built result.

        :type closures: list of CaperClosure
        """

        self.setup(closures)

        # Group closure, captured through `capture_group`
        group_capture = self.capture_closure('group', func=self.capture_group)
        group_capture.execute(once=True)

        # Show name: every fragment until an identifier or video value appears
        name_capture = self.capture_fragment('show_name', single=False)
        name_capture = name_capture.until(value__re='identifier')
        name_capture = name_capture.until(value__re='video')
        name_capture.execute()

        # Identifier, then video and audio details
        detail_capture = self.capture_fragment('identifier', regex='identifier')
        detail_capture = detail_capture.capture_fragment('video', regex='video', single=False)
        detail_capture = detail_capture.capture_fragment('audio', regex='audio', single=False)
        detail_capture.execute()

        built = self.result
        built.build()
        return built
@ -0,0 +1,136 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from logr import Logr |
||||
|
from caper import FragmentMatcher |
||||
|
from caper.group import CaptureGroup |
||||
|
from caper.result import CaperResult, CaperClosureNode |
||||
|
|
||||
|
|
||||
|
class Parser(object):
    """Base parser: holds the matcher, the closures being parsed and a cursor.

    The cursor is a closure index plus a fragment index inside that closure.
    Every cursor move is recorded in a history so it can be undone by `rewind`
    or accepted by `commit`. Subclasses implement `run`.
    """

    def __init__(self, pattern_groups):
        self.matcher = FragmentMatcher(pattern_groups)

        # Declared here, given their working values by reset()
        self.closures = None
        #: :type: caper.result.CaperResult
        self.result = None

        self._match_cache = None
        self._fragment_pos = None
        self._closure_pos = None
        self._history = None

        self.reset()

    def reset(self):
        """Drop the closures, start a fresh result and reset the cursor."""
        self.closures = None
        self.result = CaperResult()

        self._match_cache = dict()
        self._history = list()

        # -1 means "before the first item"
        self._closure_pos = -1
        self._fragment_pos = -1

    def setup(self, closures):
        """Reset the parser and seed the result heads with the first closure.

        :type closures: list of CaperClosure
        """

        self.reset()
        self.closures = closures

        first_node = CaperClosureNode(closures[0])
        self.result.heads = [first_node]

    def run(self, closures):
        """Parse `closures`; must be implemented by subclasses.

        :type closures: list of CaperClosure
        """

        raise NotImplementedError()

    #
    # Closure Methods
    #

    def next_closure(self):
        """Move the cursor to the next closure and return that closure."""
        self._closure_pos += 1
        closure = self.closures[self._closure_pos]

        # Record the step that puts the fragment cursor back before the first
        # fragment, so rewind() can restore the previous fragment position
        fragment_delta = -1 - self._fragment_pos
        self._history.append(('fragment', fragment_delta))
        self._fragment_pos = -1

        # Moving onto the very first closure is not recorded
        if self._closure_pos != 0:
            self._history.append(('closure', 1))

        Logr.debug('(next_closure) closure.value: "%s"', closure.value)
        return closure

    def closure_available(self):
        """Tell whether a closure exists after the current one."""
        following = self._closure_pos + 1
        return following < len(self.closures)

    #
    # Fragment Methods
    #

    def next_fragment(self):
        """Move the cursor to the next fragment of the current closure."""
        closure = self.closures[self._closure_pos]

        self._fragment_pos += 1
        fragment = closure.fragments[self._fragment_pos]

        self._history.append(('fragment', 1))

        Logr.debug('(next_fragment) closure.value "%s" - fragment.value: "%s"', closure.value, fragment.value)
        return fragment

    def fragment_available(self):
        """Tell whether the current closure has a fragment after the cursor.

        Always False when no closure follows the current one.
        """
        if self.closure_available():
            current = self.closures[self._closure_pos]
            return self._fragment_pos + 1 < len(current.fragments)

        return False

    def rewind(self):
        """Undo every recorded cursor move, newest first, then clear the history."""
        for source, delta in reversed(self._history):
            Logr.debug('(rewind) Rewinding step: %s', (source, delta))

            if source == 'closure':
                self._closure_pos -= delta
            elif source == 'fragment':
                self._fragment_pos -= delta
            else:
                raise NotImplementedError()

        self.commit()

    def commit(self):
        """Accept the cursor moves made so far by clearing the history."""
        Logr.debug('(commit)')
        self._history = []

    #
    # Capture Methods
    #

    def capture_fragment(self, tag, regex=None, func=None, single=True):
        """Start a new capture group on this parser with a fragment step."""
        group = CaptureGroup(self, self.result)

        return group.capture_fragment(tag, regex=regex, func=func, single=single)

    def capture_closure(self, tag, regex=None, func=None, single=True):
        """Start a new capture group on this parser with a closure step."""
        group = CaptureGroup(self, self.result)

        return group.capture_closure(tag, regex=regex, func=func, single=single)
@ -0,0 +1,148 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from logr import Logr |
||||
|
from caper.parsers.base import Parser |
||||
|
from caper.result import CaperFragmentNode |
||||
|
|
||||
|
|
||||
|
# Pattern table handed to the Parser base class by SceneParser.
#
# Each entry is ``(group_name, patterns)``.  In the 'identifier' group the
# patterns are bucketed as ``(weight, [pattern, ...])``.  A pattern is either
# a single regex string or a tuple of regex strings; in the 'video' group a
# ``(template, [value, ...])`` pair carries a '%s' template plus the values
# meant for it.
# NOTE(review): how tuples and templates are expanded is decided by the
# matcher, which is not visible here - confirm there before relying on it.
#
# All regexes are raw strings so that '\d' is never treated as a (invalid)
# string escape sequence.
PATTERN_GROUPS = [
    ('identifier', [
        (1.0, [
            # S01E01-E02
            (r'^S(?P<season>\d+)E(?P<episode_from>\d+)$', r'^E(?P<episode_to>\d+)$'),
            # S02E13
            r'^S(?P<season>\d+)E(?P<episode>\d+)$',
            # S01 E13
            (r'^(S(?P<season>\d+))$', r'^(E(?P<episode>\d+))$'),
            # S02
            # E13
            r'^((S(?P<season>\d+))|(E(?P<episode>\d+)))$',
            # 3x19
            r'^(?P<season>\d+)x(?P<episode>\d+)$',

            # 2013.09.15
            (r'^(?P<year>\d{4})$', r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$'),
            # 09.15.2013
            (r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$', r'^(?P<year>\d{4})$'),
            # TODO - US/UK Date Format Conflict? will only support US format for now..
            # 15.09.2013
            #(r'^(?P<day>\d{2})$', r'^(?P<month>\d{2})$', r'^(?P<year>\d{4})$'),
            # 130915
            r'^(?P<year_short>\d{2})(?P<month>\d{2})(?P<day>\d{2})$',

            # Season 3 Episode 14
            (r'^Se(ason)?$', r'^(?P<season>\d+)$', r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),
            # Season 3
            (r'^Se(ason)?$', r'^(?P<season>\d+)$'),
            # Episode 14
            (r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),

            # Part.3
            # Part.1.and.Part.3
            (r'^Part$', r'(?P<part>\d+)'),
        ]),
        (0.8, [
            # 100 - 1899, 2100 - 9999 (skips 1900 to 2099 - so we don't get years by mistake)
            # TODO - Update this pattern on 31 Dec 2099
            r'^(?P<season>([1-9])|(1[0-8])|(2[1-9])|([3-9][0-9]))(?P<episode>\d{2})$'
        ]),
        (0.5, [
            # 100 - 9999
            r'^(?P<season>([1-9])|([1-9][0-9]))(?P<episode>\d{2})$'
        ])
    ]),
    ('video', [
        r'(?P<aspect>FS|WS)',

        (r'(?P<resolution>%s)', [
            '480p',
            '720p',
            '1080p'
        ]),

        (r'(?P<source>%s)', [
            'HDTV',
            'PDTV',
            'DSR',
            'DVDRiP'
        ]),

        (r'(?P<codec>%s)', [
            'x264',
            'XViD'
        ]),

        (r'(?P<language>%s)', [
            'GERMAN',
            'DUTCH',
            'FRENCH',
            'SWEDiSH',
            'DANiSH',
            'iTALiAN'
        ]),
    ])
]
||||
|
|
||||
|
|
||||
|
class SceneParser(Parser):
    """Parser that is configured with the module-level PATTERN_GROUPS table."""

    def __init__(self):
        super(SceneParser, self).__init__(PATTERN_GROUPS)

    def capture_group(self, fragment):
        """Capture function used for the 'group' tag.

        :return: ``fragment.value`` when the fragment's left separator is
                 '-' and it has no right-hand neighbour, otherwise None
        """
        if fragment.left_sep == '-' and not fragment.right:
            return fragment.value

        return None

    def run(self, closures):
        """
        Run the capture steps over ``closures``, build the result and return it.

        :type closures: list of CaperClosure

        :return: ``self.result`` after ``build()`` has been called on it
        """

        self.setup(closures)

        # Untagged fragments are collected as the show name
        # NOTE(review): `until` presumably stops the capture at the first
        # fragment matching the given pattern group - confirm in CaptureGroup
        self.capture_fragment('show_name', single=False)\
            .until(fragment__re='identifier')\
            .until(fragment__re='video')\
            .execute()

        self.capture_fragment('identifier', regex='identifier', single=False)\
            .capture_fragment('video', regex='video', single=False)\
            .until(left_sep__eq='-', right__eq=None)\
            .execute()

        self.capture_fragment('group', func=self.capture_group)\
            .execute()

        self.print_tree(self.result.heads)

        self.result.build()
        return self.result

    def print_tree(self, heads):
        """Debug-log every head and, recursively, the parents of its first node.

        :param heads: nodes, or lists of nodes, to log; empty lists are skipped
        """
        for head in heads:
            nodes = head if isinstance(head, list) else [head]

            # An empty list has no first node to describe; indexing it
            # would raise IndexError
            if not nodes:
                continue

            first = nodes[0]

            if isinstance(first, CaperFragmentNode):
                for fragment in first.fragments:
                    Logr.debug(fragment.value)
            else:
                Logr.debug(first.closure.value)

            for node in nodes:
                Logr.debug('\t' + str(node).ljust(55) + '\t' + str(node.weight) + '\t' + str(node.match))

            # Only the first node's ancestry is followed
            if first.parent:
                self.print_tree([first.parent])
@ -0,0 +1,172 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
import copy |
||||
|
from logr import Logr |
||||
|
|
||||
|
|
||||
|
# NOTE(review): not referenced anywhere in the visible part of this module -
# presumably the tags whose matches get grouped; confirm against callers.
GROUP_MATCHES = ['identifier']
||||
|
|
||||
|
|
||||
|
class CaperNode(object):
    """Base class for nodes that link back to a parent node.

    Subclasses must override :meth:`next`.
    """

    def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
        """
        Store the given values on the node and start with no finished groups.

        :type parent: CaperNode
        :type weight: float
        """

        #: :type: list of CaptureGroup
        self.finished_groups = []

        #: :type: caper.objects.CaperClosure
        self.closure = closure

        #: :type: CaperNode
        self.parent = parent

        #: :type: str
        self.tag = tag

        #: :type: float
        self.weight = weight

        #: :type: dict
        self.match = match

    def next(self):
        """Not implemented on the base class.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()
||||
|
|
||||
|
|
||||
|
class CaperClosureNode(CaperNode):
    """Node that wraps a closure."""

    def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
        """
        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        # NOTE(review): next() reads `closure.fragments`, which a plain list
        # does not have - confirm whether lists are really passed here
        super(CaperClosureNode, self).__init__(closure, parent, tag, weight, match)

    def next(self):
        """Return the first fragment of the closure, or None if there is none."""
        if not self.closure:
            return None

        if len(self.closure.fragments) > 0:
            return self.closure.fragments[0]

        return None
||||
|
|
||||
|
|
||||
|
class CaperFragmentNode(CaperNode):
    """Node that holds the fragments captured from a closure."""

    def __init__(self, closure, fragments, parent=None, tag=None, weight=None, match=None):
        """
        :type closure: caper.objects.CaperClosure
        :type fragments: list of caper.objects.CaperFragment
        """
        super(CaperFragmentNode, self).__init__(closure, parent, tag, weight, match)

        #: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment
        self.fragments = fragments

    def next(self):
        """Return what follows this node.

        Prefers the right-hand neighbour of the last fragment and falls
        back to the right-hand neighbour of the closure.

        :return: the following fragment or closure, or None when neither
                 exists (including when ``fragments`` or ``closure`` is
                 missing)
        """
        # `fragments` may be None (CaperResultChain.update allows for that),
        # so test it for truthiness instead of calling len() on it
        last = self.fragments[-1] if self.fragments else None

        if last and last.right:
            return last.right

        # Guard a missing closure the same way CaperClosureNode.next() does
        if self.closure and self.closure.right:
            return self.closure.right

        return None
||||
|
|
||||
|
|
||||
|
class CaperResult(object):
    """Collects head nodes and turns them into weighted result chains."""

    def __init__(self):
        #: :type: list of CaperNode
        self.heads = []

        #: :type: list of CaperResultChain
        self.chains = []

    def build(self):
        """Build a chain for every path from a head to its root and rank them.

        Each chain gets one extra weight: its number of matched fragments
        relative to the best chain (0.0 when no chain matched anything).
        The chains are then finished, sorted by descending weight and
        debug-logged.
        """
        max_matched = 0

        for head in self.heads:
            for chain in self.combine_chain(head):
                if chain.num_matched > max_matched:
                    max_matched = chain.num_matched

                self.chains.append(chain)

        for chain in self.chains:
            # Avoid dividing by zero when nothing was matched at all
            if max_matched:
                ratio = chain.num_matched / float(max_matched)
            else:
                ratio = 0.0

            chain.weights.append(ratio)
            chain.finish()

        self.chains.sort(key=lambda chain: chain.weight, reverse=True)

        for chain in self.chains:
            Logr.debug("chain weight: %.02f", chain.weight)
            Logr.debug("\tInfo: %s", chain.info)

            Logr.debug("\tWeights: %s", chain.weights)
            Logr.debug("\tNumber of Fragments Matched: %s", chain.num_matched)

    def combine_chain(self, subject, chain=None):
        """Walk from ``subject`` up through its parents, collecting chains.

        :param subject: a node or a list of alternative nodes
        :param chain: chain accumulated so far; a new CaperResultChain is
                      created when omitted
        :return: one chain per path that reaches a node without a parent
        :rtype: list of CaperResultChain
        """
        nodes = subject if type(subject) is list else [subject]

        if chain is None:
            chain = CaperResultChain()

        result = []

        for x, node in enumerate(nodes):
            # Every alternative but the last works on its own copy, so the
            # alternatives do not write into each other's chain
            node_chain = chain if x == len(nodes) - 1 else chain.copy()

            # A node without a parent ends the path (it is not added itself)
            if not node.parent:
                result.append(node_chain)
                continue

            # Skip over closure nodes
            if type(node) is CaperClosureNode:
                result.extend(self.combine_chain(node.parent, node_chain))

            # Parse fragment matches
            if type(node) is CaperFragmentNode:
                node_chain.update(node)

                result.extend(self.combine_chain(node.parent, node_chain))

        return result
||||
|
|
||||
|
|
||||
|
class CaperResultChain(object):
    """Accumulates weights and match info for one path through the result tree."""

    def __init__(self):
        #: :type: float
        self.weight = None
        #: :type: dict mapping a tag to the list of matches recorded for it
        self.info = {}
        #: :type: int
        self.num_matched = 0

        #: :type: list of float
        self.weights = []

    def update(self, subject):
        """Fold a node into the chain.

        Nodes whose weight is None are ignored entirely.  Otherwise the
        node's fragment count and weight are recorded and, if it has a
        match, that match is inserted at the front of the list for its tag.
        """
        if subject.weight is None:
            return

        self.num_matched += len(subject.fragments) if subject.fragments is not None else 0
        self.weights.append(subject.weight)

        if subject.match:
            if subject.tag not in self.info:
                self.info[subject.tag] = []

            # Front-insert: later updates end up ahead of earlier ones
            self.info[subject.tag].insert(0, subject.match)

    def finish(self):
        """Set ``weight`` to the mean of the collected weights.

        A chain without any weights gets 0.0 instead of raising
        ZeroDivisionError.
        """
        if not self.weights:
            self.weight = 0.0
            return

        # float() keeps integer weights from truncating under Python 2
        self.weight = sum(self.weights) / float(len(self.weights))

    def copy(self):
        """Return an independent copy (``info`` is deep-copied)."""
        chain = CaperResultChain()

        chain.weight = self.weight
        chain.info = copy.deepcopy(self.info)

        chain.num_matched = self.num_matched
        chain.weights = copy.copy(self.weights)

        return chain
@ -0,0 +1,72 @@ |
|||||
|
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
|
# you may not use this file except in compliance with the License. |
||||
|
# You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
from logr import Logr |
||||
|
|
||||
|
|
||||
|
class CaptureStep(object):
    """One capture instruction (regex, function or raw) inside a CaptureGroup."""

    # Attributes shown by __repr__ when they hold a truthy value
    REPR_KEYS = ['regex', 'func', 'single']

    def __init__(self, capture_group, tag, source, regex=None, func=None, single=None):
        #: @type: CaptureGroup
        self.capture_group = capture_group

        #: @type: str
        self.tag = tag
        #: @type: str
        self.source = source
        #: @type: str
        self.regex = regex
        #: @type: function
        self.func = func
        #: @type: bool
        self.single = single

    def _get_next_subject(self, parser):
        """Fetch the next fragment or closure from ``parser``.

        :return: the next subject, or None when the parser has none left
        :raises NotImplementedError: if ``source`` is neither 'fragment'
                                     nor 'closure'
        """
        if self.source == 'fragment':
            return parser.next_fragment() if parser.fragment_available() else None

        if self.source == 'closure':
            return parser.next_closure() if parser.closure_available() else None

        raise NotImplementedError()

    def execute(self, fragment):
        """Try this step against ``fragment``.

        :return: ``(success, weight, match, num_fragments)``; on failure
                 this is ``(False, None, None, 1)``
        """
        no_match = (False, None, None, 1)

        if self.regex:
            matcher = self.capture_group.parser.matcher
            weight, match, num_fragments = matcher.fragment_match(fragment, self.regex)
            Logr.debug('(execute) [regex] tag: "%s"', self.tag)

            if match:
                return True, weight, match, num_fragments
            return no_match

        if self.func:
            match = self.func(fragment)
            Logr.debug('(execute) [func] %s += "%s"', self.tag, match)

            if match:
                return True, 1.0, match, 1
            return no_match

        # Neither regex nor func: the raw value is always captured
        Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)
        return True, 1.0, fragment.value, 1

    def __repr__(self):
        shown = []
        for key in self.REPR_KEYS:
            if hasattr(self, key) and getattr(self, key):
                shown.append(key + '=' + repr(getattr(self, key)))

        suffix = ''
        if len(shown) > 0:
            suffix = ', ' + ', '.join(shown)

        return "CaptureStep('%s'%s)" % (self.tag, suffix)
@ -0,0 +1,201 @@ |
|||||
|
# logr - Simple python logging wrapper |
||||
|
# Packed by Dean Gardiner <gardiner91@gmail.com> |
||||
|
# |
||||
|
# File part of: |
||||
|
# rdio-sock - Rdio WebSocket Library |
||||
|
# Copyright (C) 2013 fzza- <fzzzzzzzza@gmail.com> |
||||
|
|
||||
|
# This program is free software: you can redistribute it and/or modify |
||||
|
# it under the terms of the GNU General Public License as published by |
||||
|
# the Free Software Foundation, either version 3 of the License, or |
||||
|
# (at your option) any later version. |
||||
|
|
||||
|
# This program is distributed in the hope that it will be useful, |
||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
# GNU General Public License for more details. |
||||
|
|
||||
|
# You should have received a copy of the GNU General Public License |
||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
||||
|
|
||||
|
|
||||
|
import inspect |
||||
|
import logging |
||||
|
import os |
||||
|
import sys |
||||
|
|
||||
|
# Names that Logr.get_logger_name() never returns; it keeps searching
# further up the stack when it resolves one of them
IGNORE = ()
# True when the running interpreter's major version is 3
PY3 = (sys.version_info[0] == 3)
||||
|
|
||||
|
|
||||
|
class Logr(object):
    """Static logging facade that names each logger after its caller."""

    # Loggers created so far, keyed by the resolved caller name
    loggers = {}
    # Handler attached to every logger that get_logger() creates
    handler = None

    @staticmethod
    def configure(level=logging.WARNING, handler=None, formatter=None):
        """Configure Logr

        Loggers that were already created keep the handler they were
        given; only loggers created afterwards use the new one.

        @param level: Level set on the handler
        @type level: int

        @param handler: Logger message handler
        @type handler: logging.Handler or None

        @param formatter: Logger message Formatter
        @type formatter: logging.Formatter or None
        """
        if formatter is None:
            formatter = LogrFormatter()

        if handler is None:
            handler = logging.StreamHandler()

        handler.setFormatter(formatter)
        handler.setLevel(level)
        Logr.handler = handler

    @staticmethod
    def configure_check():
        """Apply the default configuration if no handler has been set yet."""
        if Logr.handler is None:
            Logr.configure()

    @staticmethod
    def _get_name_from_path(filename):
        """Return the file name without directory and extension.

        @return: the bare name, or "<unknown>" when `filename` is not a path
        @rtype: str
        """
        try:
            return os.path.splitext(os.path.basename(filename))[0]
        except (TypeError, AttributeError):
            # A missing path (None) raises TypeError on Python 3 but
            # AttributeError on Python 2
            return "<unknown>"

    @staticmethod
    def _get_name_from_frame(frame):
        """Work out a logger name for a single stack frame.

        @return: the resolved name, or None if the frame gives no name
        """
        code = frame.f_code
        name = None

        # Try find name of function defined inside a class
        if len(code.co_varnames) > 0:
            self_argument = code.co_varnames[0]

            if self_argument == 'self' and self_argument in frame.f_locals:
                class_ = frame.f_locals[self_argument].__class__

                if class_.__module__ != '__main__':
                    name = class_.__module__ + '.' + class_.__name__
                else:
                    name = class_.__name__

        # Try find name of function defined outside of a class
        if name is None:
            if code.co_name in frame.f_globals:
                name = frame.f_globals.get('__name__')
                if name == '__main__':
                    name = Logr._get_name_from_path(frame.f_globals.get('__file__'))
            elif code.co_name == '<module>':
                name = Logr._get_name_from_path(frame.f_globals.get('__file__'))

        return name

    @staticmethod
    def get_logger_name():
        """Return the name of the nearest caller a logger can be named after.

        Walks the stack from the innermost frame outwards and returns the
        first resolved name that is not listed in IGNORE.

        @return: the logger name, or "" when no frame yields one
        @rtype: str
        """
        stack = inspect.stack()

        try:
            for record in stack:
                name = Logr._get_name_from_frame(record[0])

                if name is not None and name not in IGNORE:
                    return name
        finally:
            # The stack includes this function's own frame; dropping the
            # list breaks the resulting reference cycle
            del stack

        return ""

    @staticmethod
    def get_logger():
        """Get or create logger (if it does not exist)

        @rtype: logging.Logger
        """
        name = Logr.get_logger_name()
        if name not in Logr.loggers:
            Logr.configure_check()
            Logr.loggers[name] = logging.Logger(name)
            Logr.loggers[name].addHandler(Logr.handler)
        return Logr.loggers[name]

    @staticmethod
    def debug(msg, *args, **kwargs):
        Logr.get_logger().debug(msg, *args, **kwargs)

    @staticmethod
    def info(msg, *args, **kwargs):
        Logr.get_logger().info(msg, *args, **kwargs)

    @staticmethod
    def warning(msg, *args, **kwargs):
        Logr.get_logger().warning(msg, *args, **kwargs)

    warn = warning

    @staticmethod
    def error(msg, *args, **kwargs):
        Logr.get_logger().error(msg, *args, **kwargs)

    @staticmethod
    def exception(msg, *args, **kwargs):
        Logr.get_logger().exception(msg, *args, **kwargs)

    @staticmethod
    def critical(msg, *args, **kwargs):
        Logr.get_logger().critical(msg, *args, **kwargs)

    fatal = critical

    @staticmethod
    def log(level, msg, *args, **kwargs):
        Logr.get_logger().log(level, msg, *args, **kwargs)
||||
|
|
||||
|
|
||||
|
class LogrFormatter(logging.Formatter):
    """Formatter with fixed-width logger-name and level-name columns."""

    # Width of the name column (longer names keep their last characters)
    LENGTH_NAME = 32
    # Width of the level column (longer level names keep their first characters)
    LENGTH_LEVEL_NAME = 5

    def __init__(self, fmt=None, datefmt=None):
        # Up to Python 2.6 the base class is initialised directly instead
        # of through super()
        if sys.version_info[:2] <= (2, 6):
            logging.Formatter.__init__(self, fmt, datefmt)
        else:
            super(LogrFormatter, self).__init__(fmt, datefmt)

    def usesTime(self):
        """Always True: every formatted line starts with the timestamp."""
        return True

    def format(self, record):
        """Return "<asctime> <name> <levelname> <message>" for ``record``.

        Exception text, if the record has any, follows on its own line(s).
        """
        record.message = record.getMessage()
        if self.usesTime():
            record.asctime = self.formatTime(record, self.datefmt)

        name_column = record.name[-self.LENGTH_NAME:].rjust(self.LENGTH_NAME, ' ')
        level_column = record.levelname[:self.LENGTH_LEVEL_NAME].ljust(self.LENGTH_LEVEL_NAME, ' ')

        s = "%(asctime)s %(name)s %(levelname)s %(message)s" % {
            'asctime': record.asctime,
            'name': name_column,
            'levelname': level_column,
            'message': record.message
        }

        # Format the traceback once and cache it on the record
        if record.exc_info and not record.exc_text:
            record.exc_text = self.formatException(record.exc_info)

        if not record.exc_text:
            return s

        if s[-1:] != "\n":
            s += "\n"

        try:
            s += record.exc_text
        except UnicodeError:
            s = s + record.exc_text.decode(sys.getfilesystemencoding(),
                                           'replace')
        return s
||||
|
|
||||
|
|
||||
|
def xrange_six(start, stop=None, step=None):
    """Return a lazy integer range on both Python 2 and Python 3.

    Mirrors ``range(start)``, ``range(start, stop)`` and
    ``range(start, stop, step)``.  When `stop` is omitted the result is
    ``range(start)`` and any `step` is ignored, as it always has been.

    @rtype: xrange on Python 2, range on Python 3
    """
    try:
        range_type = xrange  # noqa: F821 - only defined on Python 2
    except NameError:
        range_type = range

    if stop is None:
        return range_type(start)

    # A stop without a step used to be dropped silently
    if step is None:
        return range_type(start, stop)

    return range_type(start, stop, step)
Loading…
Reference in new issue