__version_info__ = ('0', '2', '0')
__version_branch__ = 'master'

__version__ = "%s%s" % (
    '.'.join(__version_info__),
    '-' + __version_branch__ if __version_branch__ else ''
)


# Characters that open / close a closure (a bracketed section of a name).
CL_START_CHARS = ['(', '[']
CL_END_CHARS = [')', ']']

# Characters stripped from the edges of closures before they are fragmented.
STRIP_START_CHARS = ''.join(CL_START_CHARS)
STRIP_END_CHARS = ''.join(CL_END_CHARS)
STRIP_CHARS = ''.join(['_', ' ', '.'])

# Characters that separate two fragments inside a closure.
FRAGMENT_SEPARATORS = ['.', '-', '_', ' ']


# States of the closure splitter: waiting for an opening / a closing character.
CL_START = 0
CL_END = 1


class Caper(object):
    """Entry point: splits a name into closures and fragments, then runs a parser."""

    def __init__(self):
        self.parsers = {
            'scene': SceneParser(),
            'anime': AnimeParser()
        }

    def _closure_split(self, name):
        """Split `name` into a doubly linked list of closures.

        A new closure starts at every character in CL_START_CHARS and ends at
        the next character in CL_END_CHARS; text outside brackets forms its
        own closures.  Closures that are empty after stripping STRIP_CHARS
        are dropped.

        :type name: str

        :rtype: list of CaperClosure
        """

        closures = []

        def end_closure(buf):
            buf = buf.strip(STRIP_CHARS)
            if not buf:
                return

            cur = CaperClosure(buf)

            # Link the new closure to its left neighbour (if any)
            cur.left = closures[-1] if closures else None
            if cur.left is not None:
                cur.left.right = cur

            closures.append(cur)

        state = CL_START
        buf = ""
        for ch in name:
            if state == CL_START and ch in CL_START_CHARS:
                # Flush the text collected before the opening bracket
                end_closure(buf)

                state = CL_END
                buf = ""

            buf += ch

            if state == CL_END and ch in CL_END_CHARS:
                # The closing bracket is kept as part of the closure value
                end_closure(buf)

                state = CL_START
                buf = ""

        end_closure(buf)

        return closures

    def _clean_closure(self, closure):
        """Strip leading opening brackets and trailing closing brackets.

        :type closure: str

        :rtype: str
        """

        return closure.lstrip(STRIP_START_CHARS).rstrip(STRIP_END_CHARS)

    def _fragment_split(self, closures):
        """Populate `closure.fragments` for every closure.

        The cleaned closure value is split on FRAGMENT_SEPARATORS.  Each
        fragment is linked to its neighbours and records the separator on
        either side; the final fragment of a closure has no right separator,
        so its `right_sep` is None.

        :type closures: list of CaperClosure

        :rtype: list of CaperClosure
        """

        def end_fragment(fragments, fragment, position, right_sep):
            fragment.position = position

            fragment.left = fragments[-1] if fragments else None
            if fragment.left is not None:
                fragment.left_sep = fragment.left.right_sep
                fragment.left.right = fragment

            # Passed in explicitly: reading the enclosing loop variable would
            # hand the last fragment its own final character as a separator.
            fragment.right_sep = right_sep

            fragments.append(fragment)

        for closure in closures:
            closure.fragments = []

            cur_position = 0
            cur = CaperFragment()

            for ch in self._clean_closure(closure.value):
                if ch in FRAGMENT_SEPARATORS:
                    end_fragment(closure.fragments, cur, cur_position, ch)

                    # Reset
                    cur = CaperFragment()
                    cur_position += 1
                else:
                    cur.value += ch

            # Finish parsing the last fragment
            if cur.value != "":
                end_fragment(closure.fragments, cur, cur_position, None)

        return closures

    def parse(self, name, parser='scene'):
        """Parse `name` with the parser registered under `parser`.

        :type name: str
        :type parser: str

        :return: whatever the selected parser's `run` returns
        :raises ValueError: if `parser` is not a key of `self.parsers`
        """

        # Fail fast, before doing any splitting work
        if parser not in self.parsers:
            raise ValueError("Unknown parser")

        closures = self._closure_split(name)
        closures = self._fragment_split(closures)

        # Print closures
        for closure in closures:
            Logr.debug("closure [%s]", closure.value)

        # TODO autodetect the parser type
        return self.parsers[parser].run(closures)
class CaptureConstraint(object):
    """A set of comparisons evaluated against a fragment.

    Comparisons are `(name, method, argument)` tuples.  They can be passed
    directly or built from keyword arguments of the form
    `<name>__<method>=<argument>`, where `<method>` selects one of the
    `_compare_<method>` methods of this class (`eq` or `re`).
    """

    def __init__(self, capture_group, comparisons=None, **kwargs):
        """Capture constraint object

        :type capture_group: CaptureGroup
        :type comparisons: list of (str, callable, object) or None
        """

        self.capture_group = capture_group

        # Copied so keyword-derived comparisons never leak into the caller's list
        self.comparisons = list(comparisons) if comparisons else []

        for key, value in kwargs.items():
            parts = key.split('__')
            if len(parts) != 2:
                # Not of the form <name>__<method>; ignored
                continue
            name, method = parts

            method = '_compare_' + method
            if not hasattr(self, method):
                # Unknown comparison method; ignored
                continue

            self.comparisons.append((name, getattr(self, method), value))

    def _compare_eq(self, fragment, name, expected):
        """Compare attribute `name` of `fragment` with `expected`.

        :return: (weight, success); (0.0, False) when the attribute is missing
        :rtype: (float, bool)
        """
        if not hasattr(fragment, name):
            # A bare None here would break the tuple unpacking in execute()
            return 0.0, False

        return 1.0, getattr(fragment, name) == expected

    def _compare_re(self, fragment, name, arg):
        """Match `fragment` against a pattern group or a compiled pattern.

        * `name == 'fragment'`: `arg` is a group name or a
          `(group, minimum_weight)` tuple, matched with the parser's
          `fragment_match`; succeeds when the weight exceeds the minimum.
        * otherwise attribute `name` of the fragment is matched, either
          against `arg` itself when it is a compiled pattern, or against the
          pattern group named `arg` via the parser's `value_match`.

        :rtype: (float, bool)
        :raises ValueError: if the fragment has no attribute called `name`
        """
        if name == 'fragment':
            group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)

            weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, group)
            return weight, weight > minimum_weight

        if not hasattr(fragment, name):
            raise ValueError("Unable to find fragment with name '%s'" % name)

        value = getattr(fragment, name)

        # Compiled patterns are recognised by their `match` method; the class
        # name differs between Python versions so it is not a reliable test.
        if callable(getattr(arg, 'match', None)):
            return 1.0, arg.match(value) is not None

        match = self.capture_group.parser.matcher.value_match(value, arg, single=True)
        return 1.0, match is not None

    def execute(self, fragment):
        """Run every comparison against `fragment`.

        :return: (average weight, True only if every comparison succeeded);
                 (0.0, False) when there are no comparisons
        :rtype: (float, bool)
        """
        if not self.comparisons:
            return 0.0, False

        results = []
        total_weight = 0.0

        for name, method, argument in self.comparisons:
            weight, success = method(fragment, name, argument)
            total_weight += weight
            results.append(success)

        return total_weight / len(results), all(results)

    def __repr__(self):
        return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons)
class CaptureGroup(object):
    """A chain of capture steps plus the constraints that end the capture.

    Steps and constraints are added fluently (`capture_fragment`,
    `capture_closure` and `until` all return the group) and applied to the
    heads of the result tree by `execute`.
    """

    def __init__(self, parser, result):
        """Capture group object

        :type parser: caper.parsers.base.Parser
        :type result: caper.result.CaperResult
        """

        self.parser = parser
        self.result = result

        #: @type: list of CaptureStep
        self.steps = []
        #: @type: list of CaptureConstraint
        self.constraints = []

    def capture_fragment(self, tag, regex=None, func=None, single=True):
        """Append a step that captures from fragments and return the group."""
        Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single)

        self.steps.append(CaptureStep(
            self, tag,
            'fragment',
            regex=regex,
            func=func,
            single=single
        ))

        return self

    def capture_closure(self, tag, regex=None, func=None, single=True):
        """Append a step that captures from closures and return the group."""
        Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)

        self.steps.append(CaptureStep(
            self, tag,
            'closure',
            regex=regex,
            func=func,
            single=single
        ))

        return self

    def until(self, **kwargs):
        """Append a constraint built from `<name>__<method>=<arg>` keywords."""
        self.constraints.append(CaptureConstraint(self, **kwargs))

        return self

    def parse_subject(self, parent_head, subject):
        """Evaluate `subject` and return the head(s) that replace `parent_head`.

        Constraints are checked first: a successful constraint marks the
        parent node as finished for this group and keeps `parent_head` as a
        head; with a weight of exactly 1.0 nothing else is tried.  Otherwise
        the steps are tried in order and a fragment node is created for the
        subject.  A match with a weight below 1.0 additionally produces an
        untagged alternative node.

        :param parent_head: a node, or a list of alternative nodes
        :param subject: the subject returned by the parent node's `next()`
        :rtype: list
        """
        parent_node = parent_head[0] if isinstance(parent_head, list) else parent_head

        # TODO - if subject is a closure?

        nodes = []

        # Check constraints
        for constraint in self.constraints:
            weight, success = constraint.execute(subject)
            if success:
                Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
                parent_node.finished_groups.append(self)
                nodes.append(parent_head)

                if weight == 1.0:
                    return nodes
                else:
                    Logr.debug('Branching result')

        # Try match subject against the steps available.
        # num_fragments defaults to 1 so a group without steps still
        # consumes the subject itself.
        tag, success, weight, match, num_fragments = (None, None, None, None, 1)
        for step in self.steps:
            tag = step.tag
            success, weight, match, num_fragments = step.execute(subject)
            if success:
                match = clean_dict(match) if type(match) is dict else match
                Logr.debug('Found match with weight %s, match: %s, num_fragments: %s', weight, match, num_fragments)
                break

        Logr.debug('created fragment node with subject.value: "%s"', subject.value)

        result = [CaperFragmentNode(parent_node.closure, subject.take_right(num_fragments), parent_head, tag, weight, match)]

        # Uncertain match: keep an untagged alternative covering just the subject
        if match and weight < 1.0:
            if num_fragments == 1:
                # Same span: alternative lives in the same head
                result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))
            else:
                # Different span: alternative becomes a separate head
                nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))

        nodes.append(result[0] if len(result) == 1 else result)

        return nodes

    def execute(self, once=False):
        """Advance the result heads until this group has finished on all of them.

        A head is finished when its node lists this group in
        `finished_groups` or has no next subject.

        :param once: when true, perform a single pass over the current heads
                     instead of looping until every head is finished.  Added
                     so that callers passing `once=True` (as
                     `AnimeParser.run` does) no longer fail with a TypeError.
        :type once: bool
        """
        heads_finished = None

        while heads_finished is None or not (len(heads_finished) == len(self.result.heads) and all(heads_finished)):
            heads_finished = []

            heads = self.result.heads
            self.result.heads = []

            for head in heads:
                node = head[0] if isinstance(head, list) else head

                Logr.debug("head node: %s", node)

                if self in node.finished_groups:
                    Logr.debug("head finished for group")
                    self.result.heads.append(head)
                    heads_finished.append(True)
                    continue

                next_subject = node.next()

                if next_subject:
                    for node_result in self.parse_subject(head, next_subject):
                        self.result.heads.append(node_result)

                heads_finished.append(self in node.finished_groups or next_subject is None)

            # Nothing could be advanced: restore the previous heads
            if len(self.result.heads) == 0:
                self.result.heads = heads

            Logr.debug("heads_finished: %s, self.result.heads: %s", heads_finished, self.result.heads)

            if once:
                break

        Logr.debug("group finished")
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3


def is_list_type(obj, element_type):
    """Return True if `obj` is a list whose first element is exactly `element_type`.

    Only the first element is inspected, and both checks use exact types
    (subclasses do not count).

    :raises ValueError: if `obj` is an empty list
    """
    if type(obj) is not list:
        return False

    if len(obj) < 1:
        raise ValueError("Unable to determine list element type from empty list")

    return type(obj[0]) is element_type


def clean_dict(target, remove=None):
    """Recursively remove items matching a value 'remove' from the dictionary

    The dictionary is modified in place and also returned.

    :type target: dict
    :raises ValueError: if `target` is not a dict
    """
    if not isinstance(target, dict):
        raise ValueError("Target is required to be a dict")

    remove_keys = []
    for key, value in target.items():
        if isinstance(value, dict):
            clean_dict(value, remove)
        elif value == remove:
            remove_keys.append(key)

    # Removal happens after iteration so the dict is not resized mid-loop
    for key in remove_keys:
        target.pop(key)

    return target


def xrange_six(start, stop=None, step=None):
    """`xrange` on Python 2, `range` on Python 3.

    Supports the one-, two- and three-argument forms.  A `step` given without
    a `stop` is ignored.
    """
    range_type = range if PY3 else xrange  # noqa: F821 - xrange only exists on Python 2

    if stop is None:
        return range_type(start)

    if step is None:
        # Two-argument form; `stop` must not be dropped here
        return range_type(start, stop)

    return range_type(start, stop, step)
class FragmentMatcher(object):
    """Compiles pattern groups and matches values / fragment chains against them.

    `self.regex` maps a group name to a list of `(weight, patterns)` tuples,
    where every pattern is a tuple of compiled regular expressions, one per
    consecutive fragment.
    """

    def __init__(self, pattern_groups):
        """
        :param pattern_groups: iterable of `(group_name, patterns)`.
            `patterns` is either a flat list of patterns (implicit weight
            1.0) or a list of `(weight, patterns)` tuples.  A pattern is a
            string, a tuple of per-fragment patterns, or a
            `(template, alternatives)` pair whose `%s` is replaced by the
            alternatives joined with `|`.
        """
        self.regex = {}

        for group_name, patterns in pattern_groups:
            if group_name not in self.regex:
                self.regex[group_name] = []

            # Nothing to compile for an empty group
            if not patterns:
                continue

            # Transform into weight groups
            if type(patterns[0]) is str or type(patterns[0][0]) not in [int, float]:
                patterns = [(1.0, patterns)]

            for weight, weighted_patterns in patterns:
                weight_patterns = []

                for pattern in weighted_patterns:
                    # Transform into multi-fragment patterns
                    if type(pattern) is str:
                        pattern = (pattern,)

                    if type(pattern) is tuple and len(pattern) == 2:
                        # A (template, [alternatives]) pair is one fragment pattern
                        if type(pattern[0]) is str and is_list_type(pattern[1], str):
                            pattern = (pattern,)

                    result = []
                    for value in pattern:
                        if type(value) is tuple:
                            if len(value) == 2:
                                # Construct OR-list pattern
                                value = value[0] % '|'.join(value[1])
                            elif len(value) == 1:
                                value = value[0]

                        result.append(re.compile(value, re.IGNORECASE))

                    weight_patterns.append(tuple(result))

                self.regex[group_name].append((weight, weight_patterns))

    def find_group(self, name):
        """Return `(name, weight_groups)` for the group, or None if unknown.

        Falsy names are never found.
        """
        if name and name in self.regex:
            return name, self.regex[name]

        return None

    def parser_match(self, parser, group_name, single=True):
        """Match patterns against the fragments handed out by `parser`.

        Fragments are pulled with `parser.next_fragment()`; a matched pattern
        is committed, a failed one is rewound.  Only the first weight group
        of every pattern group is considered.

        :type parser: caper.parsers.base.Parser
        :param group_name: restrict matching to this group (falsy: all groups)
        :param single: return after the first matching pattern
        :return: `{group: {name: value}}` or None when nothing matched
        """
        result = None

        for group, weight_groups in self.regex.items():
            if group_name and group != group_name:
                continue

            if not weight_groups:
                continue

            # TODO handle multiple weights
            weight, patterns = weight_groups[0]

            for pattern in patterns:
                fragments = []
                pattern_matched = True
                pattern_result = {}

                for fragment_pattern in pattern:
                    if not parser.fragment_available():
                        pattern_matched = False
                        break

                    fragment = parser.next_fragment()
                    fragments.append(fragment)

                    Logr.debug('[r"%s"].match("%s")', fragment_pattern.pattern, fragment.value)
                    match = fragment_pattern.match(fragment.value)
                    if match:
                        Logr.debug('Pattern "%s" matched', fragment_pattern.pattern)
                    else:
                        pattern_matched = False
                        break

                    pattern_result.update(clean_dict(match.groupdict()))

                if pattern_matched:
                    if result is None:
                        result = {}

                    if group not in result:
                        result[group] = {}

                    Logr.debug('Matched on <%s>', ' '.join([f.value for f in fragments]))

                    result[group].update(pattern_result)
                    parser.commit()

                    if single:
                        return result
                else:
                    parser.rewind()

        return result

    def value_match(self, value, group_name=None, single=True):
        """Match a single string against the first regex of every pattern.

        Only the first weight group of every pattern group is considered.

        :param group_name: restrict matching to this group (falsy: all groups)
        :param single: return after the first matching pattern
        :return: `{group: {name: value}}` or None when nothing matched
        """
        result = None

        for group, weight_groups in self.regex.items():
            if group_name and group != group_name:
                continue

            if not weight_groups:
                continue

            # TODO handle multiple weights
            weight, patterns = weight_groups[0]

            for pattern in patterns:
                match = pattern[0].match(value)
                if not match:
                    continue

                if result is None:
                    result = {}
                if group not in result:
                    result[group] = {}

                result[group].update(match.groupdict())

                if single:
                    return result

        return result

    def fragment_match(self, fragment, group_name=None):
        """Follow a fragment chain to try find a match

        :type fragment: caper.objects.CaperFragment
        :type group_name: str or None

        :return: The weight of the match found between 0.0 and 1.0,
                 where 1.0 means perfect match and 0.0 means no match.
                 An unknown (or missing) group name is reported as no match.
        :rtype: (float, dict, int)
        """

        found = self.find_group(group_name)
        if found is None:
            return 0.0, None, 1

        group_name, weight_groups = found

        for weight, patterns in weight_groups:
            for pattern in patterns:
                # Ignore empty patterns (but keep trying the remaining ones)
                if len(pattern) < 1:
                    continue

                cur_fragment = fragment
                success = True
                result = {}

                for fragment_pattern in pattern:
                    # Chain ended before the pattern did
                    if not cur_fragment:
                        success = False
                        break

                    match = fragment_pattern.match(cur_fragment.value)
                    if match:
                        result.update(match.groupdict())
                    else:
                        success = False
                        break

                    cur_fragment = cur_fragment.right if cur_fragment else None

                if success:
                    Logr.debug("Found match with weight %s", weight)
                    return float(weight), result, len(pattern)

        return 0.0, None, 1
class AnimeParser(Parser):
    """Parser for anime release names, built on the module's PATTERN_GROUPS."""

    def __init__(self):
        super(AnimeParser, self).__init__(PATTERN_GROUPS)

    def capture_group(self, fragment):
        """Return the 'group' capture of REGEX_GROUP for the fragment value, or None."""
        found = REGEX_GROUP.match(fragment.value)
        return found.group('group') if found else None

    def run(self, closures):
        """Run the capture groups over `closures` and return the built result.

        :type closures: list of CaperClosure
        """

        self.setup(closures)

        # Release group
        group_capture = self.capture_closure('group', func=self.capture_group)
        group_capture.execute(once=True)

        # Show name: everything up to the identifier or video details
        name_capture = self.capture_fragment('show_name', single=False)
        name_capture.until(value__re='identifier')
        name_capture.until(value__re='video')
        name_capture.execute()

        # Identifier, followed by video and audio details
        detail_capture = self.capture_fragment('identifier', regex='identifier')
        detail_capture.capture_fragment('video', regex='video', single=False)
        detail_capture.capture_fragment('audio', regex='audio', single=False)
        detail_capture.execute()

        self.result.build()
        return self.result
class Parser(object):
    """Base class for name parsers.

    Holds the matcher, the closures being parsed, the result tree and a
    cursor (`_closure_pos` / `_fragment_pos`) with a history of cursor moves
    that can be rewound or committed.
    """

    def __init__(self, pattern_groups):
        self.matcher = FragmentMatcher(pattern_groups)

        self.closures = None
        #: :type: caper.result.CaperResult
        self.result = None

        self._match_cache = None
        self._fragment_pos = None
        self._closure_pos = None
        self._history = None

        self.reset()

    def reset(self):
        """Drop the closures, start a fresh result and reset the cursor."""
        self.closures = None
        self.result = CaperResult()

        self._match_cache = {}
        self._fragment_pos = -1
        self._closure_pos = -1
        self._history = []

    def setup(self, closures):
        """Reset the parser and seed the result with the first closure.

        An empty closure list leaves the result without heads.

        :type closures: list of CaperClosure
        """

        self.reset()
        self.closures = closures

        self.result.heads = [CaperClosureNode(closures[0])] if closures else []

    def run(self, closures):
        """Parse `closures`; implemented by subclasses.

        :type closures: list of CaperClosure
        """

        raise NotImplementedError()

    #
    # Closure Methods
    #

    def next_closure(self):
        """Move the cursor to the next closure and return it.

        The fragment cursor is reset to -1; the history records the delta
        needed to restore it on rewind.  Entering the first closure is not
        recorded as a closure move.
        """
        self._closure_pos += 1
        closure = self.closures[self._closure_pos]

        # delta chosen so that `pos -= delta` restores the old fragment position
        self._history.append(('fragment', -1 - self._fragment_pos))
        self._fragment_pos = -1

        if self._closure_pos != 0:
            self._history.append(('closure', 1))

        Logr.debug('(next_closure) closure.value: "%s"', closure.value)
        return closure

    def closure_available(self):
        """Return True if there is a closure after the current one."""
        return self.closures is not None and self._closure_pos + 1 < len(self.closures)

    #
    # Fragment Methods
    #

    def next_fragment(self):
        """Move the cursor to the next fragment of the current closure and return it."""
        closure = self.closures[self._closure_pos]

        self._fragment_pos += 1
        fragment = closure.fragments[self._fragment_pos]

        self._history.append(('fragment', 1))

        Logr.debug('(next_fragment) closure.value "%s" - fragment.value: "%s"', closure.value, fragment.value)
        return fragment

    def fragment_available(self):
        """Return True if the current closure has a fragment after the cursor.

        False while no closure has been entered.  The check is made against
        the *current* closure; whether another closure follows is irrelevant.
        """
        if self.closures is None or not (0 <= self._closure_pos < len(self.closures)):
            return False
        return self._fragment_pos + 1 < len(self.closures[self._closure_pos].fragments)

    def rewind(self):
        """Undo every cursor move recorded since the last commit."""
        for source, delta in reversed(self._history):
            Logr.debug('(rewind) Rewinding step: %s', (source, delta))
            if source == 'fragment':
                self._fragment_pos -= delta
            elif source == 'closure':
                self._closure_pos -= delta
            else:
                raise NotImplementedError()

        self.commit()

    def commit(self):
        """Forget the recorded cursor moves, making them permanent."""
        Logr.debug('(commit)')
        self._history = []

    #
    # Capture Methods
    #

    def capture_fragment(self, tag, regex=None, func=None, single=True):
        """Start a new capture group with a fragment step and return it."""
        return CaptureGroup(self, self.result).capture_fragment(
            tag,
            regex=regex,
            func=func,
            single=single
        )

    def capture_closure(self, tag, regex=None, func=None, single=True):
        """Start a new capture group with a closure step and return it."""
        return CaptureGroup(self, self.result).capture_closure(
            tag,
            regex=regex,
            func=func,
            single=single
        )
class SceneParser(Parser):
    """Parser for scene release names, built on the module's PATTERN_GROUPS."""

    def __init__(self):
        super(SceneParser, self).__init__(PATTERN_GROUPS)

    def capture_group(self, fragment):
        """Return the fragment value if it follows a '-' and has no right neighbour."""
        is_trailing_group = fragment.left_sep == '-' and not fragment.right
        return fragment.value if is_trailing_group else None

    def run(self, closures):
        """Run the capture groups over `closures` and return the built result.

        :type closures: list of CaperClosure
        """

        self.setup(closures)

        # Show name: everything up to the identifier or video details
        name_capture = self.capture_fragment('show_name', single=False)
        name_capture.until(fragment__re='identifier')
        name_capture.until(fragment__re='video')
        name_capture.execute()

        # Identifier and video details, stopping at the trailing "-GROUP"
        detail_capture = self.capture_fragment('identifier', regex='identifier', single=False)
        detail_capture.capture_fragment('video', regex='video', single=False)
        detail_capture.until(left_sep__eq='-', right__eq=None)
        detail_capture.execute()

        # Release group
        group_capture = self.capture_fragment('group', func=self.capture_group)
        group_capture.execute()

        self.print_tree(self.result.heads)

        self.result.build()
        return self.result

    def print_tree(self, heads):
        """Log every head and, recursively, the parents of its first node."""
        for head in heads:
            nodes = head if type(head) is list else [head]
            first = nodes[0]

            if type(first) is CaperFragmentNode:
                for fragment in first.fragments:
                    Logr.debug(fragment.value)
            else:
                Logr.debug(first.closure.value)

            for node in nodes:
                Logr.debug('\t' + str(node).ljust(55) + '\t' + str(node.weight) + '\t' + str(node.match))

            if nodes and first.parent:
                self.print_tree([first.parent])
GROUP_MATCHES = ['identifier']


class CaperNode(object):
    """Base node of the result tree; every node points back to its parent."""

    def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
        """
        :type parent: CaperNode
        :type weight: float
        """

        #: :type: caper.objects.CaperClosure
        self.closure = closure
        #: :type: CaperNode
        self.parent = parent
        #: :type: str
        self.tag = tag
        #: :type: float
        self.weight = weight
        #: :type: dict
        self.match = match
        #: :type: list of CaptureGroup
        self.finished_groups = []

    def next(self):
        """Return the subject that follows this node; implemented by subclasses."""
        raise NotImplementedError()


class CaperClosureNode(CaperNode):
    """Node representing a closure."""

    def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
        """
        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        super(CaperClosureNode, self).__init__(closure, parent, tag, weight, match)

    def next(self):
        """Return the first fragment of the closure, or None if it has none."""
        if self.closure and len(self.closure.fragments) > 0:
            return self.closure.fragments[0]
        return None


class CaperFragmentNode(CaperNode):
    """Node representing one or more consecutive fragments."""

    def __init__(self, closure, fragments, parent=None, tag=None, weight=None, match=None):
        """
        :type closure: caper.objects.CaperClosure
        :type fragments: list of caper.objects.CaperFragment
        """
        super(CaperFragmentNode, self).__init__(closure, parent, tag, weight, match)

        #: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment
        self.fragments = fragments

    def next(self):
        """Return the fragment right of the last one, else the closure to the right.

        Returns None when neither exists.
        """
        if len(self.fragments) > 0 and self.fragments[-1] and self.fragments[-1].right:
            return self.fragments[-1].right

        if self.closure.right:
            return self.closure.right

        return None


class CaperResult(object):
    """Holds the heads of the result tree and the weighted chains built from them."""

    def __init__(self):
        #: :type: list of CaperNode
        self.heads = []

        self.chains = []

    def build(self):
        """Turn every head into chains, weight them and sort by weight (best first).

        Each chain gets an extra weight: its share of the largest number of
        matched fragments seen in any chain (0.0 when nothing matched at all).
        """
        max_matched = 0

        for head in self.heads:
            for chain in self.combine_chain(head):
                if chain.num_matched > max_matched:
                    max_matched = chain.num_matched

                self.chains.append(chain)

        for chain in self.chains:
            # max_matched stays 0 when no fragment matched in any chain
            share = chain.num_matched / float(max_matched) if max_matched else 0.0
            chain.weights.append(share)
            chain.finish()

        self.chains.sort(key=lambda chain: chain.weight, reverse=True)

        for chain in self.chains:
            Logr.debug("chain weight: %.02f", chain.weight)
            Logr.debug("\tInfo: %s", chain.info)

            Logr.debug("\tWeights: %s", chain.weights)
            Logr.debug("\tNumber of Fragments Matched: %s", chain.num_matched)

    def combine_chain(self, subject, chain=None):
        """Walk from `subject` up to the root, collecting matches into chains.

        :param subject: a node, or a list of alternative nodes; every
                        alternative but the last continues on a copy of
                        `chain`, so each yields its own chain
        :type chain: CaperResultChain or None
        :rtype: list of CaperResultChain
        """
        nodes = subject if type(subject) is list else [subject]

        if chain is None:
            chain = CaperResultChain()

        result = []

        for x, node in enumerate(nodes):
            node_chain = chain if x == len(nodes) - 1 else chain.copy()

            # Reached the root: the chain is complete
            if not node.parent:
                result.append(node_chain)
                continue

            # Skip over closure nodes
            if type(node) is CaperClosureNode:
                result.extend(self.combine_chain(node.parent, node_chain))

            # Parse fragment matches
            if type(node) is CaperFragmentNode:
                node_chain.update(node)

                result.extend(self.combine_chain(node.parent, node_chain))

        return result


class CaperResultChain(object):
    """One candidate interpretation: collected matches and their weights."""

    def __init__(self):
        #: :type: float
        self.weight = None
        self.info = {}
        self.num_matched = 0

        self.weights = []

    def update(self, subject):
        """Add a fragment node to the chain; nodes without a weight are ignored.

        Matches are inserted at the front of their tag's list.
        """
        if subject.weight is None:
            return

        self.num_matched += len(subject.fragments) if subject.fragments is not None else 0
        self.weights.append(subject.weight)

        if subject.match:
            if subject.tag not in self.info:
                self.info[subject.tag] = []

            self.info[subject.tag].insert(0, subject.match)

    def finish(self):
        """Set `weight` to the mean of `weights` (0.0 if there are none)."""
        self.weight = sum(self.weights) / len(self.weights) if self.weights else 0.0

    def copy(self):
        """Return an independent copy (deep-copied info, copied weights)."""
        chain = CaperResultChain()

        chain.weight = self.weight
        chain.info = copy.deepcopy(self.info)

        chain.num_matched = self.num_matched
        chain.weights = copy.copy(self.weights)

        return chain
# Copyright 2013 Dean Gardiner
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from logr import Logr


class CaptureStep(object):
    """A single step of a capture group.

    A step matches its subject with a regex when one is set, otherwise with a
    callable when one is set, otherwise it takes the subject's raw value.
    """

    REPR_KEYS = ['regex', 'func', 'single']

    def __init__(self, capture_group, tag, source, regex=None, func=None, single=None):
        """
        @type capture_group: CaptureGroup
        @type tag: str
        @type source: str
        @type regex: str
        @type func: function
        @type single: bool
        """
        self.capture_group = capture_group

        self.tag, self.source = tag, source
        self.regex, self.func, self.single = regex, func, single

    def _get_next_subject(self, parser):
        """Fetch the next fragment or closure from the parser, per ``source``.

        Returns None when the parser has nothing left of that kind; raises
        NotImplementedError for any other ``source`` value.
        """
        source = self.source

        if source == 'fragment':
            return parser.next_fragment() if parser.fragment_available() else None

        if source == 'closure':
            return parser.next_closure() if parser.closure_available() else None

        raise NotImplementedError()

    def execute(self, fragment):
        """Run this step against ``fragment``.

        @return: tuple of (matched, weight, match, num_fragments)
        """
        no_match = (False, None, None, 1)

        if self.regex:
            matcher = self.capture_group.parser.matcher
            weight, match, num_fragments = matcher.fragment_match(fragment, self.regex)
            Logr.debug('(execute) [regex] tag: "%s"', self.tag)

            return (True, weight, match, num_fragments) if match else no_match

        if self.func:
            match = self.func(fragment)
            Logr.debug('(execute) [func] %s += "%s"', self.tag, match)

            return (True, 1.0, match, 1) if match else no_match

        # Neither regex nor func: capture the raw fragment value
        Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)
        return True, 1.0, fragment.value, 1

    def __repr__(self):
        # Only attributes holding a truthy value are shown
        shown = []
        for key in self.REPR_KEYS:
            if hasattr(self, key) and getattr(self, key):
                shown.append(key + '=' + repr(getattr(self, key)))

        suffix = ', ' + ', '.join(shown) if shown else ''

        return "CaptureStep('%s'%s)" % (self.tag, suffix)


# ---------------------------------------------------------------------------
# Patch boundary kept from the original diff text:
# diff --git a/libs/logr/__init__.py b/libs/logr/__init__.py
# new file mode 100644
# index 0000000..21909b6
# --- /dev/null
# +++ b/libs/logr/__init__.py
# @@ -0,0 +1,201 @@
# ---------------------------------------------------------------------------
# logr - Simple python logging wrapper
# Packed by Dean Gardiner
#
# File part of:
#    rdio-sock - Rdio WebSocket Library
#    Copyright (C) 2013 fzza-

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import inspect
import logging
import os
import sys

IGNORE = ()
PY3 = sys.version_info[0] == 3


class Logr(object):
    """Static logging facade that names each logger after the calling code.

    Loggers are created lazily, cached in ``loggers`` by name, and all share
    the single ``handler`` installed by ``configure``.
    """

    loggers = {}
    handler = None

    @staticmethod
    def configure(level=logging.WARNING, handler=None, formatter=None):
        """Configure Logr

        @param level: Level set on the handler
        @type level: int

        @param handler: Logger message handler
        @type handler: logging.Handler or None

        @param formatter: Logger message Formatter
        @type formatter: logging.Formatter or None
        """
        if formatter is None:
            formatter = LogrFormatter()

        if handler is None:
            handler = logging.StreamHandler()

        handler.setFormatter(formatter)
        handler.setLevel(level)
        Logr.handler = handler

    @staticmethod
    def configure_check():
        """Apply the default configuration if no handler is installed yet."""
        if Logr.handler is None:
            Logr.configure()

    @staticmethod
    def _get_name_from_path(filename):
        """Return the file name of ``filename`` without directory or extension.

        @rtype: str
        """
        try:
            return os.path.splitext(os.path.basename(filename))[0]
        except (TypeError, AttributeError):
            # filename is None when the module has no __file__; Python 3
            # raises TypeError for that, Python 2 raises AttributeError.
            return ""

    @staticmethod
    def get_logger_name():
        """Derive a logger name from the first nameable frame on the call stack.

        A frame whose first local is a bound ``self`` is named
        "<module>.<class>" (just the class name inside ``__main__``). A frame
        running a module-level function is named after the module, and a frame
        running module-level code after its file. Names in ``IGNORE`` are
        skipped.

        @rtype: str
        """
        # Only the frame objects are used, so skip the source-context lookup
        stack = inspect.stack(0)

        try:
            for frame_record in stack:
                frame = frame_record[0]
                code = frame.f_code
                name = None

                # Try find name of function defined inside a class
                if len(code.co_varnames) > 0:
                    self_argument = code.co_varnames[0]

                    if self_argument == 'self' and self_argument in frame.f_locals:
                        class_ = frame.f_locals[self_argument].__class__

                        if class_.__module__ != '__main__':
                            name = class_.__module__ + '.' + class_.__name__
                        else:
                            name = class_.__name__

                # Try find name of function defined outside of a class
                if name is None:
                    if code.co_name in frame.f_globals:
                        name = frame.f_globals.get('__name__')
                        if name == '__main__':
                            name = Logr._get_name_from_path(frame.f_globals.get('__file__'))
                    elif code.co_name == '<module>':
                        # Module-level code: its code object is named '<module>'
                        name = Logr._get_name_from_path(frame.f_globals.get('__file__'))

                if name is not None and name not in IGNORE:
                    return name
        finally:
            # The stack holds this function's own frame; drop it so no
            # frame <-> locals reference cycle is left behind.
            del stack

        return ""

    @staticmethod
    def get_logger():
        """Get or create logger (if it does not exist)

        @rtype: logging.Logger
        """
        name = Logr.get_logger_name()
        if name not in Logr.loggers:
            Logr.configure_check()
            Logr.loggers[name] = logging.Logger(name)
            Logr.loggers[name].addHandler(Logr.handler)
        return Logr.loggers[name]

    @staticmethod
    def debug(msg, *args, **kwargs):
        Logr.get_logger().debug(msg, *args, **kwargs)

    @staticmethod
    def info(msg, *args, **kwargs):
        Logr.get_logger().info(msg, *args, **kwargs)

    @staticmethod
    def warning(msg, *args, **kwargs):
        Logr.get_logger().warning(msg, *args, **kwargs)

    warn = warning

    @staticmethod
    def error(msg, *args, **kwargs):
        Logr.get_logger().error(msg, *args, **kwargs)

    @staticmethod
    def exception(msg, *args, **kwargs):
        Logr.get_logger().exception(msg, *args, **kwargs)

    @staticmethod
    def critical(msg, *args, **kwargs):
        Logr.get_logger().critical(msg, *args, **kwargs)

    fatal = critical

    @staticmethod
    def log(level, msg, *args, **kwargs):
        Logr.get_logger().log(level, msg, *args, **kwargs)


class LogrFormatter(logging.Formatter):
    """Formatter emitting "<time> <name> <level> <message>" in fixed-width columns.

    The logger name is cut to its last ``LENGTH_NAME`` characters and
    right-aligned; the level name is cut to ``LENGTH_LEVEL_NAME`` characters
    and left-aligned.
    """

    LENGTH_NAME = 32
    LENGTH_LEVEL_NAME = 5

    def __init__(self, fmt=None, datefmt=None):
        if sys.version_info[:2] > (2, 6):
            super(LogrFormatter, self).__init__(fmt, datefmt)
        else:
            # NOTE(review): presumably needed because logging.Formatter does
            # not support super() on Python <= 2.6 -- confirm before removing.
            logging.Formatter.__init__(self, fmt, datefmt)

    def usesTime(self):
        """Always format the record time; the layout always includes it."""
        return True

    def format(self, record):
        """Render ``record`` as one line, followed by any traceback/stack text.

        @type record: logging.LogRecord
        @rtype: str
        """
        record.message = record.getMessage()
        if self.usesTime():
            record.asctime = self.formatTime(record, self.datefmt)

        s = "%(asctime)s %(name)s %(levelname)s %(message)s" % {
            'asctime': record.asctime,
            'name': record.name[-self.LENGTH_NAME:].rjust(self.LENGTH_NAME, ' '),
            'levelname': record.levelname[:self.LENGTH_LEVEL_NAME].ljust(self.LENGTH_LEVEL_NAME, ' '),
            'message': record.message
        }

        if record.exc_info:
            # Cache the formatted traceback on the record
            if not record.exc_text:
                record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            if s[-1:] != "\n":
                s += "\n"
            try:
                s += record.exc_text
            except UnicodeError:
                # Fallback for byte-string tracebacks that fail to concatenate
                s = s + record.exc_text.decode(sys.getfilesystemencoding(),
                                               'replace')

        # Records carry stack_info only on Python 3.2+ (stack_info=True calls)
        stack_info = getattr(record, 'stack_info', None)
        if stack_info:
            if s[-1:] != "\n":
                s += "\n"
            s += self.formatStack(stack_info)

        return s


def xrange_six(start, stop=None, step=None):
    """Lazy integer range on both Python 2 (``xrange``) and Python 3 (``range``).

    Mirrors the built-in call forms: ``xrange_six(stop)``,
    ``xrange_six(start, stop)`` and ``xrange_six(start, stop, step)``.
    """
    # xrange is only looked up on Python 2, where it exists
    range_type = range if PY3 else xrange

    if stop is None:
        # Single-bound form: the only bound given is the stop value
        start, stop = 0, start

    if step is None:
        step = 1

    return range_type(start, stop, step)