diff --git a/couchpotato/core/media/_base/library/__init__.py b/couchpotato/core/media/_base/library/__init__.py new file mode 100644 index 0000000..3e1babe --- /dev/null +++ b/couchpotato/core/media/_base/library/__init__.py @@ -0,0 +1,6 @@ +from .main import Library + +def autoload(): + return Library() + +config = [] diff --git a/couchpotato/core/media/_base/library/base.py b/couchpotato/core/media/_base/library/base.py new file mode 100644 index 0000000..553eff5 --- /dev/null +++ b/couchpotato/core/media/_base/library/base.py @@ -0,0 +1,13 @@ +from couchpotato.core.event import addEvent +from couchpotato.core.plugins.base import Plugin + + +class LibraryBase(Plugin): + + _type = None + + def initType(self): + addEvent('library.types', self.getType) + + def getType(self): + return self._type diff --git a/couchpotato/core/media/_base/library/main.py b/couchpotato/core/media/_base/library/main.py new file mode 100644 index 0000000..a723de5 --- /dev/null +++ b/couchpotato/core/media/_base/library/main.py @@ -0,0 +1,18 @@ +from couchpotato.core.event import addEvent, fireEvent +from couchpotato.core.media._base.library.base import LibraryBase + + +class Library(LibraryBase): + def __init__(self): + addEvent('library.title', self.title) + + def title(self, library): + return fireEvent( + 'library.query', + library, + + condense = False, + include_year = False, + include_identifier = False, + single = True + ) diff --git a/couchpotato/core/media/_base/matcher/__init__.py b/couchpotato/core/media/_base/matcher/__init__.py new file mode 100644 index 0000000..1e4cda3 --- /dev/null +++ b/couchpotato/core/media/_base/matcher/__init__.py @@ -0,0 +1,6 @@ +from .main import Matcher + +def autoload(): + return Matcher() + +config = [] diff --git a/couchpotato/core/media/_base/matcher/base.py b/couchpotato/core/media/_base/matcher/base.py new file mode 100644 index 0000000..8651126 --- /dev/null +++ b/couchpotato/core/media/_base/matcher/base.py @@ -0,0 +1,84 @@ +from couchpotato.core.event import addEvent +from couchpotato.core.helpers.encoding import simplifyString +from couchpotato.core.logger import CPLog +from couchpotato.core.plugins.base import Plugin + +log = CPLog(__name__) + + +class MatcherBase(Plugin): + type = None + + def __init__(self): + if self.type: + addEvent('%s.matcher.correct' % self.type, self.correct) + + def correct(self, chain, release, media, quality): + raise NotImplementedError() + + def flattenInfo(self, info): + # Flatten dictionary of matches (chain info) + if isinstance(info, dict): + return dict([(key, self.flattenInfo(value)) for key, value in info.items()]) + + # Flatten matches + result = None + + for match in info: + if isinstance(match, dict): + if result is None: + result = {} + + for key, value in match.items(): + if key not in result: + result[key] = [] + + result[key].append(value) + else: + if result is None: + result = [] + + result.append(match) + + return result + + def constructFromRaw(self, match): + if not match: + return None + + parts = [ + ''.join([ + y for y in x[1:] if y + ]) for x in match + ] + + return ''.join(parts)[:-1].strip() + + def simplifyValue(self, value): + if not value: + return value + + if isinstance(value, basestring): + return simplifyString(value) + + if isinstance(value, list): + return [self.simplifyValue(x) for x in value] + + raise ValueError("Unsupported value type") + + def chainMatch(self, chain, group, tags): + info = self.flattenInfo(chain.info[group]) + + found_tags = [] + for tag, accepted in tags.items(): + values = 
[self.simplifyValue(x) for x in info.get(tag, [None])] + + if any([val in accepted for val in values]): + found_tags.append(tag) + + log.debug('tags found: %s, required: %s' % (found_tags, tags.keys())) + + if set(tags.keys()) == set(found_tags): + return True + + return all([key in found_tags for key, value in tags.items()]) diff --git a/couchpotato/core/media/_base/matcher/main.py b/couchpotato/core/media/_base/matcher/main.py new file mode 100644 index 0000000..2034249 --- /dev/null +++ b/couchpotato/core/media/_base/matcher/main.py @@ -0,0 +1,89 @@ +from couchpotato.core.event import addEvent, fireEvent +from couchpotato.core.helpers.variable import possibleTitles +from couchpotato.core.logger import CPLog +from couchpotato.core.media._base.matcher.base import MatcherBase +from caper import Caper + +log = CPLog(__name__) + + +class Matcher(MatcherBase): + + def __init__(self): + super(Matcher, self).__init__() + + self.caper = Caper() + + addEvent('matcher.parse', self.parse) + addEvent('matcher.match', self.match) + + addEvent('matcher.flatten_info', self.flattenInfo) + addEvent('matcher.construct_from_raw', self.constructFromRaw) + + addEvent('matcher.correct_title', self.correctTitle) + addEvent('matcher.correct_quality', self.correctQuality) + + def parse(self, name, parser='scene'): + return self.caper.parse(name, parser) + + def match(self, release, media, quality): + match = fireEvent('matcher.parse', release['name'], single = True) + + if len(match.chains) < 1: + log.info2('Wrong: %s, unable to parse release name (no chains)', release['name']) + return False + + for chain in match.chains: + if fireEvent('%s.matcher.correct' % media['type'], chain, release, media, quality, single = True): + return chain + + return False + + def correctTitle(self, chain, media): + root_library = media['library']['root_library'] + + if 'show_name' not in chain.info or not len(chain.info['show_name']): + log.info('Wrong: missing show name in parsed result') + return False + + # Get the lower-case parsed show name from the chain + chain_words = [x.lower() for x in chain.info['show_name']] + + # Build a list of possible titles of the media we are searching for + titles = root_library['info']['titles'] + + # Add year suffix titles (will result in ['', ' ', '', ...]) + suffixes = [None, root_library['info']['year']] + + titles = [ + title + ((' %s' % suffix) if suffix else '') + for title in titles + for suffix in suffixes + ] + + # Check show titles match + # TODO check xem names + for title in titles: + for valid_words in [x.split(' ') for x in possibleTitles(title)]: + + if valid_words == chain_words: + return True + + return False + + def correctQuality(self, chain, quality, quality_map): + if quality['identifier'] not in quality_map: + log.info2('Wrong: unknown preferred quality %s', quality['identifier']) + return False + + if 'video' not in chain.info: + log.info2('Wrong: no video tags found') + return False + + video_tags = quality_map[quality['identifier']] + + if not self.chainMatch(chain, 'video', video_tags): + log.info2('Wrong: %s tags not in chain', video_tags) + return False + + return True diff --git a/libs/caper/__init__.py b/libs/caper/__init__.py new file mode 100644 index 0000000..95fb6d7 --- /dev/null +++ b/libs/caper/__init__.py @@ -0,0 +1,195 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from logr import Logr +from caper.matcher import FragmentMatcher +from caper.objects import CaperFragment, CaperClosure +from caper.parsers.anime import AnimeParser +from caper.parsers.scene import SceneParser +from caper.parsers.usenet import UsenetParser + + +__version_info__ = ('0', '3', '1') +__version_branch__ = 'master' + +__version__ = "%s%s" % ( + '.'.join(__version_info__), + '-' + __version_branch__ if __version_branch__ else '' +) + + +CL_START_CHARS = ['(', '[', '<', '>'] +CL_END_CHARS = [')', ']', '<', '>'] +CL_END_STRINGS = [' - '] + +STRIP_START_CHARS = ''.join(CL_START_CHARS) +STRIP_END_CHARS = ''.join(CL_END_CHARS) +STRIP_CHARS = ''.join(['_', ' ', '.']) + +FRAGMENT_SEPARATORS = ['.', '-', '_', ' '] + + +CL_START = 0 +CL_END = 1 + + +class Caper(object): + def __init__(self, debug=False): + self.debug = debug + + self.parsers = { + 'anime': AnimeParser, + 'scene': SceneParser, + 'usenet': UsenetParser + } + + def _closure_split(self, name): + """ + :type name: str + + :rtype: list of CaperClosure + """ + + closures = [] + + def end_closure(closures, buf): + buf = buf.strip(STRIP_CHARS) + if len(buf) < 2: + return + + cur = CaperClosure(len(closures), buf) + cur.left = closures[len(closures) - 1] if len(closures) > 0 else None + + if cur.left: + cur.left.right = cur + + closures.append(cur) + + state = CL_START + buf = "" + for x, ch in enumerate(name): + # Check for start characters + if state == CL_START and ch in CL_START_CHARS: + end_closure(closures, buf) + + state = CL_END + buf = "" + + buf += ch + + if state == CL_END and ch in CL_END_CHARS: + # End character found, create the closure + end_closure(closures, buf) + + state = CL_START + buf = "" + elif state == CL_START and buf[-3:] in CL_END_STRINGS: + # End string found, create the closure + end_closure(closures, buf[:-3]) + + state = CL_START + buf = "" + + end_closure(closures, buf) + + return closures + + def _clean_closure(self, closure): + """ + :type closure: str + + :rtype: str + """ + + return closure.lstrip(STRIP_START_CHARS).rstrip(STRIP_END_CHARS) + + def _fragment_split(self, closures): + """ + :type closures: list of CaperClosure + + :rtype: list of CaperClosure + """ + + cur_position = 0 + cur = None + + def end_fragment(fragments, cur, cur_position): + cur.position = cur_position + + cur.left = fragments[len(fragments) - 1] if len(fragments) > 0 else None + if cur.left: + cur.left_sep = cur.left.right_sep + cur.left.right = cur + + cur.right_sep = ch + + fragments.append(cur) + + for closure in closures: + closure.fragments = [] + + separator_buffer = "" + + for x, ch in enumerate(self._clean_closure(closure.value)): + if not cur: + cur = CaperFragment(closure) + + if ch in FRAGMENT_SEPARATORS: + if cur.value: + separator_buffer = "" + + separator_buffer += ch + + if cur.value or not closure.fragments: + end_fragment(closure.fragments, cur, cur_position) + elif len(separator_buffer) > 1: + cur.value = separator_buffer.strip() + + if cur.value: + end_fragment(closure.fragments, cur, cur_position) + + separator_buffer = "" + + # Reset + cur = None + cur_position += 1 + else: + cur.value += 
ch + + # Finish parsing the last fragment + if cur and cur.value: + end_fragment(closure.fragments, cur, cur_position) + + # Reset + cur_position = 0 + cur = None + + return closures + + def parse(self, name, parser='scene'): + closures = self._closure_split(name) + closures = self._fragment_split(closures) + + # Print closures + for closure in closures: + Logr.debug("closure [%s]", closure.value) + + for fragment in closure.fragments: + Logr.debug("\tfragment [%s]", fragment.value) + + if parser not in self.parsers: + raise ValueError("Unknown parser") + + # TODO autodetect the parser type + return self.parsers[parser](self.debug).run(closures) diff --git a/libs/caper/constraint.py b/libs/caper/constraint.py new file mode 100644 index 0000000..e092d33 --- /dev/null +++ b/libs/caper/constraint.py @@ -0,0 +1,134 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class CaptureConstraint(object): + def __init__(self, capture_group, constraint_type, comparisons=None, target=None, **kwargs): + """Capture constraint object + + :type capture_group: CaptureGroup + """ + + self.capture_group = capture_group + + self.constraint_type = constraint_type + self.target = target + + self.comparisons = comparisons if comparisons else [] + self.kwargs = {} + + for orig_key, value in kwargs.items(): + key = orig_key.split('__') + if len(key) != 2: + self.kwargs[orig_key] = value + continue + name, method = key + + method = 'constraint_match_' + method + if not hasattr(self, method): + self.kwargs[orig_key] = value + continue + + self.comparisons.append((name, getattr(self, method), value)) + + def execute(self, parent_node, node, **kwargs): + func_name = 'constraint_%s' % self.constraint_type + + if hasattr(self, func_name): + return getattr(self, func_name)(parent_node, node, **kwargs) + + raise ValueError('Unknown constraint type "%s"' % self.constraint_type) + + # + # Node Matching + # + + def constraint_match(self, parent_node, node): + results = [] + total_weight = 0 + + for name, method, argument in self.comparisons: + weight, success = method(node, name, argument) + total_weight += weight + results.append(success) + + return total_weight / (float(len(results)) or 1), all(results) if len(results) > 0 else False + + def constraint_match_eq(self, node, name, expected): + if not hasattr(node, name): + return 1.0, False + + return 1.0, getattr(node, name) == expected + + def constraint_match_re(self, node, name, arg): + # Node match + if name == 'node': + group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0) + + weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(node, group) + return weight, weight > minimum_weight + + # Regex match + if type(arg).__name__ == 'SRE_Pattern': + return 1.0, arg.match(getattr(node, name)) is not None + + # Value match + if hasattr(node, name): + match = self.capture_group.parser.matcher.value_match(getattr(node, name), arg, single=True) + return 1.0, match is not None + + raise 
ValueError("Unknown constraint match type '%s'" % name) + + # + # Result + # + + def constraint_result(self, parent_node, fragment): + ctag = self.kwargs.get('tag') + if not ctag: + return 0, False + + ckey = self.kwargs.get('key') + + for tag, result in parent_node.captured(): + if tag != ctag: + continue + + if not ckey or ckey in result.keys(): + return 1.0, True + + return 0.0, False + + # + # Failure + # + + def constraint_failure(self, parent_node, fragment, match): + if not match or not match.success: + return 1.0, True + + return 0, False + + # + # Success + # + + def constraint_success(self, parent_node, fragment, match): + if match and match.success: + return 1.0, True + + return 0, False + + def __repr__(self): + return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons) diff --git a/libs/caper/group.py b/libs/caper/group.py new file mode 100644 index 0000000..8f0399e --- /dev/null +++ b/libs/caper/group.py @@ -0,0 +1,284 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from logr import Logr +from caper import CaperClosure, CaperFragment +from caper.helpers import clean_dict +from caper.result import CaperFragmentNode, CaperClosureNode +from caper.step import CaptureStep +from caper.constraint import CaptureConstraint + + +class CaptureGroup(object): + def __init__(self, parser, result): + """Capture group object + + :type parser: caper.parsers.base.Parser + :type result: caper.result.CaperResult + """ + + self.parser = parser + self.result = result + + #: @type: list of CaptureStep + self.steps = [] + + #: type: str + self.step_source = None + + #: @type: list of CaptureConstraint + self.pre_constraints = [] + + #: :type: list of CaptureConstraint + self.post_constraints = [] + + def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs): + Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single) + + if self.step_source != 'fragment': + if self.step_source is None: + self.step_source = 'fragment' + else: + raise ValueError("Unable to mix fragment and closure capturing in a group") + + self.steps.append(CaptureStep( + self, tag, + 'fragment', + regex=regex, + func=func, + single=single, + **kwargs + )) + + return self + + def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs): + Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single) + + if self.step_source != 'closure': + if self.step_source is None: + self.step_source = 'closure' + else: + raise ValueError("Unable to mix fragment and closure capturing in a group") + + self.steps.append(CaptureStep( + self, tag, + 'closure', + regex=regex, + func=func, + single=single, + **kwargs + )) + + return self + + def until_closure(self, **kwargs): + self.pre_constraints.append(CaptureConstraint(self, 'match', target='closure', **kwargs)) + + return self + + def until_fragment(self, **kwargs): + self.pre_constraints.append(CaptureConstraint(self, 'match', target='fragment', **kwargs)) + + return self + + def 
until_result(self, **kwargs): + self.pre_constraints.append(CaptureConstraint(self, 'result', **kwargs)) + + return self + + def until_failure(self, **kwargs): + self.post_constraints.append(CaptureConstraint(self, 'failure', **kwargs)) + + return self + + def until_success(self, **kwargs): + self.post_constraints.append(CaptureConstraint(self, 'success', **kwargs)) + + return self + + def parse_subject(self, parent_head, subject): + Logr.debug("parse_subject (%s) subject: %s", self.step_source, repr(subject)) + + if type(subject) is CaperClosure: + return self.parse_closure(parent_head, subject) + + if type(subject) is CaperFragment: + return self.parse_fragment(parent_head, subject) + + raise ValueError('Unknown subject (%s)', subject) + + def parse_fragment(self, parent_head, subject): + parent_node = parent_head[0] if type(parent_head) is list else parent_head + + nodes, match = self.match(parent_head, parent_node, subject) + + # Capturing broke on constraint, return now + if not match: + return nodes + + Logr.debug('created fragment node with subject.value: "%s"' % subject.value) + + result = [CaperFragmentNode( + parent_node.closure, + subject.take_right(match.num_fragments), + parent_head, + match + )] + + # Branch if the match was indefinite (weight below 1.0) + if match.result and match.weight < 1.0: + if match.num_fragments == 1: + result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head)) + else: + nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head)) + + nodes.append(result[0] if len(result) == 1 else result) + + return nodes + + def parse_closure(self, parent_head, subject): + parent_node = parent_head[0] if type(parent_head) is list else parent_head + + nodes, match = self.match(parent_head, parent_node, subject) + + # Capturing broke on constraint, return now + if not match: + return nodes + + Logr.debug('created closure node with subject.value: "%s"' % subject.value) + + result = [CaperClosureNode( + subject, + parent_head, + match + )] + + # Branch if the match was indefinite (weight below 1.0) + if match.result and match.weight < 1.0: + if match.num_fragments == 1: + result.append(CaperClosureNode(subject, parent_head)) + else: + nodes.append(CaperClosureNode(subject, parent_head)) + + nodes.append(result[0] if len(result) == 1 else result) + + return nodes + + def match(self, parent_head, parent_node, subject): + nodes = [] + + # Check pre constaints + broke, definite = self.check_constraints(self.pre_constraints, parent_head, subject) + + if broke: + nodes.append(parent_head) + + if definite: + return nodes, None + + # Try match subject against the steps available + match = None + + for step in self.steps: + if step.source == 'closure' and type(subject) is not CaperClosure: + pass + elif step.source == 'fragment' and type(subject) is CaperClosure: + Logr.debug('Closure encountered on fragment step, jumping into fragments') + return [CaperClosureNode(subject, parent_head, None)], None + + match = step.execute(subject) + + if match.success: + if type(match.result) is dict: + match.result = clean_dict(match.result) + + Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % ( + match.weight, match.result, match.num_fragments + )) + + step.matched = True + + break + + if all([step.single and step.matched for step in self.steps]): + Logr.debug('All steps completed, group finished') + parent_node.finished_groups.append(self) + return nodes, match + + # Check post constraints + broke, definite = 
self.check_constraints(self.post_constraints, parent_head, subject, match=match) + if broke: + return nodes, None + + return nodes, match + + def check_constraints(self, constraints, parent_head, subject, **kwargs): + parent_node = parent_head[0] if type(parent_head) is list else parent_head + + # Check constraints + for constraint in [c for c in constraints if c.target == subject.__key__ or not c.target]: + Logr.debug("Testing constraint %s against subject %s", repr(constraint), repr(subject)) + + weight, success = constraint.execute(parent_node, subject, **kwargs) + + if success: + Logr.debug('capturing broke on "%s" at %s', subject.value, constraint) + parent_node.finished_groups.append(self) + + return True, weight == 1.0 + + return False, None + + def execute(self): + heads_finished = None + + while heads_finished is None or not (len(heads_finished) == len(self.result.heads) and all(heads_finished)): + heads_finished = [] + + heads = self.result.heads + self.result.heads = [] + + for head in heads: + node = head[0] if type(head) is list else head + + if self in node.finished_groups: + Logr.debug("head finished for group") + self.result.heads.append(head) + heads_finished.append(True) + continue + + Logr.debug('') + + Logr.debug(node) + + next_subject = node.next() + + Logr.debug('----------[%s] (%s)----------' % (next_subject, repr(next_subject.value) if next_subject else None)) + + if next_subject: + for node_result in self.parse_subject(head, next_subject): + self.result.heads.append(node_result) + + Logr.debug('Heads: %s', self.result.heads) + + heads_finished.append(self in node.finished_groups or next_subject is None) + + if len(self.result.heads) == 0: + self.result.heads = heads + + Logr.debug("heads_finished: %s, self.result.heads: %s", heads_finished, self.result.heads) + + Logr.debug("group finished") diff --git a/libs/caper/helpers.py b/libs/caper/helpers.py new file mode 100644 index 0000000..ded5d48 --- /dev/null +++ b/libs/caper/helpers.py @@ -0,0 +1,80 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
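+# Illustrative usage sketch for the dict helpers defined below (example data
+# only; the behaviour is read from clean_dict/update_dict in this module):
+#
+#   info = {'season': '3', 'episode': None}
+#   clean_dict(info)                       # -> {'season': '3'} (None pruned)
+#
+#   result = {'source': 'WEB'}
+#   update_dict(result, {'source': 'DL'})  # duplicate keys collect into lists
+#   # result == {'source': ['WEB', 'DL']}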
+ +import sys + + +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + + +def is_list_type(obj, element_type): + if not type(obj) is list: + return False + + if len(obj) < 1: + raise ValueError("Unable to determine list element type from empty list") + + return type(obj[0]) is element_type + + +def clean_dict(target, remove=None): + """Recursively remove items matching a value 'remove' from the dictionary + + :type target: dict + """ + if type(target) is not dict: + raise ValueError("Target is required to be a dict") + + remove_keys = [] + for key in target.keys(): + if type(target[key]) is not dict: + if target[key] == remove: + remove_keys.append(key) + else: + clean_dict(target[key], remove) + + for key in remove_keys: + target.pop(key) + + return target + + +def update_dict(a, b): + for key, value in b.items(): + if key not in a: + a[key] = value + elif isinstance(a[key], dict) and isinstance(value, dict): + update_dict(a[key], value) + elif isinstance(a[key], list): + a[key].append(value) + else: + a[key] = [a[key], value] + + +def xrange_six(start, stop=None, step=None): + if stop is not None and step is not None: + if PY3: + return range(start, stop, step) + else: + return xrange(start, stop, step) + else: + if PY3: + return range(start) + else: + return xrange(start) + + +def delta_seconds(td): + return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 1e6) / 1e6 diff --git a/libs/caper/matcher.py b/libs/caper/matcher.py new file mode 100644 index 0000000..3acf2e6 --- /dev/null +++ b/libs/caper/matcher.py @@ -0,0 +1,144 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
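+# FragmentMatcher is constructed from a list of (group_name, patterns) pairs.
+# A sketch of the pattern shapes accepted by construct_patterns() below, with
+# made-up group/tag names for illustration:
+#
+#   PATTERN_GROUPS = [
+#       ('video', [
+#           r'(?P<aspect>FS|WS)',                       # single-fragment regex
+#           (r'(?P<resolution>%s)', ['480p', '720p']),  # OR-list joined with '|'
+#           ('(?P<source>WEB)', '(?P<rip>DL|Rip)'),     # multi-fragment chain
+#       ]),
+#       ('identifier', [
+#           (1.0, [r'^S(?P<season>\d+)E(?P<episode>\d+)$']),  # weighted group
+#       ]),
+#   ]
+#
+# Captured values are taken from the regexes' named groups via groupdict().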
+ +from caper.helpers import is_list_type, update_dict, delta_seconds +from datetime import datetime +from logr import Logr +import re + + +class FragmentMatcher(object): + def __init__(self, pattern_groups): + self.regex = {} + + self.construct_patterns(pattern_groups) + + def construct_patterns(self, pattern_groups): + compile_start = datetime.now() + compile_count = 0 + + for group_name, patterns in pattern_groups: + if group_name not in self.regex: + self.regex[group_name] = [] + + # Transform into weight groups + if type(patterns[0]) is str or type(patterns[0][0]) not in [int, float]: + patterns = [(1.0, patterns)] + + for weight, patterns in patterns: + weight_patterns = [] + + for pattern in patterns: + # Transform into multi-fragment patterns + if type(pattern) is str: + pattern = (pattern,) + + if type(pattern) is tuple and len(pattern) == 2: + if type(pattern[0]) is str and is_list_type(pattern[1], str): + pattern = (pattern,) + + result = [] + for value in pattern: + if type(value) is tuple: + if len(value) == 2: + # Construct OR-list pattern + value = value[0] % '|'.join(value[1]) + elif len(value) == 1: + value = value[0] + + result.append(re.compile(value, re.IGNORECASE)) + compile_count += 1 + + weight_patterns.append(tuple(result)) + + self.regex[group_name].append((weight, weight_patterns)) + + Logr.info("Compiled %s patterns in %ss", compile_count, delta_seconds(datetime.now() - compile_start)) + + def find_group(self, name): + for group_name, weight_groups in self.regex.items(): + if group_name and group_name == name: + return group_name, weight_groups + + return None, None + + def value_match(self, value, group_name=None, single=True): + result = None + + for group, weight_groups in self.regex.items(): + if group_name and group != group_name: + continue + + # TODO handle multiple weights + weight, patterns = weight_groups[0] + + for pattern in patterns: + match = pattern[0].match(value) + if not match: + continue + + if result is None: + result = {} + if group not in result: + result[group] = {} + + result[group].update(match.groupdict()) + + if single: + return result + + return result + + def fragment_match(self, fragment, group_name=None): + """Follow a fragment chain to try find a match + + :type fragment: caper.objects.CaperFragment + :type group_name: str or None + + :return: The weight of the match found between 0.0 and 1.0, + where 1.0 means perfect match and 0.0 means no match + :rtype: (float, dict, int) + """ + + group_name, weight_groups = self.find_group(group_name) + + for weight, patterns in weight_groups: + for pattern in patterns: + cur_fragment = fragment + success = True + result = {} + + # Ignore empty patterns + if len(pattern) < 1: + break + + for fragment_pattern in pattern: + if not cur_fragment: + success = False + break + + match = fragment_pattern.match(cur_fragment.value) + if match: + update_dict(result, match.groupdict()) + else: + success = False + break + + cur_fragment = cur_fragment.right if cur_fragment else None + + if success: + Logr.debug("Found match with weight %s" % weight) + return float(weight), result, len(pattern) + + return 0.0, None, 1 diff --git a/libs/caper/objects.py b/libs/caper/objects.py new file mode 100644 index 0000000..b7d9084 --- /dev/null +++ b/libs/caper/objects.py @@ -0,0 +1,124 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from caper.helpers import xrange_six + + +class CaperClosure(object): + __key__ = 'closure' + + def __init__(self, index, value): + #: :type: int + self.index = index + + #: :type: str + self.value = value + + #: :type: CaperClosure + self.left = None + #: :type: CaperClosure + self.right = None + + #: :type: list of CaperFragment + self.fragments = [] + + def __str__(self): + return "" % repr(self.result) + + def __repr__(self): + return self.__str__() diff --git a/libs/caper/parsers/__init__.py b/libs/caper/parsers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/caper/parsers/anime.py b/libs/caper/parsers/anime.py new file mode 100644 index 0000000..86c7091 --- /dev/null +++ b/libs/caper/parsers/anime.py @@ -0,0 +1,88 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from caper.parsers.base import Parser + + +REGEX_GROUP = re.compile(r'(\(|\[)(?P.*?)(\)|\])', re.IGNORECASE) + + +PATTERN_GROUPS = [ + ('identifier', [ + r'S(?P\d+)E(?P\d+)', + r'(S(?P\d+))|(E(?P\d+))', + + r'Ep(?P\d+)', + r'$(?P\d+)^', + + (r'Episode', r'(?P\d+)'), + ]), + ('video', [ + (r'(?P%s)', [ + 'Hi10P' + ]), + (r'.(?P%s)', [ + '720p', + '1080p', + + '960x720', + '1920x1080' + ]), + (r'(?P%s)', [ + 'BD' + ]), + ]), + ('audio', [ + (r'(?P%s)', [ + 'FLAC' + ]), + ]) +] + + +class AnimeParser(Parser): + def __init__(self, debug=False): + super(AnimeParser, self).__init__(PATTERN_GROUPS, debug) + + def capture_group(self, fragment): + match = REGEX_GROUP.match(fragment.value) + + if not match: + return None + + return match.group('group') + + def run(self, closures): + """ + :type closures: list of CaperClosure + """ + + self.setup(closures) + + self.capture_closure('group', func=self.capture_group)\ + .execute(once=True) + + self.capture_fragment('show_name', single=False)\ + .until_fragment(value__re='identifier')\ + .until_fragment(value__re='video')\ + .execute() + + self.capture_fragment('identifier', regex='identifier') \ + .capture_fragment('video', regex='video', single=False) \ + .capture_fragment('audio', regex='audio', single=False) \ + .execute() + + self.result.build() + return self.result diff --git a/libs/caper/parsers/base.py b/libs/caper/parsers/base.py new file mode 100644 index 0000000..16bbc19 --- /dev/null +++ b/libs/caper/parsers/base.py @@ -0,0 +1,84 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from caper import FragmentMatcher +from caper.group import CaptureGroup +from caper.result import CaperResult, CaperClosureNode, CaperRootNode +from logr import Logr + + +class Parser(object): + def __init__(self, matcher, debug=False): + self.debug = debug + + self.matcher = matcher + + self.closures = None + #: :type: caper.result.CaperResult + self.result = None + + self._match_cache = None + self._fragment_pos = None + self._closure_pos = None + self._history = None + + self.reset() + + def reset(self): + self.closures = None + self.result = CaperResult() + + self._match_cache = {} + self._fragment_pos = -1 + self._closure_pos = -1 + self._history = [] + + def setup(self, closures): + """ + :type closures: list of CaperClosure + """ + + self.reset() + self.closures = closures + + self.result.heads = [CaperRootNode(closures[0])] + + def run(self, closures): + """ + :type closures: list of CaperClosure + """ + + raise NotImplementedError() + + # + # Capture Methods + # + + def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs): + return CaptureGroup(self, self.result).capture_fragment( + tag, + regex=regex, + func=func, + single=single, + **kwargs + ) + + def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs): + return CaptureGroup(self, self.result).capture_closure( + tag, + regex=regex, + func=func, + single=single, + **kwargs + ) diff --git a/libs/caper/parsers/scene.py b/libs/caper/parsers/scene.py new file mode 100644 index 0000000..cd0a8fd --- /dev/null +++ b/libs/caper/parsers/scene.py @@ -0,0 +1,230 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from logr import Logr +from caper import FragmentMatcher +from caper.parsers.base import Parser +from caper.result import CaperFragmentNode + + +PATTERN_GROUPS = [ + ('identifier', [ + (1.0, [ + # S01E01-E02 + ('^S(?P\d+)E(?P\d+)$', '^E(?P\d+)$'), + # 'S03 E01 to E08' or 'S03 E01 - E09' + ('^S(?P\d+)$', '^E(?P\d+)$', '^(to|-)$', '^E(?P\d+)$'), + # 'E01 to E08' or 'E01 - E09' + ('^E(?P\d+)$', '^(to|-)$', '^E(?P\d+)$'), + + # S01-S03 + ('^S(?P\d+)$', '^S(?P\d+)$'), + + # S02E13 + r'^S(?P\d+)E(?P\d+)$', + # S01 E13 + (r'^(S(?P\d+))$', r'^(E(?P\d+))$'), + # S02 + # E13 + r'^((S(?P\d+))|(E(?P\d+)))$', + # 3x19 + r'^(?P\d+)x(?P\d+)$', + + # 2013.09.15 + (r'^(?P\d{4})$', r'^(?P\d{2})$', r'^(?P\d{2})$'), + # 09.15.2013 + (r'^(?P\d{2})$', r'^(?P\d{2})$', r'^(?P\d{4})$'), + # TODO - US/UK Date Format Conflict? will only support US format for now.. 
+ # 15.09.2013 + #(r'^(?P\d{2})$', r'^(?P\d{2})$', r'^(?P\d{4})$'), + # 130915 + r'^(?P\d{2})(?P\d{2})(?P\d{2})$', + + # Season 3 Episode 14 + (r'^Se(ason)?$', r'^(?P\d+)$', r'^Ep(isode)?$', r'^(?P\d+)$'), + # Season 3 + (r'^Se(ason)?$', r'^(?P\d+)$'), + # Episode 14 + (r'^Ep(isode)?$', r'^(?P\d+)$'), + + # Part.3 + # Part.1.and.Part.3 + ('^Part$', '(?P\d+)'), + + r'(?PSpecial)', + r'(?PNZ|AU|US|UK)' + ]), + (0.8, [ + # 100 - 1899, 2100 - 9999 (skips 1900 to 2099 - so we don't get years my mistake) + # TODO - Update this pattern on 31 Dec 2099 + r'^(?P([1-9])|(1[0-8])|(2[1-9])|([3-9][0-9]))(?P\d{2})$' + ]), + (0.5, [ + # 100 - 9999 + r'^(?P([1-9])|([1-9][0-9]))(?P\d{2})$' + ]) + ]), + + ('video', [ + r'(?PFS|WS)', + + (r'(?P%s)', [ + '480p', + '720p', + '1080p' + ]), + + # + # Source + # + + (r'(?P%s)', [ + 'DVDRiP', + # HDTV + 'HDTV', + 'PDTV', + 'DSR', + # WEB + 'WEBRip', + 'WEBDL', + # BluRay + 'BluRay', + 'B(D|R)Rip', + # DVD + 'DVDR', + 'DVD9', + 'DVD5' + ]), + + # For multi-fragment 'WEB-DL', 'WEB-Rip', etc... matches + ('(?PWEB)', '(?PDL|Rip)'), + + # + # Codec + # + + (r'(?P%s)', [ + 'x264', + 'XViD', + 'H264', + 'AVC' + ]), + + # For multi-fragment 'H 264' tags + ('(?PH)', '(?P264)'), + ]), + + ('dvd', [ + r'D(ISC)?(?P\d+)', + + r'R(?P[0-8])', + + (r'(?P%s)', [ + 'PAL', + 'NTSC' + ]), + ]), + + ('audio', [ + (r'(?P%s)', [ + 'AC3', + 'TrueHD' + ]), + + (r'(?P%s)', [ + 'GERMAN', + 'DUTCH', + 'FRENCH', + 'SWEDiSH', + 'DANiSH', + 'iTALiAN' + ]), + ]), + + ('scene', [ + r'(?PPROPER|REAL)', + ]) +] + + +class SceneParser(Parser): + matcher = None + + def __init__(self, debug=False): + if not SceneParser.matcher: + SceneParser.matcher = FragmentMatcher(PATTERN_GROUPS) + Logr.info("Fragment matcher for %s created", self.__class__.__name__) + + super(SceneParser, self).__init__(SceneParser.matcher, debug) + + def capture_group(self, fragment): + if fragment.closure.index + 1 != len(self.closures): + return None + + if fragment.left_sep != '-' or fragment.right: + return None + + return fragment.value + + def run(self, closures): + """ + :type closures: list of CaperClosure + """ + + self.setup(closures) + + self.capture_fragment('show_name', single=False)\ + .until_fragment(node__re='identifier')\ + .until_fragment(node__re='video')\ + .until_fragment(node__re='dvd')\ + .until_fragment(node__re='audio')\ + .until_fragment(node__re='scene')\ + .execute() + + self.capture_fragment('identifier', regex='identifier', single=False)\ + .capture_fragment('video', regex='video', single=False)\ + .capture_fragment('dvd', regex='dvd', single=False)\ + .capture_fragment('audio', regex='audio', single=False)\ + .capture_fragment('scene', regex='scene', single=False)\ + .until_fragment(left_sep__eq='-', right__eq=None)\ + .execute() + + self.capture_fragment('group', func=self.capture_group)\ + .execute() + + self.print_tree(self.result.heads) + + self.result.build() + return self.result + + def print_tree(self, heads): + if not self.debug: + return + + for head in heads: + head = head if type(head) is list else [head] + + if type(head[0]) is CaperFragmentNode: + for fragment in head[0].fragments: + Logr.debug(fragment.value) + else: + Logr.debug(head[0].closure.value) + + for node in head: + Logr.debug('\t' + str(node).ljust(55) + '\t' + ( + str(node.match.weight) + '\t' + str(node.match.result) + ) if node.match else '') + + if len(head) > 0 and head[0].parent: + self.print_tree([head[0].parent]) diff --git a/libs/caper/parsers/usenet.py b/libs/caper/parsers/usenet.py new file mode 100644 index 
0000000..f622d43 --- /dev/null +++ b/libs/caper/parsers/usenet.py @@ -0,0 +1,115 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from logr import Logr +from caper import FragmentMatcher +from caper.parsers.base import Parser + + +PATTERN_GROUPS = [ + ('usenet', [ + r'\[(?P#[\w\.@]+)\]', + r'^\[(?P\w+)\]$', + r'\[(?PFULL)\]', + r'\[\s?(?PTOWN)\s?\]', + r'(.*?\s)?[_\W]*(?Pwww\..*?\.[a-z0-9]+)[_\W]*(.*?\s)?', + r'(.*?\s)?[_\W]*(?P(www\.)?[-\w]+\.(com|org|info))[_\W]*(.*?\s)?' + ]), + + ('part', [ + r'.?(?P\d+)/(?P\d+).?' + ]), + + ('detail', [ + r'[\s-]*\w*?[\s-]*\"(?P.*?)\"[\s-]*\w*?[\s-]*(?P[\d,\.]*\s?MB)?[\s-]*(?PyEnc)?', + r'(?P[\d,\.]*\s?MB)[\s-]*(?PyEnc)', + r'(?P[\d,\.]*\s?MB)|(?PyEnc)' + ]) +] + + +class UsenetParser(Parser): + matcher = None + + def __init__(self, debug=False): + if not UsenetParser.matcher: + UsenetParser.matcher = FragmentMatcher(PATTERN_GROUPS) + Logr.info("Fragment matcher for %s created", self.__class__.__name__) + + super(UsenetParser, self).__init__(UsenetParser.matcher, debug) + + def run(self, closures): + """ + :type closures: list of CaperClosure + """ + + self.setup(closures) + + # Capture usenet or part info until we get a part or matching fails + self.capture_closure('usenet', regex='usenet', single=False)\ + .capture_closure('part', regex='part', single=True) \ + .until_result(tag='part') \ + .until_failure()\ + .execute() + + is_town_release, has_part = self.get_state() + + if not is_town_release: + self.capture_release_name() + + # If we already have the part (TOWN releases), ignore matching part again + if not is_town_release and not has_part: + self.capture_fragment('part', regex='part', single=True)\ + .until_closure(node__re='usenet')\ + .until_success()\ + .execute() + + # Capture any leftover details + self.capture_closure('usenet', regex='usenet', single=False)\ + .capture_closure('detail', regex='detail', single=False)\ + .execute() + + self.result.build() + return self.result + + def capture_release_name(self): + self.capture_closure('detail', regex='detail', single=False)\ + .until_failure()\ + .execute() + + self.capture_fragment('release_name', single=False, include_separators=True) \ + .until_closure(node__re='usenet') \ + .until_closure(node__re='detail') \ + .until_closure(node__re='part') \ + .until_fragment(value__eq='-')\ + .execute() + + # Capture any detail after the release name + self.capture_closure('detail', regex='detail', single=False)\ + .until_failure()\ + .execute() + + def get_state(self): + # TODO multiple-chains? 
+ is_town_release = False + has_part = False + + for tag, result in self.result.heads[0].captured(): + if tag == 'usenet' and result.get('group') == 'TOWN': + is_town_release = True + + if tag == 'part': + has_part = True + + return is_town_release, has_part diff --git a/libs/caper/result.py b/libs/caper/result.py new file mode 100644 index 0000000..c9e3423 --- /dev/null +++ b/libs/caper/result.py @@ -0,0 +1,213 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +from logr import Logr + + +GROUP_MATCHES = ['identifier'] + + +class CaperNode(object): + def __init__(self, closure, parent=None, match=None): + """ + :type parent: CaperNode + :type weight: float + """ + + #: :type: caper.objects.CaperClosure + self.closure = closure + + #: :type: CaperNode + self.parent = parent + + #: :type: CaptureMatch + self.match = match + + #: :type: list of CaptureGroup + self.finished_groups = [] + + def next(self): + raise NotImplementedError() + + def captured(self): + cur = self + + if cur.match: + yield cur.match.tag, cur.match.result + + while cur.parent: + cur = cur.parent + + if cur.match: + yield cur.match.tag, cur.match.result + + +class CaperRootNode(CaperNode): + def __init__(self, closure): + """ + :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure + """ + super(CaperRootNode, self).__init__(closure) + + def next(self): + return self.closure + + +class CaperClosureNode(CaperNode): + def __init__(self, closure, parent=None, match=None): + """ + :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure + """ + super(CaperClosureNode, self).__init__(closure, parent, match) + + def next(self): + if not self.closure: + return None + + if self.match: + # Jump to next closure if we have a match + return self.closure.right + elif len(self.closure.fragments) > 0: + # Otherwise parse the fragments + return self.closure.fragments[0] + + return None + + def __str__(self): + return "" % repr(self.match) + + def __repr__(self): + return self.__str__() + + +class CaperFragmentNode(CaperNode): + def __init__(self, closure, fragments, parent=None, match=None): + """ + :type closure: caper.objects.CaperClosure + :type fragments: list of caper.objects.CaperFragment + """ + super(CaperFragmentNode, self).__init__(closure, parent, match) + + #: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment + self.fragments = fragments + + def next(self): + if len(self.fragments) > 0 and self.fragments[-1] and self.fragments[-1].right: + return self.fragments[-1].right + + if self.closure.right: + return self.closure.right + + return None + + def __str__(self): + return "" % repr(self.match) + + def __repr__(self): + return self.__str__() + + +class CaperResult(object): + def __init__(self): + #: :type: list of CaperNode + self.heads = [] + + self.chains = [] + + def build(self): + max_matched = 0 + + for head in self.heads: + for chain in self.combine_chain(head): + if chain.num_matched > max_matched: + 
max_matched = chain.num_matched + + self.chains.append(chain) + + for chain in self.chains: + chain.weights.append(chain.num_matched / float(max_matched or chain.num_matched or 1)) + chain.finish() + + self.chains.sort(key=lambda chain: chain.weight, reverse=True) + + for chain in self.chains: + Logr.debug("chain weight: %.02f", chain.weight) + Logr.debug("\tInfo: %s", chain.info) + + Logr.debug("\tWeights: %s", chain.weights) + Logr.debug("\tNumber of Fragments Matched: %s", chain.num_matched) + + def combine_chain(self, subject, chain=None): + nodes = subject if type(subject) is list else [subject] + + if chain is None: + chain = CaperResultChain() + + result = [] + + for x, node in enumerate(nodes): + node_chain = chain if x == len(nodes) - 1 else chain.copy() + + if not node.parent: + result.append(node_chain) + continue + + node_chain.update(node) + result.extend(self.combine_chain(node.parent, node_chain)) + + return result + + +class CaperResultChain(object): + def __init__(self): + #: :type: float + self.weight = None + self.info = {} + self.num_matched = 0 + + self.weights = [] + + def update(self, subject): + """ + :type subject: CaperFragmentNode + """ + if not subject.match or not subject.match.success: + return + + # TODO this should support closure nodes + if type(subject) is CaperFragmentNode: + self.num_matched += len(subject.fragments) if subject.fragments is not None else 0 + + self.weights.append(subject.match.weight) + + if subject.match: + if subject.match.tag not in self.info: + self.info[subject.match.tag] = [] + + self.info[subject.match.tag].insert(0, subject.match.result) + + def finish(self): + self.weight = sum(self.weights) / len(self.weights) + + def copy(self): + chain = CaperResultChain() + + chain.weight = self.weight + chain.info = copy.deepcopy(self.info) + + chain.num_matched = self.num_matched + chain.weights = copy.copy(self.weights) + + return chain \ No newline at end of file diff --git a/libs/caper/step.py b/libs/caper/step.py new file mode 100644 index 0000000..817514b --- /dev/null +++ b/libs/caper/step.py @@ -0,0 +1,96 @@ +# Copyright 2013 Dean Gardiner +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
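+# A CaptureStep is created for each capture_fragment()/capture_closure() call
+# on a CaptureGroup. execute() tries, in order: a named regex group (resolved
+# through the parser's FragmentMatcher), then a custom capture function, then
+# the raw fragment value, and returns a CaptureMatch describing the success,
+# weight and result of the step.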
+ +from caper.objects import CaptureMatch +from logr import Logr + + +class CaptureStep(object): + REPR_KEYS = ['regex', 'func', 'single'] + + def __init__(self, capture_group, tag, source, regex=None, func=None, single=None, **kwargs): + #: @type: CaptureGroup + self.capture_group = capture_group + + #: @type: str + self.tag = tag + #: @type: str + self.source = source + #: @type: str + self.regex = regex + #: @type: function + self.func = func + #: @type: bool + self.single = single + + self.kwargs = kwargs + + self.matched = False + + def execute(self, fragment): + """Execute step on fragment + + :type fragment: CaperFragment + :rtype : CaptureMatch + """ + + match = CaptureMatch(self.tag, self) + + if self.regex: + weight, result, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, self.regex) + Logr.debug('(execute) [regex] tag: "%s"', self.tag) + + if not result: + return match + + # Populate CaptureMatch + match.success = True + match.weight = weight + match.result = result + match.num_fragments = num_fragments + elif self.func: + result = self.func(fragment) + Logr.debug('(execute) [func] %s += "%s"', self.tag, match) + + if not result: + return match + + # Populate CaptureMatch + match.success = True + match.weight = 1.0 + match.result = result + else: + Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value) + + include_separators = self.kwargs.get('include_separators', False) + + # Populate CaptureMatch + match.success = True + match.weight = 1.0 + + if include_separators: + match.result = (fragment.left_sep, fragment.value, fragment.right_sep) + else: + match.result = fragment.value + + return match + + def __repr__(self): + attribute_values = [key + '=' + repr(getattr(self, key)) + for key in self.REPR_KEYS + if hasattr(self, key) and getattr(self, key)] + + attribute_string = ', ' + ', '.join(attribute_values) if len(attribute_values) > 0 else '' + + return "CaptureStep('%s'%s)" % (self.tag, attribute_string) diff --git a/libs/logr/__init__.py b/libs/logr/__init__.py new file mode 100644 index 0000000..7a2d7b2 --- /dev/null +++ b/libs/logr/__init__.py @@ -0,0 +1,225 @@ +# logr - Simple python logging wrapper +# Packed by Dean Gardiner +# +# File part of: +# rdio-sock - Rdio WebSocket Library +# Copyright (C) 2013 fzza- + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
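+# Usage sketch (illustrative): configure once, then log through the static
+# helpers; with trace_origin=True the logger name is derived from the caller.
+#
+#   import logging
+#   from logr import Logr
+#
+#   Logr.configure(level=logging.DEBUG, trace_origin=True)
+#   Logr.debug("compiled %s patterns", 12)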
+ + +import inspect +import logging +import os +import sys + +IGNORE = () +PY3 = sys.version_info[0] == 3 + + +class Logr(object): + loggers = {} + handler = None + + trace_origin = False + name = "Logr" + + @staticmethod + def configure(level=logging.WARNING, handler=None, formatter=None, trace_origin=False, name="Logr"): + """Configure Logr + + @param handler: Logger message handler + @type handler: logging.Handler or None + + @param formatter: Logger message Formatter + @type formatter: logging.Formatter or None + """ + if formatter is None: + formatter = LogrFormatter() + + if handler is None: + handler = logging.StreamHandler() + + handler.setFormatter(formatter) + handler.setLevel(level) + Logr.handler = handler + + Logr.trace_origin = trace_origin + Logr.name = name + + @staticmethod + def configure_check(): + if Logr.handler is None: + Logr.configure() + + @staticmethod + def _get_name_from_path(filename): + try: + return os.path.splitext(os.path.basename(filename))[0] + except TypeError: + return "" + + @staticmethod + def get_frame_class(frame): + if len(frame.f_code.co_varnames) <= 0: + return None + + farg = frame.f_code.co_varnames[0] + + if farg not in frame.f_locals: + return None + + if farg == 'self': + return frame.f_locals[farg].__class__ + + if farg == 'cls': + return frame.f_locals[farg] + + return None + + + @staticmethod + def get_logger_name(): + if not Logr.trace_origin: + return Logr.name + + stack = inspect.stack() + + for x in xrange_six(len(stack)): + frame = stack[x][0] + name = None + + # Try find name of function defined inside a class + frame_class = Logr.get_frame_class(frame) + + if frame_class: + class_name = frame_class.__name__ + module_name = frame_class.__module__ + + if module_name != '__main__': + name = module_name + '.' 
+ class_name + else: + name = class_name + + # Try find name of function defined outside of a class + if name is None: + if frame.f_code.co_name in frame.f_globals: + name = frame.f_globals.get('__name__') + if name == '__main__': + name = Logr._get_name_from_path(frame.f_globals.get('__file__')) + name = name + elif frame.f_code.co_name == '': + name = Logr._get_name_from_path(frame.f_globals.get('__file__')) + + if name is not None and name not in IGNORE: + return name + + return "" + + @staticmethod + def get_logger(): + """Get or create logger (if it does not exist) + + @rtype: RootLogger + """ + name = Logr.get_logger_name() + if name not in Logr.loggers: + Logr.configure_check() + Logr.loggers[name] = logging.Logger(name) + Logr.loggers[name].addHandler(Logr.handler) + return Logr.loggers[name] + + @staticmethod + def debug(msg, *args, **kwargs): + Logr.get_logger().debug(msg, *args, **kwargs) + + @staticmethod + def info(msg, *args, **kwargs): + Logr.get_logger().info(msg, *args, **kwargs) + + @staticmethod + def warning(msg, *args, **kwargs): + Logr.get_logger().warning(msg, *args, **kwargs) + + warn = warning + + @staticmethod + def error(msg, *args, **kwargs): + Logr.get_logger().error(msg, *args, **kwargs) + + @staticmethod + def exception(msg, *args, **kwargs): + Logr.get_logger().exception(msg, *args, **kwargs) + + @staticmethod + def critical(msg, *args, **kwargs): + Logr.get_logger().critical(msg, *args, **kwargs) + + fatal = critical + + @staticmethod + def log(level, msg, *args, **kwargs): + Logr.get_logger().log(level, msg, *args, **kwargs) + + +class LogrFormatter(logging.Formatter): + LENGTH_NAME = 32 + LENGTH_LEVEL_NAME = 5 + + def __init__(self, fmt=None, datefmt=None): + if sys.version_info[:2] > (2,6): + super(LogrFormatter, self).__init__(fmt, datefmt) + else: + logging.Formatter.__init__(self, fmt, datefmt) + + def usesTime(self): + return True + + def format(self, record): + record.message = record.getMessage() + if self.usesTime(): + record.asctime = self.formatTime(record, self.datefmt) + + s = "%(asctime)s %(name)s %(levelname)s %(message)s" % { + 'asctime': record.asctime, + 'name': record.name[-self.LENGTH_NAME:].rjust(self.LENGTH_NAME, ' '), + 'levelname': record.levelname[:self.LENGTH_LEVEL_NAME].ljust(self.LENGTH_LEVEL_NAME, ' '), + 'message': record.message + } + + if record.exc_info: + if not record.exc_text: + record.exc_text = self.formatException(record.exc_info) + if record.exc_text: + if s[-1:] != "\n": + s += "\n" + try: + s += record.exc_text + except UnicodeError: + s = s + record.exc_text.decode(sys.getfilesystemencoding(), + 'replace') + return s + + +def xrange_six(start, stop=None, step=None): + if stop is not None and step is not None: + if PY3: + return range(start, stop, step) + else: + return xrange(start, stop, step) + else: + if PY3: + return range(start) + else: + return xrange(start)
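
Taken together, these changes let CouchPotato hand a release name to Caper and inspect the weighted chains it produces. A minimal standalone sketch of that flow, using only the Caper API added above (the release title is arbitrary example data):

from caper import Caper

caper = Caper()

# Parse a scene-style release name into weighted chains of captured tags.
result = caper.parse('Show.Name.S01E05.720p.HDTV.x264-GROUP', parser='scene')

for chain in result.chains:
    # chain.info maps a capture tag (e.g. 'show_name', 'identifier', 'video')
    # to the list of results captured under that tag; chains are sorted by
    # weight, best match first.
    print('weight %.2f, %d fragments matched' % (chain.weight, chain.num_matched))
    for tag, matches in chain.info.items():
        print('  %s: %s' % (tag, matches))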