Browse Source

Base classes for matcher and library

pull/2987/head
Ruud 11 years ago
parent
commit
8b2eb50f29
  1. 6
      couchpotato/core/media/_base/library/__init__.py
  2. 13
      couchpotato/core/media/_base/library/base.py
  3. 18
      couchpotato/core/media/_base/library/main.py
  4. 6
      couchpotato/core/media/_base/matcher/__init__.py
  5. 84
      couchpotato/core/media/_base/matcher/base.py
  6. 89
      couchpotato/core/media/_base/matcher/main.py
  7. 195
      libs/caper/__init__.py
  8. 134
      libs/caper/constraint.py
  9. 284
      libs/caper/group.py
  10. 80
      libs/caper/helpers.py
  11. 144
      libs/caper/matcher.py
  12. 124
      libs/caper/objects.py
  13. 0
      libs/caper/parsers/__init__.py
  14. 88
      libs/caper/parsers/anime.py
  15. 84
      libs/caper/parsers/base.py
  16. 230
      libs/caper/parsers/scene.py
  17. 115
      libs/caper/parsers/usenet.py
  18. 213
      libs/caper/result.py
  19. 96
      libs/caper/step.py
  20. 225
      libs/logr/__init__.py

6
couchpotato/core/media/_base/library/__init__.py

@ -0,0 +1,6 @@
from .main import Library
def autoload():
    """Create and return a new ``Library`` instance for this package."""
    plugin = Library()
    return plugin
config = []

13
couchpotato/core/media/_base/library/base.py

@ -0,0 +1,13 @@
from couchpotato.core.event import addEvent
from couchpotato.core.plugins.base import Plugin
class LibraryBase(Plugin):
    """Base for library plugins that report a type identifier through events."""

    # Value returned by getType(); stays None unless a subclass overrides it.
    _type = None

    def getType(self):
        """Return the ``_type`` value of this plugin."""
        return self._type

    def initType(self):
        """Register ``getType`` as a handler for the 'library.types' event."""
        addEvent('library.types', self.getType)

18
couchpotato/core/media/_base/library/main.py

@ -0,0 +1,18 @@
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.media._base.library.base import LibraryBase
class Library(LibraryBase):
    """Library plugin that answers the 'library.title' event."""

    def __init__(self):
        addEvent('library.title', self.title)

    def title(self, library):
        """Return the 'library.query' result for *library*.

        The query is fired with ``condense``, ``include_year`` and
        ``include_identifier`` all set to False, and with ``single = True``.
        """
        query_options = {
            'condense': False,
            'include_year': False,
            'include_identifier': False,
            'single': True,
        }
        return fireEvent('library.query', library, **query_options)

6
couchpotato/core/media/_base/matcher/__init__.py

@ -0,0 +1,6 @@
from .main import Matcher
def autoload():
    """Create and return a new ``Matcher`` instance for this package."""
    plugin = Matcher()
    return plugin
config = []

84
couchpotato/core/media/_base/matcher/base.py

@ -0,0 +1,84 @@
from couchpotato.core.event import addEvent
from couchpotato.core.helpers.encoding import simplifyString
from couchpotato.core.logger import CPLog
from couchpotato.core.plugins.base import Plugin
log = CPLog(__name__)
class MatcherBase(Plugin):
    """Shared helpers for matcher plugins.

    When a subclass sets ``type``, its ``correct`` method is registered for the
    '<type>.matcher.correct' event at construction time.
    """

    type = None

    def __init__(self):
        if not self.type:
            return
        addEvent('%s.matcher.correct' % self.type, self.correct)

    def correct(self, chain, release, media, quality):
        """Hook for subclasses; this base implementation always raises."""
        raise NotImplementedError()

    def flattenInfo(self, info):
        """Collapse a collection of matches into a single structure.

        A dict is flattened value by value (recursively). Anything else is
        iterated: dict entries are merged into a dict mapping each key to the
        list of values seen for it, other entries are collected in a list.
        Returns None when *info* yields no entries.
        """
        # Flatten dictionary of matches (chain info)
        if isinstance(info, dict):
            return dict((key, self.flattenInfo(value)) for key, value in info.items())

        # Flatten matches
        merged = None
        for entry in info:
            if not isinstance(entry, dict):
                if merged is None:
                    merged = []
                merged.append(entry)
                continue

            if merged is None:
                merged = {}
            for key, value in entry.items():
                if key not in merged:
                    merged[key] = []
                merged[key].append(value)

        return merged

    def constructFromRaw(self, match):
        """Rebuild a string from raw match tuples.

        For every item the truthy elements after the first are concatenated;
        the joined text is returned without its last character and with
        surrounding whitespace stripped. Returns None for a falsy *match*.
        """
        if not match:
            return None

        pieces = []
        for raw in match:
            pieces.append(''.join([piece for piece in raw[1:] if piece]))

        joined = ''.join(pieces)
        return joined[:-1].strip()

    def simplifyValue(self, value):
        """Apply ``simplifyString`` to a string or, recursively, to a list.

        Falsy values are returned untouched; any other type raises ValueError.
        """
        if not value:
            return value

        if isinstance(value, basestring):
            return simplifyString(value)

        if isinstance(value, list):
            return [self.simplifyValue(item) for item in value]

        raise ValueError("Unsupported value type")

    def chainMatch(self, chain, group, tags):
        """Return True when every tag in *tags* has an accepted value in the chain.

        *tags* maps a tag name to the collection of accepted (simplified)
        values; the chain values come from ``chain.info[group]``.
        """
        info = self.flattenInfo(chain.info[group])

        found_tags = []
        for tag, accepted in tags.items():
            values = [self.simplifyValue(x) for x in info.get(tag, [None])]
            if any(val in accepted for val in values):
                found_tags.append(tag)

        log.debug('tags found: %s, required: %s' % (found_tags, tags.keys()))

        # found_tags only ever holds keys of `tags`, so this single check covers
        # both "same set of tags" and "every required tag found".
        return all(key in found_tags for key in tags)

89
couchpotato/core/media/_base/matcher/main.py

@ -0,0 +1,89 @@
from couchpotato.core.event import addEvent, fireEvent
from couchpotato.core.helpers.variable import possibleTitles
from couchpotato.core.logger import CPLog
from couchpotato.core.media._base.matcher.base import MatcherBase
from caper import Caper
log = CPLog(__name__)
class Matcher(MatcherBase):
    """Matcher plugin that parses release names with Caper and validates the result."""

    def __init__(self):
        super(Matcher, self).__init__()

        self.caper = Caper()

        handlers = (
            ('matcher.parse', self.parse),
            ('matcher.match', self.match),
            ('matcher.flatten_info', self.flattenInfo),
            ('matcher.construct_from_raw', self.constructFromRaw),
            ('matcher.correct_title', self.correctTitle),
            ('matcher.correct_quality', self.correctQuality),
        )
        for event_name, handler in handlers:
            addEvent(event_name, handler)

    def parse(self, name, parser='scene'):
        """Parse *name* with the Caper parser called *parser*."""
        return self.caper.parse(name, parser)

    def match(self, release, media, quality):
        """Return the first parsed chain accepted for *media*, or False.

        The release name is parsed through the 'matcher.parse' event and each
        resulting chain is offered to the '<media type>.matcher.correct' event.
        """
        release_name = release['name']
        parsed = fireEvent('matcher.parse', release_name, single = True)

        if len(parsed.chains) == 0:
            log.info2('Wrong: %s, unable to parse release name (no chains)', release_name)
            return False

        correct_event = '%s.matcher.correct' % media['type']
        for chain in parsed.chains:
            if fireEvent(correct_event, chain, release, media, quality, single = True):
                return chain

        return False

    def correctTitle(self, chain, media):
        """Return True when the chain's show name equals a possible title of *media*.

        Every title of the root library is tried both as-is and with the
        library year appended.
        """
        root_library = media['library']['root_library']

        if 'show_name' not in chain.info or not len(chain.info['show_name']):
            log.info('Wrong: missing show name in parsed result')
            return False

        # Get the lower-case parsed show name from the chain
        chain_words = [word.lower() for word in chain.info['show_name']]

        # Build a list of possible titles of the media we are searching for,
        # each followed by its year-suffixed variant:
        # ['<name_one>', '<name_one> <suffix_one>', '<name_two>', ...]
        base_titles = root_library['info']['titles']
        suffixes = [None, root_library['info']['year']]

        candidates = []
        for base_title in base_titles:
            for suffix in suffixes:
                candidates.append(base_title + ((' %s' % suffix) if suffix else ''))

        # Check show titles match
        # TODO check xem names
        for candidate in candidates:
            word_lists = [possible.split(' ') for possible in possibleTitles(candidate)]
            for valid_words in word_lists:
                if valid_words == chain_words:
                    return True

        return False

    def correctQuality(self, chain, quality, quality_map):
        """Return True when the chain's video tags satisfy the preferred quality.

        *quality_map* maps a quality identifier to the tag requirements passed
        to ``chainMatch`` for the 'video' group.
        """
        identifier = quality['identifier']

        if identifier not in quality_map:
            log.info2('Wrong: unknown preferred quality %s', identifier)
            return False

        if 'video' not in chain.info:
            log.info2('Wrong: no video tags found')
            return False

        video_tags = quality_map[identifier]

        if self.chainMatch(chain, 'video', video_tags):
            return True

        log.info2('Wrong: %s tags not in chain', video_tags)
        return False

195
libs/caper/__init__.py

@ -0,0 +1,195 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper.matcher import FragmentMatcher
from caper.objects import CaperFragment, CaperClosure
from caper.parsers.anime import AnimeParser
from caper.parsers.scene import SceneParser
from caper.parsers.usenet import UsenetParser
# Version components; joined with '.' to form the numeric part of __version__.
__version_info__ = ('0', '3', '1')
# Branch label; when non-empty it is appended to __version__ after a '-'.
__version_branch__ = 'master'
__version__ = "%s%s" % (
'.'.join(__version_info__),
'-' + __version_branch__ if __version_branch__ else ''
)
# Characters that open / close a closure in Caper._closure_split.
CL_START_CHARS = ['(', '[', '<', '>']
CL_END_CHARS = [')', ']', '<', '>']
# Strings that end a closure when they appear as the last three characters of the buffer.
CL_END_STRINGS = [' - ']
# Characters removed from the start / end of a closure value by Caper._clean_closure.
STRIP_START_CHARS = ''.join(CL_START_CHARS)
STRIP_END_CHARS = ''.join(CL_END_CHARS)
# Characters trimmed from both ends of a closure buffer before it is stored.
STRIP_CHARS = ''.join(['_', ' ', '.'])
# Characters that separate fragments inside a closure.
FRAGMENT_SEPARATORS = ['.', '-', '_', ' ']
# States of the closure-splitting loop in Caper._closure_split.
CL_START = 0
CL_END = 1
# Entry point of the library: splits a name into closures and fragments and
# hands them to one of the registered parsers.
class Caper(object):
def __init__(self, debug=False):
self.debug = debug
# Parser classes by name; parse() instantiates the selected one per call.
self.parsers = {
'anime': AnimeParser,
'scene': SceneParser,
'usenet': UsenetParser
}
# Split `name` into CaperClosure objects, linked to each other through left/right.
def _closure_split(self, name):
"""
:type name: str
:rtype: list of CaperClosure
"""
closures = []
# Store `buf` as a new closure; buffers shorter than 2 characters after
# stripping STRIP_CHARS are dropped.
def end_closure(closures, buf):
buf = buf.strip(STRIP_CHARS)
if len(buf) < 2:
return
cur = CaperClosure(len(closures), buf)
cur.left = closures[len(closures) - 1] if len(closures) > 0 else None
if cur.left:
cur.left.right = cur
closures.append(cur)
# CL_START: looking for a start character; CL_END: looking for an end character.
state = CL_START
buf = ""
for x, ch in enumerate(name):
# Check for start characters
if state == CL_START and ch in CL_START_CHARS:
end_closure(closures, buf)
state = CL_END
buf = ""
buf += ch
if state == CL_END and ch in CL_END_CHARS:
# End character found, create the closure
end_closure(closures, buf)
state = CL_START
buf = ""
elif state == CL_START and buf[-3:] in CL_END_STRINGS:
# End string found, create the closure
end_closure(closures, buf[:-3])
state = CL_START
buf = ""
# Flush whatever is left in the buffer after the last character.
end_closure(closures, buf)
return closures
# Remove the closure start/end characters from the two ends of a closure value.
def _clean_closure(self, closure):
"""
:type closure: str
:rtype: str
"""
return closure.lstrip(STRIP_START_CHARS).rstrip(STRIP_END_CHARS)
# Fill closure.fragments for every closure by splitting its cleaned value on
# FRAGMENT_SEPARATORS; the closures list itself is returned.
def _fragment_split(self, closures):
"""
:type closures: list of CaperClosure
:rtype: list of CaperClosure
"""
cur_position = 0
cur = None
# Link `cur` to the previous fragment and append it to `fragments`.
# NOTE: `ch` is not a parameter here; it is read from the enclosing scope,
# i.e. the character being processed by the loop below when this is called.
def end_fragment(fragments, cur, cur_position):
cur.position = cur_position
cur.left = fragments[len(fragments) - 1] if len(fragments) > 0 else None
if cur.left:
cur.left_sep = cur.left.right_sep
cur.left.right = cur
cur.right_sep = ch
fragments.append(cur)
for closure in closures:
closure.fragments = []
# Collects consecutive separator characters.
separator_buffer = ""
for x, ch in enumerate(self._clean_closure(closure.value)):
if not cur:
cur = CaperFragment(closure)
if ch in FRAGMENT_SEPARATORS:
if cur.value:
separator_buffer = ""
separator_buffer += ch
if cur.value or not closure.fragments:
end_fragment(closure.fragments, cur, cur_position)
elif len(separator_buffer) > 1:
# A run of separators: its stripped text is used as the fragment value.
cur.value = separator_buffer.strip()
if cur.value:
end_fragment(closure.fragments, cur, cur_position)
separator_buffer = ""
# Reset
cur = None
cur_position += 1
else:
cur.value += ch
# Finish parsing the last fragment
if cur and cur.value:
end_fragment(closure.fragments, cur, cur_position)
# Reset
cur_position = 0
cur = None
return closures
# Split `name`, log the resulting closures/fragments and run the parser
# registered under `parser`; raises ValueError for an unknown parser name.
def parse(self, name, parser='scene'):
closures = self._closure_split(name)
closures = self._fragment_split(closures)
# Print closures
for closure in closures:
Logr.debug("closure [%s]", closure.value)
for fragment in closure.fragments:
Logr.debug("\tfragment [%s]", fragment.value)
if parser not in self.parsers:
raise ValueError("Unknown parser")
# TODO autodetect the parser type
return self.parsers[parser](self.debug).run(closures)

134
libs/caper/constraint.py

@ -0,0 +1,134 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class CaptureConstraint(object):
    """Condition evaluated against a node while a capture group runs.

    ``execute`` dispatches to the ``constraint_<constraint_type>`` method.
    Every constraint method returns a ``(weight, success)`` tuple.
    """

    def __init__(self, capture_group, constraint_type, comparisons=None, target=None, **kwargs):
        """Capture constraint object

        :type capture_group: CaptureGroup
        :param constraint_type: suffix of the ``constraint_*`` method run by execute()
        :param comparisons: optional list of ``(name, method, argument)`` tuples
        :param target: stored unchanged on the instance
        :param kwargs: keys shaped ``<name>__<method>`` whose ``constraint_match_<method>``
            exists become comparisons; every other key is kept in ``self.kwargs``
        """
        self.capture_group = capture_group
        self.constraint_type = constraint_type
        self.target = target

        # Copy, so the appends below never modify a list owned by the caller
        self.comparisons = list(comparisons) if comparisons else []
        self.kwargs = {}

        for orig_key, value in kwargs.items():
            key = orig_key.split('__')

            if len(key) != 2:
                self.kwargs[orig_key] = value
                continue

            name, method = key
            method = 'constraint_match_' + method

            if not hasattr(self, method):
                self.kwargs[orig_key] = value
                continue

            self.comparisons.append((name, getattr(self, method), value))

    def execute(self, parent_node, node, **kwargs):
        """Run the constraint method selected by ``constraint_type``.

        :raises ValueError: when no ``constraint_<constraint_type>`` method exists
        """
        func_name = 'constraint_%s' % self.constraint_type

        if hasattr(self, func_name):
            return getattr(self, func_name)(parent_node, node, **kwargs)

        raise ValueError('Unknown constraint type "%s"' % self.constraint_type)

    #
    # Node Matching
    #

    def constraint_match(self, parent_node, node):
        """Evaluate all comparisons against *node*.

        :return: (average weight, True only when there is at least one
            comparison and all of them succeeded)
        """
        results = []
        total_weight = 0

        for name, method, argument in self.comparisons:
            weight, success = method(node, name, argument)
            total_weight += weight
            results.append(success)

        # `or 1` avoids dividing by zero when there are no comparisons
        return total_weight / (float(len(results)) or 1), all(results) if len(results) > 0 else False

    def constraint_match_eq(self, node, name, expected):
        """Succeed when attribute *name* of *node* exists and equals *expected*."""
        if not hasattr(node, name):
            return 1.0, False

        return 1.0, getattr(node, name) == expected

    def constraint_match_re(self, node, name, arg):
        """Match *node* (or one of its attributes) against a pattern.

        * ``name == 'node'``: *arg* is a group name or a ``(group, minimum_weight)``
          tuple handed to the parser matcher's ``fragment_match``
        * *arg* is a compiled regular expression: matched against attribute *name*
        * otherwise *arg* is handed, with attribute *name*, to the matcher's ``value_match``

        :raises ValueError: when none of the above applies
        """
        import re

        # Node match
        if name == 'node':
            group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)

            weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(node, group)
            return weight, weight > minimum_weight

        # Regex match
        # Compare against the real compiled-pattern type: its __name__ differs
        # between interpreter versions ('SRE_Pattern' vs 'Pattern').
        if isinstance(arg, type(re.compile(''))):
            return 1.0, arg.match(getattr(node, name)) is not None

        # Value match
        if hasattr(node, name):
            match = self.capture_group.parser.matcher.value_match(getattr(node, name), arg, single=True)
            return 1.0, match is not None

        raise ValueError("Unknown constraint match type '%s'" % name)

    #
    # Result
    #

    def constraint_result(self, parent_node, fragment):
        """Succeed when *parent_node* captured the configured ``tag`` (and ``key``, if given)."""
        ctag = self.kwargs.get('tag')
        if not ctag:
            return 0, False

        ckey = self.kwargs.get('key')

        for tag, result in parent_node.captured():
            if tag != ctag:
                continue

            if not ckey or ckey in result.keys():
                return 1.0, True

        return 0.0, False

    #
    # Failure
    #

    def constraint_failure(self, parent_node, fragment, match):
        """Succeed when there is no match or the match was unsuccessful."""
        if not match or not match.success:
            return 1.0, True

        return 0, False

    #
    # Success
    #

    def constraint_success(self, parent_node, fragment, match):
        """Succeed when there is a successful match."""
        if match and match.success:
            return 1.0, True

        return 0, False

    def __repr__(self):
        return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons)

284
libs/caper/group.py

@ -0,0 +1,284 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper import CaperClosure, CaperFragment
from caper.helpers import clean_dict
from caper.result import CaperFragmentNode, CaperClosureNode
from caper.step import CaptureStep
from caper.constraint import CaptureConstraint
class CaptureGroup(object):
def __init__(self, parser, result):
"""Capture group object
:type parser: caper.parsers.base.Parser
:type result: caper.result.CaperResult
"""
self.parser = parser
self.result = result
# Steps added through capture_fragment() / capture_closure()
#: :type: list of CaptureStep
self.steps = []
# 'fragment' or 'closure' once the first step has been added, None before that
#: :type: str
self.step_source = None
# Constraints added by until_closure() / until_fragment() / until_result()
#: :type: list of CaptureConstraint
self.pre_constraints = []
# Constraints added by until_failure() / until_success()
#: :type: list of CaptureConstraint
self.post_constraints = []
def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs):
    """Add a fragment capture step to this group and return the group.

    :raises ValueError: when the group already contains closure steps
    """
    Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single)

    # The first step fixes the source; later steps must use the same one
    if self.step_source is None:
        self.step_source = 'fragment'
    elif self.step_source != 'fragment':
        raise ValueError("Unable to mix fragment and closure capturing in a group")

    step = CaptureStep(
        self, tag,
        'fragment',
        regex=regex,
        func=func,
        single=single,
        **kwargs
    )
    self.steps.append(step)

    return self
def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
    """Add a closure capture step to this group and return the group.

    :raises ValueError: when the group already contains fragment steps
    """
    Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)

    # The first step fixes the source; later steps must use the same one
    if self.step_source is None:
        self.step_source = 'closure'
    elif self.step_source != 'closure':
        raise ValueError("Unable to mix fragment and closure capturing in a group")

    step = CaptureStep(
        self, tag,
        'closure',
        regex=regex,
        func=func,
        single=single,
        **kwargs
    )
    self.steps.append(step)

    return self
def until_closure(self, **kwargs):
    """Add a 'match' pre-constraint with target 'closure'; returns the group."""
    constraint = CaptureConstraint(self, 'match', target='closure', **kwargs)
    self.pre_constraints.append(constraint)
    return self

def until_fragment(self, **kwargs):
    """Add a 'match' pre-constraint with target 'fragment'; returns the group."""
    constraint = CaptureConstraint(self, 'match', target='fragment', **kwargs)
    self.pre_constraints.append(constraint)
    return self

def until_result(self, **kwargs):
    """Add a 'result' pre-constraint; returns the group."""
    constraint = CaptureConstraint(self, 'result', **kwargs)
    self.pre_constraints.append(constraint)
    return self

def until_failure(self, **kwargs):
    """Add a 'failure' post-constraint; returns the group."""
    constraint = CaptureConstraint(self, 'failure', **kwargs)
    self.post_constraints.append(constraint)
    return self

def until_success(self, **kwargs):
    """Add a 'success' post-constraint; returns the group."""
    constraint = CaptureConstraint(self, 'success', **kwargs)
    self.post_constraints.append(constraint)
    return self
def parse_subject(self, parent_head, subject):
    """Dispatch *subject* to the parse method for its kind.

    :param parent_head: head handed through to parse_closure() / parse_fragment()
    :param subject: a CaperClosure or a CaperFragment (the exact type is checked)
    :return: the value returned by parse_closure() / parse_fragment()
    :raises ValueError: when *subject* is of neither type
    """
    Logr.debug("parse_subject (%s) subject: %s", self.step_source, repr(subject))

    if type(subject) is CaperClosure:
        return self.parse_closure(parent_head, subject)

    if type(subject) is CaperFragment:
        return self.parse_fragment(parent_head, subject)

    # Interpolate explicitly: exception constructors do not apply
    # %-formatting to additional arguments the way logging calls do.
    raise ValueError('Unknown subject (%s)' % (subject,))
# Match a fragment subject against this group and return the resulting nodes.
# `parent_head` is either a single node or a list of nodes; for a list its
# first element is used as the parent node.
def parse_fragment(self, parent_head, subject):
parent_node = parent_head[0] if type(parent_head) is list else parent_head
nodes, match = self.match(parent_head, parent_node, subject)
# Capturing broke on constraint, return now
if not match:
return nodes
Logr.debug('created fragment node with subject.value: "%s"' % subject.value)
# Node covering the subject plus the fragments to its right consumed by the match
result = [CaperFragmentNode(
parent_node.closure,
subject.take_right(match.num_fragments),
parent_head,
match
)]
# Branch if the match was indefinite (weight below 1.0)
# The extra node created here carries no match and covers only `subject`.
if match.result and match.weight < 1.0:
if match.num_fragments == 1:
result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head))
else:
nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head))
# A single result node is appended directly, several are appended as one list
nodes.append(result[0] if len(result) == 1 else result)
return nodes
# Match a closure subject against this group and return the resulting nodes.
# `parent_head` is either a single node or a list of nodes; for a list its
# first element is used as the parent node.
def parse_closure(self, parent_head, subject):
parent_node = parent_head[0] if type(parent_head) is list else parent_head
nodes, match = self.match(parent_head, parent_node, subject)
# Capturing broke on constraint, return now
if not match:
return nodes
Logr.debug('created closure node with subject.value: "%s"' % subject.value)
result = [CaperClosureNode(
subject,
parent_head,
match
)]
# Branch if the match was indefinite (weight below 1.0)
# The extra node created here carries no match.
if match.result and match.weight < 1.0:
if match.num_fragments == 1:
result.append(CaperClosureNode(subject, parent_head))
else:
nodes.append(CaperClosureNode(subject, parent_head))
# A single result node is appended directly, several are appended as one list
nodes.append(result[0] if len(result) == 1 else result)
return nodes
# Check the pre-constraints, try the group's steps against `subject`, then
# check the post-constraints. Returns a (nodes, match) tuple; match is None
# when capturing stopped without a usable match.
def match(self, parent_head, parent_node, subject):
nodes = []
# Check pre constraints
broke, definite = self.check_constraints(self.pre_constraints, parent_head, subject)
if broke:
nodes.append(parent_head)
if definite:
return nodes, None
# Try match subject against the steps available
match = None
for step in self.steps:
if step.source == 'closure' and type(subject) is not CaperClosure:
pass
elif step.source == 'fragment' and type(subject) is CaperClosure:
Logr.debug('Closure encountered on fragment step, jumping into fragments')
return [CaperClosureNode(subject, parent_head, None)], None
match = step.execute(subject)
if match.success:
# Nested dict results are cleaned with clean_dict() (default `remove` value)
if type(match.result) is dict:
match.result = clean_dict(match.result)
Logr.debug('Found match with weight %s, match: %s, num_fragments: %s' % (
match.weight, match.result, match.num_fragments
))
step.matched = True
break
# The group is finished once every step is single-use and has matched
if all([step.single and step.matched for step in self.steps]):
Logr.debug('All steps completed, group finished')
parent_node.finished_groups.append(self)
return nodes, match
# Check post constraints
broke, definite = self.check_constraints(self.post_constraints, parent_head, subject, match=match)
if broke:
return nodes, None
return nodes, match
# Run the constraints that apply to `subject` (same target as subject.__key__,
# or no target at all). Returns (True, weight == 1.0) for the first constraint
# that succeeds, after marking this group finished on the parent node;
# returns (False, None) when none succeeds.
def check_constraints(self, constraints, parent_head, subject, **kwargs):
parent_node = parent_head[0] if type(parent_head) is list else parent_head
# Check constraints
for constraint in [c for c in constraints if c.target == subject.__key__ or not c.target]:
Logr.debug("Testing constraint %s against subject %s", repr(constraint), repr(subject))
weight, success = constraint.execute(parent_node, subject, **kwargs)
if success:
Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
parent_node.finished_groups.append(self)
return True, weight == 1.0
return False, None
# Advance every head in self.result.heads until each one is finished for this
# group or has no next subject.
# NOTE(review): AnimeParser.run calls execute(once=True), but this signature
# accepts no such argument - confirm which side is out of date.
def execute(self):
heads_finished = None
while heads_finished is None or not (len(heads_finished) == len(self.result.heads) and all(heads_finished)):
heads_finished = []
# Rebuild the head list from scratch on every pass
heads = self.result.heads
self.result.heads = []
for head in heads:
node = head[0] if type(head) is list else head
if self in node.finished_groups:
Logr.debug("head finished for group")
self.result.heads.append(head)
heads_finished.append(True)
continue
Logr.debug('')
Logr.debug(node)
next_subject = node.next()
Logr.debug('----------[%s] (%s)----------' % (next_subject, repr(next_subject.value) if next_subject else None))
if next_subject:
for node_result in self.parse_subject(head, next_subject):
self.result.heads.append(node_result)
Logr.debug('Heads: %s', self.result.heads)
heads_finished.append(self in node.finished_groups or next_subject is None)
# Nothing was produced in this pass: restore the previous heads
if len(self.result.heads) == 0:
self.result.heads = heads
Logr.debug("heads_finished: %s, self.result.heads: %s", heads_finished, self.result.heads)
Logr.debug("group finished")

80
libs/caper/helpers.py

@ -0,0 +1,80 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
def is_list_type(obj, element_type):
    """Tell whether *obj* is a list whose first element is exactly *element_type*.

    Only the first element is inspected. Anything that is not exactly a
    ``list`` yields False.

    :raises ValueError: when *obj* is an empty list
    """
    if type(obj) is not list:
        return False

    if not obj:
        raise ValueError("Unable to determine list element type from empty list")

    first = obj[0]
    return type(first) is element_type
def clean_dict(target, remove=None):
    """Recursively remove items matching a value 'remove' from the dictionary

    Nested dicts are cleaned in place but never removed themselves.

    :type target: dict
    :return: the same *target* object
    :raises ValueError: when *target* is not exactly a dict
    """
    if type(target) is not dict:
        raise ValueError("Target is required to be a dict")

    # Collect first, pop afterwards: the dict is not resized while it is walked
    doomed = []
    for key in target.keys():
        value = target[key]
        if type(value) is dict:
            clean_dict(value, remove)
        elif value == remove:
            doomed.append(key)

    for key in doomed:
        target.pop(key)

    return target
def update_dict(a, b):
    """Merge *b* into *a* in place, keeping values that collide.

    New keys are copied over, two dicts under the same key are merged
    recursively, a list already in *a* gets the new value appended, and any
    other collision turns the entry into ``[old, new]``.
    """
    for key, value in b.items():
        if key not in a:
            a[key] = value
            continue

        current = a[key]
        if isinstance(current, dict) and isinstance(value, dict):
            update_dict(current, value)
        elif isinstance(current, list):
            current.append(value)
        else:
            a[key] = [current, value]
def xrange_six(start, stop=None, step=None):
    """Return a lazy integer range on both Python 2 and Python 3.

    Uses ``xrange`` where it exists and ``range`` otherwise.

    :param start: upper bound when called with one argument, first value otherwise
    :param stop: exclusive upper bound; when omitted, *start* is the upper
        bound and *step* is ignored
    :param step: increment, only used together with *stop*
    """
    try:
        range_func = xrange
    except NameError:
        # Python 3: the built-in range is already lazy
        range_func = range

    if stop is None:
        return range_func(start)

    # Previously `stop` was dropped unless `step` was given as well
    if step is None:
        return range_func(start, stop)

    return range_func(start, stop, step)
def delta_seconds(td):
    """Return the length of timedelta *td* in seconds, as a float."""
    whole_seconds = td.seconds + td.days * 24 * 3600
    return (td.microseconds + whole_seconds * 1e6) / 1e6

144
libs/caper/matcher.py

@ -0,0 +1,144 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from caper.helpers import is_list_type, update_dict, delta_seconds
from datetime import datetime
from logr import Logr
import re
class FragmentMatcher(object):
def __init__(self, pattern_groups):
    """Compile *pattern_groups* into ``self.regex``.

    ``self.regex`` maps a group name to a list of ``(weight, patterns)`` tuples.
    """
    self.regex = dict()
    self.construct_patterns(pattern_groups)
# Compile every pattern of `pattern_groups` (an iterable of (group_name, patterns)
# pairs) and append the result to self.regex[group_name] as
# (weight, [tuple of compiled regexes, ...]) entries. The number of compiled
# patterns and the elapsed time are logged at the end.
def construct_patterns(self, pattern_groups):
compile_start = datetime.now()
compile_count = 0
for group_name, patterns in pattern_groups:
if group_name not in self.regex:
self.regex[group_name] = []
# Transform into weight groups
# A plain pattern list (no leading numeric weight) becomes one group of weight 1.0.
if type(patterns[0]) is str or type(patterns[0][0]) not in [int, float]:
patterns = [(1.0, patterns)]
# NOTE: the loop variable `patterns` shadows the list being iterated
for weight, patterns in patterns:
weight_patterns = []
for pattern in patterns:
# Transform into multi-fragment patterns
if type(pattern) is str:
pattern = (pattern,)
# A (template, [options]) pair is a single fragment pattern, so it is
# wrapped; any other 2-tuple is left as two consecutive fragment patterns.
if type(pattern) is tuple and len(pattern) == 2:
if type(pattern[0]) is str and is_list_type(pattern[1], str):
pattern = (pattern,)
result = []
for value in pattern:
if type(value) is tuple:
if len(value) == 2:
# Construct OR-list pattern
value = value[0] % '|'.join(value[1])
elif len(value) == 1:
value = value[0]
result.append(re.compile(value, re.IGNORECASE))
compile_count += 1
weight_patterns.append(tuple(result))
self.regex[group_name].append((weight, weight_patterns))
Logr.info("Compiled %s patterns in %ss", compile_count, delta_seconds(datetime.now() - compile_start))
def find_group(self, name):
for group_name, weight_groups in self.regex.items():
if group_name and group_name == name:
return group_name, weight_groups
return None, None
def value_match(self, value, group_name=None, single=True):
result = None
for group, weight_groups in self.regex.items():
if group_name and group != group_name:
continue
# TODO handle multiple weights
weight, patterns = weight_groups[0]
for pattern in patterns:
match = pattern[0].match(value)
if not match:
continue
if result is None:
result = {}
if group not in result:
result[group] = {}
result[group].update(match.groupdict())
if single:
return result
return result
def fragment_match(self, fragment, group_name=None):
"""Follow a fragment chain to try find a match
:type fragment: caper.objects.CaperFragment
:type group_name: str or None
:return: The weight of the match found between 0.0 and 1.0,
where 1.0 means perfect match and 0.0 means no match
:rtype: (float, dict, int)
"""
group_name, weight_groups = self.find_group(group_name)
for weight, patterns in weight_groups:
for pattern in patterns:
cur_fragment = fragment
success = True
result = {}
# Ignore empty patterns
if len(pattern) < 1:
break
for fragment_pattern in pattern:
if not cur_fragment:
success = False
break
match = fragment_pattern.match(cur_fragment.value)
if match:
update_dict(result, match.groupdict())
else:
success = False
break
cur_fragment = cur_fragment.right if cur_fragment else None
if success:
Logr.debug("Found match with weight %s" % weight)
return float(weight), result, len(pattern)
return 0.0, None, 1

124
libs/caper/objects.py

@ -0,0 +1,124 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from caper.helpers import xrange_six
class CaperClosure(object):
    """A section of a parsed name, holding the fragments it was split into."""

    __key__ = 'closure'

    def __init__(self, index, value):
        #: :type: int
        self.index = index

        #: :type: str
        self.value = value

        #: :type: CaperClosure
        self.left = None
        #: :type: CaperClosure
        self.right = None

        #: :type: list of CaperFragment
        self.fragments = []

    def __str__(self):
        # Closing '>' added so the text is balanced, as in CaptureMatch.__str__
        return "<CaperClosure value: %s>" % repr(self.value)

    def __repr__(self):
        return self.__str__()
class CaperFragment(object):
    """A single piece of a closure, linked to its left and right neighbours."""

    __key__ = 'fragment'

    def __init__(self, closure=None):
        #: :type: CaperClosure
        self.closure = closure

        #: :type: str
        self.value = ""

        #: :type: CaperFragment
        self.left = None
        #: :type: str
        self.left_sep = None

        #: :type: CaperFragment
        self.right = None
        #: :type: str
        self.right_sep = None

        #: :type: int
        self.position = None

    def take(self, direction, count, include_self=True):
        """Collect *count* fragments walking in *direction*.

        :param direction: 'left' or 'right'
        :param count: number of entries wanted; includes this fragment when
            *include_self* is set
        :return: list of fragments, padded with None once the chain runs out
        :raises ValueError: for any other *direction*
        """
        if direction not in ['left', 'right']:
            raise ValueError('Un-Expected value for "direction", expected "left" or "right".')

        result = []

        if include_self:
            result.append(self)
            count -= 1

        cur = self
        for _ in xrange_six(count):
            if cur and getattr(cur, direction):
                cur = getattr(cur, direction)
                result.append(cur)
            else:
                # End of the chain: pad, and stay at the end for later iterations
                result.append(None)
                cur = None

        return result

    def take_left(self, count, include_self=True):
        """Shortcut for ``take('left', count, include_self)``."""
        return self.take('left', count, include_self)

    def take_right(self, count, include_self=True):
        """Shortcut for ``take('right', count, include_self)``."""
        return self.take('right', count, include_self)

    def __str__(self):
        # Closing '>' added so the text is balanced, as in CaptureMatch.__str__
        return "<CaperFragment value: %s>" % repr(self.value)

    def __repr__(self):
        return self.__str__()
class CaptureMatch(object):
    """Outcome of running one capture step against a subject."""

    def __init__(self, tag, step, success=False, weight=None, result=None, num_fragments=1):
        #: :type: str
        self.tag = tag
        #: :type: CaptureStep
        self.step = step

        #: :type: bool
        self.success = success
        #: :type: float
        self.weight = weight
        #: :type: dict or str
        self.result = result
        #: :type: int
        self.num_fragments = num_fragments

    def __str__(self):
        return "<CaperMatch result: %s>" % (repr(self.result),)

    def __repr__(self):
        return str(self)

0
libs/caper/parsers/__init__.py

88
libs/caper/parsers/anime.py

@ -0,0 +1,88 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from caper.parsers.base import Parser
# Matches a value that starts with a '(...)' or '[...]' section and captures
# the text between the brackets as 'group'.
REGEX_GROUP = re.compile(r'(\(|\[)(?P<group>.*?)(\)|\])', re.IGNORECASE)
# Pattern groups as (group name, patterns) pairs. An entry is a regex string,
# a tuple of regex strings (one per consecutive fragment), or a
# (template, [options]) pair whose '%s' is replaced by the options joined with '|'.
PATTERN_GROUPS = [
('identifier', [
r'S(?P<season>\d+)E(?P<episode>\d+)',
r'(S(?P<season>\d+))|(E(?P<episode>\d+))',
r'Ep(?P<episode>\d+)',
# NOTE(review): '$' comes before and '^' after the digits, so this pattern
# looks unable to match anything; '^(?P<absolute>\d+)$' may have been
# intended - confirm before changing, it would alter what gets captured.
r'$(?P<absolute>\d+)^',
(r'Episode', r'(?P<episode>\d+)'),
]),
('video', [
(r'(?P<h264_profile>%s)', [
'Hi10P'
]),
# NOTE(review): the leading '.' is unescaped and therefore consumes any one
# character before the resolution - confirm that this is intended.
(r'.(?P<resolution>%s)', [
'720p',
'1080p',
'960x720',
'1920x1080'
]),
(r'(?P<source>%s)', [
'BD'
]),
]),
('audio', [
(r'(?P<codec>%s)', [
'FLAC'
]),
])
]
class AnimeParser(Parser):
    """Parser for names laid out as '[group] show name identifier video/audio tags'."""

    def __init__(self, debug=False):
        # Parser keeps its first argument as `self.matcher`, and the constraint
        # code calls fragment_match() / value_match() on it, so the raw
        # pattern list has to be compiled into a FragmentMatcher first.
        from caper.matcher import FragmentMatcher

        super(AnimeParser, self).__init__(FragmentMatcher(PATTERN_GROUPS), debug)

    def capture_group(self, fragment):
        """Return the bracketed text at the start of ``fragment.value``, or None."""
        match = REGEX_GROUP.match(fragment.value)

        if not match:
            return None

        return match.group('group')

    def run(self, closures):
        """Capture group, show name, identifier and video/audio tags.

        :type closures: list of CaperClosure
        :return: the built result object (``self.result``)
        """
        self.setup(closures)

        # NOTE(review): CaptureGroup.execute() is declared without a `once`
        # parameter in caper/group.py - confirm which side is out of date.
        self.capture_closure('group', func=self.capture_group)\
            .execute(once=True)

        # The show name runs until the first identifier or video fragment
        self.capture_fragment('show_name', single=False)\
            .until_fragment(value__re='identifier')\
            .until_fragment(value__re='video')\
            .execute()

        self.capture_fragment('identifier', regex='identifier') \
            .capture_fragment('video', regex='video', single=False) \
            .capture_fragment('audio', regex='audio', single=False) \
            .execute()

        self.result.build()
        return self.result

84
libs/caper/parsers/base.py

@ -0,0 +1,84 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from caper import FragmentMatcher
from caper.group import CaptureGroup
from caper.result import CaperResult, CaperClosureNode, CaperRootNode
from logr import Logr
class Parser(object):
    """Base class for release-name parsers.

    Holds the matcher, the closures being parsed and the CaperResult that is
    filled in, and offers helpers that start a CaptureGroup bound to this
    parser. Subclasses implement run().
    """

    def __init__(self, matcher, debug=False):
        """
        :param matcher: stored as `self.matcher` for use by capture steps
        :param debug: stored as `self.debug`
        :type debug: bool
        """
        self.matcher = matcher
        self.debug = debug

        # Declared here, given their working values by reset() below
        self.closures = None

        #: :type: caper.result.CaperResult
        self.result = None

        self._match_cache = None
        self._fragment_pos = None
        self._closure_pos = None
        self._history = None

        self.reset()

    def reset(self):
        """Drop all per-run state and start from a fresh, empty result."""
        self.closures = None
        self.result = CaperResult()

        self._history = []
        self._match_cache = {}
        self._closure_pos = -1
        self._fragment_pos = -1

    def setup(self, closures):
        """Reset the parser and root the result at the first closure.

        :type closures: list of CaperClosure
        """
        self.reset()
        self.closures = closures

        root = CaperRootNode(closures[0])
        self.result.heads = [root]

    def run(self, closures):
        """Parse `closures`; has to be provided by subclasses.

        :type closures: list of CaperClosure
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    #
    # Capture Methods
    #

    def capture_fragment(self, tag, regex=None, func=None, single=True, **kwargs):
        """Start a new CaptureGroup with a fragment capture for `tag`.

        All arguments are forwarded to CaptureGroup.capture_fragment, whose
        return value is returned unchanged.
        """
        group = CaptureGroup(self, self.result)
        return group.capture_fragment(tag, regex=regex, func=func, single=single, **kwargs)

    def capture_closure(self, tag, regex=None, func=None, single=True, **kwargs):
        """Start a new CaptureGroup with a closure capture for `tag`.

        All arguments are forwarded to CaptureGroup.capture_closure, whose
        return value is returned unchanged.
        """
        group = CaptureGroup(self, self.result)
        return group.capture_closure(tag, regex=regex, func=func, single=single, **kwargs)

230
libs/caper/parsers/scene.py

@ -0,0 +1,230 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper import FragmentMatcher
from caper.parsers.base import Parser
from caper.result import CaperFragmentNode
# Pattern groups for SceneParser, as a list of (group name, patterns) pairs.
# The group names are what the `regex=` / `node__re=` arguments in
# SceneParser.run refer to.
#
# Pattern entries are plain regex strings, tuples of regex strings (see the
# "multi-fragment" notes below) or (template, alternatives) tuples whose
# template contains '%s'. The 'identifier' group additionally wraps its
# patterns in (weight, patterns) pairs.
#
# Every regex is written as a raw string so that sequences such as '\d' reach
# the regex engine untouched instead of being treated as (invalid) string
# escapes.
PATTERN_GROUPS = [
    ('identifier', [
        (1.0, [
            # S01E01-E02
            (r'^S(?P<season>\d+)E(?P<episode_from>\d+)$', r'^E(?P<episode_to>\d+)$'),
            # 'S03 E01 to E08' or 'S03 E01 - E09'
            (r'^S(?P<season>\d+)$', r'^E(?P<episode_from>\d+)$', r'^(to|-)$', r'^E(?P<episode_to>\d+)$'),
            # 'E01 to E08' or 'E01 - E09'
            (r'^E(?P<episode_from>\d+)$', r'^(to|-)$', r'^E(?P<episode_to>\d+)$'),

            # S01-S03
            (r'^S(?P<season_from>\d+)$', r'^S(?P<season_to>\d+)$'),

            # S02E13
            r'^S(?P<season>\d+)E(?P<episode>\d+)$',
            # S01 E13
            (r'^(S(?P<season>\d+))$', r'^(E(?P<episode>\d+))$'),
            # S02
            # E13
            r'^((S(?P<season>\d+))|(E(?P<episode>\d+)))$',
            # 3x19
            r'^(?P<season>\d+)x(?P<episode>\d+)$',

            # 2013.09.15
            (r'^(?P<year>\d{4})$', r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$'),
            # 09.15.2013
            (r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$', r'^(?P<year>\d{4})$'),
            # TODO - US/UK Date Format Conflict? will only support US format for now..
            # 15.09.2013
            #(r'^(?P<day>\d{2})$', r'^(?P<month>\d{2})$', r'^(?P<year>\d{4})$'),
            # 130915
            r'^(?P<year_short>\d{2})(?P<month>\d{2})(?P<day>\d{2})$',

            # Season 3 Episode 14
            (r'^Se(ason)?$', r'^(?P<season>\d+)$', r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),
            # Season 3
            (r'^Se(ason)?$', r'^(?P<season>\d+)$'),
            # Episode 14
            (r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),

            # Part.3
            # Part.1.and.Part.3
            (r'^Part$', r'(?P<part>\d+)'),

            r'(?P<extra>Special)',
            r'(?P<country>NZ|AU|US|UK)'
        ]),
        (0.8, [
            # 100 - 1899, 2100 - 9999 (skips 1900 to 2099 - so we don't get years by mistake)
            # TODO - Update this pattern on 31 Dec 2099
            r'^(?P<season>([1-9])|(1[0-8])|(2[1-9])|([3-9][0-9]))(?P<episode>\d{2})$'
        ]),
        (0.5, [
            # 100 - 9999
            r'^(?P<season>([1-9])|([1-9][0-9]))(?P<episode>\d{2})$'
        ])
    ]),

    ('video', [
        r'(?P<aspect>FS|WS)',

        (r'(?P<resolution>%s)', [
            '480p',
            '720p',
            '1080p'
        ]),

        #
        # Source
        #

        (r'(?P<source>%s)', [
            'DVDRiP',
            # HDTV
            'HDTV',
            'PDTV',
            'DSR',
            # WEB
            'WEBRip',
            'WEBDL',
            # BluRay
            'BluRay',
            'B(D|R)Rip',
            # DVD
            'DVDR',
            'DVD9',
            'DVD5'
        ]),

        # For multi-fragment 'WEB-DL', 'WEB-Rip', etc... matches
        (r'(?P<source>WEB)', r'(?P<source>DL|Rip)'),

        #
        # Codec
        #

        (r'(?P<codec>%s)', [
            'x264',
            'XViD',
            'H264',
            'AVC'
        ]),

        # For multi-fragment 'H 264' tags
        (r'(?P<codec>H)', r'(?P<codec>264)'),
    ]),

    ('dvd', [
        r'D(ISC)?(?P<disc>\d+)',

        r'R(?P<region>[0-8])',

        (r'(?P<encoding>%s)', [
            'PAL',
            'NTSC'
        ]),
    ]),

    ('audio', [
        (r'(?P<codec>%s)', [
            'AC3',
            'TrueHD'
        ]),

        (r'(?P<language>%s)', [
            'GERMAN',
            'DUTCH',
            'FRENCH',
            'SWEDiSH',
            'DANiSH',
            'iTALiAN'
        ]),
    ]),

    ('scene', [
        r'(?P<proper>PROPER|REAL)',
    ])
]
class SceneParser(Parser):
    """Parser for scene-style release names.

    Captures the show name, then identifier / video / dvd / audio / scene
    information described by PATTERN_GROUPS, and finally the release group.
    """

    #: FragmentMatcher built from PATTERN_GROUPS; created once and shared
    matcher = None

    def __init__(self, debug=False):
        """
        :param debug: when true, run() logs the result tree via print_tree()
        :type debug: bool
        """
        if not SceneParser.matcher:
            SceneParser.matcher = FragmentMatcher(PATTERN_GROUPS)
            Logr.info("Fragment matcher for %s created", self.__class__.__name__)

        super(SceneParser, self).__init__(SceneParser.matcher, debug)

    def capture_group(self, fragment):
        """Return the release group held by `fragment`, if it is one.

        A fragment qualifies only when it belongs to the last closure, is
        preceded by a '-' separator and has nothing to its right.

        :return: the fragment value, or None when it does not qualify
        """
        if fragment.closure.index + 1 != len(self.closures):
            return None

        if fragment.left_sep != '-' or fragment.right:
            return None

        return fragment.value

    def run(self, closures):
        """Parse `closures` and return the built result.

        :type closures: list of CaperClosure
        :rtype: caper.result.CaperResult
        """
        self.setup(closures)

        # Show name runs until the first fragment matching any pattern group
        self.capture_fragment('show_name', single=False)\
            .until_fragment(node__re='identifier')\
            .until_fragment(node__re='video')\
            .until_fragment(node__re='dvd')\
            .until_fragment(node__re='audio')\
            .until_fragment(node__re='scene')\
            .execute()

        self.capture_fragment('identifier', regex='identifier', single=False)\
            .capture_fragment('video', regex='video', single=False)\
            .capture_fragment('dvd', regex='dvd', single=False)\
            .capture_fragment('audio', regex='audio', single=False)\
            .capture_fragment('scene', regex='scene', single=False)\
            .until_fragment(left_sep__eq='-', right__eq=None)\
            .execute()

        self.capture_fragment('group', func=self.capture_group)\
            .execute()

        self.print_tree(self.result.heads)

        self.result.build()
        return self.result

    def print_tree(self, heads):
        """Log the node tree reachable from `heads` (debug mode only).

        For each head the matched fragment values (or the closure value) are
        logged, followed by one line per node, and then the head's parent is
        printed recursively.

        :param heads: nodes, or lists of nodes, to print
        """
        if not self.debug:
            return

        for head in heads:
            head = head if type(head) is list else [head]

            if type(head[0]) is CaperFragmentNode:
                for fragment in head[0].fragments:
                    Logr.debug(fragment.value)
            else:
                Logr.debug(head[0].closure.value)

            for node in head:
                # Always log the node itself; weight and result are appended
                # only when the node carries a match. Previously the
                # conditional expression wrapped the whole concatenation, so
                # nodes without a match were logged as an empty string.
                line = '\t' + str(node).ljust(55) + '\t'

                if node.match:
                    line += str(node.match.weight) + '\t' + str(node.match.result)

                Logr.debug(line)

            if len(head) > 0 and head[0].parent:
                self.print_tree([head[0].parent])

115
libs/caper/parsers/usenet.py

@ -0,0 +1,115 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper import FragmentMatcher
from caper.parsers.base import Parser
# Pattern groups for UsenetParser, as a list of (group name, patterns) pairs.
# The group names are what the `regex=` / `node__re=` arguments in
# UsenetParser refer to.
PATTERN_GROUPS = [
    ('usenet', [
        # Bracketed token starting with '#', e.g. "[#a.b.group@net]"
        r'\[(?P<group>#[\w\.@]+)\]',
        # Value that consists solely of a bracketed word, e.g. "[12345]"
        r'^\[(?P<code>\w+)\]$',
        # Literal "[FULL]"
        r'\[(?P<full>FULL)\]',
        # "[TOWN]", optionally padded with one whitespace character per side;
        # UsenetParser.get_state checks for group == 'TOWN'
        r'\[\s?(?P<group>TOWN)\s?\]',
        # Site name starting with "www.", optionally wrapped in punctuation
        r'(.*?\s)?[_\W]*(?P<site>www\..*?\.[a-z0-9]+)[_\W]*(.*?\s)?',
        # Site name ending in .com / .org / .info, "www." prefix optional
        r'(.*?\s)?[_\W]*(?P<site>(www\.)?[-\w]+\.(com|org|info))[_\W]*(.*?\s)?'
    ]),

    ('part', [
        # "current/total", optionally wrapped in one character on each side
        r'.?(?P<current>\d+)/(?P<total>\d+).?'
    ]),

    ('detail', [
        # Double-quoted file name, optionally followed by a size in MB and "yEnc"
        r'[\s-]*\w*?[\s-]*\"(?P<file_name>.*?)\"[\s-]*\w*?[\s-]*(?P<size>[\d,\.]*\s?MB)?[\s-]*(?P<extra>yEnc)?',
        # Size in MB followed by "yEnc"
        r'(?P<size>[\d,\.]*\s?MB)[\s-]*(?P<extra>yEnc)',
        # Either a size in MB or "yEnc" on its own
        r'(?P<size>[\d,\.]*\s?MB)|(?P<extra>yEnc)'
    ])
]
class UsenetParser(Parser):
    """Parser for usenet subject lines.

    Captures usenet tags and part information, the release name and any
    remaining detail closures, using the groups defined in PATTERN_GROUPS.
    """

    #: FragmentMatcher built from PATTERN_GROUPS; created once and shared
    matcher = None

    def __init__(self, debug=False):
        """
        :param debug: passed through to the base Parser
        :type debug: bool
        """
        if not UsenetParser.matcher:
            UsenetParser.matcher = FragmentMatcher(PATTERN_GROUPS)
            Logr.info("Fragment matcher for %s created", self.__class__.__name__)

        super(UsenetParser, self).__init__(UsenetParser.matcher, debug)

    def run(self, closures):
        """Parse `closures` and return the built result.

        :type closures: list of CaperClosure
        :rtype: caper.result.CaperResult
        """
        self.setup(closures)

        # Capture usenet or part info until we get a part or matching fails
        (self.capture_closure('usenet', regex='usenet', single=False)
             .capture_closure('part', regex='part', single=True)
             .until_result(tag='part')
             .until_failure()
             .execute())

        is_town_release, has_part = self.get_state()

        # TOWN releases get neither a release name nor a second part capture
        if not is_town_release:
            self.capture_release_name()

            # If we already have the part, ignore matching part again
            if not has_part:
                (self.capture_fragment('part', regex='part', single=True)
                     .until_closure(node__re='usenet')
                     .until_success()
                     .execute())

        # Capture any leftover details
        (self.capture_closure('usenet', regex='usenet', single=False)
             .capture_closure('detail', regex='detail', single=False)
             .execute())

        self.result.build()
        return self.result

    def capture_release_name(self):
        """Capture the release name together with the detail around it."""
        # Detail in front of the release name
        (self.capture_closure('detail', regex='detail', single=False)
             .until_failure()
             .execute())

        # The name itself, separators included, up to the next usenet /
        # detail / part closure or a lone '-' fragment
        (self.capture_fragment('release_name', single=False, include_separators=True)
             .until_closure(node__re='usenet')
             .until_closure(node__re='detail')
             .until_closure(node__re='part')
             .until_fragment(value__eq='-')
             .execute())

        # Capture any detail after the release name
        (self.capture_closure('detail', regex='detail', single=False)
             .until_failure()
             .execute())

    def get_state(self):
        """Inspect what the first result head has captured so far.

        :return: (is_town_release, has_part) - whether a 'usenet' capture has
                 group 'TOWN', and whether a 'part' capture exists
        :rtype: tuple of bool
        """
        # TODO multiple-chains?
        town_seen = False
        part_seen = False

        for tag, captured in self.result.heads[0].captured():
            if tag == 'usenet' and captured.get('group') == 'TOWN':
                town_seen = True
            elif tag == 'part':
                part_seen = True

        return town_seen, part_seen

213
libs/caper/result.py

@ -0,0 +1,213 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from logr import Logr
# NOTE(review): not referenced anywhere else in the visible part of this
# module; presumably read by other caper code - confirm before changing it.
GROUP_MATCHES = ['identifier']
class CaperNode(object):
    """A node in the result tree, linked to its parent node.

    Concrete node types implement next().
    """

    def __init__(self, closure, parent=None, match=None):
        """
        :type closure: caper.objects.CaperClosure
        :type parent: CaperNode
        :type match: CaptureMatch
        """
        #: :type: CaptureMatch
        self.match = match

        #: :type: CaperNode
        self.parent = parent

        #: :type: caper.objects.CaperClosure
        self.closure = closure

        #: :type: list of CaptureGroup
        self.finished_groups = []

    def next(self):
        """Return what should be parsed after this node (subclasses only)."""
        raise NotImplementedError()

    def captured(self):
        """Yield (tag, result) for this node and then for each ancestor.

        Nodes without a match are skipped. The walk stops at the first node
        whose parent is falsy.
        """
        node = self

        while True:
            if node.match:
                yield node.match.tag, node.match.result

            if not node.parent:
                break

            node = node.parent
class CaperRootNode(CaperNode):
    """Tree root: a node with a closure but no parent and no match."""

    def __init__(self, closure):
        """
        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        super(CaperRootNode, self).__init__(closure, parent=None, match=None)

    def next(self):
        """Return the closure this root was created with."""
        return self.closure
class CaperClosureNode(CaperNode):
    """Node that refers to a whole closure."""

    def __init__(self, closure, parent=None, match=None):
        """
        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        super(CaperClosureNode, self).__init__(closure, parent, match)

    def next(self):
        """Return what should be parsed after this node.

        :return: None without a closure; the closure to the right when this
                 node has a match; otherwise the closure's first fragment,
                 or None if it has no fragments
        """
        closure = self.closure

        if not closure:
            return None

        # Jump to next closure if we have a match
        if self.match:
            return closure.right

        # Otherwise parse the fragments
        fragments = closure.fragments
        return fragments[0] if len(fragments) > 0 else None

    def __str__(self):
        return "<CaperClosureNode match: %s>" % repr(self.match)

    def __repr__(self):
        return self.__str__()
class CaperFragmentNode(CaperNode):
    """Node that refers to one or more fragments of a closure."""

    def __init__(self, closure, fragments, parent=None, match=None):
        """
        :type closure: caper.objects.CaperClosure
        :type fragments: list of caper.objects.CaperFragment
        """
        super(CaperFragmentNode, self).__init__(closure, parent, match)

        #: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment
        self.fragments = fragments

    def next(self):
        """Return what should be parsed after this node.

        :return: the fragment to the right of the last fragment if there is
                 one, else the closure to the right, else None
        """
        last = self.fragments[-1] if len(self.fragments) > 0 else None

        if last and last.right:
            return last.right

        following = self.closure.right
        if following:
            return following

        return None

    def __str__(self):
        return "<CaperFragmentNode match: %s>" % repr(self.match)

    def __repr__(self):
        return self.__str__()
class CaperResult(object):
    """Collects the head nodes of a parse and turns them into result chains."""

    def __init__(self):
        #: :type: list of CaperNode
        self.heads = []

        self.chains = []

    def build(self):
        """Build a chain for every path from a head up to a parentless node.

        Each chain gets one extra weight - its matched-fragment count
        relative to the highest count among the newly built chains - and is
        then finished. `self.chains` ends up sorted by weight, highest first,
        and every chain is logged at debug level.
        """
        best = 0

        for head in self.heads:
            for candidate in self.combine_chain(head):
                best = max(best, candidate.num_matched)
                self.chains.append(candidate)

        for candidate in self.chains:
            # Falls back to the chain's own count, then to 1, so the divisor
            # is never zero
            divisor = float(best or candidate.num_matched or 1)
            candidate.weights.append(candidate.num_matched / divisor)
            candidate.finish()

        self.chains.sort(key=lambda chain: chain.weight, reverse=True)

        for candidate in self.chains:
            Logr.debug("chain weight: %.02f", candidate.weight)
            Logr.debug("\tInfo: %s", candidate.info)
            Logr.debug("\tWeights: %s", candidate.weights)
            Logr.debug("\tNumber of Fragments Matched: %s", candidate.num_matched)

    def combine_chain(self, subject, chain=None):
        """Walk from `subject` towards the root, producing one chain per path.

        :param subject: a node, or a list of alternative nodes
        :param chain: chain to continue; a new CaperResultChain when None
        :return: the chains that reached a node without a parent
        :rtype: list
        """
        if type(subject) is list:
            nodes = subject
        else:
            nodes = [subject]

        if chain is None:
            chain = CaperResultChain()

        final_index = len(nodes) - 1
        completed = []

        for position, node in enumerate(nodes):
            # Every alternative but the last continues on its own copy; the
            # last one reuses the chain that was passed in
            branch = chain.copy() if position != final_index else chain

            if node.parent:
                branch.update(node)
                completed.extend(self.combine_chain(node.parent, branch))
            else:
                completed.append(branch)

        return completed
class CaperResultChain(object):
    """Accumulates captured info and weights along one path of the tree."""

    def __init__(self):
        self.info = {}
        self.weights = []
        self.num_matched = 0

        #: :type: float
        self.weight = None

    def update(self, subject):
        """Fold the match of `subject` into this chain.

        Nodes without a successful match are ignored. Otherwise the match
        weight is recorded and the match result is placed at the front of
        the list kept for its tag.

        :type subject: CaperFragmentNode
        """
        match = subject.match

        if not match or not match.success:
            return

        # TODO this should support closure nodes
        if type(subject) is CaperFragmentNode:
            fragments = subject.fragments
            if fragments is not None:
                self.num_matched += len(fragments)

        self.weights.append(match.weight)
        self.info.setdefault(match.tag, []).insert(0, match.result)

    def finish(self):
        """Set `weight` to the mean of the collected weights."""
        self.weight = sum(self.weights) / len(self.weights)

    def copy(self):
        """Return an independent copy (info deep-copied, weights shallow)."""
        duplicate = CaperResultChain()

        duplicate.num_matched = self.num_matched
        duplicate.weight = self.weight
        duplicate.weights = copy.copy(self.weights)
        duplicate.info = copy.deepcopy(self.info)

        return duplicate

96
libs/caper/step.py

@ -0,0 +1,96 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from caper.objects import CaptureMatch
from logr import Logr
class CaptureStep(object):
    """One capture instruction of a CaptureGroup.

    A step captures by regex group, by callback, or - when neither is given -
    takes the fragment value as it is.
    """

    #: Attributes shown by __repr__ when they hold a truthy value
    REPR_KEYS = ['regex', 'func', 'single']

    def __init__(self, capture_group, tag, source, regex=None, func=None, single=None, **kwargs):
        #: @type: CaptureGroup
        self.capture_group = capture_group

        #: @type: str
        self.tag = tag
        #: @type: str
        self.source = source
        #: @type: str
        self.regex = regex
        #: @type: function
        self.func = func
        #: @type: bool
        self.single = single

        # Extra options; execute() reads 'include_separators' from here
        self.kwargs = kwargs

        self.matched = False

    def execute(self, fragment):
        """Execute step on fragment

        :type fragment: CaperFragment
        :rtype : CaptureMatch
        :return: a match for this step's tag; `success`, `weight` and
                 `result` are only filled in when something was captured
        """
        match = CaptureMatch(self.tag, self)

        if self.regex:
            matcher = self.capture_group.parser.matcher
            weight, result, num_fragments = matcher.fragment_match(fragment, self.regex)
            Logr.debug('(execute) [regex] tag: "%s"', self.tag)

            if result:
                # Populate CaptureMatch
                match.success = True
                match.weight = weight
                match.result = result
                match.num_fragments = num_fragments

            return match

        if self.func:
            result = self.func(fragment)
            # NOTE(review): this logs the still unpopulated match object
            # rather than `result` - confirm that is intended
            Logr.debug('(execute) [func] %s += "%s"', self.tag, match)

            if result:
                # Populate CaptureMatch
                match.success = True
                match.weight = 1.0
                match.result = result

            return match

        # Neither regex nor func: the raw fragment value always matches
        Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)

        with_separators = self.kwargs.get('include_separators', False)

        # Populate CaptureMatch
        match.success = True
        match.weight = 1.0
        match.result = (
            (fragment.left_sep, fragment.value, fragment.right_sep)
            if with_separators else fragment.value
        )

        return match

    def __repr__(self):
        shown = []

        for key in self.REPR_KEYS:
            if hasattr(self, key) and getattr(self, key):
                shown.append(key + '=' + repr(getattr(self, key)))

        if len(shown) > 0:
            suffix = ', ' + ', '.join(shown)
        else:
            suffix = ''

        return "CaptureStep('%s'%s)" % (self.tag, suffix)

225
libs/logr/__init__.py

@ -0,0 +1,225 @@
# logr - Simple python logging wrapper
# Packed by Dean Gardiner <gardiner91@gmail.com>
#
# File part of:
# rdio-sock - Rdio WebSocket Library
# Copyright (C) 2013 fzza- <fzzzzzzzza@gmail.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import inspect
import logging
import os
import sys
# Logger names that Logr.get_logger_name() skips while walking the call stack
IGNORE = ()
# True when the interpreter's major version is 3
PY3 = sys.version_info[0] == 3
class Logr(object):
    """Static facade over the standard `logging` module.

    All state lives on the class: one shared handler, a cache of loggers by
    name and the naming options set through configure(). The logging
    methods (debug, info, ...) look up the logger for the current name and
    forward the call to it.
    """

    #: Loggers created so far, keyed by logger name
    loggers = {}
    #: Handler attached to every logger created by get_logger()
    handler = None
    #: When True, logger names are derived from the calling code
    trace_origin = False
    #: Logger name used while trace_origin is False
    name = "Logr"

    @staticmethod
    def configure(level=logging.WARNING, handler=None, formatter=None, trace_origin=False, name="Logr"):
        """Configure Logr

        The formatter and level are applied to the handler, including one
        passed in by the caller.

        @param level: Level set on the handler
        @param handler: Logger message handler
        @type handler: logging.Handler or None
        @param formatter: Logger message Formatter
        @type formatter: logging.Formatter or None
        @param trace_origin: Derive logger names from the calling code
        @type trace_origin: bool
        @param name: Logger name used when trace_origin is False
        @type name: str
        """
        if formatter is None:
            formatter = LogrFormatter()

        if handler is None:
            handler = logging.StreamHandler()

        handler.setFormatter(formatter)
        handler.setLevel(level)
        Logr.handler = handler
        Logr.trace_origin = trace_origin
        Logr.name = name

    @staticmethod
    def configure_check():
        """Apply the default configuration if no handler has been set yet."""
        if Logr.handler is None:
            Logr.configure()

    @staticmethod
    def _get_name_from_path(filename):
        """Return the file name of `filename` without directory or extension.

        @return: the bare name, or "<unknown>" when `filename` is not a path
                 (for example None)
        """
        try:
            return os.path.splitext(os.path.basename(filename))[0]
        except (TypeError, AttributeError):
            # Python 3 raises TypeError for a non-path value; Python 2's
            # basename() fails with AttributeError on None instead.
            return "<unknown>"

    @staticmethod
    def get_frame_class(frame):
        """Return the class that the code running in `frame` belongs to.

        Looks at the first local variable name of the frame's code: for
        'self' the instance's class is returned, for 'cls' the value itself.

        @return: the class, or None when it cannot be determined
        """
        if len(frame.f_code.co_varnames) <= 0:
            return None

        farg = frame.f_code.co_varnames[0]

        if farg not in frame.f_locals:
            return None

        if farg == 'self':
            return frame.f_locals[farg].__class__

        if farg == 'cls':
            return frame.f_locals[farg]

        return None

    @staticmethod
    def get_logger_name():
        """Return the logger name to use for the current call.

        Without trace_origin this is simply Logr.name. With it, the call
        stack is walked from the innermost frame outwards and the first name
        that can be determined, and is not listed in IGNORE, is returned.

        @rtype: str
        """
        if not Logr.trace_origin:
            return Logr.name

        stack = inspect.stack()

        for frame_record in stack:
            frame = frame_record[0]
            name = None

            # Try find name of function defined inside a class
            frame_class = Logr.get_frame_class(frame)

            if frame_class:
                class_name = frame_class.__name__
                module_name = frame_class.__module__

                if module_name != '__main__':
                    name = module_name + '.' + class_name
                else:
                    name = class_name

            # Try find name of function defined outside of a class
            if name is None:
                if frame.f_code.co_name in frame.f_globals:
                    name = frame.f_globals.get('__name__')
                    if name == '__main__':
                        name = Logr._get_name_from_path(frame.f_globals.get('__file__'))
                elif frame.f_code.co_name == '<module>':
                    name = Logr._get_name_from_path(frame.f_globals.get('__file__'))

            if name is not None and name not in IGNORE:
                return name

        return ""

    @staticmethod
    def get_logger():
        """Get or create logger (if it does not exist)

        A newly created logger gets the shared handler attached.

        @rtype: RootLogger
        """
        name = Logr.get_logger_name()
        if name not in Logr.loggers:
            Logr.configure_check()
            Logr.loggers[name] = logging.Logger(name)
            Logr.loggers[name].addHandler(Logr.handler)
        return Logr.loggers[name]

    @staticmethod
    def debug(msg, *args, **kwargs):
        Logr.get_logger().debug(msg, *args, **kwargs)

    @staticmethod
    def info(msg, *args, **kwargs):
        Logr.get_logger().info(msg, *args, **kwargs)

    @staticmethod
    def warning(msg, *args, **kwargs):
        Logr.get_logger().warning(msg, *args, **kwargs)

    warn = warning

    @staticmethod
    def error(msg, *args, **kwargs):
        Logr.get_logger().error(msg, *args, **kwargs)

    @staticmethod
    def exception(msg, *args, **kwargs):
        Logr.get_logger().exception(msg, *args, **kwargs)

    @staticmethod
    def critical(msg, *args, **kwargs):
        Logr.get_logger().critical(msg, *args, **kwargs)

    fatal = critical

    @staticmethod
    def log(level, msg, *args, **kwargs):
        Logr.get_logger().log(level, msg, *args, **kwargs)
class LogrFormatter(logging.Formatter):
    """Formatter producing fixed-width "time name level message" lines."""

    #: Width of the logger name column (right-aligned, keeps the name's tail)
    LENGTH_NAME = 32
    #: Width of the level name column (left-aligned, keeps the name's head)
    LENGTH_LEVEL_NAME = 5

    def __init__(self, fmt=None, datefmt=None):
        if sys.version_info[:2] <= (2, 6):
            # NOTE(review): presumably super() cannot be used with
            # logging.Formatter on these versions - confirm
            logging.Formatter.__init__(self, fmt, datefmt)
        else:
            super(LogrFormatter, self).__init__(fmt, datefmt)

    def usesTime(self):
        """Always report that the timestamp is needed."""
        return True

    def format(self, record):
        """Return `record` as one line, plus the traceback if there is one.

        The line layout is fixed; the `fmt` passed to the constructor is not
        consulted here.
        """
        record.message = record.getMessage()

        if self.usesTime():
            record.asctime = self.formatTime(record, self.datefmt)

        name_column = record.name[-self.LENGTH_NAME:].rjust(self.LENGTH_NAME, ' ')
        level_column = record.levelname[:self.LENGTH_LEVEL_NAME].ljust(self.LENGTH_LEVEL_NAME, ' ')

        s = "%(asctime)s %(name)s %(levelname)s %(message)s" % {
            'asctime': record.asctime,
            'name': name_column,
            'levelname': level_column,
            'message': record.message
        }

        # Render the traceback once and keep it on the record
        if record.exc_info and not record.exc_text:
            record.exc_text = self.formatException(record.exc_info)

        if record.exc_text:
            if not s.endswith("\n"):
                s += "\n"

            try:
                s += record.exc_text
            except UnicodeError:
                s = s + record.exc_text.decode(sys.getfilesystemencoding(),
                                               'replace')

        return s
def xrange_six(start, stop=None, step=None):
    """Return a lazy integer range on both Python 2 and Python 3.

    Mirrors the built-in: `xrange_six(n)`, `xrange_six(start, stop)` and
    `xrange_six(start, stop, step)`. Previously `stop` was silently dropped
    unless `step` was given as well, so `xrange_six(2, 5)` behaved like
    `range(2)`.

    A `step` passed without a `stop` is ignored, as before.

    :rtype: xrange on Python 2, range on Python 3
    """
    try:
        range_type = xrange  # Python 2
    except NameError:
        range_type = range  # Python 3 has no xrange

    if stop is None:
        return range_type(start)

    if step is None:
        return range_type(start, stop)

    return range_type(start, stop, step)
Loading…
Cancel
Save