Browse Source

Added Caper (0.2.0-master) and Logr (0.2.1) libraries

pull/2284/head
Dean Gardiner 12 years ago
parent
commit
ab51707607
  1. 161
      libs/caper/__init__.py
  2. 74
      libs/caper/constraint.py
  3. 147
      libs/caper/group.py
  4. 64
      libs/caper/helpers.py
  5. 193
      libs/caper/matcher.py
  6. 75
      libs/caper/objects.py
  7. 0
      libs/caper/parsers/__init__.py
  8. 88
      libs/caper/parsers/anime.py
  9. 136
      libs/caper/parsers/base.py
  10. 148
      libs/caper/parsers/scene.py
  11. 172
      libs/caper/result.py
  12. 72
      libs/caper/step.py
  13. 201
      libs/logr/__init__.py

161
libs/caper/__init__.py

@ -0,0 +1,161 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper.matcher import FragmentMatcher
from caper.objects import CaperFragment, CaperClosure
from caper.parsers.anime import AnimeParser
from caper.parsers.scene import SceneParser
# Version metadata. The numeric parts are kept as strings so they can be
# joined directly; the "-<branch>" suffix is omitted when the branch is empty.
__version_info__ = ('0', '2', '0')
__version_branch__ = 'master'
__version__ = '.'.join(__version_info__) + (
    '-' + __version_branch__ if __version_branch__ else ''
)

# Characters that open / close a closure (a bracketed section of a name)
CL_START_CHARS = ['(', '[']
CL_END_CHARS = [')', ']']

# Character sets handed to str.lstrip / str.rstrip / str.strip
STRIP_START_CHARS = ''.join(CL_START_CHARS)
STRIP_END_CHARS = ''.join(CL_END_CHARS)
STRIP_CHARS = '_ .'

# Characters that terminate a fragment inside a closure
FRAGMENT_SEPARATORS = ['.', '-', '_', ' ']

# States of the closure splitter: waiting for an opening character,
# or waiting for the matching closing character.
CL_START, CL_END = 0, 1
class Caper(object):
    """Entry point: splits a name into closures and fragments and hands
    them to one of the registered parsers.
    """

    def __init__(self):
        # Parsers are looked up by name in `parse`
        self.parsers = {
            'scene': SceneParser(),
            'anime': AnimeParser()
        }

    def _closure_split(self, name):
        """Split `name` into a doubly-linked list of closures.

        A section opened by a character in CL_START_CHARS and closed by a
        character in CL_END_CHARS becomes its own closure (delimiters
        included); the text between such sections forms the other closures.
        Sections that are empty after stripping STRIP_CHARS are dropped.

        :type name: str
        :rtype: list of CaperClosure
        """
        closures = []

        def end_closure(closures, buf):
            buf = buf.strip(STRIP_CHARS)
            if len(buf) < 1:
                return

            cur = CaperClosure(buf)

            # Link the new closure to its left-hand neighbour (if any)
            cur.left = closures[-1] if len(closures) > 0 else None
            if cur.left:
                cur.left.right = cur

            closures.append(cur)

        state = CL_START
        buf = ""

        for ch in name:
            # An opening character ends whatever was collected before it
            if state == CL_START and ch in CL_START_CHARS:
                end_closure(closures, buf)

                state = CL_END
                buf = ""

            buf += ch

            # A closing character ends the bracketed section, delimiter included
            if state == CL_END and ch in CL_END_CHARS:
                end_closure(closures, buf)

                state = CL_START
                buf = ""

        # Flush the trailing text (or an unterminated bracketed section)
        end_closure(closures, buf)

        return closures

    def _clean_closure(self, closure):
        """Remove leading opening and trailing closing bracket characters.

        :type closure: str
        :rtype: str
        """
        return closure.lstrip(STRIP_START_CHARS).rstrip(STRIP_END_CHARS)

    def _fragment_split(self, closures):
        """Populate `fragments` on every closure and return the same list.

        Each closure value is cleaned with `_clean_closure` and cut at every
        character in FRAGMENT_SEPARATORS. Fragments are linked left/right
        within their closure and record the separators on either side.
        The last fragment of a closure has no separator after it, so its
        `right_sep` is None.

        :type closures: list of CaperClosure
        :rtype: list of CaperClosure
        """
        def end_fragment(fragments, cur, cur_position, right_sep):
            cur.position = cur_position

            cur.left = fragments[-1] if len(fragments) > 0 else None
            if cur.left:
                cur.left_sep = cur.left.right_sep
                cur.left.right = cur

            # The separator is passed in explicitly; previously it was read
            # from the enclosing loop variable, which held the fragment's own
            # last character when the final fragment was flushed.
            cur.right_sep = right_sep

            fragments.append(cur)

        for closure in closures:
            closure.fragments = []

            # Positions and the working fragment restart for every closure
            cur_position = 0
            cur = CaperFragment()

            for ch in self._clean_closure(closure.value):
                if ch in FRAGMENT_SEPARATORS:
                    end_fragment(closure.fragments, cur, cur_position, ch)

                    cur = CaperFragment()
                    cur_position += 1
                else:
                    cur.value += ch

            # Finish parsing the last fragment
            if cur.value != "":
                end_fragment(closure.fragments, cur, cur_position, None)

        return closures

    def parse(self, name, parser='scene'):
        """Parse `name` with the parser registered under `parser`.

        :type name: str
        :param parser: key into `self.parsers`
        :return: whatever the selected parser's `run` returns
        :raises ValueError: if `parser` is not a registered parser name
        """
        # Fail before doing any splitting work
        if parser not in self.parsers:
            raise ValueError("Unknown parser")

        closures = self._closure_split(name)
        closures = self._fragment_split(closures)

        # Print closures
        for closure in closures:
            Logr.debug("closure [%s]", closure.value)

        # TODO autodetect the parser type
        return self.parsers[parser].run(closures)

74
libs/caper/constraint.py

@ -0,0 +1,74 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class CaptureConstraint(object):
    """A set of comparisons evaluated against a fragment.

    Comparisons are given as keyword arguments of the form
    ``<attribute>__<method>=<argument>``, where ``<method>`` selects one of
    the ``_compare_<method>`` methods on this class (``eq`` or ``re``).
    Keywords that do not follow that form, or name an unknown method, are
    ignored.
    """

    def __init__(self, capture_group, comparisons=None, **kwargs):
        """Capture constraint object

        :type capture_group: CaptureGroup
        :param comparisons: optional initial list of
            ``(name, method, argument)`` tuples
        """
        self.capture_group = capture_group

        # Copy so that appending below never mutates the caller's list
        self.comparisons = list(comparisons) if comparisons else []

        for key, value in kwargs.items():
            key = key.split('__')
            if len(key) != 2:
                continue

            name, method = key
            method = '_compare_' + method
            if not hasattr(self, method):
                continue

            self.comparisons.append((name, getattr(self, method), value))

    def _compare_eq(self, fragment, name, expected):
        """Compare attribute `name` of `fragment` with `expected`.

        :return: ``(weight, success)``; a missing attribute counts as a
            failed comparison
        """
        if not hasattr(fragment, name):
            # Must stay a 2-tuple: `execute` unpacks every comparison result
            return 1.0, False

        return 1.0, getattr(fragment, name) == expected

    def _compare_re(self, fragment, name, arg):
        """Match `fragment` (or one of its attributes) against a pattern.

        * ``name == 'fragment'``: `arg` is a matcher group name, or a
          ``(group_name, minimum_weight)`` tuple; the fragment chain is
          matched through the parser's matcher and succeeds when the match
          weight exceeds the minimum.
        * `arg` is a compiled regular expression: it is matched against the
          attribute value directly.
        * otherwise `arg` is a matcher group name matched against the
          attribute value.

        :return: ``(weight, success)``
        :raises ValueError: if `fragment` has no attribute `name`
        """
        if name == 'fragment':
            group, minimum_weight = arg if type(arg) is tuple and len(arg) > 1 else (arg, 0)

            weight, match, num_fragments = self.capture_group.parser.matcher.fragment_match(fragment, group)
            return weight, weight > minimum_weight

        if not hasattr(fragment, name):
            raise ValueError("Unable to find fragment with name '%s'" % name)

        value = getattr(fragment, name)

        # Duck-typed check for a compiled pattern; the type name differs
        # between Python versions, so it is not compared directly.
        if hasattr(arg, 'match'):
            return 1.0, arg.match(value) is not None

        match = self.capture_group.parser.matcher.value_match(value, arg, single=True)
        return 1.0, match is not None

    def execute(self, fragment):
        """Run every comparison against `fragment`.

        :return: ``(average_weight, success)`` where success requires all
            comparisons to succeed; ``(0.0, False)`` when there are no
            comparisons
        """
        if not self.comparisons:
            return 0.0, False

        results = []
        total_weight = 0

        for name, method, argument in self.comparisons:
            weight, success = method(fragment, name, argument)

            total_weight += weight
            results.append(success)

        return total_weight / float(len(results)), all(results)

    def __repr__(self):
        return "CaptureConstraint(comparisons=%s)" % repr(self.comparisons)

147
libs/caper/group.py

@ -0,0 +1,147 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper.helpers import clean_dict
from caper.result import CaperFragmentNode
from caper.step import CaptureStep
from caper.constraint import CaptureConstraint
class CaptureGroup(object):
    """A fluent group of capture steps and stop constraints that is run
    against the heads of a result tree.
    """

    def __init__(self, parser, result):
        """Capture group object

        :type parser: caper.parsers.base.Parser
        :type result: caper.result.CaperResult
        """
        self.parser = parser
        self.result = result

        #: @type: list of CaptureStep
        self.steps = []
        #: @type: list of CaptureConstraint
        self.constraints = []

    def capture_fragment(self, tag, regex=None, func=None, single=True):
        """Append a fragment capture step and return this group (fluent)."""
        Logr.debug('capture_fragment("%s", "%s", %s, %s)', tag, regex, func, single)

        self.steps.append(CaptureStep(
            self, tag,
            'fragment',
            regex=regex,
            func=func,
            single=single
        ))

        return self

    def capture_closure(self, tag, regex=None, func=None, single=True):
        """Append a closure capture step and return this group (fluent)."""
        Logr.debug('capture_closure("%s", "%s", %s, %s)', tag, regex, func, single)

        self.steps.append(CaptureStep(
            self, tag,
            'closure',
            regex=regex,
            func=func,
            single=single
        ))

        return self

    def until(self, **kwargs):
        """Append a stop constraint built from `kwargs` and return this
        group (fluent).
        """
        self.constraints.append(CaptureConstraint(self, **kwargs))

        return self

    def parse_subject(self, parent_head, subject):
        """Produce the new head(s) that result from consuming `subject`.

        :param parent_head: the head (a node, or a list of alternative
            nodes) that `subject` follows
        :param subject: the next subject returned by the head node's `next`
        :return: list of new heads; each entry is a node or a list of nodes
        """
        parent_node = parent_head[0] if type(parent_head) is list else parent_head

        # TODO - if subject is a closure?

        nodes = []

        # Check constraints
        for constraint in self.constraints:
            weight, success = constraint.execute(subject)
            if success:
                Logr.debug('capturing broke on "%s" at %s', subject.value, constraint)
                parent_node.finished_groups.append(self)
                nodes.append(parent_head)

                # A certain stop ends here; an uncertain one keeps the
                # finished head and also continues capturing below.
                if weight == 1.0:
                    return nodes
                else:
                    Logr.debug('Branching result')

        # Try match subject against the steps available
        tag, success, weight, match, num_fragments = (None, None, None, None, None)
        for step in self.steps:
            tag = step.tag
            success, weight, match, num_fragments = step.execute(subject)
            if success:
                match = clean_dict(match) if type(match) is dict else match
                Logr.debug('Found match with weight %s, match: %s, num_fragments: %s', weight, match, num_fragments)
                break

        Logr.debug('created fragment node with subject.value: "%s"', subject.value)

        result = [CaperFragmentNode(parent_node.closure, subject.take_right(num_fragments), parent_head, tag, weight, match)]

        # An uncertain match also keeps an untagged alternative for the subject
        if match and weight < 1.0:
            if num_fragments == 1:
                result.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))
            else:
                nodes.append(CaperFragmentNode(parent_node.closure, [subject], parent_head, None, None, None))

        nodes.append(result[0] if len(result) == 1 else result)

        return nodes

    def execute(self, once=False):
        """Advance every head of the result through this group.

        Heads are re-processed until all of them have either finished this
        group or have no next subject.

        :param once: when true, make a single pass over the current heads
            instead of looping until every head is finished
        """
        heads_finished = None

        while heads_finished is None or not (len(heads_finished) == len(self.result.heads) and all(heads_finished)):
            heads_finished = []

            heads = self.result.heads
            self.result.heads = []

            for head in heads:
                node = head[0] if type(head) is list else head

                Logr.debug("head node: %s", node)

                if self in node.finished_groups:
                    Logr.debug("head finished for group")
                    self.result.heads.append(head)
                    heads_finished.append(True)
                    continue

                next_subject = node.next()

                if next_subject:
                    for node_result in self.parse_subject(head, next_subject):
                        self.result.heads.append(node_result)

                heads_finished.append(self in node.finished_groups or next_subject is None)

            # Nothing new was produced: keep the previous heads
            if len(self.result.heads) == 0:
                self.result.heads = heads

            Logr.debug("heads_finished: %s, self.result.heads: %s", heads_finished, self.result.heads)

            if once:
                break

        Logr.debug("group finished")

64
libs/caper/helpers.py

@ -0,0 +1,64 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
# True when the running interpreter's major version is 2
PY2 = sys.version_info[0] == 2
# True when the running interpreter's major version is 3
PY3 = sys.version_info[0] == 3
def is_list_type(obj, element_type):
    """Tell whether `obj` is a list whose first element is exactly of type
    `element_type`.

    Only the first element is inspected, and both checks use exact type
    identity (subclasses do not count).

    :return: False when `obj` is not a list, otherwise the result of the
        first-element type check
    :raises ValueError: if `obj` is an empty list
    """
    if type(obj) is list:
        if len(obj) >= 1:
            return type(obj[0]) is element_type

        raise ValueError("Unable to determine list element type from empty list")

    return False
def clean_dict(target, remove=None):
    """Recursively drop entries whose value equals `remove`.

    The dictionary is modified in place; nested dictionaries are cleaned
    recursively and are themselves never removed.

    :type target: dict
    :return: the same `target` object
    :raises ValueError: if `target` is not exactly a dict
    """
    if type(target) is not dict:
        raise ValueError("Target is required to be a dict")

    # Collect first, delete afterwards: the dict must not change size while
    # it is being iterated.
    doomed = []

    for key, value in target.items():
        if type(value) is dict:
            clean_dict(value, remove)
        elif value == remove:
            doomed.append(key)

    for key in doomed:
        target.pop(key)

    return target
def xrange_six(start, stop=None, step=None):
    """Lazy integer range that works on both Python 2 and Python 3.

    Mirrors the builtin call forms: ``xrange_six(n)``,
    ``xrange_six(start, stop)`` and ``xrange_six(start, stop, step)``.
    When `stop` is omitted, `step` is ignored and ``range(start)`` is
    returned.

    :return: an ``xrange`` object on Python 2, a ``range`` object on Python 3
    """
    try:
        range_func = xrange  # Python 2
    except NameError:
        range_func = range  # Python 3 has no xrange

    if stop is None:
        return range_func(start)

    # Previously a missing step caused `stop` to be dropped as well
    if step is None:
        return range_func(start, stop)

    return range_func(start, stop, step)

193
libs/caper/matcher.py

@ -0,0 +1,193 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pprint
import re
from logr import Logr
from caper.helpers import is_list_type, clean_dict
class FragmentMatcher(object):
    """Compiles named groups of patterns and matches fragments against them.

    `self.regex` maps a group name to a list of ``(weight, patterns)``
    tuples, where every pattern is a tuple of compiled regular expressions,
    one per consecutive fragment.
    """

    def __init__(self, pattern_groups):
        """
        :param pattern_groups: iterable of ``(group_name, patterns)``.
            `patterns` is either a list of patterns (given weight 1.0) or a
            list of ``(weight, patterns)`` tuples. A pattern is a string, a
            tuple of strings (one per fragment), or a
            ``(template, [alternatives])`` tuple whose template receives the
            alternatives joined with ``|``.
        """
        self.regex = {}

        for group_name, patterns in pattern_groups:
            if group_name not in self.regex:
                self.regex[group_name] = []

            # Transform into weight groups
            if type(patterns[0]) is str or type(patterns[0][0]) not in [int, float]:
                weight_groups = [(1.0, patterns)]
            else:
                weight_groups = patterns

            for weight, group_patterns in weight_groups:
                weight_patterns = []

                for pattern in group_patterns:
                    # Transform into multi-fragment patterns
                    if type(pattern) is str:
                        pattern = (pattern,)

                    # A lone (template, [alternatives]) pair is one fragment
                    if type(pattern) is tuple and len(pattern) == 2:
                        if type(pattern[0]) is str and is_list_type(pattern[1], str):
                            pattern = (pattern,)

                    result = []
                    for value in pattern:
                        if type(value) is tuple:
                            if len(value) == 2:
                                # Construct OR-list pattern
                                value = value[0] % '|'.join(value[1])
                            elif len(value) == 1:
                                value = value[0]

                        result.append(re.compile(value, re.IGNORECASE))

                    weight_patterns.append(tuple(result))

                self.regex[group_name].append((weight, weight_patterns))

        # Debug aid only; goes to the log rather than straight to stdout
        Logr.debug('%s', pprint.pformat(self.regex))

    def find_group(self, name):
        """Return ``(group_name, weight_groups)`` for `name`, or None."""
        for group_name, weight_groups in self.regex.items():
            if group_name and group_name == name:
                return group_name, weight_groups

        return None

    def parser_match(self, parser, group_name, single=True):
        """Match the parser's upcoming fragments against a group.

        Fragments are pulled from `parser`; a successful pattern commits the
        parser position, a failed one rewinds it. Only the first weight group
        of each group is used.

        :type parser: caper.parsers.base.Parser
        :param single: return after the first matching pattern
        :return: ``{group: {name: value}}`` or None when nothing matched
        """
        result = None

        for group, weight_groups in self.regex.items():
            if group_name and group != group_name:
                continue

            # TODO handle multiple weights
            weight, patterns = weight_groups[0]

            for pattern in patterns:
                fragments = []
                pattern_matched = True
                pattern_result = {}

                for fragment_pattern in pattern:
                    if not parser.fragment_available():
                        pattern_matched = False
                        break

                    fragment = parser.next_fragment()
                    fragments.append(fragment)

                    Logr.debug('[r"%s"].match("%s")', fragment_pattern.pattern, fragment.value)
                    match = fragment_pattern.match(fragment.value)
                    if match:
                        Logr.debug('Pattern "%s" matched', fragment_pattern.pattern)
                    else:
                        pattern_matched = False
                        break

                    pattern_result.update(clean_dict(match.groupdict()))

                if pattern_matched:
                    if result is None:
                        result = {}

                    if group not in result:
                        result[group] = {}

                    Logr.debug('Matched on <%s>', ' '.join([f.value for f in fragments]))

                    result[group].update(pattern_result)

                    parser.commit()

                    if single:
                        return result
                else:
                    parser.rewind()

        return result

    def value_match(self, value, group_name=None, single=True):
        """Match a single string against the first regex of each pattern.

        Only the first weight group of each group is used.

        :param group_name: restrict matching to this group (all groups when
            falsy)
        :param single: return after the first matching pattern
        :return: ``{group: {name: value}}`` or None when nothing matched
        """
        result = None

        for group, weight_groups in self.regex.items():
            if group_name and group != group_name:
                continue

            # TODO handle multiple weights
            weight, patterns = weight_groups[0]

            for pattern in patterns:
                match = pattern[0].match(value)
                if not match:
                    continue

                if result is None:
                    result = {}

                if group not in result:
                    result[group] = {}

                result[group].update(match.groupdict())

                if single:
                    return result

        return result

    def fragment_match(self, fragment, group_name=None):
        """Follow a fragment chain to try find a match

        :type fragment: caper.objects.CaperFragment
        :type group_name: str or None

        :return: ``(weight, match, num_fragments)``: the weight of the match
            found between 0.0 and 1.0 (1.0 means perfect match, 0.0 means no
            match), the named groups of the match (None when nothing matched)
            and the number of fragments the pattern spans.
            ``(0.0, None, 1)`` is also returned for an unknown group.
        :rtype: (float, dict, int)
        """
        group = self.find_group(group_name)
        if group is None:
            return 0.0, None, 1

        group_name, weight_groups = group

        for weight, patterns in weight_groups:
            for pattern in patterns:
                # Ignore empty patterns (without skipping the remaining ones)
                if len(pattern) < 1:
                    continue

                cur_fragment = fragment
                success = True
                result = {}

                for fragment_pattern in pattern:
                    # Ran out of fragments before the pattern was complete
                    if not cur_fragment:
                        success = False
                        break

                    match = fragment_pattern.match(cur_fragment.value)
                    if match:
                        result.update(match.groupdict())
                    else:
                        success = False
                        break

                    cur_fragment = cur_fragment.right if cur_fragment else None

                if success:
                    Logr.debug("Found match with weight %s", weight)
                    return float(weight), result, len(pattern)

        return 0.0, None, 1

75
libs/caper/objects.py

@ -0,0 +1,75 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from caper.helpers import xrange_six
class CaperClosure(object):
    """One closure of a name: its text, links to the neighbouring closures
    and the fragments it contains.
    """

    def __init__(self, value):
        #: :type: str
        self.value = value

        # Neighbouring closures
        #: :type: CaperClosure
        self.left, self.right = None, None

        #: :type: list of CaperFragment
        self.fragments = list()
class CaperFragment(object):
    """One fragment of a closure, linked to the fragments on either side."""

    def __init__(self):
        #: :type: str
        self.value = ""

        # Left neighbour and the separator between it and this fragment
        #: :type: CaperFragment
        self.left = None
        #: :type: str
        self.left_sep = None

        # Right neighbour and the separator between this fragment and it
        #: :type: CaperFragment
        self.right = None
        #: :type: str
        self.right_sep = None

        #: :type: int
        self.position = None

    def take(self, direction, count, include_self=True):
        """Collect `count` fragments walking in `direction`.

        :param direction: "left" or "right"
        :param count: number of entries to return; this fragment counts as
            one of them when `include_self` is true
        :return: list of fragments, padded with None once the chain ends
        :raises ValueError: for any other `direction`
        """
        if direction != 'left' and direction != 'right':
            raise ValueError('Un-Expected value for "direction", expected "left" or "right".')

        taken = []

        if include_self:
            taken.append(self)
            count -= 1

        node = self

        for _ in range(count):
            neighbour = getattr(node, direction) if node else None

            # Past the end of the chain every further entry is None
            node = neighbour if neighbour else None
            taken.append(node)

        return taken

    def take_left(self, count, include_self=True):
        """Shortcut for ``take('left', ...)``."""
        return self.take('left', count, include_self)

    def take_right(self, count, include_self=True):
        """Shortcut for ``take('right', ...)``."""
        return self.take('right', count, include_self)

0
libs/caper/parsers/__init__.py

88
libs/caper/parsers/anime.py

@ -0,0 +1,88 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from caper.parsers.base import Parser
# Captures the text inside the first leading "(...)" or "[...]" section
REGEX_GROUP = re.compile(r'(\(|\[)(?P<group>.*?)(\)|\])', re.IGNORECASE)


# (group_name, patterns) pairs handed to the Parser base class.
# A pattern is a regex string, a tuple of regex strings (one per consecutive
# fragment) or a (template, [alternatives]) pair.
PATTERN_GROUPS = [
    ('identifier', [
        r'S(?P<season>\d+)E(?P<episode>\d+)',
        r'(S(?P<season>\d+))|(E(?P<episode>\d+))',

        r'Ep(?P<episode>\d+)',
        # Bare number. The anchors were reversed ("$...^"), which made the
        # pattern impossible to match.
        r'^(?P<absolute>\d+)$',

        (r'Episode', r'(?P<episode>\d+)'),
    ]),
    ('video', [
        (r'(?P<h264_profile>%s)', [
            'Hi10P'
        ]),
        # NOTE(review): the leading "." requires one character before the
        # resolution - confirm this is intended.
        (r'.(?P<resolution>%s)', [
            '720p',
            '1080p',

            '960x720',
            '1920x1080'
        ]),
        (r'(?P<source>%s)', [
            'BD'
        ]),
    ]),
    ('audio', [
        (r'(?P<codec>%s)', [
            'FLAC'
        ]),
    ])
]
class AnimeParser(Parser):
    """Parser configured with the anime PATTERN_GROUPS of this module."""

    def __init__(self):
        super(AnimeParser, self).__init__(PATTERN_GROUPS)

    def capture_group(self, fragment):
        """Return the text inside a leading bracketed section of
        `fragment.value`, or None when the value does not start with one.
        """
        match = REGEX_GROUP.match(fragment.value)

        return match.group('group') if match else None

    def run(self, closures):
        """Run the capture groups over `closures` and return the built result.

        :type closures: list of CaperClosure
        """
        self.setup(closures)

        # Release group, taken from a closure
        group_capture = self.capture_closure('group', func=self.capture_group)
        group_capture.execute(once=True)

        # Show name: every fragment up to the first identifier or video match
        name_capture = self.capture_fragment('show_name', single=False)
        name_capture = name_capture.until(value__re='identifier')
        name_capture = name_capture.until(value__re='video')
        name_capture.execute()

        # Identifier, video and audio details
        detail_capture = self.capture_fragment('identifier', regex='identifier')
        detail_capture = detail_capture.capture_fragment('video', regex='video', single=False)
        detail_capture = detail_capture.capture_fragment('audio', regex='audio', single=False)
        detail_capture.execute()

        self.result.build()
        return self.result

136
libs/caper/parsers/base.py

@ -0,0 +1,136 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper import FragmentMatcher
from caper.group import CaptureGroup
from caper.result import CaperResult, CaperClosureNode
class Parser(object):
    """Base class for parsers: owns the matcher, the closures being parsed,
    the result tree and a rewindable closure/fragment cursor.
    """

    def __init__(self, pattern_groups):
        """
        :param pattern_groups: pattern group definitions passed to
            FragmentMatcher
        """
        self.matcher = FragmentMatcher(pattern_groups)

        # `reset` creates all per-run state:
        #   closures, result (caper.result.CaperResult), _match_cache,
        #   _fragment_pos, _closure_pos, _history
        self.reset()

    def reset(self):
        """Discard all per-run state and start with a fresh result."""
        self.closures = None
        #: :type: caper.result.CaperResult
        self.result = CaperResult()

        self._match_cache = {}
        # -1 means "before the first element"
        self._fragment_pos = -1
        self._closure_pos = -1
        # Cursor moves since the last commit, as (source, delta) tuples
        self._history = []

    def setup(self, closures):
        """Reset the parser and seed the result with the first closure.

        With an empty `closures` list the result starts with no heads
        instead of raising IndexError.

        :type closures: list of CaperClosure
        """
        self.reset()
        self.closures = closures

        self.result.heads = [CaperClosureNode(closures[0])] if closures else []

    def run(self, closures):
        """Parse `closures`; implemented by subclasses.

        :type closures: list of CaperClosure
        """
        raise NotImplementedError()

    #
    # Closure Methods
    #

    def next_closure(self):
        """Advance to the next closure and return it.

        The fragment cursor is moved back to "before the first fragment";
        both moves are recorded so `rewind` can undo them.
        """
        self._closure_pos += 1
        closure = self.closures[self._closure_pos]

        # Delta that restores the previous fragment position on rewind
        self._history.append(('fragment', -1 - self._fragment_pos))
        self._fragment_pos = -1

        if self._closure_pos != 0:
            self._history.append(('closure', 1))

        Logr.debug('(next_closure) closure.value: "%s"', closure.value)
        return closure

    def closure_available(self):
        """Tell whether a closure exists after the current one."""
        return self._closure_pos + 1 < len(self.closures)

    #
    # Fragment Methods
    #

    def next_fragment(self):
        """Advance to the next fragment of the current closure and return it."""
        closure = self.closures[self._closure_pos]

        self._fragment_pos += 1
        fragment = closure.fragments[self._fragment_pos]

        self._history.append(('fragment', 1))

        Logr.debug('(next_fragment) closure.value "%s" - fragment.value: "%s"', closure.value, fragment.value)
        return fragment

    def fragment_available(self):
        """Tell whether the current closure has a further fragment.

        NOTE(review): this also requires a *following* closure to exist, so
        it is always False while positioned on the last closure - confirm
        that this is intended.
        """
        if not self.closure_available():
            return False

        return self._fragment_pos + 1 < len(self.closures[self._closure_pos].fragments)

    def rewind(self):
        """Undo every cursor move recorded since the last commit."""
        for source, delta in reversed(self._history):
            Logr.debug('(rewind) Rewinding step: %s', (source, delta))
            if source == 'fragment':
                self._fragment_pos -= delta
            elif source == 'closure':
                self._closure_pos -= delta
            else:
                raise NotImplementedError()

        self.commit()

    def commit(self):
        """Forget the recorded cursor moves, making them permanent."""
        Logr.debug('(commit)')
        self._history = []

    #
    # Capture Methods
    #

    def capture_fragment(self, tag, regex=None, func=None, single=True):
        """Start a new CaptureGroup on the current result with one fragment
        capture step and return it.
        """
        return CaptureGroup(self, self.result).capture_fragment(
            tag,
            regex=regex,
            func=func,
            single=single
        )

    def capture_closure(self, tag, regex=None, func=None, single=True):
        """Start a new CaptureGroup on the current result with one closure
        capture step and return it.
        """
        return CaptureGroup(self, self.result).capture_closure(
            tag,
            regex=regex,
            func=func,
            single=single
        )

148
libs/caper/parsers/scene.py

@ -0,0 +1,148 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
from caper.parsers.base import Parser
from caper.result import CaperFragmentNode
# (group_name, patterns) pairs handed to the Parser base class.
# The 'identifier' group is split into (weight, patterns) tiers; the 'video'
# group is a flat pattern list. A pattern is a regex string, a tuple of regex
# strings (one per consecutive fragment) or a (template, [alternatives]) pair.
# All regexes are raw strings so "\d" is never treated as a string escape.
PATTERN_GROUPS = [
    ('identifier', [
        (1.0, [
            # S01E01-E02
            (r'^S(?P<season>\d+)E(?P<episode_from>\d+)$', r'^E(?P<episode_to>\d+)$'),
            # S02E13
            r'^S(?P<season>\d+)E(?P<episode>\d+)$',
            # S01 E13
            (r'^(S(?P<season>\d+))$', r'^(E(?P<episode>\d+))$'),
            # S02
            # E13
            r'^((S(?P<season>\d+))|(E(?P<episode>\d+)))$',
            # 3x19
            r'^(?P<season>\d+)x(?P<episode>\d+)$',

            # 2013.09.15
            (r'^(?P<year>\d{4})$', r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$'),
            # 09.15.2013
            (r'^(?P<month>\d{2})$', r'^(?P<day>\d{2})$', r'^(?P<year>\d{4})$'),
            # TODO - US/UK Date Format Conflict? will only support US format for now..
            # 15.09.2013
            #(r'^(?P<day>\d{2})$', r'^(?P<month>\d{2})$', r'^(?P<year>\d{4})$'),
            # 130915
            r'^(?P<year_short>\d{2})(?P<month>\d{2})(?P<day>\d{2})$',

            # Season 3 Episode 14
            (r'^Se(ason)?$', r'^(?P<season>\d+)$', r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),
            # Season 3
            (r'^Se(ason)?$', r'^(?P<season>\d+)$'),
            # Episode 14
            (r'^Ep(isode)?$', r'^(?P<episode>\d+)$'),

            # Part.3
            # Part.1.and.Part.3
            (r'^Part$', r'(?P<part>\d+)'),
        ]),
        (0.8, [
            # 100 - 1899, 2100 - 9999 (skips 1900 to 2099 - so we don't get years by mistake)
            # TODO - Update this pattern on 31 Dec 2099
            r'^(?P<season>([1-9])|(1[0-8])|(2[1-9])|([3-9][0-9]))(?P<episode>\d{2})$'
        ]),
        (0.5, [
            # 100 - 9999
            r'^(?P<season>([1-9])|([1-9][0-9]))(?P<episode>\d{2})$'
        ])
    ]),
    ('video', [
        r'(?P<aspect>FS|WS)',

        (r'(?P<resolution>%s)', [
            '480p',
            '720p',
            '1080p'
        ]),

        (r'(?P<source>%s)', [
            'HDTV',
            'PDTV',
            'DSR',
            'DVDRiP'
        ]),

        (r'(?P<codec>%s)', [
            'x264',
            'XViD'
        ]),

        (r'(?P<language>%s)', [
            'GERMAN',
            'DUTCH',
            'FRENCH',
            'SWEDiSH',
            'DANiSH',
            'iTALiAN'
        ]),
    ])
]
class SceneParser(Parser):
    """Parser configured with the scene PATTERN_GROUPS of this module."""

    def __init__(self):
        super(SceneParser, self).__init__(PATTERN_GROUPS)

    def capture_group(self, fragment):
        """Return `fragment.value` when the fragment is preceded by "-" and
        has no fragment to its right; otherwise None.
        """
        if fragment.left_sep != '-' or fragment.right:
            return None

        return fragment.value

    def run(self, closures):
        """Run the capture groups over `closures` and return the built result.

        :type closures: list of CaperClosure
        """
        self.setup(closures)

        # Show name: every fragment up to the first identifier or video match
        name_capture = self.capture_fragment('show_name', single=False)
        name_capture = name_capture.until(fragment__re='identifier')
        name_capture = name_capture.until(fragment__re='video')
        name_capture.execute()

        # Identifier and video details, stopping at a trailing "-<fragment>"
        detail_capture = self.capture_fragment('identifier', regex='identifier', single=False)
        detail_capture = detail_capture.capture_fragment('video', regex='video', single=False)
        detail_capture = detail_capture.until(left_sep__eq='-', right__eq=None)
        detail_capture.execute()

        # Release group
        self.capture_fragment('group', func=self.capture_group).execute()

        self.print_tree(self.result.heads)

        self.result.build()
        return self.result

    def print_tree(self, heads):
        """Write every head, and recursively its chain of parents, to the
        debug log.
        """
        for head in heads:
            # A head is a single node or a list of alternative nodes
            if type(head) is not list:
                head = [head]

            first = head[0]

            if type(first) is CaperFragmentNode:
                for fragment in first.fragments:
                    Logr.debug(fragment.value)
            else:
                Logr.debug(first.closure.value)

            for node in head:
                Logr.debug('\t'.join(['', str(node).ljust(55), str(node.weight), str(node.match)]))

            if len(head) > 0 and first.parent:
                self.print_tree([first.parent])

172
libs/caper/result.py

@ -0,0 +1,172 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from logr import Logr
# NOTE(review): not referenced in the visible part of this module -
# presumably the match tags that are handled as groups; confirm before use.
GROUP_MATCHES = ['identifier']
class CaperNode(object):
    """Base node of the result tree; subclasses implement `next`."""

    def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
        """
        :type parent: CaperNode
        :type weight: float
        """
        # Position in the tree
        #: :type: caper.objects.CaperClosure
        self.closure = closure
        #: :type: CaperNode
        self.parent = parent

        # What was captured at this node
        #: :type: str
        self.tag = tag
        #: :type: float
        self.weight = weight
        #: :type: dict
        self.match = match

        # Capture groups that have already finished on this node
        #: :type: list of CaptureGroup
        self.finished_groups = list()

    def next(self):
        """Return the subject that follows this node (subclass hook)."""
        raise NotImplementedError()
class CaperClosureNode(CaperNode):
    """Result node that stands for a whole closure."""

    def __init__(self, closure, parent=None, tag=None, weight=None, match=None):
        """
        :type closure: caper.objects.CaperClosure or list of caper.objects.CaperClosure
        """
        super(CaperClosureNode, self).__init__(closure, parent, tag, weight, match)

    def next(self):
        """Return the first fragment of the closure, or None when there is
        no closure or it has no fragments.
        """
        closure = self.closure

        if not closure or len(closure.fragments) <= 0:
            return None

        return closure.fragments[0]
class CaperFragmentNode(CaperNode):
    """Result node that covers one or more fragments of a closure."""

    def __init__(self, closure, fragments, parent=None, tag=None, weight=None, match=None):
        """
        :type closure: caper.objects.CaperClosure
        :type fragments: list of caper.objects.CaperFragment
        """
        super(CaperFragmentNode, self).__init__(closure, parent, tag, weight, match)

        #: :type: caper.objects.CaperFragment or list of caper.objects.CaperFragment
        self.fragments = fragments

    def next(self):
        """Return the fragment to the right of the last covered fragment;
        failing that the closure to the right of this node's closure;
        failing that None.
        """
        last = self.fragments[-1] if len(self.fragments) > 0 else None

        if last and last.right:
            return last.right

        if self.closure.right:
            return self.closure.right

        return None
class CaperResult(object):
    """Holds the heads of the result tree and the weighted chains built
    from them.
    """

    def __init__(self):
        #: :type: list of CaperNode
        self.heads = []

        #: :type: list of CaperResultChain
        self.chains = []

    def build(self):
        """Turn every head into chains, weight them and sort `self.chains`
        by weight, best first.

        Each chain gets one extra weight: its number of matched fragments
        relative to the best chain (0.0 when no chain matched anything).
        """
        max_matched = 0

        for head in self.heads:
            for chain in self.combine_chain(head):
                if chain.num_matched > max_matched:
                    max_matched = chain.num_matched

                self.chains.append(chain)

        for chain in self.chains:
            # Guard the division: no chain may have matched any fragment
            coverage = chain.num_matched / float(max_matched) if max_matched else 0.0

            chain.weights.append(coverage)
            chain.finish()

        self.chains.sort(key=lambda chain: chain.weight, reverse=True)

        for chain in self.chains:
            Logr.debug("chain weight: %.02f", chain.weight)
            Logr.debug("\tInfo: %s", chain.info)
            Logr.debug("\tWeights: %s", chain.weights)
            Logr.debug("\tNumber of Fragments Matched: %s", chain.num_matched)

    def combine_chain(self, subject, chain=None):
        """Walk from `subject` up through its parents, collecting matches.

        :param subject: a node or a list of alternative nodes
        :param chain: chain collected so far (a new one when None)
        :return: one chain per alternative path to a parentless node
        :rtype: list of CaperResultChain
        """
        nodes = subject if type(subject) is list else [subject]

        if chain is None:
            chain = CaperResultChain()

        result = []

        for x, node in enumerate(nodes):
            # Every alternative but the last continues on its own copy
            node_chain = chain if x == len(nodes) - 1 else chain.copy()

            if not node.parent:
                result.append(node_chain)
                continue

            # Skip over closure nodes
            if type(node) is CaperClosureNode:
                result.extend(self.combine_chain(node.parent, node_chain))

            # Parse fragment matches
            if type(node) is CaperFragmentNode:
                node_chain.update(node)

                result.extend(self.combine_chain(node.parent, node_chain))

        return result
class CaperResultChain(object):
    """One candidate interpretation: the collected matches per tag, the
    weights of the nodes that contributed and the final averaged weight.
    """

    def __init__(self):
        #: :type: float
        self.weight = None
        # tag -> list of matches
        self.info = {}
        # Number of fragments covered by the weighted nodes
        self.num_matched = 0
        # Individual weights that `finish` averages
        self.weights = []

    def update(self, subject):
        """Fold a fragment node into the chain.

        Nodes without a weight are ignored. Matches are inserted at the
        front of their tag's list.
        """
        if subject.weight is None:
            return

        self.num_matched += len(subject.fragments) if subject.fragments is not None else 0
        self.weights.append(subject.weight)

        if subject.match:
            if subject.tag not in self.info:
                self.info[subject.tag] = []

            self.info[subject.tag].insert(0, subject.match)

    def finish(self):
        """Set `weight` to the mean of the collected weights (0.0 when no
        weights were collected).
        """
        if not self.weights:
            self.weight = 0.0
            return

        # float() keeps the mean exact under Python 2 integer division
        self.weight = sum(self.weights) / float(len(self.weights))

    def copy(self):
        """Return an independent copy of this chain."""
        chain = CaperResultChain()

        chain.weight = self.weight
        chain.info = copy.deepcopy(self.info)

        chain.num_matched = self.num_matched
        chain.weights = copy.copy(self.weights)

        return chain

72
libs/caper/step.py

@ -0,0 +1,72 @@
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logr import Logr
class CaptureStep(object):
    """One capture attempt of a CaptureGroup: by matcher group (`regex`),
    by callable (`func`), or unconditionally when neither is set.
    """

    # Attributes shown by __repr__ when they hold a truthy value
    REPR_KEYS = ['regex', 'func', 'single']

    def __init__(self, capture_group, tag, source, regex=None, func=None, single=None):
        #: @type: CaptureGroup
        self.capture_group = capture_group

        #: @type: str
        self.tag = tag
        #: @type: str
        self.source = source

        #: @type: str
        self.regex = regex
        #: @type: function
        self.func = func
        #: @type: bool
        self.single = single

    def _get_next_subject(self, parser):
        """Pull the next fragment or closure (depending on `source`) from
        `parser`; None when the parser reports none available.

        :raises NotImplementedError: for any other `source`
        """
        if self.source == 'fragment':
            return parser.next_fragment() if parser.fragment_available() else None

        if self.source == 'closure':
            return parser.next_closure() if parser.closure_available() else None

        raise NotImplementedError()

    def execute(self, fragment):
        """Try this step against `fragment`.

        :return: ``(success, weight, match, num_fragments)``
        """
        no_match = (False, None, None, 1)

        if self.regex:
            # `regex` is the name of a matcher group
            matcher = self.capture_group.parser.matcher
            weight, match, num_fragments = matcher.fragment_match(fragment, self.regex)
            Logr.debug('(execute) [regex] tag: "%s"', self.tag)

            return (True, weight, match, num_fragments) if match else no_match

        if self.func:
            match = self.func(fragment)
            Logr.debug('(execute) [func] %s += "%s"', self.tag, match)

            return (True, 1.0, match, 1) if match else no_match

        # No condition: the fragment's own value is the match
        Logr.debug('(execute) [raw] %s += "%s"', self.tag, fragment.value)
        return True, 1.0, fragment.value, 1

    def __repr__(self):
        shown = []

        for key in self.REPR_KEYS:
            if hasattr(self, key) and getattr(self, key):
                shown.append(key + '=' + repr(getattr(self, key)))

        suffix = ''
        if len(shown) > 0:
            suffix = ', ' + ', '.join(shown)

        return "CaptureStep('%s'%s)" % (self.tag, suffix)

201
libs/logr/__init__.py

@ -0,0 +1,201 @@
# logr - Simple python logging wrapper
# Packed by Dean Gardiner <gardiner91@gmail.com>
#
# File part of:
# rdio-sock - Rdio WebSocket Library
# Copyright (C) 2013 fzza- <fzzzzzzzza@gmail.com>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import inspect
import logging
import os
import sys
# Logger names that Logr.get_logger_name() skips while walking the call
# stack (empty by default).
IGNORE = ()
# True when running on a Python 3 interpreter.
PY3 = sys.version_info[0] == 3
class Logr(object):
    """Static logging facade that picks a logger based on the caller.

    Each logging call walks the call stack to derive a logger name from the
    calling class or module, then creates or reuses a `logging.Logger` for
    that name. All loggers share the single handler stored in `Logr.handler`.
    """

    # Logger name -> logging.Logger, populated by get_logger()
    loggers = {}
    # Shared handler attached to each logger created by get_logger()
    handler = None

    @staticmethod
    def configure(level=logging.WARNING, handler=None, formatter=None):
        """Configure Logr

        @param level: Level set on the handler
        @type level: int

        @param handler: Logger message handler
        @type handler: logging.Handler or None

        @param formatter: Logger message Formatter
        @type formatter: logging.Formatter or None
        """
        if formatter is None:
            formatter = LogrFormatter()

        if handler is None:
            handler = logging.StreamHandler()

        handler.setFormatter(formatter)
        handler.setLevel(level)
        Logr.handler = handler

    @staticmethod
    def configure_check():
        """Apply the default configuration if no handler has been set yet."""
        if Logr.handler is None:
            Logr.configure()

    @staticmethod
    def _get_name_from_path(filename):
        """Return the base name of `filename` without its extension.

        Returns "<unknown>" when `filename` is not a usable path (e.g. None
        because the frame's globals have no `__file__`).
        """
        try:
            return os.path.splitext(os.path.basename(filename))[0]
        except (TypeError, AttributeError):
            # Python 3 raises TypeError for a non-path value, while
            # Python 2's os.path.basename(None) raises AttributeError.
            return "<unknown>"

    @staticmethod
    def get_logger_name():
        """Derive a logger name from the nearest identifiable caller.

        Walks the stack from the innermost frame outwards and returns the
        first name that is found and not listed in IGNORE:
        "module.Class" (or just "Class" for `__main__`) for methods whose
        first argument is `self`, otherwise the module name (or the script's
        file name for `__main__` / module-level code).

        @rtype: str
        """
        for frame_record in inspect.stack():
            frame = frame_record[0]
            code = frame.f_code
            name = None

            # Try find name of function defined inside a class
            if len(code.co_varnames) > 0:
                self_argument = code.co_varnames[0]

                if self_argument == 'self' and self_argument in frame.f_locals:
                    class_ = frame.f_locals[self_argument].__class__
                    class_name = class_.__name__
                    module_name = class_.__module__

                    if module_name != '__main__':
                        name = module_name + '.' + class_name
                    else:
                        name = class_name

            # Try find name of function defined outside of a class
            if name is None:
                if code.co_name in frame.f_globals:
                    name = frame.f_globals.get('__name__')
                    if name == '__main__':
                        name = Logr._get_name_from_path(frame.f_globals.get('__file__'))
                elif code.co_name == '<module>':
                    name = Logr._get_name_from_path(frame.f_globals.get('__file__'))

            if name is not None and name not in IGNORE:
                return name

        return ""

    @staticmethod
    def get_logger():
        """Get or create logger (if it does not exist)

        @rtype: logging.Logger
        """
        name = Logr.get_logger_name()
        if name not in Logr.loggers:
            Logr.configure_check()
            Logr.loggers[name] = logging.Logger(name)
            Logr.loggers[name].addHandler(Logr.handler)
        return Logr.loggers[name]

    @staticmethod
    def debug(msg, *args, **kwargs):
        """Log `msg` at DEBUG level on the caller's logger."""
        Logr.get_logger().debug(msg, *args, **kwargs)

    @staticmethod
    def info(msg, *args, **kwargs):
        """Log `msg` at INFO level on the caller's logger."""
        Logr.get_logger().info(msg, *args, **kwargs)

    @staticmethod
    def warning(msg, *args, **kwargs):
        """Log `msg` at WARNING level on the caller's logger."""
        Logr.get_logger().warning(msg, *args, **kwargs)

    warn = warning

    @staticmethod
    def error(msg, *args, **kwargs):
        """Log `msg` at ERROR level on the caller's logger."""
        Logr.get_logger().error(msg, *args, **kwargs)

    @staticmethod
    def exception(msg, *args, **kwargs):
        """Log `msg` via the caller's logger's `exception()` method."""
        Logr.get_logger().exception(msg, *args, **kwargs)

    @staticmethod
    def critical(msg, *args, **kwargs):
        """Log `msg` at CRITICAL level on the caller's logger."""
        Logr.get_logger().critical(msg, *args, **kwargs)

    fatal = critical

    @staticmethod
    def log(level, msg, *args, **kwargs):
        """Log `msg` at the given numeric `level` on the caller's logger."""
        Logr.get_logger().log(level, msg, *args, **kwargs)
class LogrFormatter(logging.Formatter):
    """Formatter producing fixed-width "time name level message" lines.

    The logger name is cut to its last LENGTH_NAME characters and
    right-aligned; the level name is cut to its first LENGTH_LEVEL_NAME
    characters and left-aligned.
    """

    LENGTH_NAME = 32
    LENGTH_LEVEL_NAME = 5

    def __init__(self, fmt=None, datefmt=None):
        """Initialise the base Formatter with `fmt` and `datefmt`."""
        # NOTE(review): presumably super() is avoided on Python <= 2.6 because
        # logging.Formatter is an old-style class there - confirm.
        if sys.version_info[:2] <= (2, 6):
            logging.Formatter.__init__(self, fmt, datefmt)
        else:
            super(LogrFormatter, self).__init__(fmt, datefmt)

    def usesTime(self):
        """Always report that the output includes the record time."""
        return True

    def format(self, record):
        """Render `record` as one line, followed by any exception text."""
        record.message = record.getMessage()

        if self.usesTime():
            record.asctime = self.formatTime(record, self.datefmt)

        padded_name = record.name[-self.LENGTH_NAME:].rjust(self.LENGTH_NAME, ' ')
        padded_level = record.levelname[:self.LENGTH_LEVEL_NAME].ljust(self.LENGTH_LEVEL_NAME, ' ')

        line = "%(asctime)s %(name)s %(levelname)s %(message)s" % {
            'asctime': record.asctime,
            'name': padded_name,
            'levelname': padded_level,
            'message': record.message
        }

        # Render the traceback once and cache it on the record
        if record.exc_info and not record.exc_text:
            record.exc_text = self.formatException(record.exc_info)

        if not record.exc_text:
            return line

        if not line.endswith("\n"):
            line += "\n"

        try:
            line += record.exc_text
        except UnicodeError:
            # Fall back to decoding the text with the filesystem encoding
            line = line + record.exc_text.decode(sys.getfilesystemencoding(),
                                                 'replace')

        return line
def xrange_six(start, stop=None, step=None):
    """Return a lazy integer range on both Python 2 and Python 3.

    Uses `xrange` where it exists (Python 2) and `range` otherwise.

    @param start: Range start, or the exclusive end when `stop` is None
    @type start: int

    @param stop: Exclusive end of the range
    @type stop: int or None

    @param step: Increment between values; only used when `stop` is given
    @type step: int or None
    """
    try:
        range_type = xrange  # Python 2
    except NameError:
        range_type = range  # Python 3

    if stop is None:
        return range_type(start)

    if step is None:
        # Previously `stop` was dropped here and range(start) was returned
        return range_type(start, stop)

    return range_type(start, stop, step)
Loading…
Cancel
Save