diff --git a/libs/caper/__init__.py b/libs/caper/__init__.py index 9637c06..8b2e61a 100644 --- a/libs/caper/__init__.py +++ b/libs/caper/__init__.py @@ -19,7 +19,7 @@ from caper.parsers.anime import AnimeParser from caper.parsers.scene import SceneParser -__version_info__ = ('0', '2', '6') +__version_info__ = ('0', '2', '9') __version_branch__ = 'master' __version__ = "%s%s" % ( @@ -44,9 +44,11 @@ CL_END = 1 class Caper(object): def __init__(self, debug=False): + self.debug = debug + self.parsers = { - 'scene': SceneParser(debug), - 'anime': AnimeParser(debug) + 'scene': SceneParser, + 'anime': AnimeParser } def _closure_split(self, name): @@ -63,7 +65,7 @@ class Caper(object): if len(buf) < 1: return - cur = CaperClosure(buf) + cur = CaperClosure(len(closures), buf) cur.left = closures[len(closures) - 1] if len(closures) > 0 else None if cur.left: @@ -109,7 +111,7 @@ class Caper(object): """ cur_position = 0 - cur = CaperFragment() + cur = None def end_fragment(fragments, cur, cur_position): cur.position = cur_position @@ -126,23 +128,41 @@ class Caper(object): for closure in closures: closure.fragments = [] + separator_buffer = "" + for x, ch in enumerate(self._clean_closure(closure.value)): + if not cur: + cur = CaperFragment(closure) + if ch in FRAGMENT_SEPARATORS: - end_fragment(closure.fragments, cur, cur_position) + if cur.value: + separator_buffer = "" + + separator_buffer += ch + + if cur.value or not closure.fragments: + end_fragment(closure.fragments, cur, cur_position) + elif len(separator_buffer) > 1: + cur.value = separator_buffer.strip() + + if cur.value: + end_fragment(closure.fragments, cur, cur_position) + + separator_buffer = "" # Reset - cur = CaperFragment() + cur = None cur_position += 1 else: cur.value += ch # Finish parsing the last fragment - if cur.value != "": + if cur and cur.value: end_fragment(closure.fragments, cur, cur_position) # Reset cur_position = 0 - cur = CaperFragment() + cur = None return closures @@ -158,4 +178,4 @@ class Caper(object): raise ValueError("Unknown parser") # TODO autodetect the parser type - return self.parsers[parser].run(closures) + return self.parsers[parser](self.debug).run(closures) diff --git a/libs/caper/helpers.py b/libs/caper/helpers.py index 2b27e57..ded5d48 100644 --- a/libs/caper/helpers.py +++ b/libs/caper/helpers.py @@ -74,3 +74,7 @@ def xrange_six(start, stop=None, step=None): return range(start) else: return xrange(start) + + +def delta_seconds(td): + return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 1e6) / 1e6 diff --git a/libs/caper/matcher.py b/libs/caper/matcher.py index c71da97..c154cd7 100644 --- a/libs/caper/matcher.py +++ b/libs/caper/matcher.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re +from caper.helpers import is_list_type, update_dict, delta_seconds +from datetime import datetime from logr import Logr -from caper.helpers import is_list_type, update_dict +import re class FragmentMatcher(object): @@ -24,6 +25,9 @@ class FragmentMatcher(object): self.construct_patterns(pattern_groups) def construct_patterns(self, pattern_groups): + compile_start = datetime.now() + compile_count = 0 + for group_name, patterns in pattern_groups: if group_name not in self.regex: self.regex[group_name] = [] @@ -54,11 +58,14 @@ class FragmentMatcher(object): value = value[0] result.append(re.compile(value, re.IGNORECASE)) + compile_count += 1 weight_patterns.append(tuple(result)) self.regex[group_name].append((weight, weight_patterns)) + Logr.info("Compiled %s patterns in %ss", compile_count, delta_seconds(datetime.now() - compile_start)) + def find_group(self, name): for group_name, weight_groups in self.regex.items(): if group_name and group_name == name: diff --git a/libs/caper/objects.py b/libs/caper/objects.py index 4804dea..1f82c33 100644 --- a/libs/caper/objects.py +++ b/libs/caper/objects.py @@ -16,7 +16,10 @@ from caper.helpers import xrange_six class CaperClosure(object): - def __init__(self, value): + def __init__(self, index, value): + #: :type: int + self.index = index + #: :type: str self.value = value @@ -30,7 +33,10 @@ class CaperClosure(object): class CaperFragment(object): - def __init__(self): + def __init__(self, closure=None): + #: :type: CaperClosure + self.closure = closure + #: :type: str self.value = "" diff --git a/libs/caper/parsers/base.py b/libs/caper/parsers/base.py index 6f79be6..6bae537 100644 --- a/libs/caper/parsers/base.py +++ b/libs/caper/parsers/base.py @@ -15,13 +15,14 @@ from caper import FragmentMatcher from caper.group import CaptureGroup from caper.result import CaperResult, CaperClosureNode +from logr import Logr class Parser(object): - def __init__(self, pattern_groups, debug=False): + def __init__(self, matcher, debug=False): self.debug = debug - self.matcher = FragmentMatcher(pattern_groups) + self.matcher = matcher self.closures = None #: :type: caper.result.CaperResult diff --git a/libs/caper/parsers/scene.py b/libs/caper/parsers/scene.py index b96967b..0dfe378 100644 --- a/libs/caper/parsers/scene.py +++ b/libs/caper/parsers/scene.py @@ -13,6 +13,7 @@ # limitations under the License. from logr import Logr +from caper import FragmentMatcher from caper.parsers.base import Parser from caper.result import CaperFragmentNode @@ -22,8 +23,10 @@ PATTERN_GROUPS = [ (1.0, [ # S01E01-E02 ('^S(?P\d+)E(?P\d+)$', '^E(?P\d+)$'), - # S03 E01 to E08 - ('^S(?P\d+)$', '^E(?P\d+)$', '^to$', '^E(?P\d+)$'), + # 'S03 E01 to E08' or 'S03 E01 - E09' + ('^S(?P\d+)$', '^E(?P\d+)$', '^(to|-)$', '^E(?P\d+)$'), + # 'E01 to E08' or 'E01 - E09' + ('^E(?P\d+)$', '^(to|-)$', '^E(?P\d+)$'), # S01-S03 ('^S(?P\d+)$', '^S(?P\d+)$'), @@ -58,6 +61,9 @@ PATTERN_GROUPS = [ # Part.3 # Part.1.and.Part.3 ('^Part$', '(?P\d+)'), + + r'(?PSpecial)', + r'(?PNZ|AU|US|UK)' ]), (0.8, [ # 100 - 1899, 2100 - 9999 (skips 1900 to 2099 - so we don't get years my mistake) @@ -69,6 +75,7 @@ PATTERN_GROUPS = [ r'^(?P([1-9])|([1-9][0-9]))(?P\d{2})$' ]) ]), + ('video', [ r'(?PFS|WS)', @@ -152,14 +159,23 @@ PATTERN_GROUPS = [ class SceneParser(Parser): + matcher = None + def __init__(self, debug=False): - super(SceneParser, self).__init__(PATTERN_GROUPS, debug) + if not SceneParser.matcher: + SceneParser.matcher = FragmentMatcher(PATTERN_GROUPS) + Logr.info("Fragment matcher for %s created", self.__class__.__name__) + + super(SceneParser, self).__init__(SceneParser.matcher, debug) def capture_group(self, fragment): - if fragment.left_sep == '-' and not fragment.right: - return fragment.value + if fragment.closure.index + 1 != len(self.closures): + return None + + if fragment.left_sep != '-' or fragment.right: + return None - return None + return fragment.value def run(self, closures): """ @@ -170,17 +186,17 @@ class SceneParser(Parser): self.capture_fragment('show_name', single=False)\ .until(fragment__re='identifier')\ - .until(fragment__re='video') \ - .until(fragment__re='dvd') \ - .until(fragment__re='audio') \ - .until(fragment__re='scene') \ + .until(fragment__re='video')\ + .until(fragment__re='dvd')\ + .until(fragment__re='audio')\ + .until(fragment__re='scene')\ .execute() self.capture_fragment('identifier', regex='identifier', single=False)\ - .capture_fragment('video', regex='video', single=False) \ - .capture_fragment('dvd', regex='dvd', single=False) \ - .capture_fragment('audio', regex='audio', single=False) \ - .capture_fragment('scene', regex='scene', single=False) \ + .capture_fragment('video', regex='video', single=False)\ + .capture_fragment('dvd', regex='dvd', single=False)\ + .capture_fragment('audio', regex='audio', single=False)\ + .capture_fragment('scene', regex='scene', single=False)\ .until(left_sep__eq='-', right__eq=None)\ .execute()