You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
195 lines
5.3 KiB
195 lines
5.3 KiB
# Copyright 2013 Dean Gardiner <gardiner91@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
from logr import Logr
|
|
from caper.matcher import FragmentMatcher
|
|
from caper.objects import CaperFragment, CaperClosure
|
|
from caper.parsers.anime import AnimeParser
|
|
from caper.parsers.scene import SceneParser
|
|
from caper.parsers.usenet import UsenetParser
|
|
|
|
|
|
# Version components, kept as strings so they can be joined directly.
__version_info__ = ('0', '3', '1')
# Branch name appended to the version; an empty value drops the suffix.
__version_branch__ = 'master'

# Dotted version followed by an optional "-<branch>" suffix.
__version__ = '.'.join(__version_info__)
if __version_branch__:
    __version__ += '-' + __version_branch__


# Characters that open / close a closure while splitting a name.
CL_START_CHARS = list('([<>')
CL_END_CHARS = list(')]<>')
# Strings that terminate the current closure when seen outside of brackets.
CL_END_STRINGS = [' - ']

# Character sets handed to str.lstrip / str.rstrip / str.strip.
STRIP_START_CHARS = ''.join(CL_START_CHARS)
STRIP_END_CHARS = ''.join(CL_END_CHARS)
STRIP_CHARS = '_ .'

# Characters that separate fragments inside a closure.
FRAGMENT_SEPARATORS = list('.-_ ')


# Closure splitter states: CL_START looks for a start character,
# CL_END looks for the matching end character.
CL_START, CL_END = 0, 1
|
|
|
|
|
|
class Caper(object):
    """Split a name into closures and fragments and hand them to a parser.

    A *closure* is a top-level section of the name (a bracketed group or a
    run of text between closure delimiters); a *fragment* is a
    separator-delimited token inside a closure.
    """

    def __init__(self, debug=False):
        """
        :param debug: flag passed to the parser instantiated by :meth:`parse`
        :type debug: bool
        """
        self.debug = debug

        # Parser classes selectable through the ``parser`` argument of parse()
        self.parsers = {
            'anime': AnimeParser,
            'scene': SceneParser,
            'usenet': UsenetParser
        }

    def _closure_split(self, name):
        """Split ``name`` into a linked list of closures.

        A closure is opened by any character in CL_START_CHARS and closed by
        the next character in CL_END_CHARS.  Outside of such a group the text
        is also cut whenever it ends with an entry of CL_END_STRINGS.  Pieces
        shorter than two characters (after stripping STRIP_CHARS) are dropped.

        :type name: str

        :rtype: list of CaperClosure
        """
        closures = []

        def end_closure(buf):
            # Trim surrounding separator characters and skip tiny pieces
            buf = buf.strip(STRIP_CHARS)
            if len(buf) < 2:
                return

            cur = CaperClosure(len(closures), buf)

            # Link the new closure with its left-hand neighbour (if any)
            cur.left = closures[-1] if closures else None
            if cur.left:
                cur.left.right = cur

            closures.append(cur)

        state = CL_START
        buf = ""
        for ch in name:
            # Check for start characters; flush whatever preceded them
            if state == CL_START and ch in CL_START_CHARS:
                end_closure(buf)

                state = CL_END
                buf = ""

            buf += ch

            if state == CL_END and ch in CL_END_CHARS:
                # End character found, create the closure
                end_closure(buf)

                state = CL_START
                buf = ""
            elif state == CL_START and buf[-3:] in CL_END_STRINGS:
                # End string found, create the closure without the end string
                # (the slice width of 3 matches the length of ' - ')
                end_closure(buf[:-3])

                state = CL_START
                buf = ""

        # Flush the remaining text
        end_closure(buf)

        return closures

    def _clean_closure(self, closure):
        """Remove leading start characters and trailing end characters.

        :type closure: str

        :rtype: str
        """
        return closure.lstrip(STRIP_START_CHARS).rstrip(STRIP_END_CHARS)

    def _fragment_split(self, closures):
        """Populate ``fragments`` on every closure.

        The cleaned value of each closure is cut at every character in
        FRAGMENT_SEPARATORS.  Each fragment records its position, its
        neighbours and the separators on either side.  A run of consecutive
        separators is buffered; once the buffer is longer than one character
        its stripped content becomes a fragment of its own (if non-empty).

        :type closures: list of CaperClosure

        :rtype: list of CaperClosure
        """

        def end_fragment(fragments, cur, cur_position, sep):
            # Finalise ``cur`` and append it, linking it with the previous
            # fragment; ``sep`` is stored as the fragment's right separator
            cur.position = cur_position

            cur.left = fragments[-1] if fragments else None
            if cur.left:
                cur.left_sep = cur.left.right_sep
                cur.left.right = cur

            cur.right_sep = sep

            fragments.append(cur)

        for closure in closures:
            closure.fragments = []

            # Per-closure parsing state
            cur_position = 0
            cur = None
            separator_buffer = ""

            for ch in self._clean_closure(closure.value):
                if not cur:
                    cur = CaperFragment(closure)

                if ch in FRAGMENT_SEPARATORS:
                    # A separator directly after text starts a new run
                    if cur.value:
                        separator_buffer = ""

                    separator_buffer += ch

                    if cur.value or not closure.fragments:
                        end_fragment(closure.fragments, cur, cur_position, ch)
                    elif len(separator_buffer) > 1:
                        # Several separators in a row: keep the non-space
                        # part of the run as a fragment of its own
                        cur.value = separator_buffer.strip()

                        if cur.value:
                            end_fragment(closure.fragments, cur, cur_position, ch)

                        separator_buffer = ""

                    # Reset; the position advances on every separator seen
                    cur = None
                    cur_position += 1
                else:
                    cur.value += ch

            # Finish parsing the last fragment
            if cur and cur.value:
                # NOTE(review): ``ch`` here is the last character of the
                # fragment itself rather than a separator, so the trailing
                # fragment's right_sep holds that character.  Kept as-is for
                # backward compatibility - confirm what consumers expect.
                end_fragment(closure.fragments, cur, cur_position, ch)

        return closures

    def parse(self, name, parser='scene'):
        """Split ``name`` and run the selected parser over the result.

        :param name: string to split into closures and fragments
        :type name: str
        :param parser: key of the parser to use (see ``self.parsers``)
        :type parser: str

        :raises ValueError: if ``parser`` is not a key of ``self.parsers``

        :return: the value returned by the selected parser's ``run()``
        """
        # Fail fast: reject an unknown parser before doing any splitting work
        if parser not in self.parsers:
            raise ValueError("Unknown parser: %s" % (parser,))

        closures = self._closure_split(name)
        closures = self._fragment_split(closures)

        # Print closures
        for closure in closures:
            Logr.debug("closure [%s]", closure.value)

            for fragment in closure.fragments:
                Logr.debug("\tfragment [%s]", fragment.value)

        # TODO autodetect the parser type
        return self.parsers[parser](self.debug).run(closures)
|
|
|