16 changed files with 1 additions and 1460 deletions
@ -1,78 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# encoding: utf-8 |
|||
""" |
|||
StringMatcher.py |
|||
|
|||
ported from python-Levenshtein |
|||
[https://github.com/miohtama/python-Levenshtein] |
|||
""" |
|||
|
|||
from Levenshtein import * |
|||
from warnings import warn |
|||
|
|||
class StringMatcher:
    """A SequenceMatcher-like class built on the top of Levenshtein.

    The heavy lifting is done by the functions pulled in by the module-level
    ``from Levenshtein import *`` (``opcodes``, ``editops``,
    ``matching_blocks``, ``ratio``, ``distance``).  Each result is memoised
    until one of the two sequences is replaced.
    """

    def _reset_cache(self):
        """Forget every memoised result (``None`` means "not computed")."""
        self._ratio = self._distance = None
        self._opcodes = self._editops = self._matching_blocks = None

    def __init__(self, isjunk=None, seq1='', seq2=''):
        """Store both sequences; ``isjunk`` is accepted but only warned about."""
        if isjunk:
            warn("isjunk not NOT implemented, it will be ignored")
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seqs(self, seq1, seq2):
        """Replace both sequences and invalidate the cache."""
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seq1(self, seq1):
        """Replace the first sequence and invalidate the cache."""
        self._str1 = seq1
        self._reset_cache()

    def set_seq2(self, seq2):
        """Replace the second sequence and invalidate the cache."""
        self._str2 = seq2
        self._reset_cache()

    def get_opcodes(self):
        """Return the (memoised) opcodes, derived from editops when available."""
        # Compare against None: an empty opcode list is a valid cached value.
        if self._opcodes is None:
            if self._editops:
                self._opcodes = opcodes(self._editops, self._str1, self._str2)
            else:
                self._opcodes = opcodes(self._str1, self._str2)
        return self._opcodes

    def get_editops(self):
        """Return the (memoised) editops, derived from opcodes when available."""
        if self._editops is None:
            if self._opcodes:
                self._editops = editops(self._opcodes, self._str1, self._str2)
            else:
                self._editops = editops(self._str1, self._str2)
        return self._editops

    def get_matching_blocks(self):
        """Return the (memoised) matching blocks computed from the opcodes."""
        if self._matching_blocks is None:
            self._matching_blocks = matching_blocks(self.get_opcodes(),
                                                    self._str1, self._str2)
        return self._matching_blocks

    def ratio(self):
        """Return the (memoised) Levenshtein ratio of the two sequences."""
        # A ratio of 0.0 is falsy, so test for None to keep it cached.
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def quick_ratio(self):
        """Same value as ratio(); kept for SequenceMatcher API parity."""
        # This is usually quick enough :o)
        return self.ratio()

    def real_quick_ratio(self):
        """Return a length-only similarity estimate.

        Two empty sequences yield 1.0 instead of dividing by zero.
        """
        len1, len2 = len(self._str1), len(self._str2)
        total = len1 + len2
        if total == 0:
            return 1.0
        return 2.0 * min(len1, len2) / total

    def distance(self):
        """Return the (memoised) Levenshtein distance of the two sequences."""
        # A distance of 0 is falsy, so test for None to keep it cached.
        if self._distance is None:
            self._distance = distance(self._str1, self._str2)
        return self._distance
@ -1,263 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# encoding: utf-8 |
|||
""" |
|||
fuzz.py |
|||
|
|||
Copyright (c) 2011 Adam Cohen |
|||
|
|||
Permission is hereby granted, free of charge, to any person obtaining |
|||
a copy of this software and associated documentation files (the |
|||
"Software"), to deal in the Software without restriction, including |
|||
without limitation the rights to use, copy, modify, merge, publish, |
|||
distribute, sublicense, and/or sell copies of the Software, and to |
|||
permit persons to whom the Software is furnished to do so, subject to |
|||
the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be |
|||
included in all copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
|||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
|||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
|||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|||
""" |
|||
from __future__ import unicode_literals |
|||
|
|||
try: |
|||
from StringMatcher import StringMatcher as SequenceMatcher |
|||
except: |
|||
from difflib import SequenceMatcher |
|||
|
|||
from . import utils |
|||
|
|||
|
|||
########################### |
|||
# Basic Scoring Functions # |
|||
########################### |
|||
|
|||
|
|||
def ratio(s1, s2):
    """Return the similarity of ``s1`` and ``s2`` as an integer.

    The value is 100 times the SequenceMatcher ratio, rounded by
    ``utils.intr``.  Raises TypeError when either argument is None and
    returns 0 when either string is empty.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    s1, s2 = utils.make_type_consistent(s1, s2)
    if min(len(s1), len(s2)) == 0:
        return 0

    matcher = SequenceMatcher(None, s1, s2)
    similarity = matcher.ratio()
    return utils.intr(100 * similarity)
|||
|
|||
|
|||
# todo: skip duplicate indexes for a little more speed |
|||
def partial_ratio(s1, s2):
    """Return the best score of the shorter string against a same-length
    window of the longer one, as an integer.

    Raises TypeError when either argument is None and returns 0 when either
    string is empty.  The final score is rounded with ``utils.intr`` so it
    agrees with ``ratio`` (it used to be truncated with ``int``).
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")
    s1, s2 = utils.make_type_consistent(s1, s2)
    if len(s1) == 0 or len(s2) == 0:
        return 0

    if len(s1) <= len(s2):
        shorter, longer = s1, s2
    else:
        shorter, longer = s2, s1

    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # each block represents a sequence of matching characters in a string
    # of the form (idx_1, idx_2, len)
    # the best partial match will block align with at least one of those blocks
    #   e.g. shorter = "abcd", longer = XXXbcdeEEE
    #   block = (1,3,3)
    #   best score === ratio("abcd", "Xbcd")
    scores = []
    for block in blocks:
        # Align the window so the block lines up, clamped at the string start.
        long_start = max(block[1] - block[0], 0)
        long_end = long_start + len(shorter)
        long_substr = longer[long_start:long_end]

        r = SequenceMatcher(None, shorter, long_substr).ratio()
        if r > .995:
            return 100
        scores.append(r)

    # Guard the max() call in case the matcher reported no blocks at all.
    if not scores:
        return 0
    return utils.intr(100 * max(scores))
|||
|
|||
|
|||
############################## |
|||
# Advanced Scoring Functions # |
|||
############################## |
|||
|
|||
# Sorted Token |
|||
# find all alphanumeric tokens in the string |
|||
# sort those tokens and take ratio of resulting joined strings |
|||
# controls for unordered string elements |
|||
def _token_sort(s1, s2, partial=True, force_ascii=True):
    """Score two strings after sorting their tokens.

    Each string goes through ``utils.full_process``, is split on whitespace,
    and its tokens are sorted and re-joined with single spaces.  The joined
    strings are scored with ``partial_ratio`` when ``partial`` is true, else
    with ``ratio``.  Raises TypeError when either argument is None.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    def _sorted_token_string(text):
        # pull tokens, sort them and join them back together
        tokens = utils.full_process(text, force_ascii=force_ascii).split()
        return " ".join(sorted(tokens)).strip()

    sorted1 = _sorted_token_string(s1)
    sorted2 = _sorted_token_string(s2)

    scorer = partial_ratio if partial else ratio
    return scorer(sorted1, sorted2)
|||
|
|||
|
|||
def token_sort_ratio(s1, s2, force_ascii=True):
    """Token-sort score of ``s1`` and ``s2`` using the non-partial scorer."""
    use_partial = False
    return _token_sort(s1, s2, partial=use_partial, force_ascii=force_ascii)
|||
|
|||
|
|||
def partial_token_sort_ratio(s1, s2, force_ascii=True):
    """Token-sort score of ``s1`` and ``s2`` using the partial scorer."""
    use_partial = True
    return _token_sort(s1, s2, partial=use_partial, force_ascii=force_ascii)
|||
|
|||
|
|||
# Token Set |
|||
# find all alphanumeric tokens in each string...treat them as a set |
|||
# construct two strings of the form |
|||
# <sorted_intersection><sorted_remainder> |
|||
# take ratios of those two strings |
|||
# controls for unordered partial matches |
|||
def _token_set(s1, s2, partial=True, force_ascii=True):
    """Score two strings by comparing their token sets.

    Both strings are processed with ``utils.full_process`` and split into
    sets of tokens.  Three strings are built -- the sorted intersection, and
    the intersection followed by each side's sorted remainder -- and the
    highest pairwise score among them is returned.

    ``partial`` selects the scorer: ``partial_ratio`` when true, ``ratio``
    otherwise.  (The flag used to be ignored and ``ratio`` was always used.)

    Raises TypeError when either argument is None; returns 0 when either
    processed string fails ``utils.validate_string``.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    p1 = utils.full_process(s1, force_ascii=force_ascii)
    p2 = utils.full_process(s2, force_ascii=force_ascii)

    if not utils.validate_string(p1):
        return 0
    if not utils.validate_string(p2):
        return 0

    # pull tokens
    tokens1 = set(utils.full_process(p1).split())
    tokens2 = set(utils.full_process(p2).split())

    intersection = tokens1.intersection(tokens2)
    diff1to2 = tokens1.difference(tokens2)
    diff2to1 = tokens2.difference(tokens1)

    sorted_sect = " ".join(sorted(intersection))
    sorted_1to2 = " ".join(sorted(diff1to2))
    sorted_2to1 = " ".join(sorted(diff2to1))

    combined_1to2 = sorted_sect + " " + sorted_1to2
    combined_2to1 = sorted_sect + " " + sorted_2to1

    # strip
    sorted_sect = sorted_sect.strip()
    combined_1to2 = combined_1to2.strip()
    combined_2to1 = combined_2to1.strip()

    # Honour the ``partial`` flag when picking the scorer.
    scorer = partial_ratio if partial else ratio

    pairwise = [
        scorer(sorted_sect, combined_1to2),
        scorer(sorted_sect, combined_2to1),
        scorer(combined_1to2, combined_2to1)
    ]
    return max(pairwise)
|||
|
|||
|
|||
def token_set_ratio(s1, s2, force_ascii=True):
    """Token-set score of ``s1`` and ``s2`` with ``partial`` switched off."""
    use_partial = False
    return _token_set(s1, s2, partial=use_partial, force_ascii=force_ascii)
|||
|
|||
|
|||
def partial_token_set_ratio(s1, s2, force_ascii=True):
    """Token-set score of ``s1`` and ``s2`` with ``partial`` switched on."""
    use_partial = True
    return _token_set(s1, s2, partial=use_partial, force_ascii=force_ascii)
|||
|
|||
|
|||
# TODO: numerics |
|||
|
|||
################### |
|||
# Combination API # |
|||
################### |
|||
|
|||
# q is for quick |
|||
def QRatio(s1, s2, force_ascii=True):
    """Quick ratio: ``ratio`` of the two strings after ``utils.full_process``.

    Returns 0 when either processed string fails ``utils.validate_string``.
    """
    p1 = utils.full_process(s1, force_ascii=force_ascii)
    p2 = utils.full_process(s2, force_ascii=force_ascii)

    for processed in (p1, p2):
        if not utils.validate_string(processed):
            return 0

    return ratio(p1, p2)
|||
|
|||
|
|||
def UQRatio(s1, s2):
    """``QRatio`` without forcing the inputs to ASCII."""
    keep_unicode = False
    return QRatio(s1, s2, force_ascii=keep_unicode)
|||
|
|||
|
|||
# w is for weighted |
|||
def WRatio(s1, s2, force_ascii=True):
    """Weighted ratio: the best of several scaled scores, truncated to int.

    Both strings are processed with ``utils.full_process``; 0 is returned if
    either result fails ``utils.validate_string``.  When the longer string
    is less than 1.5 times the shorter one, the plain ratio competes with
    the token-sort and token-set ratios (each scaled by 0.95).  Otherwise
    the partial variants are used, additionally scaled by 0.90, or by 0.6
    when the length ratio exceeds 8.
    """
    p1 = utils.full_process(s1, force_ascii=force_ascii)
    p2 = utils.full_process(s2, force_ascii=force_ascii)

    if not utils.validate_string(p1):
        return 0
    if not utils.validate_string(p2):
        return 0

    unbase_scale = .95

    base = ratio(p1, p2)
    shortest = min(len(p1), len(p2))
    len_ratio = float(max(len(p1), len(p2))) / shortest

    # if strings are similar length, don't use partials
    if len_ratio < 1.5:
        tsor = token_sort_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
        tser = token_set_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
        return int(max(base, tsor, tser))

    # if one string is much much shorter than the other
    partial_scale = .6 if len_ratio > 8 else .90

    partial = partial_ratio(p1, p2) * partial_scale
    ptsor = partial_token_sort_ratio(p1, p2, force_ascii=force_ascii) \
        * unbase_scale * partial_scale
    ptser = partial_token_set_ratio(p1, p2, force_ascii=force_ascii) \
        * unbase_scale * partial_scale

    return int(max(base, partial, ptsor, ptser))
|||
|
|||
|
|||
def UWRatio(s1, s2):
    """``WRatio`` without forcing the inputs to ASCII."""
    keep_unicode = False
    return WRatio(s1, s2, force_ascii=keep_unicode)
@ -1,119 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# encoding: utf-8 |
|||
""" |
|||
process.py |
|||
|
|||
Copyright (c) 2011 Adam Cohen |
|||
|
|||
Permission is hereby granted, free of charge, to any person obtaining |
|||
a copy of this software and associated documentation files (the |
|||
"Software"), to deal in the Software without restriction, including |
|||
without limitation the rights to use, copy, modify, merge, publish, |
|||
distribute, sublicense, and/or sell copies of the Software, and to |
|||
permit persons to whom the Software is furnished to do so, subject to |
|||
the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be |
|||
included in all copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
|||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
|||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
|||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|||
""" |
|||
import itertools |
|||
|
|||
from . import fuzz |
|||
from . import utils |
|||
|
|||
|
|||
def extract(query, choices, processor=None, scorer=None, limit=5):
    """Return the best-scoring choices as a list of (choice, score) tuples.

    Arguments:
    query     -- the object we are looking for
    choices   -- a sized collection of candidate objects
    processor -- f(CHOICE) --> OBJ, applied to every choice before scoring;
                 defaults to utils.full_process
    scorer    -- called as scorer(query, processed_choice) and expected to
                 return a sortable score; defaults to fuzz.WRatio
    limit     -- upper bound of the slice taken from the ranked list

    The result is ordered by descending score.  An empty list is returned
    when ``choices`` is None or empty.
    """
    if choices is None or len(choices) == 0:
        return []

    # default, turn whatever the choice is into a workable string
    if processor is None:
        processor = lambda x: utils.full_process(x)

    # default: wratio
    if scorer is None:
        scorer = fuzz.WRatio

    scored = [(choice, scorer(query, processor(choice))) for choice in choices]
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:limit]
|||
|
|||
|
|||
def extractBests(query, choices, processor=None, scorer=None, score_cutoff=0, limit=5):
    """Return the leading matches whose score is above ``score_cutoff``.

    Delegates to extract() (see it for the shared arguments) and keeps
    results from the front of the ranked list until the first score that is
    less than or equal to ``score_cutoff``.
    """
    ranked = extract(query, choices, processor, scorer, limit)
    if len(ranked) == 0:
        return []

    above_cutoff = itertools.takewhile(lambda x: x[1] > score_cutoff, ranked)
    return list(above_cutoff)
|||
|
|||
|
|||
def extractOne(query, choices, processor=None, scorer=None, score_cutoff=0):
    """Return the single best (choice, score) tuple, or None.

    Delegates to extract() with ``limit=1`` (see it for the shared
    arguments).  None is returned when there are no choices or when the
    best score is less than or equal to ``score_cutoff``.
    """
    candidates = extract(query, choices, processor, scorer, limit=1)
    if len(candidates) == 0:
        return None

    best = candidates[0]
    return best if best[1] > score_cutoff else None
@ -1,41 +0,0 @@ |
|||
from __future__ import unicode_literals |
|||
import re |
|||
|
|||
|
|||
class StringProcessor(object):
    """
    Namespace of small string helpers exposed as class methods.
    Ideally all the methods below use unicode strings for both input
    and output.
    """

    # Compiled once: any single non-word character (unicode aware).
    _NON_WORD_RE = re.compile(r"(?ui)\W")

    @classmethod
    def replace_non_letters_non_numbers_with_whitespace(cls, a_string):
        """
        Replace every character matched by ``\\W`` with one space.
        Runs are not collapsed: two adjacent matches give two spaces.
        """
        return cls._NON_WORD_RE.sub(" ", a_string)

    @classmethod
    def strip(cls, a_string):
        """
        Return the string without leading and trailing white space.
        """
        stripped = a_string.strip()
        return stripped

    @classmethod
    def to_lower_case(cls, a_string):
        """
        Return the lower-cased version of the string given.
        """
        lowered = a_string.lower()
        return lowered

    @classmethod
    def to_upper_case(cls, a_string):
        """
        Return the upper-cased version of the string given.
        """
        uppered = a_string.upper()
        return uppered
@ -1,76 +0,0 @@ |
|||
from __future__ import unicode_literals |
|||
import sys |
|||
|
|||
from fuzzywuzzy.string_processing import StringProcessor |
|||
|
|||
|
|||
PY3 = sys.version_info[0] == 3 |
|||
|
|||
|
|||
def validate_string(s):
    """Return True when ``len(s)`` is greater than zero, False otherwise.

    Values for which ``len()`` fails (None, numbers, ...) count as invalid.
    Only ordinary exceptions are treated that way: KeyboardInterrupt and
    SystemExit propagate instead of being swallowed.
    """
    try:
        return len(s) > 0
    except Exception:
        return False
|||
|
|||
# ascii dammit!  One character for each code point from 128 to 255.
bad_chars = str('').join([chr(code) for code in range(128, 256)])
if PY3:
    # Maps each of those code points to None for the translate() call
    # made in asciionly().
    translation_table = dict.fromkeys(ord(c) for c in bad_chars)
|||
|
|||
|
|||
def asciionly(s):
    """Return ``s`` with every character listed in ``bad_chars`` removed."""
    if not PY3:
        # Python 2 str.translate: no mapping table, delete bad_chars.
        return s.translate(None, bad_chars)
    return s.translate(translation_table)
|||
|
|||
|
|||
def asciidammit(s):
    """Return ``s`` reduced to ASCII-only text via ``asciionly``.

    On Python 3 there is no ``unicode`` builtin, so the Python 2 branches
    below would raise NameError for anything that is not exactly ``str``.
    There, bytes are decoded as ASCII (dropping undecodable bytes) and any
    other object is converted with ``str()`` first.
    """
    if PY3:
        if isinstance(s, bytes):
            s = s.decode('ascii', 'ignore')
        elif not isinstance(s, str):
            s = str(s)
        return asciionly(s)

    # Python 2 behaviour, unchanged.
    if type(s) is str:
        return asciionly(s)
    elif type(s) is unicode:
        return asciionly(s.encode('ascii', 'ignore'))
    else:
        return asciidammit(unicode(s))
|||
|
|||
|
|||
def make_type_consistent(s1, s2):
    """Return ``(s1, s2)`` as two strings of the same type.

    When both are ``str``, or both are the text type, they are returned
    untouched.  Otherwise each one is converted to the text type
    (``unicode`` on Python 2, ``str`` on Python 3).  On Python 3, bytes are
    decoded as UTF-8 rather than turned into their ``b'...'`` repr.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3 has no separate unicode type

    if isinstance(s1, str) and isinstance(s2, str):
        return s1, s2

    if isinstance(s1, text_type) and isinstance(s2, text_type):
        return s1, s2

    def _as_text(value):
        # Only reachable for bytes on Python 3, where text_type is str.
        if text_type is str and isinstance(value, bytes):
            return value.decode('utf-8')
        return text_type(value)

    return _as_text(s1), _as_text(s2)
|||
|
|||
|
|||
def full_process(s, force_ascii=False):
    """Normalise a string for matching.

    Steps, in order:
        -- if force_ascii is true, convert to ASCII with asciidammit()
        -- replace non-word characters with spaces
        -- force to lower case
        -- trim leading and trailing whitespace

    None is turned into the empty string.
    """
    if s is None:
        return ""

    text = asciidammit(s) if force_ascii else s

    # Keep only letters and numbers (see Unicode docs).
    text = StringProcessor.replace_non_letters_non_numbers_with_whitespace(text)
    # Force into lowercase, then remove leading and trailing whitespace.
    lowered = StringProcessor.to_lower_case(text)
    return StringProcessor.strip(lowered)
|||
|
|||
|
|||
def intr(n):
    '''Round ``n`` with the builtin round() and return it as an int.'''
    rounded = round(n)
    return int(rounded)
@ -1,18 +0,0 @@ |
|||
from pysrt.srttime import SubRipTime |
|||
from pysrt.srtitem import SubRipItem |
|||
from pysrt.srtfile import SubRipFile |
|||
from pysrt.srtexc import Error, InvalidItem, InvalidTimeString |
|||
from pysrt.version import VERSION, VERSION_STRING |
|||
|
|||
# Names exported by ``from pysrt import *``.  Every entry must exist in this
# module: the former 'SUPPORT_UTF_32_LE' / 'SUPPORT_UTF_32_BE' entries are
# not defined here and made the star-import fail, and 'SubRipFile' was
# listed twice (the second occurrence is replaced by 'SubRipTime').
__all__ = [
    'SubRipFile', 'SubRipItem', 'SubRipTime',
    'InvalidItem', 'InvalidTimeString'
]

# Module-level aliases of the SubRipFile error-handling constants.
ERROR_PASS = SubRipFile.ERROR_PASS
ERROR_LOG = SubRipFile.ERROR_LOG
ERROR_RAISE = SubRipFile.ERROR_RAISE

# Module-level aliases of SubRipFile attributes of the same names.
# ``open`` shadows the builtin inside this module.
open = SubRipFile.open
stream = SubRipFile.stream
from_string = SubRipFile.from_string
@ -1,218 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
# pylint: disable-all |
|||
|
|||
import os |
|||
import re |
|||
import sys |
|||
import codecs |
|||
import shutil |
|||
import argparse |
|||
from textwrap import dedent |
|||
|
|||
from chardet import detect |
|||
from pysrt import SubRipFile, SubRipTime, VERSION_STRING |
|||
|
|||
def underline(string):
    """Wrap ``string`` between the escape sequences ESC[4m and ESC[0m."""
    template = "\033[4m%s\033[0m"
    return template % string
|||
|
|||
|
|||
class TimeAwareArgumentParser(argparse.ArgumentParser):
    """ArgumentParser that protects time arguments such as ``-1s500ms``.

    A ``--`` separator is inserted before the first argument that matches
    RE_TIME_REPRESENTATION, so argparse does not treat a leading ``-`` as
    the start of an option.
    """

    RE_TIME_REPRESENTATION = re.compile(r'^\-?(\d+[hms]{0,2}){1,4}$')

    def parse_args(self, args=None, namespace=None):
        """Parse ``args`` (``sys.argv[1:]`` when None).

        The caller's list is copied first, so it is never modified.
        """
        args = list(sys.argv[1:] if args is None else args)

        for index, arg in enumerate(args):
            if self.RE_TIME_REPRESENTATION.match(arg):
                # Only the first time-like argument needs the separator.
                args.insert(index, '--')
                break

        return super(TimeAwareArgumentParser, self).parse_args(args, namespace)
|||
|
|||
|
|||
class SubRipShifter(object): |
|||
|
|||
BACKUP_EXTENSION = '.bak' |
|||
RE_TIME_STRING = re.compile(r'(\d+)([hms]{0,2})') |
|||
UNIT_RATIOS = { |
|||
'ms': 1, |
|||
'': SubRipTime.SECONDS_RATIO, |
|||
's': SubRipTime.SECONDS_RATIO, |
|||
'm': SubRipTime.MINUTES_RATIO, |
|||
'h': SubRipTime.HOURS_RATIO, |
|||
} |
|||
DESCRIPTION = dedent("""\ |
|||
Srt subtitle editor |
|||
|
|||
It can either shift, split or change the frame rate. |
|||
""") |
|||
TIMESTAMP_HELP = "A timestamp in the form: [-][Hh][Mm]S[s][MSms]" |
|||
SHIFT_EPILOG = dedent("""\ |
|||
|
|||
Examples: |
|||
1 minute and 12 seconds foreward (in place): |
|||
$ srt -i shift 1m12s movie.srt |
|||
|
|||
half a second foreward: |
|||
$ srt shift 500ms movie.srt > othername.srt |
|||
|
|||
1 second and half backward: |
|||
$ srt -i shift -1s500ms movie.srt |
|||
|
|||
3 seconds backward: |
|||
$ srt -i shift -3 movie.srt |
|||
""") |
|||
RATE_EPILOG = dedent("""\ |
|||
|
|||
Examples: |
|||
Convert 23.9fps subtitles to 25fps: |
|||
$ srt -i rate 23.9 25 movie.srt |
|||
""") |
|||
LIMITS_HELP = "Each parts duration in the form: [Hh][Mm]S[s][MSms]" |
|||
SPLIT_EPILOG = dedent("""\ |
|||
|
|||
Examples: |
|||
For a movie in 2 parts with the first part 48 minutes and 18 seconds long: |
|||
$ srt split 48m18s movie.srt |
|||
=> creates movie.1.srt and movie.2.srt |
|||
|
|||
For a movie in 3 parts of 20 minutes each: |
|||
$ srt split 20m 20m movie.srt |
|||
=> creates movie.1.srt, movie.2.srt and movie.3.srt |
|||
""") |
|||
FRAME_RATE_HELP = "A frame rate in fps (commonly 23.9 or 25)" |
|||
ENCODING_HELP = dedent("""\ |
|||
Change file encoding. Useful for players accepting only latin1 subtitles. |
|||
List of supported encodings: http://docs.python.org/library/codecs.html#standard-encodings |
|||
""") |
|||
BREAK_EPILOG = dedent("""\ |
|||
Break lines longer than defined length |
|||
""") |
|||
LENGTH_HELP = "Maximum number of characters per line" |
|||
|
|||
def __init__(self):
    """Start without an output path; create_backup() assigns one."""
    self.output_file_path = None
|||
|
|||
def build_parser(self):
    """Build and return the command-line parser.

    Global options: -i/--in-place, -e/--output-encoding and -v/--version.
    Sub-commands: shift, rate, split and break; each stores its handler
    method as the ``action`` default.  The positional ``file`` argument is
    added last.
    """
    raw_help = argparse.RawTextHelpFormatter

    parser = TimeAwareArgumentParser(
        description=self.DESCRIPTION,
        formatter_class=raw_help,
    )
    parser.add_argument(
        '-i', '--in-place',
        action='store_true',
        dest='in_place',
        help="Edit file in-place, saving a backup as file.bak (do not works for the split command)",
    )
    parser.add_argument(
        '-e', '--output-encoding',
        metavar=underline('encoding'),
        action='store',
        dest='output_encoding',
        type=self.parse_encoding,
        help=self.ENCODING_HELP,
    )
    parser.add_argument(
        '-v', '--version',
        action='version',
        version='%%(prog)s %s' % VERSION_STRING,
    )
    commands = parser.add_subparsers(title='commands')

    # shift <offset>
    shift_parser = commands.add_parser(
        'shift',
        help="Shift subtitles by specified time offset",
        epilog=self.SHIFT_EPILOG,
        formatter_class=raw_help,
    )
    shift_parser.add_argument(
        'time_offset',
        action='store',
        metavar=underline('offset'),
        type=self.parse_time,
        help=self.TIMESTAMP_HELP,
    )
    shift_parser.set_defaults(action=self.shift)

    # rate <initial> <final>
    rate_parser = commands.add_parser(
        'rate',
        help="Convert subtitles from a frame rate to another",
        epilog=self.RATE_EPILOG,
        formatter_class=raw_help,
    )
    rate_parser.add_argument('initial', action='store', type=float, help=self.FRAME_RATE_HELP)
    rate_parser.add_argument('final', action='store', type=float, help=self.FRAME_RATE_HELP)
    rate_parser.set_defaults(action=self.rate)

    # split <limit> [<limit> ...]
    split_parser = commands.add_parser(
        'split',
        help="Split a file in multiple parts",
        epilog=self.SPLIT_EPILOG,
        formatter_class=raw_help,
    )
    split_parser.add_argument(
        'limits',
        action='store',
        nargs='+',
        type=self.parse_time,
        help=self.LIMITS_HELP,
    )
    split_parser.set_defaults(action=self.split)

    # break <length>
    break_parser = commands.add_parser(
        'break',
        help="Break long lines",
        epilog=self.BREAK_EPILOG,
        formatter_class=raw_help,
    )
    break_parser.add_argument('length', action='store', type=int, help=self.LENGTH_HELP)
    break_parser.set_defaults(action=self.break_lines)

    parser.add_argument('file', action='store')

    return parser
|||
|
|||
def run(self, args):
    """Parse ``args``, make the backup if --in-place was given, then run
    the handler stored by the selected sub-command."""
    parser = self.build_parser()
    self.arguments = parser.parse_args(args)
    if self.arguments.in_place:
        self.create_backup()
    handler = self.arguments.action
    handler()
|||
|
|||
def parse_time(self, time_string):
    """Convert a string such as ``-1m12s`` into a signed total.

    Every ``<digits><unit>`` pair found by RE_TIME_STRING is multiplied by
    the matching UNIT_RATIOS entry and the products are summed; a leading
    ``-`` negates the total.

    Raises argparse.ArgumentTypeError for a unit that RE_TIME_STRING
    accepts but UNIT_RATIOS does not define (e.g. ``5sm``).  This used to
    surface as a bare KeyError, which argparse does not report as a usage
    error.
    """
    negative = time_string.startswith('-')
    if negative:
        time_string = time_string[1:]

    ordinal = 0
    for value, unit in self.RE_TIME_STRING.findall(time_string):
        try:
            unit_ratio = self.UNIT_RATIOS[unit]
        except KeyError:
            raise argparse.ArgumentTypeError(
                "unknown time unit %r in %r" % (unit, time_string))
        ordinal += int(value) * unit_ratio

    return -ordinal if negative else ordinal
|||
|
|||
def parse_encoding(self, encoding_name):
    """Return ``encoding_name`` unchanged if ``codecs`` knows it.

    Raises argparse.ArgumentTypeError carrying the LookupError text
    otherwise.  The text is taken with ``str(error)`` because exceptions
    have no ``.message`` attribute on Python 3.
    """
    try:
        codecs.lookup(encoding_name)
    except LookupError as error:
        raise argparse.ArgumentTypeError(str(error))
    return encoding_name
|||
|
|||
def shift(self):
    """Shift the input file by the parsed ``time_offset`` (passed as
    milliseconds) and write it into the output file."""
    offset = self.arguments.time_offset
    self.input_file.shift(milliseconds=offset)
    self.input_file.write_into(self.output_file)
|||
|
|||
def rate(self):
    """Shift the input file by the ratio ``final / initial`` and write it
    into the output file."""
    self.input_file.shift(ratio=self.arguments.final / self.arguments.initial)
    self.input_file.write_into(self.output_file)
|||
|
|||
def split(self):
    """Save one numbered file per interval between consecutive limits.

    The boundaries are 0, each parsed limit, and one past the ``end``
    ordinal of the last subtitle.  Each part is sliced, shifted back by its
    start boundary, re-indexed and saved as ``<base>.<n><ext>``.
    """
    # NOTE(review): limits are used as absolute boundaries here, while the
    # help text calls them "each parts duration" -- confirm which is meant.
    last_ordinal = self.input_file[-1].end.ordinal
    limits = [0] + self.arguments.limits + [last_ordinal + 1]
    base_name, extension = os.path.splitext(self.arguments.file)

    intervals = zip(limits[:-1], limits[1:])
    for part_number, (start, end) in enumerate(intervals, 1):
        file_name = '%s.%s%s' % (base_name, part_number, extension)
        part_file = self.input_file.slice(ends_after=start, starts_before=end)
        part_file.shift(milliseconds=-start)
        part_file.clean_indexes()
        part_file.save(path=file_name, encoding=self.output_encoding)
|||
|
|||
def create_backup(self):
    """Copy the input file to ``<file>.bak`` unless that copy already exists.

    Afterwards output goes to the original path and ``arguments.file``
    points at the backup.
    """
    original_path = self.arguments.file
    backup_file = original_path + self.BACKUP_EXTENSION
    if not os.path.exists(backup_file):
        shutil.copy2(original_path, backup_file)
    self.output_file_path = original_path
    self.arguments.file = backup_file
|||
|
|||
def break_lines(self):
    """Re-wrap every subtitle's text at ``arguments.length`` characters and
    write the result into the output file."""
    pattern = r'(.{,%i})(?:\s+|$)' % self.arguments.length
    split_re = re.compile(pattern)
    for item in self.input_file:
        # With one capturing group, the odd positions of split() hold the
        # captured chunks.
        chunks = split_re.split(item.text)[1::2]
        item.text = '\n'.join(chunks)
    self.input_file.write_into(self.output_file)
|||
|
|||
@property
def output_encoding(self):
    """The --output-encoding value when given, else the input file's
    encoding."""
    requested = self.arguments.output_encoding
    if requested:
        return requested
    return self.input_file.encoding
|||
|
|||
@property
def input_file(self):
    """Open ``arguments.file`` as a SubRipFile on first access and cache it.

    The encoding is guessed by chardet from the raw bytes.  chardet reports
    None when it cannot guess, so SubRipFile.DEFAULT_ENCODING is used in
    that case instead of crashing in normalize_encoding().
    """
    if not hasattr(self, '_source_file'):
        with open(self.arguments.file, 'rb') as f:
            content = f.read()

        encoding = detect(content).get('encoding') or SubRipFile.DEFAULT_ENCODING
        encoding = self.normalize_encoding(encoding)

        self._source_file = SubRipFile.open(self.arguments.file,
            encoding=encoding, error_handling=SubRipFile.ERROR_LOG)
    return self._source_file
|||
|
|||
@property
def output_file(self):
    """The destination stream, created on first access and cached.

    A file opened with codecs.open() at ``output_file_path`` when that path
    is set, otherwise ``sys.stdout``.
    """
    if hasattr(self, '_output_file'):
        return self._output_file

    if self.output_file_path:
        self._output_file = codecs.open(self.output_file_path, 'w+', encoding=self.output_encoding)
    else:
        self._output_file = sys.stdout
    return self._output_file
|||
|
|||
def normalize_encoding(self, encoding):
    """Lower-case ``encoding`` and replace each ``-`` with ``_``."""
    lowered = encoding.lower()
    return lowered.replace('-', '_')
|||
|
|||
|
|||
def main():
    """Run a SubRipShifter on the process's command-line arguments."""
    shifter = SubRipShifter()
    shifter.run(sys.argv[1:])
|||
|
|||
if __name__ == '__main__': |
|||
main() |
@ -1,26 +0,0 @@ |
|||
class ComparableMixin(object):
    """Derive the six rich comparisons from a ``_cmpkey()`` method.

    Each operator compares ``self._cmpkey()`` with ``other._cmpkey()``.
    """

    def _compare(self, other, method):
        """Apply ``method`` to both keys, or return NotImplemented.

        NotImplemented is returned when fetching or comparing the keys
        raises AttributeError or TypeError.
        """
        try:
            own_key = self._cmpkey()
            other_key = other._cmpkey()
            return method(own_key, other_key)
        except (AttributeError, TypeError):
            # _cmpkey not implemented, or return different type,
            # so I can't compare with "other".
            return NotImplemented

    def __lt__(self, other):
        return self._compare(other, lambda mine, theirs: mine < theirs)

    def __le__(self, other):
        return self._compare(other, lambda mine, theirs: mine <= theirs)

    def __eq__(self, other):
        return self._compare(other, lambda mine, theirs: mine == theirs)

    def __ge__(self, other):
        return self._compare(other, lambda mine, theirs: mine >= theirs)

    def __gt__(self, other):
        return self._compare(other, lambda mine, theirs: mine > theirs)

    def __ne__(self, other):
        return self._compare(other, lambda mine, theirs: mine != theirs)
@ -1,24 +0,0 @@ |
|||
|
|||
import sys |
|||
|
|||
# Syntax sugar. |
|||
# Syntax sugar.
_ver = sys.version_info
_major = _ver[0]

#: Python 2.x?
is_py2 = (_major == 2)

#: Python 3.x?
is_py3 = (_major == 3)

from io import open as io_open

# Publish the same four names (builtin_str, basestring, str, open) on both
# major versions so other modules can import them from here.
if is_py3:
    builtin_str = str
    basestring = (str, bytes)
    str = str
    open = open
elif is_py2:
    builtin_str = str
    basestring = basestring
    str = unicode
    open = io_open
@ -1,31 +0,0 @@ |
|||
""" |
|||
Exception classes |
|||
""" |
|||
|
|||
|
|||
class Error(Exception):
    """
    Base class of every pysrt exception.
    """


class InvalidTimeString(Error):
    """
    Raised when the parser fails on a badly formatted time string.
    """


class InvalidItem(Error):
    """
    Raised when the parser fails to parse a subtitle item.
    """


class InvalidIndex(InvalidItem):
    """
    Raised when the parser fails to parse a subtitle index.
    """
@ -1,312 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
import os |
|||
import sys |
|||
import codecs |
|||
|
|||
try: |
|||
from collections import UserList |
|||
except ImportError: |
|||
from UserList import UserList |
|||
|
|||
from itertools import chain |
|||
from copy import copy |
|||
|
|||
from pysrt.srtexc import Error |
|||
from pysrt.srtitem import SubRipItem |
|||
from pysrt.compat import str |
|||
|
|||
# (byte order mark, codec name) pairs.
BOMS = (
    (codecs.BOM_UTF32_LE, 'utf_32_le'),
    (codecs.BOM_UTF32_BE, 'utf_32_be'),
    (codecs.BOM_UTF16_LE, 'utf_16_le'),
    (codecs.BOM_UTF16_BE, 'utf_16_be'),
    (codecs.BOM_UTF8, 'utf_8'),
)
# Codec name -> its BOM decoded with that codec.
CODECS_BOMS = {codec: str(bom, codec) for bom, codec in BOMS}
# Length in bytes of the longest BOM above.
BIGGER_BOM = max(len(pair[0]) for pair in BOMS)
|||
|
|||
|
|||
class SubRipFile(UserList, object): |
|||
""" |
|||
SubRip file descriptor. |
|||
|
|||
Provide a pure Python mapping on all metadata. |
|||
|
|||
SubRipFile(items, eol, path, encoding) |
|||
|
|||
items -> list of SubRipItem. Default to []. |
|||
eol -> str: end of line character. Default to linesep used in opened file |
|||
if any else to os.linesep. |
|||
path -> str: path where file will be saved. To open an existant file see |
|||
SubRipFile.open. |
|||
encoding -> str: encoding used at file save. Default to utf-8. |
|||
""" |
|||
ERROR_PASS = 0 |
|||
ERROR_LOG = 1 |
|||
ERROR_RAISE = 2 |
|||
|
|||
DEFAULT_ENCODING = 'utf_8' |
|||
|
|||
def __init__(self, items=None, eol=None, path=None, encoding='utf-8'):
    """Initialise the list with ``items`` (an empty list when falsy) and
    remember ``eol``, ``path`` and ``encoding``."""
    initial_items = items or []
    UserList.__init__(self, initial_items)
    self._eol = eol
    self.path = path
    self.encoding = encoding
|||
|
|||
def _get_eol(self):
    """Return the stored end-of-line string, or os.linesep when unset."""
    if self._eol:
        return self._eol
    return os.linesep

def _set_eol(self, eol):
    """Store ``eol`` only when no truthy value is stored yet."""
    if not self._eol:
        self._eol = eol

eol = property(_get_eol, _set_eol)
|||
|
|||
def slice(self, starts_before=None, starts_after=None, ends_before=None,
          ends_after=None):
    """
    slice([starts_before][, starts_after][, ends_before][, ends_after])
    -> SubRipFile clone

    All arguments are optional; each one given (and truthy) is compared
    with the ``start`` or ``end`` attribute of every subtitle, and only
    subtitles passing all given constraints are kept.

    The result is a shallow copy holding a new list, but the list still
    references the original subtitle objects: shifting the returned set
    also alters the subtitles of the original SubRipFile instance.

    Example:
        >>> subs.slice(ends_after={'seconds': 20}).shift(seconds=2)
    """
    clone = copy(self)
    selected = clone.data

    if starts_before:
        selected = [i for i in selected if i.start < starts_before]
    if starts_after:
        selected = [i for i in selected if i.start > starts_after]
    if ends_before:
        selected = [i for i in selected if i.end < ends_before]
    if ends_after:
        selected = [i for i in selected if i.end > ends_after]

    # Always hand the clone its own list, even when nothing was filtered.
    clone.data = list(selected)
    return clone
|||
|
|||
def at(self, timestamp=None, **kwargs):
    """
    at(timestamp) -> clone restricted to one instant

    A specialization of slice: the result holds the subtitles that start
    before and end after the given mark. The mark is `timestamp` when it
    is truthy, otherwise the keyword arguments taken as a dict; it is
    handed to slice() unchanged, so it should be coercible to SubRipTime.

    Example:
        >>> subs.at((0, 0, 20, 0)).shift(seconds=2)
        >>> subs.at(seconds=20).shift(seconds=2)
    """
    if timestamp:
        moment = timestamp
    else:
        moment = kwargs
    return self.slice(starts_before=moment, ends_after=moment)
|||
|
|||
def shift(self, *args, **kwargs):
    """shift(hours, minutes, seconds, milliseconds, ratio)

    Forward the very same arguments to the `shift` method of every item
    held by this collection, in iteration order.

    Example, converting subtitles from 23.9 fps to 25 fps:
        >>> subs.shift(ratio=25/23.9)

    Example, delaying all subs by two and a half seconds:
        >>> subs.shift(seconds=2, milliseconds=500)
    """
    for subtitle in self:
        shift_one = subtitle.shift
        shift_one(*args, **kwargs)
|||
|
|||
def clean_indexes(self):
    """
    clean_indexes()

    Sort the collection in place, then renumber the items so that their
    `index` attributes run 1, 2, 3, ... in the sorted order.
    """
    self.sort()
    for position, subtitle in enumerate(self, 1):
        subtitle.index = position
|||
|
|||
@property
def text(self):
    """Return the `text` of every item, joined by a newline character."""
    pieces = [subtitle.text for subtitle in self]
    return '\n'.join(pieces)
|||
|
|||
@classmethod
def open(cls, path='', encoding=None, error_handling=ERROR_PASS):
    """
    open([path, [encoding]]) -> new instance filled from the file at `path`

    path -> location of the file to parse.
    encoding -> encoding of the file. When omitted, _detect_encoding()
        picks it from a byte order mark at the start of the file, falling
        back to DEFAULT_ENCODING.
    error_handling -> policy forwarded to read() for invalid items.

    The encoding actually used is stored on the returned instance, so a
    later save() writes the file back with that same encoding.
    """
    # Resolve the encoding before building the instance: passing the raw
    # None through would leave `new_file.encoding` unset.
    encoding = encoding or cls._detect_encoding(path)
    new_file = cls(path=path, encoding=encoding)
    source_file = cls._open_unicode_file(path, claimed_encoding=encoding)
    try:
        new_file.read(source_file, error_handling=error_handling)
    finally:
        # Release the handle even when parsing raises (ERROR_RAISE).
        source_file.close()
    return new_file
|||
|
|||
@classmethod
def from_string(cls, source, **kwargs):
    """
    from_string(source, **kwargs) -> new instance parsed from `source`

    `source` -> text holding the subtitles; it is split into lines with
    their line endings kept.

    An optional `error_handling` keyword is forwarded to read() (None
    when absent); every other keyword goes to the constructor.
    """
    policy = kwargs.pop('error_handling', None)
    instance = cls(**kwargs)
    source_lines = source.splitlines(True)
    instance.read(source_lines, error_handling=policy)
    return instance
|||
|
|||
def read(self, source_file, error_handling=ERROR_PASS):
    """
    read(source_file, [error_handling]) -> self

    Parse the subtitles found in `source_file` and append them to this
    instance. The end-of-line string guessed from the source is assigned
    to `eol` first.

    `source_file` -> any iterable yielding unicode strings, such as a file
    opened with `codecs.open()` or a list of unicode lines.
    """
    guessed_eol = self._guess_eol(source_file)
    self.eol = guessed_eol
    parsed_items = self.stream(source_file, error_handling=error_handling)
    self.extend(parsed_items)
    return self
|||
|
|||
@classmethod
def stream(cls, source_file, error_handling=ERROR_PASS):
    """
    stream(source_file, [error_handling])

    Generator yielding one SubRipItem per block of consecutive non-blank
    lines, as soon as the block is complete and without storing the
    items.

    `source_file` -> any iterable yielding unicode strings, such as a file
    opened with `codecs.open()` or a list of unicode lines.

    A block that fails to parse is handed to _handle_error together with
    `error_handling` and the index of the line that ended the block.

    Example:
        >>> import pysrt
        >>> import codecs
        >>> file = codecs.open('movie.srt', encoding='utf-8')
        >>> for sub in pysrt.stream(file):
        ...     sub.text += "\\nHello !"
    """
    pending = []
    # The extra '\n' acts as a final blank line that flushes the last block.
    for index, line in enumerate(chain(source_file, '\n')):
        if line.strip():
            pending.append(line)
            continue

        block, pending = pending, []
        if not (block and all(block)):
            continue

        try:
            yield SubRipItem.from_lines(block)
        except Error as error:
            # Attach the offending text so it can be reported.
            error.args += (''.join(block), )
            cls._handle_error(error, error_handling, index)
|||
|
|||
def save(self, path=None, encoding=None, eol=None):
    """
    save([path][, encoding][, eol])

    Write the serialized subtitles to a file.

    path -> destination; falls back to `self.path`.
    encoding -> output encoding; falls back to `self.encoding`.
    eol -> end-of-line string handed to write_into(), which applies its
        own fallback when this is None.
    """
    path = path or self.path
    encoding = encoding or self.encoding

    # The context manager closes the file even if serialization fails.
    with codecs.open(path, 'w+', encoding=encoding) as save_file:
        self.write_into(save_file, eol=eol)
|||
|
|||
def write_into(self, output_file, eol=None):
    """
    write_into(output_file [, eol])

    Serialize every item, in order, into `output_file`.

    `output_file` -> any object with a `write()` method, typically a file.
    `eol` -> end-of-line string to emit; falls back to `self.eol`.
    """
    line_end = eol or self.eol
    blank_line = 2 * line_end

    for subtitle in self:
        rendered = str(subtitle)
        if line_end != '\n':
            rendered = rendered.replace('\n', line_end)
        output_file.write(rendered)
        # The separating eol belongs here rather than in the item text,
        # but items coming from existing applications may already end
        # with one; only add it when it is missing.
        if not rendered.endswith(blank_line):
            output_file.write(line_end)
|||
|
|||
@classmethod
def _guess_eol(cls, string_iterable):
    """Return the line ending that terminates the first line of the input.

    Falls back to os.linesep when the first line ends with none of
    '\\r\\n', '\\r' or '\\n' (which includes an empty input).
    """
    first_line = cls._get_first_line(string_iterable)
    # '\r\n' must be tested before its one-character suffix '\n'.
    if first_line.endswith('\r\n'):
        return '\r\n'
    if first_line.endswith('\r'):
        return '\r'
    if first_line.endswith('\n'):
        return '\n'
    return os.linesep
|||
|
|||
@classmethod
def _get_first_line(cls, string_iterable):
    """Return the first element yielded by `string_iterable`, or ''.

    When the iterable exposes both `tell` and `seek` (a file-like
    object), its position is saved beforehand and restored afterwards so
    that the line peeked at can be read again by the caller.
    """
    # Rewinding needs a recorded position, so both methods are required;
    # checking them independently left `previous_position` unbound for
    # objects that only offer `seek`.
    can_rewind = (hasattr(string_iterable, 'tell')
                  and hasattr(string_iterable, 'seek'))
    if can_rewind:
        previous_position = string_iterable.tell()

    try:
        first_line = next(iter(string_iterable))
    except StopIteration:
        return ''

    if can_rewind:
        string_iterable.seek(previous_position)
    return first_line
|||
|
|||
@classmethod
def _detect_encoding(cls, path):
    """Return the encoding announced by the BOM of the file at `path`.

    The first BIGGER_BOM bytes of the file are compared against each
    (bom, encoding) pair of BOMS, in order; the encoding of the first
    matching pair is returned. Without a match DEFAULT_ENCODING is used.
    """
    # The context manager closes the file even if the read fails.
    with open(path, 'rb') as file_descriptor:
        first_chars = file_descriptor.read(BIGGER_BOM)

    for bom, encoding in BOMS:
        if first_chars.startswith(bom):
            return encoding

    # TODO: maybe a chardet integration
    return cls.DEFAULT_ENCODING
|||
|
|||
@classmethod
def _open_unicode_file(cls, path, claimed_encoding=None):
    """Open `path` for decoded reading, positioned after any leading BOM.

    claimed_encoding -> encoding to decode with; when falsy it is
        obtained from _detect_encoding().

    Returns the object produced by codecs.open(); the caller must close it.
    """
    encoding = claimed_encoding or cls._detect_encoding(path)
    # Plain 'r' instead of 'rU': codecs.open() always opens encoded files
    # in binary mode, so the universal-newline flag never had any effect,
    # and Python 3.11+ rejects 'U' in a file mode outright.
    source_file = codecs.open(path, 'r', encoding=encoding)

    # get rid of BOM if any
    possible_bom = CODECS_BOMS.get(encoding, None)
    if possible_bom:
        file_bom = source_file.read(len(possible_bom))
        if not file_bom == possible_bom:
            source_file.seek(0)  # no BOM was consumed: rewind
    return source_file
|||
|
|||
@classmethod
def _handle_error(cls, error, error_handling, index):
    """Apply the `error_handling` policy to a parsing `error`.

    ERROR_RAISE -> prepend `index` to the error's args and re-raise it.
    ERROR_LOG -> write the error type name, `index` and the first error
        argument (non-ASCII characters replaced by '?') to sys.stderr.
    Any other value -> do nothing.
    """
    if error_handling == cls.ERROR_RAISE:
        error.args = (index, ) + error.args
        raise error
    if error_handling == cls.ERROR_LOG:
        name = type(error).__name__
        sys.stderr.write('PySRT-%s(line %s): \n' % (name, index))
        # An error raised without arguments has nothing more to report.
        detail = error.args[0] if error.args else ''
        # encode() alone yields bytes, which a text stream refuses on
        # Python 3; decoding back gives an ASCII-only text string.
        sys.stderr.write(detail.encode('ascii', 'replace').decode('ascii'))
        sys.stderr.write('\n')
@ -1,76 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
SubRip's subtitle parser |
|||
""" |
|||
from pysrt.srtexc import InvalidItem, InvalidIndex |
|||
from pysrt.srttime import SubRipTime |
|||
from pysrt.comparablemixin import ComparableMixin |
|||
from pysrt.compat import str |
|||
|
|||
class SubRipItem(ComparableMixin):
    """
    SubRipItem(index, start, end, text, position)

    index -> int: index of item in file. 0 by default.
    start, end -> SubRipTime or coercible.
    text -> unicode: text content for item.
    position -> unicode: raw srt/vtt "display coordinates" string
    """
    ITEM_PATTERN = '%s\n%s --> %s%s\n%s\n'
    TIMESTAMP_SEPARATOR = '-->'

    def __init__(self, index=0, start=None, end=None, text='', position=''):
        # A numeric index is preferred but not required: anything that
        # cannot be converted to int is stored untouched.
        try:
            self.index = int(index)
        except (TypeError, ValueError):
            self.index = index

        self.start = SubRipTime.coerce(start if start else 0)
        self.end = SubRipTime.coerce(end if end else 0)
        self.position = str(position)
        self.text = str(text)

    def __str__(self):
        """Render the item with ITEM_PATTERN, using '\\n' line endings."""
        if self.position.strip():
            position = ' %s' % self.position
        else:
            position = ''
        fields = (self.index, self.start, self.end, position, self.text)
        return self.ITEM_PATTERN % fields

    def _cmpkey(self):
        """Return the (start, end) pair used as comparison key."""
        return (self.start, self.end)

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds, ratio)

        Forward the arguments to the `shift` method of both the start and
        the end timestamps.
        """
        for timestamp in (self.start, self.end):
            timestamp.shift(*args, **kwargs)

    @classmethod
    def from_string(cls, source):
        """Build an item from its textual form (see from_lines)."""
        source_lines = source.splitlines(True)
        return cls.from_lines(source_lines)

    @classmethod
    def from_lines(cls, lines):
        """Build an item from the lines of one subtitle block.

        The first line is taken as the index unless it already contains
        the timestamp separator. The next line holds the timestamps and
        all remaining lines form the text. Raises InvalidItem when fewer
        than two lines are given.
        """
        if len(lines) < 2:
            raise InvalidItem()
        stripped = [line.rstrip() for line in lines]
        index = None
        if cls.TIMESTAMP_SEPARATOR not in stripped[0]:
            index, stripped = stripped[0], stripped[1:]
        start, end, position = cls.split_timestamps(stripped[0])
        body = '\n'.join(stripped[1:])
        return cls(index, start, end, body, position)

    @classmethod
    def split_timestamps(cls, line):
        """Split a timestamp line into stripped start, end and position.

        Raises InvalidItem unless the separator occurs exactly once.
        Everything after the first space that follows the end timestamp
        is the position string. The result is a generator of the three
        values.
        """
        pieces = line.split(cls.TIMESTAMP_SEPARATOR)
        if len(pieces) != 2:
            raise InvalidItem()
        start, remainder = pieces
        end, _, position = remainder.lstrip().partition(' ')
        return (field.strip() for field in (start, end, position))
@ -1,176 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
SubRip's time format parser: HH:MM:SS,mmm |
|||
""" |
|||
import re |
|||
from datetime import time |
|||
|
|||
from pysrt.srtexc import InvalidTimeString |
|||
from pysrt.comparablemixin import ComparableMixin |
|||
from pysrt.compat import str, basestring |
|||
|
|||
class TimeItemDescriptor(object):
    """Descriptor exposing one unit of an instance's `ordinal` attribute.

    ratio -> size of the unit, expressed in the unit of `ordinal`.
    super_ratio -> size of the next larger unit; 0 when there is none, in
        which case the value is not wrapped.
    """
    # pylint: disable-msg=R0903

    def __init__(self, ratio, super_ratio=0):
        self.ratio, self.super_ratio = int(ratio), int(super_ratio)

    def _get_ordinal(self, instance):
        """Return the ordinal with everything above this unit removed."""
        total = instance.ordinal
        return total % self.super_ratio if self.super_ratio else total

    def __get__(self, instance, klass):
        # Only meaningful on instances; class-level access is refused.
        if instance is None:
            raise AttributeError
        return self._get_ordinal(instance) // self.ratio

    def __set__(self, instance, value):
        # Amount currently contributed by this unit to the ordinal.
        smaller_units = instance.ordinal % self.ratio
        current = self._get_ordinal(instance) - smaller_units
        # Swap that contribution for the new one.
        instance.ordinal += value * self.ratio - current
|||
|
|||
|
|||
class SubRipTime(ComparableMixin):
    """A timestamp stored as one millisecond count in `ordinal`.

    The `hours`, `minutes`, `seconds` and `milliseconds` attributes are
    views on that count provided by TimeItemDescriptor.
    """
    TIME_PATTERN = '%02d:%02d:%02d,%03d'
    TIME_REPR = 'SubRipTime(%d, %d, %d, %d)'
    RE_TIME_SEP = re.compile(r'\:|\.|\,')
    RE_INTEGER = re.compile(r'^(\d+)')
    SECONDS_RATIO = 1000
    MINUTES_RATIO = SECONDS_RATIO * 60
    HOURS_RATIO = MINUTES_RATIO * 60

    hours = TimeItemDescriptor(HOURS_RATIO)
    minutes = TimeItemDescriptor(MINUTES_RATIO, HOURS_RATIO)
    seconds = TimeItemDescriptor(SECONDS_RATIO, MINUTES_RATIO)
    milliseconds = TimeItemDescriptor(1, SECONDS_RATIO)

    def __init__(self, hours=0, minutes=0, seconds=0, milliseconds=0):
        """
        SubRipTime(hours, minutes, seconds, milliseconds)

        All arguments are optional and have a default value of 0.
        """
        super(SubRipTime, self).__init__()
        self.ordinal = (hours * self.HOURS_RATIO
                        + minutes * self.MINUTES_RATIO
                        + seconds * self.SECONDS_RATIO
                        + milliseconds)

    def __repr__(self):
        parts = tuple(self)
        return self.TIME_REPR % parts

    def __str__(self):
        if self.ordinal >= 0:
            return self.TIME_PATTERN % tuple(self)
        # Represent negative times as zero
        return str(SubRipTime.from_ordinal(0))

    def _compare(self, other, method):
        # Coerce first so any supported type can be compared with.
        coerced = self.coerce(other)
        return super(SubRipTime, self)._compare(coerced, method)

    def _cmpkey(self):
        """Return the millisecond count used as comparison key."""
        return self.ordinal

    def __add__(self, other):
        total = self.ordinal + self.coerce(other).ordinal
        return self.from_ordinal(total)

    def __iadd__(self, other):
        self.ordinal += self.coerce(other).ordinal
        return self

    def __sub__(self, other):
        difference = self.ordinal - self.coerce(other).ordinal
        return self.from_ordinal(difference)

    def __isub__(self, other):
        self.ordinal -= self.coerce(other).ordinal
        return self

    def __mul__(self, ratio):
        scaled = int(round(self.ordinal * ratio))
        return self.from_ordinal(scaled)

    def __imul__(self, ratio):
        self.ordinal = int(round(self.ordinal * ratio))
        return self

    @classmethod
    def coerce(cls, other):
        """
        Turn `other` into a SubRipTime instance.

        Handled, in this order:
            - SubRipTime: returned as is
            - str/unicode: parsed by from_string
            - int: taken as a millisecond count
            - datetime.time: converted by from_time
            - anything else: passed to the constructor as keyword
              arguments, or as positional arguments if that raises
              TypeError
        """
        if isinstance(other, SubRipTime):
            return other
        if isinstance(other, basestring):
            return cls.from_string(other)
        if isinstance(other, int):
            return cls.from_ordinal(other)
        if isinstance(other, time):
            return cls.from_time(other)
        try:
            result = cls(**other)
        except TypeError:
            result = cls(*other)
        return result

    def __iter__(self):
        """Yield hours, minutes, seconds and milliseconds, in that order."""
        for unit in ('hours', 'minutes', 'seconds', 'milliseconds'):
            yield getattr(self, unit)

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds, ratio)

        When a `ratio` keyword is given, the time is multiplied by it
        first. The remaining arguments then build an offset that is added
        in place. All arguments are optional.
        """
        if 'ratio' in kwargs:
            self *= kwargs.pop('ratio')
        offset = self.__class__(*args, **kwargs)
        self += offset

    @classmethod
    def from_ordinal(cls, ordinal):
        """
        int -> SubRipTime corresponding to a total count of milliseconds
        """
        count = int(ordinal)
        return cls(milliseconds=count)

    @classmethod
    def from_string(cls, source):
        """
        str/unicode(HH:MM:SS,mmm) -> SubRipTime

        Raises InvalidTimeString unless splitting on ':', '.' and ','
        gives exactly four fields.
        """
        fields = cls.RE_TIME_SEP.split(source)
        if len(fields) != 4:
            raise InvalidTimeString
        return cls(*[cls.parse_int(field) for field in fields])

    @classmethod
    def parse_int(cls, digits):
        """Return `digits` as an int, tolerating trailing garbage.

        When the whole string is not a valid integer, its leading run of
        digits is used; without any leading digit the result is 0.
        """
        try:
            return int(digits)
        except ValueError:
            found = cls.RE_INTEGER.match(digits)
            return int(found.group()) if found else 0

    @classmethod
    def from_time(cls, source):
        """
        datetime.time -> SubRipTime corresponding to time object
        """
        whole_milliseconds = source.microsecond // 1000
        return cls(hours=source.hour, minutes=source.minute,
                   seconds=source.second, milliseconds=whole_milliseconds)

    def to_time(self):
        """
        Convert SubRipTime instance into a pure datetime.time object
        """
        microseconds = self.milliseconds * 1000
        return time(self.hours, self.minutes, self.seconds, microseconds)
@ -1,2 +0,0 @@ |
|||
# Version as a (major, minor, patch) tuple and as a dotted string.
VERSION = (1, 0, 1)
VERSION_STRING = '.'.join(map(str, VERSION))
Loading…
Reference in new issue