From a9185b4aeec85f67f9ed6d49ec762b9621a5fe3e Mon Sep 17 00:00:00 2001 From: Safihre Date: Wed, 6 Mar 2019 08:54:22 +0100 Subject: [PATCH] Py3: Add pygettext to repo and basic test of POT extraction Python is deprecating pygettext.py so we better have our own, just like msgfmt.py --- tests/test_functional_misc.py | 26 +- tools/extract_pot.py | 96 +++---- tools/pygettext.py | 634 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 702 insertions(+), 54 deletions(-) create mode 100644 tools/pygettext.py diff --git a/tests/test_functional_misc.py b/tests/test_functional_misc.py index c8d40f0..7632f1d 100644 --- a/tests/test_functional_misc.py +++ b/tests/test_functional_misc.py @@ -18,7 +18,7 @@ """ tests.test_functional_misc - Functional tests of various functions """ -import os + import sys import subprocess import sabnzbd.encoding @@ -74,3 +74,27 @@ class TestSamplePostProc: for param in script_params: assert param in script_output assert env["SAB_VERSION"] in script_output + + +class TestExtractPot: + def test_extract_pot(self): + """ Simple test if translation extraction still works """ + script_call = [sys.executable, "tools/extract_pot.py"] + + # Run script and check output + script_call = subprocess.Popen(script_call, stdout=subprocess.PIPE) + script_output, errs = script_call.communicate(timeout=15) + script_output = sabnzbd.encoding.platform_btou(script_output) + + # Success message? + assert "Creating POT file" in script_output + assert "Finished creating POT file" in script_output + assert "Post-process POT file" in script_output + assert "Finished post-process POT file" in script_output + assert "Creating email POT file" in script_output + assert "Finished creating email POT file" in script_output + + # Check if the file was modified less than 30 seconds ago + cur_time = time.time() + assert (cur_time - os.path.getmtime("po/main/SABnzbd.pot")) < 30 + assert (cur_time - os.path.getmtime("po/email/SABemail.pot")) < 30 diff --git a/tools/extract_pot.py b/tools/extract_pot.py index 20eb96d..bdbad81 100755 --- a/tools/extract_pot.py +++ b/tools/extract_pot.py @@ -24,10 +24,8 @@ import sys import re # Import version.py without the sabnzbd overhead -f = open("sabnzbd/version.py") -code = f.read() -f.close() -exec(code) +with open("sabnzbd/version.py") as version_file: + exec(version_file.read()) # Fixed information for the POT header HEADER = ( @@ -56,48 +54,47 @@ EMAIL_DIR = "email" DOMAIN = "SABnzbd" DOMAIN_EMAIL = "SABemail" DOMAIN_NSIS = "SABnsis" -PARMS = "-d %s -p %s -w500 -k T -k Ta -k TT -o %s.pot.tmp" % (DOMAIN, PO_DIR, DOMAIN) +PARMS = "-d %s -p %s -w500 -k T -k TT -o %s.pot.tmp" % (DOMAIN, PO_DIR, DOMAIN) FILES = "SABnzbd.py SABHelper.py SABnzbdDelegate.py sabnzbd/*.py sabnzbd/utils/*.py" FILE_CACHE = {} +RE_LINE = re.compile(r"\s*([^: \t]+)\s*:\s*(\d+)") +RE_CONTEXT = re.compile(r"#:\s*(.*)$") + -def get_a_line(src, number): +def get_a_line(line_src, number): """ Retrieve line 'number' from file 'src' with caching """ global FILE_CACHE - if src not in FILE_CACHE: - FILE_CACHE[src] = [] - for line in open(src, "r"): - FILE_CACHE[src].append(line) + if line_src not in FILE_CACHE: + FILE_CACHE[line_src] = [] + for file_line in open(line_src, "r"): + FILE_CACHE[line_src].append(file_line) try: - return FILE_CACHE[src][number - 1] + return FILE_CACHE[line_src][number - 1] except: return "" -RE_LINE = re.compile(r"\s*([^: \t]+)\s*:\s*(\d+)") -RE_CONTEXT = re.compile(r"#:\s*(.*)$") - - -def get_context(line): +def get_context(ctx_line): """ Read context info from source file and append to line. input: "#: filepath.py:123 filepath2.py:456" output: "#: filepath.py:123 # [context info] # filepath2.py:456 # [context info 2]" """ - if not line.startswith("#:"): - return line + if not ctx_line.startswith("#:"): + return ctx_line newlines = [] - for item in line[2:].strip("\r\n").split(): + for item in ctx_line[2:].strip("\r\n").split(): m = RE_LINE.search(item) if m: - src = m.group(1) + line_src = m.group(1) number = m.group(2) else: newlines.append(item) continue - srcline = get_a_line(src, int(number)).strip("\r\n") + srcline = get_a_line(line_src, int(number)).strip("\r\n") context = "" m = RE_CONTEXT.search(srcline) if m: @@ -121,38 +118,30 @@ def get_context(line): return "#: " + " # ".join(newlines) + "\n" -def add_tmpl_to_pot(prefix, dst): +def add_tmpl_to_pot(prefix, dst_file): """ Append english template to open POT file 'dst' """ - src = open(EMAIL_DIR + "/%s-en.tmpl" % prefix, "r") - dst.write("#: email/%s.tmpl:1\n" % prefix) - dst.write('msgid ""\n') - for line in src: - dst.write('"%s"\n' % line.replace("\n", "\\n").replace('"', '\\"')) - dst.write('msgstr ""\n\n') - src.close() + with open(EMAIL_DIR + "/%s-en.tmpl" % prefix, "r") as tmpl_src: + dst_file.write("#: email/%s.tmpl:1\n" % prefix) + dst_file.write('msgid ""\n') + for tmpl_line in tmpl_src: + dst_file.write('"%s"\n' % tmpl_line.replace("\n", "\\n").replace('"', '\\"')) + dst_file.write('msgstr ""\n\n') +print("Creating POT file") if not os.path.exists(PO_DIR): os.makedirs(PO_DIR) # Determine location of PyGetText tool -path, exe = os.path.split(sys.executable) -if os.name == "nt": - TOOL = os.path.join(path, r"Tools\i18n\pygettext.py") - TOOL = "python " + TOOL -else: - TOOL = os.path.join(path, "pygettext.py") - if not os.path.exists(TOOL): - TOOL = "pygettext" - - -cmd = "%s %s %s" % (TOOL, PARMS, FILES) -print("Create POT file") +path, py = os.path.split(sys.argv[0]) +PYGETTEXT = os.path.abspath(os.path.normpath(os.path.join(path, "pygettext.py"))) +cmd = "%s %s %s %s" % (sys.executable, PYGETTEXT, PARMS, FILES) os.system(cmd) +print("Finished creating POT file") -print("Post-process the POT file") +print("Post-process POT file") src = open("%s/%s.pot.tmp" % (PO_DIR, DOMAIN), "r") -dst = open("%s/%s.pot" % (PO_DIR, DOMAIN), "wb") +dst = open("%s/%s.pot" % (PO_DIR, DOMAIN), "w") dst.write(HEADER.replace("__TYPE__", "MAIN")) header = True @@ -172,17 +161,17 @@ for line in src: src.close() dst.close() os.remove("%s/%s.pot.tmp" % (PO_DIR, DOMAIN)) +print("Finished post-process POT file") - -print("Create the email POT file") +print("Creating email POT file") if not os.path.exists(POE_DIR): os.makedirs(POE_DIR) -dst = open(os.path.join(POE_DIR, DOMAIN_EMAIL + ".pot"), "wb") -dst.write(HEADER.replace("__TYPE__", "EMAIL")) -add_tmpl_to_pot("email", dst) -add_tmpl_to_pot("rss", dst) -add_tmpl_to_pot("badfetch", dst) -dst.close() +with open(os.path.join(POE_DIR, DOMAIN_EMAIL + ".pot"), "w") as dst_email: + dst_email.write(HEADER.replace("__TYPE__", "EMAIL")) + add_tmpl_to_pot("email", dst_email) + add_tmpl_to_pot("rss", dst_email) + add_tmpl_to_pot("badfetch", dst_email) +print("Finished creating email POT file") # Create the NSIS POT file @@ -190,11 +179,11 @@ NSIS = "NSIS_Installer.nsi" RE_NSIS = re.compile(r'LangString\s+\w+\s+\$\{LANG_ENGLISH\}\s+(".*)', re.I) if os.path.exists(NSIS): - print("Creating the NSIS POT file") + print("Creating NSIS POT file") if not os.path.exists(PON_DIR): os.makedirs(PON_DIR) src = open(NSIS, "r") - dst = open(os.path.join(PON_DIR, DOMAIN_NSIS + ".pot"), "wb") + dst = open(os.path.join(PON_DIR, DOMAIN_NSIS + ".pot"), "w") dst.write(HEADER.replace("__TYPE__", "NSIS")) dst.write("\n") for line in src: @@ -206,3 +195,4 @@ if os.path.exists(NSIS): dst.write('msgstr ""\n\n') dst.close() src.close() + print("Finished creating NSIS POT file") diff --git a/tools/pygettext.py b/tools/pygettext.py new file mode 100644 index 0000000..a7c9921 --- /dev/null +++ b/tools/pygettext.py @@ -0,0 +1,634 @@ +#! /usr/bin/env python3 +# -*- coding: iso-8859-1 -*- +# Originally written by Barry Warsaw +# +# Minimally patched to make it even more xgettext compatible +# by Peter Funk +# +# 2002-11-22 Jürgen Hermann +# Added checks that _() only contains string literals, and +# command line args are resolved to module lists, i.e. you +# can now pass a filename, a module or package name, or a +# directory (including globbing chars, important for Win32). +# Made docstring fit in 80 chars wide displays using pydoc. +# + +# for selftesting +try: + import fintl + + _ = fintl.gettext +except ImportError: + _ = lambda s: s + +__doc__ = _( + """pygettext -- Python equivalent of xgettext(1) + +Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the +internationalization of C programs. Most of these tools are independent of +the programming language and can be used from within Python programs. +Martin von Loewis' work[1] helps considerably in this regard. + +There's one problem though; xgettext is the program that scans source code +looking for message strings, but it groks only C (or C++). Python +introduces a few wrinkles, such as dual quoting characters, triple quoted +strings, and raw strings. xgettext understands none of this. + +Enter pygettext, which uses Python's standard tokenize module to scan +Python source code, generating .pot files identical to what GNU xgettext[2] +generates for C and C++ code. From there, the standard GNU tools can be +used. + +A word about marking Python strings as candidates for translation. GNU +xgettext recognizes the following keywords: gettext, dgettext, dcgettext, +and gettext_noop. But those can be a lot of text to include all over your +code. C and C++ have a trick: they use the C preprocessor. Most +internationalized C source includes a #define for gettext() to _() so that +what has to be written in the source is much less. Thus these are both +translatable strings: + + gettext("Translatable String") + _("Translatable String") + +Python of course has no preprocessor so this doesn't work so well. Thus, +pygettext searches only for _() by default, but see the -k/--keyword flag +below for how to augment this. + + [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html + [2] http://www.gnu.org/software/gettext/gettext.html + +NOTE: pygettext attempts to be option and feature compatible with GNU +xgettext where ever possible. However some options are still missing or are +not fully implemented. Also, xgettext's use of command line switches with +option arguments is broken, and in these cases, pygettext just defines +additional switches. + +Usage: pygettext [options] inputfile ... + +Options: + + -a + --extract-all + Extract all strings. + + -d name + --default-domain=name + Rename the default output file from messages.pot to name.pot. + + -E + --escape + Replace non-ASCII characters with octal escape sequences. + + -D + --docstrings + Extract module, class, method, and function docstrings. These do + not need to be wrapped in _() markers, and in fact cannot be for + Python to consider them docstrings. (See also the -X option). + + -h + --help + Print this help message and exit. + + -k word + --keyword=word + Keywords to look for in addition to the default set, which are: + %(DEFAULTKEYWORDS)s + + You can have multiple -k flags on the command line. + + -K + --no-default-keywords + Disable the default set of keywords (see above). Any keywords + explicitly added with the -k/--keyword option are still recognized. + + --no-location + Do not write filename/lineno location comments. + + -n + --add-location + Write filename/lineno location comments indicating where each + extracted string is found in the source. These lines appear before + each msgid. The style of comments is controlled by the -S/--style + option. This is the default. + + -o filename + --output=filename + Rename the default output file from messages.pot to filename. If + filename is `-' then the output is sent to standard out. + + -p dir + --output-dir=dir + Output files will be placed in directory dir. + + -S stylename + --style stylename + Specify which style to use for location comments. Two styles are + supported: + + Solaris # File: filename, line: line-number + GNU #: filename:line + + The style name is case insensitive. GNU style is the default. + + -v + --verbose + Print the names of the files being processed. + + -V + --version + Print the version of pygettext and exit. + + -w columns + --width=columns + Set width of output to columns. + + -x filename + --exclude-file=filename + Specify a file that contains a list of strings that are not be + extracted from the input files. Each string to be excluded must + appear on a line by itself in the file. + + -X filename + --no-docstrings=filename + Specify a file that contains a list of files (one per line) that + should not have their docstrings extracted. This is only useful in + conjunction with the -D option above. + +If `inputfile' is -, standard input is read. +""" +) + +import os +import importlib.machinery +import importlib.util +import sys +import glob +import time +import getopt +import token +import tokenize + +__version__ = "1.5" + +default_keywords = ["_"] +DEFAULTKEYWORDS = ", ".join(default_keywords) + +EMPTYSTRING = "" + +# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's +# there. +pot_header = _( + """\ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\\n" +"POT-Creation-Date: %(time)s\\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n" +"Last-Translator: FULL NAME \\n" +"Language-Team: LANGUAGE \\n" +"MIME-Version: 1.0\\n" +"Content-Type: text/plain; charset=%(charset)s\\n" +"Content-Transfer-Encoding: %(encoding)s\\n" +"Generated-By: pygettext.py %(version)s\\n" + +""" +) + + +def usage(code, msg=""): + print(__doc__ % globals(), file=sys.stderr) + if msg: + print(msg, file=sys.stderr) + sys.exit(code) + + +def make_escapes(pass_nonascii): + global escapes, escape + if pass_nonascii: + # Allow non-ascii characters to pass through so that e.g. 'msgid + # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we + # escape any character outside the 32..126 range. + mod = 128 + escape = escape_ascii + else: + mod = 256 + escape = escape_nonascii + escapes = [r"\%03o" % i for i in range(mod)] + for i in range(32, 127): + escapes[i] = chr(i) + escapes[ord("\\")] = r"\\" + escapes[ord("\t")] = r"\t" + escapes[ord("\r")] = r"\r" + escapes[ord("\n")] = r"\n" + escapes[ord('"')] = r"\"" + + +def escape_ascii(s, encoding): + return "".join(escapes[ord(c)] if ord(c) < 128 else c for c in s) + + +def escape_nonascii(s, encoding): + return "".join(escapes[b] for b in s.encode(encoding)) + + +def is_literal_string(s): + return s[0] in "'\"" or (s[0] in "rRuU" and s[1] in "'\"") + + +def safe_eval(s): + # unwrap quotes, safely + return eval(s, {"__builtins__": {}}, {}) + + +def normalize(s, encoding): + # This converts the various Python string types into a format that is + # appropriate for .po files, namely much closer to C style. + lines = s.split("\n") + if len(lines) == 1: + s = '"' + escape(s, encoding) + '"' + else: + if not lines[-1]: + del lines[-1] + lines[-1] = lines[-1] + "\n" + for i in range(len(lines)): + lines[i] = escape(lines[i], encoding) + lineterm = '\\n"\n"' + s = '""\n"' + lineterm.join(lines) + '"' + return s + + +def containsAny(str, set): + """Check whether 'str' contains ANY of the chars in 'set'""" + return 1 in [c in str for c in set] + + +def getFilesForName(name): + """Get a list of module files for a filename, a module or package name, + or a directory. + """ + if not os.path.exists(name): + # check for glob chars + if containsAny(name, "*?[]"): + files = glob.glob(name) + list = [] + for file in files: + list.extend(getFilesForName(file)) + return list + + # try to find module or package + try: + spec = importlib.util.find_spec(name) + name = spec.origin + except ImportError: + name = None + if not name: + return [] + + if os.path.isdir(name): + # find all python files in directory + list = [] + # get extension for python source files + _py_ext = importlib.machinery.SOURCE_SUFFIXES[0] + for root, dirs, files in os.walk(name): + # don't recurse into CVS directories + if "CVS" in dirs: + dirs.remove("CVS") + # add all *.py files to list + list.extend([os.path.join(root, file) for file in files if os.path.splitext(file)[1] == _py_ext]) + return list + elif os.path.exists(name): + # a single file + return [name] + + return [] + + +class TokenEater: + def __init__(self, options): + self.__options = options + self.__messages = {} + self.__state = self.__waiting + self.__data = [] + self.__lineno = -1 + self.__freshmodule = 1 + self.__curfile = None + self.__enclosurecount = 0 + + def __call__(self, ttype, tstring, stup, etup, line): + # dispatch + ## import token + ## print('ttype:', token.tok_name[ttype], 'tstring:', tstring, + ## file=sys.stderr) + self.__state(ttype, tstring, stup[0]) + + def __waiting(self, ttype, tstring, lineno): + opts = self.__options + # Do docstring extractions, if enabled + if opts.docstrings and not opts.nodocstrings.get(self.__curfile): + # module docstring? + if self.__freshmodule: + if ttype == tokenize.STRING and is_literal_string(tstring): + self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__freshmodule = 0 + elif ttype not in (tokenize.COMMENT, tokenize.NL): + self.__freshmodule = 0 + return + # class or func/method docstring? + if ttype == tokenize.NAME and tstring in ("class", "def"): + self.__state = self.__suiteseen + return + if ttype == tokenize.NAME and tstring in opts.keywords: + self.__state = self.__keywordseen + + def __suiteseen(self, ttype, tstring, lineno): + # skip over any enclosure pairs until we see the colon + if ttype == tokenize.OP: + if tstring == ":" and self.__enclosurecount == 0: + # we see a colon and we're not in an enclosure: end of def + self.__state = self.__suitedocstring + elif tstring in "([{": + self.__enclosurecount += 1 + elif tstring in ")]}": + self.__enclosurecount -= 1 + + def __suitedocstring(self, ttype, tstring, lineno): + # ignore any intervening noise + if ttype == tokenize.STRING and is_literal_string(tstring): + self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__state = self.__waiting + elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT): + # there was no class docstring + self.__state = self.__waiting + + def __keywordseen(self, ttype, tstring, lineno): + if ttype == tokenize.OP and tstring == "(": + self.__data = [] + self.__lineno = lineno + self.__state = self.__openseen + else: + self.__state = self.__waiting + + def __openseen(self, ttype, tstring, lineno): + if ttype == tokenize.OP and tstring == ")": + # We've seen the last of the translatable strings. Record the + # line number of the first line of the strings and update the list + # of messages seen. Reset state for the next batch. If there + # were no strings inside _(), then just ignore this entry. + if self.__data: + self.__addentry(EMPTYSTRING.join(self.__data)) + self.__state = self.__waiting + elif ttype == tokenize.STRING and is_literal_string(tstring): + self.__data.append(safe_eval(tstring)) + elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]: + # warn if we see anything else than STRING or whitespace + print( + _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') + % {"token": tstring, "file": self.__curfile, "lineno": self.__lineno}, + file=sys.stderr, + ) + self.__state = self.__waiting + + def __addentry(self, msg, lineno=None, isdocstring=0): + if lineno is None: + lineno = self.__lineno + if not msg in self.__options.toexclude: + entry = (self.__curfile, lineno) + self.__messages.setdefault(msg, {})[entry] = isdocstring + + def set_filename(self, filename): + self.__curfile = filename + self.__freshmodule = 1 + + def write(self, fp): + options = self.__options + timestamp = time.strftime("%Y-%m-%d %H:%M%z") + encoding = fp.encoding if fp.encoding else "UTF-8" + print( + pot_header % {"time": timestamp, "version": __version__, "charset": encoding, "encoding": "8bit"}, file=fp + ) + # Sort the entries. First sort each particular entry's keys, then + # sort all the entries by their first item. + reverse = {} + for k, v in self.__messages.items(): + keys = sorted(v.keys()) + reverse.setdefault(tuple(keys), []).append((k, v)) + rkeys = sorted(reverse.keys()) + for rkey in rkeys: + rentries = reverse[rkey] + rentries.sort() + for k, v in rentries: + # If the entry was gleaned out of a docstring, then add a + # comment stating so. This is to aid translators who may wish + # to skip translating some unimportant docstrings. + isdocstring = any(v.values()) + # k is the message string, v is a dictionary-set of (filename, + # lineno) tuples. We want to sort the entries in v first by + # file name and then by line number. + v = sorted(v.keys()) + if not options.writelocations: + pass + # location comments are different b/w Solaris and GNU: + elif options.locationstyle == options.SOLARIS: + for filename, lineno in v: + d = {"filename": filename, "lineno": lineno} + print(_("# File: %(filename)s, line: %(lineno)d") % d, file=fp) + elif options.locationstyle == options.GNU: + # fit as many locations on one line, as long as the + # resulting line length doesn't exceed 'options.width' + locline = "#:" + for filename, lineno in v: + d = {"filename": filename, "lineno": lineno} + s = _(" %(filename)s:%(lineno)d") % d + if len(locline) + len(s) <= options.width: + locline = locline + s + else: + print(locline, file=fp) + locline = "#:" + s + if len(locline) > 2: + print(locline, file=fp) + if isdocstring: + print("#, docstring", file=fp) + print("msgid", normalize(k, encoding), file=fp) + print('msgstr ""\n', file=fp) + + +def main(): + global default_keywords + try: + opts, args = getopt.getopt( + sys.argv[1:], + "ad:DEhk:Kno:p:S:Vvw:x:X:", + [ + "extract-all", + "default-domain=", + "escape", + "help", + "keyword=", + "no-default-keywords", + "add-location", + "no-location", + "output=", + "output-dir=", + "style=", + "verbose", + "version", + "width=", + "exclude-file=", + "docstrings", + "no-docstrings", + ], + ) + except getopt.error as msg: + usage(1, msg) + + # for holding option values + class Options: + # constants + GNU = 1 + SOLARIS = 2 + # defaults + extractall = 0 # FIXME: currently this option has no effect at all. + escape = 0 + keywords = [] + outpath = "" + outfile = "messages.pot" + writelocations = 1 + locationstyle = GNU + verbose = 0 + width = 78 + excludefilename = "" + docstrings = 0 + nodocstrings = {} + + options = Options() + locations = {"gnu": options.GNU, "solaris": options.SOLARIS} + + # parse options + for opt, arg in opts: + if opt in ("-h", "--help"): + usage(0) + elif opt in ("-a", "--extract-all"): + options.extractall = 1 + elif opt in ("-d", "--default-domain"): + options.outfile = arg + ".pot" + elif opt in ("-E", "--escape"): + options.escape = 1 + elif opt in ("-D", "--docstrings"): + options.docstrings = 1 + elif opt in ("-k", "--keyword"): + options.keywords.append(arg) + elif opt in ("-K", "--no-default-keywords"): + default_keywords = [] + elif opt in ("-n", "--add-location"): + options.writelocations = 1 + elif opt in ("--no-location",): + options.writelocations = 0 + elif opt in ("-S", "--style"): + options.locationstyle = locations.get(arg.lower()) + if options.locationstyle is None: + usage(1, _("Invalid value for --style: %s") % arg) + elif opt in ("-o", "--output"): + options.outfile = arg + elif opt in ("-p", "--output-dir"): + options.outpath = arg + elif opt in ("-v", "--verbose"): + options.verbose = 1 + elif opt in ("-V", "--version"): + print(_("pygettext.py (xgettext for Python) %s") % __version__) + sys.exit(0) + elif opt in ("-w", "--width"): + try: + options.width = int(arg) + except ValueError: + usage(1, _("--width argument must be an integer: %s") % arg) + elif opt in ("-x", "--exclude-file"): + options.excludefilename = arg + elif opt in ("-X", "--no-docstrings"): + fp = open(arg) + try: + while 1: + line = fp.readline() + if not line: + break + options.nodocstrings[line[:-1]] = 1 + finally: + fp.close() + + # calculate escapes + make_escapes(not options.escape) + + # calculate all keywords + options.keywords.extend(default_keywords) + + # initialize list of strings to exclude + if options.excludefilename: + try: + fp = open(options.excludefilename) + options.toexclude = fp.readlines() + fp.close() + except IOError: + print(_("Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr) + sys.exit(1) + else: + options.toexclude = [] + + # resolve args to module lists + expanded = [] + for arg in args: + if arg == "-": + expanded.append(arg) + else: + expanded.extend(getFilesForName(arg)) + args = expanded + + # slurp through all the files + eater = TokenEater(options) + for filename in args: + if filename == "-": + if options.verbose: + print(_("Reading standard input")) + fp = sys.stdin.buffer + closep = 0 + else: + if options.verbose: + print(_("Working on %s") % filename) + fp = open(filename, "rb") + closep = 1 + try: + eater.set_filename(filename) + try: + tokens = tokenize.tokenize(fp.readline) + for _token in tokens: + eater(*_token) + except tokenize.TokenError as e: + print("%s: %s, line %d, column %d" % (e.args[0], filename, e.args[1][0], e.args[1][1]), file=sys.stderr) + finally: + if closep: + fp.close() + + # write the output + if options.outfile == "-": + fp = sys.stdout + closep = 0 + else: + if options.outpath: + options.outfile = os.path.join(options.outpath, options.outfile) + fp = open(options.outfile, "w") + closep = 1 + try: + eater.write(fp) + finally: + if closep: + fp.close() + + +if __name__ == "__main__": + main() + # some more test strings + # this one creates a warning + _('*** Seen unexpected token "%(token)s"') % {"token": "test"} + _("more" "than" "one" "string")