Browse Source
Python is deprecating pygettext.py, so we'd better have our own, just like msgfmt.py (pull/1259/head)
3 changed files with 702 additions and 54 deletions
@ -0,0 +1,634 @@ |
|||
#! /usr/bin/env python3 |
|||
# -*- coding: iso-8859-1 -*- |
|||
# Originally written by Barry Warsaw <barry@python.org> |
|||
# |
|||
# Minimally patched to make it even more xgettext compatible |
|||
# by Peter Funk <pf@artcom-gmbh.de> |
|||
# |
|||
# 2002-11-22 Jürgen Hermann <jh@web.de> |
|||
# Added checks that _() only contains string literals, and |
|||
# command line args are resolved to module lists, i.e. you |
|||
# can now pass a filename, a module or package name, or a |
|||
# directory (including globbing chars, important for Win32). |
|||
# Made docstring fit in 80 chars wide displays using pydoc. |
|||
# |
|||
|
|||
# for selftesting
try:
    import fintl

    _ = fintl.gettext
except ImportError:
    # PEP 8 (E731): use a def rather than binding a lambda to a name, so
    # the identity fallback has a proper __name__ in tracebacks/profiles.
    def _(s):
        return s
|||
|
|||
# The module docstring doubles as the --help text printed by usage(); it is
# %-interpolated against globals() (for %(DEFAULTKEYWORDS)s) and wrapped in
# _() so the help text itself is translatable.
__doc__ = _(
    """pygettext -- Python equivalent of xgettext(1)

Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.

There's one problem though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python
introduces a few wrinkles, such as dual quoting characters, triple quoted
strings, and raw strings. xgettext understands none of this.

Enter pygettext, which uses Python's standard tokenize module to scan
Python source code, generating .pot files identical to what GNU xgettext[2]
generates for C and C++ code. From there, the standard GNU tools can be
used.

A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
and gettext_noop. But those can be a lot of text to include all over your
code. C and C++ have a trick: they use the C preprocessor. Most
internationalized C source includes a #define for gettext() to _() so that
what has to be written in the source is much less. Thus these are both
translatable strings:

    gettext("Translatable String")
    _("Translatable String")

Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.

[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] http://www.gnu.org/software/gettext/gettext.html

NOTE: pygettext attempts to be option and feature compatible with GNU
xgettext where ever possible. However some options are still missing or are
not fully implemented. Also, xgettext's use of command line switches with
option arguments is broken, and in these cases, pygettext just defines
additional switches.

Usage: pygettext [options] inputfile ...

Options:

    -a
    --extract-all
        Extract all strings.

    -d name
    --default-domain=name
        Rename the default output file from messages.pot to name.pot.

    -E
    --escape
        Replace non-ASCII characters with octal escape sequences.

    -D
    --docstrings
        Extract module, class, method, and function docstrings. These do
        not need to be wrapped in _() markers, and in fact cannot be for
        Python to consider them docstrings. (See also the -X option).

    -h
    --help
        Print this help message and exit.

    -k word
    --keyword=word
        Keywords to look for in addition to the default set, which are:
        %(DEFAULTKEYWORDS)s

        You can have multiple -k flags on the command line.

    -K
    --no-default-keywords
        Disable the default set of keywords (see above). Any keywords
        explicitly added with the -k/--keyword option are still recognized.

    --no-location
        Do not write filename/lineno location comments.

    -n
    --add-location
        Write filename/lineno location comments indicating where each
        extracted string is found in the source. These lines appear before
        each msgid. The style of comments is controlled by the -S/--style
        option. This is the default.

    -o filename
    --output=filename
        Rename the default output file from messages.pot to filename. If
        filename is `-' then the output is sent to standard out.

    -p dir
    --output-dir=dir
        Output files will be placed in directory dir.

    -S stylename
    --style stylename
        Specify which style to use for location comments. Two styles are
        supported:

        Solaris  # File: filename, line: line-number
        GNU      #: filename:line

        The style name is case insensitive. GNU style is the default.

    -v
    --verbose
        Print the names of the files being processed.

    -V
    --version
        Print the version of pygettext and exit.

    -w columns
    --width=columns
        Set width of output to columns.

    -x filename
    --exclude-file=filename
        Specify a file that contains a list of strings that are not be
        extracted from the input files. Each string to be excluded must
        appear on a line by itself in the file.

    -X filename
    --no-docstrings=filename
        Specify a file that contains a list of files (one per line) that
        should not have their docstrings extracted. This is only useful in
        conjunction with the -D option above.

If `inputfile' is -, standard input is read.
"""
)
|||
|
|||
import os |
|||
import importlib.machinery |
|||
import importlib.util |
|||
import sys |
|||
import glob |
|||
import time |
|||
import getopt |
|||
import token |
|||
import tokenize |
|||
|
|||
__version__ = "1.5"

# Keywords scanned for by default; -K/--no-default-keywords clears this and
# -k/--keyword adds to it (see main()).
default_keywords = ["_"]
# Pre-joined form, interpolated into the --help text as %(DEFAULTKEYWORDS)s.
DEFAULTKEYWORDS = ", ".join(default_keywords)

# Joiner for adjacent string-literal fragments collected inside one _() call.
EMPTYSTRING = ""
|||
|
|||
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.  Interpolated in TokenEater.write() with %(time)s, %(version)s,
# %(charset)s and %(encoding)s; wrapped in _() so it can be localized.
pot_header = _(
    """\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%(charset)s\\n"
"Content-Transfer-Encoding: %(encoding)s\\n"
"Generated-By: pygettext.py %(version)s\\n"

"""
)
|||
|
|||
|
|||
def usage(code, msg=""):
    """Print the module help text (and an optional error message) to stderr,
    then terminate the process with exit status `code`."""
    err = sys.stderr
    print(__doc__ % globals(), file=err)
    if msg:
        print(msg, file=err)
    sys.exit(code)
|||
|
|||
|
|||
def make_escapes(pass_nonascii):
    """(Re)build the module-global `escapes` table and select the matching
    `escape` function.

    With `pass_nonascii` true, characters >= 128 pass through untouched so
    that e.g. 'msgid "Höhe"' does not become 'msgid "H\\366he"'; otherwise
    every character outside the printable 32..126 range is octal-escaped.
    """
    global escapes, escape
    if pass_nonascii:
        limit = 128
        escape = escape_ascii
    else:
        limit = 256
        escape = escape_nonascii
    # Start with an octal escape for every code point in range...
    escapes = [r"\%03o" % code for code in range(limit)]
    # ...then let printable ASCII through verbatim...
    for code in range(32, 127):
        escapes[code] = chr(code)
    # ...except the characters that need a C-style escape in .po files.
    for char, sequence in (
        ("\\", r"\\"),
        ("\t", r"\t"),
        ("\r", r"\r"),
        ("\n", r"\n"),
        ('"', r"\""),
    ):
        escapes[ord(char)] = sequence
|||
|
|||
|
|||
def escape_ascii(s, encoding):
    """Escape ASCII characters of `s` via the module-global `escapes` table,
    passing non-ASCII characters through unchanged.  `encoding` is unused
    here; the parameter exists to match escape_nonascii's signature."""
    out = []
    for ch in s:
        code = ord(ch)
        out.append(escapes[code] if code < 128 else ch)
    return "".join(out)
|||
|
|||
|
|||
def escape_nonascii(s, encoding):
    """Encode `s` with `encoding` and map every resulting byte through the
    module-global `escapes` table, octal-escaping all non-printable and
    non-ASCII bytes."""
    encoded = s.encode(encoding)
    return "".join(escapes[byte] for byte in encoded)
|||
|
|||
|
|||
def is_literal_string(s):
    """Return True if token text `s` looks like a plain (or r/R/u/U-prefixed)
    string literal rather than an identifier or other token."""
    if s[0] in "'\"":
        return True
    return s[0] in "rRuU" and s[1] in "'\""
|||
|
|||
|
|||
def safe_eval(s):
    """Evaluate the string-literal token `s` (unwrapping its quotes and
    escape sequences) with all builtins disabled, so the token cannot run
    arbitrary code."""
    no_builtins = {"__builtins__": {}}
    return eval(s, no_builtins, {})
|||
|
|||
|
|||
def normalize(s, encoding):
    """Convert a Python string into the representation used in .po files
    (much closer to C style): a single quoted line for one-line strings, or
    a '""' header followed by one quoted line per source line."""
    chunks = s.split("\n")
    if len(chunks) == 1:
        return '"' + escape(s, encoding) + '"'
    # Multi-line: fold a trailing newline back onto the last real line.
    if not chunks[-1]:
        del chunks[-1]
        chunks[-1] += "\n"
    escaped = [escape(chunk, encoding) for chunk in chunks]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
|||
|
|||
|
|||
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'.

    NOTE: the parameter names shadow the builtins `str` and `set`; they are
    kept as-is for backward compatibility with keyword callers.
    """
    # any() short-circuits on the first hit and returns a real bool, unlike
    # the original ``1 in [c in str for c in set]`` idiom which always built
    # the full intermediate list.
    return any(c in str for c in set)
|||
|
|||
|
|||
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory (globbing characters allowed).

    Returns an empty list when nothing matching is found.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            files = glob.glob(name)
            result = []
            for entry in files:
                result.extend(getFilesForName(entry))
            return result

        # try to find module or package
        try:
            spec = importlib.util.find_spec(name)
        except (ImportError, ValueError):
            # ValueError can be raised for e.g. dotted names whose parent
            # module has no __spec__.
            spec = None
        # BUGFIX: find_spec() returns None for unknown top-level modules
        # (it only *raises* for missing parents of dotted names), and
        # spec.origin is None for builtin/namespace modules.  The original
        # code crashed with AttributeError on `None.origin`.
        name = spec.origin if spec is not None else None
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        result = []
        # get extension for python source files
        _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
        for root, dirs, files in os.walk(name):
            # don't recurse into CVS directories
            if "CVS" in dirs:
                dirs.remove("CVS")
            # add all *.py files to the result
            result.extend(
                os.path.join(root, basename)
                for basename in files
                if os.path.splitext(basename)[1] == _py_ext
            )
        return result
    elif os.path.exists(name):
        # a single file
        return [name]

    return []
|||
|
|||
|
|||
class TokenEater:
    """Token-stream state machine for message extraction.

    An instance is called once per token produced by tokenize.tokenize();
    it collects translatable strings (keyword calls like _("...") and,
    optionally, docstrings) and writes them out in .pot format via write().
    """

    def __init__(self, options):
        # the Options instance built in main()
        self.__options = options
        # msgid -> {(filename, lineno): isdocstring flag}
        self.__messages = {}
        # current state handler (bound method); handlers switch state
        self.__state = self.__waiting
        # string-literal fragments collected inside the current keyword call
        self.__data = []
        # line number where the current keyword call started
        self.__lineno = -1
        # true until the first significant token of a new file is seen
        self.__freshmodule = 1
        self.__curfile = None
        # bracket nesting depth while scanning a class/def header
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch
        ## import token
        ## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
        ##     file=sys.stderr)
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: watch for docstrings (if enabled) and for keywords."""
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING and is_literal_string(tstring):
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    self.__freshmodule = 0
                return
            # class or func/method docstring?
            if ttype == tokenize.NAME and tstring in ("class", "def"):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        """After 'class'/'def': skip the header up to its terminating colon."""
        # skip over any enclosure pairs until we see the colon
        if ttype == tokenize.OP:
            if tstring == ":" and self.__enclosurecount == 0:
                # we see a colon and we're not in an enclosure: end of def
                self.__state = self.__suitedocstring
            elif tstring in "([{":
                self.__enclosurecount += 1
            elif tstring in ")]}":
                self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        """Right after a suite colon: take a leading string as the docstring."""
        # ignore any intervening noise
        if ttype == tokenize.STRING and is_literal_string(tstring):
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        """After a keyword name: only an opening paren keeps us collecting."""
        if ttype == tokenize.OP and tstring == "(":
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        """Inside 'keyword(': gather adjacent string literals until ')'."""
        if ttype == tokenize.OP and tstring == ")":
            # We've seen the last of the translatable strings. Record the
            # line number of the first line of the strings and update the list
            # of messages seen. Reset state for the next batch. If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING and is_literal_string(tstring):
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print(
                _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"')
                % {"token": tstring, "file": self.__curfile, "lineno": self.__lineno},
                file=sys.stderr,
            )
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        # Record one occurrence of `msg`, unless it is on the exclude list.
        if lineno is None:
            lineno = self.__lineno
        if not msg in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file: remember its name and re-arm the
        module-docstring detection."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Emit the collected messages to `fp` in .pot format."""
        options = self.__options
        timestamp = time.strftime("%Y-%m-%d %H:%M%z")
        # Fall back to UTF-8 for streams without a declared encoding.
        encoding = fp.encoding if fp.encoding else "UTF-8"
        print(
            pot_header % {"time": timestamp, "version": __version__, "charset": encoding, "encoding": "8bit"}, file=fp
        )
        # Sort the entries. First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = sorted(reverse.keys())
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so. This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples. We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {"filename": filename, "lineno": lineno}
                        print(_("# File: %(filename)s, line: %(lineno)d") % d, file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = "#:"
                    for filename, lineno in v:
                        d = {"filename": filename, "lineno": lineno}
                        s = _(" %(filename)s:%(lineno)d") % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print("#, docstring", file=fp)
                print("msgid", normalize(k, encoding), file=fp)
                print('msgstr ""\n', file=fp)
|||
|
|||
|
|||
def main():
    """Command-line entry point: parse options, scan the input files with a
    TokenEater, and write the resulting .pot file."""
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            "ad:DEhk:Kno:p:S:Vvw:x:X:",
            [
                "extract-all",
                "default-domain=",
                "escape",
                "help",
                "keyword=",
                "no-default-keywords",
                "add-location",
                "no-location",
                "output=",
                "output-dir=",
                "style=",
                "verbose",
                "version",
                "width=",
                "exclude-file=",
                "docstrings",
                "no-docstrings",
            ],
        )
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0  # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ""
        outfile = "messages.pot"
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ""
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {"gnu": options.GNU, "solaris": options.SOLARIS}

    # parse options
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage(0)
        elif opt in ("-a", "--extract-all"):
            options.extractall = 1
        elif opt in ("-d", "--default-domain"):
            options.outfile = arg + ".pot"
        elif opt in ("-E", "--escape"):
            options.escape = 1
        elif opt in ("-D", "--docstrings"):
            options.docstrings = 1
        elif opt in ("-k", "--keyword"):
            options.keywords.append(arg)
        elif opt in ("-K", "--no-default-keywords"):
            default_keywords = []
        elif opt in ("-n", "--add-location"):
            options.writelocations = 1
        elif opt in ("--no-location",):
            options.writelocations = 0
        elif opt in ("-S", "--style"):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _("Invalid value for --style: %s") % arg)
        elif opt in ("-o", "--output"):
            options.outfile = arg
        elif opt in ("-p", "--output-dir"):
            options.outpath = arg
        elif opt in ("-v", "--verbose"):
            options.verbose = 1
        elif opt in ("-V", "--version"):
            print(_("pygettext.py (xgettext for Python) %s") % __version__)
            sys.exit(0)
        elif opt in ("-w", "--width"):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _("--width argument must be an integer: %s") % arg)
        elif opt in ("-x", "--exclude-file"):
            options.excludefilename = arg
        elif opt in ("-X", "--no-docstrings"):
            # 'with' guarantees the file is closed; iterating the file
            # object replaces the original manual readline()/break loop.
            # Each line names one file whose docstrings are skipped.
            with open(arg) as fp:
                for line in fp:
                    options.nodocstrings[line[:-1]] = 1

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            # context manager replaces the original unprotected
            # open()/readlines()/close() sequence (which leaked the handle
            # if readlines() raised).
            with open(options.excludefilename) as fp:
                options.toexclude = fp.readlines()
        except IOError:
            print(_("Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == "-":
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == "-":
            if options.verbose:
                print(_("Reading standard input"))
            # tokenize needs a bytes-mode readline
            fp = sys.stdin.buffer
            closep = 0
        else:
            if options.verbose:
                print(_("Working on %s") % filename)
            fp = open(filename, "rb")
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokens = tokenize.tokenize(fp.readline)
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                # report unterminated strings/brackets but keep going with
                # the remaining input files
                print("%s: %s, line %d, column %d" % (e.args[0], filename, e.args[1][0], e.args[1][1]), file=sys.stderr)
        finally:
            # never close stdin's buffer
            if closep:
                fp.close()

    # write the output
    if options.outfile == "-":
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, "w")
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
|||
|
|||
|
|||
if __name__ == "__main__":
    main()
    # some more test strings
    # (deliberate selftest input: running pygettext on this file itself
    # should extract these and emit one warning for the %-format call)
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {"token": "test"}
    _("more" "than" "one" "string")
Loading…
Reference in new issue