Update xmltodict library 0.12.0 (f3ab7e1) → 0.12.0 (02c9b71).

6 years ago · 02e8d6264f
2 changed files with 42 additions and 2 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -15,6 +15,7 @@
 * Update TZlocal 2.0.0.dev0 (b73a692) to 2.0.0b3 (410a838)
 * Update unidecode module 1.0.22 (a5045ab) to 1.1.1 (632af82)
 * Update urllib3 release 1.25.3 (3387b20) to 1.25.3 (67715fd)
+* Update xmltodict library 0.12.0 (f3ab7e1) to 0.12.0 (02c9b71)


 [develop changelog]
--- a/lib/xmltodict.py
+++ b/lib/xmltodict.py
@ -50,7 +50,8 @@ class _DictSAXHandler(object):
                 strip_whitespace=True,
                 namespace_separator=':',
                 namespaces=None,
-                 force_list=None):
+                 force_list=None,
+                 comment_key='#comment'):
        self.path = []
        self.stack = []
        self.data = []
@ -69,6 +70,7 @@ class _DictSAXHandler(object):
        self.namespaces = namespaces
        self.namespace_declarations = OrderedDict()
        self.force_list = force_list
+        self.comment_key = comment_key

    def _build_name(self, full_name):
        if self.namespaces is None:
@ -156,6 +158,11 @@ class _DictSAXHandler(object):
        else:
            self.data.append(data)

+    def comments(self, data):
+        if self.strip_whitespace:
+            data = data.strip()
+        self.item = self.push_data(self.item, self.comment_key, data)
+
    def push_data(self, item, key, data):
        if self.postprocessor is not None:
            result = self.postprocessor(self.path, key, data)
@ -189,7 +196,7 @@ class _DictSAXHandler(object):


 def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
-          namespace_separator=':', disable_entities=True, **kwargs):
+          namespace_separator=':', disable_entities=True, process_comments=False, **kwargs):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can either be a `string`, a file-like object, or a generator of strings.
@ -291,6 +298,36 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
        `force_list` can also be a callable that receives `path`, `key` and
        `value`. This is helpful in cases where the logic that decides whether
        a list should be forced is more complex.
+
+
+        If `process_comment` is `True` then comment will be added with comment_key
+        (default=`'#comment'`) to then tag which contains comment
+
+            For example, given this input:
+            <a>
+              <b>
+                <!-- b comment -->
+                <c>
+                    <!-- c comment -->
+                    1
+                </c>
+                <d>2</d>
+              </b>
+            </a>
+
+            If called with process_comment=True, it will produce
+            this dictionary:
+            'a': {
+                'b': {
+                    '#comment': 'b comment',
+                    'c': {
+
+                        '#comment': 'c comment',
+                        '#text': '1',
+                    },
+                    'd': '2',
+                },
+            }
    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
@ -313,6 +350,8 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
+    if process_comments:
+        parser.CommentHandler = handler.comments
    parser.buffer_text = True
    if disable_entities:
        try: