diff --git a/CHANGES.md b/CHANGES.md index fea4ce8..5702628 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,7 @@ * Update TZlocal 2.0.0.dev0 (b73a692) to 2.0.0b3 (410a838) * Update unidecode module 1.0.22 (a5045ab) to 1.1.1 (632af82) * Update urllib3 release 1.25.3 (3387b20) to 1.25.3 (67715fd) +* Update xmltodict library 0.12.0 (f3ab7e1) to 0.12.0 (02c9b71) [develop changelog] diff --git a/lib/xmltodict.py b/lib/xmltodict.py index 7e0a35e..9ad2ad5 100644 --- a/lib/xmltodict.py +++ b/lib/xmltodict.py @@ -50,7 +50,8 @@ class _DictSAXHandler(object): strip_whitespace=True, namespace_separator=':', namespaces=None, - force_list=None): + force_list=None, + comment_key='#comment'): self.path = [] self.stack = [] self.data = [] @@ -69,6 +70,7 @@ class _DictSAXHandler(object): self.namespaces = namespaces self.namespace_declarations = OrderedDict() self.force_list = force_list + self.comment_key = comment_key def _build_name(self, full_name): if self.namespaces is None: @@ -156,6 +158,11 @@ class _DictSAXHandler(object): else: self.data.append(data) + def comments(self, data): + if self.strip_whitespace: + data = data.strip() + self.item = self.push_data(self.item, self.comment_key, data) + def push_data(self, item, key, data): if self.postprocessor is not None: result = self.postprocessor(self.path, key, data) @@ -189,7 +196,7 @@ class _DictSAXHandler(object): def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, - namespace_separator=':', disable_entities=True, **kwargs): + namespace_separator=':', disable_entities=True, process_comments=False, **kwargs): """Parse the given XML input and convert it into a dictionary. `xml_input` can either be a `string`, a file-like object, or a generator of strings. @@ -291,6 +298,36 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, `force_list` can also be a callable that receives `path`, `key` and `value`. 
This is helpful in cases where the logic that decides whether a list should be forced is more complex. + + + If `process_comments` is `True` then each comment will be added under `comment_key` + (default=`'#comment'`) to the tag which contains the comment + + For example, given this input: + <a> + <b> + <!-- b comment --> + <c> + <!-- c comment --> 1 + </c> + <d>2</d> + </b> + </a> + If called with process_comments=True, it will produce + this dictionary: + 'a': { + 'b': { + '#comment': 'b comment', + 'c': { + + '#comment': 'c comment', + '#text': '1', + }, + 'd': '2', + }, + } """ handler = _DictSAXHandler(namespace_separator=namespace_separator, **kwargs) @@ -313,6 +350,8 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, parser.StartElementHandler = handler.startElement parser.EndElementHandler = handler.endElement parser.CharacterDataHandler = handler.characters + if process_comments: + parser.CommentHandler = handler.comments parser.buffer_text = True if disable_entities: try: