|
|
@ -50,10 +50,11 @@ class _DictSAXHandler(object): |
|
|
|
dict_constructor=OrderedDict, |
|
|
|
strip_whitespace=True, |
|
|
|
namespace_separator=':', |
|
|
|
namespaces=None): |
|
|
|
namespaces=None, |
|
|
|
force_list=()): |
|
|
|
self.path = [] |
|
|
|
self.stack = [] |
|
|
|
self.data = None |
|
|
|
self.data = [] |
|
|
|
self.item = None |
|
|
|
self.item_depth = item_depth |
|
|
|
self.xml_attribs = xml_attribs |
|
|
@ -67,6 +68,7 @@ class _DictSAXHandler(object): |
|
|
|
self.strip_whitespace = strip_whitespace |
|
|
|
self.namespace_separator = namespace_separator |
|
|
|
self.namespaces = namespaces |
|
|
|
self.force_list = force_list |
|
|
|
|
|
|
|
def _build_name(self, full_name): |
|
|
|
if not self.namespaces: |
|
|
@ -99,21 +101,25 @@ class _DictSAXHandler(object): |
|
|
|
else: |
|
|
|
attrs = None |
|
|
|
self.item = attrs or None |
|
|
|
self.data = None |
|
|
|
self.data = [] |
|
|
|
|
|
|
|
def endElement(self, full_name): |
|
|
|
name = self._build_name(full_name) |
|
|
|
if len(self.path) == self.item_depth: |
|
|
|
item = self.item |
|
|
|
if item is None: |
|
|
|
item = self.data |
|
|
|
item = (None if not self.data |
|
|
|
else self.cdata_separator.join(self.data)) |
|
|
|
|
|
|
|
should_continue = self.item_callback(self.path, item) |
|
|
|
if not should_continue: |
|
|
|
raise ParsingInterrupted() |
|
|
|
if len(self.stack): |
|
|
|
item, data = self.item, self.data |
|
|
|
data = (None if not self.data |
|
|
|
else self.cdata_separator.join(self.data)) |
|
|
|
item = self.item |
|
|
|
self.item, self.data = self.stack.pop() |
|
|
|
if self.strip_whitespace and data is not None: |
|
|
|
if self.strip_whitespace and data: |
|
|
|
data = data.strip() or None |
|
|
|
if data and self.force_cdata and item is None: |
|
|
|
item = self.dict_constructor() |
|
|
@ -124,14 +130,15 @@ class _DictSAXHandler(object): |
|
|
|
else: |
|
|
|
self.item = self.push_data(self.item, name, data) |
|
|
|
else: |
|
|
|
self.item = self.data = None |
|
|
|
self.item = None |
|
|
|
self.data = [] |
|
|
|
self.path.pop() |
|
|
|
|
|
|
|
def characters(self, data): |
|
|
|
if not self.data: |
|
|
|
self.data = data |
|
|
|
self.data = [data] |
|
|
|
else: |
|
|
|
self.data += self.cdata_separator + data |
|
|
|
self.data.append(data) |
|
|
|
|
|
|
|
def push_data(self, item, key, data): |
|
|
|
if self.postprocessor is not None: |
|
|
@ -148,7 +155,10 @@ class _DictSAXHandler(object): |
|
|
|
else: |
|
|
|
item[key] = [value, data] |
|
|
|
except KeyError: |
|
|
|
item[key] = data |
|
|
|
if key in self.force_list: |
|
|
|
item[key] = [data] |
|
|
|
else: |
|
|
|
item[key] = data |
|
|
|
return item |
|
|
|
|
|
|
|
|
|
|
@ -220,6 +230,37 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, |
|
|
|
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat) |
|
|
|
OrderedDict([(u'a', u'hello')]) |
|
|
|
|
|
|
|
You can use the force_list argument to force lists to be created even |
|
|
|
when there is only a single child of a given level of hierarchy. The |
|
|
|
force_list argument is a tuple of keys. If the key for a given level |
|
|
|
of hierarchy is in the force_list argument, that level of hierarchy |
|
|
|
will have a list as a child (even if there is only one sub-element). |
|
|
|
The index_keys operation takes precedence over this. This is applied |
|
|
|
after any user-supplied postprocessor has already run. |
|
|
|
|
|
|
|
For example, given this input: |
|
|
|
<servers> |
|
|
|
<server> |
|
|
|
<name>host1</name> |
|
|
|
<os>Linux</os> |
|
|
|
<interfaces> |
|
|
|
<interface> |
|
|
|
<name>em0</name> |
|
|
|
<ip_address>10.0.0.1</ip_address> |
|
|
|
</interface> |
|
|
|
</interfaces> |
|
|
|
</server> |
|
|
|
</servers> |
|
|
|
|
|
|
|
If called with force_list=('interface',), it will produce |
|
|
|
this dictionary: |
|
|
|
{'servers': |
|
|
|
{'server': |
|
|
|
{'name': 'host1', |
|
|
|
'os': 'Linux', |
|
|
|
'interfaces': |
|
|
|
{'interface': |
|
|
|
[ {'name': 'em0', 'ip_address': '10.0.0.1' } ] } } } } |
|
|
|
""" |
|
|
|
handler = _DictSAXHandler(namespace_separator=namespace_separator, |
|
|
|
**kwargs) |
|
|
|