You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

338 lines
12 KiB

12 years ago
from __future__ import absolute_import, division, unicode_literals
from six import text_type
13 years ago
import re
12 years ago
from . import _base
from .. import ihatexml
from .. import constants
from ..constants import namespaces
from ..utils import moduleFactoryFactory
13 years ago
# Matches names written as "{namespace}localname".  Group 1 is the text
# between the braces, group 2 is everything after the closing brace.
tag_regexp = re.compile("{([^}]*)}(.*)")
def getETreeBuilder(ElementTreeImplementation, fullTree=False):
ElementTree = ElementTreeImplementation
12 years ago
ElementTreeCommentType = ElementTree.Comment("asd").tag
13 years ago
class Element(_base.Node):
    """Tree-builder node backed by an ElementTree element.

    Every instance keeps two views of its children: ``_childNodes`` (the
    wrapper nodes) and the children of ``_element`` (the wrapped ElementTree
    elements).  All mutating methods update both so they stay in step.

    Text follows the ElementTree model: text before the first child lives in
    ``_element.text`` and text after a child lives in that child's ``tail``.
    """

    def __init__(self, name, namespace=None):
        """Create the wrapper and its underlying ElementTree element.

        ``name`` is the local name; ``namespace`` is the namespace URI or
        None.  When no namespace is given, ``nameTuple`` uses
        ``namespaces["html"]`` while the ElementTree tag stays unqualified.
        """
        self._name = name
        self._namespace = namespace
        self._element = ElementTree.Element(self._getETreeTag(name,
                                                              namespace))
        if namespace is None:
            self.nameTuple = namespaces["html"], self._name
        else:
            self.nameTuple = self._namespace, self._name
        self.parent = None
        self._childNodes = []
        self._flags = []

    def _getETreeTag(self, name, namespace):
        """Return the ElementTree tag: ``name`` or ``{namespace}name``."""
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        return etree_tag

    def _setName(self, name):
        self._name = name
        # Keep the wrapped element's tag in sync with the new name.
        self._element.tag = self._getETreeTag(self._name, self._namespace)

    def _getName(self):
        return self._name

    name = property(_getName, _setName)

    def _setNamespace(self, namespace):
        self._namespace = namespace
        # Keep the wrapped element's tag in sync with the new namespace.
        self._element.tag = self._getETreeTag(self._name, self._namespace)

    def _getNamespace(self):
        return self._namespace

    namespace = property(_getNamespace, _setNamespace)

    def _getAttributes(self):
        """Return the wrapped element's live attribute mapping."""
        return self._element.attrib

    def _setAttributes(self, attributes):
        """Replace all attributes with the items of ``attributes``.

        Tuple keys are turned into ``{key[2]}key[1]`` names; any other key
        is used as the attribute name unchanged.
        """
        # Delete existing attributes first
        # XXX - there may be a better way to do this...
        for key in list(self._element.attrib.keys()):
            del self._element.attrib[key]
        for key, value in attributes.items():
            if isinstance(key, tuple):
                # NOTE(review): key[0] is unused here; presumably the tuple
                # is (prefix, local name, namespace) -- confirm with caller.
                name = "{%s}%s" % (key[2], key[1])
            else:
                name = key
            self._element.set(name, value)

    attributes = property(_getAttributes, _setAttributes)

    def _getChildNodes(self):
        return self._childNodes

    def _setChildNodes(self, value):
        """Drop all current children and append each node in ``value``."""
        del self._element[:]
        self._childNodes = []
        for element in value:
            # The original called self.insertChild, which this class does
            # not define; appendChild updates both child views.
            self.appendChild(element)

    childNodes = property(_getChildNodes, _setChildNodes)

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self._element.text or len(self._element))

    def appendChild(self, node):
        """Append ``node`` as the last child and set its parent."""
        self._childNodes.append(node)
        self._element.append(node._element)
        node.parent = self

    def insertBefore(self, node, refNode):
        """Insert ``node`` immediately before the child ``refNode``.

        Raises ValueError if ``refNode`` is not a child of this element.
        """
        index = list(self._element).index(refNode._element)
        self._element.insert(index, node._element)
        # Mirror the insertion in the wrapper list so childNodes stays
        # consistent with the underlying element's children.
        self._childNodes.insert(index, node)
        node.parent = self

    def removeChild(self, node):
        """Remove the child ``node`` and clear its parent.

        Raises ValueError if ``node`` is not a child of this element.
        """
        self._element.remove(node._element)
        # Mirror the removal in the wrapper list as well.
        if node in self._childNodes:
            self._childNodes.remove(node)
        node.parent = None

    def insertText(self, data, insertBefore=None):
        """Add the text ``data`` to this element.

        With no children the text is appended to the element's own text.
        Otherwise it is appended to the tail of the last child, or, when
        ``insertBefore`` is given, to the tail of the child preceding it
        (or to the element's own text if it is the first child).
        """
        if not len(self._element):
            self._element.text = (self._element.text or "") + data
        elif insertBefore is None:
            # Insert the text as the tail of the last child element
            last = self._element[-1]
            last.tail = (last.tail or "") + data
        else:
            # Insert the text before the specified node
            children = list(self._element)
            index = children.index(insertBefore._element)
            if index > 0:
                previous = self._element[index - 1]
                previous.tail = (previous.tail or "") + data
            else:
                self._element.text = (self._element.text or "") + data

    def cloneNode(self):
        """Return a childless copy with the same name, namespace and
        attributes."""
        element = type(self)(self.name, self.namespace)
        for name, value in self.attributes.items():
            element.attributes[name] = value
        return element

    def reparentChildren(self, newParent):
        """Move this element's text and children to ``newParent``.

        The text goes to the tail of ``newParent``'s last child, or to
        ``newParent``'s own text when it has no children.  The children are
        then moved by ``_base.Node.reparentChildren``.
        """
        text = self._element.text
        if newParent.childNodes:
            # Either side may be None in ElementTree; only concatenate when
            # there is text to move, and treat a missing tail as empty.
            if text:
                last = newParent.childNodes[-1]._element
                last.tail = (last.tail or "") + text
        else:
            if not newParent._element.text:
                newParent._element.text = ""
            if text is not None:
                newParent._element.text += text
        self._element.text = ""
        _base.Node.reparentChildren(self, newParent)
12 years ago
13 years ago
class Comment(Element):
    """Node wrapping an ElementTree comment element."""

    def __init__(self, data):
        # Element.__init__ is not called here: the wrapper attributes are
        # assigned directly and the wrapped element is created with
        # ElementTree.Comment rather than ElementTree.Element.
        self.parent = None
        self._childNodes = []
        self._flags = []
        self._element = ElementTree.Comment(data)

    def _getData(self):
        """Return the comment text stored on the wrapped element."""
        return self._element.text

    def _setData(self, value):
        """Store ``value`` as the comment text on the wrapped element."""
        self._element.text = value

    data = property(fget=_getData, fset=_setData)
12 years ago
13 years ago
class DocumentType(Element):
    """Doctype node, stored as an element tagged ``<!DOCTYPE>``.

    The doctype name is kept in the element's text; the public and system
    identifiers are kept as the ``publicId`` and ``systemId`` attributes.
    """

    def __init__(self, name, publicId, systemId):
        Element.__init__(self, "<!DOCTYPE>")
        self._element.text = name
        # These go through the properties below, which skip None values.
        self.publicId = publicId
        self.systemId = systemId

    def _getPublicId(self):
        """Return the public identifier, or "" when none is stored."""
        return self._element.get("publicId", "")

    def _setPublicId(self, value):
        """Store the public identifier; a None value is ignored."""
        if value is None:
            return
        self._element.set("publicId", value)

    publicId = property(fget=_getPublicId, fset=_setPublicId)

    def _getSystemId(self):
        """Return the system identifier, or "" when none is stored."""
        return self._element.get("systemId", "")

    def _setSystemId(self, value):
        """Store the system identifier; a None value is ignored."""
        if value is None:
            return
        self._element.set("systemId", value)

    systemId = property(fget=_getSystemId, fset=_setSystemId)
12 years ago
13 years ago
class Document(Element):
    """Document node, stored as an element tagged ``DOCUMENT_ROOT``."""

    def __init__(self):
        # No namespace is passed, so Element's default (None) applies.
        Element.__init__(self, name="DOCUMENT_ROOT")
13 years ago
class DocumentFragment(Element):
    """Fragment node, stored as an element tagged ``DOCUMENT_FRAGMENT``."""

    def __init__(self):
        # No namespace is passed, so Element's default (None) applies.
        Element.__init__(self, name="DOCUMENT_FRAGMENT")
13 years ago
def testSerializer(element):
rv = []
12 years ago
13 years ago
def serializeElement(element, indent=0):
if not(hasattr(element, "tag")):
element = element.getroot()
if element.tag == "<!DOCTYPE>":
if element.get("publicId") or element.get("systemId"):
publicId = element.get("publicId") or ""
systemId = element.get("systemId") or ""
12 years ago
rv.append("""<!DOCTYPE %s "%s" "%s">""" %
(element.text, publicId, systemId))
else:
rv.append("<!DOCTYPE %s>" % (element.text,))
elif element.tag == "DOCUMENT_ROOT":
13 years ago
rv.append("#document")
12 years ago
if element.text is not None:
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
if element.tail is not None:
raise TypeError("Document node cannot have tail")
if hasattr(element, "attrib") and len(element.attrib):
raise TypeError("Document node cannot have attributes")
elif element.tag == ElementTreeCommentType:
rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
13 years ago
else:
12 years ago
assert isinstance(element.tag, text_type), \
"Expected unicode, got %s, %s" % (type(element.tag), element.tag)
13 years ago
nsmatch = tag_regexp.match(element.tag)
if nsmatch is None:
name = element.tag
else:
ns, name = nsmatch.groups()
prefix = constants.prefixes[ns]
12 years ago
name = "%s %s" % (prefix, name)
rv.append("|%s<%s>" % (' ' * indent, name))
13 years ago
if hasattr(element, "attrib"):
attributes = []
12 years ago
for name, value in element.attrib.items():
13 years ago
nsmatch = tag_regexp.match(name)
if nsmatch is not None:
ns, name = nsmatch.groups()
prefix = constants.prefixes[ns]
12 years ago
attr_string = "%s %s" % (prefix, name)
13 years ago
else:
attr_string = name
attributes.append((attr_string, value))
for name, value in sorted(attributes):
12 years ago
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
13 years ago
if element.text:
12 years ago
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
13 years ago
indent += 2
for child in element:
serializeElement(child, indent)
if element.tail:
12 years ago
rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
13 years ago
serializeElement(element, 0)
12 years ago
13 years ago
return "\n".join(rv)
12 years ago
13 years ago
def tostring(element):
    """Serialize an element and its child nodes to a string

    ``element`` may be an element or an ``ElementTree.ElementTree``, in
    which case its root is serialized.  Text, tail and attribute values are
    appended as-is, without escaping.

    Raises TypeError if a ``DOCUMENT_ROOT`` element has a tail or
    attributes.
    """
    rv = []
    # Named to avoid shadowing the ``filter`` builtin.
    infoset_filter = ihatexml.InfosetFilter()

    def serializeElement(element):
        if isinstance(element, ElementTree.ElementTree):
            element = element.getroot()

        if element.tag == "<!DOCTYPE>":
            if element.get("publicId") or element.get("systemId"):
                publicId = element.get("publicId") or ""
                systemId = element.get("systemId") or ""
                rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                          (element.text, publicId, systemId))
            else:
                rv.append("<!DOCTYPE %s>" % (element.text,))
        elif element.tag == "DOCUMENT_ROOT":
            if element.text is not None:
                rv.append(element.text)
            if element.tail is not None:
                raise TypeError("Document node cannot have tail")
            if hasattr(element, "attrib") and len(element.attrib):
                raise TypeError("Document node cannot have attributes")

            for child in element:
                serializeElement(child)

        elif element.tag == ElementTreeCommentType:
            rv.append("<!--%s-->" % (element.text,))
        else:
            # This is assumed to be an ordinary element
            # Convert the tag once and use it for the start tag (with or
            # without attributes) and the end tag, so they always match.
            # NOTE(review): fromXmlName presumably reverses the filter's
            # name escaping -- confirm in ihatexml.
            tag = infoset_filter.fromXmlName(element.tag)
            if not element.attrib:
                rv.append("<%s>" % (tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (
                    infoset_filter.fromXmlName(name), value)
                    for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (tag,))

        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
12 years ago
13 years ago
class TreeBuilder(_base.TreeBuilder):
    """Tree builder whose nodes wrap elements of the chosen ElementTree
    implementation."""

    # Node classes the base builder instantiates.
    elementClass = Element
    commentClass = Comment
    doctypeClass = DocumentType
    documentClass = Document
    fragmentClass = DocumentFragment
    implementation = ElementTreeImplementation

    def testSerializer(self, element):
        """Delegate to the enclosing scope's ``testSerializer`` function."""
        return testSerializer(element)

    def getDocument(self):
        """Return the built document as an ElementTree element.

        With ``fullTree`` set, the document's own element is returned.
        Otherwise the result is its ``html`` child, looked up with the
        default namespace when one is set.
        """
        root = self.document._element
        if fullTree:
            return root
        if self.defaultNamespace is None:
            return root.find("html")
        return root.find("{%s}html" % self.defaultNamespace)

    def getFragment(self):
        """Return the ElementTree element of the fragment node built by the
        base class."""
        fragment = _base.TreeBuilder.getFragment(self)
        return fragment._element
12 years ago
13 years ago
return locals()
12 years ago
# Public entry point: getETreeBuilder wrapped by moduleFactoryFactory
# (imported from ..utils).
# NOTE(review): presumably returns a module-like object per ElementTree
# implementation -- confirm against utils.moduleFactoryFactory.
getETreeModule = moduleFactoryFactory(getETreeBuilder)