Browse Source

Update library: html5lib

pull/2742/merge
Ruud 11 years ago
parent
commit
f318524070
  1. 2
      libs/html5lib/__init__.py
  2. 7
      libs/html5lib/inputstream.py
  3. 0
      libs/html5lib/treebuilders/__init__.py
  4. 0
      libs/html5lib/treebuilders/_base.py
  5. 0
      libs/html5lib/treebuilders/etree.py
  6. 6
      libs/html5lib/treewalkers/lxmletree.py

2
libs/html5lib/__init__.py

@@ -20,4 +20,4 @@ from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"] "getTreeWalker", "serialize"]
__version__ = "0.99" __version__ = "0.999"

7
libs/html5lib/inputstream.py

@@ -1,5 +1,6 @@
from __future__ import absolute_import, division, unicode_literals from __future__ import absolute_import, division, unicode_literals
from six import text_type from six import text_type
from six.moves import http_client
import codecs import codecs
import re import re
@@ -118,7 +119,11 @@ class BufferedStream(object):
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
if hasattr(source, "read"): if isinstance(source, http_client.HTTPResponse):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
isUnicode = False
elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type) isUnicode = isinstance(source.read(0), text_type)
else: else:
isUnicode = isinstance(source, text_type) isUnicode = isinstance(source, text_type)

0
libs/html5lib/treebuilders/__init__.py

0
libs/html5lib/treebuilders/_base.py

0
libs/html5lib/treebuilders/etree.py

6
libs/html5lib/treewalkers/lxmletree.py

@@ -87,10 +87,6 @@ class FragmentWrapper(object):
self.tail = ensure_str(self.obj.tail) self.tail = ensure_str(self.obj.tail)
else: else:
self.tail = None self.tail = None
self.isstring = isinstance(obj, str) or isinstance(obj, bytes)
# Support for bytes here is Py2
if self.isstring:
self.obj = ensure_str(self.obj)
def __getattr__(self, name): def __getattr__(self, name):
return getattr(self.obj, name) return getattr(self.obj, name)
@@ -143,7 +139,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
elif isinstance(node, Doctype): elif isinstance(node, Doctype):
return _base.DOCTYPE, node.name, node.public_id, node.system_id return _base.DOCTYPE, node.name, node.public_id, node.system_id
elif isinstance(node, FragmentWrapper) and node.isstring: elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
return _base.TEXT, node.obj return _base.TEXT, node.obj
elif node.tag == etree.Comment: elif node.tag == etree.Comment:

Loading…
Cancel
Save