From f318524070beb21a1f763982ed3f37a011631229 Mon Sep 17 00:00:00 2001 From: Ruud Date: Mon, 20 Jan 2014 16:50:21 +0100 Subject: [PATCH] Update library: html5lib --- libs/html5lib/__init__.py | 2 +- libs/html5lib/inputstream.py | 7 ++++++- libs/html5lib/treebuilders/__init__.py | 0 libs/html5lib/treebuilders/_base.py | 0 libs/html5lib/treebuilders/etree.py | 0 libs/html5lib/treewalkers/lxmletree.py | 6 +----- 6 files changed, 8 insertions(+), 7 deletions(-) mode change 100755 => 100644 libs/html5lib/treebuilders/__init__.py mode change 100755 => 100644 libs/html5lib/treebuilders/_base.py mode change 100755 => 100644 libs/html5lib/treebuilders/etree.py diff --git a/libs/html5lib/__init__.py b/libs/html5lib/__init__.py index 66c1a8e..19a4b7d 100644 --- a/libs/html5lib/__init__.py +++ b/libs/html5lib/__init__.py @@ -20,4 +20,4 @@ from .serializer import serialize __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", "getTreeWalker", "serialize"] -__version__ = "0.99" +__version__ = "0.999" diff --git a/libs/html5lib/inputstream.py b/libs/html5lib/inputstream.py index 004bdd4..9e03b93 100644 --- a/libs/html5lib/inputstream.py +++ b/libs/html5lib/inputstream.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, division, unicode_literals from six import text_type +from six.moves import http_client import codecs import re @@ -118,7 +119,11 @@ class BufferedStream(object): def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): - if hasattr(source, "read"): + if isinstance(source, http_client.HTTPResponse): + # Work around Python bug #20007: read(0) closes the connection. + # http://bugs.python.org/issue20007 + isUnicode = False + elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), text_type) else: isUnicode = isinstance(source, text_type) diff --git a/libs/html5lib/treebuilders/__init__.py b/libs/html5lib/treebuilders/__init__.py old mode 100755 new mode 100644 diff --git a/libs/html5lib/treebuilders/_base.py b/libs/html5lib/treebuilders/_base.py old mode 100755 new mode 100644 diff --git a/libs/html5lib/treebuilders/etree.py b/libs/html5lib/treebuilders/etree.py old mode 100755 new mode 100644 diff --git a/libs/html5lib/treewalkers/lxmletree.py b/libs/html5lib/treewalkers/lxmletree.py index 375cc2e..bc934ac 100644 --- a/libs/html5lib/treewalkers/lxmletree.py +++ b/libs/html5lib/treewalkers/lxmletree.py @@ -87,10 +87,6 @@ class FragmentWrapper(object): self.tail = ensure_str(self.obj.tail) else: self.tail = None - self.isstring = isinstance(obj, str) or isinstance(obj, bytes) - # Support for bytes here is Py2 - if self.isstring: - self.obj = ensure_str(self.obj) def __getattr__(self, name): return getattr(self.obj, name) @@ -143,7 +139,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker): elif isinstance(node, Doctype): return _base.DOCTYPE, node.name, node.public_id, node.system_id - elif isinstance(node, FragmentWrapper) and node.isstring: + elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): return _base.TEXT, node.obj elif node.tag == etree.Comment: