Update Beautiful Soup 4.6.3 (r475) → 4.7.1 (r497).

6 years ago · 0ec577e69c
9 changed files with 209 additions and 425 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,6 +1,7 @@
 ### 0.20.0 (2019-xx-xx xx:xx:xx UTC)
 * Update attr 18.3.0.dev0 (55642b3) to 19.2.0.dev0 (de84609) 
 * Update Beautiful Soup 4.6.3 (r475) to 4.7.1 (r497)
 [develop changelog]
--- a/lib/bs4/__init__.py
+++ b/lib/bs4/__init__.py
@@ -17,12 +17,10 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.6.3"
+__version__ = "4.7.1"
-__copyright__ = "Copyright (c) 2004-2018 Leonard Richardson"
+__copyright__ = "Copyright (c) 2004-2019 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
 __all__ = ['BeautifulSoup']
@@ -237,10 +235,11 @@ class BeautifulSoup(Tag):
        self.builder = builder
        self.is_xml = builder.is_xml
        self.known_xml = self.is_xml
-        self.builder.soup = self
+        self._namespaces = dict()
        self.parse_only = parse_only
        self.builder.initialize_soup(self)
        if hasattr(markup, 'read'):        # It's a file-type object.
            markup = markup.read()
        elif len(markup) <= 256 and (
@@ -382,7 +381,7 @@ class BeautifulSoup(Tag):
    def pushTag(self, tag):
        #print "Push", tag.name
-        if self.currentTag:
+        if self.currentTag is not None:
            self.currentTag.contents.append(tag)
        self.tagStack.append(tag)
        self.currentTag = self.tagStack[-1]
@@ -421,60 +420,71 @@ class BeautifulSoup(Tag):
    def object_was_parsed(self, o, parent=None, most_recent_element=None):
        """Add an object to the parse tree."""
-        parent = parent or self.currentTag
+        if parent is None:
-        previous_element = most_recent_element or self._most_recent_element
+            parent = self.currentTag
        if most_recent_element is not None:
            previous_element = most_recent_element
        else:
            previous_element = self._most_recent_element
        next_element = previous_sibling = next_sibling = None
        if isinstance(o, Tag):
            next_element = o.next_element
            next_sibling = o.next_sibling
            previous_sibling = o.previous_sibling
-            if not previous_element:
+            if previous_element is None:
                previous_element = o.previous_element
        fix = parent.next_element is not None
        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
        self._most_recent_element = o
        parent.contents.append(o)
-        if parent.next_sibling:
+        # Check if we are inserting into an already parsed node.
-            # This node is being inserted into an element that has
+        if fix:
-            # already been parsed. Deal with any dangling references.
+            self._linkage_fixer(parent)
-            index = len(parent.contents)-1
+
-            while index >= 0:
+    def _linkage_fixer(self, el):
-                if parent.contents[index] is o:
+        """Make sure linkage of this fragment is sound."""
-                    break
+
-                index -= 1
+        first = el.contents[0]
-            else:
+        child = el.contents[-1]
-                raise ValueError(
+        descendant = child
-                    "Error building tree: supposedly %r was inserted "
+
-                    "into %r after the fact, but I don't see it!" % (
+        if child is first and el.parent is not None:
-                        o, parent
+            # Parent should be linked to first child
-                    )
+            el.next_element = child
-                )
+            # We are no longer linked to whatever this element is
-            if index == 0:
+            prev_el = child.previous_element
-                previous_element = parent
+            if prev_el is not None and prev_el is not el:
-                previous_sibling = None
+                prev_el.next_element = None
-            else:
+            # First child should be linked to the parent, and no previous siblings.
-                previous_element = previous_sibling = parent.contents[index-1]
+            child.previous_element = el
-            if index == len(parent.contents)-1:
+            child.previous_sibling = None
-                next_element = parent.next_sibling
+
-                next_sibling = None
+        # We have no sibling as we've been appended as the last.
-            else:
+        child.next_sibling = None
-                next_element = next_sibling = parent.contents[index+1]
+
-
+        # This index is a tag, dig deeper for a "last descendant"
-            o.previous_element = previous_element
+        if isinstance(child, Tag) and child.contents:
-            if previous_element:
+            descendant = child._last_descendant(False)
-                previous_element.next_element = o
+
-            o.next_element = next_element
+        # As the final step, link last descendant. It should be linked
-            if next_element:
+        # to the parent's next sibling (if found), else walk up the chain
-                next_element.previous_element = o
+        # and find a parent with a sibling. It should have no next sibling.
-            o.next_sibling = next_sibling
+        descendant.next_element = None
-            if next_sibling:
+        descendant.next_sibling = None
-                next_sibling.previous_sibling = o
+        target = el
-            o.previous_sibling = previous_sibling
+        while True:
-            if previous_sibling:
+            if target is None:
-                previous_sibling.next_sibling = o
+                break
            elif target.next_sibling is not None:
                descendant.next_element = target.next_sibling
                target.next_sibling.previous_element = child
                break
            target = target.parent
    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
@@ -520,7 +530,7 @@ class BeautifulSoup(Tag):
                  self.currentTag, self._most_recent_element)
        if tag is None:
            return tag
-        if self._most_recent_element:
+        if self._most_recent_element is not None:
            self._most_recent_element.next_element = tag
        self._most_recent_element = tag
        self.pushTag(tag)
--- a/lib/bs4/builder/__init__.py
+++ b/lib/bs4/builder/__init__.py
@@ -1,5 +1,5 @@
-# Use of this source code is governed by a BSD-style license that can be
+# Use of this source code is governed by the MIT license.
-# found in the LICENSE file.
+__license__ = "MIT"
 from collections import defaultdict
 import itertools
@@ -8,7 +8,7 @@ from bs4.element import (
    CharsetMetaAttributeValue,
    ContentMetaAttributeValue,
    HTMLAwareEntitySubstitution,
-    whitespace_re
+    nonwhitespace_re
    )
 __all__ = [
@@ -102,6 +102,12 @@ class TreeBuilder(object):
    def __init__(self):
        self.soup = None
    def initialize_soup(self, soup):
        """The BeautifulSoup object has been initialized and is now
        being associated with the TreeBuilder.
        """
        self.soup = soup
    def reset(self):
        pass
@@ -167,7 +173,7 @@ class TreeBuilder(object):
                    # values. Split it into a list.
                    value = attrs[attr]
                    if isinstance(value, basestring):
-                        values = whitespace_re.split(value)
+                        values = nonwhitespace_re.findall(value)
                    else:
                        # html5lib sometimes calls setAttributes twice
                        # for the same tag when rearranging the parse
--- a/lib/bs4/builder/_html5lib.py
+++ b/lib/bs4/builder/_html5lib.py
@@ -1,5 +1,5 @@
-# Use of this source code is governed by a BSD-style license that can be
+# Use of this source code is governed by the MIT license.
-# found in the LICENSE file.
+__license__ = "MIT"
 __all__ = [
    'HTML5TreeBuilder',
@@ -15,7 +15,7 @@ from bs4.builder import (
    )
 from bs4.element import (
    NamespacedAttribute,
-    whitespace_re,
+    nonwhitespace_re,
 )
 import html5lib
 from html5lib.constants import (
@@ -206,7 +206,7 @@ class AttrList(object):
            # A node that is being cloned may have already undergone
            # this procedure.
            if not isinstance(value, list):
-                value = whitespace_re.split(value)
+                value = nonwhitespace_re.findall(value)
        self.element[name] = value
    def items(self):
        return list(self.attrs.items())
@@ -249,7 +249,7 @@ class Element(treebuilder_base.Node):
        if not isinstance(child, basestring) and child.parent is not None:
            node.element.extract()
-        if (string_child and self.element.contents
+        if (string_child is not None and self.element.contents
            and self.element.contents[-1].__class__ == NavigableString):
            # We are appending a string onto another string.
            # TODO This has O(n^2) performance, for input like
@@ -360,16 +360,16 @@ class Element(treebuilder_base.Node):
            # Set the first child's previous_element and previous_sibling
            # to elements within the new parent
            first_child = to_append[0]
-            if new_parents_last_descendant:
+            if new_parents_last_descendant is not None:
                first_child.previous_element = new_parents_last_descendant
            else:
                first_child.previous_element = new_parent_element
            first_child.previous_sibling = new_parents_last_child
-            if new_parents_last_descendant:
+            if new_parents_last_descendant is not None:
                new_parents_last_descendant.next_element = first_child
            else:
                new_parent_element.next_element = first_child
-            if new_parents_last_child:
+            if new_parents_last_child is not None:
                new_parents_last_child.next_sibling = first_child
            # Find the very last element being moved. It is now the
@@ -379,7 +379,7 @@ class Element(treebuilder_base.Node):
            last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
            last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
-            if new_parents_last_descendant_next_element:
+            if new_parents_last_descendant_next_element is not None:
                # TODO: This code has no test coverage and I'm not sure
                # how to get html5lib to go through this path, but it's
                # just the other side of the previous line.
--- a/lib/bs4/builder/_htmlparser.py
+++ b/lib/bs4/builder/_htmlparser.py
@@ -1,8 +1,8 @@
 # encoding: utf-8
 """Use the HTMLParser library to parse HTML files that aren't too bad."""
-# Use of this source code is governed by a BSD-style license that can be
+# Use of this source code is governed by the MIT license.
-# found in the LICENSE file.
+__license__ = "MIT"
 __all__ = [
    'HTMLParserTreeBuilder',
--- a/lib/bs4/builder/_lxml.py
+++ b/lib/bs4/builder/_lxml.py
@@ -1,5 +1,6 @@
-# Use of this source code is governed by a BSD-style license that can be
+# Use of this source code is governed by the MIT license.
-# found in the LICENSE file.
+__license__ = "MIT"
 __all__ = [
    'LXMLTreeBuilderForXML',
    'LXMLTreeBuilder',
@@ -32,6 +33,10 @@ from bs4.dammit import EncodingDetector
 LXML = 'lxml'
 def _invert(d):
    "Invert a dictionary."
    return dict((v,k) for k, v in d.items())
 class LXMLTreeBuilderForXML(TreeBuilder):
    DEFAULT_PARSER_CLASS = etree.XMLParser
@@ -48,7 +53,29 @@ class LXMLTreeBuilderForXML(TreeBuilder):
    # This namespace mapping is specified in the XML Namespace
    # standard.
-    DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
+    DEFAULT_NSMAPS = dict(xml='http://www.w3.org/XML/1998/namespace')
    DEFAULT_NSMAPS_INVERTED = _invert(DEFAULT_NSMAPS)
    def initialize_soup(self, soup):
        """Let the BeautifulSoup object know about the standard namespace
        mapping.
        """
        super(LXMLTreeBuilderForXML, self).initialize_soup(soup)
        self._register_namespaces(self.DEFAULT_NSMAPS)
    def _register_namespaces(self, mapping):
        """Let the BeautifulSoup object know about namespaces encountered
        while parsing the document.
        This might be useful later on when creating CSS selectors.
        """
        for key, value in mapping.items():
            if key and key not in self.soup._namespaces:
                # Let the BeautifulSoup object know about a new namespace.
                # If there are multiple namespaces defined with the same
                # prefix, the first one in the document takes precedence.
                self.soup._namespaces[key] = value
    def default_parser(self, encoding):
        # This can either return a parser object or a class, which
@@ -75,8 +102,8 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        if empty_element_tags is not None:
            self.empty_element_tags = set(empty_element_tags)
        self.soup = None
-        self.nsmaps = [self.DEFAULT_NSMAPS]
+        self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
-
+        
    def _getNsTag(self, tag):
        # Split the namespace URL out of a fully-qualified lxml tag
        # name. Copied from lxml's src/lxml/sax.py.
@@ -144,7 +171,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            raise ParserRejectedMarkup(str(e))
    def close(self):
-        self.nsmaps = [self.DEFAULT_NSMAPS]
+        self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
    def start(self, name, attrs, nsmap={}):
        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
@@ -158,8 +185,14 @@ class LXMLTreeBuilderForXML(TreeBuilder):
                self.nsmaps.append(None)
        elif len(nsmap) > 0:
            # A new namespace mapping has come into play.
-            inverted_nsmap = dict((value, key) for key, value in nsmap.items())
+
-            self.nsmaps.append(inverted_nsmap)
+            # First, Let the BeautifulSoup object know about it.
            self._register_namespaces(nsmap)
            # Then, add it to our running list of inverted namespace
            # mappings.
            self.nsmaps.append(_invert(nsmap))
            # Also treat the namespace mapping as a set of attributes on the
            # tag, so we can recreate it later.
            attrs = attrs.copy()
--- a/lib/bs4/dammit.py
+++ b/lib/bs4/dammit.py
@@ -6,8 +6,7 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal
 Feed Parser. It works best on XML and HTML, but it does not rewrite the
 XML or HTML to reflect a new encoding; that's the tree builder's job.
 """
-# Use of this source code is governed by a BSD-style license that can be
+# Use of this source code is governed by the MIT license.
 # found in the LICENSE file.
 __license__ = "MIT"
 import codecs
--- a/lib/bs4/diagnose.py
+++ b/lib/bs4/diagnose.py
@@ -1,7 +1,6 @@
 """Diagnostic functions, mainly for use when doing tech support."""
-# Use of this source code is governed by a BSD-style license that can be
+# Use of this source code is governed by the MIT license.
 # found in the LICENSE file.
 __license__ = "MIT"
 import cProfile
--- a/lib/bs4/element.py
+++ b/lib/bs4/element.py
@@ -1,5 +1,4 @@
-# Use of this source code is governed by a BSD-style license that can be
+# Use of this source code is governed by the MIT license.
 # found in the LICENSE file.
 __license__ = "MIT"
 try:
@@ -7,14 +6,25 @@ try:
 except ImportError , e:
    from collections import Callable
 import re
 import shlex
 import sys
 import warnings
 try:
    import soupsieve
 except ImportError, e:
    soupsieve = None
    warnings.warn(
        'The soupsieve package is not installed. CSS selectors cannot be used.'
    )
 from bs4.dammit import EntitySubstitution
 DEFAULT_OUTPUT_ENCODING = "utf-8"
 PY3K = (sys.version_info[0] > 2)
 nonwhitespace_re = re.compile(r"\S+")
 # NOTE: This isn't used as of 4.7.0. I'm leaving it for a little bit on
 # the off chance someone imported it for their own use.
 whitespace_re = re.compile(r"\s+")
 def _alias(attr):
@@ -207,7 +217,7 @@ class PageElement(object):
        if formatter is None:
            output = s
        else:
-            if callable(formatter):
+            if isinstance(formatter, Callable):
                # Backwards compatibility -- you used to pass in a formatting method.
                output = formatter(s)
            else:
@@ -256,26 +266,26 @@ class PageElement(object):
            self.previous_element.next_element = self
        self.next_element = next_element
-        if self.next_element:
+        if self.next_element is not None:
            self.next_element.previous_element = self
        self.next_sibling = next_sibling
-        if self.next_sibling:
+        if self.next_sibling is not None:
            self.next_sibling.previous_sibling = self
-        if (not previous_sibling
+        if (previous_sibling is None
            and self.parent is not None and self.parent.contents):
            previous_sibling = self.parent.contents[-1]
        self.previous_sibling = previous_sibling
-        if previous_sibling:
+        if previous_sibling is not None:
            self.previous_sibling.next_sibling = self
    nextSibling = _alias("next_sibling")  # BS3
    previousSibling = _alias("previous_sibling")  # BS3
    def replace_with(self, replace_with):
-        if not self.parent:
+        if self.parent is None:
            raise ValueError(
                "Cannot replace one element with another when the"
                "element to be replaced is not part of a tree.")
@@ -292,7 +302,7 @@ class PageElement(object):
    def unwrap(self):
        my_parent = self.parent
-        if not self.parent:
+        if self.parent is None:
            raise ValueError(
                "Cannot replace an element with its contents when that"
                "element is not part of a tree.")
@@ -340,7 +350,7 @@ class PageElement(object):
    def _last_descendant(self, is_initialized=True, accept_self=True):
        "Finds the last element beneath this object to be parsed."
-        if is_initialized and self.next_sibling:
+        if is_initialized and self.next_sibling is not None:
            last_child = self.next_sibling.previous_element
        else:
            last_child = self
@@ -430,43 +440,54 @@ class PageElement(object):
        """Appends the given tag to the contents of this tag."""
        self.insert(len(self.contents), tag)
-    def insert_before(self, predecessor):
+    def extend(self, tags):
-        """Makes the given element the immediate predecessor of this one.
+        """Appends the given tags to the contents of this tag."""
        for tag in tags:
            self.append(tag)
    def insert_before(self, *args):
        """Makes the given element(s) the immediate predecessor of this one.
-        The two elements will have the same parent, and the given element
+        The elements will have the same parent, and the given elements
        will be immediately before this one.
        """
        if self is predecessor:
            raise ValueError("Can't insert an element before itself.")
        parent = self.parent
        if parent is None:
            raise ValueError(
                "Element has no parent, so 'before' has no meaning.")
-        # Extract first so that the index won't be screwed up if they
+        if any(x is self for x in args):
-        # are siblings.
+                raise ValueError("Can't insert an element before itself.")
-        if isinstance(predecessor, PageElement):
+        for predecessor in args:
-            predecessor.extract()
+            # Extract first so that the index won't be screwed up if they
-        index = parent.index(self)
+            # are siblings.
-        parent.insert(index, predecessor)
+            if isinstance(predecessor, PageElement):
-
+                predecessor.extract()
-    def insert_after(self, successor):
+            index = parent.index(self)
-        """Makes the given element the immediate successor of this one.
+            parent.insert(index, predecessor)
-
+
-        The two elements will have the same parent, and the given element
+    def insert_after(self, *args):
        """Makes the given element(s) the immediate successor of this one.
        The elements will have the same parent, and the given elements
        will be immediately after this one.
        """
-        if self is successor:
+        # Do all error checking before modifying the tree.
            raise ValueError("Can't insert an element after itself.")
        parent = self.parent
        if parent is None:
            raise ValueError(
                "Element has no parent, so 'after' has no meaning.")
-        # Extract first so that the index won't be screwed up if they
+        if any(x is self for x in args):
-        # are siblings.
+            raise ValueError("Can't insert an element after itself.")
-        if isinstance(successor, PageElement):
+        
-            successor.extract()
+        offset = 0
-        index = parent.index(self)
+        for successor in args:
-        parent.insert(index+1, successor)
+            # Extract first so that the index won't be screwed up if they
            # are siblings.
            if isinstance(successor, PageElement):
                successor.extract()
            index = parent.index(self)
            parent.insert(index+1+offset, successor)
            offset += 1
    def find_next(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the first item that matches the given criteria and
@@ -657,82 +678,6 @@ class PageElement(object):
            yield i
            i = i.parent
    # Methods for supporting CSS selectors.
    tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$')
    # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
    #   \---------------------------/  \---/\-------------/    \-------/
    #     |                              |         |               |
    #     |                              |         |           The value
    #     |                              |    ~,|,^,$,* or =
    #     |                           Attribute
    #    Tag
    attribselect_re = re.compile(
        r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>[\w-]+)(?P<operator>[=~\|\^\$\*]?)' +
        r'=?"?(?P<value>[^\]"]*)"?\]$'
        )
    def _attr_value_as_string(self, value, default=None):
        """Force an attribute value into a string representation.
        A multi-valued attribute will be converted into a
        space-separated stirng.
        """
        value = self.get(value, default)
        if isinstance(value, list) or isinstance(value, tuple):
            value =" ".join(value)
        return value
    def _tag_name_matches_and(self, function, tag_name):
        if not tag_name:
            return function
        else:
            def _match(tag):
                return tag.name == tag_name and function(tag)
            return _match
    def _attribute_checker(self, operator, attribute, value=''):
        """Create a function that performs a CSS selector operation.
        Takes an operator, attribute and optional value. Returns a
        function that will return True for elements that match that
        combination.
        """
        if operator == '=':
            # string representation of `attribute` is equal to `value`
            return lambda el: el._attr_value_as_string(attribute) == value
        elif operator == '~':
            # space-separated list representation of `attribute`
            # contains `value`
            def _includes_value(element):
                attribute_value = element.get(attribute, [])
                if not isinstance(attribute_value, list):
                    attribute_value = attribute_value.split()
                return value in attribute_value
            return _includes_value
        elif operator == '^':
            # string representation of `attribute` starts with `value`
            return lambda el: el._attr_value_as_string(
                attribute, '').startswith(value)
        elif operator == '$':
            # string representation of `attribute` ends with `value`
            return lambda el: el._attr_value_as_string(
                attribute, '').endswith(value)
        elif operator == '*':
            # string representation of `attribute` contains `value`
            return lambda el: value in el._attr_value_as_string(attribute, '')
        elif operator == '|':
            # string representation of `attribute` is either exactly
            # `value` or starts with `value` and then a dash.
            def _is_or_starts_with_dash(element):
                attribute_value = element._attr_value_as_string(attribute, '')
                return (attribute_value == value or attribute_value.startswith(
                        value + '-'))
            return _is_or_starts_with_dash
        else:
            return lambda el: el.has_attr(attribute)
    # Old non-property versions of the generators, for backwards
    # compatibility with BS3.
    def nextGenerator(self):
@@ -1193,7 +1138,7 @@ class Tag(PageElement):
        # First off, turn a string formatter into a Formatter object. This
        # will stop the lookup from happening over and over again.
-        if not isinstance(formatter, Formatter) and not callable(formatter):
+        if not isinstance(formatter, Formatter) and not isinstance(formatter, Callable):
            formatter = self._formatter_for_name(formatter)
        attrs = []
        if self.attrs:
@@ -1298,7 +1243,7 @@ class Tag(PageElement):
        """
        # First off, turn a string formatter into a Formatter object. This
        # will stop the lookup from happening over and over again.
-        if not isinstance(formatter, Formatter) and not callable(formatter):
+        if not isinstance(formatter, Formatter) and not isinstance(formatter, Callable):
            formatter = self._formatter_for_name(formatter)
        pretty_print = (indent_level is not None)
@ -1394,250 +1339,41 @@ class Tag(PageElement):
            current = current.next_element
    # CSS selector code
-
+    def select_one(self, selector, namespaces=None, **kwargs):
    _selector_combinators = ['>', '+', '~']
    _select_debug = False
    quoted_colon = re.compile('"[^"]*:[^"]*"')
    def select_one(self, selector):
        """Perform a CSS selection operation on the current element."""
-        value = self.select(selector, limit=1)
+        value = self.select(selector, namespaces, 1, **kwargs)
        if value:
            return value[0]
        return None
-    def select(self, selector, _candidate_generator=None, limit=None):
+    def select(self, selector, namespaces=None, limit=None, **kwargs):
-        """Perform a CSS selection operation on the current element."""
+        """Perform a CSS selection operation on the current element.
-        # Handle grouping selectors if ',' exists, ie: p,a
+        This uses the SoupSieve library.
        if ',' in selector:
            context = []
            selectors = [x.strip() for x in selector.split(",")]
            # If a selector is mentioned multiple times we don't want
            # to use it more than once.
            used_selectors = set()
            # We also don't want to select the same element more than once,
            # if it's matched by multiple selectors.
            selected_object_ids = set()
            for partial_selector in selectors:
                if partial_selector == '':
                    raise ValueError('Invalid group selection syntax: %s' % selector)
                if partial_selector in used_selectors:
                    continue
                used_selectors.add(partial_selector)
                candidates = self.select(partial_selector, limit=limit)
                for candidate in candidates:
                    # This lets us distinguish between distinct tags that
                    # represent the same markup.
                    object_id = id(candidate)
                    if object_id not in selected_object_ids:
                        context.append(candidate)
                        selected_object_ids.add(object_id)
                if limit and len(context) >= limit:
                    break
            return context
        tokens = shlex.split(selector)
        current_context = [self]
        if tokens[-1] in self._selector_combinators:
            raise ValueError(
                'Final combinator "%s" is missing an argument.' % tokens[-1])
-        if self._select_debug:
+        :param selector: A string containing a CSS selector.
            print 'Running CSS selector "%s"' % selector
-        for index, token in enumerate(tokens):
+        :param namespaces: A dictionary mapping namespace prefixes
-            new_context = []
+        used in the CSS selector to namespace URIs. By default,
-            new_context_ids = set([])
+        Beautiful Soup will use the prefixes it encountered while
        parsing the document.
-            if tokens[index-1] in self._selector_combinators:
+        :param limit: After finding this number of results, stop looking.
                # This token was consumed by the previous combinator. Skip it.
                if self._select_debug:
                    print '  Token was consumed by the previous combinator.'
                continue
-            if self._select_debug:
+        :param kwargs: Any extra arguments you'd like to pass in to
-                print ' Considering token "%s"' % token
+        soupsieve.select().
-            recursive_candidate_generator = None
+        """
-            tag_name = None
+        if namespaces is None:
-
+            namespaces = self._namespaces
-            # Each operation corresponds to a checker function, a rule
+        
-            # for determining whether a candidate matches the
+        if limit is None:
-            # selector. Candidates are generated by the active
+            limit = 0
-            # iterator.
+        if soupsieve is None:
-            checker = None
+            raise NotImplementedError(
-
+                "Cannot execute CSS selectors because the soupsieve package is not installed."
-            m = self.attribselect_re.match(token)
+            )
-            if m is not None:
+            
-                # Attribute selector
+        return soupsieve.select(selector, self, namespaces, limit, **kwargs)
                tag_name, attribute, operator, value = m.groups()
                checker = self._attribute_checker(operator, attribute, value)
            elif '#' in token:
                # ID selector
                tag_name, tag_id = token.split('#', 1)
                def id_matches(tag):
                    return tag.get('id', None) == tag_id
                checker = id_matches
            elif '.' in token:
                # Class selector
                tag_name, klass = token.split('.', 1)
                classes = set(klass.split('.'))
                def classes_match(candidate):
                    return classes.issubset(candidate.get('class', []))
                checker = classes_match
            elif ':' in token and not self.quoted_colon.search(token):
                # Pseudo-class
                tag_name, pseudo = token.split(':', 1)
                if tag_name == '':
                    raise ValueError(
                        "A pseudo-class must be prefixed with a tag name.")
                pseudo_attributes = re.match(r'([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
                found = []
                if pseudo_attributes is None:
                    pseudo_type = pseudo
                    pseudo_value = None
                else:
                    pseudo_type, pseudo_value = pseudo_attributes.groups()
                if pseudo_type == 'nth-of-type':
                    try:
                        pseudo_value = int(pseudo_value)
                    except:
                        raise NotImplementedError(
                            'Only numeric values are currently supported for the nth-of-type pseudo-class.')
                    if pseudo_value < 1:
                        raise ValueError(
                            'nth-of-type pseudo-class value must be at least 1.')
                    class Counter(object):
                        def __init__(self, destination):
                            self.count = 0
                            self.destination = destination
                        def nth_child_of_type(self, tag):
                            self.count += 1
                            if self.count == self.destination:
                                return True
                            else:
                                return False
                    checker = Counter(pseudo_value).nth_child_of_type
                else:
                    raise NotImplementedError(
                        'Only the following pseudo-classes are implemented: nth-of-type.')
            elif token == '*':
                # Star selector -- matches everything
                pass
            elif token == '>':
                # Run the next token as a CSS selector against the
                # direct children of each tag in the current context.
                recursive_candidate_generator = lambda tag: tag.children
            elif token == '~':
                # Run the next token as a CSS selector against the
                # siblings of each tag in the current context.
                recursive_candidate_generator = lambda tag: tag.next_siblings
            elif token == '+':
                # For each tag in the current context, run the next
                # token as a CSS selector against the tag's next
                # sibling that's a tag.
                def next_tag_sibling(tag):
                    yield tag.find_next_sibling(True)
                recursive_candidate_generator = next_tag_sibling
            elif self.tag_name_re.match(token):
                # Just a tag name.
                tag_name = token
            else:
                raise ValueError(
                    'Unsupported or invalid CSS selector: "%s"' % token)
            if recursive_candidate_generator:
                # This happens when the selector looks like  "> foo".
                #
                # The generator calls select() recursively on every
                # member of the current context, passing in a different
                # candidate generator and a different selector.
                #
                # In the case of "> foo", the candidate generator is
                # one that yields a tag's direct children (">"), and
                # the selector is "foo".
                next_token = tokens[index+1]
                def recursive_select(tag):
                    if self._select_debug:
                        print '    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
                        print '-' * 40
                    for i in tag.select(next_token, recursive_candidate_generator):
                        if self._select_debug:
                            print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
                        yield i
                    if self._select_debug:
                        print '-' * 40
                _use_candidate_generator = recursive_select
            elif _candidate_generator is None:
                # By default, a tag's candidates are all of its
                # children. If tag_name is defined, only yield tags
                # with that name.
                if self._select_debug:
                    if tag_name:
                        check = "[any]"
                    else:
                        check = tag_name
                    print '   Default candidate generator, tag name="%s"' % check
                if self._select_debug:
                    # This is redundant with later code, but it stops
                    # a bunch of bogus tags from cluttering up the
                    # debug log.
                    def default_candidate_generator(tag):
                        for child in tag.descendants:
                            if not isinstance(child, Tag):
                                continue
                            if tag_name and not child.name == tag_name:
                                continue
                            yield child
                    _use_candidate_generator = default_candidate_generator
                else:
                    _use_candidate_generator = lambda tag: tag.descendants
            else:
                _use_candidate_generator = _candidate_generator
            count = 0
            for tag in current_context:
                if self._select_debug:
                    print "    Running candidate generator on %s %s" % (
                        tag.name, repr(tag.attrs))
                for candidate in _use_candidate_generator(tag):
                    if not isinstance(candidate, Tag):
                        continue
                    if tag_name and candidate.name != tag_name:
                        continue
                    if checker is not None:
                        try:
                            result = checker(candidate)
                        except StopIteration:
                            # The checker has decided we should no longer
                            # run the generator.
                            break
                    if checker is None or result:
                        if self._select_debug:
                            print "     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
                        if id(candidate) not in new_context_ids:
                            # If a tag matches a selector more than once,
                            # don't include it in the context more than once.
                            new_context.append(candidate)
                            new_context_ids.add(id(candidate))
                    elif self._select_debug:
                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
            current_context = new_context
        if limit and len(current_context) >= limit:
            current_context = current_context[:limit]
        if self._select_debug:
            print "Final verdict:"
            for i in current_context:
                print " %s %s" % (i.name, i.attrs)
        return current_context
    # Old names for backwards compatibility
    def childGenerator(self):
@ -1689,7 +1425,7 @@ class SoupStrainer(object):
    def _normalize_search_value(self, value):
        # Leave it alone if it's a Unicode string, a callable, a
        # regular expression, a boolean, or None.
-        if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
+        if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match')
            or isinstance(value, bool) or value is None):
            return value