Update Beautiful Soup 4.6.3 (r475) → 4.7.1 (r497).

6 years ago · 0ec577e69c
9 changed files with 209 additions and 425 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -1,6 +1,7 @@
 ### 0.20.0 (2019-xx-xx xx:xx:xx UTC)

 * Update attr 18.3.0.dev0 (55642b3) to 19.2.0.dev0 (de84609) 
+* Update Beautiful Soup 4.6.3 (r475) to 4.7.1 (r497)


 [develop changelog]
--- a/lib/bs4/init.py
+++ b/lib/bs4/init.py
@ -17,12 +17,10 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/

 """

-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.6.3"
-__copyright__ = "Copyright (c) 2004-2018 Leonard Richardson"
+__version__ = "4.7.1"
+__copyright__ = "Copyright (c) 2004-2019 Leonard Richardson"
+# Use of this source code is governed by the MIT license.
 __license__ = "MIT"

 __all__ = ['BeautifulSoup']
@ -237,10 +235,11 @@ class BeautifulSoup(Tag):
        self.builder = builder
        self.is_xml = builder.is_xml
        self.known_xml = self.is_xml
-        self.builder.soup = self
-
+        self._namespaces = dict()
        self.parse_only = parse_only

+        self.builder.initialize_soup(self)
+
        if hasattr(markup, 'read'):        # It's a file-type object.
            markup = markup.read()
        elif len(markup) <= 256 and (
@ -382,7 +381,7 @@ class BeautifulSoup(Tag):

    def pushTag(self, tag):
        #print "Push", tag.name
-        if self.currentTag:
+        if self.currentTag is not None:
            self.currentTag.contents.append(tag)
        self.tagStack.append(tag)
        self.currentTag = self.tagStack[-1]
@ -421,60 +420,71 @@ class BeautifulSoup(Tag):

    def object_was_parsed(self, o, parent=None, most_recent_element=None):
        """Add an object to the parse tree."""
-        parent = parent or self.currentTag
-        previous_element = most_recent_element or self._most_recent_element
+        if parent is None:
+            parent = self.currentTag
+        if most_recent_element is not None:
+            previous_element = most_recent_element
+        else:
+            previous_element = self._most_recent_element

        next_element = previous_sibling = next_sibling = None
        if isinstance(o, Tag):
            next_element = o.next_element
            next_sibling = o.next_sibling
            previous_sibling = o.previous_sibling
-            if not previous_element:
+            if previous_element is None:
                previous_element = o.previous_element

+        fix = parent.next_element is not None
+
        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)

        self._most_recent_element = o
        parent.contents.append(o)

-        if parent.next_sibling:
-            # This node is being inserted into an element that has
-            # already been parsed. Deal with any dangling references.
-            index = len(parent.contents)-1
-            while index >= 0:
-                if parent.contents[index] is o:
-                    break
-                index -= 1
-            else:
-                raise ValueError(
-                    "Error building tree: supposedly %r was inserted "
-                    "into %r after the fact, but I don't see it!" % (
-                        o, parent
-                    )
-                )
-            if index == 0:
-                previous_element = parent
-                previous_sibling = None
-            else:
-                previous_element = previous_sibling = parent.contents[index-1]
-            if index == len(parent.contents)-1:
-                next_element = parent.next_sibling
-                next_sibling = None
-            else:
-                next_element = next_sibling = parent.contents[index+1]
-
-            o.previous_element = previous_element
-            if previous_element:
-                previous_element.next_element = o
-            o.next_element = next_element
-            if next_element:
-                next_element.previous_element = o
-            o.next_sibling = next_sibling
-            if next_sibling:
-                next_sibling.previous_sibling = o
-            o.previous_sibling = previous_sibling
-            if previous_sibling:
-                previous_sibling.next_sibling = o
+        # Check if we are inserting into an already parsed node.
+        if fix:
+            self._linkage_fixer(parent)
+
+    def _linkage_fixer(self, el):
+        """Make sure linkage of this fragment is sound."""
+
+        first = el.contents[0]
+        child = el.contents[-1]
+        descendant = child
+
+        if child is first and el.parent is not None:
+            # Parent should be linked to first child
+            el.next_element = child
+            # We are no longer linked to whatever this element is
+            prev_el = child.previous_element
+            if prev_el is not None and prev_el is not el:
+                prev_el.next_element = None
+            # First child should be linked to the parent, and no previous siblings.
+            child.previous_element = el
+            child.previous_sibling = None
+
+        # We have no sibling as we've been appended as the last.
+        child.next_sibling = None
+
+        # This index is a tag, dig deeper for a "last descendant"
+        if isinstance(child, Tag) and child.contents:
+            descendant = child._last_descendant(False)
+
+        # As the final step, link last descendant. It should be linked
+        # to the parent's next sibling (if found), else walk up the chain
+        # and find a parent with a sibling. It should have no next sibling.
+        descendant.next_element = None
+        descendant.next_sibling = None
+        target = el
+        while True:
+            if target is None:
+                break
+            elif target.next_sibling is not None:
+                descendant.next_element = target.next_sibling
+                target.next_sibling.previous_element = child
+                break
+            target = target.parent

    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
@ -520,7 +530,7 @@ class BeautifulSoup(Tag):
                  self.currentTag, self._most_recent_element)
        if tag is None:
            return tag
-        if self._most_recent_element:
+        if self._most_recent_element is not None:
            self._most_recent_element.next_element = tag
        self._most_recent_element = tag
        self.pushTag(tag)
--- a/lib/bs4/builder/init.py
+++ b/lib/bs4/builder/init.py
@ -1,5 +1,5 @@
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"

 from collections import defaultdict
 import itertools
@ -8,7 +8,7 @@ from bs4.element import (
    CharsetMetaAttributeValue,
    ContentMetaAttributeValue,
    HTMLAwareEntitySubstitution,
-    whitespace_re
+    nonwhitespace_re
    )

 __all__ = [
@ -102,6 +102,12 @@ class TreeBuilder(object):
    def __init__(self):
        self.soup = None

+    def initialize_soup(self, soup):
+        """The BeautifulSoup object has been initialized and is now
+        being associated with the TreeBuilder.
+        """
+        self.soup = soup
+        
    def reset(self):
        pass

@ -167,7 +173,7 @@ class TreeBuilder(object):
                    # values. Split it into a list.
                    value = attrs[attr]
                    if isinstance(value, basestring):
-                        values = whitespace_re.split(value)
+                        values = nonwhitespace_re.findall(value)
                    else:
                        # html5lib sometimes calls setAttributes twice
                        # for the same tag when rearranging the parse
--- a/lib/bs4/builder/_html5lib.py
+++ b/lib/bs4/builder/_html5lib.py
@ -1,5 +1,5 @@
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"

 __all__ = [
    'HTML5TreeBuilder',
@ -15,7 +15,7 @@ from bs4.builder import (
    )
 from bs4.element import (
    NamespacedAttribute,
-    whitespace_re,
+    nonwhitespace_re,
 )
 import html5lib
 from html5lib.constants import (
@ -206,7 +206,7 @@ class AttrList(object):
            # A node that is being cloned may have already undergone
            # this procedure.
            if not isinstance(value, list):
-                value = whitespace_re.split(value)
+                value = nonwhitespace_re.findall(value)
        self.element[name] = value
    def items(self):
        return list(self.attrs.items())
@ -249,7 +249,7 @@ class Element(treebuilder_base.Node):
        if not isinstance(child, basestring) and child.parent is not None:
            node.element.extract()

-        if (string_child and self.element.contents
+        if (string_child is not None and self.element.contents
            and self.element.contents[-1].__class__ == NavigableString):
            # We are appending a string onto another string.
            # TODO This has O(n^2) performance, for input like
@ -360,16 +360,16 @@ class Element(treebuilder_base.Node):
            # Set the first child's previous_element and previous_sibling
            # to elements within the new parent
            first_child = to_append[0]
-            if new_parents_last_descendant:
+            if new_parents_last_descendant is not None:
                first_child.previous_element = new_parents_last_descendant
            else:
                first_child.previous_element = new_parent_element
            first_child.previous_sibling = new_parents_last_child
-            if new_parents_last_descendant:
+            if new_parents_last_descendant is not None:
                new_parents_last_descendant.next_element = first_child
            else:
                new_parent_element.next_element = first_child
-            if new_parents_last_child:
+            if new_parents_last_child is not None:
                new_parents_last_child.next_sibling = first_child

            # Find the very last element being moved. It is now the
@ -379,7 +379,7 @@ class Element(treebuilder_base.Node):
            last_childs_last_descendant = to_append[-1]._last_descendant(False, True)

            last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
-            if new_parents_last_descendant_next_element:
+            if new_parents_last_descendant_next_element is not None:
                # TODO: This code has no test coverage and I'm not sure
                # how to get html5lib to go through this path, but it's
                # just the other side of the previous line.
--- a/lib/bs4/builder/_htmlparser.py
+++ b/lib/bs4/builder/_htmlparser.py
@ -1,8 +1,8 @@
 # encoding: utf-8
 """Use the HTMLParser library to parse HTML files that aren't too bad."""

-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"

 __all__ = [
    'HTMLParserTreeBuilder',
--- a/lib/bs4/builder/_lxml.py
+++ b/lib/bs4/builder/_lxml.py
@ -1,5 +1,6 @@
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
+
 __all__ = [
    'LXMLTreeBuilderForXML',
    'LXMLTreeBuilder',
@ -32,6 +33,10 @@ from bs4.dammit import EncodingDetector

 LXML = 'lxml'

+def _invert(d):
+    "Invert a dictionary."
+    return dict((v,k) for k, v in d.items())
+
 class LXMLTreeBuilderForXML(TreeBuilder):
    DEFAULT_PARSER_CLASS = etree.XMLParser

@ -48,7 +53,29 @@ class LXMLTreeBuilderForXML(TreeBuilder):

    # This namespace mapping is specified in the XML Namespace
    # standard.
-    DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
+    DEFAULT_NSMAPS = dict(xml='http://www.w3.org/XML/1998/namespace')
+
+    DEFAULT_NSMAPS_INVERTED = _invert(DEFAULT_NSMAPS)
+
+    def initialize_soup(self, soup):
+        """Let the BeautifulSoup object know about the standard namespace
+        mapping.
+        """
+        super(LXMLTreeBuilderForXML, self).initialize_soup(soup)
+        self._register_namespaces(self.DEFAULT_NSMAPS)
+
+    def _register_namespaces(self, mapping):
+        """Let the BeautifulSoup object know about namespaces encountered
+        while parsing the document.
+
+        This might be useful later on when creating CSS selectors.
+        """
+        for key, value in mapping.items():
+            if key and key not in self.soup._namespaces:
+                # Let the BeautifulSoup object know about a new namespace.
+                # If there are multiple namespaces defined with the same
+                # prefix, the first one in the document takes precedence.
+                self.soup._namespaces[key] = value

    def default_parser(self, encoding):
        # This can either return a parser object or a class, which
@ -75,8 +102,8 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        if empty_element_tags is not None:
            self.empty_element_tags = set(empty_element_tags)
        self.soup = None
-        self.nsmaps = [self.DEFAULT_NSMAPS]
-
+        self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
+        
    def _getNsTag(self, tag):
        # Split the namespace URL out of a fully-qualified lxml tag
        # name. Copied from lxml's src/lxml/sax.py.
@ -144,7 +171,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            raise ParserRejectedMarkup(str(e))

    def close(self):
-        self.nsmaps = [self.DEFAULT_NSMAPS]
+        self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]

    def start(self, name, attrs, nsmap={}):
        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
@ -158,8 +185,14 @@ class LXMLTreeBuilderForXML(TreeBuilder):
                self.nsmaps.append(None)
        elif len(nsmap) > 0:
            # A new namespace mapping has come into play.
-            inverted_nsmap = dict((value, key) for key, value in nsmap.items())
-            self.nsmaps.append(inverted_nsmap)
+
+            # First, Let the BeautifulSoup object know about it.
+            self._register_namespaces(nsmap)
+
+            # Then, add it to our running list of inverted namespace
+            # mappings.
+            self.nsmaps.append(_invert(nsmap))
+
            # Also treat the namespace mapping as a set of attributes on the
            # tag, so we can recreate it later.
            attrs = attrs.copy()
--- a/lib/bs4/dammit.py
+++ b/lib/bs4/dammit.py
@ -6,8 +6,7 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal
 Feed Parser. It works best on XML and HTML, but it does not rewrite the
 XML or HTML to reflect a new encoding; that's the tree builder's job.
 """
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
+# Use of this source code is governed by the MIT license.
 __license__ = "MIT"

 import codecs
--- a/lib/bs4/diagnose.py
+++ b/lib/bs4/diagnose.py
@ -1,7 +1,6 @@
 """Diagnostic functions, mainly for use when doing tech support."""

-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
+# Use of this source code is governed by the MIT license.
 __license__ = "MIT"

 import cProfile
--- a/lib/bs4/element.py
+++ b/lib/bs4/element.py
@ -1,5 +1,4 @@
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
+# Use of this source code is governed by the MIT license.
 __license__ = "MIT"

 try:
@ -7,14 +6,25 @@ try:
 except ImportError , e:
    from collections import Callable
 import re
-import shlex
 import sys
 import warnings
+try:
+    import soupsieve
+except ImportError, e:
+    soupsieve = None
+    warnings.warn(
+        'The soupsieve package is not installed. CSS selectors cannot be used.'
+    )
+
 from bs4.dammit import EntitySubstitution

 DEFAULT_OUTPUT_ENCODING = "utf-8"
 PY3K = (sys.version_info[0] > 2)

+nonwhitespace_re = re.compile(r"\S+")
+
+# NOTE: This isn't used as of 4.7.0. I'm leaving it for a little bit on
+# the off chance someone imported it for their own use.
 whitespace_re = re.compile(r"\s+")

 def _alias(attr):
@ -207,7 +217,7 @@ class PageElement(object):
        if formatter is None:
            output = s
        else:
-            if callable(formatter):
+            if isinstance(formatter, Callable):
                # Backwards compatibility -- you used to pass in a formatting method.
                output = formatter(s)
            else:
@ -256,26 +266,26 @@ class PageElement(object):
            self.previous_element.next_element = self

        self.next_element = next_element
-        if self.next_element:
+        if self.next_element is not None:
            self.next_element.previous_element = self

        self.next_sibling = next_sibling
-        if self.next_sibling:
+        if self.next_sibling is not None:
            self.next_sibling.previous_sibling = self

-        if (not previous_sibling
+        if (previous_sibling is None
            and self.parent is not None and self.parent.contents):
            previous_sibling = self.parent.contents[-1]

        self.previous_sibling = previous_sibling
-        if previous_sibling:
+        if previous_sibling is not None:
            self.previous_sibling.next_sibling = self

    nextSibling = _alias("next_sibling")  # BS3
    previousSibling = _alias("previous_sibling")  # BS3

    def replace_with(self, replace_with):
-        if not self.parent:
+        if self.parent is None:
            raise ValueError(
                "Cannot replace one element with another when the"
                "element to be replaced is not part of a tree.")
@ -292,7 +302,7 @@ class PageElement(object):

    def unwrap(self):
        my_parent = self.parent
-        if not self.parent:
+        if self.parent is None:
            raise ValueError(
                "Cannot replace an element with its contents when that"
                "element is not part of a tree.")
@ -340,7 +350,7 @@ class PageElement(object):

    def _last_descendant(self, is_initialized=True, accept_self=True):
        "Finds the last element beneath this object to be parsed."
-        if is_initialized and self.next_sibling:
+        if is_initialized and self.next_sibling is not None:
            last_child = self.next_sibling.previous_element
        else:
            last_child = self
@ -430,43 +440,54 @@ class PageElement(object):
        """Appends the given tag to the contents of this tag."""
        self.insert(len(self.contents), tag)

-    def insert_before(self, predecessor):
-        """Makes the given element the immediate predecessor of this one.
+    def extend(self, tags):
+        """Appends the given tags to the contents of this tag."""
+        for tag in tags:
+            self.append(tag)
+
+    def insert_before(self, *args):
+        """Makes the given element(s) the immediate predecessor of this one.

-        The two elements will have the same parent, and the given element
+        The elements will have the same parent, and the given elements
        will be immediately before this one.
        """
-        if self is predecessor:
-            raise ValueError("Can't insert an element before itself.")
        parent = self.parent
        if parent is None:
            raise ValueError(
                "Element has no parent, so 'before' has no meaning.")
-        # Extract first so that the index won't be screwed up if they
-        # are siblings.
-        if isinstance(predecessor, PageElement):
-            predecessor.extract()
-        index = parent.index(self)
-        parent.insert(index, predecessor)
-
-    def insert_after(self, successor):
-        """Makes the given element the immediate successor of this one.
-
-        The two elements will have the same parent, and the given element
+        if any(x is self for x in args):
+                raise ValueError("Can't insert an element before itself.")
+        for predecessor in args:
+            # Extract first so that the index won't be screwed up if they
+            # are siblings.
+            if isinstance(predecessor, PageElement):
+                predecessor.extract()
+            index = parent.index(self)
+            parent.insert(index, predecessor)
+
+    def insert_after(self, *args):
+        """Makes the given element(s) the immediate successor of this one.
+
+        The elements will have the same parent, and the given elements
        will be immediately after this one.
        """
-        if self is successor:
-            raise ValueError("Can't insert an element after itself.")
+        # Do all error checking before modifying the tree.
        parent = self.parent
        if parent is None:
            raise ValueError(
                "Element has no parent, so 'after' has no meaning.")
-        # Extract first so that the index won't be screwed up if they
-        # are siblings.
-        if isinstance(successor, PageElement):
-            successor.extract()
-        index = parent.index(self)
-        parent.insert(index+1, successor)
+        if any(x is self for x in args):
+            raise ValueError("Can't insert an element after itself.")
+        
+        offset = 0
+        for successor in args:
+            # Extract first so that the index won't be screwed up if they
+            # are siblings.
+            if isinstance(successor, PageElement):
+                successor.extract()
+            index = parent.index(self)
+            parent.insert(index+1+offset, successor)
+            offset += 1

    def find_next(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the first item that matches the given criteria and
@ -657,82 +678,6 @@ class PageElement(object):
            yield i
            i = i.parent

-    # Methods for supporting CSS selectors.
-
-    tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$')
-
-    # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
-    #   \---------------------------/  \---/\-------------/    \-------/
-    #     |                              |         |               |
-    #     |                              |         |           The value
-    #     |                              |    ~,|,^,$,* or =
-    #     |                           Attribute
-    #    Tag
-    attribselect_re = re.compile(
-        r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>[\w-]+)(?P<operator>[=~\|\^\$\*]?)' +
-        r'=?"?(?P<value>[^\]"]*)"?\]$'
-        )
-
-    def _attr_value_as_string(self, value, default=None):
-        """Force an attribute value into a string representation.
-
-        A multi-valued attribute will be converted into a
-        space-separated stirng.
-        """
-        value = self.get(value, default)
-        if isinstance(value, list) or isinstance(value, tuple):
-            value =" ".join(value)
-        return value
-
-    def _tag_name_matches_and(self, function, tag_name):
-        if not tag_name:
-            return function
-        else:
-            def _match(tag):
-                return tag.name == tag_name and function(tag)
-            return _match
-
-    def _attribute_checker(self, operator, attribute, value=''):
-        """Create a function that performs a CSS selector operation.
-
-        Takes an operator, attribute and optional value. Returns a
-        function that will return True for elements that match that
-        combination.
-        """
-        if operator == '=':
-            # string representation of `attribute` is equal to `value`
-            return lambda el: el._attr_value_as_string(attribute) == value
-        elif operator == '~':
-            # space-separated list representation of `attribute`
-            # contains `value`
-            def _includes_value(element):
-                attribute_value = element.get(attribute, [])
-                if not isinstance(attribute_value, list):
-                    attribute_value = attribute_value.split()
-                return value in attribute_value
-            return _includes_value
-        elif operator == '^':
-            # string representation of `attribute` starts with `value`
-            return lambda el: el._attr_value_as_string(
-                attribute, '').startswith(value)
-        elif operator == '$':
-            # string representation of `attribute` ends with `value`
-            return lambda el: el._attr_value_as_string(
-                attribute, '').endswith(value)
-        elif operator == '*':
-            # string representation of `attribute` contains `value`
-            return lambda el: value in el._attr_value_as_string(attribute, '')
-        elif operator == '|':
-            # string representation of `attribute` is either exactly
-            # `value` or starts with `value` and then a dash.
-            def _is_or_starts_with_dash(element):
-                attribute_value = element._attr_value_as_string(attribute, '')
-                return (attribute_value == value or attribute_value.startswith(
-                        value + '-'))
-            return _is_or_starts_with_dash
-        else:
-            return lambda el: el.has_attr(attribute)
-
    # Old non-property versions of the generators, for backwards
    # compatibility with BS3.
    def nextGenerator(self):
@ -1193,7 +1138,7 @@ class Tag(PageElement):

        # First off, turn a string formatter into a Formatter object. This
        # will stop the lookup from happening over and over again.
-        if not isinstance(formatter, Formatter) and not callable(formatter):
+        if not isinstance(formatter, Formatter) and not isinstance(formatter, Callable):
            formatter = self._formatter_for_name(formatter)
        attrs = []
        if self.attrs:
@ -1298,7 +1243,7 @@ class Tag(PageElement):
        """
        # First off, turn a string formatter into a Formatter object. This
        # will stop the lookup from happening over and over again.
-        if not isinstance(formatter, Formatter) and not callable(formatter):
+        if not isinstance(formatter, Formatter) and not isinstance(formatter, Callable):
            formatter = self._formatter_for_name(formatter)

        pretty_print = (indent_level is not None)
@ -1394,250 +1339,41 @@ class Tag(PageElement):
            current = current.next_element

    # CSS selector code
-
-    _selector_combinators = ['>', '+', '~']
-    _select_debug = False
-    quoted_colon = re.compile('"[^"]*:[^"]*"')
-    def select_one(self, selector):
+    def select_one(self, selector, namespaces=None, **kwargs):
        """Perform a CSS selection operation on the current element."""
-        value = self.select(selector, limit=1)
+        value = self.select(selector, namespaces, 1, **kwargs)
        if value:
            return value[0]
        return None

-    def select(self, selector, _candidate_generator=None, limit=None):
-        """Perform a CSS selection operation on the current element."""
+    def select(self, selector, namespaces=None, limit=None, **kwargs):
+        """Perform a CSS selection operation on the current element.

-        # Handle grouping selectors if ',' exists, ie: p,a
-        if ',' in selector:
-            context = []
-            selectors = [x.strip() for x in selector.split(",")]
-
-            # If a selector is mentioned multiple times we don't want
-            # to use it more than once.
-            used_selectors = set()
-
-            # We also don't want to select the same element more than once,
-            # if it's matched by multiple selectors.
-            selected_object_ids = set()
-            for partial_selector in selectors:
-                if partial_selector == '':
-                    raise ValueError('Invalid group selection syntax: %s' % selector)
-                if partial_selector in used_selectors:
-                    continue
-                used_selectors.add(partial_selector)
-                candidates = self.select(partial_selector, limit=limit)
-                for candidate in candidates:
-                    # This lets us distinguish between distinct tags that
-                    # represent the same markup.
-                    object_id = id(candidate)
-                    if object_id not in selected_object_ids:
-                        context.append(candidate)
-                        selected_object_ids.add(object_id)
-                if limit and len(context) >= limit:
-                    break
-            return context
-        tokens = shlex.split(selector)
-        current_context = [self]
-
-        if tokens[-1] in self._selector_combinators:
-            raise ValueError(
-                'Final combinator "%s" is missing an argument.' % tokens[-1])
+        This uses the SoupSieve library.

-        if self._select_debug:
-            print 'Running CSS selector "%s"' % selector
+        :param selector: A string containing a CSS selector.

-        for index, token in enumerate(tokens):
-            new_context = []
-            new_context_ids = set([])
+        :param namespaces: A dictionary mapping namespace prefixes
+        used in the CSS selector to namespace URIs. By default,
+        Beautiful Soup will use the prefixes it encountered while
+        parsing the document.

-            if tokens[index-1] in self._selector_combinators:
-                # This token was consumed by the previous combinator. Skip it.
-                if self._select_debug:
-                    print '  Token was consumed by the previous combinator.'
-                continue
+        :param limit: After finding this number of results, stop looking.

-            if self._select_debug:
-                print ' Considering token "%s"' % token
-            recursive_candidate_generator = None
-            tag_name = None
-
-            # Each operation corresponds to a checker function, a rule
-            # for determining whether a candidate matches the
-            # selector. Candidates are generated by the active
-            # iterator.
-            checker = None
-
-            m = self.attribselect_re.match(token)
-            if m is not None:
-                # Attribute selector
-                tag_name, attribute, operator, value = m.groups()
-                checker = self._attribute_checker(operator, attribute, value)
-
-            elif '#' in token:
-                # ID selector
-                tag_name, tag_id = token.split('#', 1)
-                def id_matches(tag):
-                    return tag.get('id', None) == tag_id
-                checker = id_matches
-
-            elif '.' in token:
-                # Class selector
-                tag_name, klass = token.split('.', 1)
-                classes = set(klass.split('.'))
-                def classes_match(candidate):
-                    return classes.issubset(candidate.get('class', []))
-                checker = classes_match
-
-            elif ':' in token and not self.quoted_colon.search(token):
-                # Pseudo-class
-                tag_name, pseudo = token.split(':', 1)
-                if tag_name == '':
-                    raise ValueError(
-                        "A pseudo-class must be prefixed with a tag name.")
-                pseudo_attributes = re.match(r'([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
-                found = []
-                if pseudo_attributes is None:
-                    pseudo_type = pseudo
-                    pseudo_value = None
-                else:
-                    pseudo_type, pseudo_value = pseudo_attributes.groups()
-                if pseudo_type == 'nth-of-type':
-                    try:
-                        pseudo_value = int(pseudo_value)
-                    except:
-                        raise NotImplementedError(
-                            'Only numeric values are currently supported for the nth-of-type pseudo-class.')
-                    if pseudo_value < 1:
-                        raise ValueError(
-                            'nth-of-type pseudo-class value must be at least 1.')
-                    class Counter(object):
-                        def __init__(self, destination):
-                            self.count = 0
-                            self.destination = destination
-
-                        def nth_child_of_type(self, tag):
-                            self.count += 1
-                            if self.count == self.destination:
-                                return True
-                            else:
-                                return False
-                    checker = Counter(pseudo_value).nth_child_of_type
-                else:
-                    raise NotImplementedError(
-                        'Only the following pseudo-classes are implemented: nth-of-type.')
-
-            elif token == '*':
-                # Star selector -- matches everything
-                pass
-            elif token == '>':
-                # Run the next token as a CSS selector against the
-                # direct children of each tag in the current context.
-                recursive_candidate_generator = lambda tag: tag.children
-            elif token == '~':
-                # Run the next token as a CSS selector against the
-                # siblings of each tag in the current context.
-                recursive_candidate_generator = lambda tag: tag.next_siblings
-            elif token == '+':
-                # For each tag in the current context, run the next
-                # token as a CSS selector against the tag's next
-                # sibling that's a tag.
-                def next_tag_sibling(tag):
-                    yield tag.find_next_sibling(True)
-                recursive_candidate_generator = next_tag_sibling
-
-            elif self.tag_name_re.match(token):
-                # Just a tag name.
-                tag_name = token
-            else:
-                raise ValueError(
-                    'Unsupported or invalid CSS selector: "%s"' % token)
-            if recursive_candidate_generator:
-                # This happens when the selector looks like  "> foo".
-                #
-                # The generator calls select() recursively on every
-                # member of the current context, passing in a different
-                # candidate generator and a different selector.
-                #
-                # In the case of "> foo", the candidate generator is
-                # one that yields a tag's direct children (">"), and
-                # the selector is "foo".
-                next_token = tokens[index+1]
-                def recursive_select(tag):
-                    if self._select_debug:
-                        print '    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
-                        print '-' * 40
-                    for i in tag.select(next_token, recursive_candidate_generator):
-                        if self._select_debug:
-                            print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
-                        yield i
-                    if self._select_debug:
-                        print '-' * 40
-                _use_candidate_generator = recursive_select
-            elif _candidate_generator is None:
-                # By default, a tag's candidates are all of its
-                # children. If tag_name is defined, only yield tags
-                # with that name.
-                if self._select_debug:
-                    if tag_name:
-                        check = "[any]"
-                    else:
-                        check = tag_name
-                    print '   Default candidate generator, tag name="%s"' % check
-                if self._select_debug:
-                    # This is redundant with later code, but it stops
-                    # a bunch of bogus tags from cluttering up the
-                    # debug log.
-                    def default_candidate_generator(tag):
-                        for child in tag.descendants:
-                            if not isinstance(child, Tag):
-                                continue
-                            if tag_name and not child.name == tag_name:
-                                continue
-                            yield child
-                    _use_candidate_generator = default_candidate_generator
-                else:
-                    _use_candidate_generator = lambda tag: tag.descendants
-            else:
-                _use_candidate_generator = _candidate_generator
-
-            count = 0
-            for tag in current_context:
-                if self._select_debug:
-                    print "    Running candidate generator on %s %s" % (
-                        tag.name, repr(tag.attrs))
-                for candidate in _use_candidate_generator(tag):
-                    if not isinstance(candidate, Tag):
-                        continue
-                    if tag_name and candidate.name != tag_name:
-                        continue
-                    if checker is not None:
-                        try:
-                            result = checker(candidate)
-                        except StopIteration:
-                            # The checker has decided we should no longer
-                            # run the generator.
-                            break
-                    if checker is None or result:
-                        if self._select_debug:
-                            print "     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
-                        if id(candidate) not in new_context_ids:
-                            # If a tag matches a selector more than once,
-                            # don't include it in the context more than once.
-                            new_context.append(candidate)
-                            new_context_ids.add(id(candidate))
-                    elif self._select_debug:
-                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
-
-            current_context = new_context
-        if limit and len(current_context) >= limit:
-            current_context = current_context[:limit]
-
-        if self._select_debug:
-            print "Final verdict:"
-            for i in current_context:
-                print " %s %s" % (i.name, i.attrs)
-        return current_context
+        :param kwargs: Any extra arguments you'd like to pass in to
+        soupsieve.select().
+        """
+        if namespaces is None:
+            namespaces = self._namespaces
+        
+        if limit is None:
+            limit = 0
+        if soupsieve is None:
+            raise NotImplementedError(
+                "Cannot execute CSS selectors because the soupsieve package is not installed."
+            )
+            
+        return soupsieve.select(selector, self, namespaces, limit, **kwargs)

    # Old names for backwards compatibility
    def childGenerator(self):
@ -1689,7 +1425,7 @@ class SoupStrainer(object):
    def _normalize_search_value(self, value):
        # Leave it alone if it's a Unicode string, a callable, a
        # regular expression, a boolean, or None.
-        if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
+        if (isinstance(value, unicode) or isinstance(value, Callable) or hasattr(value, 'match')
            or isinstance(value, bool) or value is None):
            return value