diff --git a/CHANGES.md b/CHANGES.md index 5c53b3a..d0de7ee 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,7 +8,7 @@ * Change backport rarfile_py2; Fixes for multivolume RAR3 with encrypted headers * Update Apprise 0.8.0 (6aa52c3) to 0.8.4 (1ce7cbb) * Update attr 19.2.0.dev0 (daf2bc8) to 20.1.0.dev0 (4bd6827) -* Update Beautiful Soup 4.8.1 (r540) to 4.8.2 (r556) +* Update Beautiful Soup 4.8.1 (r540) to 4.8.2 (r559) * Update Certifi 2019.06.16 (84dc766) to 2019.11.28 (21abb9b) * Update dateutil 2.8.1 (fc9b162) to 2.8.1 (110a09b) * Update DiskCache library 4.0.0 (2c79bb9) to 4.1.0 (b0451e0) @@ -42,6 +42,7 @@ * Update Apprise 0.8.0 (6aa52c3) to 0.8.3 (4aee9de) * Update attr 19.2.0.dev0 (daf2bc8) to 20.1.0.dev0 (9b5e988) * Update Beautiful Soup 4.8.1 (r540) to 4.8.2 (r554) +* Update Beautiful Soup 4.8.2 (r544) to 4.8.2 (r556) * Update Requests library 2.22.0 (3d968ff) to 2.22.0 (d2f65af) * Update Tornado_py3 Web Server 6.0.3 (ff985fe) to 6.1.dev1 (18b653c) * Update urllib3 release 1.25.6 (4a6c288) to 1.25.7 (37ba61a) diff --git a/lib/bs4_py2/__init__.py b/lib/bs4_py2/__init__.py index 3cd2e15..f828cd2 100644 --- a/lib/bs4_py2/__init__.py +++ b/lib/bs4_py2/__init__.py @@ -306,12 +306,11 @@ class BeautifulSoup(Tag): # system. Just let it go. pass if is_file: - if isinstance(markup, unicode): - markup = markup.encode("utf8") warnings.warn( '"%s" looks like a filename, not markup. You should' ' probably open this file and pass the filehandle into' - ' Beautiful Soup.' % markup) + ' Beautiful Soup.' % self._decode_markup(markup) + ) self._check_markup_is_url(markup) rejections = [] @@ -360,8 +359,21 @@ class BeautifulSoup(Tag): d['builder'] = None return d - @staticmethod - def _check_markup_is_url(markup): + @classmethod + def _decode_markup(cls, markup): + """Ensure `markup` is Unicode text so it's safe to send into warnings.warn. + + TODO: warnings.warn had this problem back in 2010 but it might not + anymore.
+ """ + if isinstance(markup, bytes): + decoded = markup.decode('utf-8', 'replace') + else: + decoded = markup + return decoded + + @classmethod + def _check_markup_is_url(cls, markup): """Error-handling method to raise a warning if incoming markup looks like a URL. @@ -378,15 +390,13 @@ class BeautifulSoup(Tag): if any(markup.startswith(prefix) for prefix in cant_start_with): if not space in markup: - if isinstance(markup, bytes): - decoded_markup = markup.decode('utf-8', 'replace') - else: - decoded_markup = markup warnings.warn( '"%s" looks like a URL. Beautiful Soup is not an' ' HTTP client. You should probably use an HTTP client like' ' requests to get the document behind the URL, and feed' - ' that document to Beautiful Soup.' % decoded_markup + ' that document to Beautiful Soup.' % cls._decode_markup( + markup + ) ) def _feed(self): diff --git a/lib/bs4_py2/element.py b/lib/bs4_py2/element.py index 8a0280f..9283b1c 100644 --- a/lib/bs4_py2/element.py +++ b/lib/bs4_py2/element.py @@ -487,7 +487,7 @@ class PageElement(object): :param text: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_one(self.find_all_next, name, attrs, text, **kwargs) findNext = find_next # BS3 @@ -523,7 +523,7 @@ class PageElement(object): :param text: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_one(self.find_next_siblings, name, attrs, text, **kwargs) @@ -562,7 +562,7 @@ class PageElement(object): :param text: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. 
- :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_one( self.find_all_previous, name, attrs, text, **kwargs) @@ -601,7 +601,7 @@ class PageElement(object): :param text: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_one(self.find_previous_siblings, name, attrs, text, **kwargs) @@ -640,7 +640,7 @@ class PageElement(object): :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ # NOTE: We can't use _find_one because findParents takes a different # set of arguments. @@ -663,7 +663,7 @@ class PageElement(object): :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_all(name, attrs, None, limit, self.parents, **kwargs) @@ -675,7 +675,7 @@ class PageElement(object): """The PageElement, if any, that was parsed just after this one. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self.next_element @@ -684,7 +684,7 @@ class PageElement(object): """The PageElement, if any, that was parsed just before this one. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self.previous_element @@ -1690,7 +1690,7 @@ class Tag(PageElement): :param limit: Stop looking after finding this many results. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. 
- :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ r = None l = self.find_all(name, attrs, recursive, text, 1, **kwargs) @@ -1764,7 +1764,7 @@ class Tag(PageElement): soupsieve.select() method. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ value = self.select(selector, namespaces, 1, **kwargs) if value: diff --git a/lib/bs4_py3/__init__.py b/lib/bs4_py3/__init__.py index 1b2d95b..01c3d2c 100644 --- a/lib/bs4_py3/__init__.py +++ b/lib/bs4_py3/__init__.py @@ -306,12 +306,11 @@ class BeautifulSoup(Tag): # system. Just let it go. pass if is_file: - if isinstance(markup, str): - markup = markup.encode("utf8") warnings.warn( '"%s" looks like a filename, not markup. You should' ' probably open this file and pass the filehandle into' - ' Beautiful Soup.' % markup) + ' Beautiful Soup.' % self._decode_markup(markup) + ) self._check_markup_is_url(markup) rejections = [] @@ -360,8 +359,21 @@ class BeautifulSoup(Tag): d['builder'] = None return d - @staticmethod - def _check_markup_is_url(markup): + @classmethod + def _decode_markup(cls, markup): + """Ensure `markup` is Unicode text so it's safe to send into warnings.warn. + + TODO: warnings.warn had this problem back in 2010 but it might not + anymore. + """ + if isinstance(markup, bytes): + decoded = markup.decode('utf-8', 'replace') + else: + decoded = markup + return decoded + + @classmethod + def _check_markup_is_url(cls, markup): """Error-handling method to raise a warning if incoming markup looks like a URL. @@ -378,15 +390,13 @@ class BeautifulSoup(Tag): if any(markup.startswith(prefix) for prefix in cant_start_with): if not space in markup: - if isinstance(markup, bytes): - decoded_markup = markup.decode('utf-8', 'replace') - else: - decoded_markup = markup warnings.warn( '"%s" looks like a URL. Beautiful Soup is not an' ' HTTP client.
You should probably use an HTTP client like' ' requests to get the document behind the URL, and feed' - ' that document to Beautiful Soup.' % decoded_markup + ' that document to Beautiful Soup.' % cls._decode_markup( + markup + ) ) def _feed(self): diff --git a/lib/bs4_py3/element.py b/lib/bs4_py3/element.py index aa084ff..cabb42c 100644 --- a/lib/bs4_py3/element.py +++ b/lib/bs4_py3/element.py @@ -487,7 +487,7 @@ class PageElement(object): :param text: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_one(self.find_all_next, name, attrs, text, **kwargs) findNext = find_next # BS3 @@ -523,7 +523,7 @@ class PageElement(object): :param text: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_one(self.find_next_siblings, name, attrs, text, **kwargs) @@ -562,7 +562,7 @@ class PageElement(object): :param text: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_one( self.find_all_previous, name, attrs, text, **kwargs) @@ -601,7 +601,7 @@ class PageElement(object): :param text: A filter for a NavigableString with specific text. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_one(self.find_previous_siblings, name, attrs, text, **kwargs) @@ -640,7 +640,7 @@ class PageElement(object): :kwargs: A dictionary of filters on attribute values. 
:return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ # NOTE: We can't use _find_one because findParents takes a different # set of arguments. @@ -663,7 +663,7 @@ class PageElement(object): :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self._find_all(name, attrs, None, limit, self.parents, **kwargs) @@ -675,7 +675,7 @@ class PageElement(object): """The PageElement, if any, that was parsed just after this one. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self.next_element @@ -684,7 +684,7 @@ class PageElement(object): """The PageElement, if any, that was parsed just before this one. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ return self.previous_element @@ -1690,7 +1690,7 @@ class Tag(PageElement): :param limit: Stop looking after finding this many results. :kwargs: A dictionary of filters on attribute values. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ r = None l = self.find_all(name, attrs, recursive, text, 1, **kwargs) @@ -1764,7 +1764,7 @@ class Tag(PageElement): soupsieve.select() method. :return: A PageElement. - :rtype: bs4.element.PageElement + :rtype: Union[bs4.element.Tag, bs4.element.NavigableString] """ value = self.select(selector, namespaces, 1, **kwargs) if value: