# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
syntax_error=u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
syntax_error='You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
classBeautifulSoup(Tag):
"""
@ -69,7 +69,7 @@ class BeautifulSoup(Tag):
like HTML's <br> tag), call handle_starttag and then
handle_endtag.
"""
ROOT_TAG_NAME=u'[document]'
ROOT_TAG_NAME='[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
@ -135,12 +135,12 @@ class BeautifulSoup(Tag):
"fromEncoding","from_encoding")
iflen(kwargs)>0:
arg=kwargs.keys().pop()
arg=list(kwargs.keys()).pop()
raiseTypeError(
"__init__() got an unexpected keyword argument '%s'"%arg)
ifbuilderisNone:
ifisinstance(features,basestring):
ifisinstance(features,str):
features=[features]
iffeaturesisNoneorlen(features)==0:
features=self.DEFAULT_BUILDER_FEATURES
@ -164,7 +164,7 @@ class BeautifulSoup(Tag):
# involving passing non-markup to Beautiful Soup.
# Beautiful Soup will still parse the input as markup,
# just in case that's what the user really wants.
if(isinstance(markup,unicode)
if(isinstance(markup,str)
andnotos.path.supports_unicode_filenames):
possible_filename=markup.encode("utf8")
else:
@ -172,7 +172,7 @@ class BeautifulSoup(Tag):
is_file=False
try:
is_file=os.path.exists(possible_filename)
exceptException,e:
exceptExceptionase:
# This is almost certainly a problem involving
# characters not valid in filenames on this
# system. Just let it go.
@ -184,7 +184,7 @@ class BeautifulSoup(Tag):
# TODO: This is ugly but I couldn't get it to work in
# Python 3 otherwise.
if((isinstance(markup,bytes)andnotb''inmarkup)
or(isinstance(markup,unicode)andnotu''inmarkup)):
or(isinstance(markup,str)andnot''inmarkup)):
warnings.warn(
'"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.'%markup)
@ -259,7 +259,7 @@ class BeautifulSoup(Tag):
defendData(self,containerClass=NavigableString):
ifself.current_data:
current_data=u''.join(self.current_data)
current_data=''.join(self.current_data)
# If whitespace is not preserved, and this string contains
# nothing but ASCII spaces, replace it with a single space
# or newline.
@ -367,9 +367,9 @@ class BeautifulSoup(Tag):
encoding_part=''
ifeventual_encoding!=None:
encoding_part=' encoding="%s"'%eventual_encoding
prefix=u'<?xml version="1.0"%s?>\n'%encoding_part
prefix='<?xml version="1.0"%s?>\n'%encoding_part
else:
prefix=u''
prefix=''
ifnotpretty_print:
indent_level=None
else:
@ -403,4 +403,4 @@ class FeatureNotFound(ValueError):
@ -72,9 +72,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
real_name=int(name)
try:
data=unichr(real_name)
except(ValueError,OverflowError),e:
data=u"\N{REPLACEMENT CHARACTER}"
data=chr(real_name)
except(ValueError,OverflowError)ase:
data="\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
@ -142,7 +142,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
ifisinstance(markup,unicode):
ifisinstance(markup,str):
yield(markup,None,None,False)
return
@ -158,7 +158,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup=self.soup
try:
parser.feed(markup)
exceptHTMLParseError,e:
exceptHTMLParseErrorase:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
expected=u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8")
expected="<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8")
soup=self.soup(text)
self.assertEqual(soup.p.encode("utf-8"),expected)
@ -354,7 +354,7 @@ class HTMLTreeBuilderSmokeTest(object):
# easy-to-understand document.
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
unicode_html=u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
unicode_html='<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
# That's because we're going to encode it into ISO-Latin-1, and use
# that to test.
@ -493,15 +493,15 @@ class XMLTreeBuilderSmokeTest(object):