You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
604 lines
20 KiB
604 lines
20 KiB
# urllib3/connectionpool.py
|
|
# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
|
|
#
|
|
# This module is part of urllib3 and is released under
|
|
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
|
|
|
import logging
|
|
import socket
|
|
|
|
from socket import error as SocketError, timeout as SocketTimeout
|
|
|
|
try:
|
|
from select import poll, POLLIN
|
|
except ImportError: # Doesn't exist on OSX and other platforms
|
|
from select import select
|
|
poll = False
|
|
|
|
try: # Python 3
|
|
from http.client import HTTPConnection, HTTPSConnection, HTTPException
|
|
from http.client import HTTP_PORT, HTTPS_PORT
|
|
except ImportError:
|
|
from httplib import HTTPConnection, HTTPSConnection, HTTPException
|
|
from httplib import HTTP_PORT, HTTPS_PORT
|
|
|
|
try: # Python 3
|
|
from queue import Queue, Empty, Full
|
|
except ImportError:
|
|
from Queue import Queue, Empty, Full
|
|
|
|
try: # Compiled with SSL?
|
|
import ssl
|
|
BaseSSLError = ssl.SSLError
|
|
except ImportError:
|
|
ssl = None
|
|
BaseSSLError = None
|
|
|
|
|
|
from .packages.ssl_match_hostname import match_hostname, CertificateError
|
|
from .request import RequestMethods
|
|
from .response import HTTPResponse
|
|
from .exceptions import (SSLError,
|
|
MaxRetryError,
|
|
TimeoutError,
|
|
HostChangedError,
|
|
EmptyPoolError,
|
|
)
|
|
|
|
from .packages.ssl_match_hostname import match_hostname, CertificateError
|
|
from .packages import six
|
|
|
|
# Lazy integer range under one name on both Python 2 and Python 3.
xrange = six.moves.xrange

# Module-level logger shared by every pool in this file.
log = logging.getLogger(__name__)

# Sentinel meaning "no timeout argument was supplied", so that ``None`` can
# still be passed explicitly to disable the timeout.
_Default = object()

# Default port for each URL scheme the pools understand.
port_by_scheme = dict(http=HTTP_PORT, https=HTTPS_PORT)
|
|
|
|
## Connection objects (extension of httplib)
|
|
|
|
class VerifiedHTTPSConnection(HTTPSConnection):
    """
    Based on httplib.HTTPSConnection but wraps the socket with
    SSL certification.

    Call :meth:`set_cert` before :meth:`connect` to choose the client
    certificate and the verification policy.
    """
    # Defaults so that ``connect`` works even if ``set_cert`` was never
    # called and the base class did not define these attributes.
    key_file = None
    cert_file = None
    cert_reqs = None
    ca_certs = None

    def set_cert(self, key_file=None, cert_file=None,
                 cert_reqs='CERT_NONE', ca_certs=None):
        """
        Configure the certificate material used by :meth:`connect`.

        :param key_file:
            Path to the client private key, or None.

        :param cert_file:
            Path to the client certificate, or None.

        :param cert_reqs:
            Verification policy. Either one of the names ``'CERT_NONE'``,
            ``'CERT_OPTIONAL'``, ``'CERT_REQUIRED'`` or the matching
            ``ssl.CERT_*`` constant. Anything unrecognised (including
            ``None``) falls back to ``ssl.CERT_NONE``.

        :param ca_certs:
            Path to the CA bundle used to verify the peer, or None.
        """
        ssl_req_scheme = {
            'CERT_NONE': ssl.CERT_NONE,
            'CERT_OPTIONAL': ssl.CERT_OPTIONAL,
            'CERT_REQUIRED': ssl.CERT_REQUIRED
        }

        if cert_reqs in ssl_req_scheme:
            resolved = ssl_req_scheme[cert_reqs]
        elif cert_reqs in ssl_req_scheme.values():
            # An ``ssl.CERT_*`` constant was passed directly. Previously this
            # was silently downgraded to CERT_NONE.
            resolved = cert_reqs
        else:
            resolved = ssl.CERT_NONE

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = resolved
        self.ca_certs = ca_certs

    def connect(self):
        """
        Open the TCP connection, wrap it in SSL and verify the peer.

        ``self.sock`` is only assigned once every check has passed, so a
        connection that failed verification is never left open for reuse.
        Any exception raised while wrapping or verifying is re-raised after
        the socket has been closed.
        """
        sock = socket.create_connection((self.host, self.port), self.timeout)

        # Wrap socket using verification with the root certs in ca_certs.
        # NOTE(review): ssl.wrap_socket was removed in Python 3.12; this
        # block keeps the call the file already relies on.
        try:
            ssl_sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
                                       cert_reqs=self.cert_reqs,
                                       ca_certs=self.ca_certs)
        except Exception:
            sock.close()  # do not leak the TCP socket on handshake failure
            raise

        # With CERT_NONE the peer certificate is not validated, so there is
        # nothing meaningful to match the hostname against.
        if self.ca_certs and self.cert_reqs != ssl.CERT_NONE:
            try:
                match_hostname(ssl_sock.getpeercert(), self.host)
            except Exception:
                ssl_sock.close()
                raise

        self.sock = ssl_sock
|
|
|
|
## Pool objects
|
|
|
|
class ConnectionPool(object):
    """
    Common ancestor of every connection pool, e.g.
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.

    Subclasses set ``scheme`` to the URL scheme they serve.
    """

    scheme = None

    def __init__(self, host, port=None):
        # Remember which endpoint this pool talks to.
        self.host, self.port = host, port

    def __str__(self):
        # Use the concrete subclass name so each pool type identifies itself.
        cls_name = type(self).__name__
        return '%s(host=%r, port=%r)' % (cls_name, self.host, self.port)
|
|
|
|
|
|
class HTTPConnectionPool(ConnectionPool, RequestMethods):
    """
    Thread-safe connection pool for one host.

    :param host:
        Host used for this HTTP Connection (e.g. "localhost"), passed into
        :class:`httplib.HTTPConnection`.

    :param port:
        Port used for this HTTP Connection (None is equivalent to 80), passed
        into :class:`httplib.HTTPConnection`.

    :param strict:
        Meant to cause BadStatusLine to be raised if the status line can't be
        parsed as a valid HTTP/1.0 or 1.1 status line. The value is stored on
        the pool as ``self.strict``; :meth:`_new_conn` does not forward it.

    :param timeout:
        Socket timeout for each individual connection, can be a float. None
        disables timeout.

    :param maxsize:
        Number of connections to save that can be reused. More than 1 is useful
        in multithreaded situations. If ``block`` is set to false, more
        connections will be created but they will not be saved once they've
        been used.

    :param block:
        If set to True, no more than ``maxsize`` connections will be used at
        a time. When no free connections are available, the call will block
        until a connection has been released. This is a useful side effect for
        particular multithreaded situations where one does not want to use more
        than maxsize connections per host to prevent flooding.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    scheme = 'http'

    def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
                 block=False, headers=None):
        self.host = host
        self.port = port
        self.strict = strict
        self.timeout = timeout
        self.pool = Queue(maxsize)
        self.block = block
        self.headers = headers or {}

        # Fill the queue up so that doing get() on it will block properly.
        # Each ``None`` is a free slot that becomes a real connection lazily.
        for _ in xrange(maxsize):
            self.pool.put(None)

        # These are mostly for testing and debugging purposes.
        self.num_connections = 0
        self.num_requests = 0

    def _new_conn(self):
        """
        Return a fresh :class:`httplib.HTTPConnection`.
        """
        self.num_connections += 1
        log.info("Starting new HTTP connection (%d): %s",
                 self.num_connections, self.host)
        return HTTPConnection(host=self.host, port=self.port)

    def _get_conn(self, timeout=None):
        """
        Get a connection. Will return a pooled connection if one is available.

        If no connections are available and :prop:`.block` is ``False``, then a
        fresh connection is returned.

        :param timeout:
            Seconds to wait before giving up and raising
            :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
            :prop:`.block` is ``True``.
        """
        conn = None
        try:
            conn = self.pool.get(block=self.block, timeout=timeout)

            # If this is a persistent connection, check if it got disconnected
            if conn and conn.sock and is_connection_dropped(conn):
                log.info("Resetting dropped connection: %s", self.host)
                # The closed object is still returned below and reused.
                conn.close()

        except Empty:
            if self.block:
                raise EmptyPoolError(self,
                                     "Pool reached maximum size and no more "
                                     "connections are allowed.")
            # Non-blocking pool: fall through and create a new connection.

        return conn or self._new_conn()

    def _put_conn(self, conn):
        """
        Put a connection back into the pool.

        :param conn:
            Connection object for the current host and port as returned by
            :meth:`._new_conn` or :meth:`._get_conn`.

        If the pool is already full, the connection is discarded because we
        exceeded maxsize. If connections are discarded frequently, then maxsize
        should be increased.
        """
        try:
            self.pool.put(conn, block=False)
        except Full:
            # This should never happen if self.block == True
            log.warning("HttpConnectionPool is full, discarding connection: %s",
                        self.host)

    def _discard_conn(self, conn):
        """
        Close a broken connection and give its slot back to the pool.

        Without this, every broken connection would permanently shrink the
        pool, and a ``block=True`` pool would eventually wait forever.
        """
        try:
            conn.close()
        except SocketError:
            pass  # Best-effort cleanup; the connection is being thrown away.

        try:
            # ``None`` is replaced by a fresh connection in _get_conn().
            self.pool.put(None, block=False)
        except Full:
            pass  # Connection was an overflow one (block=False); no slot owed.

    def _make_request(self, conn, method, url, timeout=_Default,
                      **httplib_request_kw):
        """
        Perform a request on a given httplib connection object taken from our
        pool.

        :param conn:
            Connection to send the request on.

        :param timeout:
            Socket timeout for this request; defaults to the pool's timeout.

        :param \\**httplib_request_kw:
            Passed through to ``conn.request`` (e.g. ``body``, ``headers``).

        Returns the raw httplib response object.
        """
        self.num_requests += 1

        if timeout is _Default:
            timeout = self.timeout

        conn.timeout = timeout  # This only does anything in Py26+

        conn.request(method, url, **httplib_request_kw)
        # Set the timeout on the live socket too, for reused connections.
        conn.sock.settimeout(timeout)
        httplib_response = conn.getresponse()

        log.debug("\"%s %s %s\" %s %s",
                  method, url,
                  conn._http_vsn_str,  # pylint: disable-msg=W0212
                  httplib_response.status, httplib_response.length)

        return httplib_response

    def is_same_host(self, url):
        """
        Check if the given ``url`` is a member of the same host as this
        connection pool.

        Relative URLs (starting with ``/``) always match. Otherwise scheme,
        host and port must all match, treating a missing port and the
        scheme's default port as equal.
        """
        # TODO: Add optional support for socket.gethostbyname checking.
        if url.startswith('/'):
            return True

        scheme, host, port = get_host(url)

        if self.port and not port:
            # Use explicit default port for comparison when none is given.
            port = port_by_scheme.get(scheme)
        elif not self.port and port == port_by_scheme.get(scheme):
            # Pool has no explicit port; an explicit default port is the same.
            port = None

        return (scheme, host, port) == (self.scheme, self.host, self.port)

    def urlopen(self, method, url, body=None, headers=None, retries=3,
                redirect=True, assert_same_host=True, timeout=_Default,
                pool_timeout=None, release_conn=None, **response_kw):
        r"""
        Get a connection from the pool and perform an HTTP request. This is the
        lowest level call for making a request, so you'll need to specify all
        the raw details.

        .. note::

           More commonly, it's appropriate to use a convenience method provided
           by :class:`.RequestMethods`, such as :meth:`.request`.

        .. note::

           `release_conn` will only behave as expected if
           `preload_content=False` because we want to make
           `preload_content=False` the default behaviour someday soon without
           breaking backwards compatibility.

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param body:
            Data to send in the request body (useful for creating
            POST requests, see HTTPConnectionPool.post_url for
            more convenience).

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. If None, pool headers are used. If provided,
            these headers completely replace any pool-specific headers.

        :param retries:
            Number of retries to allow before raising a MaxRetryError exception.

        :param redirect:
            Automatically handle redirects (status codes 301, 302, 303, 307),
            each redirect counts as a retry.

        :param assert_same_host:
            If ``True``, will make sure that the host of the pool requests is
            consistent else will raise HostChangedError. When False, you can
            use the pool on an HTTP proxy and request foreign hosts.

        :param timeout:
            If specified, overrides the default timeout for this one request.

        :param pool_timeout:
            If set and the pool is set to block=True, then this method will
            block for ``pool_timeout`` seconds and raise EmptyPoolError if no
            connection is available within the time period.

        :param release_conn:
            If False, then the urlopen call will not release the connection
            back into the pool once a response is received (but will release if
            you read the entire contents of the response such as when
            `preload_content=True`). This is useful if you're not preloading
            the response's content immediately. You will need to call
            ``r.release_conn()`` on the response ``r`` to return the connection
            back into the pool. If None, it takes the value of
            ``response_kw.get('preload_content', True)``.

        :param \**response_kw:
            Additional parameters are passed to
            :meth:`urllib3.response.HTTPResponse.from_httplib`

        Raises MaxRetryError, HostChangedError, TimeoutError or SSLError as
        described above; retries and redirects re-enter this method with the
        same timeout, pool_timeout, release_conn and response options.
        """
        if headers is None:
            headers = self.headers

        if retries < 0:
            raise MaxRetryError(self, url)

        if timeout is _Default:
            timeout = self.timeout

        if release_conn is None:
            release_conn = response_kw.get('preload_content', True)

        # Check host
        if assert_same_host and not self.is_same_host(url):
            raise HostChangedError(self, url, retries - 1)

        conn = None

        try:
            # Request a connection from the queue
            # (Could raise SocketError: Bad file descriptor)
            conn = self._get_conn(timeout=pool_timeout)

            # Make the request on the httplib connection object
            httplib_response = self._make_request(conn, method, url,
                                                  timeout=timeout,
                                                  body=body, headers=headers)

            # If we're going to release the connection in ``finally:``, then
            # the request doesn't need to know about the connection. Otherwise
            # it will also try to release it and we'll have a double-release
            # mess.
            response_conn = not release_conn and conn

            # Import httplib's response into our own wrapper object
            response = HTTPResponse.from_httplib(httplib_response,
                                                 pool=self,
                                                 connection=response_conn,
                                                 **response_kw)

            # else:
            #     The connection will be put back into the pool when
            #     ``response.release_conn()`` is called (implicitly by
            #     ``response.read()``)

        except Empty:
            # Timed out by queue
            raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" %
                               pool_timeout)

        except SocketTimeout:
            # Timed out by socket
            raise TimeoutError(self, "Request timed out. (timeout=%s)" %
                               timeout)

        except BaseSSLError as e:
            # SSL certificate error
            raise SSLError(e)

        except CertificateError as e:
            # Name mismatch
            raise SSLError(e)

        except (HTTPException, SocketError) as e:
            # Connection broken: close it and return its slot to the pool so
            # the pool does not shrink. It is replaced on the next _get_conn().
            if conn is not None:
                self._discard_conn(conn)
            conn = None
            # This is necessary so we can access e below
            err = e

        finally:
            if conn and release_conn:
                # Put the connection back to be reused
                self._put_conn(conn)

        if conn is None:
            log.warning("Retrying (%d attempts remain) after connection "
                        "broken by '%r': %s", retries, err, url)
            # Try again, keeping every per-request option the caller gave us.
            return self.urlopen(method, url, body, headers, retries - 1,
                                redirect, assert_same_host,
                                timeout=timeout, pool_timeout=pool_timeout,
                                release_conn=release_conn, **response_kw)

        # Handle redirect?
        redirect_location = redirect and response.get_redirect_location()
        if redirect_location:
            log.info("Redirecting %s -> %s", url, redirect_location)
            return self.urlopen(method, redirect_location, body, headers,
                                retries - 1, redirect, assert_same_host,
                                timeout=timeout, pool_timeout=pool_timeout,
                                release_conn=release_conn, **response_kw)

        return response
|
|
|
|
|
|
class HTTPSConnectionPool(HTTPConnectionPool):
    """
    HTTPS flavour of :class:`.HTTPConnectionPool`.

    If the :mod:`ssl` module is importable, connections are
    :class:`.VerifiedHTTPSConnection` objects, which *can* verify
    certificates; otherwise plain :class:`httplib.HTTPSConnection` objects
    are used.

    ``key_file``, ``cert_file``, ``cert_reqs`` and ``ca_certs`` only matter
    when :mod:`ssl` is available. They are handed to
    :meth:`.VerifiedHTTPSConnection.set_cert` for every new connection.
    """

    scheme = 'https'

    def __init__(self, host, port=None,
                 strict=False, timeout=None, maxsize=1,
                 block=False, headers=None,
                 key_file=None, cert_file=None,
                 cert_reqs='CERT_NONE', ca_certs=None):

        # Generic pool set-up is done by the parent class.
        super(HTTPSConnectionPool, self).__init__(
            host, port, strict, timeout, maxsize, block, headers)

        # Certificate settings, applied to each connection in _new_conn().
        self.key_file, self.cert_file = key_file, cert_file
        self.cert_reqs, self.ca_certs = cert_reqs, ca_certs

    def _new_conn(self):
        """
        Build and return a new HTTPS connection for this pool's host.
        """
        self.num_connections += 1
        log.info("Starting new HTTPS connection (%d): %s"
                 % (self.num_connections, self.host))

        if ssl:
            verified = VerifiedHTTPSConnection(host=self.host, port=self.port)
            verified.set_cert(key_file=self.key_file,
                              cert_file=self.cert_file,
                              cert_reqs=self.cert_reqs,
                              ca_certs=self.ca_certs)
            return verified

        # No ssl module: fall back to the stock connection class.
        return HTTPSConnection(host=self.host, port=self.port)
|
|
|
|
|
|
## Helpers
|
|
|
|
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, tuple, or string.
        ``True`` translates to 'gzip,deflate'.
        List or tuple will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header. Text is encoded as UTF-8 before being base64-encoded.

    Returns a new dict containing only the headers that were requested.

    Example: ::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    # Imported locally so this helper does not depend on the module header.
    import base64

    headers = {}
    if accept_encoding:
        if isinstance(accept_encoding, str):
            pass
        elif isinstance(accept_encoding, (list, tuple)):
            accept_encoding = ','.join(accept_encoding)
        else:
            accept_encoding = 'gzip,deflate'
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    if basic_auth:
        # The 'base64' str codec does not exist on Python 3 and inserts line
        # breaks into long values on Python 2; b64encode does neither.
        if not isinstance(basic_auth, bytes):
            basic_auth = basic_auth.encode('utf-8')
        token = base64.b64encode(basic_auth)
        if not isinstance(token, str):
            # Python 3 returns bytes; header values must be native strings.
            token = token.decode('ascii')
        headers['authorization'] = 'Basic ' + token

    return headers
|
|
|
|
|
|
def get_host(url):
    """
    Given a url, return its scheme, host and port (None if it's not there).

    The scheme defaults to ``'http'`` when the url does not start with one.
    Path, query, fragment and ``user:password@`` parts are discarded.
    Bracketed IPv6 literals keep their brackets in the returned host.

    :raises ValueError: if a port is present but is not an integer.

    For example: ::

        >>> get_host('http://google.com/mail/')
        ('http', 'google.com', None)
        >>> get_host('google.com:80')
        ('http', 'google.com', 80)
        >>> get_host('http://[::1]:8080/')
        ('http', '[::1]', 8080)
    """
    # This code is actually similar to urlparse.urlsplit, but much
    # simplified for our needs.
    port = None
    scheme = 'http'

    if '://' in url:
        prefix, remainder = url.split('://', 1)
        # Only a leading "scheme://" counts; a "://" that shows up later in
        # the path or query (e.g. "?next=http://...") is not the scheme.
        if not any(delim in prefix for delim in '/?#'):
            scheme, url = prefix, remainder

    # The authority ends at the first of '/', '?' or '#'.
    for delim in '/?#':
        url = url.split(delim, 1)[0]

    if '@' in url:
        # The host cannot contain '@', so the last one ends the userinfo.
        url = url.rsplit('@', 1)[1]

    port_text = ''
    if url.startswith('[') and ']' in url:
        # IPv6 literal: colons inside the brackets are not port separators.
        host, trailer = url.split(']', 1)
        host += ']'
        if trailer.startswith(':'):
            port_text = trailer[1:]
    elif ':' in url:
        host, port_text = url.split(':', 1)
    else:
        host = url

    if port_text:  # "host:" with an empty port means no port was given
        port = int(port_text)

    return scheme, host, port
|
|
|
|
|
|
def connection_from_url(url, **kw):
    r"""
    Build the right kind of :class:`.ConnectionPool` for ``url``.

    Saves the caller from splitting the url into scheme, host and port
    before constructing a pool by hand.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param \**kw:
        Forwarded unchanged to the pool constructor, e.g. ``timeout``,
        ``maxsize``, ``headers``.

    An ``https`` url yields an :class:`.HTTPSConnectionPool`; every other
    scheme yields an :class:`.HTTPConnectionPool`.

    Example: ::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
    """
    scheme, host, port = get_host(url)
    pool_cls = HTTPSConnectionPool if scheme == 'https' else HTTPConnectionPool
    return pool_cls(host, port=port, **kw)
|
|
|
|
|
|
def is_connection_dropped(conn):
    """
    Returns True if the connection is dropped and should be closed.

    An idle connection should have nothing to read, so a readable socket
    means either unexpected buffered data or that the peer closed it.

    :param conn:
        ``HTTPConnection`` object whose ``sock`` attribute is an open socket.

    Always returns a real ``bool``.
    """
    if not poll:
        # select() returns (readable, writable, errored) lists.
        readable = select([conn.sock], [], [], 0.0)[0]
        return bool(readable)

    # This version is better on platforms that support it.
    p = poll()
    p.register(conn.sock, POLLIN)
    fileno = conn.sock.fileno()
    # Zero timeout: report current readiness without blocking.
    return any(fno == fileno for fno, _event in p.poll(0))
|
|
|