diff --git a/CHANGES.md b/CHANGES.md index 7c980e5..4f53527 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,7 @@ * Update Requests library 2.24.0 (1b41763) to 2.24.0 (967a05b) * Update soupsieve_py3 2.0.0.final (e66c311) to 2.0.2.dev (05086ef) * Update soupsieve_py2 backport +* Update urllib3 1.25.9 (a5a45dc) to 1.26.0.dev0 (41eae64) [develop changelog] diff --git a/lib/urllib3/__init__.py b/lib/urllib3/__init__.py index 09024d4..ac3f9ef 100644 --- a/lib/urllib3/__init__.py +++ b/lib/urllib3/__init__.py @@ -1,5 +1,5 @@ """ -urllib3 - Thread-safe connection pooling and re-using. +Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more """ from __future__ import absolute_import import warnings diff --git a/lib/urllib3/_collections.py b/lib/urllib3/_collections.py index 019d151..5c8f366 100644 --- a/lib/urllib3/_collections.py +++ b/lib/urllib3/_collections.py @@ -18,7 +18,7 @@ except ImportError: # Platform-specific: No threads available from collections import OrderedDict from .exceptions import InvalidHeader -from .packages.six import iterkeys, itervalues, PY3 +from .packages.six import ensure_str, iterkeys, itervalues, PY3 __all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"] @@ -154,7 +154,7 @@ class HTTPHeaderDict(MutableMapping): def __getitem__(self, key): val = self._container[key.lower()] - return ", ".join(val[1:]) + return ", ".join([ensure_str(v, "ascii") for v in val[1:]]) def __delitem__(self, key): del self._container[key.lower()] @@ -190,7 +190,7 @@ class HTTPHeaderDict(MutableMapping): def pop(self, key, default=__marker): """D.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. + If key is not found, d is returned if given, otherwise KeyError is raised. """ # Using the MutableMapping function directly fails due to the private marker. # Using ordinary dict.pop would expose the internal structures. diff --git a/lib/urllib3/connection.py b/lib/urllib3/connection.py index 7e08631..8147417 100644 --- a/lib/urllib3/connection.py +++ b/lib/urllib3/connection.py @@ -30,6 +30,15 @@ except NameError: pass +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + BrokenPipeError = BrokenPipeError +except NameError: # Python 2: + + class BrokenPipeError(Exception): + pass + + from .exceptions import ( NewConnectionError, ConnectTimeoutError, @@ -63,34 +72,30 @@ RECENT_DATE = datetime.date(2019, 1, 1) _CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") -class DummyConnection(object): - """Used to detect a failed ConnectionCls import.""" - - pass - - class HTTPConnection(_HTTPConnection, object): """ - Based on httplib.HTTPConnection but provides an extra constructor + Based on :class:`http.client.HTTPConnection` but provides an extra constructor backwards-compatibility layer between older and newer Pythons. Additional keyword parameters are used to configure attributes of the connection. Accepted parameters include: - - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` - - ``source_address``: Set the source address for the current connection. - - ``socket_options``: Set specific options on the underlying socket. If not specified, then - defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling - Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. 
+ - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. - For example, if you wish to enable TCP Keep Alive in addition to the defaults, - you might pass:: + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass: - HTTPConnection.default_socket_options + [ - (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), - ] + .. code-block:: python - Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). """ default_port = port_by_scheme["http"] @@ -144,7 +149,7 @@ class HTTPConnection(_HTTPConnection, object): self._dns_host = value def _new_conn(self): - """ Establish a socket connection and set nodelay settings on it. + """Establish a socket connection and set nodelay settings on it. :return: New socket connection. """ @@ -191,7 +196,9 @@ class HTTPConnection(_HTTPConnection, object): self._prepare_conn(conn) def putrequest(self, method, url, *args, **kwargs): - """Send a request to the server""" + """""" + # Empty docstring because the indentation of CPython's implementation + # is broken but we don't want this method in our documentation. match = _CONTAINS_CONTROL_CHAR_RE.search(method) if match: raise ValueError( @@ -240,16 +247,22 @@ class HTTPConnection(_HTTPConnection, object): if not isinstance(chunk, bytes): chunk = chunk.encode("utf8") len_str = hex(len(chunk))[2:] - self.send(len_str.encode("utf-8")) - self.send(b"\r\n") - self.send(chunk) - self.send(b"\r\n") + to_send = bytearray(len_str.encode()) + to_send += b"\r\n" + to_send += chunk + to_send += b"\r\n" + self.send(to_send) # After the if clause, to always have a closed body self.send(b"0\r\n\r\n") class HTTPSConnection(HTTPConnection): + """ + Many of the parameters to this constructor are passed to the underlying SSL + socket by means of :py:func:`urllib3.util.ssl_wrap_socket`. + """ + default_port = port_by_scheme["https"] cert_reqs = None @@ -435,6 +448,12 @@ def _get_default_user_agent(): return "python-urllib3/%s" % __version__ +class DummyConnection(object): + """Used to detect a failed ConnectionCls import.""" + + pass + + if not ssl: HTTPSConnection = DummyConnection # noqa: F811 diff --git a/lib/urllib3/connectionpool.py b/lib/urllib3/connectionpool.py index 492590f..4547223 100644 --- a/lib/urllib3/connectionpool.py +++ b/lib/urllib3/connectionpool.py @@ -34,6 +34,7 @@ from .connection import ( VerifiedHTTPSConnection, HTTPException, BaseSSLError, + BrokenPipeError, ) from .request import RequestMethods from .response import HTTPResponse @@ -111,16 +112,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param host: Host used for this HTTP Connection (e.g. "localhost"), passed into - :class:`httplib.HTTPConnection`. + :class:`http.client.HTTPConnection`. :param port: Port used for this HTTP Connection (None is equivalent to 80), passed - into :class:`httplib.HTTPConnection`. + into :class:`http.client.HTTPConnection`. 
:param strict: Causes BadStatusLine to be raised if the status line can't be parsed as a valid HTTP/1.0 or 1.1 status line, passed into - :class:`httplib.HTTPConnection`. + :class:`http.client.HTTPConnection`. .. note:: Only works in Python 2. This parameter is ignored in Python 3. @@ -154,11 +155,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param _proxy: Parsed proxy URL, should not be used directly, instead, see - :class:`urllib3.connectionpool.ProxyManager`" + :class:`urllib3.ProxyManager` :param _proxy_headers: A dictionary with proxy headers, should not be used directly, - instead, see :class:`urllib3.connectionpool.ProxyManager`" + instead, see :class:`urllib3.ProxyManager` :param \\**conn_kw: Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, @@ -272,7 +273,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() if getattr(conn, "auto_open", 1) == 0: # This is a proxied connection that has been mutated by - # httplib._tunnel() and cannot be reused (since it would + # http.client._tunnel() and cannot be reused (since it would # attempt to bypass the proxy) conn = None @@ -384,12 +385,30 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) raise - # conn.request() calls httplib.*.request, not the method in + # conn.request() calls http.client.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. - if chunked: - conn.request_chunked(method, url, **httplib_request_kw) - else: - conn.request(method, url, **httplib_request_kw) + try: + if chunked: + conn.request_chunked(method, url, **httplib_request_kw) + else: + conn.request(method, url, **httplib_request_kw) + + # We are swallowing BrokenPipeError (errno.EPIPE) since the server is + # legitimately able to close the connection after sending a valid response. + # With this behaviour, the received response is still readable. + except BrokenPipeError: + # Python 3 + pass + except IOError as e: + # Python 2 and macOS/Linux + # EPIPE and ESHUTDOWN are BrokenPipeError on Python 2, and EPROTOTYPE is needed on macOS + # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/ + if e.errno not in { + errno.EPIPE, + errno.ESHUTDOWN, + errno.EPROTOTYPE, + }: + raise # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout @@ -565,7 +584,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param assert_same_host: If ``True``, will make sure that the host of the pool requests is - consistent else will raise HostChangedError. When False, you can + consistent else will raise HostChangedError. When ``False``, you can use the pool on an HTTP proxy and request foreign hosts. :param timeout: @@ -837,11 +856,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ Same as :class:`.HTTPConnectionPool`, but HTTPS. - When Python is compiled with the :mod:`ssl` module, then - :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`.HTTPSConnection`. - - :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + :class:`.HTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. If ``assert_hostname`` is False, no verification is done. @@ -938,7 +953,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): def _new_conn(self): """ - Return a fresh :class:`httplib.HTTPSConnection`. 
+ Return a fresh :class:`http.client.HTTPSConnection`. """ self.num_connections += 1 log.debug( diff --git a/lib/urllib3/contrib/__init__.pyi b/lib/urllib3/contrib/__init__.pyi new file mode 100644 index 0000000..e69de29 diff --git a/lib/urllib3/contrib/_securetransport/bindings.py b/lib/urllib3/contrib/_securetransport/bindings.py index d9b6733..e879cb5 100644 --- a/lib/urllib3/contrib/_securetransport/bindings.py +++ b/lib/urllib3/contrib/_securetransport/bindings.py @@ -45,17 +45,11 @@ from ctypes import ( c_bool, ) from ctypes import CDLL, POINTER, CFUNCTYPE +from urllib3.packages.six import raise_from -security_path = find_library("Security") -if not security_path: - raise ImportError("The library Security could not be found") - - -core_foundation_path = find_library("CoreFoundation") -if not core_foundation_path: - raise ImportError("The library CoreFoundation could not be found") - +if platform.system() != "Darwin": + raise ImportError("Only macOS is supported") version = platform.mac_ver()[0] version_info = tuple(map(int, version.split("."))) @@ -65,8 +59,31 @@ if version_info < (10, 8): % (version_info[0], version_info[1]) ) -Security = CDLL(security_path, use_errno=True) -CoreFoundation = CDLL(core_foundation_path, use_errno=True) + +def load_cdll(name, macos10_16_path): + """Loads a CDLL by name, falling back to known path on 10.16+""" + try: + # Big Sur is technically 11 but we use 10.16 due to the Big Sur + # beta being labeled as 10.16. + if version_info >= (10, 16): + path = macos10_16_path + else: + path = find_library(name) + if not path: + raise OSError # Caught and reraised as 'ImportError' + return CDLL(path, use_errno=True) + except OSError: + raise_from(ImportError("The library %s failed to load" % name), None) + + +Security = load_cdll( + "Security", "/System/Library/Frameworks/Security.framework/Security" +) +CoreFoundation = load_cdll( + "CoreFoundation", + "/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", +) + Boolean = c_bool CFIndex = c_long @@ -276,6 +293,13 @@ try: Security.SSLSetProtocolVersionMax.argtypes = [SSLContextRef, SSLProtocol] Security.SSLSetProtocolVersionMax.restype = OSStatus + try: + Security.SSLSetALPNProtocols.argtypes = [SSLContextRef, CFArrayRef] + Security.SSLSetALPNProtocols.restype = OSStatus + except AttributeError: + # Supported only in 10.12+ + pass + Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p] Security.SecCopyErrorMessageString.restype = CFStringRef diff --git a/lib/urllib3/contrib/_securetransport/low_level.py b/lib/urllib3/contrib/_securetransport/low_level.py index e60168c..3d466a5 100644 --- a/lib/urllib3/contrib/_securetransport/low_level.py +++ b/lib/urllib3/contrib/_securetransport/low_level.py @@ -56,6 +56,51 @@ def _cf_dictionary_from_tuples(tuples): ) +def _cfstr(py_bstr): + """ + Given a Python binary data, create a CFString. + The string must be CFReleased by the caller. + """ + c_str = ctypes.c_char_p(py_bstr) + cf_str = CoreFoundation.CFStringCreateWithCString( + CoreFoundation.kCFAllocatorDefault, + c_str, + CFConst.kCFStringEncodingUTF8, + ) + return cf_str + + +def _create_cfstring_array(lst): + """ + Given a list of Python binary data, create an associated CFMutableArray. + The array must be CFReleased by the caller. + + Raises an ssl.SSLError on failure. 
+ """ + cf_arr = None + try: + cf_arr = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks), + ) + if not cf_arr: + raise MemoryError("Unable to allocate memory!") + for item in lst: + cf_str = _cfstr(item) + if not cf_str: + raise MemoryError("Unable to allocate memory!") + try: + CoreFoundation.CFArrayAppendValue(cf_arr, cf_str) + finally: + CoreFoundation.CFRelease(cf_str) + except BaseException as e: + if cf_arr: + CoreFoundation.CFRelease(cf_arr) + raise ssl.SSLError("Unable to allocate array: %s" % (e,)) + return cf_arr + + def _cf_string_to_unicode(value): """ Creates a Unicode string from a CFString object. Used entirely for error diff --git a/lib/urllib3/contrib/pyopenssl.py b/lib/urllib3/contrib/pyopenssl.py index 81a8065..54ee493 100644 --- a/lib/urllib3/contrib/pyopenssl.py +++ b/lib/urllib3/contrib/pyopenssl.py @@ -1,27 +1,31 @@ """ -SSL with SNI_-support for Python 2. Follow these instructions if you would -like to verify SSL certificates in Python 2. Note, the default libraries do +TLS with SNI_-support for Python 2. Follow these instructions if you would +like to verify TLS certificates in Python 2. Note, the default libraries do *not* do certificate checking; you need to do additional work to validate certificates yourself. This needs the following packages installed: -* pyOpenSSL (tested with 16.0.0) -* cryptography (minimum 1.3.4, from pyopenssl) -* idna (minimum 2.0, from cryptography) +* `pyOpenSSL`_ (tested with 16.0.0) +* `cryptography`_ (minimum 1.3.4, from pyopenssl) +* `idna`_ (minimum 2.0, from cryptography) However, pyopenssl depends on cryptography, which depends on idna, so while we use all three directly here we end up having relatively few packages required. You can install them with the following command: - pip install pyopenssl cryptography idna +.. code-block:: bash + + $ python -m pip install pyopenssl cryptography idna To activate certificate checking, call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code before you begin making HTTP requests. This can be done in a ``sitecustomize`` module, or at any other time before your application begins using ``urllib3``, -like this:: +like this: + +.. code-block:: python try: import urllib3.contrib.pyopenssl @@ -35,11 +39,11 @@ when the required modules are installed. Activating this module also has the positive side effect of disabling SSL/TLS compression in Python 2 (see `CRIME attack`_). -If you want to configure the default list of supported cipher suites, you can -set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. - .. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication .. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) +.. _pyopenssl: https://www.pyopenssl.org +.. _cryptography: https://cryptography.io +.. _idna: https://github.com/kjd/idna """ from __future__ import absolute_import @@ -465,6 +469,10 @@ class PyOpenSSLContext(object): self._ctx.set_passwd_cb(lambda *_: password) self._ctx.use_privatekey_file(keyfile or certfile) + def set_alpn_protocols(self, protocols): + protocols = [six.ensure_binary(p) for p in protocols] + return self._ctx.set_alpn_protos(protocols) + def wrap_socket( self, sock, diff --git a/lib/urllib3/contrib/securetransport.py b/lib/urllib3/contrib/securetransport.py index a6b7e94..3fffb2f 100644 --- a/lib/urllib3/contrib/securetransport.py +++ b/lib/urllib3/contrib/securetransport.py @@ -29,6 +29,8 @@ library. 
An enormous debt is owed to him for blazing this trail for us. For that reason, this code should be considered to be covered both by urllib3's license and by oscrypto's: +.. code-block:: + Copyright (c) 2015-2016 Will Bond Permission is hereby granted, free of charge, to any person obtaining a @@ -56,6 +58,7 @@ import ctypes import errno import os.path import shutil +import six import socket import ssl import threading @@ -68,6 +71,7 @@ from ._securetransport.low_level import ( _cert_array_from_pem, _temporary_keychain, _load_client_cert_chain, + _create_cfstring_array, ) try: # Platform-specific: Python 2 @@ -374,6 +378,19 @@ class WrappedSocket(object): ) _assert_no_error(result) + def _set_alpn_protocols(self, protocols): + """ + Sets up the ALPN protocols on the context. + """ + if not protocols: + return + protocols_arr = _create_cfstring_array(protocols) + try: + result = Security.SSLSetALPNProtocols(self.context, protocols_arr) + _assert_no_error(result) + finally: + CoreFoundation.CFRelease(protocols_arr) + def _custom_validate(self, verify, trust_bundle): """ Called when we have set custom validation. We do this in two cases: @@ -441,6 +458,7 @@ class WrappedSocket(object): client_cert, client_key, client_key_passphrase, + alpn_protocols, ): """ Actually performs the TLS handshake. This is run automatically by @@ -481,6 +499,9 @@ class WrappedSocket(object): # Setup the ciphers. self._set_ciphers() + # Setup the ALPN protocols. + self._set_alpn_protocols(alpn_protocols) + # Set the minimum and maximum TLS versions. result = Security.SSLSetProtocolVersionMin(self.context, min_version) _assert_no_error(result) @@ -754,6 +775,7 @@ class SecureTransportContext(object): self._client_cert = None self._client_key = None self._client_key_passphrase = None + self._alpn_protocols = None @property def check_hostname(self): @@ -831,6 +853,18 @@ class SecureTransportContext(object): self._client_key = keyfile self._client_cert_passphrase = password + def set_alpn_protocols(self, protocols): + """ + Sets the ALPN protocols that will later be set on the context. + + Raises a NotImplementedError if ALPN is not supported. + """ + if not hasattr(Security, "SSLSetALPNProtocols"): + raise NotImplementedError( + "SecureTransport supports ALPN only in macOS 10.12+" + ) + self._alpn_protocols = [six.ensure_binary(p) for p in protocols] + def wrap_socket( self, sock, @@ -860,5 +894,6 @@ class SecureTransportContext(object): self._client_cert, self._client_key, self._client_key_passphrase, + self._alpn_protocols, ) return wrapped_socket diff --git a/lib/urllib3/contrib/socks.py b/lib/urllib3/contrib/socks.py index 9e97f7a..516e610 100644 --- a/lib/urllib3/contrib/socks.py +++ b/lib/urllib3/contrib/socks.py @@ -14,22 +14,26 @@ supports the following SOCKS features: - SOCKS5 with local DNS (``proxy_url='socks5://...``) - Usernames and passwords for the SOCKS proxy - .. note:: - It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in - your ``proxy_url`` to ensure that DNS resolution is done from the remote - server instead of client-side when connecting to a domain name. +.. note:: + It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in + your ``proxy_url`` to ensure that DNS resolution is done from the remote + server instead of client-side when connecting to a domain name. SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5 supports IPv4, IPv6, and domain names. 
When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url`` -will be sent as the ``userid`` section of the SOCKS request:: +will be sent as the ``userid`` section of the SOCKS request: + +.. code-block:: python proxy_url="socks4a://@proxy-host" When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion of the ``proxy_url`` will be sent as the username/password to authenticate -with the proxy:: +with the proxy: + +.. code-block:: python proxy_url="socks5h://:@proxy-host" diff --git a/lib/urllib3/contrib/ssl.py b/lib/urllib3/contrib/ssl.py new file mode 100644 index 0000000..69a9898 --- /dev/null +++ b/lib/urllib3/contrib/ssl.py @@ -0,0 +1,197 @@ +import ssl +import socket +import io + +SSL_BLOCKSIZE = 16384 + + +class SSLTransport: + """ + The SSLTransport wraps an existing socket and establishes an SSL connection. + + Contrary to Python's implementation of SSLSocket, it allows you to chain + multiple TLS connections together. It's particularly useful if you need to + implement TLS within TLS. + + The class supports most of the socket API operations. + """ + + def __init__( + self, socket, ssl_context, suppress_ragged_eofs=True, server_hostname=None + ): + """ + Create an SSLTransport around socket using the provided ssl_context. + """ + self.incoming = ssl.MemoryBIO() + self.outgoing = ssl.MemoryBIO() + + self.suppress_ragged_eofs = suppress_ragged_eofs + self.socket = socket + + self.sslobj = ssl_context.wrap_bio( + self.incoming, self.outgoing, server_hostname=server_hostname + ) + + # Perform initial handshake. + self._ssl_io_loop(self.sslobj.do_handshake) + + def __enter__(self): + return self + + def __exit__(self, *_): + self.close() + + def fileno(self): + return self.socket.fileno() + + def read(self, len=1024, buffer=None): + return self._wrap_ssl_read(len, buffer) + + def recv(self, len=1024, flags=0): + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to recv") + return self._wrap_ssl_read(len) + + def recv_into(self, buffer, nbytes=None, flags=0): + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to recv_into") + if buffer and (nbytes is None): + nbytes = len(buffer) + elif nbytes is None: + nbytes = 1024 + return self.read(nbytes, buffer) + + def sendall(self, data, flags=0): + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to sendall") + count = 0 + with memoryview(data) as view, view.cast("B") as byte_view: + amount = len(byte_view) + while count < amount: + v = self.send(byte_view[count:]) + count += v + + def send(self, data, flags=0): + if flags != 0: + raise ValueError("non-zero flags not allowed in calls to send") + response = self._ssl_io_loop(self.sslobj.write, data) + return response + + def makefile( + self, mode="r", buffering=None, encoding=None, errors=None, newline=None + ): + """ + Python's httpclient uses makefile and buffered io when reading HTTP + messages and we need to support it. + + This is unfortunately a copy and paste of socket.py makefile with small + changes to point to the socket directly. 
+ """ + if not set(mode) <= {"r", "w", "b"}: + raise ValueError("invalid mode %r (only r, w, b allowed)" % (mode,)) + + writing = "w" in mode + reading = "r" in mode or not writing + assert reading or writing + binary = "b" in mode + rawmode = "" + if reading: + rawmode += "r" + if writing: + rawmode += "w" + raw = socket.SocketIO(self, rawmode) + self.socket._io_refs += 1 + if buffering is None: + buffering = -1 + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE + if buffering == 0: + if not binary: + raise ValueError("unbuffered streams must be binary") + return raw + if reading and writing: + buffer = io.BufferedRWPair(raw, raw, buffering) + elif reading: + buffer = io.BufferedReader(raw, buffering) + else: + assert writing + buffer = io.BufferedWriter(raw, buffering) + if binary: + return buffer + text = io.TextIOWrapper(buffer, encoding, errors, newline) + text.mode = mode + return text + + def unwrap(self): + self._ssl_io_loop(self.sslobj.unwrap) + + def close(self): + self.socket.close() + + def getpeercert(self, binary_form=False): + return self.sslobj.getpeercert(binary_form) + + def version(self): + return self.sslobj.version() + + def cipher(self): + return self.sslobj.cipher() + + def selected_alpn_protocol(self): + return self.sslobj.selected_alpn_protocol() + + def selected_npn_protocol(self): + return self.sslobj.selected_npn_protocol() + + def shared_ciphers(self): + return self.sslobj.shared_ciphers() + + def compression(self): + return self.sslobj.compression() + + def settimeout(self, value): + self.socket.settimeout(value) + + def gettimeout(self): + return self.socket.gettimeout() + + def _decref_socketios(self): + self.socket._decref_socketios() + + def _wrap_ssl_read(self, len, buffer=None): + response = None + try: + response = self._ssl_io_loop(self.sslobj.read, len, buffer) + except ssl.SSLError as e: + if e.errno == ssl.SSL_ERROR_EOF and self.suppress_ragged_eofs: + response = 0 # eof, return 0. + else: + raise + return response + + def _ssl_io_loop(self, func, *args): + """ Performs an I/O loop between incoming/outgoing and the socket.""" + should_loop = True + + while should_loop: + errno = None + try: + ret = func(*args) + except ssl.SSLError as e: + if e.errno not in (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE): + # WANT_READ, and WANT_WRITE are expected, others are not. + raise e + errno = e.errno + + buf = self.outgoing.read() + self.socket.sendall(buf) + + if errno is None: + should_loop = False + elif errno == ssl.SSL_ERROR_WANT_READ: + buf = self.socket.recv(SSL_BLOCKSIZE) + if buf: + self.incoming.write(buf) + else: + self.incoming.write_eof() + return ret diff --git a/lib/urllib3/exceptions.py b/lib/urllib3/exceptions.py index e59ebbf..87d4ce7 100644 --- a/lib/urllib3/exceptions.py +++ b/lib/urllib3/exceptions.py @@ -5,17 +5,19 @@ from .packages.six.moves.http_client import IncompleteRead as httplib_Incomplete class HTTPError(Exception): - "Base exception used by this module." + """Base exception used by this module.""" + pass class HTTPWarning(Warning): - "Base warning used by this module." + """Base warning used by this module.""" + pass class PoolError(HTTPError): - "Base exception for errors caused within a pool." + """Base exception for errors caused within a pool.""" def __init__(self, pool, message): self.pool = pool @@ -27,7 +29,7 @@ class PoolError(HTTPError): class RequestError(PoolError): - "Base exception for PoolErrors that have associated URLs." 
+ """Base exception for PoolErrors that have associated URLs.""" def __init__(self, pool, url, message): self.url = url @@ -39,12 +41,13 @@ class RequestError(PoolError): class SSLError(HTTPError): - "Raised when SSL certificate fails in an HTTPS connection." + """Raised when SSL certificate fails in an HTTPS connection.""" + pass class ProxyError(HTTPError): - "Raised when the connection to a proxy fails." + """Raised when the connection to a proxy fails.""" def __init__(self, message, error, *args): super(ProxyError, self).__init__(message, error, *args) @@ -52,12 +55,14 @@ class ProxyError(HTTPError): class DecodeError(HTTPError): - "Raised when automatic decoding based on Content-Type fails." + """Raised when automatic decoding based on Content-Type fails.""" + pass class ProtocolError(HTTPError): - "Raised when something unexpected happens mid-request/response." + """Raised when something unexpected happens mid-request/response.""" + pass @@ -87,7 +92,7 @@ class MaxRetryError(RequestError): class HostChangedError(RequestError): - "Raised when an existing pool gets a request for a foreign host." + """Raised when an existing pool gets a request for a foreign host.""" def __init__(self, pool, url, retries=3): message = "Tried to open a foreign host with url: %s" % url @@ -96,13 +101,13 @@ class HostChangedError(RequestError): class TimeoutStateError(HTTPError): - """ Raised when passing an invalid state to a timeout """ + """Raised when passing an invalid state to a timeout""" pass class TimeoutError(HTTPError): - """ Raised when a socket timeout error occurs. + """Raised when a socket timeout error occurs. Catching this error will catch both :exc:`ReadTimeoutErrors ` and :exc:`ConnectTimeoutErrors `. @@ -112,39 +117,45 @@ class TimeoutError(HTTPError): class ReadTimeoutError(TimeoutError, RequestError): - "Raised when a socket timeout occurs while receiving data from a server" + """Raised when a socket timeout occurs while receiving data from a server""" + pass # This timeout error does not have a URL attached and needs to inherit from the # base HTTPError class ConnectTimeoutError(TimeoutError): - "Raised when a socket timeout occurs while connecting to a server" + """Raised when a socket timeout occurs while connecting to a server""" + pass class NewConnectionError(ConnectTimeoutError, PoolError): - "Raised when we fail to establish a new connection. Usually ECONNREFUSED." + """Raised when we fail to establish a new connection. Usually ECONNREFUSED.""" + pass class EmptyPoolError(PoolError): - "Raised when a pool runs out of connections and no more are allowed." + """Raised when a pool runs out of connections and no more are allowed.""" + pass class ClosedPoolError(PoolError): - "Raised when a request enters a pool after the pool has been closed." + """Raised when a request enters a pool after the pool has been closed.""" + pass class LocationValueError(ValueError, HTTPError): - "Raised when there is something wrong with a given URL input." + """Raised when there is something wrong with a given URL input.""" + pass class LocationParseError(LocationValueError): - "Raised when get_host or similar fails to parse the URL input." + """Raised when get_host or similar fails to parse the URL input.""" def __init__(self, location): message = "Failed to parse: %s" % location @@ -154,7 +165,7 @@ class LocationParseError(LocationValueError): class URLSchemeUnknown(LocationValueError): - "Raised when a URL input has an unsupported scheme." 
+ """Raised when a URL input has an unsupported scheme.""" def __init__(self, scheme): message = "Not supported URL scheme %s" % scheme @@ -164,38 +175,45 @@ class URLSchemeUnknown(LocationValueError): class ResponseError(HTTPError): - "Used as a container for an error reason supplied in a MaxRetryError." + """Used as a container for an error reason supplied in a MaxRetryError.""" + GENERIC_ERROR = "too many error responses" SPECIFIC_ERROR = "too many {status_code} error responses" class SecurityWarning(HTTPWarning): - "Warned when performing security reducing actions" + """Warned when performing security reducing actions""" + pass class SubjectAltNameWarning(SecurityWarning): - "Warned when connecting to a host with a certificate missing a SAN." + """Warned when connecting to a host with a certificate missing a SAN.""" + pass class InsecureRequestWarning(SecurityWarning): - "Warned when making an unverified HTTPS request." + """Warned when making an unverified HTTPS request.""" + pass class SystemTimeWarning(SecurityWarning): - "Warned when system time is suspected to be wrong" + """Warned when system time is suspected to be wrong""" + pass class InsecurePlatformWarning(SecurityWarning): - "Warned when certain SSL configuration is not available on a platform." + """Warned when certain TLS/SSL configuration is not available on a platform.""" + pass class SNIMissingWarning(HTTPWarning): - "Warned when making a HTTPS request without SNI available." + """Warned when making a HTTPS request without SNI available.""" + pass @@ -209,14 +227,15 @@ class DependencyWarning(HTTPWarning): class ResponseNotChunked(ProtocolError, ValueError): - "Response needs to be chunked in order to read it as chunks." + """Response needs to be chunked in order to read it as chunks.""" + pass class BodyNotHttplibCompatible(HTTPError): """ - Body should be httplib.HTTPResponse like (have an fp attribute which - returns raw chunks) for read_chunked(). + Body should be :class:`http.client.HTTPResponse` like + (have an fp attribute which returns raw chunks) for read_chunked(). """ pass @@ -226,9 +245,8 @@ class IncompleteRead(HTTPError, httplib_IncompleteRead): """ Response length doesn't match expected Content-Length - Subclass of http_client.IncompleteRead to allow int value - for `partial` to avoid creating large objects on streamed - reads. + Subclass of :class:`http.client.IncompleteRead` to allow int value + for ``partial`` to avoid creating large objects on streamed reads. """ def __init__(self, partial, expected): @@ -259,12 +277,14 @@ class InvalidChunkLength(HTTPError, httplib_IncompleteRead): class InvalidHeader(HTTPError): - "The header provided was somehow invalid." + """The header provided was somehow invalid.""" + pass class ProxySchemeUnknown(AssertionError, URLSchemeUnknown): - "ProxyManager does not support the supplied scheme" + """ProxyManager does not support the supplied scheme""" + # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. def __init__(self, scheme): @@ -273,12 +293,13 @@ class ProxySchemeUnknown(AssertionError, URLSchemeUnknown): class ProxySchemeUnsupported(ValueError): - "Fetching HTTPS resources through HTTPS proxies is unsupported" + """Fetching HTTPS resources through HTTPS proxies is unsupported""" + pass class HeaderParsingError(HTTPError): - "Raised by assert_header_parsing, but we convert it to a log.warning statement." 
+ """Raised by assert_header_parsing, but we convert it to a log.warning statement.""" def __init__(self, defects, unparsed_data): message = "%s, unparsed data: %r" % (defects or "Unknown", unparsed_data) @@ -286,5 +307,6 @@ class HeaderParsingError(HTTPError): class UnrewindableBodyError(HTTPError): - "urllib3 encountered an error when trying to rewind a body" + """urllib3 encountered an error when trying to rewind a body""" + pass diff --git a/lib/urllib3/exceptions.pyi b/lib/urllib3/exceptions.pyi new file mode 100644 index 0000000..1254abe --- /dev/null +++ b/lib/urllib3/exceptions.pyi @@ -0,0 +1,55 @@ +from typing import Any, Optional, Union, Tuple, TYPE_CHECKING + +if TYPE_CHECKING: + from urllib3.connectionpool import ConnectionPool + +class HTTPError(Exception): ... +class HTTPWarning(Warning): ... + +class PoolError(HTTPError): + pool: ConnectionPool + def __init__(self, pool: ConnectionPool, message: str) -> None: ... + def __reduce__(self) -> Union[str, Tuple[Any, ...]]: ... + +class RequestError(PoolError): + url: str + def __init__(self, pool: ConnectionPool, url: str, message: str) -> None: ... + def __reduce__(self) -> Union[str, Tuple[Any, ...]]: ... + +class SSLError(HTTPError): ... +class ProxyError(HTTPError): ... +class DecodeError(HTTPError): ... +class ProtocolError(HTTPError): ... + +ConnectionError: ProtocolError + +class MaxRetryError(RequestError): + reason: str + def __init__( + self, pool: ConnectionPool, url: str, reason: Optional[str] + ) -> None: ... + +class HostChangedError(RequestError): + retries: int + def __init__(self, pool: ConnectionPool, url: str, retries: int) -> None: ... + +class TimeoutStateError(HTTPError): ... +class TimeoutError(HTTPError): ... +class ReadTimeoutError(TimeoutError, RequestError): ... +class ConnectTimeoutError(TimeoutError): ... +class EmptyPoolError(PoolError): ... +class ClosedPoolError(PoolError): ... +class LocationValueError(ValueError, HTTPError): ... + +class LocationParseError(LocationValueError): + location: str + def __init__(self, location: str) -> None: ... + +class ResponseError(HTTPError): + GENERIC_ERROR: Any + SPECIFIC_ERROR: Any + +class SecurityWarning(HTTPWarning): ... +class InsecureRequestWarning(SecurityWarning): ... +class SystemTimeWarning(SecurityWarning): ... +class InsecurePlatformWarning(SecurityWarning): ... diff --git a/lib/urllib3/fields.py b/lib/urllib3/fields.py index 8715b22..29c0bd9 100644 --- a/lib/urllib3/fields.py +++ b/lib/urllib3/fields.py @@ -26,7 +26,8 @@ def format_header_param_rfc2231(name, value): strategy defined in RFC 2231. Particularly useful for header parameters which might contain - non-ASCII values, like file names. This follows RFC 2388 Section 4.4. + non-ASCII values, like file names. This follows + `RFC 2388 Section 4.4 `_. :param name: The name of the parameter, a string expected to be ASCII only. @@ -65,7 +66,6 @@ _HTML5_REPLACEMENTS = { u"\u0022": u"%22", # Replace "\" with "\\". u"\u005C": u"\u005C\u005C", - u"\u005C": u"\u005C\u005C", } # All control characters from 0x00 to 0x1F *except* 0x1B. diff --git a/lib/urllib3/fields.pyi b/lib/urllib3/fields.pyi new file mode 100644 index 0000000..e1f3f90 --- /dev/null +++ b/lib/urllib3/fields.pyi @@ -0,0 +1,28 @@ +# Stubs for requests.packages.urllib3.fields (Python 3.4) + +from typing import Any, Callable, Mapping, Optional + +def guess_content_type(filename: str, default: str) -> str: ... +def format_header_param_rfc2231(name: str, value: str) -> str: ... +def format_header_param_html5(name: str, value: str) -> str: ... 
+def format_header_param(name: str, value: str) -> str: ... + +class RequestField: + data: Any + headers: Optional[Mapping[str, str]] + def __init__( + self, + name: str, + data: Any, + filename: Optional[str], + headers: Optional[Mapping[str, str]], + header_formatter: Callable[[str, str], str], + ) -> None: ... + @classmethod + def from_tuples( + cls, fieldname: str, value: str, header_formatter: Callable[[str, str], str] + ) -> RequestField: ... + def render_headers(self) -> str: ... + def make_multipart( + self, content_disposition: str, content_type: str, content_location: str + ) -> None: ... diff --git a/lib/urllib3/filepost.pyi b/lib/urllib3/filepost.pyi new file mode 100644 index 0000000..54cf62a --- /dev/null +++ b/lib/urllib3/filepost.pyi @@ -0,0 +1,16 @@ +from typing import Any, Generator, List, Mapping, Optional, Tuple, Union + +from . import fields + +RequestField = fields.RequestField +Fields = Union[Mapping[str, str], List[Tuple[str]], List[RequestField]] +Iterator = Generator[Tuple[str], None, None] + +writer: Any + +def choose_boundary() -> str: ... +def iter_field_objects(fields: Fields) -> Iterator: ... +def iter_fields(fields: Fields) -> Iterator: ... +def encode_multipart_formdata( + fields: Fields, boundary: Optional[str] +) -> Tuple[str]: ... diff --git a/lib/urllib3/packages/__init__.pyi b/lib/urllib3/packages/__init__.pyi new file mode 100644 index 0000000..e69de29 diff --git a/lib/urllib3/packages/six.pyi b/lib/urllib3/packages/six.pyi new file mode 100644 index 0000000..e69de29 diff --git a/lib/urllib3/packages/ssl_match_hostname/__init__.py b/lib/urllib3/packages/ssl_match_hostname/__init__.py index 75b6bb1..72e6899 100644 --- a/lib/urllib3/packages/ssl_match_hostname/__init__.py +++ b/lib/urllib3/packages/ssl_match_hostname/__init__.py @@ -10,10 +10,10 @@ try: except ImportError: try: # Backport of the function from a pypi module - from backports.ssl_match_hostname import CertificateError, match_hostname + from backports.ssl_match_hostname import CertificateError, match_hostname # type: ignore except ImportError: # Our vendored copy - from ._implementation import CertificateError, match_hostname + from ._implementation import CertificateError, match_hostname # type: ignore # Not needed, but documenting what we provide. __all__ = ("CertificateError", "match_hostname") diff --git a/lib/urllib3/packages/ssl_match_hostname/__init__.pyi b/lib/urllib3/packages/ssl_match_hostname/__init__.pyi new file mode 100644 index 0000000..1915c0e --- /dev/null +++ b/lib/urllib3/packages/ssl_match_hostname/__init__.pyi @@ -0,0 +1,4 @@ +import ssl + +CertificateError = ssl.CertificateError +match_hostname = ssl.match_hostname diff --git a/lib/urllib3/packages/ssl_match_hostname/_implementation.pyi b/lib/urllib3/packages/ssl_match_hostname/_implementation.pyi new file mode 100644 index 0000000..ed472ba --- /dev/null +++ b/lib/urllib3/packages/ssl_match_hostname/_implementation.pyi @@ -0,0 +1,11 @@ +from typing import Dict, Tuple, Union + +# https://github.com/python/typeshed/blob/master/stdlib/2and3/ssl.pyi +_PCTRTT = Tuple[Tuple[str, str], ...] +_PCTRTTT = Tuple[_PCTRTT, ...] +_PeerCertRetDictType = Dict[str, Union[str, _PCTRTTT, _PCTRTT]] +_PeerCertRetType = Union[_PeerCertRetDictType, bytes, None] + +class CertificateError(ValueError): ... + +def match_hostname(cert: _PeerCertRetType, hostname: str) -> None: ... 
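The ``fields``/``filepost`` stubs above describe the multipart helpers exported at the package root. As a rough usage sketch (not part of the diff; the field names and URL below are placeholders), assuming the documented ``urllib3.encode_multipart_formdata`` API:

.. code-block:: python

    import urllib3

    # Placeholder form fields: a plain value plus a (filename, data, content-type) tuple.
    form = {
        "token": "abc123",
        "upload": ("report.txt", b"hello world", "text/plain"),
    }

    # Returns the encoded body and a content type carrying the multipart boundary.
    body, content_type = urllib3.encode_multipart_formdata(form)

    http = urllib3.PoolManager()
    resp = http.request(
        "POST",
        "https://example.com/upload",  # placeholder URL
        body=body,
        headers={"Content-Type": content_type},
    )

Passing ``fields=form`` to ``request()`` performs the same encoding implicitly; the explicit form is useful when the boundary or body needs to be inspected.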
diff --git a/lib/urllib3/poolmanager.py b/lib/urllib3/poolmanager.py index ab183de..70933fd 100644 --- a/lib/urllib3/poolmanager.py +++ b/lib/urllib3/poolmanager.py @@ -191,7 +191,7 @@ class PoolManager(RequestMethods): def _new_pool(self, scheme, host, port, request_context=None): """ - Create a new :class:`ConnectionPool` based on host, port, scheme, and + Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and any additional pool keyword arguments. If ``request_context`` is provided, it is provided as keyword arguments @@ -227,7 +227,7 @@ class PoolManager(RequestMethods): def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): """ - Get a :class:`ConnectionPool` based on the host, port, and scheme. + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme. If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is @@ -250,7 +250,7 @@ class PoolManager(RequestMethods): def connection_from_context(self, request_context): """ - Get a :class:`ConnectionPool` based on the request context. + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context. ``request_context`` must at least contain the ``scheme`` key and its value must be a key in ``key_fn_by_scheme`` instance variable. @@ -265,7 +265,7 @@ class PoolManager(RequestMethods): def connection_from_pool_key(self, pool_key, request_context=None): """ - Get a :class:`ConnectionPool` based on the provided pool key. + Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key. ``pool_key`` should be a namedtuple that only contains immutable objects. At a minimum it must have the ``scheme``, ``host``, and @@ -337,7 +337,7 @@ class PoolManager(RequestMethods): def urlopen(self, method, url, redirect=True, **kw): """ - Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + Same as :meth:`urllib3.HTTPConnectionPool.urlopen` with custom cross-host redirect logic and only sends the request-uri portion of the ``url``. diff --git a/lib/urllib3/request.py b/lib/urllib3/request.py index 55f160b..b058bf6 100644 --- a/lib/urllib3/request.py +++ b/lib/urllib3/request.py @@ -10,8 +10,8 @@ __all__ = ["RequestMethods"] class RequestMethods(object): """ Convenience mixin for classes who implement a :meth:`urlopen` method, such - as :class:`~urllib3.connectionpool.HTTPConnectionPool` and - :class:`~urllib3.poolmanager.PoolManager`. + as :class:`urllib3.HTTPConnectionPool` and + :class:`urllib3.PoolManager`. Provides behavior for making common types of HTTP request methods and decides which type of request field encoding to use. @@ -111,9 +111,9 @@ class RequestMethods(object): the body. This is useful for request methods like POST, PUT, PATCH, etc. When ``encode_multipart=True`` (default), then - :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + :func:`urllib3.encode_multipart_formdata` is used to encode the payload with the appropriate content type. Otherwise - :meth:`urllib.urlencode` is used with the + :func:`urllib.parse.urlencode` is used with the 'application/x-www-form-urlencoded' content type. 
Multipart encoding must be used when posting files, and it's reasonably diff --git a/lib/urllib3/response.py b/lib/urllib3/response.py index fcf9d7f..149486e 100644 --- a/lib/urllib3/response.py +++ b/lib/urllib3/response.py @@ -22,6 +22,7 @@ from .exceptions import ( InvalidChunkLength, InvalidHeader, HTTPError, + SSLError, ) from .packages.six import string_types as basestring, PY3 from .connection import HTTPException, BaseSSLError @@ -156,13 +157,13 @@ class HTTPResponse(io.IOBase): """ HTTP Response container. - Backwards-compatible to httplib's HTTPResponse but the response ``body`` is + Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is loaded and decoded on-demand when the ``data`` property is accessed. This class is also compatible with the Python standard library's :mod:`io` module, and can hence be treated as a readable object in the context of that framework. - Extra parameters for behaviour not present in httplib.HTTPResponse: + Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: :param preload_content: If True, the response's body will be preloaded during construction. @@ -172,7 +173,7 @@ class HTTPResponse(io.IOBase): 'content-encoding' header. :param original_response: - When this HTTPResponse wrapper is generated from an httplib.HTTPResponse + When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` object, it's convenient to include the original for debug purposes. It's otherwise unused. @@ -307,8 +308,8 @@ class HTTPResponse(io.IOBase): def tell(self): """ Obtain the number of bytes pulled over the wire so far. May differ from - the amount of content returned by :meth:``HTTPResponse.read`` if bytes - are encoded on the wire (e.g, compressed). + the amount of content returned by :meth:``urllib3.response.HTTPResponse.read`` + if bytes are encoded on the wire (e.g, compressed). """ return self._fp_bytes_read @@ -442,10 +443,9 @@ class HTTPResponse(io.IOBase): except BaseSSLError as e: # FIXME: Is there a better way to differentiate between SSLErrors? - if "read operation timed out" not in str(e): # Defensive: - # This shouldn't happen but just in case we're missing an edge - # case, let's avoid swallowing SSL errors. - raise + if "read operation timed out" not in str(e): + # SSL errors related to framing/MAC get wrapped and reraised here + raise SSLError(e) raise ReadTimeoutError(self._pool, None, "Read timed out.") @@ -479,7 +479,7 @@ class HTTPResponse(io.IOBase): def read(self, amt=None, decode_content=None, cache_content=False): """ - Similar to :meth:`httplib.HTTPResponse.read`, but with two additional + Similar to :meth:`http.client.HTTPResponse.read`, but with two additional parameters: ``decode_content`` and ``cache_content``. :param amt: @@ -580,7 +580,7 @@ class HTTPResponse(io.IOBase): @classmethod def from_httplib(ResponseCls, r, **response_kw): """ - Given an :class:`httplib.HTTPResponse` instance ``r``, return a + Given an :class:`http.client.HTTPResponse` instance ``r``, return a corresponding :class:`urllib3.response.HTTPResponse` object. 
Remaining parameters are passed to the HTTPResponse constructor, along @@ -609,7 +609,7 @@ class HTTPResponse(io.IOBase): ) return resp - # Backwards-compatibility methods for httplib.HTTPResponse + # Backwards-compatibility methods for http.client.HTTPResponse def getheaders(self): return self.headers @@ -679,8 +679,8 @@ class HTTPResponse(io.IOBase): def supports_chunked_reads(self): """ Checks if the underlying file-like object looks like a - httplib.HTTPResponse object. We do this by testing for the fp - attribute. If it is present we assume it returns raw chunks as + :class:`http.client.HTTPResponse` object. We do this by testing for + the fp attribute. If it is present we assume it returns raw chunks as processed by read_chunked(). """ return hasattr(self._fp, "fp") @@ -744,7 +744,7 @@ class HTTPResponse(io.IOBase): ) if not self.supports_chunked_reads(): raise BodyNotHttplibCompatible( - "Body should be httplib.HTTPResponse like. " + "Body should be http.client.HTTPResponse like. " "It should have have an fp attribute which returns raw chunks." ) diff --git a/lib/urllib3/util/__init__.py b/lib/urllib3/util/__init__.py index 3fa98c5..24c16a2 100644 --- a/lib/urllib3/util/__init__.py +++ b/lib/urllib3/util/__init__.py @@ -14,6 +14,7 @@ from .ssl_ import ( resolve_ssl_version, ssl_wrap_socket, PROTOCOL_TLS, + ALPN_PROTOCOLS, ) from .timeout import current_time, Timeout @@ -27,6 +28,7 @@ __all__ = ( "IS_SECURETRANSPORT", "SSLContext", "PROTOCOL_TLS", + "ALPN_PROTOCOLS", "Retry", "Timeout", "Url", diff --git a/lib/urllib3/util/connection.py b/lib/urllib3/util/connection.py index 86f0a3b..9c89dc1 100644 --- a/lib/urllib3/util/connection.py +++ b/lib/urllib3/util/connection.py @@ -9,7 +9,7 @@ def is_connection_dropped(conn): # Platform-specific Returns True if the connection is dropped and should be closed. :param conn: - :class:`httplib.HTTPConnection` object. + :class:`http.client.HTTPConnection` object. Note: For platforms like AppEngine, this will always return ``False`` to let the platform handle connection recycling transparently for us. @@ -42,7 +42,7 @@ def create_connection( port)``) and return the socket object. Passing the optional *timeout* parameter will set the timeout on the socket instance before attempting to connect. If no *timeout* is supplied, the - global default timeout setting returned by :func:`getdefaulttimeout` + global default timeout setting returned by :func:`socket.getdefaulttimeout` is used. If *source_address* is set it must be a tuple of (host, port) for the socket to bind as a source address before making the connection. An host of '' or port 0 tells the OS to use the default. diff --git a/lib/urllib3/util/queue.pyi b/lib/urllib3/util/queue.pyi new file mode 100644 index 0000000..e69de29 diff --git a/lib/urllib3/util/response.py b/lib/urllib3/util/response.py index 715868d..063d991 100644 --- a/lib/urllib3/util/response.py +++ b/lib/urllib3/util/response.py @@ -1,4 +1,5 @@ from __future__ import absolute_import +from email.errors import StartBoundaryNotFoundDefect, MultipartInvariantViolationDefect from ..packages.six.moves import http_client as httplib from ..exceptions import HeaderParsingError @@ -42,8 +43,7 @@ def assert_header_parsing(headers): Only works on Python 3. - :param headers: Headers to verify. - :type headers: `httplib.HTTPMessage`. + :param http.client.HTTPMessage headers: Headers to verify. :raises urllib3.exceptions.HeaderParsingError: If parsing errors are found. 
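For context on the :class:`~urllib3.response.HTTPResponse` hunks above, a minimal streaming sketch (placeholder URL; standard 1.26-era public API assumed):

.. code-block:: python

    import urllib3

    http = urllib3.PoolManager()

    # Defer body loading, then iterate decoded chunks and release the connection.
    resp = http.request("GET", "https://example.com/", preload_content=False)
    try:
        for chunk in resp.stream(1024):
            pass  # process each chunk of the body here
    finally:
        resp.release_conn()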
@@ -66,6 +66,25 @@ def assert_header_parsing(headers): if isinstance(payload, (bytes, str)): unparsed_data = payload + if defects: + # httplib is assuming a response body is available + # when parsing headers even when httplib only sends + # header data to parse_headers() This results in + # defects on multipart responses in particular. + # See: https://github.com/urllib3/urllib3/issues/800 + + # So we ignore the following defects: + # - StartBoundaryNotFoundDefect: + # The claimed start boundary was never found. + # - MultipartInvariantViolationDefect: + # A message claimed to be a multipart but no subparts were found. + defects = [ + defect + for defect in defects + if not isinstance( + defect, (StartBoundaryNotFoundDefect, MultipartInvariantViolationDefect) + ) + ] if defects or unparsed_data: raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data) @@ -76,8 +95,9 @@ def is_response_to_head(response): Checks whether the request of a response has been a HEAD-request. Handles the quirks of AppEngine. - :param conn: - :type conn: :class:`httplib.HTTPResponse` + :param http.client.HTTPResponse response: + Response to check if the originating request + used 'HEAD' as a method. """ # FIXME: Can we do this somehow without accessing private httplib _method? method = response._method diff --git a/lib/urllib3/util/retry.py b/lib/urllib3/util/retry.py index 3d82832..e5eda7a 100644 --- a/lib/urllib3/util/retry.py +++ b/lib/urllib3/util/retry.py @@ -28,7 +28,7 @@ RequestHistory = namedtuple( class Retry(object): - """ Retry configuration. + """Retry configuration. Each retry attempt will create a new Retry object with updated values, so they can be safely reused. @@ -245,7 +245,7 @@ class Retry(object): return new_retries def get_backoff_time(self): - """ Formula for computing the current backoff + """Formula for computing the current backoff :rtype: float """ @@ -266,10 +266,17 @@ class Retry(object): if re.match(r"^\s*[0-9]+\s*$", retry_after): seconds = int(retry_after) else: - retry_date_tuple = email.utils.parsedate(retry_after) + retry_date_tuple = email.utils.parsedate_tz(retry_after) if retry_date_tuple is None: raise InvalidHeader("Invalid Retry-After header: %s" % retry_after) - retry_date = time.mktime(retry_date_tuple) + if retry_date_tuple[9] is None: # Python 2 + # Assume UTC if no timezone was specified + # On Python2.7, parsedate_tz returns None for a timezone offset + # instead of 0 if no timezone is given, where mktime_tz treats + # a None timezone offset as local time. + retry_date_tuple = retry_date_tuple[:9] + (0,) + retry_date_tuple[10:] + + retry_date = email.utils.mktime_tz(retry_date_tuple) seconds = retry_date - time.time() if seconds < 0: @@ -302,7 +309,7 @@ class Retry(object): time.sleep(backoff) def sleep(self, response=None): - """ Sleep between retry attempts. + """Sleep between retry attempts. This method will respect a server's ``Retry-After`` response header and sleep the duration of the time requested. If that is not present, it @@ -318,7 +325,7 @@ class Retry(object): self._sleep_backoff() def _is_connection_error(self, err): - """ Errors when we're fairly sure that the server did not receive the + """Errors when we're fairly sure that the server did not receive the request, so it should be safe to retry. 
""" if isinstance(err, ProxyError): @@ -326,13 +333,13 @@ class Retry(object): return isinstance(err, ConnectTimeoutError) def _is_read_error(self, err): - """ Errors that occur after the request has been started, so we should + """Errors that occur after the request has been started, so we should assume that the server began processing it. """ return isinstance(err, (ReadTimeoutError, ProtocolError)) def _is_method_retryable(self, method): - """ Checks if a given HTTP method should be retried upon, depending if + """Checks if a given HTTP method should be retried upon, depending if it is included on the method whitelist. """ if self.method_whitelist and method.upper() not in self.method_whitelist: @@ -341,7 +348,7 @@ class Retry(object): return True def is_retry(self, method, status_code, has_retry_after=False): - """ Is this method/status code retryable? (Based on whitelists and control + """Is this method/status code retryable? (Based on whitelists and control variables such as the number of total retries to allow, whether to respect the Retry-After header, whether this header is present, and whether the returned status code is on the list of status codes to @@ -385,7 +392,7 @@ class Retry(object): _pool=None, _stacktrace=None, ): - """ Return a new Retry object with incremented retry counters. + """Return a new Retry object with incremented retry counters. :param response: A response object, or None, if the server did not return a response. diff --git a/lib/urllib3/util/ssl_.py b/lib/urllib3/util/ssl_.py index 3d89a56..5d84409 100644 --- a/lib/urllib3/util/ssl_.py +++ b/lib/urllib3/util/ssl_.py @@ -1,5 +1,4 @@ from __future__ import absolute_import -import errno import warnings import hmac import os @@ -17,6 +16,7 @@ SSLContext = None HAS_SNI = False IS_PYOPENSSL = False IS_SECURETRANSPORT = False +ALPN_PROTOCOLS = ["http/1.1"] # Maps the length of a digest to a possible hash function producing this digest HASHFUNC_MAP = {32: md5, 40: sha1, 64: sha256} @@ -348,14 +348,8 @@ def ssl_wrap_socket( if ca_certs or ca_cert_dir or ca_cert_data: try: context.load_verify_locations(ca_certs, ca_cert_dir, ca_cert_data) - except IOError as e: # Platform-specific: Python 2.7 + except (IOError, OSError) as e: raise SSLError(e) - # Py33 raises FileNotFoundError which subclasses OSError - # These are not equivalent unless we check the errno attribute - except OSError as e: # Platform-specific: Python 3.3 and beyond - if e.errno == errno.ENOENT: - raise SSLError(e) - raise elif ssl_context is None and hasattr(context, "load_default_certs"): # try to load OS default certs; works well on Windows (require Python3.4+) @@ -373,16 +367,21 @@ def ssl_wrap_socket( else: context.load_cert_chain(certfile, keyfile, key_password) + try: + if hasattr(context, "set_alpn_protocols"): + context.set_alpn_protocols(ALPN_PROTOCOLS) + except NotImplementedError: + pass + # If we detect server_hostname is an IP address then the SNI # extension should not be used according to RFC3546 Section 3.1 - # We shouldn't warn the user if SNI isn't available but we would - # not be using SNI anyways due to IP address for server_hostname. - if ( - server_hostname is not None and not is_ipaddress(server_hostname) - ) or IS_SECURETRANSPORT: - if HAS_SNI and server_hostname is not None: - return context.wrap_socket(sock, server_hostname=server_hostname) - + use_sni_hostname = server_hostname and not is_ipaddress(server_hostname) + # SecureTransport uses server_hostname in certificate verification. 
+ send_sni = (use_sni_hostname and HAS_SNI) or ( + IS_SECURETRANSPORT and server_hostname + ) + # Do not warn the user if server_hostname is an invalid SNI hostname. + if not HAS_SNI and use_sni_hostname: warnings.warn( "An HTTPS request has been made, but the SNI (Server Name " "Indication) extension to TLS is not available on this platform. " @@ -394,7 +393,11 @@ def ssl_wrap_socket( SNIMissingWarning, ) - return context.wrap_socket(sock) + if send_sni: + ssl_sock = context.wrap_socket(sock, server_hostname=server_hostname) + else: + ssl_sock = context.wrap_socket(sock) + return ssl_sock def is_ipaddress(hostname): diff --git a/lib/urllib3/util/timeout.py b/lib/urllib3/util/timeout.py index b61fea7..2f287a3 100644 --- a/lib/urllib3/util/timeout.py +++ b/lib/urllib3/util/timeout.py @@ -17,22 +17,28 @@ current_time = getattr(time, "monotonic", time.time) class Timeout(object): - """ Timeout configuration. + """Timeout configuration. - Timeouts can be defined as a default for a pool:: + Timeouts can be defined as a default for a pool: - timeout = Timeout(connect=2.0, read=7.0) - http = PoolManager(timeout=timeout) - response = http.request('GET', 'http://example.com/') + .. code-block:: python - Or per-request (which overrides the default for the pool):: + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') - response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) + Or per-request (which overrides the default for the pool): - Timeouts can be disabled by setting all the parameters to ``None``:: + .. code-block:: python - no_timeout = Timeout(connect=None, read=None) - response = http.request('GET', 'http://example.com/, timeout=no_timeout) + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) + + Timeouts can be disabled by setting all the parameters to ``None``: + + .. code-block:: python + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) :param total: @@ -43,7 +49,7 @@ class Timeout(object): Defaults to None. - :type total: integer, float, or None + :type total: int, float, or None :param connect: The maximum amount of time (in seconds) to wait for a connection @@ -53,7 +59,7 @@ class Timeout(object): `_. None will set an infinite timeout for connection attempts. - :type connect: integer, float, or None + :type connect: int, float, or None :param read: The maximum amount of time (in seconds) to wait between consecutive @@ -63,7 +69,7 @@ class Timeout(object): `_. None will set an infinite timeout. - :type read: integer, float, or None + :type read: int, float, or None .. note:: @@ -111,7 +117,7 @@ class Timeout(object): @classmethod def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid. + """Check that a timeout attribute is valid. :param value: The timeout value to validate :param name: The name of the timeout attribute to validate. This is @@ -157,7 +163,7 @@ class Timeout(object): @classmethod def from_float(cls, timeout): - """ Create a new Timeout from a legacy timeout value. + """Create a new Timeout from a legacy timeout value. The timeout value used by httplib.py sets the same timeout on the connect(), and recv() socket requests. 
This creates a :class:`Timeout` @@ -172,7 +178,7 @@ class Timeout(object): return Timeout(read=timeout, connect=timeout) def clone(self): - """ Create a copy of the timeout object + """Create a copy of the timeout object Timeout properties are stored per-pool but each request needs a fresh Timeout object to ensure each one has its own start/stop configured. @@ -186,7 +192,7 @@ class Timeout(object): return Timeout(connect=self._connect, read=self._read, total=self.total) def start_connect(self): - """ Start the timeout clock, used during a connect() attempt + """Start the timeout clock, used during a connect() attempt :raises urllib3.exceptions.TimeoutStateError: if you attempt to start a timer that has been started already. @@ -197,7 +203,7 @@ class Timeout(object): return self._start_connect def get_connect_duration(self): - """ Gets the time elapsed since the call to :meth:`start_connect`. + """Gets the time elapsed since the call to :meth:`start_connect`. :return: Elapsed time in seconds. :rtype: float @@ -212,7 +218,7 @@ class Timeout(object): @property def connect_timeout(self): - """ Get the value to use when setting a connection timeout. + """Get the value to use when setting a connection timeout. This will be a positive float or integer, the value None (never timeout), or the default system timeout. @@ -230,7 +236,7 @@ class Timeout(object): @property def read_timeout(self): - """ Get the value for the read timeout. + """Get the value for the read timeout. This assumes some time has elapsed in the connection timeout and computes the read timeout appropriately. diff --git a/lib/urllib3/util/url.pyi b/lib/urllib3/util/url.pyi new file mode 100644 index 0000000..67e308f --- /dev/null +++ b/lib/urllib3/util/url.pyi @@ -0,0 +1,32 @@ +from typing import Any, List, Optional, Tuple, Union + +from .. import exceptions + +LocationParseError = exceptions.LocationParseError + +url_attrs: List[str] + +class Url: + slots: Any + def __new__( + cls, + scheme: Optional[str], + auth: Optional[str], + host: Optional[str], + port: Optional[str], + path: Optional[str], + query: Optional[str], + fragment: Optional[str], + ) -> Url: ... + @property + def hostname(self) -> str: ... + @property + def request_uri(self) -> str: ... + @property + def netloc(self) -> str: ... + @property + def url(self) -> str: ... + +def split_first(s: str, delims: str) -> Tuple[str, str, Optional[str]]: ... +def parse_url(url: str) -> Url: ... +def get_host(url: str) -> Union[str, Tuple[str]]: ... diff --git a/lib/urllib3/util/wait.py b/lib/urllib3/util/wait.py index d71d2fd..53d10ee 100644 --- a/lib/urllib3/util/wait.py +++ b/lib/urllib3/util/wait.py @@ -140,14 +140,14 @@ def wait_for_socket(*args, **kwargs): def wait_for_read(sock, timeout=None): - """ Waits for reading to be available on a given socket. + """Waits for reading to be available on a given socket. Returns True if the socket is readable, or False if the timeout expired. """ return wait_for_socket(sock, read=True, timeout=timeout) def wait_for_write(sock, timeout=None): - """ Waits for writing to be available on a given socket. + """Waits for writing to be available on a given socket. Returns True if the socket is readable, or False if the timeout expired. """ return wait_for_socket(sock, write=True, timeout=timeout)
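
For reference, the ``Retry-After`` handling changed above in lib/urllib3/util/retry.py can be reproduced with the standard library alone. The following is a minimal sketch, not part of the patch, and the helper name ``retry_after_seconds`` is invented for illustration; it shows how ``email.utils.parsedate_tz`` and ``mktime_tz`` turn either form of the header (delta-seconds or HTTP-date) into a sleep duration, assuming UTC when no timezone offset is given:

.. code-block:: python

    # Illustrative sketch only -- not part of the patch above. It mirrors the
    # parsedate_tz/mktime_tz handling added to Retry in lib/urllib3/util/retry.py.
    import email.utils
    import re
    import time

    def retry_after_seconds(retry_after):
        # Hypothetical helper name, used here only for demonstration.
        if re.match(r"^\s*[0-9]+\s*$", retry_after):
            # Delta-seconds form, e.g. "120".
            seconds = int(retry_after)
        else:
            # HTTP-date form, e.g. "Wed, 21 Oct 2015 07:28:00 GMT".
            retry_date_tuple = email.utils.parsedate_tz(retry_after)
            if retry_date_tuple is None:
                raise ValueError("Invalid Retry-After header: %s" % retry_after)
            if retry_date_tuple[9] is None:
                # parsedate_tz() reports a missing timezone as None, which
                # mktime_tz() would treat as local time; assume UTC instead.
                retry_date_tuple = retry_date_tuple[:9] + (0,) + retry_date_tuple[10:]
            seconds = email.utils.mktime_tz(retry_date_tuple) - time.time()
        # Never sleep for a negative amount of time.
        return max(seconds, 0)

    print(retry_after_seconds("120"))                            # 120
    print(retry_after_seconds("Wed, 21 Oct 2015 07:28:00 GMT"))  # 0, date is in the past

The earlier ``parsedate``/``time.mktime`` combination always interpreted the parsed date as local time; with ``mktime_tz``, substituting a ``0`` offset when none was parsed makes the computation use UTC rather than the local timezone in that case.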