Update cachecontrol library 0.12.5 (007e8ca) → 0.12.6 (167a605).

5 years ago · 19754c1d4b
14 changed files with 118 additions and 39 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -45,6 +45,7 @@
 * Update included fallback timezone info file to 2020d
 * Update attr 20.1.0.dev0 (4bd6827) to 20.2.0 (4f74fba)
 * Update Beautiful Soup 4.8.2 (r559) to 4.9.3 (r593)
+* Update cachecontrol library 0.12.5 (007e8ca) to 0.12.6 (167a605)
 * Update dateutil 2.8.1 (43b7838) to 2.8.1 (c496b4f)
 * Change add diskcache_py3 5.0.1 (9670fbb)
 * Change add diskcache_py2 4.1.0 (b0451e0)
--- a/lib/cachecontrol/__init__.py
+++ b/lib/cachecontrol/__init__.py
@ -1,11 +1,18 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """CacheControl import Interface.

 Make it easy to import from cachecontrol without long namespaces.
 """
 __author__ = "Eric Larson"
 __email__ = "eric@ionrock.org"
-__version__ = "0.12.5"
+__version__ = "0.12.6"

 from .wrapper import CacheControl
 from .adapter import CacheControlAdapter
 from .controller import CacheController
+
+import logging
+logging.getLogger(__name__).addHandler(logging.NullHandler())
--- a/lib/cachecontrol/_cmd.py
+++ b/lib/cachecontrol/_cmd.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import logging

 import requests
--- a/lib/cachecontrol/adapter.py
+++ b/lib/cachecontrol/adapter.py
@ -1,16 +1,20 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import types
 import functools
 import zlib

 from requests.adapters import HTTPAdapter

-from .controller import CacheController
+from .controller import CacheController, PERMANENT_REDIRECT_STATUSES
 from .cache import DictCache
 from .filewrapper import CallbackFileWrapper


 class CacheControlAdapter(HTTPAdapter):
-    invalidating_methods = {"PUT", "DELETE"}
+    invalidating_methods = {"PUT", "PATCH", "DELETE"}

    def __init__(
        self,
@ -93,7 +97,7 @@ class CacheControlAdapter(HTTPAdapter):
                response = cached_response

            # We always cache the 301 responses
-            elif response.status == 301:
+            elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
                self.controller.cache_response(request, response)
            else:
                # Wrap the response file with a wrapper that will cache the
--- a/lib/cachecontrol/cache.py
+++ b/lib/cachecontrol/cache.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """
 The cache object API for implementing caches. The default is a thread
 safe in-memory dictionary.
@ -10,7 +14,7 @@ class BaseCache(object):
    def get(self, key):
        raise NotImplementedError()

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
        raise NotImplementedError()

    def delete(self, key):
@ -29,7 +33,7 @@ class DictCache(BaseCache):
    def get(self, key):
        return self.data.get(key, None)

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
        with self.lock:
            self.data.update({key: value})

--- a/lib/cachecontrol/caches/__init__.py
+++ b/lib/cachecontrol/caches/__init__.py
@ -1,2 +1,6 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from .file_cache import FileCache  # noqa
 from .redis_cache import RedisCache  # noqa
--- a/lib/cachecontrol/caches/file_cache.py
+++ b/lib/cachecontrol/caches/file_cache.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import hashlib
 import os
 from textwrap import dedent
@ -114,7 +118,7 @@ class FileCache(BaseCache):
        except FileNotFoundError:
            return None

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
        name = self._fn(key)

        # Make sure the directory exists
--- a/lib/cachecontrol/caches/redis_cache.py
+++ b/lib/cachecontrol/caches/redis_cache.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import division

 from datetime import datetime
--- a/lib/cachecontrol/compat.py
+++ b/lib/cachecontrol/compat.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 try:
    from urllib.parse import urljoin
 except ImportError:
@ -9,7 +13,6 @@ try:
 except ImportError:
    import pickle

-
 # Handle the case where the requests module has been patched to not have
 # urllib3 bundled as part of its source.
 try:
--- a/lib/cachecontrol/controller.py
+++ b/lib/cachecontrol/controller.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """
 The httplib2 algorithms ported for use with requests.
 """
@ -17,6 +21,8 @@ logger = logging.getLogger(__name__)

 URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

+PERMANENT_REDIRECT_STATUSES = (301, 308)
+

 def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.
@ -37,7 +43,7 @@ class CacheController(object):
        self.cache = DictCache() if cache is None else cache
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()
-        self.cacheable_status_codes = status_codes or (200, 203, 300, 301)
+        self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)

    @classmethod
    def _urlnorm(cls, uri):
@ -147,17 +153,18 @@ class CacheController(object):
            logger.warning("Cache entry deserialization failed, entry ignored")
            return False

-        # If we have a cached 301, return it immediately. We don't
-        # need to test our response for other headers b/c it is
+        # If we have a cached permanent redirect, return it immediately. We
+        # don't need to test our response for other headers b/c it is
        # intrinsically "cacheable" as it is Permanent.
+        #
        # See:
        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
        #
        # Client can try to refresh the value by repeating the request
        # with cache busting headers as usual (ie no-cache).
-        if resp.status == 301:
+        if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
            msg = (
-                'Returning cached "301 Moved Permanently" response '
+                'Returning cached permanent redirect response '
                "(ignoring date and etag information)"
            )
            logger.debug(msg)
@ -261,6 +268,13 @@ class CacheController(object):

        response_headers = CaseInsensitiveDict(response.headers)

+        if 'date' in response_headers:
+            date = calendar.timegm(
+                parsedate_tz(response_headers['date'])
+            )
+        else:
+            date = 0
+
        # If we've been given a body, our response has a Content-Length, that
        # Content-Length is valid then we can check to see if the body we've
        # been given matches the expected size, and if it doesn't we'll just
@ -304,35 +318,60 @@ class CacheController(object):

        # If we've been given an etag, then keep the response
        if self.cache_etags and "etag" in response_headers:
+            expires_time = 0
+            if response_headers.get('expires'):
+                expires = parsedate_tz(response_headers['expires'])
+                if expires is not None:
+                    expires_time = calendar.timegm(expires) - date
+
+            expires_time = max(expires_time, 14 * 86400)
+
+            logger.debug('etag object cached for {0} seconds'.format(expires_time))
            logger.debug("Caching due to etag")
            self.cache.set(
-                cache_url, self.serializer.dumps(request, response, body=body)
+                cache_url,
+                self.serializer.dumps(request, response, body),
+                expires=expires_time
            )

-        # Add to the cache any 301s. We do this before looking that
-        # the Date headers.
-        elif response.status == 301:
-            logger.debug("Caching permanant redirect")
-            self.cache.set(cache_url, self.serializer.dumps(request, response))
+        # Add to the cache any permanent redirects. We do this before looking
+        # that the Date headers.
+        elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
+            logger.debug("Caching permanent redirect")
+            self.cache.set(cache_url, self.serializer.dumps(request, response, b''))

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif "date" in response_headers:
+            date = calendar.timegm(
+                parsedate_tz(response_headers['date'])
+            )
            # cache when there is a max-age > 0
            if "max-age" in cc and cc["max-age"] > 0:
                logger.debug("Caching b/c date exists and max-age > 0")
+                expires_time = cc['max-age']
                self.cache.set(
-                    cache_url, self.serializer.dumps(request, response, body=body)
+                    cache_url,
+                    self.serializer.dumps(request, response, body),
+                    expires=expires_time
                )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif "expires" in response_headers:
                if response_headers["expires"]:
-                    logger.debug("Caching b/c of expires header")
+                    expires = parsedate_tz(response_headers['expires'])
+                    if expires is not None:
+                        expires_time = calendar.timegm(expires) - date
+                    else:
+                        expires_time = None
+
+                    logger.debug('Caching b/c of expires header. expires in {0} seconds'.format(expires_time))
                    self.cache.set(
-                        cache_url, self.serializer.dumps(request, response, body=body)
+                        cache_url,
+                        self.serializer.dumps(request, response, body=body),
+                        expires=expires_time,
                    )

    def update_cached_response(self, request, response):
@ -371,6 +410,7 @@ class CacheController(object):
        cached_response.status = 200

        # update our cache
-        self.cache.set(cache_url, self.serializer.dumps(request, cached_response))
+        body = cached_response.read(decode_content=False)
+        self.cache.set(cache_url, self.serializer.dumps(request, cached_response, body))

        return cached_response
--- a/lib/cachecontrol/filewrapper.py
+++ b/lib/cachecontrol/filewrapper.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from io import BytesIO


--- a/lib/cachecontrol/heuristics.py
+++ b/lib/cachecontrol/heuristics.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import calendar
 import time

--- a/lib/cachecontrol/serialize.py
+++ b/lib/cachecontrol/serialize.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import base64
 import io
 import json
@ -17,26 +21,14 @@ def _b64_decode_str(s):
    return _b64_decode_bytes(s).decode("utf8")


+_default_body_read = object()
+
+
 class Serializer(object):

-    def dumps(self, request, response, body=None):
+    def dumps(self, request, response, body):
        response_headers = CaseInsensitiveDict(response.headers)

-        if body is None:
-            body = response.read(decode_content=False)
-
-            # NOTE: 99% sure this is dead code. I'm only leaving it
-            #       here b/c I don't have a test yet to prove
-            #       it. Basically, before using
-            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
-            #       this made an effort to reset the file handle. The
-            #       `CallbackFileWrapper` short circuits this code by
-            #       setting the body as the content is consumed, the
-            #       result being a `body` argument is *always* passed
-            #       into cache_response, and in turn,
-            #       `Serializer.dump`.
-            response._fp = io.BytesIO(body)
-
        # NOTE: This is all a bit weird, but it's really important that on
        #       Python 2.x these objects are unicode and not str, even when
        #       they contain only ascii. The problem here is that msgpack
--- a/lib/cachecontrol/wrapper.py
+++ b/lib/cachecontrol/wrapper.py
@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from .adapter import CacheControlAdapter
 from .cache import DictCache