From 19754c1d4b76176e3b4621caa636b12eb8fe7f82 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Tue, 3 Nov 2020 23:43:07 +0000 Subject: [PATCH] =?UTF-8?q?Update=20cachecontrol=20library=200.12.5=20(007?= =?UTF-8?q?e8ca)=20=E2=86=92=200.12.6=20(167a605).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.md | 1 + lib/cachecontrol/__init__.py | 9 ++++- lib/cachecontrol/_cmd.py | 4 ++ lib/cachecontrol/adapter.py | 10 +++-- lib/cachecontrol/cache.py | 8 +++- lib/cachecontrol/caches/__init__.py | 4 ++ lib/cachecontrol/caches/file_cache.py | 6 ++- lib/cachecontrol/caches/redis_cache.py | 4 ++ lib/cachecontrol/compat.py | 5 ++- lib/cachecontrol/controller.py | 70 ++++++++++++++++++++++++++-------- lib/cachecontrol/filewrapper.py | 4 ++ lib/cachecontrol/heuristics.py | 4 ++ lib/cachecontrol/serialize.py | 24 ++++-------- lib/cachecontrol/wrapper.py | 4 ++ 14 files changed, 118 insertions(+), 39 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 7144880..1de0574 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -45,6 +45,7 @@ * Update included fallback timezone info file to 2020d * Update attr 20.1.0.dev0 (4bd6827) to 20.2.0 (4f74fba) * Update Beautiful Soup 4.8.2 (r559) to 4.9.3 (r593) +* Update cachecontrol library 0.12.5 (007e8ca) to 0.12.6 (167a605) * Update dateutil 2.8.1 (43b7838) to 2.8.1 (c496b4f) * Change add diskcache_py3 5.0.1 (9670fbb) * Change add diskcache_py2 4.1.0 (b0451e0) diff --git a/lib/cachecontrol/__init__.py b/lib/cachecontrol/__init__.py index 8fdee66..6ba19d3 100644 --- a/lib/cachecontrol/__init__.py +++ b/lib/cachecontrol/__init__.py @@ -1,11 +1,18 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + """CacheControl import Interface. Make it easy to import from cachecontrol without long namespaces. """ __author__ = "Eric Larson" __email__ = "eric@ionrock.org" -__version__ = "0.12.5" +__version__ = "0.12.6" from .wrapper import CacheControl from .adapter import CacheControlAdapter from .controller import CacheController + +import logging +logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git a/lib/cachecontrol/_cmd.py b/lib/cachecontrol/_cmd.py index ee8d60d..ccee007 100644 --- a/lib/cachecontrol/_cmd.py +++ b/lib/cachecontrol/_cmd.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import logging import requests diff --git a/lib/cachecontrol/adapter.py b/lib/cachecontrol/adapter.py index de50006..22b4963 100644 --- a/lib/cachecontrol/adapter.py +++ b/lib/cachecontrol/adapter.py @@ -1,16 +1,20 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import types import functools import zlib from requests.adapters import HTTPAdapter -from .controller import CacheController +from .controller import CacheController, PERMANENT_REDIRECT_STATUSES from .cache import DictCache from .filewrapper import CallbackFileWrapper class CacheControlAdapter(HTTPAdapter): - invalidating_methods = {"PUT", "DELETE"} + invalidating_methods = {"PUT", "PATCH", "DELETE"} def __init__( self, @@ -93,7 +97,7 @@ class CacheControlAdapter(HTTPAdapter): response = cached_response # We always cache the 301 responses - elif response.status == 301: + elif int(response.status) in PERMANENT_REDIRECT_STATUSES: self.controller.cache_response(request, response) else: # Wrap the response file with a wrapper that will cache the diff --git a/lib/cachecontrol/cache.py b/lib/cachecontrol/cache.py index 94e0773..44e4309 100644 --- a/lib/cachecontrol/cache.py +++ b/lib/cachecontrol/cache.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + """ The cache object API for implementing caches. The default is a thread safe in-memory dictionary. @@ -10,7 +14,7 @@ class BaseCache(object): def get(self, key): raise NotImplementedError() - def set(self, key, value): + def set(self, key, value, expires=None): raise NotImplementedError() def delete(self, key): @@ -29,7 +33,7 @@ class DictCache(BaseCache): def get(self, key): return self.data.get(key, None) - def set(self, key, value): + def set(self, key, value, expires=None): with self.lock: self.data.update({key: value}) diff --git a/lib/cachecontrol/caches/__init__.py b/lib/cachecontrol/caches/__init__.py index 0e1658f..44becd6 100644 --- a/lib/cachecontrol/caches/__init__.py +++ b/lib/cachecontrol/caches/__init__.py @@ -1,2 +1,6 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + from .file_cache import FileCache # noqa from .redis_cache import RedisCache # noqa diff --git a/lib/cachecontrol/caches/file_cache.py b/lib/cachecontrol/caches/file_cache.py index 607b945..6cd1106 100644 --- a/lib/cachecontrol/caches/file_cache.py +++ b/lib/cachecontrol/caches/file_cache.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import hashlib import os from textwrap import dedent @@ -114,7 +118,7 @@ class FileCache(BaseCache): except FileNotFoundError: return None - def set(self, key, value): + def set(self, key, value, expires=None): name = self._fn(key) # Make sure the directory exists diff --git a/lib/cachecontrol/caches/redis_cache.py b/lib/cachecontrol/caches/redis_cache.py index 16da0ae..564c30e 100644 --- a/lib/cachecontrol/caches/redis_cache.py +++ b/lib/cachecontrol/caches/redis_cache.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + from __future__ import division from datetime import datetime diff --git a/lib/cachecontrol/compat.py b/lib/cachecontrol/compat.py index 143c8ab..72c456c 100644 --- a/lib/cachecontrol/compat.py +++ b/lib/cachecontrol/compat.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + try: from urllib.parse import urljoin except ImportError: @@ -9,7 +13,6 @@ try: except ImportError: import pickle - # Handle the case where the requests module has been patched to not have # urllib3 bundled as part of its source. try: diff --git a/lib/cachecontrol/controller.py b/lib/cachecontrol/controller.py index c5c4a50..e411a32 100644 --- a/lib/cachecontrol/controller.py +++ b/lib/cachecontrol/controller.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + """ The httplib2 algorithms ported for use with requests. """ @@ -17,6 +21,8 @@ logger = logging.getLogger(__name__) URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") +PERMANENT_REDIRECT_STATUSES = (301, 308) + def parse_uri(uri): """Parses a URI using the regex given in Appendix B of RFC 3986. @@ -37,7 +43,7 @@ class CacheController(object): self.cache = DictCache() if cache is None else cache self.cache_etags = cache_etags self.serializer = serializer or Serializer() - self.cacheable_status_codes = status_codes or (200, 203, 300, 301) + self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308) @classmethod def _urlnorm(cls, uri): @@ -147,17 +153,18 @@ class CacheController(object): logger.warning("Cache entry deserialization failed, entry ignored") return False - # If we have a cached 301, return it immediately. We don't - # need to test our response for other headers b/c it is + # If we have a cached permanent redirect, return it immediately. We + # don't need to test our response for other headers b/c it is # intrinsically "cacheable" as it is Permanent. + # # See: # https://tools.ietf.org/html/rfc7231#section-6.4.2 # # Client can try to refresh the value by repeating the request # with cache busting headers as usual (ie no-cache). - if resp.status == 301: + if int(resp.status) in PERMANENT_REDIRECT_STATUSES: msg = ( - 'Returning cached "301 Moved Permanently" response ' + 'Returning cached permanent redirect response ' "(ignoring date and etag information)" ) logger.debug(msg) @@ -261,6 +268,13 @@ class CacheController(object): response_headers = CaseInsensitiveDict(response.headers) + if 'date' in response_headers: + date = calendar.timegm( + parsedate_tz(response_headers['date']) + ) + else: + date = 0 + # If we've been given a body, our response has a Content-Length, that # Content-Length is valid then we can check to see if the body we've # been given matches the expected size, and if it doesn't we'll just @@ -304,35 +318,60 @@ class CacheController(object): # If we've been given an etag, then keep the response if self.cache_etags and "etag" in response_headers: + expires_time = 0 + if response_headers.get('expires'): + expires = parsedate_tz(response_headers['expires']) + if expires is not None: + expires_time = calendar.timegm(expires) - date + + expires_time = max(expires_time, 14 * 86400) + + logger.debug('etag object cached for {0} seconds'.format(expires_time)) logger.debug("Caching due to etag") self.cache.set( - cache_url, self.serializer.dumps(request, response, body=body) + cache_url, + self.serializer.dumps(request, response, body), + expires=expires_time ) - # Add to the cache any 301s. We do this before looking that - # the Date headers. - elif response.status == 301: - logger.debug("Caching permanant redirect") - self.cache.set(cache_url, self.serializer.dumps(request, response)) + # Add to the cache any permanent redirects. We do this before looking + # that the Date headers. + elif int(response.status) in PERMANENT_REDIRECT_STATUSES: + logger.debug("Caching permanent redirect") + self.cache.set(cache_url, self.serializer.dumps(request, response, b'')) # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. elif "date" in response_headers: + date = calendar.timegm( + parsedate_tz(response_headers['date']) + ) # cache when there is a max-age > 0 if "max-age" in cc and cc["max-age"] > 0: logger.debug("Caching b/c date exists and max-age > 0") + expires_time = cc['max-age'] self.cache.set( - cache_url, self.serializer.dumps(request, response, body=body) + cache_url, + self.serializer.dumps(request, response, body), + expires=expires_time ) # If the request can expire, it means we should cache it # in the meantime. elif "expires" in response_headers: if response_headers["expires"]: - logger.debug("Caching b/c of expires header") + expires = parsedate_tz(response_headers['expires']) + if expires is not None: + expires_time = calendar.timegm(expires) - date + else: + expires_time = None + + logger.debug('Caching b/c of expires header. expires in {0} seconds'.format(expires_time)) self.cache.set( - cache_url, self.serializer.dumps(request, response, body=body) + cache_url, + self.serializer.dumps(request, response, body=body), + expires=expires_time, ) def update_cached_response(self, request, response): @@ -371,6 +410,7 @@ class CacheController(object): cached_response.status = 200 # update our cache - self.cache.set(cache_url, self.serializer.dumps(request, cached_response)) + body = cached_response.read(decode_content=False) + self.cache.set(cache_url, self.serializer.dumps(request, cached_response, body)) return cached_response diff --git a/lib/cachecontrol/filewrapper.py b/lib/cachecontrol/filewrapper.py index 30ed4c5..dd91334 100644 --- a/lib/cachecontrol/filewrapper.py +++ b/lib/cachecontrol/filewrapper.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + from io import BytesIO diff --git a/lib/cachecontrol/heuristics.py b/lib/cachecontrol/heuristics.py index 6c0e979..ebe4a96 100644 --- a/lib/cachecontrol/heuristics.py +++ b/lib/cachecontrol/heuristics.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import calendar import time diff --git a/lib/cachecontrol/serialize.py b/lib/cachecontrol/serialize.py index 572cf0e..4e49a90 100644 --- a/lib/cachecontrol/serialize.py +++ b/lib/cachecontrol/serialize.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import base64 import io import json @@ -17,26 +21,14 @@ def _b64_decode_str(s): return _b64_decode_bytes(s).decode("utf8") +_default_body_read = object() + + class Serializer(object): - def dumps(self, request, response, body=None): + def dumps(self, request, response, body): response_headers = CaseInsensitiveDict(response.headers) - if body is None: - body = response.read(decode_content=False) - - # NOTE: 99% sure this is dead code. I'm only leaving it - # here b/c I don't have a test yet to prove - # it. Basically, before using - # `cachecontrol.filewrapper.CallbackFileWrapper`, - # this made an effort to reset the file handle. The - # `CallbackFileWrapper` short circuits this code by - # setting the body as the content is consumed, the - # result being a `body` argument is *always* passed - # into cache_response, and in turn, - # `Serializer.dump`. - response._fp = io.BytesIO(body) - # NOTE: This is all a bit weird, but it's really important that on # Python 2.x these objects are unicode and not str, even when # they contain only ascii. The problem here is that msgpack diff --git a/lib/cachecontrol/wrapper.py b/lib/cachecontrol/wrapper.py index d8e6fc6..b6ee7f2 100644 --- a/lib/cachecontrol/wrapper.py +++ b/lib/cachecontrol/wrapper.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + from .adapter import CacheControlAdapter from .cache import DictCache