30 changed files with 591 additions and 1434 deletions
@ -0,0 +1,13 @@ |
|||||
|
"""CacheControl import Interface. |
||||
|
|
||||
|
Make it easy to import from cachecontrol without long namespaces. |
||||
|
""" |
||||
|
|
||||
|
# patch our requests.models.Response to make them pickleable in older |
||||
|
# versions of requests. |
||||
|
|
||||
|
import cachecontrol.patch_requests |
||||
|
|
||||
|
from cachecontrol.wrapper import CacheControl |
||||
|
from cachecontrol.adapter import CacheControlAdapter |
||||
|
from cachecontrol.controller import CacheController |
@ -0,0 +1,75 @@ |
|||||
|
from requests.adapters import HTTPAdapter |
||||
|
|
||||
|
from cachecontrol.controller import CacheController |
||||
|
from cachecontrol.cache import DictCache |
||||
|
|
||||
|
|
||||
|
class CacheControlAdapter(HTTPAdapter):
    """Transport adapter that answers GET requests from a cache when possible.

    The adapter delegates every caching decision to a ``CacheController``
    and only wires it into the ``send`` / ``build_response`` hooks.
    """

    # A successful request with one of these methods drops the cached entry
    # for the same URL.
    invalidating_methods = set(['PUT', 'DELETE'])

    def __init__(self, cache=None, cache_etags=True, cache_all=False, *args, **kw):
        """Create the adapter.

        :param cache: cache backend; a new ``DictCache`` is used when falsy.
        :param cache_etags: forwarded to ``CacheController``.
        :param cache_all: forwarded to ``CacheController``.
        Remaining arguments are passed to ``HTTPAdapter.__init__``.
        """
        super(CacheControlAdapter, self).__init__(*args, **kw)
        self.cache = cache or DictCache()
        self.controller = CacheController(
            self.cache, cache_etags=cache_etags, cache_all=cache_all
        )

    def send(self, request, **kw):
        """Send a request, short-circuiting with a cached response for GETs.

        When no usable cached response exists, conditional headers returned
        by the controller are added before the request goes out.
        """
        if request.method == 'GET':
            cached_response = self.controller.cached_request(
                request.url, request.headers
            )
            if cached_response:
                # Cached responses should not have a raw field since
                # they *cannot* be created from some stream.
                cached_response.raw = None
                return cached_response

            # check for etags and add headers if appropriate
            headers = self.controller.add_headers(request.url)
            request.headers.update(headers)

        return super(CacheControlAdapter, self).send(request, **kw)

    def build_response(self, request, response):
        """Build the ``Response`` and update the cache accordingly.

        * PUT/DELETE with an ``ok`` response invalidates the cached entry.
        * GET with a 304 swaps in the refreshed cached response, if any.
        * Any other GET response is offered to the controller for caching.

        The returned object always has a ``from_cache`` attribute.
        """
        resp = super(CacheControlAdapter, self).build_response(
            request, response
        )

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and resp.ok:
            cache_url = self.controller.cache_url(request.url)
            self.cache.delete(cache_url)

        # Try to store the response if it is a GET
        elif request.method == 'GET':
            if response.status == 304:
                # We must have sent an ETag request. The controller hands
                # back the raw ``response`` unchanged when it has nothing
                # cached; in that case keep the Response built above rather
                # than returning the low-level object to the caller.
                cached = self.controller.update_cached_response(
                    request, response
                )
                if cached is not response:
                    resp = cached
                    # Fix possible exception when using missing `raw`
                    # field in requests.
                    # TODO: remove when requests is bumped to 2.2.2 or 2.3
                    resp.raw = None
            else:
                # try to cache the response
                self.controller.cache_response(request, resp)

        # Give the response a from_cache attr to let people use it
        # rather than testing for hasattr.
        if not hasattr(resp, 'from_cache'):
            resp.from_cache = False

        return resp
@ -0,0 +1,36 @@ |
|||||
|
""" |
||||
|
The cache object API for implementing caches. The default is just a |
||||
|
dictionary, which in turn means it is not threadsafe for writing. |
||||
|
""" |
||||
|
from threading import Lock |
||||
|
|
||||
|
|
||||
|
class BaseCache(object):
    """Interface every cache backend implements: ``get``, ``set``, ``delete``."""

    def get(self, key):
        """Return the value stored under ``key``."""
        # ``NotImplemented`` is a constant, not an exception; calling it
        # raises TypeError. NotImplementedError is the intended signal.
        raise NotImplementedError()

    def set(self, key, value):
        """Store ``value`` under ``key``."""
        raise NotImplementedError()

    def delete(self, key):
        """Remove ``key`` from the cache."""
        raise NotImplementedError()
||||
|
|
||||
|
|
||||
|
class DictCache(BaseCache):
    """In-memory cache backed by a dict.

    ``set`` and ``delete`` run under a lock; ``get`` reads without it.
    """

    def __init__(self, init_dict=None):
        """:param init_dict: optional dict to use as the backing store."""
        self.lock = Lock()
        # Test against None so a caller-supplied *empty* dict is still used
        # as the backing store instead of being replaced by a new one.
        self.data = {} if init_dict is None else init_dict

    def get(self, key):
        """Return the cached value for ``key`` or None."""
        return self.data.get(key, None)

    def set(self, key, value):
        """Store ``value`` under ``key``."""
        with self.lock:
            self.data[key] = value

    def delete(self, key):
        """Remove ``key`` if present; a missing key is not an error."""
        with self.lock:
            self.data.pop(key, None)
@ -0,0 +1,18 @@ |
|||||
|
from textwrap import dedent |
||||
|
|
||||
|
try: |
||||
|
from cachecontrol.caches.file_cache import FileCache |
||||
|
except ImportError: |
||||
|
notice = dedent(''' |
||||
|
NOTE: In order to use the FileCache you must have |
||||
|
lockfile installed. You can install it via pip: |
||||
|
pip install lockfile |
||||
|
''') |
||||
|
print(notice) |
||||
|
|
||||
|
|
||||
|
try: |
||||
|
import redis |
||||
|
from cachecontrol.caches.redis_cache import RedisCache |
||||
|
except ImportError: |
||||
|
pass |
@ -0,0 +1,51 @@ |
|||||
|
import os |
||||
|
import sys |
||||
|
from hashlib import md5 |
||||
|
|
||||
|
try: |
||||
|
from pickle import load, dump, HIGHEST_PROTOCOL |
||||
|
except ImportError: |
||||
|
from cPickle import load, dump, HIGHEST_PROTOCOL |
||||
|
|
||||
|
from lockfile import FileLock |
||||
|
|
||||
|
|
||||
|
class FileCache(object):
    """Cache that pickles each value into its own file in ``directory``.

    File names are the MD5 hex digest of the key. NOTE(review): values are
    read back with ``pickle.load``; only point this at a directory whose
    contents you trust.
    """

    def __init__(self, directory, forever=False):
        """
        :param directory: directory holding the cache files; created
            (including missing parents) when it does not exist.
        :param forever: when true, ``delete`` never removes files.
        """
        self.directory = directory
        self.forever = forever

        if not os.path.isdir(self.directory):
            # makedirs rather than mkdir so nested paths work.
            os.makedirs(self.directory)

    @staticmethod
    def encode(x):
        """Return the MD5 hex digest of the key string ``x``."""
        return md5(x.encode()).hexdigest()

    def _fn(self, name):
        """Return the path of the cache file for key ``name``."""
        return os.path.join(self.directory, self.encode(name))

    def get(self, key):
        """Return the unpickled value for ``key``, or None.

        None is returned when the file is missing or cannot be unpickled
        (``ValueError``, or ``EOFError`` for an empty/truncated file).
        """
        name = self._fn(key)
        if not os.path.exists(name):
            return None

        with open(name, 'rb') as fh:
            try:
                # Compare the numeric major version; comparing the version
                # *string* against '3' is fragile.
                if sys.version_info[0] < 3:
                    return load(fh)
                return load(fh, encoding='latin1')
            except (ValueError, EOFError):
                return None

    def set(self, key, value):
        """Pickle ``value`` into the file for ``key`` while holding a FileLock."""
        name = self._fn(key)
        with FileLock(name) as lock:
            with open(lock.path, 'wb') as fh:
                dump(value, fh, HIGHEST_PROTOCOL)

    def delete(self, key):
        """Remove the file for ``key`` unless the cache is ``forever``.

        Deleting a key that was never stored is a no-op, matching the
        dict-backed cache; other OS errors still propagate.
        """
        import errno

        if self.forever:
            return
        try:
            os.remove(self._fn(key))
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise
@ -0,0 +1,46 @@ |
|||||
|
from __future__ import division |
||||
|
|
||||
|
from datetime import datetime |
||||
|
|
||||
|
try: |
||||
|
from cPickle import loads, dumps |
||||
|
except ImportError: # Python 3.x |
||||
|
from pickle import loads, dumps |
||||
|
|
||||
|
|
||||
|
def total_seconds(td):
    """Return the length of timedelta ``td`` in seconds (Python 2.6 compatibility)."""
    if hasattr(td, 'total_seconds'):
        return td.total_seconds()

    ms = td.microseconds
    secs = (td.seconds + td.days * 24 * 3600)
    return (ms + secs * 10**6) / 1e6


class RedisCache(object):
    """Cache that stores pickled values through a Redis connection object."""

    def __init__(self, conn):
        """:param conn: object exposing ``get``/``set``/``setex``/``delete``/``keys``."""
        self.conn = conn

    def get(self, key):
        """Return the unpickled value for ``key``, or None when absent/empty."""
        val = self.conn.get(key)
        if val:
            # NOTE(review): unpickling trusts whatever is stored under the key.
            return loads(val)
        return None

    def set(self, key, value, expires=None):
        """Store ``value`` pickled under ``key``.

        :param expires: optional ``datetime`` at which the entry should
            expire; converted to a whole number of seconds from now.
        """
        if not expires:
            self.conn.set(key, dumps(value))
        else:
            remaining = expires - datetime.now()
            # Pickle here as well so ``get`` can read the entry back, and
            # pass an integer because SETEX takes whole seconds.
            self.conn.setex(key, int(total_seconds(remaining)), dumps(value))

    def delete(self, key):
        """Delete ``key`` from the store."""
        self.conn.delete(key)

    def clear(self):
        """Helper for clearing all the keys in a database. Use with
        caution!"""
        for key in self.conn.keys():
            self.conn.delete(key)
@ -0,0 +1,12 @@ |
|||||
|
try: |
||||
|
from urllib.parse import urljoin |
||||
|
except ImportError: |
||||
|
from urlparse import urljoin |
||||
|
|
||||
|
|
||||
|
try: |
||||
|
import email.utils |
||||
|
parsedate_tz = email.utils.parsedate_tz |
||||
|
except ImportError: |
||||
|
import email.Utils |
||||
|
parsedate_tz = email.Utils.parsedate_tz |
@ -0,0 +1,258 @@ |
|||||
|
""" |
||||
|
The httplib2 algorithms ported for use with requests. |
||||
|
""" |
||||
|
import re |
||||
|
import calendar |
||||
|
import time |
||||
|
import datetime |
||||
|
|
||||
|
from cachecontrol.cache import DictCache |
||||
|
from cachecontrol.compat import parsedate_tz |
||||
|
|
||||
|
|
||||
|
# Generic URI splitting pattern from Appendix B of RFC 3986.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)

    Components that are absent come back as None, except ``path``, which
    is the empty string when absent.
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])
||||
|
|
||||
|
|
||||
|
class CacheController(object):
    """Decides whether a request may be served from, or stored in, the cache.

    Entries are keyed by the normalized URL returned by ``cache_url``.
    """

    def __init__(self, cache=None, cache_etags=True, cache_all=False):
        """
        :param cache: cache backend; a new ``DictCache`` is used when falsy.
        :param cache_etags: store any response carrying an ETag header.
        :param cache_all: store every cacheable-status response, stamping
            it with caching headers first.
        """
        self.cache = cache or DictCache()
        self.cache_etags = cache_etags
        self.cache_all = cache_all

    @staticmethod
    def _as_int(value):
        """Return ``value`` converted to int, or None when not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _urlnorm(self, uri):
        """Normalize the URL to create a safe key for the cache.

        Lower-cases scheme and authority, defaults the path to ``/`` and
        drops the fragment. Raises ``Exception`` for non-absolute URIs.
        """
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
        authority = authority.lower()
        scheme = scheme.lower()
        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = "?".join([path, query]) if query else path
        return scheme + "://" + authority + request_uri

    def cache_url(self, uri):
        """Return the cache key for ``uri``."""
        return self._urlnorm(uri)

    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.

        Directives with a value map to the (lower-cased, stripped) value
        string; directives without one map to ``1``.
        """
        retval = {}

        cc_header = 'cache-control'
        if 'Cache-Control' in headers:
            cc_header = 'Cache-Control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if "=" in part]
            parts_wo_args = [(name.strip().lower(), 1)
                             for name in parts if "=" not in name]
            retval = dict(parts_with_args + parts_wo_args)
        return retval

    def _is_fresh(self, resp, cc):
        """Return True when cached ``resp`` is still fresh for request directives ``cc``."""
        # Without a parsable Date header the age cannot be computed, so
        # the entry is treated as stale instead of raising.
        date_tuple = None
        if 'date' in resp.headers:
            date_tuple = parsedate_tz(resp.headers['date'])
        if date_tuple is None:
            return False

        date = calendar.timegm(date_tuple)
        current_age = max(0, time.time() - date)

        # TODO: There is an assumption that the result will be a
        # requests response object. This may not be best since we
        # could probably avoid instantiating or constructing the
        # response until we know we need it.
        resp_cc = self.parse_cache_control(resp.headers)

        # determine freshness
        freshness_lifetime = 0
        resp_max_age = resp_cc.get('max-age')
        # A value-less ``max-age`` parses to the int 1, which has no
        # ``isdigit``; only digit strings count as a usable max-age.
        if hasattr(resp_max_age, 'isdigit') and resp_max_age.isdigit():
            freshness_lifetime = int(resp_max_age)
        elif 'expires' in resp.headers:
            expires = parsedate_tz(resp.headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)

        # determine if we are setting freshness limit in the req
        if 'max-age' in cc:
            freshness_lifetime = self._as_int(cc['max-age']) or 0

        if 'min-fresh' in cc:
            # adjust our current age by our min fresh
            current_age += self._as_int(cc['min-fresh']) or 0

        return freshness_lifetime > current_age

    def cached_request(self, url, headers):
        """Return the cached response for ``url`` if it may be used, else False.

        When a stale entry exists, ``headers`` is updated in place with
        ``If-None-Match`` / ``If-Modified-Since`` taken from it, and the
        entry is deleted if it has no ETag.
        """
        cache_url = self.cache_url(url)
        cc = self.parse_cache_control(headers)

        # non-caching states. Directive values are strings, so compare
        # the parsed number rather than the raw value against 0.
        no_cache = 'no-cache' in cc
        if 'max-age' in cc and self._as_int(cc['max-age']) == 0:
            no_cache = True

        # see if it is in the cache anyways
        resp = self.cache.get(cache_url)
        if no_cache or not resp:
            return False

        # Check our Vary header to make sure our request headers match
        # up. We don't delete it from the cache though, we just don't
        # return our cached value.
        #
        # NOTE: Because httplib2 stores raw content, it denotes
        #       headers that were sent in the original response by
        #       adding -varied-$name. We don't have to do that b/c we
        #       are storing the object which has a reference to the
        #       original request. If that changes, then I'd propose
        #       using the varied headers in the cache key to avoid the
        #       situation all together.
        if 'vary' in resp.headers:
            varied_headers = resp.headers['vary'].replace(' ', '').split(',')
            original_headers = resp.request.headers
            for header in varied_headers:
                # If our headers don't match for the headers listed in
                # the vary header, then don't use the cached response
                if headers.get(header, None) != original_headers.get(header):
                    return False

        if self._is_fresh(resp, cc):
            # make sure we set the from_cache to true
            resp.from_cache = True
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if 'etag' not in resp.headers:
            self.cache.delete(cache_url)

        if 'etag' in resp.headers:
            headers['If-None-Match'] = resp.headers['ETag']

        if 'last-modified' in resp.headers:
            headers['If-Modified-Since'] = resp.headers['Last-Modified']

        # return the original handler
        return False

    def add_headers(self, url):
        """Return conditional request headers for ``url`` based on the cached entry."""
        # Entries are stored under the normalized URL, so look up the
        # same key instead of the raw URL.
        resp = self.cache.get(self.cache_url(url))
        if resp and 'etag' in resp.headers:
            return {'If-None-Match': resp.headers['etag']}
        return {}

    def cache_response(self, request, resp):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        if resp.status_code not in [200, 203]:
            return

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(resp.headers)

        cache_url = self.cache_url(request.url)

        # no-store: drop any stored copy and stop, so the response is not
        # re-cached by the ETag / cache_all rules below.
        if cc.get('no-store') or cc_req.get('no-store'):
            if self.cache.get(cache_url):
                self.cache.delete(cache_url)
            return

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in resp.headers:
            self.cache.set(cache_url, resp)

        # If we want to cache sites not setup with cache headers then
        # add the proper headers and keep the response
        if self.cache_all:
            expires = datetime.datetime.utcnow() + datetime.timedelta(days=(25 * 365))
            expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
            headers = {'Cache-Control': 'public,max-age=%d' % int(3600),
                       'Expires': expires}
            resp.headers.update(headers)
            self.cache.set(cache_url, resp)

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in resp.headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                max_age = self._as_int(cc['max-age'])
                if max_age is not None and max_age > 0:
                    self.cache.set(cache_url, resp)

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in resp.headers:
                if resp.headers['expires']:
                    self.cache.set(cache_url, resp)

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response. Returns the refreshed cached
        response, or ``response`` itself when nothing is cached.
        """
        cache_url = self.cache_url(request.url)

        resp = self.cache.get(cache_url)

        if not resp:
            # we didn't have a cached response
            return response

        # did so lets update our headers with the ones from the 304
        resp.headers.update(response.headers)

        # we want a 200 b/c we have content via the cache
        resp.status_code = 200

        # update the request as it has the if-none-match header + any
        # other headers that the server might have updated (ie Date,
        # Cache-Control, Expires, etc.)
        resp.request = request

        # update our cache
        self.cache.set(cache_url, resp)

        # Let everyone know this was from the cache.
        resp.from_cache = True

        return resp
@ -0,0 +1,56 @@ |
|||||
|
import requests |
||||
|
|
||||
|
from requests import models |
||||
|
from requests.packages.urllib3.response import HTTPResponse |
||||
|
|
||||
|
# Response attributes copied into the pickled state.
__attrs__ = [
    '_content',
    'status_code',
    'headers',
    'url',
    'history',
    'encoding',
    'reason',
    'cookies',
    'elapsed',
]


def response_getstate(self):
    """Return the picklable state dict for a Response (``__getstate__``).

    The state holds every name in ``__attrs__`` (None when missing) plus
    ``raw_original_response``.
    """
    # consume everything
    if not self._content_consumed:
        self.content

    state = dict(
        (attr, getattr(self, attr, None))
        for attr in __attrs__
    )

    # deal with our raw content b/c we need it for our cookie jar.
    # ``raw`` can be None (the adapter clears it on cached responses), so
    # fall back to None instead of raising AttributeError.
    raw = getattr(self, 'raw', None)
    state['raw_original_response'] = getattr(raw, '_original_response', None)
    return state
||||
|
|
||||
|
|
||||
|
def response_setstate(self, state):
    """Restore a Response from ``state`` (``__setstate__``).

    Every entry except ``raw_original_response`` becomes an attribute;
    ``raw`` is rebuilt as an empty ``HTTPResponse`` carrying the saved
    original response.
    """
    for name, value in state.items():
        if name != 'raw_original_response':
            setattr(self, name, value)

    setattr(self, 'raw', HTTPResponse())
    # ``.get`` so a state dict without the key restores instead of raising.
    self.raw._original_response = state.get('raw_original_response')
||||
|
|
||||
|
|
||||
|
def make_responses_pickleable():
    """Install pickle hooks on ``models.Response`` for requests older than 2.2.

    Raises ``ValueError`` when the major/minor parts of
    ``requests.__version__`` are not integers.
    """
    # Only the first two components decide; the rest may carry
    # non-numeric suffixes and are ignored.
    major, minor = [int(part) for part in requests.__version__.split('.')[:2]]

    # must be >= 2.2.x. Tuple comparison keeps 3.0 / 3.1 from being
    # treated as "older than 2.2".
    if (major, minor) < (2, 2):
        models.Response.__getstate__ = response_getstate
        models.Response.__setstate__ = response_setstate


make_responses_pickleable()
@ -0,0 +1,10 @@ |
|||||
|
from cachecontrol.adapter import CacheControlAdapter |
||||
|
from cachecontrol.cache import DictCache |
||||
|
|
||||
|
|
||||
|
def CacheControl(sess, cache=None, cache_etags=True, cache_all=False):
    """Mount a ``CacheControlAdapter`` on ``sess`` and return the session.

    :param sess: session object exposing ``mount``.
    :param cache: cache backend; a new ``DictCache`` is used when falsy.
    :param cache_etags: forwarded to the adapter.
    :param cache_all: forwarded to the adapter.

    NOTE(review): only the ``http://`` prefix is mounted, so ``https://``
    URLs bypass the cache - confirm this is intended.
    """
    cache = cache or DictCache()
    adapter = CacheControlAdapter(cache, cache_etags=cache_etags, cache_all=cache_all)
    sess.mount('http://', adapter)

    return sess
@ -1,14 +0,0 @@ |
|||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
__init__.py |
|
||||
~~~~~~~~~~~ |
|
||||
|
|
||||
Defines the public API to the httpcache module. |
|
||||
""" |
|
||||
|
|
||||
__version__ = '0.1.3' |
|
||||
|
|
||||
from .cache import HTTPCache |
|
||||
from .adapter import CachingHTTPAdapter |
|
||||
|
|
||||
# __all__ must list names as strings; listing the objects themselves makes
# ``from <package> import *`` fail.
__all__ = ['HTTPCache', 'CachingHTTPAdapter']
|
@ -1,55 +0,0 @@ |
|||||
""" |
|
||||
adapter.py |
|
||||
~~~~~~~~~~ |
|
||||
|
|
||||
Contains an implementation of an HTTP adapter for Requests that is aware of the |
|
||||
cache contained in this module. |
|
||||
""" |
|
||||
from requests.adapters import HTTPAdapter |
|
||||
from .cache import HTTPCache |
|
||||
|
|
||||
|
|
||||
class CachingHTTPAdapter(HTTPAdapter):
    """
    A HTTP-caching-aware Transport Adapter for Python Requests. The central
    portion of the API.

    :param capacity: The maximum capacity of the backing cache.
    """
    def __init__(self, capacity=50, **kwargs):
        super(CachingHTTPAdapter, self).__init__(**kwargs)

        #: The HTTP Cache backing the adapter.
        self.cache = HTTPCache(capacity=capacity)

    def send(self, request, **kwargs):
        """
        Sends a PreparedRequest object, respecting RFC 2616's rules about HTTP
        caching. Returns a Response object that may have been cached.

        :param request: The Requests :class:`PreparedRequest <PreparedRequest>` object to send.
        """
        cached_resp = self.cache.retrieve(request)

        if cached_resp is not None:
            return cached_resp
        return super(CachingHTTPAdapter, self).send(request, **kwargs)

    def build_response(self, request, response):
        """
        Builds a Response object from a urllib3 response. May involve returning
        a cached Response.

        :param request: The Requests :class:`PreparedRequest <PreparedRequest>` object sent.
        :param response: The urllib3 response.
        """
        resp = super(CachingHTTPAdapter, self).build_response(request,
                                                              response)

        if resp.status_code == 304:
            # handle_304 returns None when nothing is cached for the URL;
            # keep the real 304 in that case rather than returning None.
            cached = self.cache.handle_304(resp)
            if cached is not None:
                resp = cached
        else:
            self.cache.store(resp)

        return resp
|
@ -1,207 +0,0 @@ |
|||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
cache.py |
|
||||
~~~~~~~~ |
|
||||
|
|
||||
Contains the primary cache structure used in http-cache. |
|
||||
""" |
|
||||
from .structures import RecentOrderedDict |
|
||||
from .utils import (parse_date_header, build_date_header, |
|
||||
expires_from_cache_control, url_contains_query) |
|
||||
from datetime import datetime |
|
||||
|
|
||||
|
|
||||
# RFC 2616 specifies that we can cache 200 OK, 203 Non Authoritative,
# 206 Partial Content, 300 Multiple Choices, 301 Moved Permanently and
# 410 Gone responses. We don't cache 206s at the moment because we
# don't handle Range and Content-Range headers.
CACHEABLE_RCS = (200, 203, 300, 301, 410)

# Cacheable verbs.
CACHEABLE_VERBS = ('GET', 'HEAD', 'OPTIONS')

# Some verbs MUST invalidate the resource in the cache, according to RFC 2616.
# If we send one of these, or any verb we don't recognise, invalidate the
# cache entry for that URL. As it happens, these are also the cacheable
# verbs. That works out well for us.
NON_INVALIDATING_VERBS = CACHEABLE_VERBS


class HTTPCache(object):
    """
    The HTTP Cache object. Manages caching of responses according to RFC 2616,
    adding necessary headers to HTTP request objects, and returning cached
    responses based on server responses.

    This object is not expected to be used by most users. It is exposed as part
    of the public API for users who feel the need for more control. This API
    may change in a minor version increase. Be warned.

    :param capacity: (Optional) The maximum capacity of the HTTP cache.
    """
    def __init__(self, capacity=50):
        #: The maximum capacity of the HTTP cache. When this many cache entries
        #: end up in the cache, the oldest entries are removed.
        self.capacity = capacity

        #: The cache backing store. Cache entries are stored here as key-value
        #: pairs. The key is the URL used to retrieve the cached response. The
        #: value is a python dict, which stores three objects: the response
        #: (keyed off of 'response'), the retrieval or creation date (keyed off
        #: of 'creation') and the cache expiry date (keyed off of 'expiry').
        #: This last value may be None.
        self._cache = RecentOrderedDict()

    def store(self, response):
        """
        Takes an HTTP response object and stores it in the cache according to
        RFC 2616. Returns a boolean value indicating whether the response was
        cached or not.

        :param response: Requests :class:`Response <Response>` object to cache.
        """
        # Define an internal utility function.
        def date_header_or_default(header_name, default, response):
            try:
                date_header = response.headers[header_name]
            except KeyError:
                value = default
            else:
                value = parse_date_header(date_header)
            return value

        if response.status_code not in CACHEABLE_RCS:
            return False

        if response.request.method not in CACHEABLE_VERBS:
            return False

        url = response.url
        now = datetime.utcnow()

        # Get the value of the 'Date' header, if it exists. If it doesn't, or
        # it cannot be parsed (the parser returns None), just use now so the
        # comparison with the expiry below never sees None.
        creation = date_header_or_default('Date', now, response)
        if creation is None:
            creation = now

        # Get the value of the 'Cache-Control' header, if it exists.
        cc = response.headers.get('Cache-Control', None)
        if cc is not None:
            expiry = expires_from_cache_control(cc, now)

            # If the above returns None, we are explicitly instructed not to
            # cache this.
            if expiry is None:
                return False

        # Get the value of the 'Expires' header, if it exists, and if we don't
        # have anything from the 'Cache-Control' header.
        if cc is None:
            expiry = date_header_or_default('Expires', None, response)

        # If the expiry date is earlier or the same as the Date header, don't
        # cache the response at all.
        if expiry is not None and expiry <= creation:
            return False

        # If there's a query portion of the url and it's a GET, don't cache
        # this unless explicitly instructed to.
        if expiry is None and response.request.method == 'GET':
            if url_contains_query(url):
                return False

        self._cache[url] = {'response': response,
                            'creation': creation,
                            'expiry': expiry}

        self.__reduce_cache_count()

        return True

    def handle_304(self, response):
        """
        Given a 304 response, retrieves the cached entry. This unconditionally
        returns the cached entry, so it can be used when the 'intelligent'
        behaviour of retrieve() is not desired.

        Returns None if there is no entry in the cache.

        :param response: The 304 response to find the cached entry for. Should be a Requests :class:`Response <Response>`.
        """
        try:
            cached_response = self._cache[response.url]['response']
        except KeyError:
            cached_response = None

        return cached_response

    def retrieve(self, request):
        """
        Retrieves a cached response if possible.

        If there is a response that can be unconditionally returned (e.g. one
        that had a Cache-Control header set), that response is returned. If
        there is one that can be conditionally returned (if a 304 is returned),
        applies an If-Modified-Since header to the request and returns None.

        :param request: The Requests :class:`PreparedRequest <PreparedRequest>` object.
        """
        return_response = None
        url = request.url

        try:
            cached_response = self._cache[url]
        except KeyError:
            return None

        if request.method not in NON_INVALIDATING_VERBS:
            del self._cache[url]
            return None

        if cached_response['expiry'] is None:
            # We have no explicit expiry time, so we weren't instructed to
            # cache. Add an 'If-Modified-Since' header.
            creation = cached_response['creation']
            header = build_date_header(creation)
            request.headers['If-Modified-Since'] = header
        else:
            # We have an explicit expiry time. If we're earlier than the expiry
            # time, return the response.
            now = datetime.utcnow()

            if now <= cached_response['expiry']:
                return_response = cached_response['response']
            else:
                del self._cache[url]

        return return_response

    def __reduce_cache_count(self):
        """
        Drops the number of entries in the cache to the capacity of the cache.

        Walks the backing RecentOrderedDict in order from oldest to youngest.
        Deletes cache entries that are either invalid or being speculatively
        cached until the number of cache entries drops to the capacity. If this
        leaves the cache above capacity, begins deleting the least-used cache
        entries that are still valid until the cache has space.
        """
        if len(self._cache) <= self.capacity:
            return

        to_delete = len(self._cache) - self.capacity
        keys = list(self._cache.keys())

        # First pass: drop speculative entries (no explicit expiry).
        for key in keys:
            if self._cache[key]['expiry'] is None:
                del self._cache[key]
                to_delete -= 1

            if to_delete == 0:
                return

        # Second pass: drop from the front of the remaining key order.
        keys = list(self._cache.keys())

        for i in range(to_delete):
            del self._cache[keys[i]]

        return
|
@ -1,10 +0,0 @@ |
|||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
compat.py |
|
||||
~~~~~~~~~ |
|
||||
|
|
||||
Defines cross-platform functions and classes needed to achieve proper |
|
||||
functionality. |
|
||||
""" |
|
||||
|
|
||||
pass |
|
@ -1,59 +0,0 @@ |
|||||
""" |
|
||||
structures.py |
|
||||
~~~~~~~~~~~~~ |
|
||||
|
|
||||
Defines structures used by the httpcache module. |
|
||||
""" |
|
||||
|
|
||||
class RecentOrderedDict(dict):
    """
    A custom variant of the dictionary that tracks recency of use. The key
    most recently inserted _or_ retrieved is moved to the END of the
    enumeration order, so iteration runs from least to most recently used.

    Contents live in ``_data`` / ``_order``; the inherited ``dict`` storage
    is not used.
    """
    def __init__(self):
        self._data = {}
        self._order = []

    def __setitem__(self, key, value):
        if key in self._data:
            self._order.remove(key)

        self._order.append(key)
        self._data[key] = value

    def __getitem__(self, key):
        # Raises KeyError before the order is touched when key is absent.
        value = self._data[key]
        self._order.remove(key)
        self._order.append(key)
        return value

    def __delitem__(self, key):
        del self._data[key]
        self._order.remove(key)

    def __iter__(self):
        # __iter__ must return an iterator, not a list. Iterate a snapshot
        # because lookups made during iteration reorder ``_order``.
        return iter(list(self._order))

    def __len__(self):
        return len(self._order)

    def __contains__(self, value):
        return self._data.__contains__(value)

    def items(self):
        """Return ``(key, value)`` pairs, least recently used first."""
        return [(key, self._data[key]) for key in self._order]

    def keys(self):
        """Return a list of keys, least recently used first."""
        # A copy, so callers cannot corrupt the internal order list.
        return list(self._order)

    def values(self):
        """Return the values, least recently used first."""
        return [self._data[key] for key in self._order]

    def clear(self):
        self._data = {}
        self._order = []

    def copy(self):
        """Return an independent shallow copy with the same order."""
        c = RecentOrderedDict()
        c._data = self._data.copy()
        c._order = self._order[:]
        return c
|
@ -1,97 +0,0 @@ |
|||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
utils.py |
|
||||
~~~~~~~~ |
|
||||
|
|
||||
Utility functions for use with httpcache. |
|
||||
""" |
|
||||
from datetime import datetime, timedelta |
|
||||
|
|
||||
try: # Python 2 |
|
||||
from urlparse import urlparse |
|
||||
except ImportError: # Python 3 |
|
||||
from urllib.parse import urlparse |
|
||||
|
|
||||
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
RFC_850_DT_STR = "%A, %d-%b-%y %H:%M:%S GMT"
# ANSI C asctime() layout, e.g. "Sun Nov  6 08:49:37 1994". Whitespace in a
# strptime format matches any run of whitespace, so the space-padded day
# parses correctly.
ASCTIME_DT_STR = "%a %b %d %H:%M:%S %Y"


def parse_date_header(header):
    """
    Given a date header in one of the forms specified by RFC 2616, return a
    naive Python datetime object.

    RFC 2616 specifies three possible formats for date/time headers (RFC 1123,
    RFC 850 and C asctime()), and makes it clear that all dates/times should
    be in UTC/GMT. That is assumed by this library, which simply does
    everything in UTC. The formats are tried in that order.

    This function does _not_ follow Postel's Law. If the value does not
    strictly match one of the defined formats, or is not a string at all,
    this function returns None. This is considered 'safe' behaviour.
    """
    for fmt in (RFC_1123_DT_STR, RFC_850_DT_STR, ASCTIME_DT_STR):
        try:
            return datetime.strptime(header, fmt)
        except ValueError:
            # Wrong layout for this format; try the next one.
            continue
        except TypeError:
            # Not a string (e.g. the header was absent): no format can match.
            return None

    return None
|
||||
|
|
||||
|
|
||||
def build_date_header(dt):
    """
    Given a Python datetime object, build a Date header value according to
    RFC 2616.

    RFC 2616 specifies that the RFC 1123 form is to be preferred, so that is
    what we use. The day and month names are taken from fixed English tables
    rather than from ``strftime``, whose ``%a``/``%b`` output follows the
    process locale and would produce an invalid header under a non-English
    locale.

    No timezone conversion is performed: the fields of ``dt`` are emitted
    as-is and labelled GMT.
    """
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        day_names[dt.weekday()],  # weekday(): Monday == 0
        dt.day,
        month_names[dt.month - 1],
        dt.year,
        dt.hour,
        dt.minute,
        dt.second,
    )
|
||||
|
|
||||
|
|
||||
def expires_from_cache_control(header, current_time):
    """
    Given a Cache-Control header, builds a Python datetime object corresponding
    to the expiry time (in UTC). This function should respect all relevant
    Cache-Control directives.

    Takes current_time as an argument to ensure that 'max-age=0' generates the
    correct behaviour without being special-cased: it yields an expiry equal
    to current_time.

    Returns None to indicate that a request must not be cached. That is the
    case when a no-cache/no-store directive is present or when no usable
    max-age directive is found.
    """
    duration = None

    # Directives are comma separated; whitespace around the comma is optional
    # and directive names are case-insensitive.
    for raw_field in header.split(','):
        field = raw_field.strip().lower()

        # Right now we don't handle no-cache applied to specific fields. To be
        # as 'nice' as possible, treat any no-cache as applying to the whole
        # request. Bail early, because there's no reason to stick around.
        if field.startswith('no-cache') or field == 'no-store':
            return None

        if field.startswith('max-age'):
            value = field.partition('=')[2]
            try:
                duration = int(value)
            except ValueError:
                # Missing or non-numeric value: ignore this directive rather
                # than blowing up on a malformed header.
                continue

    # Compare against None explicitly: a duration of 0 is a valid max-age.
    if duration is None:
        return None

    return current_time + timedelta(seconds=duration)
|
||||
|
|
||||
def url_contains_query(url):
    """
    Report whether the given URL carries a non-empty query string, as
    determined by ``urlparse``.
    """
    query = urlparse(url).query
    return bool(query)
|
@ -1,31 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache |
|
||||
~~~~~~~~~~~~~~ |
|
||||
|
|
||||
Transparent cache for ``requests`` library with persistence and async support |
|
||||
|
|
||||
Just write:: |
|
||||
|
|
||||
import requests_cache |
|
||||
requests_cache.install_cache() |
|
||||
|
|
||||
And requests to resources will be cached for faster repeated access:: |
|
||||
|
|
||||
import requests |
|
||||
for i in range(10): |
|
||||
r = requests.get('http://httpbin.org/delay/5') |
|
||||
# will take approximately 5 seconds instead of 50 |
|
||||
|
|
||||
|
|
||||
:copyright: (c) 2012 by Roman Haritonov. |
|
||||
:license: BSD, see LICENSE for more details. |
|
||||
""" |
|
||||
__docformat__ = 'restructuredtext' |
|
||||
__version__ = '0.4.4' |
|
||||
|
|
||||
from .core import( |
|
||||
CachedSession, install_cache, uninstall_cache, |
|
||||
disabled, enabled, get_cache, clear, configure |
|
||||
) |
|
@ -1,50 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.backends |
|
||||
~~~~~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
Classes and functions for cache persistence |
|
||||
""" |
|
||||
|
|
||||
|
|
||||
from .base import BaseCache |
|
||||
|
|
||||
registry = { |
|
||||
'memory': BaseCache, |
|
||||
} |
|
||||
|
|
||||
try: |
|
||||
# Heroku doesn't allow the SQLite3 module to be installed |
|
||||
from .sqlite import DbCache |
|
||||
registry['sqlite'] = DbCache |
|
||||
except ImportError: |
|
||||
DbCache = None |
|
||||
|
|
||||
try: |
|
||||
from .mongo import MongoCache |
|
||||
registry['mongo'] = registry['mongodb'] = MongoCache |
|
||||
except ImportError: |
|
||||
MongoCache = None |
|
||||
|
|
||||
try: |
|
||||
from .redis import RedisCache |
|
||||
registry['redis'] = RedisCache |
|
||||
except ImportError: |
|
||||
RedisCache = None |
|
||||
|
|
||||
|
|
||||
def create_backend(backend_name, cache_name, options):
    """ Instantiate the cache backend registered under `backend_name`.

    :param backend_name: key into ``registry``; ``None`` selects the default
                         backend name
    :param cache_name: passed to the backend class as first positional argument
    :param options: dict of keyword arguments forwarded to the backend class
    :raises ValueError: if `backend_name` is not present in ``registry``
    """
    if backend_name is None:
        backend_name = _get_default_backend_name()
    # Only the registry lookup is guarded: a KeyError raised from inside the
    # backend constructor must propagate instead of being reported as an
    # unsupported backend.
    try:
        backend_class = registry[backend_name]
    except KeyError:
        raise ValueError('Unsupported backend "%s" try one of: %s' %
                         (backend_name, ', '.join(registry.keys())))
    return backend_class(cache_name, **options)
|
||||
|
|
||||
|
|
||||
def _get_default_backend_name():
    """ Return ``'sqlite'`` when that backend is registered, else ``'memory'``.
    """
    return 'sqlite' if 'sqlite' in registry else 'memory'
|
@ -1,171 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.backends.base |
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
Contains BaseCache class which can be used as in-memory cache backend or |
|
||||
extended to support persistence. |
|
||||
""" |
|
||||
from datetime import datetime |
|
||||
import hashlib |
|
||||
from copy import copy |
|
||||
|
|
||||
import requests |
|
||||
|
|
||||
from ..compat import is_py2 |
|
||||
|
|
||||
|
|
||||
class BaseCache(object):
    """ Base class for cache implementations, can be used as in-memory cache.

    To extend it you can provide dictionary-like objects for
    :attr:`keys_map` and :attr:`responses` or override public methods.
    """
    def __init__(self, *args, **kwargs):
        #: `key` -> `key_in_responses` mapping
        self.keys_map = {}
        #: `key_in_cache` -> `response` mapping
        self.responses = {}

    def save_response(self, key, response):
        """ Save response to cache

        :param key: key for this response
        :param response: response to save

        .. note:: Response is reduced before saving (with :meth:`reduce_response`)
                  to make it picklable
        """
        # Stored as a (reduced_response, utc_timestamp) tuple.
        self.responses[key] = self.reduce_response(response), datetime.utcnow()

    def add_key_mapping(self, new_key, key_to_response):
        """
        Adds mapping of `new_key` to `key_to_response` to make it possible to
        associate many keys with single response

        :param new_key: new key (e.g. url from redirect)
        :param key_to_response: key which can be found in :attr:`responses`
        :return:
        """
        self.keys_map[new_key] = key_to_response

    def get_response_and_time(self, key, default=(None, None)):
        """ Retrieves response and timestamp for `key` if it's stored in cache,
        otherwise returns `default`

        :param key: key of resource
        :param default: return this if `key` not found in cache
        :returns: tuple (response, datetime)

        .. note:: Response is restored after unpickling with :meth:`restore_response`
        """
        try:
            # Not a primary key: resolve it through the alias map.
            if key not in self.responses:
                key = self.keys_map[key]
            response, timestamp = self.responses[key]
        except KeyError:
            return default
        return self.restore_response(response), timestamp

    def delete(self, key):
        """ Delete `key` from cache. Also deletes the key mappings of all
        responses from the response history. Unknown keys are ignored.
        """
        try:
            if key in self.responses:
                response, _ = self.responses[key]
                del self.responses[key]
            else:
                response, _ = self.responses[self.keys_map[key]]
                del self.keys_map[key]
        except KeyError:
            return
        # Each history alias is removed independently so that one missing
        # mapping does not leave the remaining aliases behind.
        for r in response.history:
            try:
                del self.keys_map[self.create_key(r.request)]
            except KeyError:
                pass

    def delete_url(self, url):
        """ Delete response associated with `url` from cache.
        Also deletes all responses from response history. Works only for GET requests
        """
        self.delete(self._url_to_key(url))

    def clear(self):
        """ Clear cache
        """
        self.responses.clear()
        self.keys_map.clear()

    def has_key(self, key):
        """ Returns `True` if cache has `key`, `False` otherwise
        """
        return key in self.responses or key in self.keys_map

    def has_url(self, url):
        """ Returns `True` if cache has `url`, `False` otherwise.
        Works only for GET request urls
        """
        return self.has_key(self._url_to_key(url))

    def _url_to_key(self, url):
        # Build the key the same way it is built for a prepared GET request.
        from requests import Request
        return self.create_key(Request('GET', url).prepare())

    # Attributes copied from/to a response object when reducing/restoring.
    _response_attrs = ['_content', 'url', 'status_code', 'cookies',
                       'headers', 'encoding', 'request', 'reason', 'raw']

    # Attributes of ``response.raw`` that are kept in the reduced copy.
    _raw_response_attrs = ['_original_response', 'decode_content', 'headers',
                           'reason', 'status', 'strict', 'version']

    def reduce_response(self, response):
        """ Reduce response object to make it compatible with ``pickle``
        """
        result = _Store()
        # prefetch
        response.content
        for field in self._response_attrs:
            setattr(result, field, self._picklable_field(response, field))
        result.history = tuple(self.reduce_response(r) for r in response.history)
        return result

    def _picklable_field(self, response, name):
        """ Return attribute `name` of `response` in a form suitable for storing.

        The request is shallow-copied with its hooks emptied; ``raw`` is
        replaced by a plain object holding only :attr:`_raw_response_attrs`.
        """
        value = getattr(response, name)
        if name == 'request':
            value = copy(value)
            value.hooks = []
        elif name == 'raw':
            result = _Store()
            for field in self._raw_response_attrs:
                setattr(result, field, getattr(value, field, None))
            value = result
        return value

    def restore_response(self, response):
        """ Restore response object after unpickling
        """
        result = requests.Response()
        for field in self._response_attrs:
            setattr(result, field, getattr(response, field, None))
        result.history = tuple(self.restore_response(r) for r in response.history)
        return result

    def create_key(self, request):
        """ Return the SHA-256 hex digest of the request's upper-cased method,
        its url and, when non-empty, its body.
        """
        key = hashlib.sha256()
        key.update(_to_bytes(request.method.upper()))
        key.update(_to_bytes(request.url))
        if request.body:
            key.update(_to_bytes(request.body))
        return key.hexdigest()

    def __str__(self):
        return 'keys: %s\nresponses: %s' % (self.keys_map, self.responses)
|
||||
|
|
||||
|
|
||||
# used for saving response attributes |
|
||||
class _Store(object): |
|
||||
pass |
|
||||
|
|
||||
|
|
||||
def _to_bytes(s, encoding='utf-8'):
    """ Return `s` as bytes.

    On Python 2, or when `s` is already ``bytes``, `s` is returned unchanged;
    otherwise it is encoded with `encoding`.
    """
    needs_encoding = not (is_py2 or isinstance(s, bytes))
    return bytes(s, encoding) if needs_encoding else s
|
@ -1,25 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.backends.mongo |
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
``mongo`` cache backend |
|
||||
""" |
|
||||
from .base import BaseCache |
|
||||
from .storage.mongodict import MongoDict, MongoPickleDict |
|
||||
|
|
||||
|
|
||||
class MongoCache(BaseCache):
    """ ``mongo`` cache backend.
    """
    def __init__(self, db_name='requests-cache', **options):
        """
        :param db_name: database name (default: ``'requests-cache'``)
        :param connection: (optional) ``pymongo.Connection``
        """
        super(MongoCache, self).__init__()
        connection = options.get('connection')
        responses = MongoPickleDict(db_name, 'responses', connection)
        self.responses = responses
        # The key map reuses whichever connection the responses store holds.
        self.keys_map = MongoDict(db_name, 'urls', responses.connection)
|
||||
|
|
@ -1,24 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.backends.redis |
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
``redis`` cache backend |
|
||||
""" |
|
||||
from .base import BaseCache |
|
||||
from .storage.redisdict import RedisDict |
|
||||
|
|
||||
|
|
||||
class RedisCache(BaseCache):
    """ ``redis`` cache backend.
    """
    def __init__(self, namespace='requests-cache', **options):
        """
        :param namespace: redis namespace (default: ``'requests-cache'``)
        :param connection: (optional) ``redis.StrictRedis``
        """
        super(RedisCache, self).__init__()
        connection = options.get('connection')
        responses = RedisDict(namespace, 'responses', connection)
        self.responses = responses
        # The key map reuses whichever connection the responses store holds.
        self.keys_map = RedisDict(namespace, 'urls', responses.connection)
|
@ -1,30 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.backends.sqlite |
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
``sqlite3`` cache backend |
|
||||
""" |
|
||||
from .base import BaseCache |
|
||||
from .storage.dbdict import DbDict, DbPickleDict |
|
||||
|
|
||||
|
|
||||
class DbCache(BaseCache):
    """ sqlite cache backend.

    Reading is fast, saving is a bit slower. It can store big amount of data
    with low memory usage.
    """
    def __init__(self, location='cache',
                 fast_save=False, extension='.sqlite', **options):
        """
        :param location: database filename prefix (default: ``'cache'``)
        :param fast_save: Speedup cache saving up to 50 times but with possibility of data loss.
                          See :ref:`backends.DbDict <backends_dbdict>` for more info
        :param extension: extension for filename (default: ``'.sqlite'``)
        """
        super(DbCache, self).__init__()
        # Both tables live in the same database file.
        db_file = location + extension
        self.responses = DbPickleDict(db_file, 'responses', fast_save=fast_save)
        self.keys_map = DbDict(db_file, 'urls')
|
||||
|
|
@ -1,171 +0,0 @@ |
|||||
#!/usr/bin/python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.backends.dbdict |
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
Dictionary-like objects for saving large data sets to `sqlite` database |
|
||||
""" |
|
||||
from collections import MutableMapping |
|
||||
import sqlite3 as sqlite |
|
||||
from contextlib import contextmanager |
|
||||
try: |
|
||||
import threading |
|
||||
except ImportError: |
|
||||
import dummy_threading as threading |
|
||||
try: |
|
||||
import cPickle as pickle |
|
||||
except ImportError: |
|
||||
import pickle |
|
||||
|
|
||||
from requests_cache.compat import bytes |
|
||||
|
|
||||
|
|
||||
|
|
||||
class DbDict(MutableMapping):
    """ DbDict - a dictionary-like object for saving large datasets to `sqlite` database

    It's possible to create multiple DbDict instances, which will be stored as separate
    tables in one database::

        d1 = DbDict('test', 'table1')
        d2 = DbDict('test', 'table2')
        d3 = DbDict('test', 'table3')

    all data will be stored in the ``test`` database file in the
    corresponding tables: ``table1``, ``table2`` and ``table3``
    """

    def __init__(self, filename, table_name='data', fast_save=False, **options):
        """
        :param filename: filename for database; it is passed to
                         ``sqlite3.connect`` unchanged
        :param table_name: table name
        :param fast_save: If it's True, then sqlite will be configured with
                          `"PRAGMA synchronous = 0;" <http://www.sqlite.org/pragma.html#pragma_synchronous>`_
                          to speedup cache saving, but be careful, it's dangerous.
                          Tests showed that insertion order of records can be wrong with this option.
        """
        self.filename = filename
        self.table_name = table_name
        self.fast_save = fast_save

        #: Transactions can be committed if this property is set to `True`
        self.can_commit = True

        self._bulk_commit = False
        # Connection shared by all operations inside a bulk_commit() block.
        self._pending_connection = None
        self._lock = threading.RLock()
        with self.connection() as con:
            con.execute("create table if not exists `%s` (key PRIMARY KEY, value)" % self.table_name)

    @contextmanager
    def connection(self, commit_on_success=False):
        """ Yield a sqlite connection while holding the instance lock.

        Outside of :meth:`bulk_commit` a new connection is opened and closed
        for every use; inside it one pending connection is created lazily and
        reused.

        :param commit_on_success: commit after the ``with`` body finishes
                                  without error, provided :attr:`can_commit`
                                  is `True`
        """
        with self._lock:
            if self._bulk_commit:
                if self._pending_connection is None:
                    self._pending_connection = sqlite.connect(self.filename)
                con = self._pending_connection
            else:
                con = sqlite.connect(self.filename)
            try:
                if self.fast_save:
                    con.execute("PRAGMA synchronous = 0;")
                yield con
                if commit_on_success and self.can_commit:
                    con.commit()
            finally:
                # The pending bulk connection is closed by bulk_commit().
                if not self._bulk_commit:
                    con.close()

    def commit(self, force=False):
        """
        Commits pending transaction if :attr:`can_commit` or `force` is `True`

        :param force: force commit, ignore :attr:`can_commit`
        """
        if force or self.can_commit:
            if self._pending_connection is not None:
                self._pending_connection.commit()

    @contextmanager
    def bulk_commit(self):
        """
        Context manager used to speedup insertion of big number of records
        ::

            >>> d1 = DbDict('test')
            >>> with d1.bulk_commit():
            ...     for i in range(1000):
            ...         d1[i] = i * 2

        """
        self._bulk_commit = True
        self.can_commit = False
        try:
            yield
            self.commit(True)
        finally:
            self._bulk_commit = False
            self.can_commit = True
            # The pending connection is created lazily, so it is still None
            # when nothing touched the database inside the block.
            if self._pending_connection is not None:
                self._pending_connection.close()
                self._pending_connection = None

    def __getitem__(self, key):
        with self.connection() as con:
            row = con.execute("select value from `%s` where key=?" %
                              self.table_name, (key,)).fetchone()
            if not row:
                raise KeyError(key)
            return row[0]

    def __setitem__(self, key, item):
        with self.connection(True) as con:
            if con.execute("select key from `%s` where key=?" %
                           self.table_name, (key,)).fetchone():
                con.execute("update `%s` set value=? where key=?" %
                            self.table_name, (item, key))
            else:
                con.execute("insert into `%s` (key,value) values (?,?)" %
                            self.table_name, (key, item))

    def __delitem__(self, key):
        with self.connection(True) as con:
            if con.execute("select key from `%s` where key=?" %
                           self.table_name, (key,)).fetchone():
                con.execute("delete from `%s` where key=?" %
                            self.table_name, (key,))
            else:
                raise KeyError(key)

    def __iter__(self):
        with self.connection() as con:
            for row in con.execute("select key from `%s`" %
                                   self.table_name):
                yield row[0]

    def __len__(self):
        with self.connection() as con:
            return con.execute("select count(key) from `%s`" %
                               self.table_name).fetchone()[0]

    def clear(self):
        """ Remove all items by dropping and re-creating the table.
        """
        with self.connection(True) as con:
            con.execute("drop table `%s`" % self.table_name)
            con.execute("create table `%s` (key PRIMARY KEY, value)" %
                        self.table_name)

    def __str__(self):
        return str(dict(self.items()))
|
||||
|
|
||||
|
|
||||
class DbPickleDict(DbDict):
    """ Same as :class:`DbDict`, but pickles values before saving
    """
    def __setitem__(self, key, item):
        blob = sqlite.Binary(pickle.dumps(item))
        super(DbPickleDict, self).__setitem__(key, blob)

    def __getitem__(self, key):
        # NOTE(review): pickle.loads can execute arbitrary code; this is only
        # safe if the database file is trusted.
        raw = super(DbPickleDict, self).__getitem__(key)
        return pickle.loads(bytes(raw))
|
@ -1,74 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.backends.mongodict |
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
Dictionary-like objects for saving large data sets to ``mongodb`` database |
|
||||
""" |
|
||||
from collections import MutableMapping |
|
||||
try: |
|
||||
import cPickle as pickle |
|
||||
except ImportError: |
|
||||
import pickle |
|
||||
|
|
||||
from pymongo import Connection |
|
||||
|
|
||||
|
|
||||
class MongoDict(MutableMapping):
    """ MongoDict - a dictionary-like interface for ``mongo`` database
    """
    def __init__(self, db_name,
                 collection_name='mongo_dict_data', connection=None):
        """
        :param db_name: database name (be careful with production databases)
        :param collection_name: collection name (default: mongo_dict_data)
        :param connection: ``pymongo.Connection`` instance. If it's ``None``
                           (default) new connection with default options will
                           be created
        """
        if connection is not None:
            self.connection = connection
        else:
            self.connection = Connection()
        self.db = self.connection[db_name]
        self.collection = self.db[collection_name]

    def __getitem__(self, key):
        result = self.collection.find_one({'_id': key})
        if result is None:
            # Carry the key, as the built-in dict does.
            raise KeyError(key)
        return result['data']

    def __setitem__(self, key, item):
        # Each item is one document: the key is `_id`, the value is `data`.
        self.collection.save({'_id': key, 'data': item})

    def __delitem__(self, key):
        spec = {'_id': key}
        # Check for existence first so a missing key raises KeyError.
        if self.collection.find_one(spec, fields=['_id']):
            self.collection.remove(spec)
        else:
            raise KeyError(key)

    def __len__(self):
        return self.collection.count()

    def __iter__(self):
        for d in self.collection.find(fields=['_id']):
            yield d['_id']

    def clear(self):
        """ Remove all items by dropping the collection.
        """
        self.collection.drop()

    def __str__(self):
        return str(dict(self.items()))
|
||||
|
|
||||
|
|
||||
class MongoPickleDict(MongoDict):
    """ Same as :class:`MongoDict`, but pickles values before saving
    """
    def __setitem__(self, key, item):
        pickled = pickle.dumps(item)
        super(MongoPickleDict, self).__setitem__(key, pickled)

    def __getitem__(self, key):
        # NOTE(review): pickle.loads can execute arbitrary code; this is only
        # safe if the database contents are trusted.
        raw = super(MongoPickleDict, self).__getitem__(key)
        return pickle.loads(bytes(raw))
|
@ -1,68 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.backends.redisdict |
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
Dictionary-like objects for saving large data sets to ``redis`` key-store |
|
||||
""" |
|
||||
from collections import MutableMapping |
|
||||
try: |
|
||||
import cPickle as pickle |
|
||||
except ImportError: |
|
||||
import pickle |
|
||||
from redis import StrictRedis as Redis |
|
||||
|
|
||||
|
|
||||
class RedisDict(MutableMapping):
    """ RedisDict - a dictionary-like interface for ``redis`` key-stores
    """
    def __init__(self, namespace, collection_name='redis_dict_data',
                 connection=None):
        """
        The actual key name on the redis server will be
        ``namespace``:``collection_name``

        In order to deal with how redis stores data/keys,
        everything, i.e. keys and data, must be pickled.

        :param namespace: namespace to use
        :param collection_name: name of the hash map stored in redis
                                (default: redis_dict_data)
        :param connection: ``redis.StrictRedis`` instance.
                           If it's ``None`` (default), a new connection with
                           default options will be created

        """
        if connection is not None:
            self.connection = connection
        else:
            self.connection = Redis()
        # Name of the single redis hash that holds every item of this dict.
        self._self_key = ':'.join([namespace, collection_name])

    def __getitem__(self, key):
        result = self.connection.hget(self._self_key, pickle.dumps(key))
        if result is None:
            # Carry the key, as the built-in dict does.
            raise KeyError(key)
        # NOTE(review): pickle.loads can execute arbitrary code; this is only
        # safe if the redis contents are trusted.
        return pickle.loads(bytes(result))

    def __setitem__(self, key, item):
        self.connection.hset(self._self_key, pickle.dumps(key),
                             pickle.dumps(item))

    def __delitem__(self, key):
        # A falsy hdel result is treated as "field was not present".
        if not self.connection.hdel(self._self_key, pickle.dumps(key)):
            raise KeyError(key)

    def __len__(self):
        return self.connection.hlen(self._self_key)

    def __iter__(self):
        for v in self.connection.hkeys(self._self_key):
            yield pickle.loads(bytes(v))

    def clear(self):
        """ Remove all items by deleting the underlying redis hash.
        """
        self.connection.delete(self._self_key)

    def __str__(self):
        return str(dict(self.items()))
|
@ -1,103 +0,0 @@ |
|||||
# -*- coding: utf-8 -*- |
|
||||
# taken from requests library: https://github.com/kennethreitz/requests |
|
||||
""" |
|
||||
pythoncompat |
|
||||
""" |
|
||||
|
|
||||
|
|
||||
import sys |
|
||||
|
|
||||
# ------- |
|
||||
# Pythons |
|
||||
# ------- |
|
||||
|
|
||||
# Syntax sugar. |
|
||||
_ver = sys.version_info

#: Python 2.x?
is_py2 = (_ver[0] == 2)

#: Python 3.x?
is_py3 = (_ver[0] == 3)

# Each flag below is True only for the exact (major, minor) pair it names.

#: Python 3.0.x
is_py30 = (_ver[:2] == (3, 0))

#: Python 3.1.x
is_py31 = (_ver[:2] == (3, 1))

#: Python 3.2.x
is_py32 = (_ver[:2] == (3, 2))

#: Python 3.3.x
is_py33 = (_ver[:2] == (3, 3))

#: Python 3.4.x
is_py34 = (_ver[:2] == (3, 4))

#: Python 2.7.x
is_py27 = (_ver[:2] == (2, 7))

#: Python 2.6.x
is_py26 = (_ver[:2] == (2, 6))

#: Python 2.5.x
is_py25 = (_ver[:2] == (2, 5))

#: Python 2.4.x
is_py24 = (_ver[:2] == (2, 4))  # I'm assuming this is not by choice.
|
||||
|
|
||||
|
|
||||
# --------- |
|
||||
# Platforms |
|
||||
# --------- |
|
||||
|
|
||||
|
|
||||
# Syntax sugar. |
|
||||
_ver = sys.version.lower()

# Interpreter implementation, detected from the version banner.
is_pypy = ('pypy' in _ver)
is_jython = ('jython' in _ver)
is_ironpython = ('iron' in _ver)

# Assume CPython, if nothing else.
is_cpython = not any((is_pypy, is_jython, is_ironpython))

# Windows-based system.
is_windows = 'win32' in str(sys.platform).lower()

# Standard Linux 2+ system.
is_linux = ('linux' in str(sys.platform).lower())
is_osx = ('darwin' in str(sys.platform).lower())
is_hpux = ('hpux' in str(sys.platform).lower())   # Complete guess.
# Solaris reports sys.platform as 'sunos5'; 'solaris' is accepted as well.
# (The previous test looked for the typo 'solar==', which can never match.)
is_solaris = any(name in str(sys.platform).lower()
                 for name in ('sunos', 'solaris'))
|
||||
|
|
||||
|
|
||||
# --------- |
|
||||
# Specifics |
|
||||
# --------- |
|
||||
|
|
||||
|
|
||||
if is_py2: |
|
||||
from urllib import quote, unquote, urlencode |
|
||||
from urlparse import urlparse, urlunparse, urljoin, urlsplit |
|
||||
from urllib2 import parse_http_list |
|
||||
import cookielib |
|
||||
from StringIO import StringIO |
|
||||
bytes = str |
|
||||
str = unicode |
|
||||
basestring = basestring |
|
||||
|
|
||||
|
|
||||
|
|
||||
elif is_py3: |
|
||||
from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote |
|
||||
from urllib.request import parse_http_list |
|
||||
from http import cookiejar as cookielib |
|
||||
from http.cookies import SimpleCookie |
|
||||
from io import StringIO |
|
||||
|
|
||||
str = str |
|
||||
bytes = bytes |
|
||||
basestring = (str,bytes) |
|
||||
|
|
@ -1,227 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
# -*- coding: utf-8 -*- |
|
||||
""" |
|
||||
requests_cache.core |
|
||||
~~~~~~~~~~~~~~~~~~~ |
|
||||
|
|
||||
Core functions for configuring cache and monkey patching ``requests`` |
|
||||
""" |
|
||||
from contextlib import contextmanager |
|
||||
from datetime import datetime, timedelta |
|
||||
|
|
||||
import requests |
|
||||
from requests import Session as OriginalSession |
|
||||
from requests.hooks import dispatch_hook |
|
||||
|
|
||||
from requests_cache import backends |
|
||||
from requests_cache.compat import str, basestring |
|
||||
|
|
||||
# Parse the installed Requests version; a non-numeric component (ValueError)
# leaves the imported dispatch_hook untouched.
try:
    ver = tuple(int(part) for part in requests.__version__.split("."))
except ValueError:
    pass
else:
    # We don't need to dispatch hook in Requests <= 1.1.0
    if ver < (1, 2, 0):
        # Replace the hook dispatcher with a pass-through that returns the
        # hook data unchanged.
        dispatch_hook = lambda key, hooks, hook_data, *a, **kw: hook_data
    del ver
|
||||
|
|
||||
|
|
||||
class CachedSession(OriginalSession): |
|
||||
""" Requests ``Sessions`` with caching support. |
|
||||
""" |
|
||||
|
|
||||
def __init__(self, cache_name='cache', backend=None, expire_after=None, |
|
||||
allowable_codes=(200,), allowable_methods=('GET',), |
|
||||
**backend_options): |
|
||||
""" |
|
||||
:param cache_name: for ``sqlite`` backend: cache file will start with this prefix, |
|
||||
e.g ``cache.sqlite`` |
|
||||
|
|
||||
for ``mongodb``: it's used as database name |
|
||||
|
|
||||
for ``redis``: it's used as the namespace. This means all keys |
|
||||
are prefixed with ``'cache_name:'`` |
|
||||
:param backend: cache backend name e.g ``'sqlite'``, ``'mongodb'``, ``'redis'``, ``'memory'``. |
|
||||
(see :ref:`persistence`). Or instance of backend implementation. |
|
||||
Default value is ``None``, which means use ``'sqlite'`` if available, |
|
||||
otherwise fallback to ``'memory'``. |
|
||||
:param expire_after: number of seconds after cache will be expired |
|
||||
or `None` (default) to ignore expiration |
|
||||
:type expire_after: float |
|
||||
:param allowable_codes: limit caching only for response with this codes (default: 200) |
|
||||
:type allowable_codes: tuple |
|
||||
:param allowable_methods: cache only requests of this methods (default: 'GET') |
|
||||
:type allowable_methods: tuple |
|
||||
:kwarg backend_options: options for chosen backend. See corresponding |
|
||||
:ref:`sqlite <backends_sqlite>`, :ref:`mongo <backends_mongo>` |
|
||||
and :ref:`redis <backends_redis>` backends API documentation |
|
||||
""" |
|
||||
if backend is None or isinstance(backend, basestring): |
|
||||
self.cache = backends.create_backend(backend, cache_name, |
|
||||
backend_options) |
|
||||
else: |
|
||||
self.cache = backend |
|
||||
|
|
||||
self._cache_expire_after = expire_after |
|
||||
self._cache_allowable_codes = allowable_codes |
|
||||
self._cache_allowable_methods = allowable_methods |
|
||||
self._is_cache_disabled = False |
|
||||
super(CachedSession, self).__init__() |
|
||||
|
|
||||
def send(self, request, **kwargs):
    """
    Send ``request``, answering from the cache when a usable entry exists.

    The cache is bypassed when it is disabled or when the request method is
    not one of the allowable methods. Every returned response carries a
    ``from_cache`` attribute telling whether it was served from the cache.

    :param request: prepared request to send
    :param kwargs: passed through to the parent ``send`` and to the
                   ``response`` hook dispatch
    :return: the response, either fresh or cached
    """
    use_cache = (not self._is_cache_disabled
                 and request.method in self._cache_allowable_methods)
    if not use_cache:
        plain = super(CachedSession, self).send(request, **kwargs)
        plain.from_cache = False
        return plain

    cache_key = self.cache.create_key(request)

    def fetch_and_store():
        # Perform the real request; store it only for allowable status codes.
        fresh = super(CachedSession, self).send(request, **kwargs)
        if fresh.status_code in self._cache_allowable_codes:
            self.cache.save_response(cache_key, fresh)
        fresh.from_cache = False
        return fresh

    cached, stored_at = self.cache.get_response_and_time(cache_key)
    if cached is None:
        return fetch_and_store()

    lifetime = self._cache_expire_after
    if lifetime is not None:
        age = datetime.utcnow() - stored_at
        if age > timedelta(seconds=lifetime):
            # Stale entry: drop it and go back to the network.
            self.cache.delete(cache_key)
            return fetch_and_store()

    # dispatch hook here, because we've removed it before pickling
    cached.from_cache = True
    return dispatch_hook('response', request.hooks, cached, **kwargs)
|
||||
|
|
||||
def request(self, method, url, params=None, data=None, headers=None,
            cookies=None, files=None, auth=None, timeout=None,
            allow_redirects=True, proxies=None, hooks=None, stream=None,
            verify=None, cert=None):
    """
    Perform a request and register cache key aliases for its redirects.

    All arguments are forwarded positionally to the parent ``request``.
    Unless the cache is disabled, the cache key of every response in
    ``response.history`` is mapped to the key of the final response's
    request.

    :return: the response returned by the parent ``request``
    """
    response = super(CachedSession, self).request(
        method, url, params, data, headers, cookies, files, auth, timeout,
        allow_redirects, proxies, hooks, stream, verify, cert)

    if not self._is_cache_disabled:
        final_key = self.cache.create_key(response.request)
        for redirect in response.history:
            redirect_key = self.cache.create_key(redirect.request)
            self.cache.add_key_mapping(redirect_key, final_key)

    return response
|
||||
|
|
||||
@contextmanager
def cache_disabled(self):
    """
    Context manager for temporarily disabling the cache of this session
    ::

        >>> s = CachedSession()
        >>> with s.cache_disabled():
        ...     s.get('http://httpbin.org/ip')

    The previous state of the flag is restored on exit (also when the body
    raises), so the context manager can be nested safely: leaving an inner
    ``cache_disabled()`` block does not re-enable the cache while an outer
    one is still active.
    """
    # Remember the current state instead of assuming the cache was enabled.
    was_disabled = self._is_cache_disabled
    self._is_cache_disabled = True
    try:
        yield
    finally:
        self._is_cache_disabled = was_disabled
|
||||
|
|
||||
|
|
||||
def install_cache(cache_name='cache', backend=None, expire_after=None,
                  allowable_codes=(200,), allowable_methods=('GET',),
                  session_factory=CachedSession, **backend_options):
    """
    Installs cache for all ``Requests`` requests by monkey-patching ``Session``

    Parameters are the same as in :class:`CachedSession`. Additional parameters:

    :param session_factory: Session factory. It should inherit
                            :class:`CachedSession` (default)
    """
    if backend:
        # Build the backend once, up front, so the factory below hands the
        # same object to every session it creates.
        backend = backends.create_backend(backend, cache_name, backend_options)

    def make_session():
        # Zero-argument factory that stands in for ``requests.Session``.
        return session_factory(cache_name=cache_name,
                               backend=backend,
                               expire_after=expire_after,
                               allowable_codes=allowable_codes,
                               allowable_methods=allowable_methods,
                               **backend_options)

    _patch_session_factory(make_session)


# backward compatibility
configure = install_cache
|
||||
|
|
||||
|
|
||||
def uninstall_cache():
    """ Restores ``requests.Session`` to ``OriginalSession``, which disables
    the globally installed cache
    """
    _patch_session_factory(session_factory=OriginalSession)
|
||||
|
|
||||
|
|
||||
@contextmanager
def disabled():
    """
    Context manager for temporarily disabling the globally installed cache

    The session factory that was active on entry is put back on exit.

    .. warning:: not thread-safe

    ::

        >>> with requests_cache.disabled():
        ...     requests.get('http://httpbin.org/ip')
        ...     requests.get('http://httpbin.org/get')

    """
    # Capture the current factory before uninstall_cache() replaces it.
    active_factory = requests.Session
    uninstall_cache()
    try:
        yield
    finally:
        _patch_session_factory(active_factory)
|
||||
|
|
||||
|
|
||||
@contextmanager
def enabled(*args, **kwargs):
    """
    Context manager for temporarily installing a global cache.

    Accepts the same arguments as :func:`install_cache`.

    On exit, the session factory that was active before entering is
    restored. A cache that was already installed beforehand is therefore
    kept rather than being uninstalled as a side effect.

    .. warning:: not thread-safe

    ::

        >>> with requests_cache.enabled('cache_db'):
        ...     requests.get('http://httpbin.org/get')

    """
    # Capture the current factory before install_cache() replaces it, the
    # same way disabled() does, so exit restores the previous state instead
    # of unconditionally uninstalling.
    previous = requests.Session
    install_cache(*args, **kwargs)
    try:
        yield
    finally:
        _patch_session_factory(previous)
|
||||
|
|
||||
|
|
||||
def get_cache():
    """ Returns the ``cache`` attribute of a newly created ``requests.Session``,
    i.e. the cache object of the globally installed ``CachedSession``
    """
    session = requests.Session()
    return session.cache
|
||||
|
|
||||
|
|
||||
def clear():
    """ Clears the globally installed cache (the object returned by
    :func:`get_cache`)
    """
    installed_cache = get_cache()
    installed_cache.clear()
|
||||
|
|
||||
|
|
||||
def _patch_session_factory(session_factory=CachedSession):
    """ Monkey-patches ``requests.Session`` and ``requests.sessions.Session``
    so that both names refer to ``session_factory``
    """
    requests.Session = session_factory
    requests.sessions.Session = session_factory
|
Loading…
Reference in new issue