30 changed files with 591 additions and 1434 deletions
@ -0,0 +1,13 @@ |
|||
"""CacheControl import Interface. |
|||
|
|||
Make it easy to import from cachecontrol without long namespaces. |
|||
""" |
|||
|
|||
# patch our requests.models.Response to make them pickleable in older |
|||
# versions of requests. |
|||
|
|||
import cachecontrol.patch_requests |
|||
|
|||
from cachecontrol.wrapper import CacheControl |
|||
from cachecontrol.adapter import CacheControlAdapter |
|||
from cachecontrol.controller import CacheController |
@ -0,0 +1,75 @@ |
|||
from requests.adapters import HTTPAdapter |
|||
|
|||
from cachecontrol.controller import CacheController |
|||
from cachecontrol.cache import DictCache |
|||
|
|||
|
|||
class CacheControlAdapter(HTTPAdapter): |
|||
invalidating_methods = set(['PUT', 'DELETE']) |
|||
|
|||
def __init__(self, cache=None, cache_etags=True, cache_all=False, *args, **kw): |
|||
super(CacheControlAdapter, self).__init__(*args, **kw) |
|||
self.cache = cache or DictCache() |
|||
self.controller = CacheController(self.cache, cache_etags=cache_etags, cache_all=cache_all) |
|||
|
|||
def send(self, request, **kw): |
|||
"""Send a request. Use the request information to see if it |
|||
exists in the cache. |
|||
""" |
|||
if request.method == 'GET': |
|||
cached_response = self.controller.cached_request( |
|||
request.url, request.headers |
|||
) |
|||
if cached_response: |
|||
# Cached responses should not have a raw field since |
|||
# they *cannot* be created from some stream. |
|||
cached_response.raw = None |
|||
return cached_response |
|||
|
|||
# check for etags and add headers if appropriate |
|||
headers = self.controller.add_headers(request.url) |
|||
request.headers.update(headers) |
|||
|
|||
resp = super(CacheControlAdapter, self).send(request, **kw) |
|||
return resp |
|||
|
|||
def build_response(self, request, response): |
|||
"""Build a response by making a request or using the cache. |
|||
|
|||
This will end up calling send and returning a potentially |
|||
cached response |
|||
""" |
|||
resp = super(CacheControlAdapter, self).build_response( |
|||
request, response |
|||
) |
|||
|
|||
# See if we should invalidate the cache. |
|||
if request.method in self.invalidating_methods and resp.ok: |
|||
cache_url = self.controller.cache_url(request.url) |
|||
self.cache.delete(cache_url) |
|||
|
|||
# Try to store the response if it is a GET |
|||
elif request.method == 'GET': |
|||
if response.status == 304: |
|||
# We must have sent an ETag request. This could mean |
|||
# that we've been expired already or that we simply |
|||
# have an etag. In either case, we want to try and |
|||
# update the cached response. |
|||
resp = self.controller.update_cached_response( |
|||
request, response |
|||
) |
|||
# Fix a possible exception caused by the missing `raw` field in |
|||
# requests |
|||
# TODO: remove once requests is bumped to the 2.2.2 or 2.3 |
|||
# version |
|||
resp.raw = None |
|||
else: |
|||
# try to cache the response |
|||
self.controller.cache_response(request, resp) |
|||
|
|||
# Give the request a from_cache attr to let people use it |
|||
# rather than testing for hasattr. |
|||
if not hasattr(resp, 'from_cache'): |
|||
resp.from_cache = False |
|||
|
|||
return resp |
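For context, a short sketch of mounting the new adapter directly on a requests session; this assumes the DictCache default shown above, and the URL is a placeholder.

import requests

from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache

sess = requests.Session()
# Mount the caching adapter for plain-HTTP URLs; DictCache() is what the adapter
# would create on its own if no cache were passed in.
sess.mount('http://', CacheControlAdapter(cache=DictCache(), cache_etags=True))

first = sess.get('http://example.com/')    # goes to the network, from_cache is False
second = sess.get('http://example.com/')   # served from the cache if still fresh
print(getattr(second, 'from_cache', False))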
@ -0,0 +1,36 @@ |
|||
""" |
|||
The cache object API for implementing caches. The default is just a |
|||
dictionary, which in turn means it is not thread-safe for writing. |
|||
""" |
|||
from threading import Lock |
|||
|
|||
|
|||
class BaseCache(object): |
|||
|
|||
def get(self, key): |
|||
raise NotImplementedError() |
|||
|
|||
def set(self, key, value): |
|||
raise NotImplementedError() |
|||
|
|||
def delete(self, key): |
|||
raise NotImplementedError() |
|||
|
|||
|
|||
class DictCache(BaseCache): |
|||
|
|||
def __init__(self, init_dict=None): |
|||
self.lock = Lock() |
|||
self.data = init_dict or {} |
|||
|
|||
def get(self, key): |
|||
return self.data.get(key, None) |
|||
|
|||
def set(self, key, value): |
|||
with self.lock: |
|||
self.data.update({key: value}) |
|||
|
|||
def delete(self, key): |
|||
with self.lock: |
|||
if key in self.data: |
|||
self.data.pop(key) |
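Because the BaseCache contract above is only get/set/delete, a custom backend is a very small class. A hypothetical bounded LRU-style cache (not part of this change) could look like this:

import threading
from collections import OrderedDict

from cachecontrol.cache import BaseCache


class LRUCache(BaseCache):
    """Hypothetical bounded cache honouring the same get/set/delete contract."""

    def __init__(self, maxsize=128):
        self.lock = threading.Lock()
        self.maxsize = maxsize
        self.data = OrderedDict()

    def get(self, key):
        return self.data.get(key)

    def set(self, key, value):
        with self.lock:
            # re-insert so the key moves to the newest end, then trim the oldest entries
            self.data.pop(key, None)
            self.data[key] = value
            while len(self.data) > self.maxsize:
                self.data.popitem(last=False)

    def delete(self, key):
        with self.lock:
            self.data.pop(key, None)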
@ -0,0 +1,18 @@ |
|||
from textwrap import dedent |
|||
|
|||
try: |
|||
from cachecontrol.caches.file_cache import FileCache |
|||
except ImportError: |
|||
notice = dedent(''' |
|||
NOTE: In order to use the FileCache you must have |
|||
lockfile installed. You can install it via pip: |
|||
pip install lockfile |
|||
''') |
|||
print(notice) |
|||
|
|||
|
|||
try: |
|||
import redis |
|||
from cachecontrol.caches.redis_cache import RedisCache |
|||
except ImportError: |
|||
pass |
@ -0,0 +1,51 @@ |
|||
import os |
|||
import sys |
|||
from hashlib import md5 |
|||
|
|||
try: |
|||
from pickle import load, dump, HIGHEST_PROTOCOL |
|||
except ImportError: |
|||
from cPickle import load, dump, HIGHEST_PROTOCOL |
|||
|
|||
from lockfile import FileLock |
|||
|
|||
|
|||
class FileCache(object): |
|||
def __init__(self, directory, forever=False): |
|||
self.directory = directory |
|||
self.forever = forever |
|||
|
|||
if not os.path.isdir(self.directory): |
|||
os.mkdir(self.directory) |
|||
|
|||
@staticmethod |
|||
def encode(x): |
|||
return md5(x.encode()).hexdigest() |
|||
|
|||
def _fn(self, name): |
|||
return os.path.join(self.directory, self.encode(name)) |
|||
|
|||
def get(self, key): |
|||
name = self._fn(key) |
|||
if not os.path.exists(name): |
|||
return None |
|||
|
|||
with open(name, 'rb') as fh: |
|||
try: |
|||
if sys.version < '3': |
|||
return load(fh) |
|||
else: |
|||
return load(fh, encoding='latin1') |
|||
except ValueError: |
|||
return None |
|||
|
|||
def set(self, key, value): |
|||
name = self._fn(key) |
|||
with FileLock(name) as lock: |
|||
with open(lock.path, 'wb') as fh: |
|||
dump(value, fh, HIGHEST_PROTOCOL) |
|||
|
|||
def delete(self, key): |
|||
name = self._fn(key) |
|||
if not self.forever: |
|||
os.remove(name) |
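A sketch of wiring the FileCache into the CacheControl wrapper added elsewhere in this diff; the directory name and URL are placeholders, and lockfile must be installed.

import requests

from cachecontrol import CacheControl
from cachecontrol.caches import FileCache

# Responses are pickled into files named by the md5 of the normalized cache URL.
sess = CacheControl(requests.Session(), cache=FileCache('.webcache'))
resp = sess.get('http://example.com/')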
@ -0,0 +1,46 @@ |
|||
from __future__ import division |
|||
|
|||
from datetime import datetime |
|||
|
|||
try: |
|||
from cPickle import loads, dumps |
|||
except ImportError: # Python 3.x |
|||
from pickle import loads, dumps |
|||
|
|||
|
|||
def total_seconds(td): |
|||
"""Python 2.6 compatability""" |
|||
if hasattr(td, 'total_seconds'): |
|||
return td.total_seconds() |
|||
|
|||
ms = td.microseconds |
|||
secs = (td.seconds + td.days * 24 * 3600) |
|||
return (ms + secs * 10**6) / 10**6 |
|||
|
|||
|
|||
class RedisCache(object): |
|||
|
|||
def __init__(self, conn): |
|||
self.conn = conn |
|||
|
|||
def get(self, key): |
|||
val = self.conn.get(key) |
|||
if val: |
|||
return loads(val) |
|||
return None |
|||
|
|||
def set(self, key, value, expires=None): |
|||
if not expires: |
|||
self.conn.set(key, dumps(value)) |
|||
else: |
|||
expires = expires - datetime.now() |
|||
self.conn.setex(key, total_seconds(expires), dumps(value)) |
|||
|
|||
def delete(self, key): |
|||
self.conn.delete(key) |
|||
|
|||
def clear(self): |
|||
"""Helper for clearing all the keys in a database. Use with |
|||
caution!""" |
|||
for key in self.conn.keys(): |
|||
self.conn.delete(key) |
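Similarly, a sketch of plugging the RedisCache in, assuming a redis server on localhost; the URL is a placeholder.

import redis
import requests

from cachecontrol import CacheControl
from cachecontrol.caches import RedisCache

# Responses are pickled into plain redis keys; setex is used when an expiry is given.
conn = redis.Redis(host='localhost', port=6379, db=0)
sess = CacheControl(requests.Session(), cache=RedisCache(conn))
resp = sess.get('http://example.com/')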
@ -0,0 +1,12 @@ |
|||
try: |
|||
from urllib.parse import urljoin |
|||
except ImportError: |
|||
from urlparse import urljoin |
|||
|
|||
|
|||
try: |
|||
import email.utils |
|||
parsedate_tz = email.utils.parsedate_tz |
|||
except ImportError: |
|||
import email.Utils |
|||
parsedate_tz = email.Utils.parsedate_tz |
@ -0,0 +1,258 @@ |
|||
""" |
|||
The httplib2 algorithms ported for use with requests. |
|||
""" |
|||
import re |
|||
import calendar |
|||
import time |
|||
import datetime |
|||
|
|||
from cachecontrol.cache import DictCache |
|||
from cachecontrol.compat import parsedate_tz |
|||
|
|||
|
|||
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") |
|||
|
|||
|
|||
def parse_uri(uri): |
|||
"""Parses a URI using the regex given in Appendix B of RFC 3986. |
|||
|
|||
(scheme, authority, path, query, fragment) = parse_uri(uri) |
|||
""" |
|||
groups = URI.match(uri).groups() |
|||
return (groups[1], groups[3], groups[4], groups[6], groups[8]) |
|||
|
|||
|
|||
class CacheController(object): |
|||
"""An interface to see if request should cached or not. |
|||
""" |
|||
def __init__(self, cache=None, cache_etags=True, cache_all=False): |
|||
self.cache = cache or DictCache() |
|||
self.cache_etags = cache_etags |
|||
self.cache_all = cache_all |
|||
|
|||
def _urlnorm(self, uri): |
|||
"""Normalize the URL to create a safe key for the cache""" |
|||
(scheme, authority, path, query, fragment) = parse_uri(uri) |
|||
if not scheme or not authority: |
|||
raise Exception("Only absolute URIs are allowed. uri = %s" % uri) |
|||
authority = authority.lower() |
|||
scheme = scheme.lower() |
|||
if not path: |
|||
path = "/" |
|||
|
|||
# Could do syntax based normalization of the URI before |
|||
# computing the digest. See Section 6.2.2 of Std 66. |
|||
request_uri = query and "?".join([path, query]) or path |
|||
scheme = scheme.lower() |
|||
defrag_uri = scheme + "://" + authority + request_uri |
|||
|
|||
return defrag_uri |
|||
|
|||
def cache_url(self, uri): |
|||
return self._urlnorm(uri) |
|||
|
|||
def parse_cache_control(self, headers): |
|||
""" |
|||
Parse the cache control headers returning a dictionary with values |
|||
for the different directives. |
|||
""" |
|||
retval = {} |
|||
|
|||
cc_header = 'cache-control' |
|||
if 'Cache-Control' in headers: |
|||
cc_header = 'Cache-Control' |
|||
|
|||
if cc_header in headers: |
|||
parts = headers[cc_header].split(',') |
|||
parts_with_args = [ |
|||
tuple([x.strip().lower() for x in part.split("=", 1)]) |
|||
for part in parts if -1 != part.find("=")] |
|||
parts_wo_args = [(name.strip().lower(), 1) |
|||
for name in parts if -1 == name.find("=")] |
|||
retval = dict(parts_with_args + parts_wo_args) |
|||
return retval |
|||
|
|||
def cached_request(self, url, headers): |
|||
cache_url = self.cache_url(url) |
|||
cc = self.parse_cache_control(headers) |
|||
|
|||
# non-caching states |
|||
no_cache = 'no-cache' in cc |
|||
if 'max-age' in cc and cc['max-age'] == '0': |
|||
no_cache = True |
|||
|
|||
# see if it is in the cache anyways |
|||
in_cache = self.cache.get(cache_url) |
|||
if no_cache or not in_cache: |
|||
return False |
|||
|
|||
# It is in the cache, so lets see if it is going to be |
|||
# fresh enough |
|||
resp = self.cache.get(cache_url) |
|||
|
|||
# Check our Vary header to make sure our request headers match |
|||
# up. We don't delete it from the cache though, we just don't return |
|||
# our cached value. |
|||
# |
|||
# NOTE: Because httplib2 stores raw content, it denotes |
|||
# headers that were sent in the original response by |
|||
# adding -varied-$name. We don't have to do that b/c we |
|||
# are storing the object which has a reference to the |
|||
# original request. If that changes, then I'd propose |
|||
# using the varied headers in the cache key to avoid the |
|||
# situation all together. |
|||
if 'vary' in resp.headers: |
|||
varied_headers = resp.headers['vary'].replace(' ', '').split(',') |
|||
original_headers = resp.request.headers |
|||
for header in varied_headers: |
|||
# If our headers don't match for the headers listed in |
|||
# the vary header, then don't use the cached response |
|||
if headers.get(header, None) != original_headers.get(header): |
|||
return False |
|||
|
|||
now = time.time() |
|||
date = calendar.timegm( |
|||
parsedate_tz(resp.headers['date']) |
|||
) |
|||
current_age = max(0, now - date) |
|||
|
|||
# TODO: There is an assumption that the result will be a |
|||
# requests response object. This may not be best since we |
|||
# could probably avoid instantiating or constructing the |
|||
# response until we know we need it. |
|||
resp_cc = self.parse_cache_control(resp.headers) |
|||
|
|||
# determine freshness |
|||
freshness_lifetime = 0 |
|||
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): |
|||
freshness_lifetime = int(resp_cc['max-age']) |
|||
elif 'expires' in resp.headers: |
|||
expires = parsedate_tz(resp.headers['expires']) |
|||
if expires is not None: |
|||
expire_time = calendar.timegm(expires) - date |
|||
freshness_lifetime = max(0, expire_time) |
|||
|
|||
# determine if we are setting freshness limit in the req |
|||
if 'max-age' in cc: |
|||
try: |
|||
freshness_lifetime = int(cc['max-age']) |
|||
except ValueError: |
|||
freshness_lifetime = 0 |
|||
|
|||
if 'min-fresh' in cc: |
|||
try: |
|||
min_fresh = int(cc['min-fresh']) |
|||
except ValueError: |
|||
min_fresh = 0 |
|||
# adjust our current age by our min fresh |
|||
current_age += min_fresh |
|||
|
|||
# see how fresh we actually are |
|||
fresh = (freshness_lifetime > current_age) |
|||
|
|||
if fresh: |
|||
# make sure we set the from_cache to true |
|||
resp.from_cache = True |
|||
return resp |
|||
|
|||
# we're not fresh. If we don't have an Etag, clear it out |
|||
if 'etag' not in resp.headers: |
|||
self.cache.delete(cache_url) |
|||
|
|||
if 'etag' in resp.headers: |
|||
headers['If-None-Match'] = resp.headers['ETag'] |
|||
|
|||
if 'last-modified' in resp.headers: |
|||
headers['If-Modified-Since'] = resp.headers['Last-Modified'] |
|||
|
|||
# return the original handler |
|||
return False |
|||
|
|||
def add_headers(self, url): |
|||
resp = self.cache.get(url) |
|||
if resp and 'etag' in resp.headers: |
|||
return {'If-None-Match': resp.headers['etag']} |
|||
return {} |
|||
|
|||
def cache_response(self, request, resp): |
|||
""" |
|||
Algorithm for caching requests. |
|||
|
|||
This assumes a requests Response object. |
|||
""" |
|||
# From httplib2: Don't cache 206's since we aren't going to |
|||
# handle byte range requests |
|||
if resp.status_code not in [200, 203]: |
|||
return |
|||
|
|||
cc_req = self.parse_cache_control(request.headers) |
|||
cc = self.parse_cache_control(resp.headers) |
|||
|
|||
cache_url = self.cache_url(request.url) |
|||
|
|||
# Delete it from the cache if we happen to have it stored there |
|||
no_store = cc.get('no-store') or cc_req.get('no-store') |
|||
if no_store and self.cache.get(cache_url): |
|||
self.cache.delete(cache_url) |
|||
|
|||
# If we've been given an etag, then keep the response |
|||
if self.cache_etags and 'etag' in resp.headers: |
|||
self.cache.set(cache_url, resp) |
|||
|
|||
# If we want to cache sites that are not set up with cache headers, add the proper headers and keep the response |
|||
if self.cache_all: |
|||
expires = datetime.datetime.utcnow() + datetime.timedelta(days=(25 * 365)) |
|||
expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT") |
|||
headers = {'Cache-Control': 'public,max-age=%d' % int(3600), |
|||
'Expires': expires} |
|||
resp.headers.update(headers) |
|||
self.cache.set(cache_url, resp) |
|||
|
|||
# Add to the cache if the response headers demand it. If there |
|||
# is no date header then we can't do anything about expiring |
|||
# the cache. |
|||
elif 'date' in resp.headers: |
|||
# cache when there is a max-age > 0 |
|||
if cc and cc.get('max-age'): |
|||
if int(cc['max-age']) > 0: |
|||
self.cache.set(cache_url, resp) |
|||
|
|||
# If the request can expire, it means we should cache it |
|||
# in the meantime. |
|||
elif 'expires' in resp.headers: |
|||
if resp.headers['expires']: |
|||
self.cache.set(cache_url, resp) |
|||
|
|||
def update_cached_response(self, request, response): |
|||
"""On a 304 we will get a new set of headers that we want to |
|||
update our cached value with, assuming we have one. |
|||
|
|||
This should only ever be called when we've sent an ETag and |
|||
gotten a 304 as the response. |
|||
""" |
|||
cache_url = self.cache_url(request.url) |
|||
|
|||
resp = self.cache.get(cache_url) |
|||
|
|||
if not resp: |
|||
# we didn't have a cached response |
|||
return response |
|||
|
|||
# we did, so update the cached response's headers from the 304 |
|||
resp.headers.update(response.headers) |
|||
|
|||
# we want a 200 b/c we have content via the cache |
|||
resp.status_code = 200 |
|||
|
|||
# update the request as it has the if-none-match header + any |
|||
# other headers that the server might have updated (ie Date, |
|||
# Cache-Control, Expires, etc.) |
|||
resp.request = request |
|||
|
|||
# update our cache |
|||
self.cache.set(cache_url, resp) |
|||
|
|||
# Let everyone know this was from the cache. |
|||
resp.from_cache = True |
|||
|
|||
return resp |
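To illustrate the Cache-Control parsing above: directives with arguments keep their (string) value while bare directives map to 1. A quick sketch:

from cachecontrol.controller import CacheController

cc = CacheController()
parsed = cc.parse_cache_control({'Cache-Control': 'public, max-age=600, no-transform'})
# parsed == {'public': 1, 'max-age': '600', 'no-transform': 1}
print(parsed)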
@ -0,0 +1,56 @@ |
|||
import requests |
|||
|
|||
from requests import models |
|||
from requests.packages.urllib3.response import HTTPResponse |
|||
|
|||
__attrs__ = [ |
|||
'_content', |
|||
'status_code', |
|||
'headers', |
|||
'url', |
|||
'history', |
|||
'encoding', |
|||
'reason', |
|||
'cookies', |
|||
'elapsed', |
|||
] |
|||
|
|||
|
|||
def response_getstate(self): |
|||
# consume everything |
|||
if not self._content_consumed: |
|||
self.content |
|||
|
|||
state = dict( |
|||
(attr, getattr(self, attr, None)) |
|||
for attr in __attrs__ |
|||
) |
|||
|
|||
# deal with our raw content b/c we need it for our cookie jar |
|||
state['raw_original_response'] = self.raw._original_response |
|||
return state |
|||
|
|||
|
|||
def response_setstate(self, state): |
|||
for name, value in state.items(): |
|||
if name != 'raw_original_response': |
|||
setattr(self, name, value) |
|||
|
|||
setattr(self, 'raw', HTTPResponse()) |
|||
self.raw._original_response = state['raw_original_response'] |
|||
|
|||
|
|||
def make_responses_pickleable(): |
|||
try: |
|||
version_parts = [int(part) for part in requests.__version__.split('.')] |
|||
|
|||
# requests >= 2.2 already handles pickling; only patch older versions |
|||
if version_parts[:2] < [2, 2]: |
|||
models.Response.__getstate__ = response_getstate |
|||
models.Response.__setstate__ = response_setstate |
|||
except (ValueError, IndexError): |
|||
# unparseable version string; leave requests untouched |
|||
pass |
|||
|
|||
|
|||
make_responses_pickleable() |
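The patch is applied on import, so with an older requests release the following sketch should round-trip a response through pickle (the URL is a placeholder; on newer requests the version check above simply skips the patch).

import pickle
import requests

import cachecontrol.patch_requests  # importing applies the __getstate__/__setstate__ patch

resp = requests.get('http://example.com/')
restored = pickle.loads(pickle.dumps(resp))
print(restored.status_code, restored.headers.get('content-type'))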
@ -0,0 +1,10 @@ |
|||
from cachecontrol.adapter import CacheControlAdapter |
|||
from cachecontrol.cache import DictCache |
|||
|
|||
|
|||
def CacheControl(sess, cache=None, cache_etags=True, cache_all=False): |
|||
cache = cache or DictCache() |
|||
adapter = CacheControlAdapter(cache, cache_etags=cache_etags, cache_all=cache_all) |
|||
sess.mount('http://', adapter) |
|||
|
|||
return sess |
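Finally, a sketch of the wrapper itself with the new cache_etags / cache_all knobs; cache_all stamps a synthetic Cache-Control/Expires pair onto responses that arrive without caching headers (see cache_response above). The URL is a placeholder.

import requests

from cachecontrol import CacheControl
from cachecontrol.cache import DictCache

sess = CacheControl(requests.Session(),
                    cache=DictCache(),
                    cache_etags=True,   # revalidate with If-None-Match when an ETag was stored
                    cache_all=True)     # cache even responses without caching headers

first = sess.get('http://example.com/')
second = sess.get('http://example.com/')
print(getattr(second, 'from_cache', False))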
@ -1,14 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
__init__.py |
|||
~~~~~~~~~~~ |
|||
|
|||
Defines the public API to the httpcache module. |
|||
""" |
|||
|
|||
__version__ = '0.1.3' |
|||
|
|||
from .cache import HTTPCache |
|||
from .adapter import CachingHTTPAdapter |
|||
|
|||
__all__ = [HTTPCache, CachingHTTPAdapter] |
@ -1,55 +0,0 @@ |
|||
""" |
|||
adapter.py |
|||
~~~~~~~~~~ |
|||
|
|||
Contains an implementation of an HTTP adapter for Requests that is aware of the |
|||
cache contained in this module. |
|||
""" |
|||
from requests.adapters import HTTPAdapter |
|||
from .cache import HTTPCache |
|||
|
|||
|
|||
class CachingHTTPAdapter(HTTPAdapter): |
|||
""" |
|||
A HTTP-caching-aware Transport Adapter for Python Requests. The central |
|||
portion of the API. |
|||
|
|||
:param capacity: The maximum capacity of the backing cache. |
|||
""" |
|||
def __init__(self, capacity=50, **kwargs): |
|||
super(CachingHTTPAdapter, self).__init__(**kwargs) |
|||
|
|||
#: The HTTP Cache backing the adapter. |
|||
self.cache = HTTPCache(capacity=capacity) |
|||
|
|||
def send(self, request, **kwargs): |
|||
""" |
|||
Sends a PreparedRequest object, respecting RFC 2616's rules about HTTP |
|||
caching. Returns a Response object that may have been cached. |
|||
|
|||
:param request: The Requests :class:`PreparedRequest <PreparedRequest>` object to send. |
|||
""" |
|||
cached_resp = self.cache.retrieve(request) |
|||
|
|||
if cached_resp is not None: |
|||
return cached_resp |
|||
else: |
|||
return super(CachingHTTPAdapter, self).send(request, **kwargs) |
|||
|
|||
def build_response(self, request, response): |
|||
""" |
|||
Builds a Response object from a urllib3 response. May involve returning |
|||
a cached Response. |
|||
|
|||
:param request: The Requests :class:`PreparedRequest <PreparedRequest>` object sent. |
|||
:param response: The urllib3 response. |
|||
""" |
|||
resp = super(CachingHTTPAdapter, self).build_response(request, |
|||
response) |
|||
|
|||
if resp.status_code == 304: |
|||
resp = self.cache.handle_304(resp) |
|||
else: |
|||
self.cache.store(resp) |
|||
|
|||
return resp |
@ -1,207 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
cache.py |
|||
~~~~~~~~ |
|||
|
|||
Contains the primary cache structure used in http-cache. |
|||
""" |
|||
from .structures import RecentOrderedDict |
|||
from .utils import (parse_date_header, build_date_header, |
|||
expires_from_cache_control, url_contains_query) |
|||
from datetime import datetime |
|||
|
|||
|
|||
# RFC 2616 specifies that we can cache 200 OK, 203 Non Authoritative, |
|||
# 206 Partial Content, 300 Multiple Choices, 301 Moved Permanently and |
|||
# 410 Gone responses. We don't cache 206s at the moment because we |
|||
# don't handle Range and Content-Range headers. |
|||
CACHEABLE_RCS = (200, 203, 300, 301, 410) |
|||
|
|||
# Cacheable verbs. |
|||
CACHEABLE_VERBS = ('GET', 'HEAD', 'OPTIONS') |
|||
|
|||
# Some verbs MUST invalidate the resource in the cache, according to RFC 2616. |
|||
# If we send one of these, or any verb we don't recognise, invalidate the |
|||
# cache entry for that URL. As it happens, these are also the cacheable |
|||
# verbs. That works out well for us. |
|||
NON_INVALIDATING_VERBS = CACHEABLE_VERBS |
|||
|
|||
|
|||
class HTTPCache(object): |
|||
""" |
|||
The HTTP Cache object. Manages caching of responses according to RFC 2616, |
|||
adding necessary headers to HTTP request objects, and returning cached |
|||
responses based on server responses. |
|||
|
|||
This object is not expected to be used by most users. It is exposed as part |
|||
of the public API for users who feel the need for more control. This API |
|||
may change in a minor version increase. Be warned. |
|||
|
|||
:param capacity: (Optional) The maximum capacity of the HTTP cache. |
|||
""" |
|||
def __init__(self, capacity=50): |
|||
#: The maximum capacity of the HTTP cache. When this many cache entries |
|||
#: end up in the cache, the oldest entries are removed. |
|||
self.capacity = capacity |
|||
|
|||
#: The cache backing store. Cache entries are stored here as key-value |
|||
#: pairs. The key is the URL used to retrieve the cached response. The |
|||
#: value is a python dict, which stores three objects: the response |
|||
#: (keyed off of 'response'), the retrieval or creation date (keyed off |
|||
#: of 'creation') and the cache expiry date (keyed off of 'expiry'). |
|||
#: This last value may be None. |
|||
self._cache = RecentOrderedDict() |
|||
|
|||
def store(self, response): |
|||
""" |
|||
Takes an HTTP response object and stores it in the cache according to |
|||
RFC 2616. Returns a boolean value indicating whether the response was |
|||
cached or not. |
|||
|
|||
:param response: Requests :class:`Response <Response>` object to cache. |
|||
""" |
|||
# Define an internal utility function. |
|||
def date_header_or_default(header_name, default, response): |
|||
try: |
|||
date_header = response.headers[header_name] |
|||
except KeyError: |
|||
value = default |
|||
else: |
|||
value = parse_date_header(date_header) |
|||
return value |
|||
|
|||
if response.status_code not in CACHEABLE_RCS: |
|||
return False |
|||
|
|||
if response.request.method not in CACHEABLE_VERBS: |
|||
return False |
|||
|
|||
url = response.url |
|||
now = datetime.utcnow() |
|||
|
|||
# Get the value of the 'Date' header, if it exists. If it doesn't, just |
|||
# use now. |
|||
creation = date_header_or_default('Date', now, response) |
|||
|
|||
# Get the value of the 'Cache-Control' header, if it exists. |
|||
cc = response.headers.get('Cache-Control', None) |
|||
if cc is not None: |
|||
expiry = expires_from_cache_control(cc, now) |
|||
|
|||
# If the above returns None, we are explicitly instructed not to |
|||
# cache this. |
|||
if expiry is None: |
|||
return False |
|||
|
|||
# Get the value of the 'Expires' header, if it exists, and if we don't |
|||
# have anything from the 'Cache-Control' header. |
|||
if cc is None: |
|||
expiry = date_header_or_default('Expires', None, response) |
|||
|
|||
# If the expiry date is earlier or the same as the Date header, don't |
|||
# cache the response at all. |
|||
if expiry is not None and expiry <= creation: |
|||
return False |
|||
|
|||
# If there's a query portion of the url and it's a GET, don't cache |
|||
# this unless explicitly instructed to. |
|||
if expiry is None and response.request.method == 'GET': |
|||
if url_contains_query(url): |
|||
return False |
|||
|
|||
self._cache[url] = {'response': response, |
|||
'creation': creation, |
|||
'expiry': expiry} |
|||
|
|||
self.__reduce_cache_count() |
|||
|
|||
return True |
|||
|
|||
def handle_304(self, response): |
|||
""" |
|||
Given a 304 response, retrieves the cached entry. This unconditionally |
|||
returns the cached entry, so it can be used when the 'intelligent' |
|||
behaviour of retrieve() is not desired. |
|||
|
|||
Returns None if there is no entry in the cache. |
|||
|
|||
:param response: The 304 response to find the cached entry for. Should be a Requests :class:`Response <Response>`. |
|||
""" |
|||
try: |
|||
cached_response = self._cache[response.url]['response'] |
|||
except KeyError: |
|||
cached_response = None |
|||
|
|||
return cached_response |
|||
|
|||
def retrieve(self, request): |
|||
""" |
|||
Retrieves a cached response if possible. |
|||
|
|||
If there is a response that can be unconditionally returned (e.g. one |
|||
that had a Cache-Control header set), that response is returned. If |
|||
there is one that can be conditionally returned (if a 304 is returned), |
|||
applies an If-Modified-Since header to the request and returns None. |
|||
|
|||
:param request: The Requests :class:`PreparedRequest <PreparedRequest>` object. |
|||
""" |
|||
return_response = None |
|||
url = request.url |
|||
|
|||
try: |
|||
cached_response = self._cache[url] |
|||
except KeyError: |
|||
return None |
|||
|
|||
if request.method not in NON_INVALIDATING_VERBS: |
|||
del self._cache[url] |
|||
return None |
|||
|
|||
if cached_response['expiry'] is None: |
|||
# We have no explicit expiry time, so we weren't instructed to |
|||
# cache. Add an 'If-Modified-Since' header. |
|||
creation = cached_response['creation'] |
|||
header = build_date_header(creation) |
|||
request.headers['If-Modified-Since'] = header |
|||
else: |
|||
# We have an explicit expiry time. If we're earlier than the expiry |
|||
# time, return the response. |
|||
now = datetime.utcnow() |
|||
|
|||
if now <= cached_response['expiry']: |
|||
return_response = cached_response['response'] |
|||
else: |
|||
del self._cache[url] |
|||
|
|||
return return_response |
|||
|
|||
def __reduce_cache_count(self): |
|||
""" |
|||
Drops the number of entries in the cache to the capacity of the cache. |
|||
|
|||
Walks the backing RecentOrderedDict in order from oldest to youngest. |
|||
Deletes cache entries that are either invalid or being speculatively |
|||
cached until the number of cache entries drops to the capacity. If this |
|||
leaves the cache above capacity, begins deleting the least-used cache |
|||
entries that are still valid until the cache has space. |
|||
""" |
|||
if len(self._cache) <= self.capacity: |
|||
return |
|||
|
|||
to_delete = len(self._cache) - self.capacity |
|||
keys = list(self._cache.keys()) |
|||
|
|||
for key in keys: |
|||
if self._cache[key]['expiry'] is None: |
|||
del self._cache[key] |
|||
to_delete -= 1 |
|||
|
|||
if to_delete == 0: |
|||
return |
|||
|
|||
keys = list(self._cache.keys()) |
|||
|
|||
for i in range(to_delete): |
|||
del self._cache[keys[i]] |
|||
|
|||
return |
@ -1,10 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
compat.py |
|||
~~~~~~~~~ |
|||
|
|||
Defines cross-platform functions and classes needed to achieve proper |
|||
functionality. |
|||
""" |
|||
|
|||
pass |
@ -1,59 +0,0 @@ |
|||
""" |
|||
structures.py |
|||
~~~~~~~~~~~~~ |
|||
|
|||
Defines structures used by the httpcache module. |
|||
""" |
|||
|
|||
class RecentOrderedDict(dict): |
|||
""" |
|||
A custom variant of the dictionary that ensures that the object most |
|||
recently inserted _or_ retrieved from the dictionary is enumerated first. |
|||
""" |
|||
def __init__(self): |
|||
self._data = {} |
|||
self._order = [] |
|||
|
|||
def __setitem__(self, key, value): |
|||
if key in self._data: |
|||
self._order.remove(key) |
|||
|
|||
self._order.append(key) |
|||
self._data[key] = value |
|||
|
|||
def __getitem__(self, key): |
|||
value = self._data[key] |
|||
self._order.remove(key) |
|||
self._order.append(key) |
|||
return value |
|||
|
|||
def __delitem__(self, key): |
|||
del self._data[key] |
|||
self._order.remove(key) |
|||
|
|||
def __iter__(self): |
|||
return self._order |
|||
|
|||
def __len__(self): |
|||
return len(self._order) |
|||
|
|||
def __contains__(self, value): |
|||
return self._data.__contains__(value) |
|||
|
|||
def items(self): |
|||
return [(key, self._data[key]) for key in self._order] |
|||
|
|||
def keys(self): |
|||
return self._order |
|||
|
|||
def values(self): |
|||
return [self._data[key] for key in self._order] |
|||
|
|||
def clear(self): |
|||
self._data = {} |
|||
self._order = [] |
|||
|
|||
def copy(self): |
|||
c = RecentOrderedDict() |
|||
c._data = self._data.copy() |
|||
c._order = self._order[:] |
@ -1,97 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
utils.py |
|||
~~~~~~~~ |
|||
|
|||
Utility functions for use with httpcache. |
|||
""" |
|||
from datetime import datetime, timedelta |
|||
|
|||
try: # Python 2 |
|||
from urlparse import urlparse |
|||
except ImportError: # Python 3 |
|||
from urllib.parse import urlparse |
|||
|
|||
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT" |
|||
RFC_850_DT_STR = "%A, %d-%b-%y %H:%M:%S GMT" |
|||
|
|||
|
|||
def parse_date_header(header): |
|||
""" |
|||
Given a date header in the form specified by RFC 2616, return a Python |
|||
datetime object. |
|||
|
|||
RFC 2616 specifies three possible formats for date/time headers, and |
|||
makes it clear that all dates/times should be in UTC/GMT. That is assumed |
|||
by this library, which simply does everything in UTC. This currently does |
|||
not parse the C asctime() string, because that's effort. |
|||
|
|||
This function does _not_ follow Postel's Law. If a format does not strictly |
|||
match the defined strings, this function returns None. This is considered |
|||
'safe' behaviour. |
|||
""" |
|||
try: |
|||
dt = datetime.strptime(header, RFC_1123_DT_STR) |
|||
except ValueError: |
|||
try: |
|||
dt = datetime.strptime(header, RFC_850_DT_STR) |
|||
except ValueError: |
|||
dt = None |
|||
except TypeError: |
|||
dt = None |
|||
|
|||
return dt |
|||
|
|||
|
|||
def build_date_header(dt): |
|||
""" |
|||
Given a Python datetime object, build a Date header value according to |
|||
RFC 2616. |
|||
|
|||
RFC 2616 specifies that the RFC 1123 form is to be preferred, so that is |
|||
what we use. |
|||
""" |
|||
return dt.strftime(RFC_1123_DT_STR) |
|||
|
|||
|
|||
def expires_from_cache_control(header, current_time): |
|||
""" |
|||
Given a Cache-Control header, builds a Python datetime object corresponding |
|||
to the expiry time (in UTC). This function should respect all relevant |
|||
Cache-Control directives. |
|||
|
|||
Takes current_time as an argument to ensure that 'max-age=0' generates the |
|||
correct behaviour without being special-cased. |
|||
|
|||
Returns None to indicate that a request must not be cached. |
|||
""" |
|||
# Cache control header values are made of multiple comma separated fields. |
|||
# Splitting them like this is probably a bad idea, but I'm going to roll with |
|||
# it for now. We'll come back to it. |
|||
fields = header.split(', ') |
|||
duration = None |
|||
|
|||
for field in fields: |
|||
# Right now we don't handle no-cache applied to specific fields. To be |
|||
# as 'nice' as possible, treat any no-cache as applying to the whole |
|||
# request. Bail early, because there's no reason to stick around. |
|||
if field.startswith('no-cache') or field == 'no-store': |
|||
return None |
|||
|
|||
if field.startswith('max-age'): |
|||
_, duration = field.split('=') |
|||
duration = int(duration) |
|||
|
|||
if duration: |
|||
interval = timedelta(seconds=int(duration)) |
|||
return current_time + interval |
|||
|
|||
def url_contains_query(url): |
|||
""" |
|||
A very stupid function for determining if a URL contains a query string |
|||
or not. |
|||
""" |
|||
if urlparse(url).query: |
|||
return True |
|||
else: |
|||
return False |
@ -1,31 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache |
|||
~~~~~~~~~~~~~~ |
|||
|
|||
Transparent cache for ``requests`` library with persistence and async support |
|||
|
|||
Just write:: |
|||
|
|||
import requests_cache |
|||
requests_cache.install_cache() |
|||
|
|||
And requests to resources will be cached for faster repeated access:: |
|||
|
|||
import requests |
|||
for i in range(10): |
|||
r = requests.get('http://httpbin.org/delay/5') |
|||
# will take approximately 5 seconds instead of 50 |
|||
|
|||
|
|||
:copyright: (c) 2012 by Roman Haritonov. |
|||
:license: BSD, see LICENSE for more details. |
|||
""" |
|||
__docformat__ = 'restructuredtext' |
|||
__version__ = '0.4.4' |
|||
|
|||
from .core import( |
|||
CachedSession, install_cache, uninstall_cache, |
|||
disabled, enabled, get_cache, clear, configure |
|||
) |
@ -1,50 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.backends |
|||
~~~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
Classes and functions for cache persistence |
|||
""" |
|||
|
|||
|
|||
from .base import BaseCache |
|||
|
|||
registry = { |
|||
'memory': BaseCache, |
|||
} |
|||
|
|||
try: |
|||
# Heroku doesn't allow the SQLite3 module to be installed |
|||
from .sqlite import DbCache |
|||
registry['sqlite'] = DbCache |
|||
except ImportError: |
|||
DbCache = None |
|||
|
|||
try: |
|||
from .mongo import MongoCache |
|||
registry['mongo'] = registry['mongodb'] = MongoCache |
|||
except ImportError: |
|||
MongoCache = None |
|||
|
|||
try: |
|||
from .redis import RedisCache |
|||
registry['redis'] = RedisCache |
|||
except ImportError: |
|||
RedisCache = None |
|||
|
|||
|
|||
def create_backend(backend_name, cache_name, options): |
|||
if backend_name is None: |
|||
backend_name = _get_default_backend_name() |
|||
try: |
|||
return registry[backend_name](cache_name, **options) |
|||
except KeyError: |
|||
raise ValueError('Unsupported backend "%s" try one of: %s' % |
|||
(backend_name, ', '.join(registry.keys()))) |
|||
|
|||
|
|||
def _get_default_backend_name(): |
|||
if 'sqlite' in registry: |
|||
return 'sqlite' |
|||
return 'memory' |
@ -1,171 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.backends.base |
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
Contains BaseCache class which can be used as in-memory cache backend or |
|||
extended to support persistence. |
|||
""" |
|||
from datetime import datetime |
|||
import hashlib |
|||
from copy import copy |
|||
|
|||
import requests |
|||
|
|||
from ..compat import is_py2 |
|||
|
|||
|
|||
class BaseCache(object): |
|||
""" Base class for cache implementations, can be used as in-memory cache. |
|||
|
|||
To extend it you can provide dictionary-like objects for |
|||
:attr:`keys_map` and :attr:`responses` or override public methods. |
|||
""" |
|||
def __init__(self, *args, **kwargs): |
|||
#: `key` -> `key_in_responses` mapping |
|||
self.keys_map = {} |
|||
#: `key_in_cache` -> `response` mapping |
|||
self.responses = {} |
|||
|
|||
def save_response(self, key, response): |
|||
""" Save response to cache |
|||
|
|||
:param key: key for this response |
|||
:param response: response to save |
|||
|
|||
.. note:: Response is reduced before saving (with :meth:`reduce_response`) |
|||
to make it picklable |
|||
""" |
|||
self.responses[key] = self.reduce_response(response), datetime.utcnow() |
|||
|
|||
def add_key_mapping(self, new_key, key_to_response): |
|||
""" |
|||
Adds mapping of `new_key` to `key_to_response` to make it possible to |
|||
associate many keys with single response |
|||
|
|||
:param new_key: new key (e.g. url from redirect) |
|||
:param key_to_response: key which can be found in :attr:`responses` |
|||
:return: |
|||
""" |
|||
self.keys_map[new_key] = key_to_response |
|||
|
|||
def get_response_and_time(self, key, default=(None, None)): |
|||
""" Retrieves response and timestamp for `key` if it's stored in cache, |
|||
otherwise returns `default` |
|||
|
|||
:param key: key of resource |
|||
:param default: return this if `key` not found in cache |
|||
:returns: tuple (response, datetime) |
|||
|
|||
.. note:: Response is restored after unpickling with :meth:`restore_response` |
|||
""" |
|||
try: |
|||
if key not in self.responses: |
|||
key = self.keys_map[key] |
|||
response, timestamp = self.responses[key] |
|||
except KeyError: |
|||
return default |
|||
return self.restore_response(response), timestamp |
|||
|
|||
def delete(self, key): |
|||
""" Delete `key` from cache. Also deletes all responses from response history |
|||
""" |
|||
try: |
|||
if key in self.responses: |
|||
response, _ = self.responses[key] |
|||
del self.responses[key] |
|||
else: |
|||
response, _ = self.responses[self.keys_map[key]] |
|||
del self.keys_map[key] |
|||
for r in response.history: |
|||
del self.keys_map[self.create_key(r.request)] |
|||
except KeyError: |
|||
pass |
|||
|
|||
def delete_url(self, url): |
|||
""" Delete response associated with `url` from cache. |
|||
Also deletes all responses from response history. Works only for GET requests |
|||
""" |
|||
self.delete(self._url_to_key(url)) |
|||
|
|||
def clear(self): |
|||
""" Clear cache |
|||
""" |
|||
self.responses.clear() |
|||
self.keys_map.clear() |
|||
|
|||
def has_key(self, key): |
|||
""" Returns `True` if cache has `key`, `False` otherwise |
|||
""" |
|||
return key in self.responses or key in self.keys_map |
|||
|
|||
def has_url(self, url): |
|||
""" Returns `True` if cache has `url`, `False` otherwise. |
|||
Works only for GET request urls |
|||
""" |
|||
return self.has_key(self._url_to_key(url)) |
|||
|
|||
def _url_to_key(self, url): |
|||
from requests import Request |
|||
return self.create_key(Request('GET', url).prepare()) |
|||
|
|||
_response_attrs = ['_content', 'url', 'status_code', 'cookies', |
|||
'headers', 'encoding', 'request', 'reason', 'raw'] |
|||
|
|||
_raw_response_attrs = ['_original_response', 'decode_content', 'headers', |
|||
'reason', 'status', 'strict', 'version'] |
|||
|
|||
def reduce_response(self, response): |
|||
""" Reduce response object to make it compatible with ``pickle`` |
|||
""" |
|||
result = _Store() |
|||
# prefetch |
|||
response.content |
|||
for field in self._response_attrs: |
|||
setattr(result, field, self._picklable_field(response, field)) |
|||
result.history = tuple(self.reduce_response(r) for r in response.history) |
|||
return result |
|||
|
|||
def _picklable_field(self, response, name): |
|||
value = getattr(response, name) |
|||
if name == 'request': |
|||
value = copy(value) |
|||
value.hooks = [] |
|||
elif name == 'raw': |
|||
result = _Store() |
|||
for field in self._raw_response_attrs: |
|||
setattr(result, field, getattr(value, field, None)) |
|||
value = result |
|||
return value |
|||
|
|||
def restore_response(self, response): |
|||
""" Restore response object after unpickling |
|||
""" |
|||
result = requests.Response() |
|||
for field in self._response_attrs: |
|||
setattr(result, field, getattr(response, field, None)) |
|||
result.history = tuple(self.restore_response(r) for r in response.history) |
|||
return result |
|||
|
|||
def create_key(self, request): |
|||
key = hashlib.sha256() |
|||
key.update(_to_bytes(request.method.upper())) |
|||
key.update(_to_bytes(request.url)) |
|||
if request.body: |
|||
key.update(_to_bytes(request.body)) |
|||
return key.hexdigest() |
|||
|
|||
def __str__(self): |
|||
return 'keys: %s\nresponses: %s' % (self.keys_map, self.responses) |
|||
|
|||
|
|||
# used for saving response attributes |
|||
class _Store(object): |
|||
pass |
|||
|
|||
|
|||
def _to_bytes(s, encoding='utf-8'): |
|||
if is_py2 or isinstance(s, bytes): |
|||
return s |
|||
return bytes(s, encoding) |
@ -1,25 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.backends.mongo |
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
``mongo`` cache backend |
|||
""" |
|||
from .base import BaseCache |
|||
from .storage.mongodict import MongoDict, MongoPickleDict |
|||
|
|||
|
|||
class MongoCache(BaseCache): |
|||
""" ``mongo`` cache backend. |
|||
""" |
|||
def __init__(self, db_name='requests-cache', **options): |
|||
""" |
|||
:param db_name: database name (default: ``'requests-cache'``) |
|||
:param connection: (optional) ``pymongo.Connection`` |
|||
""" |
|||
super(MongoCache, self).__init__() |
|||
self.responses = MongoPickleDict(db_name, 'responses', |
|||
options.get('connection')) |
|||
self.keys_map = MongoDict(db_name, 'urls', self.responses.connection) |
|||
|
@ -1,24 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.backends.redis |
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
``redis`` cache backend |
|||
""" |
|||
from .base import BaseCache |
|||
from .storage.redisdict import RedisDict |
|||
|
|||
|
|||
class RedisCache(BaseCache): |
|||
""" ``redis`` cache backend. |
|||
""" |
|||
def __init__(self, namespace='requests-cache', **options): |
|||
""" |
|||
:param namespace: redis namespace (default: ``'requests-cache'``) |
|||
:param connection: (optional) ``redis.StrictRedis`` |
|||
""" |
|||
super(RedisCache, self).__init__() |
|||
self.responses = RedisDict(namespace, 'responses', |
|||
options.get('connection')) |
|||
self.keys_map = RedisDict(namespace, 'urls', self.responses.connection) |
@ -1,30 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.backends.sqlite |
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
``sqlite3`` cache backend |
|||
""" |
|||
from .base import BaseCache |
|||
from .storage.dbdict import DbDict, DbPickleDict |
|||
|
|||
|
|||
class DbCache(BaseCache): |
|||
""" sqlite cache backend. |
|||
|
|||
Reading is fast, saving is a bit slower. It can store big amount of data |
|||
with low memory usage. |
|||
""" |
|||
def __init__(self, location='cache', |
|||
fast_save=False, extension='.sqlite', **options): |
|||
""" |
|||
:param location: database filename prefix (default: ``'cache'``) |
|||
:param fast_save: Speedup cache saving up to 50 times but with possibility of data loss. |
|||
See :ref:`backends.DbDict <backends_dbdict>` for more info |
|||
:param extension: extension for filename (default: ``'.sqlite'``) |
|||
""" |
|||
super(DbCache, self).__init__() |
|||
self.responses = DbPickleDict(location + extension, 'responses', fast_save=fast_save) |
|||
self.keys_map = DbDict(location + extension, 'urls') |
|||
|
@ -1,171 +0,0 @@ |
|||
#!/usr/bin/python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.backends.dbdict |
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
Dictionary-like objects for saving large data sets to `sqlite` database |
|||
""" |
|||
from collections import MutableMapping |
|||
import sqlite3 as sqlite |
|||
from contextlib import contextmanager |
|||
try: |
|||
import threading |
|||
except ImportError: |
|||
import dummy_threading as threading |
|||
try: |
|||
import cPickle as pickle |
|||
except ImportError: |
|||
import pickle |
|||
|
|||
from requests_cache.compat import bytes |
|||
|
|||
|
|||
|
|||
class DbDict(MutableMapping): |
|||
""" DbDict - a dictionary-like object for saving large datasets to `sqlite` database |
|||
|
|||
It's possible to create multiple DbDict instances, which will be stored as separate |
|||
tables in one database:: |
|||
|
|||
d1 = DbDict('test', 'table1') |
|||
d2 = DbDict('test', 'table2') |
|||
d3 = DbDict('test', 'table3') |
|||
|
|||
all data will be stored in ``test.sqlite`` database into |
|||
correspondent tables: ``table1``, ``table2`` and ``table3`` |
|||
""" |
|||
|
|||
def __init__(self, filename, table_name='data', fast_save=False, **options): |
|||
""" |
|||
:param filename: filename for database (without extension) |
|||
:param table_name: table name |
|||
:param fast_save: If it's True, then sqlite will be configured with |
|||
`"PRAGMA synchronous = 0;" <http://www.sqlite.org/pragma.html#pragma_synchronous>`_ |
|||
to speedup cache saving, but be careful, it's dangerous. |
|||
Tests showed that insertion order of records can be wrong with this option. |
|||
""" |
|||
self.filename = filename |
|||
self.table_name = table_name |
|||
self.fast_save = fast_save |
|||
|
|||
#: Transactions can be committed if this property is set to `True` |
|||
self.can_commit = True |
|||
|
|||
|
|||
self._bulk_commit = False |
|||
self._pending_connection = None |
|||
self._lock = threading.RLock() |
|||
with self.connection() as con: |
|||
con.execute("create table if not exists `%s` (key PRIMARY KEY, value)" % self.table_name) |
|||
|
|||
|
|||
@contextmanager |
|||
def connection(self, commit_on_success=False): |
|||
with self._lock: |
|||
if self._bulk_commit: |
|||
if self._pending_connection is None: |
|||
self._pending_connection = sqlite.connect(self.filename) |
|||
con = self._pending_connection |
|||
else: |
|||
con = sqlite.connect(self.filename) |
|||
try: |
|||
if self.fast_save: |
|||
con.execute("PRAGMA synchronous = 0;") |
|||
yield con |
|||
if commit_on_success and self.can_commit: |
|||
con.commit() |
|||
finally: |
|||
if not self._bulk_commit: |
|||
con.close() |
|||
|
|||
def commit(self, force=False): |
|||
""" |
|||
Commits pending transaction if :attr:`can_commit` or `force` is `True` |
|||
|
|||
:param force: force commit, ignore :attr:`can_commit` |
|||
""" |
|||
if force or self.can_commit: |
|||
if self._pending_connection is not None: |
|||
self._pending_connection.commit() |
|||
|
|||
@contextmanager |
|||
def bulk_commit(self): |
|||
""" |
|||
Context manager used to speedup insertion of big number of records |
|||
:: |
|||
|
|||
>>> d1 = DbDict('test') |
|||
>>> with d1.bulk_commit(): |
|||
... for i in range(1000): |
|||
... d1[i] = i * 2 |
|||
|
|||
""" |
|||
self._bulk_commit = True |
|||
self.can_commit = False |
|||
try: |
|||
yield |
|||
self.commit(True) |
|||
finally: |
|||
self._bulk_commit = False |
|||
self.can_commit = True |
|||
self._pending_connection.close() |
|||
self._pending_connection = None |
|||
|
|||
def __getitem__(self, key): |
|||
with self.connection() as con: |
|||
row = con.execute("select value from `%s` where key=?" % |
|||
self.table_name, (key,)).fetchone() |
|||
if not row: |
|||
raise KeyError |
|||
return row[0] |
|||
|
|||
def __setitem__(self, key, item): |
|||
with self.connection(True) as con: |
|||
if con.execute("select key from `%s` where key=?" % |
|||
self.table_name, (key,)).fetchone(): |
|||
con.execute("update `%s` set value=? where key=?" % |
|||
self.table_name, (item, key)) |
|||
else: |
|||
con.execute("insert into `%s` (key,value) values (?,?)" % |
|||
self.table_name, (key, item)) |
|||
|
|||
def __delitem__(self, key): |
|||
with self.connection(True) as con: |
|||
if con.execute("select key from `%s` where key=?" % |
|||
self.table_name, (key,)).fetchone(): |
|||
con.execute("delete from `%s` where key=?" % |
|||
self.table_name, (key,)) |
|||
else: |
|||
raise KeyError |
|||
|
|||
def __iter__(self): |
|||
with self.connection() as con: |
|||
for row in con.execute("select key from `%s`" % |
|||
self.table_name): |
|||
yield row[0] |
|||
|
|||
def __len__(self): |
|||
with self.connection() as con: |
|||
return con.execute("select count(key) from `%s`" % |
|||
self.table_name).fetchone()[0] |
|||
|
|||
def clear(self): |
|||
with self.connection(True) as con: |
|||
con.execute("drop table `%s`" % self.table_name) |
|||
con.execute("create table `%s` (key PRIMARY KEY, value)" % |
|||
self.table_name) |
|||
|
|||
def __str__(self): |
|||
return str(dict(self.items())) |
|||
|
|||
|
|||
class DbPickleDict(DbDict): |
|||
""" Same as :class:`DbDict`, but pickles values before saving |
|||
""" |
|||
def __setitem__(self, key, item): |
|||
super(DbPickleDict, self).__setitem__(key, |
|||
sqlite.Binary(pickle.dumps(item))) |
|||
|
|||
def __getitem__(self, key): |
|||
return pickle.loads(bytes(super(DbPickleDict, self).__getitem__(key))) |
@ -1,74 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.backends.mongodict |
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
Dictionary-like objects for saving large data sets to ``mongodb`` database |
|||
""" |
|||
from collections import MutableMapping |
|||
try: |
|||
import cPickle as pickle |
|||
except ImportError: |
|||
import pickle |
|||
|
|||
from pymongo import Connection |
|||
|
|||
|
|||
class MongoDict(MutableMapping): |
|||
""" MongoDict - a dictionary-like interface for ``mongo`` database |
|||
""" |
|||
def __init__(self, db_name, |
|||
collection_name='mongo_dict_data', connection=None): |
|||
""" |
|||
:param db_name: database name (be careful with production databases) |
|||
:param collection_name: collection name (default: mongo_dict_data) |
|||
:param connection: ``pymongo.Connection`` instance. If it's ``None`` |
|||
(default) new connection with default options will |
|||
be created |
|||
""" |
|||
if connection is not None: |
|||
self.connection = connection |
|||
else: |
|||
self.connection = Connection() |
|||
self.db = self.connection[db_name] |
|||
self.collection = self.db[collection_name] |
|||
|
|||
def __getitem__(self, key): |
|||
result = self.collection.find_one({'_id': key}) |
|||
if result is None: |
|||
raise KeyError |
|||
return result['data'] |
|||
|
|||
def __setitem__(self, key, item): |
|||
self.collection.save({'_id': key, 'data': item}) |
|||
|
|||
def __delitem__(self, key): |
|||
spec = {'_id': key} |
|||
if self.collection.find_one(spec, fields=['_id']): |
|||
self.collection.remove(spec) |
|||
else: |
|||
raise KeyError |
|||
|
|||
def __len__(self): |
|||
return self.collection.count() |
|||
|
|||
def __iter__(self): |
|||
for d in self.collection.find(fields=['_id']): |
|||
yield d['_id'] |
|||
|
|||
def clear(self): |
|||
self.collection.drop() |
|||
|
|||
def __str__(self): |
|||
return str(dict(self.items())) |
|||
|
|||
|
|||
class MongoPickleDict(MongoDict): |
|||
""" Same as :class:`MongoDict`, but pickles values before saving |
|||
""" |
|||
def __setitem__(self, key, item): |
|||
super(MongoPickleDict, self).__setitem__(key, pickle.dumps(item)) |
|||
|
|||
def __getitem__(self, key): |
|||
return pickle.loads(bytes(super(MongoPickleDict, self).__getitem__(key))) |
@ -1,68 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.backends.redisdict |
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
Dictionary-like objects for saving large data sets to ``redis`` key-store |
|||
""" |
|||
from collections import MutableMapping |
|||
try: |
|||
import cPickle as pickle |
|||
except ImportError: |
|||
import pickle |
|||
from redis import StrictRedis as Redis |
|||
|
|||
|
|||
class RedisDict(MutableMapping): |
|||
""" RedisDict - a dictionary-like interface for ``redis`` key-stores |
|||
""" |
|||
def __init__(self, namespace, collection_name='redis_dict_data', |
|||
connection=None): |
|||
""" |
|||
The actual key name on the redis server will be |
|||
``namespace``:``collection_name`` |
|||
|
|||
In order to deal with how redis stores data/keys, |
|||
everything, i.e. keys and data, must be pickled. |
|||
|
|||
:param namespace: namespace to use |
|||
:param collection_name: name of the hash map stored in redis |
|||
(default: redis_dict_data) |
|||
:param connection: ``redis.StrictRedis`` instance. |
|||
If it's ``None`` (default), a new connection with |
|||
default options will be created |
|||
|
|||
""" |
|||
if connection is not None: |
|||
self.connection = connection |
|||
else: |
|||
self.connection = Redis() |
|||
self._self_key = ':'.join([namespace, collection_name]) |
|||
|
|||
def __getitem__(self, key): |
|||
result = self.connection.hget(self._self_key, pickle.dumps(key)) |
|||
if result is None: |
|||
raise KeyError |
|||
return pickle.loads(bytes(result)) |
|||
|
|||
def __setitem__(self, key, item): |
|||
self.connection.hset(self._self_key, pickle.dumps(key), |
|||
pickle.dumps(item)) |
|||
|
|||
def __delitem__(self, key): |
|||
if not self.connection.hdel(self._self_key, pickle.dumps(key)): |
|||
raise KeyError |
|||
|
|||
def __len__(self): |
|||
return self.connection.hlen(self._self_key) |
|||
|
|||
def __iter__(self): |
|||
for v in self.connection.hkeys(self._self_key): |
|||
yield pickle.loads(bytes(v)) |
|||
|
|||
def clear(self): |
|||
self.connection.delete(self._self_key) |
|||
|
|||
def __str__(self): |
|||
return str(dict(self.items())) |
@ -1,103 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
# taken from requests library: https://github.com/kennethreitz/requests |
|||
""" |
|||
pythoncompat |
|||
""" |
|||
|
|||
|
|||
import sys |
|||
|
|||
# ------- |
|||
# Pythons |
|||
# ------- |
|||
|
|||
# Syntax sugar. |
|||
_ver = sys.version_info |
|||
|
|||
#: Python 2.x? |
|||
is_py2 = (_ver[0] == 2) |
|||
|
|||
#: Python 3.x? |
|||
is_py3 = (_ver[0] == 3) |
|||
|
|||
#: Python 3.0.x |
|||
is_py30 = (is_py3 and _ver[1] == 0) |
|||
|
|||
#: Python 3.1.x |
|||
is_py31 = (is_py3 and _ver[1] == 1) |
|||
|
|||
#: Python 3.2.x |
|||
is_py32 = (is_py3 and _ver[1] == 2) |
|||
|
|||
#: Python 3.3.x |
|||
is_py33 = (is_py3 and _ver[1] == 3) |
|||
|
|||
#: Python 3.4.x |
|||
is_py34 = (is_py3 and _ver[1] == 4) |
|||
|
|||
#: Python 2.7.x |
|||
is_py27 = (is_py2 and _ver[1] == 7) |
|||
|
|||
#: Python 2.6.x |
|||
is_py26 = (is_py2 and _ver[1] == 6) |
|||
|
|||
#: Python 2.5.x |
|||
is_py25 = (is_py2 and _ver[1] == 5) |
|||
|
|||
#: Python 2.4.x |
|||
is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice. |
|||
|
|||
|
|||
# --------- |
|||
# Platforms |
|||
# --------- |
|||
|
|||
|
|||
# Syntax sugar. |
|||
_ver = sys.version.lower() |
|||
|
|||
is_pypy = ('pypy' in _ver) |
|||
is_jython = ('jython' in _ver) |
|||
is_ironpython = ('iron' in _ver) |
|||
|
|||
# Assume CPython, if nothing else. |
|||
is_cpython = not any((is_pypy, is_jython, is_ironpython)) |
|||
|
|||
# Windows-based system. |
|||
is_windows = 'win32' in str(sys.platform).lower() |
|||
|
|||
# Standard Linux 2+ system. |
|||
is_linux = ('linux' in str(sys.platform).lower()) |
|||
is_osx = ('darwin' in str(sys.platform).lower()) |
|||
is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess. |
|||
is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess. |
|||
|
|||
|
|||
# --------- |
|||
# Specifics |
|||
# --------- |
|||
|
|||
|
|||
if is_py2: |
|||
from urllib import quote, unquote, urlencode |
|||
from urlparse import urlparse, urlunparse, urljoin, urlsplit |
|||
from urllib2 import parse_http_list |
|||
import cookielib |
|||
from StringIO import StringIO |
|||
bytes = str |
|||
str = unicode |
|||
basestring = basestring |
|||
|
|||
|
|||
|
|||
elif is_py3: |
|||
from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote |
|||
from urllib.request import parse_http_list |
|||
from http import cookiejar as cookielib |
|||
from http.cookies import SimpleCookie |
|||
from io import StringIO |
|||
|
|||
str = str |
|||
bytes = bytes |
|||
basestring = (str,bytes) |
|||
|
@ -1,227 +0,0 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
""" |
|||
requests_cache.core |
|||
~~~~~~~~~~~~~~~~~~~ |
|||
|
|||
Core functions for configuring cache and monkey patching ``requests`` |
|||
""" |
|||
from contextlib import contextmanager |
|||
from datetime import datetime, timedelta |
|||
|
|||
import requests |
|||
from requests import Session as OriginalSession |
|||
from requests.hooks import dispatch_hook |
|||
|
|||
from requests_cache import backends |
|||
from requests_cache.compat import str, basestring |
|||
|
|||
try: |
|||
ver = tuple(map(int, requests.__version__.split("."))) |
|||
except ValueError: |
|||
pass |
|||
else: |
|||
# We don't need to dispatch hook in Requests <= 1.1.0 |
|||
if ver < (1, 2, 0): |
|||
dispatch_hook = lambda key, hooks, hook_data, *a, **kw: hook_data |
|||
del ver |
|||
|
|||
|
|||
class CachedSession(OriginalSession): |
|||
""" Requests ``Sessions`` with caching support. |
|||
""" |
|||
|
|||
def __init__(self, cache_name='cache', backend=None, expire_after=None, |
|||
allowable_codes=(200,), allowable_methods=('GET',), |
|||
**backend_options): |
|||
""" |
|||
:param cache_name: for ``sqlite`` backend: cache file will start with this prefix, |
|||
e.g ``cache.sqlite`` |
|||
|
|||
for ``mongodb``: it's used as database name |
|||
|
|||
for ``redis``: it's used as the namespace. This means all keys |
|||
are prefixed with ``'cache_name:'`` |
|||
:param backend: cache backend name e.g ``'sqlite'``, ``'mongodb'``, ``'redis'``, ``'memory'``. |
|||
(see :ref:`persistence`). Or instance of backend implementation. |
|||
Default value is ``None``, which means use ``'sqlite'`` if available, |
|||
otherwise fallback to ``'memory'``. |
|||
:param expire_after: number of seconds after cache will be expired |
|||
or `None` (default) to ignore expiration |
|||
:type expire_after: float |
|||
:param allowable_codes: limit caching only for response with this codes (default: 200) |
|||
:type allowable_codes: tuple |
|||
:param allowable_methods: cache only requests of this methods (default: 'GET') |
|||
:type allowable_methods: tuple |
|||
:kwarg backend_options: options for chosen backend. See corresponding |
|||
:ref:`sqlite <backends_sqlite>`, :ref:`mongo <backends_mongo>` |
|||
and :ref:`redis <backends_redis>` backends API documentation |
|||
""" |
|||
if backend is None or isinstance(backend, basestring): |
|||
self.cache = backends.create_backend(backend, cache_name, |
|||
backend_options) |
|||
else: |
|||
self.cache = backend |
|||
|
|||
self._cache_expire_after = expire_after |
|||
self._cache_allowable_codes = allowable_codes |
|||
self._cache_allowable_methods = allowable_methods |
|||
self._is_cache_disabled = False |
|||
super(CachedSession, self).__init__() |
|||
|
|||
def send(self, request, **kwargs): |
|||
if (self._is_cache_disabled |
|||
or request.method not in self._cache_allowable_methods): |
|||
response = super(CachedSession, self).send(request, **kwargs) |
|||
response.from_cache = False |
|||
return response |
|||
|
|||
cache_key = self.cache.create_key(request) |
|||
|
|||
def send_request_and_cache_response(): |
|||
response = super(CachedSession, self).send(request, **kwargs) |
|||
if response.status_code in self._cache_allowable_codes: |
|||
self.cache.save_response(cache_key, response) |
|||
response.from_cache = False |
|||
return response |
|||
|
|||
response, timestamp = self.cache.get_response_and_time(cache_key) |
|||
if response is None: |
|||
return send_request_and_cache_response() |
|||
|
|||
if self._cache_expire_after is not None: |
|||
difference = datetime.utcnow() - timestamp |
|||
if difference > timedelta(seconds=self._cache_expire_after): |
|||
self.cache.delete(cache_key) |
|||
return send_request_and_cache_response() |
|||
# dispatch hook here, because we've removed it before pickling |
|||
response.from_cache = True |
|||
response = dispatch_hook('response', request.hooks, response, **kwargs) |
|||
return response |
|||
|
|||
def request(self, method, url, params=None, data=None, headers=None, |
|||
cookies=None, files=None, auth=None, timeout=None, |
|||
allow_redirects=True, proxies=None, hooks=None, stream=None, |
|||
verify=None, cert=None): |
|||
response = super(CachedSession, self).request(method, url, params, data, |
|||
headers, cookies, files, |
|||
auth, timeout, |
|||
allow_redirects, proxies, |
|||
hooks, stream, verify, cert) |
|||
if self._is_cache_disabled: |
|||
return response |
|||
|
|||
main_key = self.cache.create_key(response.request) |
|||
for r in response.history: |
|||
self.cache.add_key_mapping( |
|||
self.cache.create_key(r.request), main_key |
|||
) |
|||
return response |
|||
|
|||
@contextmanager |
|||
def cache_disabled(self): |
|||
""" |
|||
Context manager for temporary disabling cache |
|||
:: |
|||
|
|||
>>> s = CachedSession() |
|||
>>> with s.cache_disabled(): |
|||
... s.get('http://httpbin.org/ip') |
|||
""" |
|||
self._is_cache_disabled = True |
|||
try: |
|||
yield |
|||
finally: |
|||
self._is_cache_disabled = False |
|||
|
|||
|
|||
def install_cache(cache_name='cache', backend=None, expire_after=None, |
|||
allowable_codes=(200,), allowable_methods=('GET',), |
|||
session_factory=CachedSession, **backend_options): |
|||
""" |
|||
Installs cache for all ``Requests`` requests by monkey-patching ``Session`` |
|||
|
|||
Parameters are the same as in :class:`CachedSession`. Additional parameters: |
|||
|
|||
:param session_factory: Session factory. It should inherit :class:`CachedSession` (default) |
|||
""" |
|||
if backend: |
|||
backend = backends.create_backend(backend, cache_name, backend_options) |
|||
_patch_session_factory( |
|||
lambda : session_factory(cache_name=cache_name, |
|||
backend=backend, |
|||
expire_after=expire_after, |
|||
allowable_codes=allowable_codes, |
|||
allowable_methods=allowable_methods, |
|||
**backend_options) |
|||
) |
|||
|
|||
|
|||
# backward compatibility |
|||
configure = install_cache |
|||
|
|||
|
|||
def uninstall_cache(): |
|||
""" Restores ``requests.Session`` and disables cache |
|||
""" |
|||
_patch_session_factory(OriginalSession) |
|||
|
|||
|
|||
@contextmanager |
|||
def disabled(): |
|||
""" |
|||
Context manager for temporary disabling globally installed cache |
|||
|
|||
.. warning:: not thread-safe |
|||
|
|||
:: |
|||
|
|||
>>> with requests_cache.disabled(): |
|||
... requests.get('http://httpbin.org/ip') |
|||
... requests.get('http://httpbin.org/get') |
|||
|
|||
""" |
|||
previous = requests.Session |
|||
uninstall_cache() |
|||
try: |
|||
yield |
|||
finally: |
|||
_patch_session_factory(previous) |
|||
|
|||
|
|||
@contextmanager |
|||
def enabled(*args, **kwargs): |
|||
""" |
|||
Context manager for temporary installing global cache. |
|||
|
|||
Accepts same arguments as :func:`install_cache` |
|||
|
|||
.. warning:: not thread-safe |
|||
|
|||
:: |
|||
|
|||
>>> with requests_cache.enabled('cache_db'): |
|||
... requests.get('http://httpbin.org/get') |
|||
|
|||
""" |
|||
install_cache(*args, **kwargs) |
|||
try: |
|||
yield |
|||
finally: |
|||
uninstall_cache() |
|||
|
|||
|
|||
def get_cache(): |
|||
""" Returns internal cache object from globally installed ``CachedSession`` |
|||
""" |
|||
return requests.Session().cache |
|||
|
|||
|
|||
def clear(): |
|||
""" Clears globally installed cache |
|||
""" |
|||
get_cache().clear() |
|||
|
|||
|
|||
def _patch_session_factory(session_factory=CachedSession): |
|||
requests.Session = requests.sessions.Session = session_factory |