You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
391 lines
13 KiB
391 lines
13 KiB
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
#-----------------------
|
|
# Name: cache_file.py
|
|
# Python Library
|
|
# Author: Raymond Wagner
|
|
# Purpose: Persistant file-backed cache using /tmp/ to share data
|
|
# using flock or msvcrt.locking to allow safe concurrent
|
|
# access.
|
|
#-----------------------
|
|
|
|
import struct
|
|
import errno
|
|
import json
|
|
import os
|
|
import io
|
|
|
|
from cStringIO import StringIO
|
|
|
|
from tmdb_exceptions import *
|
|
from cache_engine import CacheEngine, CacheObject
|
|
|
|
####################
|
|
# Cache File Format
|
|
#------------------
|
|
# cache version (2) unsigned short
|
|
# slot count (2) unsigned short
|
|
# slot 0: timestamp (8) double
|
|
# slot 0: lifetime (4) unsigned int
|
|
# slot 0: seek point (4) unsigned int
|
|
# slot 1: timestamp
|
|
# slot 1: lifetime index slots are IDd by their query date and
|
|
# slot 1: seek point are filled incrementally forwards. lifetime
|
|
# .... is how long after query date before the item
|
|
# .... expires, and seek point is the location of the
|
|
# slot N-2: timestamp start of data for that entry. 256 empty slots
|
|
# slot N-2: lifetime are pre-allocated, allowing fast updates.
|
|
# slot N-2: seek point when all slots are filled, the cache file is
|
|
# slot N-1: timestamp rewritten from scrach to add more slots.
|
|
# slot N-1: lifetime
|
|
# slot N-1: seek point
|
|
# block 1 (?) ASCII
|
|
# block 2
|
|
# .... blocks are just simple ASCII text, generated
|
|
# .... as independent objects by the JSON encoder
|
|
# block N-2
|
|
# block N-1
|
|
#
|
|
####################
|
|
|
|
|
|
def _donothing(*args, **kwargs):
|
|
pass
|
|
|
|
try:
|
|
import fcntl
|
|
class Flock( object ):
|
|
"""
|
|
Context manager to flock file for the duration the object exists.
|
|
Referenced file will be automatically unflocked as the interpreter
|
|
exits the context.
|
|
Supports an optional callback to process the error and optionally
|
|
suppress it.
|
|
"""
|
|
LOCK_EX = fcntl.LOCK_EX
|
|
LOCK_SH = fcntl.LOCK_SH
|
|
|
|
def __init__(self, fileobj, operation, callback=None):
|
|
self.fileobj = fileobj
|
|
self.operation = operation
|
|
self.callback = callback
|
|
def __enter__(self):
|
|
fcntl.flock(self.fileobj, self.operation)
|
|
def __exit__(self, exc_type, exc_value, exc_tb):
|
|
suppress = False
|
|
if callable(self.callback):
|
|
suppress = self.callback(exc_type, exc_value, exc_tb)
|
|
fcntl.flock(self.fileobj, fcntl.LOCK_UN)
|
|
return suppress
|
|
|
|
def parse_filename(filename):
|
|
if '$' in filename:
|
|
# replace any environmental variables
|
|
filename = os.path.expandvars(filename)
|
|
if filename.startswith('~'):
|
|
# check for home directory
|
|
return os.path.expanduser(filename)
|
|
elif filename.startswith('/'):
|
|
# check for absolute path
|
|
return filename
|
|
# return path with temp directory prepended
|
|
return '/tmp/' + filename
|
|
|
|
except ImportError:
|
|
import msvcrt
|
|
class Flock( object ):
|
|
LOCK_EX = msvcrt.LK_LOCK
|
|
LOCK_SH = msvcrt.LK_LOCK
|
|
|
|
def __init__(self, fileobj, operation, callback=None):
|
|
self.fileobj = fileobj
|
|
self.operation = operation
|
|
self.callback = callback
|
|
def __enter__(self):
|
|
self.size = os.path.getsize(self.fileobj.name)
|
|
msvcrt.locking(self.fileobj.fileno(), self.operation, self.size)
|
|
def __exit__(self, exc_type, exc_value, exc_tb):
|
|
suppress = False
|
|
if callable(self.callback):
|
|
suppress = self.callback(exc_type, exc_value, exc_tb)
|
|
msvcrt.locking(self.fileobj.fileno(), msvcrt.LK_UNLCK, self.size)
|
|
return suppress
|
|
|
|
def parse_filename(filename):
|
|
if '%' in filename:
|
|
# replace any environmental variables
|
|
filename = os.path.expandvars(filename)
|
|
if filename.startswith('~'):
|
|
# check for home directory
|
|
return os.path.expanduser(filename)
|
|
elif (ord(filename[0]) in (range(65,91)+range(99,123))) \
|
|
and (filename[1:3] == ':\\'):
|
|
# check for absolute drive path (e.g. C:\...)
|
|
return filename
|
|
elif (filename.count('\\') >= 3) and (filename.startswith('\\\\')):
|
|
# check for absolute UNC path (e.g. \\server\...)
|
|
return filename
|
|
# return path with temp directory prepended
|
|
return os.path.expandvars(os.path.join('%TEMP%',filename))
|
|
|
|
|
|
class FileCacheObject( CacheObject ):
|
|
_struct = struct.Struct('dII') # double and two ints
|
|
# timestamp, lifetime, position
|
|
|
|
@classmethod
|
|
def fromFile(cls, fd):
|
|
dat = cls._struct.unpack(fd.read(cls._struct.size))
|
|
obj = cls(None, None, dat[1], dat[0])
|
|
obj.position = dat[2]
|
|
return obj
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
self._key = None
|
|
self._data = None
|
|
self._size = None
|
|
self._buff = StringIO()
|
|
super(FileCacheObject, self).__init__(*args, **kwargs)
|
|
|
|
@property
|
|
def size(self):
|
|
if self._size is None:
|
|
self._buff.seek(0,2)
|
|
size = self._buff.tell()
|
|
if size == 0:
|
|
if (self._key is None) or (self._data is None):
|
|
raise RuntimeError
|
|
json.dump([self.key, self.data], self._buff)
|
|
self._size = self._buff.tell()
|
|
self._size = size
|
|
return self._size
|
|
@size.setter
|
|
def size(self, value): self._size = value
|
|
|
|
@property
|
|
def key(self):
|
|
if self._key is None:
|
|
try:
|
|
self._key, self._data = json.loads(self._buff.getvalue())
|
|
except:
|
|
pass
|
|
return self._key
|
|
@key.setter
|
|
def key(self, value): self._key = value
|
|
|
|
@property
|
|
def data(self):
|
|
if self._data is None:
|
|
self._key, self._data = json.loads(self._buff.getvalue())
|
|
return self._data
|
|
@data.setter
|
|
def data(self, value): self._data = value
|
|
|
|
def load(self, fd):
|
|
fd.seek(self.position)
|
|
self._buff.seek(0)
|
|
self._buff.write(fd.read(self.size))
|
|
|
|
def dumpslot(self, fd):
|
|
pos = fd.tell()
|
|
fd.write(self._struct.pack(self.creation, self.lifetime, self.position))
|
|
|
|
def dumpdata(self, fd):
|
|
self.size
|
|
fd.seek(self.position)
|
|
fd.write(self._buff.getvalue())
|
|
|
|
|
|
class FileEngine( CacheEngine ):
|
|
"""Simple file-backed engine."""
|
|
name = 'file'
|
|
_struct = struct.Struct('HH') # two shorts for version and count
|
|
_version = 2
|
|
|
|
def __init__(self, parent):
|
|
super(FileEngine, self).__init__(parent)
|
|
self.configure(None)
|
|
|
|
def configure(self, filename, preallocate=256):
|
|
self.preallocate = preallocate
|
|
self.cachefile = filename
|
|
self.size = 0
|
|
self.free = 0
|
|
self.age = 0
|
|
|
|
def _init_cache(self):
|
|
# only run this once
|
|
self._init_cache = _donothing
|
|
|
|
if self.cachefile is None:
|
|
raise TMDBCacheError("No cache filename given.")
|
|
|
|
self.cachefile = parse_filename(self.cachefile)
|
|
|
|
try:
|
|
# attempt to read existing cache at filename
|
|
# handle any errors that occur
|
|
self._open('r+b')
|
|
# seems to have read fine, make sure we have write access
|
|
if not os.access(self.cachefile, os.W_OK):
|
|
raise TMDBCacheWriteError(self.cachefile)
|
|
|
|
except IOError as e:
|
|
if e.errno == errno.ENOENT:
|
|
# file does not exist, create a new one
|
|
try:
|
|
self._open('w+b')
|
|
self._write([])
|
|
except IOError as e:
|
|
if e.errno == errno.ENOENT:
|
|
# directory does not exist
|
|
raise TMDBCacheDirectoryError(self.cachefile)
|
|
elif e.errno == errno.EACCES:
|
|
# user does not have rights to create new file
|
|
raise TMDBCacheWriteError(self.cachefile)
|
|
else:
|
|
# let the unhandled error continue through
|
|
raise
|
|
elif e.errno == errno.EACCESS:
|
|
# file exists, but we do not have permission to access it
|
|
raise TMDBCacheReadError(self.cachefile)
|
|
else:
|
|
# let the unhandled error continue through
|
|
raise
|
|
|
|
def get(self, date):
|
|
self._init_cache()
|
|
self._open('r+b')
|
|
|
|
with Flock(self.cachefd, Flock.LOCK_SH): # lock for shared access
|
|
# return any new objects in the cache
|
|
return self._read(date)
|
|
|
|
def put(self, key, value, lifetime):
|
|
self._init_cache()
|
|
self._open('r+b')
|
|
|
|
with Flock(self.cachefd, Flock.LOCK_EX): # lock for exclusive access
|
|
newobjs = self._read(self.age)
|
|
newobjs.append(FileCacheObject(key, value, lifetime))
|
|
|
|
# this will cause a new file object to be opened with the proper
|
|
# access mode, however the Flock should keep the old object open
|
|
# and properly locked
|
|
self._open('r+b')
|
|
self._write(newobjs)
|
|
return newobjs
|
|
|
|
def _open(self, mode='r+b'):
|
|
# enforce binary operation
|
|
try:
|
|
if self.cachefd.mode == mode:
|
|
# already opened in requested mode, nothing to do
|
|
self.cachefd.seek(0)
|
|
return
|
|
except: pass # catch issue of no cachefile yet opened
|
|
self.cachefd = io.open(self.cachefile, mode)
|
|
|
|
def _read(self, date):
|
|
try:
|
|
self.cachefd.seek(0)
|
|
version, count = self._struct.unpack(\
|
|
self.cachefd.read(self._struct.size))
|
|
if version != self._version:
|
|
# old version, break out and well rewrite when finished
|
|
raise Exception
|
|
|
|
self.size = count
|
|
cache = []
|
|
while count:
|
|
# loop through storage definitions
|
|
obj = FileCacheObject.fromFile(self.cachefd)
|
|
cache.append(obj)
|
|
count -= 1
|
|
|
|
except:
|
|
# failed to read information, so just discard it and return empty
|
|
self.size = 0
|
|
self.free = 0
|
|
return []
|
|
|
|
# get end of file
|
|
self.cachefd.seek(0,2)
|
|
position = self.cachefd.tell()
|
|
newobjs = []
|
|
emptycount = 0
|
|
|
|
# walk backward through all, collecting new content and populating size
|
|
while len(cache):
|
|
obj = cache.pop()
|
|
if obj.creation == 0:
|
|
# unused slot, skip
|
|
emptycount += 1
|
|
elif obj.expired:
|
|
# object has passed expiration date, no sense processing
|
|
continue
|
|
elif obj.creation > date:
|
|
# used slot with new data, process
|
|
obj.size, position = position - obj.position, obj.position
|
|
newobjs.append(obj)
|
|
# update age
|
|
self.age = max(self.age, obj.creation)
|
|
elif len(newobjs):
|
|
# end of new data, break
|
|
break
|
|
|
|
# walk forward and load new content
|
|
for obj in newobjs:
|
|
obj.load(self.cachefd)
|
|
|
|
self.free = emptycount
|
|
return newobjs
|
|
|
|
def _write(self, data):
|
|
if self.free and (self.size != self.free):
|
|
# we only care about the last data point, since the rest are
|
|
# already stored in the file
|
|
data = data[-1]
|
|
|
|
# determine write position of data in cache
|
|
self.cachefd.seek(0,2)
|
|
end = self.cachefd.tell()
|
|
data.position = end
|
|
|
|
# write incremental update to free slot
|
|
self.cachefd.seek(4 + 16*(self.size-self.free))
|
|
data.dumpslot(self.cachefd)
|
|
data.dumpdata(self.cachefd)
|
|
|
|
else:
|
|
# rewrite cache file from scratch
|
|
# pull data from parent cache
|
|
data.extend(self.parent()._data.values())
|
|
data.sort(key=lambda x: x.creation)
|
|
# write header
|
|
size = len(data) + self.preallocate
|
|
self.cachefd.seek(0)
|
|
self.cachefd.truncate()
|
|
self.cachefd.write(self._struct.pack(self._version, size))
|
|
# write storage slot definitions
|
|
prev = None
|
|
for d in data:
|
|
if prev == None:
|
|
d.position = 4 + 16*size
|
|
else:
|
|
d.position = prev.position + prev.size
|
|
d.dumpslot(self.cachefd)
|
|
prev = d
|
|
# fill in allocated slots
|
|
for i in range(2**8):
|
|
self.cachefd.write(FileCacheObject._struct.pack(0, 0, 0))
|
|
# write stored data
|
|
for d in data:
|
|
d.dumpdata(self.cachefd)
|
|
|
|
self.cachefd.flush()
|
|
|
|
def expire(self, key):
|
|
pass
|
|
|
|
|
|
|