You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

391 lines
13 KiB

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------
# Name: cache_file.py
# Python Library
# Author: Raymond Wagner
# Purpose: Persistant file-backed cache using /tmp/ to share data
# using flock or msvcrt.locking to allow safe concurrent
# access.
#-----------------------
import struct
import errno
import json
import os
import io
from cStringIO import StringIO
from tmdb_exceptions import *
from cache_engine import CacheEngine, CacheObject
####################
# Cache File Format
#------------------
# cache version (2) unsigned short
# slot count (2) unsigned short
# slot 0: timestamp (8) double
# slot 0: lifetime (4) unsigned int
# slot 0: seek point (4) unsigned int
# slot 1: timestamp
# slot 1: lifetime index slots are IDd by their query date and
# slot 1: seek point are filled incrementally forwards. lifetime
# .... is how long after query date before the item
# .... expires, and seek point is the location of the
# slot N-2: timestamp start of data for that entry. 256 empty slots
# slot N-2: lifetime are pre-allocated, allowing fast updates.
# slot N-2: seek point when all slots are filled, the cache file is
# slot N-1: timestamp rewritten from scrach to add more slots.
# slot N-1: lifetime
# slot N-1: seek point
# block 1 (?) ASCII
# block 2
# .... blocks are just simple ASCII text, generated
# .... as independent objects by the JSON encoder
# block N-2
# block N-1
#
####################
def _donothing(*args, **kwargs):
pass
try:
import fcntl
class Flock( object ):
"""
Context manager to flock file for the duration the object exists.
Referenced file will be automatically unflocked as the interpreter
exits the context.
Supports an optional callback to process the error and optionally
suppress it.
"""
LOCK_EX = fcntl.LOCK_EX
LOCK_SH = fcntl.LOCK_SH
def __init__(self, fileobj, operation, callback=None):
self.fileobj = fileobj
self.operation = operation
self.callback = callback
def __enter__(self):
fcntl.flock(self.fileobj, self.operation)
def __exit__(self, exc_type, exc_value, exc_tb):
suppress = False
if callable(self.callback):
suppress = self.callback(exc_type, exc_value, exc_tb)
fcntl.flock(self.fileobj, fcntl.LOCK_UN)
return suppress
def parse_filename(filename):
if '$' in filename:
# replace any environmental variables
filename = os.path.expandvars(filename)
if filename.startswith('~'):
# check for home directory
return os.path.expanduser(filename)
elif filename.startswith('/'):
# check for absolute path
return filename
# return path with temp directory prepended
return '/tmp/' + filename
except ImportError:
import msvcrt
class Flock( object ):
LOCK_EX = msvcrt.LK_LOCK
LOCK_SH = msvcrt.LK_LOCK
def __init__(self, fileobj, operation, callback=None):
self.fileobj = fileobj
self.operation = operation
self.callback = callback
def __enter__(self):
self.size = os.path.getsize(self.fileobj.name)
msvcrt.locking(self.fileobj.fileno(), self.operation, self.size)
def __exit__(self, exc_type, exc_value, exc_tb):
suppress = False
if callable(self.callback):
suppress = self.callback(exc_type, exc_value, exc_tb)
msvcrt.locking(self.fileobj.fileno(), msvcrt.LK_UNLCK, self.size)
return suppress
def parse_filename(filename):
if '%' in filename:
# replace any environmental variables
filename = os.path.expandvars(filename)
if filename.startswith('~'):
# check for home directory
return os.path.expanduser(filename)
elif (ord(filename[0]) in (range(65,91)+range(99,123))) \
and (filename[1:3] == ':\\'):
# check for absolute drive path (e.g. C:\...)
return filename
elif (filename.count('\\') >= 3) and (filename.startswith('\\\\')):
# check for absolute UNC path (e.g. \\server\...)
return filename
# return path with temp directory prepended
return os.path.expandvars(os.path.join('%TEMP%',filename))
class FileCacheObject( CacheObject ):
_struct = struct.Struct('dII') # double and two ints
# timestamp, lifetime, position
@classmethod
def fromFile(cls, fd):
dat = cls._struct.unpack(fd.read(cls._struct.size))
obj = cls(None, None, dat[1], dat[0])
obj.position = dat[2]
return obj
def __init__(self, *args, **kwargs):
self._key = None
self._data = None
self._size = None
self._buff = StringIO()
super(FileCacheObject, self).__init__(*args, **kwargs)
@property
def size(self):
if self._size is None:
self._buff.seek(0,2)
size = self._buff.tell()
if size == 0:
if (self._key is None) or (self._data is None):
raise RuntimeError
json.dump([self.key, self.data], self._buff)
self._size = self._buff.tell()
self._size = size
return self._size
@size.setter
def size(self, value): self._size = value
@property
def key(self):
if self._key is None:
try:
self._key, self._data = json.loads(self._buff.getvalue())
except:
pass
return self._key
@key.setter
def key(self, value): self._key = value
@property
def data(self):
if self._data is None:
self._key, self._data = json.loads(self._buff.getvalue())
return self._data
@data.setter
def data(self, value): self._data = value
def load(self, fd):
fd.seek(self.position)
self._buff.seek(0)
self._buff.write(fd.read(self.size))
def dumpslot(self, fd):
pos = fd.tell()
fd.write(self._struct.pack(self.creation, self.lifetime, self.position))
def dumpdata(self, fd):
self.size
fd.seek(self.position)
fd.write(self._buff.getvalue())
class FileEngine( CacheEngine ):
"""Simple file-backed engine."""
name = 'file'
_struct = struct.Struct('HH') # two shorts for version and count
_version = 2
def __init__(self, parent):
super(FileEngine, self).__init__(parent)
self.configure(None)
def configure(self, filename, preallocate=256):
self.preallocate = preallocate
self.cachefile = filename
self.size = 0
self.free = 0
self.age = 0
def _init_cache(self):
# only run this once
self._init_cache = _donothing
if self.cachefile is None:
raise TMDBCacheError("No cache filename given.")
self.cachefile = parse_filename(self.cachefile)
try:
# attempt to read existing cache at filename
# handle any errors that occur
self._open('r+b')
# seems to have read fine, make sure we have write access
if not os.access(self.cachefile, os.W_OK):
raise TMDBCacheWriteError(self.cachefile)
except IOError as e:
if e.errno == errno.ENOENT:
# file does not exist, create a new one
try:
self._open('w+b')
self._write([])
except IOError as e:
if e.errno == errno.ENOENT:
# directory does not exist
raise TMDBCacheDirectoryError(self.cachefile)
elif e.errno == errno.EACCES:
# user does not have rights to create new file
raise TMDBCacheWriteError(self.cachefile)
else:
# let the unhandled error continue through
raise
elif e.errno == errno.EACCESS:
# file exists, but we do not have permission to access it
raise TMDBCacheReadError(self.cachefile)
else:
# let the unhandled error continue through
raise
def get(self, date):
self._init_cache()
self._open('r+b')
with Flock(self.cachefd, Flock.LOCK_SH): # lock for shared access
# return any new objects in the cache
return self._read(date)
def put(self, key, value, lifetime):
self._init_cache()
self._open('r+b')
with Flock(self.cachefd, Flock.LOCK_EX): # lock for exclusive access
newobjs = self._read(self.age)
newobjs.append(FileCacheObject(key, value, lifetime))
# this will cause a new file object to be opened with the proper
# access mode, however the Flock should keep the old object open
# and properly locked
self._open('r+b')
self._write(newobjs)
return newobjs
def _open(self, mode='r+b'):
# enforce binary operation
try:
if self.cachefd.mode == mode:
# already opened in requested mode, nothing to do
self.cachefd.seek(0)
return
except: pass # catch issue of no cachefile yet opened
self.cachefd = io.open(self.cachefile, mode)
def _read(self, date):
try:
self.cachefd.seek(0)
version, count = self._struct.unpack(\
self.cachefd.read(self._struct.size))
if version != self._version:
# old version, break out and well rewrite when finished
raise Exception
self.size = count
cache = []
while count:
# loop through storage definitions
obj = FileCacheObject.fromFile(self.cachefd)
cache.append(obj)
count -= 1
except:
# failed to read information, so just discard it and return empty
self.size = 0
self.free = 0
return []
# get end of file
self.cachefd.seek(0,2)
position = self.cachefd.tell()
newobjs = []
emptycount = 0
# walk backward through all, collecting new content and populating size
while len(cache):
obj = cache.pop()
if obj.creation == 0:
# unused slot, skip
emptycount += 1
elif obj.expired:
# object has passed expiration date, no sense processing
continue
elif obj.creation > date:
# used slot with new data, process
obj.size, position = position - obj.position, obj.position
newobjs.append(obj)
# update age
self.age = max(self.age, obj.creation)
elif len(newobjs):
# end of new data, break
break
# walk forward and load new content
for obj in newobjs:
obj.load(self.cachefd)
self.free = emptycount
return newobjs
def _write(self, data):
if self.free and (self.size != self.free):
# we only care about the last data point, since the rest are
# already stored in the file
data = data[-1]
# determine write position of data in cache
self.cachefd.seek(0,2)
end = self.cachefd.tell()
data.position = end
# write incremental update to free slot
self.cachefd.seek(4 + 16*(self.size-self.free))
data.dumpslot(self.cachefd)
data.dumpdata(self.cachefd)
else:
# rewrite cache file from scratch
# pull data from parent cache
data.extend(self.parent()._data.values())
data.sort(key=lambda x: x.creation)
# write header
size = len(data) + self.preallocate
self.cachefd.seek(0)
self.cachefd.truncate()
self.cachefd.write(self._struct.pack(self._version, size))
# write storage slot definitions
prev = None
for d in data:
if prev == None:
d.position = 4 + 16*size
else:
d.position = prev.position + prev.size
d.dumpslot(self.cachefd)
prev = d
# fill in allocated slots
for i in range(2**8):
self.cachefd.write(FileCacheObject._struct.pack(0, 0, 0))
# write stored data
for d in data:
d.dumpdata(self.cachefd)
self.cachefd.flush()
def expire(self, key):
pass