88 changed files with 9967 additions and 1 deletion
@@ -0,0 +1,201 @@
# Copyright (c) 2001 Autonomous Zone Industries
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
An object that makes some of the attributes of your class persistent, pickling
them and lazily writing them to a file.
"""

# from the Python Standard Library
import os
import cPickle as pickle
import warnings

# from the pyutil library
import fileutil
import nummedobj
import twistedutil

# from the Twisted library
from twisted.python import log

class PickleSaver(nummedobj.NummedObj):
    """
    This makes some of the attributes of your class persistent, saving
    them in a pickle and saving them lazily.

    The general idea: you are going to tell PickleSaver which of your
    attributes ought to be persistently saved, and the name of a file to
    save them in.  Those attributes will get saved to disk, and when
    your object is instantiated those attributes will get set to the
    values loaded from the file.

    Usage: inherit from PickleSaver and call PickleSaver.__init__() in your
    constructor.  You will pass arguments to PickleSaver.__init__()
    telling it which attributes to save, which file to save them in, and
    what values they should have if there is no value stored for them in
    the file.

    Note: do *not* assign values to your persistent attributes in your
    constructor, because you might thus overwrite their persistent
    values.

    Then whenever you change one of the persistent attributes, call
    self.lazy_save() (it won't *really* save -- it'll just schedule a
    save for DELAY seconds later).  If you update an attribute and
    forget to call self.lazy_save(), then the change will not be saved
    unless you later call self.lazy_save() before you shut down.

    Data could be lost if the Python interpreter were to die
    unexpectedly (for example, due to a segfault in a compiled machine
    code module or due to the Python process being killed without
    warning via SIGKILL) before the delay passes.  However, if the Python
    interpreter shuts down cleanly (i.e., if it garbage collects and
    invokes the __del__ methods of the collected objects), then the data
    will be saved at that time (unless your class has the "not-collectable"
    problem: http://python.org/doc/current/lib/module-gc.html -- search
    in text for "uncollectable").

    Note: you can pass DELAY=0 to make PickleSaver a not-so-lazy saver.
    The advantage of laziness is that you don't touch the disk as
    often -- touching disk is a performance cost.

    To shut down cleanly, invoke shutdown().  Further operations after that
    will result in exceptions.
    """
    class ExtRes:
        """
        This is for holding things (external resources) that PickleSaver needs
        to finalize after PickleSaver is killed (post-mortem finalization).

        In particular, this holds the names and values of all attributes
        that have been changed, so that after the PickleSaver is
        garbage-collected those values will be saved to the persistent file.
        """
        def __init__(self, fname, objname):
            self.fname = fname
            self.objname = objname
            # True iff the attrs have been changed and need to be saved to
            # disk.  Changing this flag from False to True schedules a save
            # task for DELAY seconds later; when the save task fires it
            # changes the flag from True to False.
            self.dirty = False
            self.savertask = None
            # the pickled (serialized, string) contents of the attributes
            # that should be saved
            self.valstr = None

        def _save_to_disk(self):
            if self.valstr is not None:
                log.msg("%s._save_to_disk(): fname: %s" % (self.objname, self.fname,))
                # Write to a temp file, then rename it into place.
                of = open(self.fname + ".tmp", "wb")
                of.write(self.valstr)
                of.flush()
                of.close()
                of = None
                fileutil.remove_if_possible(self.fname)
                fileutil.rename(self.fname + ".tmp", self.fname)
                log.msg("%s._save_to_disk(): now, having finished write(), os.path.isfile(%s): %s" % (self.objname, self.fname, os.path.isfile(self.fname),))
                self.valstr = None
                self.dirty = False
                try:
                    self.savertask.callId.cancel()
                except:
                    pass
                self.savertask = None

        def shutdown(self):
            if self.dirty:
                self._save_to_disk()
            if self.savertask:
                try:
                    self.savertask.callId.cancel()
                except:
                    pass
                self.savertask = None

        def __del__(self):
            self.shutdown()

    def __init__(self, fname, attrs, DELAY=60*60, savecb=None):
        """
        @param attrs: a dict whose keys are the names of all the attributes
            to be persistently stored and whose values are the initial
            default value that each attribute gets set to the first time it
            is ever used; after this first initialization, the value will be
            persistent, so the initial default value will never be used
            again.

        @param savecb: if not None, a callable that will be called after
            each save completes (useful for unit tests); savecb doesn't get
            called after a shutdown-save, only after a scheduled save.
        """
        warnings.warn("deprecated", DeprecationWarning)

        nummedobj.NummedObj.__init__(self)
        self._DELAY = DELAY

        self._attrnames = attrs.keys()
        self._extres = PickleSaver.ExtRes(fname=fname, objname=self.__repr__())
        self._savecb = savecb

        for attrname, defaultval in attrs.items():
            setattr(self, attrname, defaultval)

        try:
            attrdict = pickle.loads(open(self._extres.fname, "rb").read())
            for attrname, attrval in attrdict.items():
                if not hasattr(self, attrname):
                    log.msg("WARNING: %s has no attribute named %s on load from disk, value: %s." % (self, attrname, attrval,))
                setattr(self, attrname, attrval)
        except (pickle.UnpicklingError, IOError, EOFError,), le:
            # The main file was missing or corrupt; fall back to the temp file.
            try:
                attrdict = pickle.loads(open(self._extres.fname + ".tmp", "rb").read())
                for attrname, attrval in attrdict.items():
                    if not hasattr(self, attrname):
                        log.msg("WARNING: %s has no attribute named %s on load from disk, value: %s." % (self, attrname, attrval,))
                    setattr(self, attrname, attrval)
            except (pickle.UnpicklingError, IOError, EOFError,), le2:
                log.msg("Got exception attempting to load attrs.  (This is normal if this is the first time you've used this persistent %s object.)  fname: %s, le: %s, le2: %s" % (self.__class__, self._extres.fname, le, le2,))

        self.lazy_save()

    def _store_attrs_in_extres(self):
        d = {}
        for attrname in self._attrnames:
            d[attrname] = getattr(self, attrname)
            # log.msg("%s._store_attrs_in_extres: attrname: %s, val: %s" % (self, attrname, getattr(self, attrname),))

        # pickle the attrs now, to ensure that there are no reference cycles
        self._extres.valstr = pickle.dumps(d, True)

        # log.msg("%s._store_attrs_in_extres: valstr: %s" % (self, self._extres.valstr,))

        self._extres.dirty = True

    def _save_to_disk(self):
        log.msg("%s._save_to_disk()" % (self,))
        self._extres._save_to_disk()
        if self._savecb:
            self._savecb()

    def _lazy_save(self, delay=None):
        """ @deprecated: use lazy_save() instead """
        return self.lazy_save(delay)

    def lazy_save(self, delay=None):
        """
        @param delay: how long from now before the data gets saved to disk,
            or `None' in order to use the default value provided in the
            constructor
        """
        if delay is None:
            delay = self._DELAY

        # Copy the values into extres so that if `self' gets
        # garbage-collected the values will be written to disk during
        # post-mortem finalization.  (This also marks it as dirty.)
        self._store_attrs_in_extres()

        newsavetask = twistedutil.callLater_weakly(delay, self._save_to_disk)
        if self._extres.savertask:
            if self._extres.savertask.callId.getTime() < newsavetask.callId.getTime():
                # An earlier save is already scheduled; keep it, drop the new one.
                try:
                    newsavetask.callId.cancel()
                except:
                    pass
            else:
                try:
                    self._extres.savertask.callId.cancel()
                except:
                    pass
                self._extres.savertask = newsavetask
        else:
            self._extres.savertask = newsavetask

    def shutdown(self):
        self._extres.shutdown()
        self._extres = None
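
# A minimal usage sketch (illustrative only, not part of the module): the
# subclass, file name, and attribute names below are hypothetical.  The
# persistent attributes are declared via PickleSaver.__init__() rather than
# assigned in the subclass constructor, so that values loaded from disk are
# not overwritten:
#
#     class Counter(PickleSaver):
#         def __init__(self):
#             PickleSaver.__init__(self, fname="counter.pickle",
#                                  attrs={'count': 0}, DELAY=0)
#         def increment(self):
#             self.count += 1
#             self.lazy_save()   # with DELAY=0 this saves almost immediately
#
#     c = Counter()    # picks up the previously persisted value of count, if any
#     c.increment()
#     c.shutdown()     # flushes any pending save before the object is discarded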
@@ -0,0 +1,23 @@
"""
Library of useful Python functions and classes.

Projects that have contributed substantial portions to pyutil:
U{Mojo Nation<http://mojonation.net/>}
U{Mnet<http://sf.net/projects/mnet>}
U{Allmydata<http://allmydata.com/>}
U{Tahoe-LAFS<http://tahoe-lafs.org/>}

mailto:zooko@zooko.com

pyutil web site: U{http://tahoe-lafs.org/trac/pyutil}
"""

__version__ = "unknown"
try:
    from _version import __version__
except ImportError:
    # We're running in a tree that hasn't run "./setup.py darcsver", and didn't
    # come with a _version.py, so we don't know what our version is.  This
    # should not happen very often.
    pass
__version__ # hush pyflakes
@@ -0,0 +1,17 @@

# This is the version of this tree, as created by setup.py darcsver from the darcs patch
# information: the main version number is taken from the most recent release
# tag.  If some patches have been added since the last release, this will have a
# -NN "build number" suffix, or else a -rNN "revision number" suffix.  Please see
# pyutil.version_class for a description of what the different fields mean.

__pkgname__ = "pyutil"
verstr = "1.9.3"
try:
    from pyutil.version_class import Version as pyutil_Version
    __version__ = pyutil_Version(verstr)
except (ImportError, ValueError):
    # Maybe there is no pyutil installed, or this may be an older version of
    # pyutil.version_class which does not support SVN-alike revision numbers.
    from distutils.version import LooseVersion as distutils_Version
    __version__ = distutils_Version(verstr)
@@ -0,0 +1,60 @@
# Copyright (c) 2003-2009 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
Tests useful in assertion checking; prints out nicely formatted messages too.
"""

from humanreadable import hr

def _assert(___cond=False, *___args, **___kwargs):
    if ___cond:
        return True
    msgbuf=[]
    if ___args:
        msgbuf.append("%s %s" % tuple(map(hr, (___args[0], type(___args[0]),))))
        msgbuf.extend([", %s %s" % tuple(map(hr, (arg, type(arg),))) for arg in ___args[1:]])
        if ___kwargs:
            msgbuf.append(", %s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
    else:
        if ___kwargs:
            msgbuf.append("%s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
    msgbuf.extend([", %s: %s %s" % tuple(map(hr, (k, v, type(v),))) for k, v in ___kwargs.items()[1:]])

    raise AssertionError, "".join(msgbuf)

def precondition(___cond=False, *___args, **___kwargs):
    if ___cond:
        return True
    msgbuf=["precondition", ]
    if ___args or ___kwargs:
        msgbuf.append(": ")
    if ___args:
        msgbuf.append("%s %s" % tuple(map(hr, (___args[0], type(___args[0]),))))
        msgbuf.extend([", %s %s" % tuple(map(hr, (arg, type(arg),))) for arg in ___args[1:]])
        if ___kwargs:
            msgbuf.append(", %s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
    else:
        if ___kwargs:
            msgbuf.append("%s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
    msgbuf.extend([", %s: %s %s" % tuple(map(hr, (k, v, type(v),))) for k, v in ___kwargs.items()[1:]])

    raise AssertionError, "".join(msgbuf)

def postcondition(___cond=False, *___args, **___kwargs):
    if ___cond:
        return True
    msgbuf=["postcondition", ]
    if ___args or ___kwargs:
        msgbuf.append(": ")
    if ___args:
        msgbuf.append("%s %s" % tuple(map(hr, (___args[0], type(___args[0]),))))
        msgbuf.extend([", %s %s" % tuple(map(hr, (arg, type(arg),))) for arg in ___args[1:]])
        if ___kwargs:
            msgbuf.append(", %s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
    else:
        if ___kwargs:
            msgbuf.append("%s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
    msgbuf.extend([", %s: %s %s" % tuple(map(hr, (k, v, type(v),))) for k, v in ___kwargs.items()[1:]])

    raise AssertionError, "".join(msgbuf)
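
# A minimal usage sketch (illustrative, not part of the module): precondition()
# raises an AssertionError whose message includes a human-readable rendering
# (via hr()) of each extra argument and its type, so failed checks are
# self-describing:
#
#     from pyutil.assertutil import precondition
#
#     def f(x):
#         precondition(isinstance(x, int) and x >= 0, "x is required to be a non-negative int", x)
#
#     f(-1)   # raises AssertionError; the message shows the description, -1, and their types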
@@ -0,0 +1,55 @@
import json
import random
from decimal import Decimal

from pyutil import benchutil
from pyutil import jsonutil
from pyutil import randutil

l = []
s = None

def data_strings(N):
    assert isinstance(N, int), (N, type(N))
    del l[:]
    for i in range(N):
        l.append(repr(randutil.insecurerandstr(4)))
    global s
    s = json.dumps(l)

def data_Decimals(N):
    del l[:]
    for i in range(N):
        l.append(Decimal(str(random.randrange(0, 1000000000)))/random.randrange(1, 1000000000))
    global s
    s = jsonutil.dumps(l)

def data_floats(N):
    del l[:]
    for i in range(N):
        l.append(float(random.randrange(0, 1000000000))/random.randrange(1, 1000000000))
    global s
    s = json.dumps(l)

def je(N):
    return json.dumps(l)

def ue(N):
    return jsonutil.dumps(l)

def jd(N):
    return json.loads(s)

def ud(N):
    return jsonutil.loads(s)

for i in (data_strings, data_floats, data_Decimals):
    for e in (ud, ue, jd, je):
        # for e in (ue,):
        print "i: %s, e: %s" % (i, e,)
        try:
            benchutil.bench(e, initfunc=i, TOPXP=5, profile=False)
        except TypeError, exc:
            print "skipping due to %s" % (exc,)
benchutil.print_bench_footer()
@@ -0,0 +1,53 @@
#!/usr/bin/env python

# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import hmac, sys, random

from pyutil.assertutil import _assert
from pyutil.xor import xor
from pyutil import benchfunc
from pyutil import randutil

SFUNCS = [hmac._strxor, xor.py_xor,]

SFNAMES = ["hmac", "pyutil py",]
inputs = {}

def _help_init_string(N):
    global inputs
    if not inputs.has_key(N):
        inputs[N] = [randutil.insecurerandstr(N), randutil.insecurerandstr(N),]

def _help_make_bench_xor(f):
    def g(n):
        assert inputs.has_key(n)
        _assert(isinstance(inputs[n][0], str), "Required to be a string.", inputs[n][0])
        assert len(inputs[n][0]) == n
        _assert(isinstance(inputs[n][1], str), "Required to be a string.", inputs[n][1])
        assert len(inputs[n][1]) == n
        for SF in SFUNCS:
            assert f(inputs[n][0], inputs[n][1]) == SF(inputs[n][0], inputs[n][1])

        return f(inputs[n][0], inputs[n][1])
    return g

def bench(SETSIZES=[2**x for x in range(0, 22, 3)]):
    random.seed(0)
    if len(SFUNCS) <= 1: print ""
    maxnamel = max(map(len, SFNAMES))
    for SETSIZE in SETSIZES:
        seed = random.random()
        # print "seed: ", seed
        random.seed(seed)
        i = 0
        if len(SFUNCS) > 1: print ""
        for FUNC in SFUNCS:
            funcname = SFNAMES[i] + " " * (maxnamel - len(SFNAMES[i]))
            print "%s" % funcname,
            sys.stdout.flush()
            benchfunc.rep_bench(_help_make_bench_xor(FUNC), SETSIZE, initfunc=_help_init_string, MAXREPS=2**9, MAXTIME=30)
            i = i + 1

bench()
@@ -0,0 +1,243 @@
# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
Benchmark a function for its behavior with respect to N.

How to use this module:

1. Define a function which runs the code that you want to benchmark.  The
function takes a single argument which is the size of the task (i.e. the "N"
parameter).  Pass this function as the first argument to rep_bench(), and N
as the second, e.g.:

>>> from pyutil.benchutil import rep_bench
>>> def fib(N):
...     if N <= 1:
...         return 1
...     else:
...         return fib(N-1) + fib(N-2)
...
>>> rep_bench(fib, 25, UNITS_PER_SECOND=1000)
best: 1.968e+00, 3th-best: 1.987e+00, mean: 2.118e+00, 3th-worst: 2.175e+00, worst: 2.503e+00 (of 10)

The output is reporting the number of milliseconds that executing the function
took, divided by N, from ten different invocations of fib().  It reports the
best, worst, M-th best, M-th worst, and mean, where "M" is the natural log of
the number of invocations (in this case 10).

2. Now run it with different values of N and look for patterns:

>>> for N in 1, 5, 9, 13, 17, 21:
...     print "%2d" % N,
...     rep_bench(fib, N, UNITS_PER_SECOND=1000000)
...
 1 best: 9.537e-01, 3th-best: 9.537e-01, mean: 1.121e+00, 3th-worst: 1.192e+00, worst: 2.146e+00 (of 10)
 5 best: 5.722e-01, 3th-best: 6.199e-01, mean: 7.200e-01, 3th-worst: 8.106e-01, worst: 8.106e-01 (of 10)
 9 best: 2.437e+00, 3th-best: 2.464e+00, mean: 2.530e+00, 3th-worst: 2.570e+00, worst: 2.676e+00 (of 10)
13 best: 1.154e+01, 3th-best: 1.168e+01, mean: 5.638e+01, 3th-worst: 1.346e+01, worst: 4.478e+02 (of 10)
17 best: 6.230e+01, 3th-best: 6.247e+01, mean: 6.424e+01, 3th-worst: 6.460e+01, worst: 7.294e+01 (of 10)
21 best: 3.376e+02, 3th-best: 3.391e+02, mean: 3.521e+02, 3th-worst: 3.540e+02, worst: 3.963e+02 (of 10)
>>> print_bench_footer(UNITS_PER_SECOND=1000000)
all results are in time units per N
time units per second: 1000000; seconds per time unit: 0.000001

(The pattern here is that as N grows, the time per N grows.)

3. If you need to do some setting up before the code can run, then put the
setting-up code into a separate function so that it won't be included in the
timing measurements.  A good way to share state between the setting-up
function and the main function is to make them be methods of the same object,
e.g.:

>>> import random
>>> class O:
...     def __init__(self):
...         self.l = []
...     def setup(self, N):
...         del self.l[:]
...         self.l.extend(range(N))
...         random.shuffle(self.l)
...     def sort(self, N):
...         self.l.sort()
...
>>> o = O()
>>> for N in 1000, 10000, 100000, 1000000:
...     print "%7d" % N,
...     rep_bench(o.sort, N, o.setup)
...
   1000 best: 4.830e+02, 3th-best: 4.950e+02, mean: 5.730e+02, 3th-worst: 5.858e+02, worst: 7.451e+02 (of 10)
  10000 best: 6.342e+02, 3th-best: 6.367e+02, mean: 6.678e+02, 3th-worst: 6.851e+02, worst: 7.848e+02 (of 10)
 100000 best: 8.309e+02, 3th-best: 8.338e+02, mean: 8.435e+02, 3th-worst: 8.540e+02, worst: 8.559e+02 (of 10)
1000000 best: 1.327e+03, 3th-best: 1.339e+03, mean: 1.349e+03, 3th-worst: 1.357e+03, worst: 1.374e+03 (of 10)

4. Useful fact!  rep_bench() returns a dict containing the numbers.

5. Things to fix:

a. I used to have it hooked up to use the "hotshot" profiler on the code
being measured.  I recently tried to change it to use the newer cProfile
profiler instead, but I don't understand the interface to cProfile so it
just gives an exception if you pass profile=True.  Please fix this and send
me a patch.

b. Wouldn't it be great if this script emitted results in a json format that
was understood by a tool to make pretty interactive explorable graphs?  The
pretty graphs could look like those on http://speed.pypy.org/ .  Please make
this work and send me a patch!
"""

import cProfile, operator, time
from decimal import Decimal as D

#from pyutil import jsonutil as json

import platform
if 'windows' in platform.system().lower():
    clock = time.clock
else:
    clock = time.time

from assertutil import _assert

def makeg(func):
    def blah(n, func=func):
        for i in xrange(n):
            func()
    return blah

def to_decimal(x):
    """
    See if D(x) returns something.  If instead it raises TypeError, x must
    have been a float, so convert it to Decimal by way of string.  (In Python
    >= 2.7, D(x) does this automatically.)
    """
    try:
        return D(x)
    except TypeError:
        return D("%0.54f" % (x,))

def mult(a, b):
    """
    If we get TypeError from * (possibly because one is float and the other
    is Decimal), then promote them both to Decimal.
    """
    try:
        return a * b
    except TypeError:
        return to_decimal(a) * to_decimal(b)
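
# For example (illustrative): under Python 2's decimal module, multiplying a
# float by a Decimal raises TypeError, so mult() promotes both operands via
# to_decimal():
#
#     mult(2, 3)         # plain ints multiply directly -> 6
#     mult(0.5, D("3"))  # float * Decimal would raise TypeError; returns a Decimal ~1.5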

def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False):
    """
    Will run the func up to MAXREPS times, but won't start a new run if
    MAXTIME (wall-clock time) has already elapsed (unless MAXTIME is None).

    @param quiet Don't print anything--just return the results dict.
    """
    assert isinstance(n, int), (n, type(n))
    startwallclocktime = time.time()
    tls = [] # elapsed time in seconds
    bmes = []
    while ((len(tls) < MAXREPS) or (MAXREPS is None)) and ((MAXTIME is None) or ((time.time() - startwallclocktime) < MAXTIME)):
        if initfunc:
            initfunc(n)
        try:
            tl = bench_it(func, n, profile=profile, profresults=profresults)
        except BadMeasure, bme:
            bmes.append(bme)
        else:
            tls.append(tl)
    if len(tls) == 0:
        raise Exception("Couldn't get any measurements within time limits or number-of-attempts limits.  Maybe something is wrong with your clock?  %s" % (bmes,))
    sumtls = reduce(operator.__add__, tls)
    mean = sumtls / len(tls)
    tls.sort()
    worst = tls[-1]
    best = tls[0]
    _assert(best > worstemptymeasure*MARGINOFERROR, "%s(n=%s) took %0.10f seconds, but we cannot measure times much less than about %0.10f seconds.  Try a more time-consuming variant (such as higher n)." % (func, n, best, worstemptymeasure*MARGINOFERROR,))
    m = len(tls)/4
    if m > 0:
        mthbest = tls[m-1]
        mthworst = tls[-m]
    else:
        mthbest = tls[0]
        mthworst = tls[-1]

    # The +/-0 index is the best/worst, the +/-1 index is the 2nd-best/worst,
    # etc, so we use mp1 to name it.
    mp1 = m+1
    res = {
        'worst': mult(worst, UNITS_PER_SECOND)/n,
        'best': mult(best, UNITS_PER_SECOND)/n,
        'mp1': mp1,
        'mth-best': mult(mthbest, UNITS_PER_SECOND)/n,
        'mth-worst': mult(mthworst, UNITS_PER_SECOND)/n,
        'mean': mult(mean, UNITS_PER_SECOND)/n,
        'num': len(tls),
        }

    if not quiet:
        print "best: %(best)#8.03e, %(mp1)3dth-best: %(mth-best)#8.03e, mean: %(mean)#8.03e, %(mp1)3dth-worst: %(mth-worst)#8.03e, worst: %(worst)#8.03e (of %(num)6d)" % res

    return res

MARGINOFERROR = 10

worstemptymeasure = 0

class BadMeasure(Exception):
    """ Either the clock wrapped (which happens with time.clock()) or
    it went backwards (which happens with time.time() on rare
    occasions), (or the code being measured completed before a single
    clock tick). """
    def __init__(self, startt, stopt, clock):
        self.startt = startt
        self.stopt = stopt
        self.clock = clock

    def __repr__(self):
        return "<%s %s - %s (%s)>" % (self.__class__.__name__, self.startt, self.stopt, self.clock)

def do_nothing(n):
    pass

def bench_it(func, n, profile=False, profresults="pyutil-benchutil.prof"):
    if profile:
        st = clock()
        cProfile.run('func(n)', profresults)
        sto = clock()
    else:
        st = clock()
        func(n)
        sto = clock()
    timeelapsed = sto - st
    if timeelapsed <= 0:
        raise BadMeasure(st, sto, clock)
    global worstemptymeasure
    emsta = clock()
    do_nothing(2**32)
    emstop = clock()
    empty = emstop - emsta
    if empty > worstemptymeasure:
        worstemptymeasure = empty
    return timeelapsed
def bench(func, initfunc=None, TOPXP=21, MAXREPS=5, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", outputjson=False, jsonresultsfname="pyutil-benchutil-results.json", UNITS_PER_SECOND=1):
    BSIZES = []
    for i in range(TOPXP-6, TOPXP+1, 2):
        n = int(2 ** i)
        if n < 1:
            n = 1
        if BSIZES and n <= BSIZES[-1]:
            n *= 2
        BSIZES.append(n)

    res = {}
    for BSIZE in BSIZES:
        print "N: %7d," % BSIZE,
        r = rep_bench(func, BSIZE, initfunc=initfunc, MAXREPS=MAXREPS, MAXTIME=MAXTIME, profile=profile, profresults=profresults, UNITS_PER_SECOND=UNITS_PER_SECOND)
        res[BSIZE] = r

    #if outputjson:
    #    write_file(jsonresultsfname, json.dumps(res))

    return res

def print_bench_footer(UNITS_PER_SECOND=1):
    print "all results are in time units per N"
    print "time units per second: %s; seconds per time unit: %s" % (UNITS_PER_SECOND, D(1)/UNITS_PER_SECOND)
@@ -0,0 +1,225 @@
# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
Benchmark a function for its behavior with respect to N.

How to use this module:

1. Define a function which runs the code that you want to benchmark.  The
function takes a single argument which is the size of the task (i.e. the "N"
parameter).  Pass this function as the first argument to rep_bench(), and N
as the second, e.g.:

>>> from pyutil.benchutil import rep_bench
>>> def fib(N):
...     if N <= 1:
...         return 1
...     else:
...         return fib(N-1) + fib(N-2)
...
>>> rep_bench(fib, 25, UNITS_PER_SECOND=1000)
best: 1.968e+00, 3th-best: 1.987e+00, mean: 2.118e+00, 3th-worst: 2.175e+00, worst: 2.503e+00 (of 10)

The output is reporting the number of milliseconds that executing the function
took, divided by N, from ten different invocations of fib().  It reports the
best, worst, M-th best, M-th worst, and mean, where "M" is the natural log of
the number of invocations (in this case 10).

2. Now run it with different values of N and look for patterns:

>>> for N in 1, 5, 9, 13, 17, 21:
...     print "%2d" % N,
...     rep_bench(fib, N, UNITS_PER_SECOND=1000000)
...
 1 best: 9.537e-01, 3th-best: 9.537e-01, mean: 1.121e+00, 3th-worst: 1.192e+00, worst: 2.146e+00 (of 10)
 5 best: 5.722e-01, 3th-best: 6.199e-01, mean: 7.200e-01, 3th-worst: 8.106e-01, worst: 8.106e-01 (of 10)
 9 best: 2.437e+00, 3th-best: 2.464e+00, mean: 2.530e+00, 3th-worst: 2.570e+00, worst: 2.676e+00 (of 10)
13 best: 1.154e+01, 3th-best: 1.168e+01, mean: 5.638e+01, 3th-worst: 1.346e+01, worst: 4.478e+02 (of 10)
17 best: 6.230e+01, 3th-best: 6.247e+01, mean: 6.424e+01, 3th-worst: 6.460e+01, worst: 7.294e+01 (of 10)
21 best: 3.376e+02, 3th-best: 3.391e+02, mean: 3.521e+02, 3th-worst: 3.540e+02, worst: 3.963e+02 (of 10)
>>> print_bench_footer(UNITS_PER_SECOND=1000000)
all results are in time units per N
time units per second: 1000000; seconds per time unit: 0.000001

(The pattern here is that as N grows, the time per N grows.)

3. If you need to do some setting up before the code can run, then put the
setting-up code into a separate function so that it won't be included in the
timing measurements.  A good way to share state between the setting-up
function and the main function is to make them be methods of the same object,
e.g.:

>>> import random
>>> class O:
...     def __init__(self):
...         self.l = []
...     def setup(self, N):
...         del self.l[:]
...         self.l.extend(range(N))
...         random.shuffle(self.l)
...     def sort(self, N):
...         self.l.sort()
...
>>> o = O()
>>> for N in 1000, 10000, 100000, 1000000:
...     print "%7d" % N,
...     rep_bench(o.sort, N, o.setup)
...
   1000 best: 4.830e+02, 3th-best: 4.950e+02, mean: 5.730e+02, 3th-worst: 5.858e+02, worst: 7.451e+02 (of 10)
  10000 best: 6.342e+02, 3th-best: 6.367e+02, mean: 6.678e+02, 3th-worst: 6.851e+02, worst: 7.848e+02 (of 10)
 100000 best: 8.309e+02, 3th-best: 8.338e+02, mean: 8.435e+02, 3th-worst: 8.540e+02, worst: 8.559e+02 (of 10)
1000000 best: 1.327e+03, 3th-best: 1.339e+03, mean: 1.349e+03, 3th-worst: 1.357e+03, worst: 1.374e+03 (of 10)

4. Useful fact!  rep_bench() returns a dict containing the numbers.

5. Things to fix:

a. I used to have it hooked up to use the "hotshot" profiler on the code
being measured.  I recently tried to change it to use the newer cProfile
profiler instead, but I don't understand the interface to cProfile so it
just gives an exception if you pass profile=True.  Please fix this and send
me a patch.

b. Wouldn't it be great if this script emitted results in a json format that
was understood by a tool to make pretty interactive explorable graphs?  The
pretty graphs could look like those on http://speed.pypy.org/ .  Please make
this work and send me a patch!
"""
import cProfile, operator, time
from decimal import Decimal as D

#from pyutil import jsonutil as json

import platform
if 'windows' in platform.system().lower():
    clock = time.clock
else:
    clock = time.time

from assertutil import _assert

def makeg(func):
    def blah(n, func=func):
        for i in xrange(n):
            func()
    return blah
def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False):
    """
    Will run the func up to MAXREPS times, but won't start a new run if
    MAXTIME (wall-clock time) has already elapsed (unless MAXTIME is None).

    @param quiet Don't print anything--just return the results dict.
    """
    assert isinstance(n, int), (n, type(n))
    startwallclocktime = time.time()
    tls = [] # elapsed time in seconds
    bmes = []
    while ((len(tls) < MAXREPS) or (MAXREPS is None)) and ((MAXTIME is None) or ((time.time() - startwallclocktime) < MAXTIME)):
        if initfunc:
            initfunc(n)
        try:
            tl = bench_it(func, n, profile=profile, profresults=profresults)
        except BadMeasure, bme:
            bmes.append(bme)
        else:
            tls.append(tl)
    if len(tls) == 0:
        raise Exception("Couldn't get any measurements within time limits or number-of-attempts limits.  Maybe something is wrong with your clock?  %s" % (bmes,))
    sumtls = reduce(operator.__add__, tls)
    mean = sumtls / len(tls)
    tls.sort()
    worst = tls[-1]
    best = tls[0]
    _assert(best > worstemptymeasure*MARGINOFERROR, "%s(n=%s) took %0.10f seconds, but we cannot measure times much less than about %0.10f seconds.  Try a more time-consuming variant (such as higher n)." % (func, n, best, worstemptymeasure*MARGINOFERROR,))
    m = len(tls)/4
    if m > 0:
        mthbest = tls[m-1]
        mthworst = tls[-m]
    else:
        mthbest = tls[0]
        mthworst = tls[-1]

    # The +/-0 index is the best/worst, the +/-1 index is the 2nd-best/worst,
    # etc, so we use mp1 to name it.
    mp1 = m+1
    res = {
        'worst': (worst*UNITS_PER_SECOND)/n,
        'best': (best*UNITS_PER_SECOND)/n,
        'mp1': mp1,
        'mth-best': (mthbest*UNITS_PER_SECOND)/n,
        'mth-worst': (mthworst*UNITS_PER_SECOND)/n,
        'mean': (mean*UNITS_PER_SECOND)/n,
        'num': len(tls),
        }

    if not quiet:
        print "best: %(best)#8.03e, %(mp1)3dth-best: %(mth-best)#8.03e, mean: %(mean)#8.03e, %(mp1)3dth-worst: %(mth-worst)#8.03e, worst: %(worst)#8.03e (of %(num)6d)" % res

    return res

MARGINOFERROR = 10

worstemptymeasure = 0

class BadMeasure(Exception):
    """ Either the clock wrapped (which happens with time.clock()) or
    it went backwards (which happens with time.time() on rare
    occasions), (or the code being measured completed before a single
    clock tick). """
    def __init__(self, startt, stopt, clock):
        self.startt = startt
        self.stopt = stopt
        self.clock = clock

    def __repr__(self):
        return "<%s %s - %s (%s)>" % (self.__class__.__name__, self.startt, self.stopt, self.clock)

def do_nothing(n):
    pass

def bench_it(func, n, profile=False, profresults="pyutil-benchutil.prof"):
    if profile:
        st = clock()
        cProfile.run('func(n)', profresults)
        sto = clock()
    else:
        st = clock()
        func(n)
        sto = clock()
    timeelapsed = sto - st
    if timeelapsed <= 0:
        raise BadMeasure(st, sto, clock)
    global worstemptymeasure
    emsta = clock()
    do_nothing(2**32)
    emstop = clock()
    empty = emstop - emsta
    if empty > worstemptymeasure:
        worstemptymeasure = empty
    return timeelapsed
def bench(func, initfunc=None, TOPXP=21, MAXREPS=5, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", outputjson=False, jsonresultsfname="pyutil-benchutil-results.json", UNITS_PER_SECOND=1):
    BSIZES = []
    for i in range(TOPXP-6, TOPXP+1, 2):
        n = int(2 ** i)
        if n < 1:
            n = 1
        if BSIZES and n <= BSIZES[-1]:
            n *= 2
        BSIZES.append(n)

    res = {}
    for BSIZE in BSIZES:
        print "N: %7d," % BSIZE,
        r = rep_bench(func, BSIZE, initfunc=initfunc, MAXREPS=MAXREPS, MAXTIME=MAXTIME, profile=profile, profresults=profresults, UNITS_PER_SECOND=UNITS_PER_SECOND)
        res[BSIZE] = r

    #if outputjson:
    #    write_file(jsonresultsfname, json.dumps(res))

    return res

def print_bench_footer(UNITS_PER_SECOND=1):
    print "all results are in time units per N"
    print "time units per second: %s; seconds per time unit: %s" % (UNITS_PER_SECOND, D(1)/UNITS_PER_SECOND)
@@ -0,0 +1,735 @@
# Copyright (c) 2002-2010 Zooko "Zooko" Wilcox-O'Hearn

"""
This module offers three implementations of an LRUCache, which is a dict that
drops items according to a Least-Recently-Used policy if the dict exceeds a
fixed maximum size.

Warning: if -O optimizations are not turned on then LRUCache performs
extensive self-analysis in every function call, which can take minutes
and minutes for a large cache.  Turn on -O, or comment out ``assert self._assert_invariants()``.
"""

import operator

from assertutil import _assert, precondition
from humanreadable import hr

class LRUCache:
    """
    An efficient least-recently-used cache.  It keeps an LRU queue, and when
    the number of items in the cache reaches maxsize, it removes the least
    recently used item.

    "Looking" at an item, key, or value such as with "has_key()" makes that
    item become the most recently used item.

    You can also use "refresh()" to explicitly make an item become the most
    recently used item.

    Adding an item that is already in the dict *does* make it the most-
    recently-used item although it does not change the state of the dict
    itself.

    See also SmallLRUCache (below), which is faster in some cases.
    """
    class ItemIterator:
        def __init__(self, c):
            self.c = c
            self.i = c.d[c.hs][2]
        def __iter__(self):
            return self
        def next(self):
            if self.i is self.c.ts:
                raise StopIteration
            k = self.i
            precondition(self.c.d.has_key(k), "The iterated LRUCache doesn't have the next key.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", k, self.c)
            (v, p, n,) = self.c.d[k]
            self.i = n
            return (k, v,)

    class KeyIterator:
        def __init__(self, c):
            self.c = c
            self.i = c.d[c.hs][2]
        def __iter__(self):
            return self
        def next(self):
            if self.i is self.c.ts:
                raise StopIteration
            k = self.i
            precondition(self.c.d.has_key(k), "The iterated LRUCache doesn't have the next key.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", k, self.c)
            (v, p, n,) = self.c.d[k]
            self.i = n
            return k

    class ValIterator:
        def __init__(self, c):
            self.c = c
            self.i = c.d[c.hs][2]
        def __iter__(self):
            return self
        def next(self):
            if self.i is self.c.ts:
                raise StopIteration
            precondition(self.c.d.has_key(self.i), "The iterated LRUCache doesn't have the next key.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c)
            (v, p, n,) = self.c.d[self.i]
            self.i = n
            return v

    class Sentinel:
        def __init__(self, msg):
            self.msg = msg
        def __repr__(self):
            return "<%s %s>" % (self.__class__.__name__, self.msg,)

    def __init__(self, initialdata={}, maxsize=128):
        precondition(maxsize > 0)
        self.m = maxsize+2 # The +2 is for the head and tail nodes.
        self.d = {} # k: k, v: [v, prev, next,] # the dict
        self.hs = LRUCache.Sentinel("hs")
        self.ts = LRUCache.Sentinel("ts")
        self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
        self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
        self.update(initialdata)

        assert self._assert_invariants()

    def __repr_n__(self, n=None):
        s = ["{",]
        try:
            it = self.iteritems()
            x = it.next()
            s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
            i = 1
            while (n is None) or (i < n):
                x = it.next()
                s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
                i += 1
        except StopIteration:
            pass
        s.append("}")
        return ''.join(s)

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),)

    def __str__(self):
        return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),)

    def _assert_invariants(self):
        _assert(len(self.d) <= self.m, "Size is required to be <= maxsize.", len(self.d), self.m)
        _assert((len(self.d) > 2) == (self.d[self.hs][2] is not self.ts) == (self.d[self.ts][1] is not self.hs), "Head and tail point to something other than each other if and only if there is at least one element in the dictionary.", self.hs, self.ts, len(self.d))
        foundprevsentinel = 0
        foundnextsentinel = 0
        for (k, (v, p, n,)) in self.d.iteritems():
            _assert(v not in (self.hs, self.ts,))
            _assert(p is not self.ts, "A reference to the tail sentinel may not appear in prev.", k, v, p, n)
            _assert(n is not self.hs, "A reference to the head sentinel may not appear in next.", k, v, p, n)
            _assert(p in self.d, "Each prev is required to appear as a key in the dict.", k, v, p, n)
            _assert(n in self.d, "Each next is required to appear as a key in the dict.", k, v, p, n)
            if p is self.hs:
                foundprevsentinel += 1
                _assert(foundprevsentinel <= 2, "No more than two references to the head sentinel may appear as a prev.", k, v, p, n)
            if n is self.ts:
                foundnextsentinel += 1
                _assert(foundnextsentinel <= 2, "No more than two references to the tail sentinel may appear as a next.", k, v, p, n)
        _assert(foundprevsentinel == 2, "A reference to the head sentinel is required to appear as a prev (plus a self-referential reference).")
        _assert(foundnextsentinel == 2, "A reference to the tail sentinel is required to appear as a next (plus a self-referential reference).")

        count = 0
        for (k, v,) in self.iteritems():
            _assert(k not in (self.hs, self.ts,))
            count += 1
        _assert(count == len(self.d)-2, count, len(self.d)) # -2 for the sentinels

        return True
    def freshen(self, k, strictkey=False):
        assert self._assert_invariants()

        if not self.d.has_key(k):
            if strictkey:
                raise KeyError, k
            return

        node = self.d[k]

        # relink
        self.d[node[1]][2] = node[2]
        self.d[node[2]][1] = node[1]

        # move to front
        hnode = self.d[self.hs]

        node[1] = self.hs
        node[2] = hnode[2]
        hnode[2] = k
        self.d[node[2]][1] = k

        assert self._assert_invariants()

    def iteritems(self):
        return LRUCache.ItemIterator(self)

    def itervalues(self):
        return LRUCache.ValIterator(self)

    def iterkeys(self):
        return self.__iter__()

    def __iter__(self):
        return LRUCache.KeyIterator(self)

    def __getitem__(self, key, default=None, strictkey=True):
        node = self.d.get(key)
        if not node:
            if strictkey:
                raise KeyError, key
            return default
        self.freshen(key)
        return node[0]
    def __setitem__(self, k, v=None):
        assert self._assert_invariants()

        node = self.d.get(k)
        if node:
            node[0] = v
            self.freshen(k)
            return

        if len(self.d) == self.m:
            # If this insert is going to increase the size of the cache to
            # bigger than maxsize:
            self.pop()

        hnode = self.d[self.hs]
        n = hnode[2]
        self.d[k] = [v, self.hs, n,]
        hnode[2] = k
        self.d[n][1] = k

        assert self._assert_invariants()
        return v

    def __delitem__(self, key, default=None, strictkey=True):
        """
        @param strictkey: True if you want a KeyError in the case that
            key is not there, False if you want a reference to default
            in the case that key is not there
        @param default: the object to return if key is not there; this
            is ignored if strictkey.

        @return: the value removed, or default if there is no item by
            that key and strictkey is False
        """
        assert self._assert_invariants()
        if self.d.has_key(key):
            node = self.d[key]
            # relink
            self.d[node[1]][2] = node[2]
            self.d[node[2]][1] = node[1]
            del self.d[key]
            assert self._assert_invariants()
            return node[0]
        elif strictkey:
            assert self._assert_invariants()
            raise KeyError, key
        else:
            assert self._assert_invariants()
            return default
    def has_key(self, key):
        assert self._assert_invariants()
        if self.d.has_key(key):
            self.freshen(key)
            assert self._assert_invariants()
            return True
        else:
            assert self._assert_invariants()
            return False

    def clear(self):
        assert self._assert_invariants()
        self.d.clear()
        self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
        self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
        assert self._assert_invariants()

    def update(self, otherdict):
        """
        @return: self
        """
        assert self._assert_invariants()

        if len(otherdict) >= (self.m-2): # -2 for the sentinel nodes
            # optimization
            self.clear()
            assert self._assert_invariants()

            i = otherdict.iteritems()
            try:
                while len(self.d) < self.m:
                    (k, v,) = i.next()
                    assert self._assert_invariants()
                    self[k] = v
                    assert self._assert_invariants()
                return self
            except StopIteration:
                _assert(False, "Internal error -- this should never have happened since the while loop should have terminated first.")
                return self

        for (k, v,) in otherdict.iteritems():
            assert self._assert_invariants()
            self[k] = v
            assert self._assert_invariants()
        return self
    def pop(self):
        assert self._assert_invariants()
        if len(self.d) <= 2: # the 2 is for the sentinels, which are always present
            raise KeyError, 'popitem(): dictionary is empty'
        k = self.d[self.ts][1]
        self.remove(k)
        assert self._assert_invariants()
        return k

    def popitem(self):
        assert self._assert_invariants()
        if len(self.d) <= 2: # the 2 is for the sentinels, which are always present
            raise KeyError, 'popitem(): dictionary is empty'
        k = self.d[self.ts][1]
        val = self.remove(k)
        assert self._assert_invariants()
        return (k, val,)
    def keys_unsorted(self):
        assert self._assert_invariants()
        t = self.d.copy()
        del t[self.hs]
        del t[self.ts]
        assert self._assert_invariants()
        return t.keys()

    def keys(self):
        res = [None] * len(self)
        i = 0
        for k in self.iterkeys():
            res[i] = k
            i += 1
        return res

    def values_unsorted(self):
        assert self._assert_invariants()
        t = self.d.copy()
        del t[self.hs]
        del t[self.ts]
        assert self._assert_invariants()
        return map(operator.__getitem__, t.values(), [0]*len(t))

    def values(self):
        res = [None] * len(self)
        i = 0
        for v in self.itervalues():
            res[i] = v
            i += 1
        return res

    def items(self):
        res = [None] * len(self)
        i = 0
        for it in self.iteritems():
            res[i] = it
            i += 1
        return res

    def __len__(self):
        return len(self.d) - 2

    def insert(self, key, val=None):
        assert self._assert_invariants()
        result = self.__setitem__(key, val)
        assert self._assert_invariants()
        return result

    def setdefault(self, key, default=None):
        assert self._assert_invariants()
        if not self.has_key(key):
            self[key] = default
        assert self._assert_invariants()
        return self[key]

    def get(self, key, default=None):
        return self.__getitem__(key, default, strictkey=False)

    def remove(self, key, default=None, strictkey=True):
        assert self._assert_invariants()
        result = self.__delitem__(key, default, strictkey)
        assert self._assert_invariants()
        return result
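
# A minimal usage sketch (illustrative, not part of the module): the cache
# behaves like a dict, except that lookups refresh recency and inserting
# beyond maxsize evicts the least-recently-used entry:
#
#     c = LRUCache(maxsize=2)
#     c['a'] = 1
#     c['b'] = 2
#     c['a']          # looking at 'a' makes it the most recently used item
#     c['c'] = 3      # evicts 'b', now the least recently used key
#     c.has_key('b')  # -> False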
class SmallLRUCache(dict):
    """
    SmallLRUCache is faster than LRUCache for small sets.  How small?  That
    depends on your machine and which operations you use most often.  Use
    performance profiling to determine whether the cache class that you are
    using makes any difference to the performance of your program, and if it
    does, then run "quick_bench()" in test/test_cache.py to see which cache
    implementation is faster for the size of your datasets.

    A simple least-recently-used cache.  It keeps an LRU queue, and
    when the number of items in the cache reaches maxsize, it removes
    the least recently used item.

    "Looking" at an item or a key such as with "has_key()" makes that
    item become the most recently used item.

    You can also use "refresh()" to explicitly make an item become the most
    recently used item.

    Adding an item that is already in the dict *does* make it the
    most-recently-used item although it does not change the state of
    the dict itself.
    """
    class ItemIterator:
        def __init__(self, c):
            self.c = c
            self.i = 0
        def __iter__(self):
            return self
        def next(self):
            precondition(self.i <= len(self.c._lru), "The iterated SmallLRUCache doesn't have this many elements.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c)
            if self.i == len(self.c._lru):
                raise StopIteration
            k = self.c._lru[self.i]
            precondition(dict.has_key(self.c, k), "The iterated SmallLRUCache doesn't have this key.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, k, self.c)
            self.i += 1
            return (k, dict.__getitem__(self.c, k),)

    class KeyIterator:
        def __init__(self, c):
            self.c = c
            self.i = 0
        def __iter__(self):
            return self
        def next(self):
            precondition(self.i <= len(self.c._lru), "The iterated SmallLRUCache doesn't have this many elements.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c)
            if self.i == len(self.c._lru):
                raise StopIteration
            k = self.c._lru[self.i]
            precondition(dict.has_key(self.c, k), "The iterated SmallLRUCache doesn't have this key.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, k, self.c)
            self.i += 1
            return k

    class ValueIterator:
        def __init__(self, c):
            self.c = c
            self.i = 0
        def __iter__(self):
            return self
        def next(self):
            precondition(self.i <= len(self.c._lru), "The iterated SmallLRUCache doesn't have this many elements.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c)
            if self.i == len(self.c._lru):
                raise StopIteration
            k = self.c._lru[self.i]
            precondition(dict.has_key(self.c, k), "The iterated SmallLRUCache doesn't have this key.  Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, k, self.c)
            self.i += 1
            return dict.__getitem__(self.c, k)
    def __init__(self, initialdata={}, maxsize=128):
        dict.__init__(self, initialdata)
        self._lru = initialdata.keys() # contains keys
        self._maxsize = maxsize
        over = len(self) - self._maxsize
        if over > 0:
            map(dict.__delitem__, [self]*over, self._lru[:over])
            del self._lru[:over]
        assert self._assert_invariants()

    def _assert_invariants(self):
        _assert(len(self._lru) <= self._maxsize, "Size is required to be <= maxsize.")
        _assert(len(filter(lambda x: dict.has_key(self, x), self._lru)) == len(self._lru), "Each key in self._lru is required to be in dict.", filter(lambda x: not dict.has_key(self, x), self._lru), len(self._lru), self._lru, len(self), self)
        _assert(len(filter(lambda x: x in self._lru, self.keys())) == len(self), "Each key in dict is required to be in self._lru.", filter(lambda x: x not in self._lru, self.keys()), len(self._lru), self._lru, len(self), self)
        _assert(len(self._lru) == len(self), "internal consistency", filter(lambda x: x not in self.keys(), self._lru), len(self._lru), self._lru, len(self), self)
        _assert(len(self._lru) <= self._maxsize, "internal consistency", len(self._lru), self._lru, self._maxsize)
        return True
def insert(self, key, item=None): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__setitem__(key, item) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
||||
|
|
||||
|
def setdefault(self, key, default=None): |
||||
|
assert self._assert_invariants() |
||||
|
if not self.has_key(key): |
||||
|
self[key] = default |
||||
|
assert self._assert_invariants() |
||||
|
return self[key] |
||||
|
|
||||
|
    def __setitem__(self, key, item=None):
        assert self._assert_invariants()
        if dict.has_key(self, key):
            self._lru.remove(key)
        else:
            if len(self._lru) == self._maxsize:
                # This insert would grow the cache past maxsize, so evict
                # the least-recently-used entry first.
                killkey = self._lru.pop(0)
                dict.__delitem__(self, killkey)
        dict.__setitem__(self, key, item)
        self._lru.append(key)
        assert self._assert_invariants()
        return item

    def remove(self, key, default=None, strictkey=True):
        assert self._assert_invariants()
        result = self.__delitem__(key, default, strictkey)
        assert self._assert_invariants()
        return result

    def __delitem__(self, key, default=None, strictkey=True):
        """
        @param strictkey: True if you want a KeyError in the case that
            key is not there, False if you want a reference to default
            in the case that key is not there
        @param default: the object to return if key is not there; this
            is ignored if strictkey is True

        @return: the object removed, or default if there is no item by
            that key and strictkey is False
        """
        assert self._assert_invariants()
        if dict.has_key(self, key):
            val = dict.__getitem__(self, key)
            dict.__delitem__(self, key)
            self._lru.remove(key)
            assert self._assert_invariants()
            return val
        elif strictkey:
            assert self._assert_invariants()
            raise KeyError, key
        else:
            assert self._assert_invariants()
            return default

    def clear(self):
        assert self._assert_invariants()
        dict.clear(self)
        self._lru = []
        assert self._assert_invariants()

    def update(self, otherdict):
        """
        @return: self
        """
        assert self._assert_invariants()
        if len(otherdict) > self._maxsize:
            # Handling this special case here makes it possible to implement
            # the other, more common, cases faster below.
            dict.clear(self)
            self._lru = []
            if self._maxsize > (len(otherdict) - self._maxsize):
                dict.update(self, otherdict)
                while len(self) > self._maxsize:
                    dict.popitem(self)
            else:
                for k, v, in otherdict.iteritems():
                    if len(self) == self._maxsize:
                        break
                    dict.__setitem__(self, k, v)
            self._lru = dict.keys(self)
            assert self._assert_invariants()
            return self

        for k in otherdict.iterkeys():
            if dict.has_key(self, k):
                self._lru.remove(k)
        self._lru.extend(otherdict.keys())
        dict.update(self, otherdict)

        over = len(self) - self._maxsize
        if over > 0:
            map(dict.__delitem__, [self]*over, self._lru[:over])
            del self._lru[:over]

        assert self._assert_invariants()
        return self

    def has_key(self, key):
        # Note: a successful has_key() counts as a use, so it moves the key
        # to the most-recently-used end of the LRU order.
        assert self._assert_invariants()
        if dict.has_key(self, key):
            assert key in self._lru, "key: %s, self._lru: %s" % tuple(map(hr, (key, self._lru,)))
            self._lru.remove(key)
            self._lru.append(key)
            assert self._assert_invariants()
            return True
        else:
            assert self._assert_invariants()
            return False

    def refresh(self, key, strictkey=True):
        """
        @param strictkey: raise a KeyError exception if key isn't present
        """
        assert self._assert_invariants()
        if not dict.has_key(self, key):
            if strictkey:
                raise KeyError, key
            return
        self._lru.remove(key)
        self._lru.append(key)

    def popitem(self):
        if not self._lru:
            raise KeyError, 'popitem(): dictionary is empty'
        # Pop the most-recently-used item, which lives at the end of the
        # LRU list.
        k = self._lru[-1]
        obj = self.remove(k)
        return (k, obj,)
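
Taken together, has_key()'s refresh side effect and popitem()'s most-recently-used choice determine the eviction order. A minimal usage sketch (Python 2, using only the methods defined above):

c = SmallLRUCache(maxsize=2)
c['a'] = 1
c['b'] = 2
c.has_key('a')                 # counts as a use: 'a' is now most recent
c['c'] = 3                     # cache is full, least-recently-used 'b' is evicted
assert sorted(c.keys()) == ['a', 'c']
assert c.popitem()[0] == 'c'   # popitem() takes the most-recently-used item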

class LinkedListLRUCache:
    """
    This is slower and less featureful than LRUCache. It is included
    here for comparison purposes.

    Implementation of a length-limited O(1) LRU queue.
    Built for and used by PyPE:
    http://pype.sourceforge.net
    original Copyright 2003 Josiah Carlson.
    useful methods and _assert_invariant added by Zooko for testing and benchmarking purposes
    """
    class Node:
        def __init__(self, prev, me):
            self.prev = prev
            self.me = me
            self.next = None
    def __init__(self, initialdata={}, maxsize=128):
        self._maxsize = max(maxsize, 1)
        self.d = {}
        self.first = None
        self.last = None
        for key, value in initialdata.iteritems():
            self[key] = value
    def clear(self):
        self.d = {}
        self.first = None
        self.last = None
    def update(self, otherdict):
        for (k, v,) in otherdict.iteritems():
            self[k] = v
    def setdefault(self, key, default=None):
        if not self.has_key(key):
            self[key] = default
        return self[key]
    def _assert_invariants(self):
        def lliterkeys(self):
            cur = self.first
            while cur != None:
                cur2 = cur.next
                yield cur.me[0]
                cur = cur2
        def lllen(self):
            # Ugh.
            acc = 0
            for x in lliterkeys(self):
                acc += 1
            return acc
        def llhaskey(self, key):
            # Ugh.
            for x in lliterkeys(self):
                if x is key:
                    return True
            return False
        for k in lliterkeys(self):
            _assert(self.d.has_key(k), "Each key in the linked list is required to be in the dict.", k)
        for k in self.d.iterkeys():
            _assert(llhaskey(self, k), "Each key in the dict is required to be in the linked list.", k)
        _assert(lllen(self) == len(self.d), "internal consistency", self, self.d)
        _assert(len(self.d) <= self._maxsize, "Size is required to be <= maxsize.")
        return True
    def __contains__(self, obj):
        return obj in self.d
    def has_key(self, key):
        return self.__contains__(key)
    def __getitem__(self, obj):
        a = self.d[obj].me
        self[a[0]] = a[1]
        return a[1]
    def get(self, key, default=None, strictkey=False):
        if not self.has_key(key) and strictkey:
            raise KeyError, key
        if self.has_key(key):
            return self.__getitem__(key)
        else:
            return default
    def __setitem__(self, obj, val):
        if obj in self.d:
            del self[obj]
        nobj = self.Node(self.last, (obj, val))
        if self.first is None:
            self.first = nobj
        if self.last:
            self.last.next = nobj
        self.last = nobj
        self.d[obj] = nobj
        if len(self.d) > self._maxsize:
            if self.first == self.last:
                self.first = None
                self.last = None
                return
            a = self.first
            a.next.prev = None
            self.first = a.next
            a.next = None
            del self.d[a.me[0]]
            del a
    def insert(self, key, item=None):
        return self.__setitem__(key, item)
    def __delitem__(self, obj, default=None, strictkey=True):
        if self.d.has_key(obj):
            nobj = self.d[obj]
            if nobj.prev:
                nobj.prev.next = nobj.next
            else:
                self.first = nobj.next
            if nobj.next:
                nobj.next.prev = nobj.prev
            else:
                self.last = nobj.prev
            val = self.d[obj]
            del self.d[obj]
            return val.me[1]
        elif strictkey:
            raise KeyError, obj
        else:
            return default
    def remove(self, obj, default=None, strictkey=True):
        return self.__delitem__(obj, default=default, strictkey=strictkey)
    def __iter__(self):
        cur = self.first
        while cur != None:
            cur2 = cur.next
            yield cur.me[1]
            cur = cur2
    def iteritems(self):
        cur = self.first
        while cur != None:
            cur2 = cur.next
            yield cur.me
            cur = cur2
    def iterkeys(self):
        return iter(self.d)
    def itervalues(self):
        for i,j in self.iteritems():
            yield j
    def values(self):
        l = []
        for v in self.itervalues():
            l.append(v)
        return l
    def keys(self):
        return self.d.keys()
    def __len__(self):
        return self.d.__len__()
    def popitem(self):
        if self.last is None:
            raise KeyError, 'popitem(): dictionary is empty'
        i = self.last.me
        obj = self.remove(i[0])
        return obj
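
Since the docstring above says LinkedListLRUCache exists for comparison purposes, here is a hypothetical micro-benchmark sketch. The pyutil.cache import path is an assumption (adjust it to wherever these classes actually live), and running under python -O skips SmallLRUCache's expensive internal invariant assertions:

import timeit
for klass in ("SmallLRUCache", "LinkedListLRUCache"):
    t = timeit.timeit(
        "for i in xrange(1000): c[i % 300] = i",   # a mix of hits and evictions
        setup="from pyutil.cache import %s\nc = %s(maxsize=256)" % (klass, klass),
        number=100)
    print "%s: %.3f s" % (klass, t)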
@ -0,0 +1,605 @@
"""
Tools to mess with dicts.
"""

import warnings

import copy, operator
from bisect import bisect_left, insort_left

from pyutil.assertutil import _assert, precondition

def move(k, d1, d2, strict=False):
    """
    Move the item with key k from d1 to d2.
    """
    warnings.warn("deprecated", DeprecationWarning)
    if strict and not d1.has_key(k):
        raise KeyError, k

    d2[k] = d1[k]
    del d1[k]

def subtract(d1, d2):
    """
    Remove from d1 every item whose key occurs in d2.

    @return: d1
    """
    warnings.warn("deprecated", DeprecationWarning)
    if len(d1) > len(d2):
        for k in d2.keys():
            if d1.has_key(k):
                del d1[k]
    else:
        for k in d1.keys():
            if d2.has_key(k):
                del d1[k]
    return d1
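
A quick sketch of the two deprecated helpers above (both emit a DeprecationWarning when called):

d1 = {'a': 1, 'b': 2}
d2 = {}
move('a', d1, d2)
assert d1 == {'b': 2} and d2 == {'a': 1}
# subtract() mutates and returns its first argument:
assert subtract({'x': 1, 'y': 2}, {'y': 0}) == {'x': 1}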

class DictOfSets(dict):
    def add(self, key, value):
        warnings.warn("deprecated", DeprecationWarning)
        if key in self:
            self[key].add(value)
        else:
            self[key] = set([value])

    def discard(self, key, value):
        warnings.warn("deprecated", DeprecationWarning)
        if not key in self:
            return
        self[key].discard(value)
        if not self[key]:
            del self[key]
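
A minimal sketch of DictOfSets, showing that discard() drops the key entirely once its set becomes empty:

dos = DictOfSets()
dos.add('evens', 2)
dos.add('evens', 4)
dos.discard('evens', 2)
assert dos == {'evens': set([4])}
dos.discard('evens', 4)   # the set is now empty, so the key goes away too
assert 'evens' not in dos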

class UtilDict:
    def __init__(self, initialdata={}):
        warnings.warn("deprecated", DeprecationWarning)
        self.d = {}
        self.update(initialdata)

    def del_if_present(self, key):
        if self.has_key(key):
            del self[key]

    def items_sorted_by_value(self):
        """
        @return: a sequence of (key, value,) pairs sorted according to value
        """
        l = [(x[1], x[0],) for x in self.d.iteritems()]
        l.sort()
        return [(x[1], x[0],) for x in l]

    def items_sorted_by_key(self):
        """
        @return: a sequence of (key, value,) pairs sorted according to key
        """
        l = self.d.items()
        l.sort()
        return l

    def __repr__(self, *args, **kwargs):
        return self.d.__repr__(*args, **kwargs)

    def __str__(self, *args, **kwargs):
        return self.d.__str__(*args, **kwargs)

    def __contains__(self, *args, **kwargs):
        return self.d.__contains__(*args, **kwargs)

    def __len__(self, *args, **kwargs):
        return self.d.__len__(*args, **kwargs)

    def __cmp__(self, other):
        try:
            return self.d.__cmp__(other)
        except TypeError, le:
            # Maybe we should look for a .d member in other. I know this is
            # insanely kludgey, but the Right Way To Do It is for
            # dict.__cmp__ to use structural typing ("duck typing").
            try:
                return self.d.__cmp__(other.d)
            except:
                raise le

    def __eq__(self, *args, **kwargs):
        return self.d.__eq__(*args, **kwargs)

    def __ne__(self, *args, **kwargs):
        return self.d.__ne__(*args, **kwargs)

    def __gt__(self, *args, **kwargs):
        return self.d.__gt__(*args, **kwargs)

    def __ge__(self, *args, **kwargs):
        return self.d.__ge__(*args, **kwargs)

    def __le__(self, *args, **kwargs):
        return self.d.__le__(*args, **kwargs)

    def __lt__(self, *args, **kwargs):
        return self.d.__lt__(*args, **kwargs)

    def __getitem__(self, *args, **kwargs):
        return self.d.__getitem__(*args, **kwargs)

    def __setitem__(self, *args, **kwargs):
        return self.d.__setitem__(*args, **kwargs)

    def __delitem__(self, *args, **kwargs):
        return self.d.__delitem__(*args, **kwargs)

    def __iter__(self, *args, **kwargs):
        return self.d.__iter__(*args, **kwargs)

    def clear(self, *args, **kwargs):
        return self.d.clear(*args, **kwargs)

    def copy(self, *args, **kwargs):
        return self.__class__(self.d.copy(*args, **kwargs))

    def fromkeys(self, *args, **kwargs):
        return self.__class__(self.d.fromkeys(*args, **kwargs))

    def get(self, key, default=None):
        return self.d.get(key, default)

    def has_key(self, *args, **kwargs):
        return self.d.has_key(*args, **kwargs)

    def items(self, *args, **kwargs):
        return self.d.items(*args, **kwargs)

    def iteritems(self, *args, **kwargs):
        return self.d.iteritems(*args, **kwargs)

    def iterkeys(self, *args, **kwargs):
        return self.d.iterkeys(*args, **kwargs)

    def itervalues(self, *args, **kwargs):
        return self.d.itervalues(*args, **kwargs)

    def keys(self, *args, **kwargs):
        return self.d.keys(*args, **kwargs)

    def pop(self, *args, **kwargs):
        return self.d.pop(*args, **kwargs)

    def popitem(self, *args, **kwargs):
        return self.d.popitem(*args, **kwargs)

    def setdefault(self, *args, **kwargs):
        return self.d.setdefault(*args, **kwargs)

    def update(self, *args, **kwargs):
        self.d.update(*args, **kwargs)

    def values(self, *args, **kwargs):
        return self.d.values(*args, **kwargs)
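
The items_sorted_by_value() trick above (swap each pair, sort, swap back) is shared with NumDict below; a quick sketch:

u = UtilDict({'a': 3, 'b': 1, 'c': 2})
assert u.items_sorted_by_value() == [('b', 1), ('c', 2), ('a', 3)]
assert u.items_sorted_by_key() == [('a', 3), ('b', 1), ('c', 2)]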

class NumDict:
    def __init__(self, initialdict={}):
        warnings.warn("deprecated", DeprecationWarning)
        self.d = copy.deepcopy(initialdict)

    def add_num(self, key, val, default=0):
        """
        If the key doesn't appear in self then it is created with value
        default (before the addition).
        """
        self.d[key] = self.d.get(key, default) + val

    def subtract_num(self, key, val, default=0):
        self.d[key] = self.d.get(key, default) - val

    def sum(self):
        """
        @return: the sum of all values
        """
        # The initial value 0 makes this well defined for an empty dict.
        return reduce(operator.__add__, self.d.values(), 0)

    def inc(self, key, default=0):
        """
        Increment the value associated with key in dict. If there is no
        such key, then one will be created with initial value default
        (0 unless overridden) before the increment.
        """
        self.add_num(key, 1, default)

    def dec(self, key, default=0):
        """
        Decrement the value associated with key in dict. If there is no
        such key, then one will be created with initial value default
        (0 unless overridden) before the decrement.
        """
        self.subtract_num(key, 1, default)

    def items_sorted_by_value(self):
        """
        @return: a sequence of (key, value,) pairs sorted according to value
        """
        l = [(x[1], x[0],) for x in self.d.iteritems()]
        l.sort()
        return [(x[1], x[0],) for x in l]

    def item_with_largest_value(self):
        """
        @return: the (key, value,) pair with the largest value; the dict is
            required to be non-empty
        """
        it = self.d.iteritems()
        (winner, winnerval,) = it.next()
        try:
            while True:
                n, nv = it.next()
                if nv > winnerval:
                    winner = n
                    winnerval = nv
        except StopIteration:
            pass
        return (winner, winnerval,)

    def items_sorted_by_key(self):
        """
        @return: a sequence of (key, value,) pairs sorted according to key
        """
        l = self.d.items()
        l.sort()
        return l

    def __repr__(self, *args, **kwargs):
        return self.d.__repr__(*args, **kwargs)

    def __str__(self, *args, **kwargs):
        return self.d.__str__(*args, **kwargs)

    def __contains__(self, *args, **kwargs):
        return self.d.__contains__(*args, **kwargs)

    def __len__(self, *args, **kwargs):
        return self.d.__len__(*args, **kwargs)

    def __cmp__(self, other):
        try:
            return self.d.__cmp__(other)
        except TypeError, le:
            # Maybe we should look for a .d member in other. I know this is
            # insanely kludgey, but the Right Way To Do It is for
            # dict.__cmp__ to use structural typing ("duck typing").
            try:
                return self.d.__cmp__(other.d)
            except:
                raise le

    def __eq__(self, *args, **kwargs):
        return self.d.__eq__(*args, **kwargs)

    def __ne__(self, *args, **kwargs):
        return self.d.__ne__(*args, **kwargs)

    def __gt__(self, *args, **kwargs):
        return self.d.__gt__(*args, **kwargs)

    def __ge__(self, *args, **kwargs):
        return self.d.__ge__(*args, **kwargs)

    def __le__(self, *args, **kwargs):
        return self.d.__le__(*args, **kwargs)

    def __lt__(self, *args, **kwargs):
        return self.d.__lt__(*args, **kwargs)

    def __getitem__(self, *args, **kwargs):
        return self.d.__getitem__(*args, **kwargs)

    def __setitem__(self, *args, **kwargs):
        return self.d.__setitem__(*args, **kwargs)

    def __delitem__(self, *args, **kwargs):
        return self.d.__delitem__(*args, **kwargs)

    def __iter__(self, *args, **kwargs):
        return self.d.__iter__(*args, **kwargs)

    def clear(self, *args, **kwargs):
        return self.d.clear(*args, **kwargs)

    def copy(self, *args, **kwargs):
        return self.__class__(self.d.copy(*args, **kwargs))

    def fromkeys(self, *args, **kwargs):
        return self.__class__(self.d.fromkeys(*args, **kwargs))

    def get(self, key, default=0):
        return self.d.get(key, default)

    def has_key(self, *args, **kwargs):
        return self.d.has_key(*args, **kwargs)

    def items(self, *args, **kwargs):
        return self.d.items(*args, **kwargs)

    def iteritems(self, *args, **kwargs):
        return self.d.iteritems(*args, **kwargs)

    def iterkeys(self, *args, **kwargs):
        return self.d.iterkeys(*args, **kwargs)

    def itervalues(self, *args, **kwargs):
        return self.d.itervalues(*args, **kwargs)

    def keys(self, *args, **kwargs):
        return self.d.keys(*args, **kwargs)

    def pop(self, *args, **kwargs):
        return self.d.pop(*args, **kwargs)

    def popitem(self, *args, **kwargs):
        return self.d.popitem(*args, **kwargs)

    def setdefault(self, *args, **kwargs):
        return self.d.setdefault(*args, **kwargs)

    def update(self, *args, **kwargs):
        return self.d.update(*args, **kwargs)

    def values(self, *args, **kwargs):
        return self.d.values(*args, **kwargs)
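
NumDict is essentially a counter; a minimal sketch (it emits a DeprecationWarning on construction):

n = NumDict()
for word in "the cat and the hat".split():
    n.inc(word)
assert n['the'] == 2
assert n.sum() == 5
assert n.item_with_largest_value() == ('the', 2)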

def del_if_present(d, k):
    if d.has_key(k):
        del d[k]

class ValueOrderedDict:
    """
    Note: this implementation assumes that the values do not mutate and change
    their sort order. That is, it stores the values in a sorted list and
    as items are added and removed from the dict, it makes updates to the list
    which will keep the list sorted. But if a value that is currently sitting
    in the list changes its sort order, then the internal consistency of this
    object will be lost.

    If that happens, and if assertion checking is turned on, then you will get
    an assertion failure the very next time you try to do anything with this
    ValueOrderedDict. However, those internal consistency checks are very slow
    and almost certainly unacceptable to leave turned on in production code.
    """
    class ItemIterator:
        def __init__(self, c):
            self.c = c
            self.i = 0
        def __iter__(self):
            return self
        def next(self):
            precondition(self.i <= len(self.c.l), "The iterated ValueOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, self.c)
            precondition((self.i == len(self.c.l)) or self.c.d.has_key(self.c.l[self.i][1]), "The iterated ValueOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, (self.i < len(self.c.l)) and self.c.l[self.i], self.c)
            if self.i == len(self.c.l):
                raise StopIteration
            le = self.c.l[self.i]
            self.i += 1
            return (le[1], le[0],)

    def iteritems(self):
        return ValueOrderedDict.ItemIterator(self)

    def items(self):
        return zip(map(operator.__getitem__, self.l, [1]*len(self.l)), map(operator.__getitem__, self.l, [0]*len(self.l)))

    def values(self):
        return map(operator.__getitem__, self.l, [0]*len(self.l))

    def keys(self):
        return map(operator.__getitem__, self.l, [1]*len(self.l))

    class KeyIterator:
        def __init__(self, c):
            self.c = c
            self.i = 0
        def __iter__(self):
            return self
        def next(self):
            precondition(self.i <= len(self.c.l), "The iterated ValueOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, self.c)
            precondition((self.i == len(self.c.l)) or self.c.d.has_key(self.c.l[self.i][1]), "The iterated ValueOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, (self.i < len(self.c.l)) and self.c.l[self.i], self.c)
            if self.i == len(self.c.l):
                raise StopIteration
            le = self.c.l[self.i]
            self.i += 1
            return le[1]

    def iterkeys(self):
        return ValueOrderedDict.KeyIterator(self)

    class ValueIterator:
        def __init__(self, c):
            self.c = c
            self.i = 0
        def __iter__(self):
            return self
        def next(self):
            precondition(self.i <= len(self.c.l), "The iterated ValueOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, self.c)
            precondition((self.i == len(self.c.l)) or self.c.d.has_key(self.c.l[self.i][1]), "The iterated ValueOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, (self.i < len(self.c.l)) and self.c.l[self.i], self.c)
            if self.i == len(self.c.l):
                raise StopIteration
            le = self.c.l[self.i]
            self.i += 1
            return le[0]

    def itervalues(self):
        return ValueOrderedDict.ValueIterator(self)

    def __init__(self, initialdata={}):
        warnings.warn("deprecated", DeprecationWarning)
        self.d = {} # k: key, v: val
        self.l = [] # sorted list of tuples of (val, key,)
        self.update(initialdata)
        assert self._assert_invariants()

    def __len__(self):
        return len(self.l)

    def __repr_n__(self, n=None):
        s = ["{",]
        try:
            iter = self.iteritems()
            x = iter.next()
            s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
            i = 1
            while (n is None) or (i < n):
                x = iter.next()
                s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
                i += 1
        except StopIteration:
            pass
        s.append("}")
        return ''.join(s)

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),)

    def __str__(self):
        return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),)

    def __eq__(self, other):
        for (k, v,) in other.iteritems():
            if not self.d.has_key(k) or self.d[k] != v:
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    def _assert_invariants(self):
        iter = self.l.__iter__()
        try:
            oldx = iter.next()
            while True:
                x = iter.next()
                # self.l is required to be sorted
                _assert(x >= oldx, x, oldx)
                # every element of self.l is required to appear in self.d
                _assert(self.d.has_key(x[1]), x)
                oldx = x
        except StopIteration:
            pass
        for (k, v,) in self.d.iteritems():
            i = bisect_left(self.l, (v, k,))
            # Scan forward, without running off the end, for the exact
            # (value, key,) entry, comparing by identity.
            while i < len(self.l) and ((self.l[i][0] is not v) or (self.l[i][1] is not k)):
                i += 1
            _assert(i < len(self.l), i, len(self.l), k, v, self.l)
            _assert(self.l[i][0] is v, i, v, l=self.l, d=self.d)
            _assert(self.l[i][1] is k, i, k, l=self.l, d=self.d)
        return True

    def insert(self, key, val=None):
        assert self._assert_invariants()
        result = self.__setitem__(key, val)
        assert self._assert_invariants()
        return result

    def setdefault(self, key, default=None):
        assert self._assert_invariants()
        if not self.has_key(key):
            self[key] = default
        assert self._assert_invariants()
        return self[key]

    def __setitem__(self, key, val=None):
        assert self._assert_invariants()
        if self.d.has_key(key):
            oldval = self.d[key]
            if oldval != val:
                # re-sort
                i = bisect_left(self.l, (oldval, key,))
                while (self.l[i][0] is not oldval) or (self.l[i][1] is not key):
                    i += 1
                self.l.pop(i)
                insort_left(self.l, (val, key,))
            elif oldval is not val:
                # replace
                i = bisect_left(self.l, (oldval, key,))
                while (self.l[i][0] is not oldval) or (self.l[i][1] is not key):
                    i += 1
                self.l[i] = (val, key,)
        else:
            insort_left(self.l, (val, key,))

        self.d[key] = val
        assert self._assert_invariants()
        return val

    def remove(self, key, default=None, strictkey=True):
        assert self._assert_invariants()
        result = self.__delitem__(key, default, strictkey)
        assert self._assert_invariants()
        return result

    def __getitem__(self, key, default=None, strictkey=True):
        if not self.d.has_key(key):
            if strictkey:
                raise KeyError, key
            else:
                return default
        return self.d[key]

    def __delitem__(self, key, default=None, strictkey=True):
        """
        @param strictkey: True if you want a KeyError in the case that
            key is not there, False if you want a reference to default
            in the case that key is not there
        @param default: the object to return if key is not there; this
            is ignored if strictkey is True

        @return: the object removed, or default if there is no item by
            that key and strictkey is False
        """
        assert self._assert_invariants()
        if self.d.has_key(key):
            val = self.d.pop(key)
            i = bisect_left(self.l, (val, key,))
            while (self.l[i][0] is not val) or (self.l[i][1] is not key):
                i += 1
            self.l.pop(i)
            assert self._assert_invariants()
            return val
        elif strictkey:
            assert self._assert_invariants()
            raise KeyError, key
        else:
            assert self._assert_invariants()
            return default

    def clear(self):
        assert self._assert_invariants()
        self.d.clear()
        del self.l[:]
        assert self._assert_invariants()

    def update(self, otherdict):
        """
        @return: self
        """
        assert self._assert_invariants()
        for (k, v,) in otherdict.iteritems():
            self.insert(k, v)
        assert self._assert_invariants()
        return self

    def has_key(self, key):
        assert self._assert_invariants()
        return self.d.has_key(key)

    def popitem(self):
        if not self.l:
            raise KeyError, 'popitem(): dictionary is empty'
        # Pop the item with the smallest value.
        le = self.l.pop(0)
        del self.d[le[1]]
        return (le[1], le[0],)

    def pop(self, k, default=None, strictkey=False):
        if not self.d.has_key(k):
            if strictkey:
                raise KeyError, k
            else:
                return default
        v = self.d.pop(k)
        i = bisect_left(self.l, (v, k,))
        while (self.l[i][0] is not v) or (self.l[i][1] is not k):
            i += 1
        self.l.pop(i)
        return v

    def pop_from_list(self, i=0):
        le = self.l.pop(i)
        del self.d[le[1]]
        return le[1]
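
A minimal sketch of ValueOrderedDict: iteration order, keys(), and popitem() all follow increasing value order, and assigning a new value re-sorts that entry:

vod = ValueOrderedDict({'low': 1, 'high': 3, 'mid': 2})
assert vod.keys() == ['low', 'mid', 'high']   # ordered by value, not by key
assert vod.popitem() == ('low', 1)            # smallest value comes out first
vod['mid'] = 5                                # re-sorts that entry
assert vod.items() == [('high', 3), ('mid', 5)]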
@ -0,0 +1,271 @@
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
Futz with files like a pro.
"""

import errno, exceptions, os, stat, tempfile

try:
    import bsddb
except ImportError:
    DBNoSuchFileError = None
else:
    DBNoSuchFileError = bsddb.db.DBNoSuchFileError

# read_file() and write_file() copied from Mark Seaborn's blog post. Please
# read it for complete rationale:
# http://lackingrhoticity.blogspot.com/2009/12/readfile-and-writefile-in-python.html

def read_file(filename, mode='rb'):
    """ Read the contents of the file named filename and return it in
    a string. This function closes the file handle before it returns
    (even if the underlying Python implementation's garbage collector
    doesn't). """
    fh = open(filename, mode)
    try:
        return fh.read()
    finally:
        fh.close()

def write_file(filename, data, mode='wb'):
    """ Write the string data into a file named filename. This
    function closes the file handle (ensuring that the written data is
    flushed from the perspective of the Python implementation) before
    it returns (even if the underlying Python implementation's garbage
    collector doesn't). """
    fh = open(filename, mode)
    try:
        fh.write(data)
    finally:
        fh.close()

# For backwards-compatibility in case someone is using these names. We used to
# have a superkludge in fileutil.py under these names.
def rename(src, dst, tries=4, basedelay=0.1):
    return os.rename(src, dst)

def remove(f, tries=4, basedelay=0.1):
    return os.remove(f)

def rmdir(f, tries=4, basedelay=0.1):
    return os.rmdir(f)

class _Dir(object):
    """
    Hold a set of files and subdirs and clean them all up when asked to.
    """
    def __init__(self, name, cleanup=True):
        self.name = name
        self.cleanup = cleanup
        self.files = []
        self.subdirs = set()

    def file(self, fname, mode=None):
        """
        Create a file in the tempdir and remember it so as to close() it
        before attempting to clean up the temp dir.

        @rtype: file
        """
        ffn = os.path.join(self.name, fname)
        if mode is not None:
            fo = open(ffn, mode)
        else:
            fo = open(ffn)
        self.register_file(fo)
        return fo

    def subdir(self, dirname):
        """
        Create a subdirectory in the tempdir and remember it so as to call
        shutdown() on it before attempting to clean up.

        @rtype: _Dir instance
        """
        ffn = os.path.join(self.name, dirname)
        sd = _Dir(ffn, self.cleanup)
        self.register_subdir(sd)
        make_dirs(sd.name)
        return sd

    def register_file(self, fileobj):
        """
        Remember the file object and call close() on it before attempting to
        clean up.
        """
        self.files.append(fileobj)

    def register_subdir(self, dirobj):
        """
        Remember the _Dir object and call shutdown() on it before attempting
        to clean up.
        """
        self.subdirs.add(dirobj)

    def shutdown(self):
        if self.cleanup:
            for subdir in hasattr(self, 'subdirs') and self.subdirs or []:
                subdir.shutdown()
            for fileobj in hasattr(self, 'files') and self.files or []:
                if DBNoSuchFileError is None:
                    fileobj.close() # "close()" is idempotent so we don't need to catch exceptions here
                else:
                    try:
                        fileobj.close()
                    except DBNoSuchFileError:
                        # Ah, except that the bsddb module's file-like object (a DB object) has a non-idempotent close...
                        pass

            if hasattr(self, 'name'):
                rm_dir(self.name)

    def __repr__(self):
        return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name)

    def __str__(self):
        return self.__repr__()

    def __del__(self):
        try:
            self.shutdown()
        except:
            import traceback
            traceback.print_exc()

class NamedTemporaryDirectory(_Dir):
    """
    Call tempfile.mkdtemp(), store the name of the dir in self.name, and
    rm_dir() when it gets garbage collected or "shutdown()".

    Also keep track of file objects for files within the tempdir and call
    close() on them before rm_dir(). This is a convenient way to open temp
    files within the directory, and it is very helpful on Windows because you
    can't delete a directory which contains a file which is currently open.
    """

    def __init__(self, cleanup=True, *args, **kwargs):
        """ If cleanup, then the directory will be rmrf'ed when the object is shutdown. """
        name = tempfile.mkdtemp(*args, **kwargs)
        _Dir.__init__(self, name, cleanup)

class ReopenableNamedTemporaryFile:
    """
    This uses tempfile.mkstemp() to generate a secure temp file. It then closes
    the file, leaving a zero-length file as a placeholder. You can get the
    filename with ReopenableNamedTemporaryFile.name. When the
    ReopenableNamedTemporaryFile instance is garbage collected or its shutdown()
    method is called, it deletes the file.
    """
    def __init__(self, *args, **kwargs):
        fd, self.name = tempfile.mkstemp(*args, **kwargs)
        os.close(fd)

    def __repr__(self):
        return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name)

    def __str__(self):
        return self.__repr__()

    def __del__(self):
        self.shutdown()

    def shutdown(self):
        remove(self.name)

def make_dirs(dirname, mode=0777):
    """
    An idempotent version of os.makedirs(). If the dir already exists, do
    nothing and return without raising an exception. If this call creates the
    dir, return without raising an exception. If there is an error that
    prevents creation or if the directory gets deleted after make_dirs() creates
    it and before make_dirs() checks that it exists, raise an exception.
    """
    tx = None
    try:
        os.makedirs(dirname, mode)
    except OSError, x:
        tx = x

    if not os.path.isdir(dirname):
        if tx:
            raise tx
        raise exceptions.IOError, "unknown error prevented creation of directory, or deleted the directory immediately after creation: %s" % dirname # careful not to construct an IOError with a 2-tuple, as that has a special meaning...

def rmtree(dirname):
    """
    A threadsafe and idempotent version of shutil.rmtree(). If the dir is
    already gone, do nothing and return without raising an exception. If this
    call removes the dir, return without raising an exception. If there is an
    error that prevents deletion or if the directory gets created again after
    rm_dir() deletes it and before rm_dir() checks that it is gone, raise an
    exception.
    """
    excs = []
    try:
        os.chmod(dirname, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD)
        for f in os.listdir(dirname):
            fullname = os.path.join(dirname, f)
            if os.path.isdir(fullname):
                rm_dir(fullname)
            else:
                remove(fullname)
        os.rmdir(dirname)
    except EnvironmentError, le:
        # Ignore "No such file or directory" (errno.ENOENT, or the Windows
        # error codes 2 and 3); collect any other exception.
        if le.args[0] not in (2, 3, errno.ENOENT):
            excs.append(le)
    except Exception, le:
        excs.append(le)

    # Okay, now we've recursively removed everything, ignoring any "No
    # such file or directory" errors, and collecting any other errors.

    if os.path.exists(dirname):
        if len(excs) == 1:
            raise excs[0]
        if len(excs) == 0:
            raise OSError, "Failed to remove dir for unknown reason."
        raise OSError, excs

def rm_dir(dirname):
    # Renamed to be like shutil.rmtree and unlike rmdir.
    return rmtree(dirname)

def remove_if_possible(f):
    try:
        remove(f)
    except EnvironmentError:
        pass

def remove_if_present(f):
    try:
        remove(f)
    except EnvironmentError, le:
        # Ignore "No such file or directory" (errno.ENOENT, or the Windows
        # error codes 2 and 3); re-raise any other exception.
        if le.args[0] not in (2, 3, errno.ENOENT):
            raise

def rmdir_if_possible(f):
    try:
        rmdir(f)
    except EnvironmentError:
        pass

def open_or_create(fname, binarymode=True):
    try:
        f = open(fname, binarymode and "r+b" or "r+")
    except EnvironmentError:
        f = open(fname, binarymode and "w+b" or "w+")
    return f

def du(basedir):
    size = 0

    for root, dirs, files in os.walk(basedir):
        for f in files:
            fn = os.path.join(root, f)
            size += os.path.getsize(fn)

    return size
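
A minimal sketch of NamedTemporaryDirectory in use, exercising the file-tracking behavior described in its docstring:

d = NamedTemporaryDirectory()
f = d.file('notes.txt', 'wb')
f.write('hello\n')
sub = d.subdir('more')
d.shutdown()   # closes notes.txt, shuts down the subdir, then removes the tree
assert not os.path.exists(d.name)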
@ -0,0 +1,30 @@
import warnings
import os, sys
from twisted.python.procutils import which

def find_exe(exename):
    """
    Look for something named exename or exename + ".py".

    This is a kludge.

    @return: a list of strings which, executed as a command, will run the
        found program: either a one-element list holding the path to the
        executable, or a two-element list of sys.executable followed by the
        path to the exename + ".py" script; return False if neither can be
        found
    """
    warnings.warn("deprecated", DeprecationWarning)
    exes = which(exename)
    exe = exes and exes[0]
    if not exe:
        exe = os.path.join(sys.prefix, 'scripts', exename + '.py')
    if os.path.exists(exe):
        path, ext = os.path.splitext(exe)
        if ext.lower() in [".exe", ".bat",]:
            cmd = [exe,]
        else:
            cmd = [sys.executable, exe,]
        return cmd
    else:
        return False
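
A sketch of calling find_exe(); 'trial' is just an illustrative program name here, not something this module guarantees to find:

cmd = find_exe('trial')
if cmd:
    import subprocess
    subprocess.call(cmd + ['--help'])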

@ -0,0 +1,81 @@
# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import warnings

"""
Cryptographically strong pseudo-random number generator based on SHA256.
"""

class SHA256Expander:
    """
    Provide a cryptographically strong pseudo-random number generator based on
    SHA256. Hopefully this means that no attacker will be able to predict any
    bit of output that he hasn't seen, given that he doesn't know anything about
    the seed and given that he can see as many bits of output as he desires
    except for the bit that he is trying to predict. Further it is hoped that
    an attacker will not even be able to determine whether a given stream of
    random bytes was generated by this PRNG or by flipping a coin repeatedly.
    The safety of this technique has not been verified by a Real Cryptographer.
    ... but it is similar to the PRNG in FIPS-186...

    The seed and counter are encoded in DJB's netstring format so that I
    don't have to think about the possibility of ambiguity.

    Note: I've since learned more about the theory of secure hash functions
    and the above is a strong assumption about a secure hash function. Use
    of this class should be considered deprecated and you should use a more
    well-analyzed KDF (such as the nascent standard HKDF) or stream cipher or
    whatever it is that you need.
    """
    def __init__(self, seed=None):
        warnings.warn("deprecated", DeprecationWarning)
        if seed is not None:
            self.seed(seed)

    def seed(self, seed):
        import hashlib
        self.starth = hashlib.sha256('24:pyutil hash expansion v2,10:algorithm:,6:SHA256,6:value:,')
        seedlen = len(seed)
        seedlenstr = str(seedlen)
        self.starth.update(seedlenstr)
        self.starth.update(':')
        self.starth.update(seed)
        self.starth.update(',')

        self.avail = ""
        self.counter = 0

    def get(self, bytes):
        bytesleft = bytes

        res = []

        while bytesleft > 0:
            if len(self.avail) == 0:
                # Hash the fixed seed prefix plus the current counter value
                # (as a netstring) to produce the next 32-byte block.
                h = self.starth.copy()
                counterstr = str(self.counter)
                counterstrlen = len(counterstr)
                counterstrlenstr = str(counterstrlen)
                h.update(counterstrlenstr)
                h.update(':')
                h.update(counterstr)
                h.update(',')
                self.avail = h.digest()
                self.counter += 1

            numb = min(len(self.avail), bytesleft)

            (chunk, self.avail,) = (self.avail[:numb], self.avail[numb:],)

            res.append(chunk)

            bytesleft = bytesleft - numb

        resstr = ''.join(res)
        assert len(resstr) == bytes

        return resstr

def sha256expand(inpstr, expbytes):
    return SHA256Expander(inpstr).get(expbytes)
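
Expansion is deterministic for a given seed, so two expanders built from the same seed produce identical streams; a quick sketch:

a = sha256expand('my seed', 100)
b = SHA256Expander('my seed').get(100)
assert a == b and len(a) == 100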
@ -0,0 +1,115 @@ |
|||||
|
# Copyright (c) 2001 Autonomous Zone Industries |
||||
|
# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
import exceptions, os |
||||
|
from repr import Repr |
||||
|
|
||||
|
class BetterRepr(Repr): |
||||
|
def __init__(self): |
||||
|
Repr.__init__(self) |
||||
|
|
||||
|
# Note: These levels can get adjusted dynamically! My goal is to get more info when printing important debug stuff like exceptions and stack traces and less info when logging normal events. --Zooko 2000-10-14 |
||||
|
self.maxlevel = 6 |
||||
|
self.maxdict = 6 |
||||
|
self.maxlist = 6 |
||||
|
self.maxtuple = 6 |
||||
|
self.maxstring = 300 |
||||
|
self.maxother = 300 |
||||
|
|
||||
|
def repr_function(self, obj, level): |
||||
|
if hasattr(obj, 'func_code'): |
||||
|
return '<' + obj.func_name + '() at ' + os.path.basename(obj.func_code.co_filename) + ':' + str(obj.func_code.co_firstlineno) + '>' |
||||
|
else: |
||||
|
return '<' + obj.func_name + '() at (builtin)' |
||||
|
|
||||
|
def repr_instance_method(self, obj, level): |
||||
|
if hasattr(obj, 'func_code'): |
||||
|
return '<' + obj.im_class.__name__ + '.' + obj.im_func.__name__ + '() at ' + os.path.basename(obj.im_func.func_code.co_filename) + ':' + str(obj.im_func.func_code.co_firstlineno) + '>' |
||||
|
else: |
||||
|
return '<' + obj.im_class.__name__ + '.' + obj.im_func.__name__ + '() at (builtin)' |
||||
|
|
||||
|
def repr_long(self, obj, level): |
||||
|
s = `obj` # XXX Hope this isn't too slow... |
||||
|
if len(s) > self.maxlong: |
||||
|
i = max(0, (self.maxlong-3)/2) |
||||
|
j = max(0, self.maxlong-3-i) |
||||
|
s = s[:i] + '...' + s[len(s)-j:] |
||||
|
if s[-1] == 'L': |
||||
|
return s[:-1] |
||||
|
return s |
||||
|
|
||||
|
def repr_instance(self, obj, level): |
||||
|
""" |
||||
|
If it is an instance of Exception, format it nicely (trying to emulate |
||||
|
the format that you see when an exception is actually raised, plus |
||||
|
bracketing '<''s). If it is an instance of dict call self.repr_dict() |
||||
|
on it. If it is an instance of list call self.repr_list() on it. Else |
||||
|
call Repr.repr_instance(). |
||||
|
""" |
||||
|
if isinstance(obj, exceptions.Exception): |
||||
|
# Don't cut down exception strings so much. |
||||
|
tms = self.maxstring |
||||
|
self.maxstring = max(512, tms * 4) |
||||
|
tml = self.maxlist |
||||
|
self.maxlist = max(12, tml * 4) |
||||
|
try: |
||||
|
if hasattr(obj, 'args'): |
||||
|
if len(obj.args) == 1: |
||||
|
return '<' + obj.__class__.__name__ + ': ' + self.repr1(obj.args[0], level-1) + '>' |
||||
|
else: |
||||
|
return '<' + obj.__class__.__name__ + ': ' + self.repr1(obj.args, level-1) + '>' |
||||
|
else: |
||||
|
return '<' + obj.__class__.__name__ + '>' |
||||
|
finally: |
||||
|
self.maxstring = tms |
||||
|
self.maxlist = tml |
||||
|
|
||||
|
if isinstance(obj, dict): |
||||
|
return self.repr_dict(obj, level) |
||||
|
|
||||
|
if isinstance(obj, list): |
||||
|
return self.repr_list(obj, level) |
||||
|
|
||||
|
return Repr.repr_instance(self, obj, level) |
||||
|
|
||||
|
def repr_list(self, obj, level): |
||||
|
""" |
||||
|
copied from standard repr.py and fixed to work on multithreadedly mutating lists. |
||||
|
""" |
||||
|
if level <= 0: return '[...]' |
||||
|
n = len(obj) |
||||
|
myl = obj[:min(n, self.maxlist)] |
||||
|
s = '' |
||||
|
for item in myl: |
||||
|
entry = self.repr1(item, level-1) |
||||
|
if s: s = s + ', ' |
||||
|
s = s + entry |
||||
|
if n > self.maxlist: s = s + ', ...' |
||||
|
return '[' + s + ']' |
||||
|
|
||||
|
def repr_dict(self, obj, level): |
||||
|
""" |
||||
|
copied from standard repr.py and fixed to work on multithreadedly mutating dicts. |
||||
|
""" |
||||
|
if level <= 0: return '{...}' |
||||
|
s = '' |
||||
|
n = len(obj) |
||||
|
items = obj.items()[:min(n, self.maxdict)] |
||||
|
items.sort() |
||||
|
for key, val in items: |
||||
|
entry = self.repr1(key, level-1) + ':' + self.repr1(val, level-1) |
||||
|
if s: s = s + ', ' |
||||
|
s = s + entry |
||||
|
if n > self.maxdict: s = s + ', ...' |
||||
|
return '{' + s + '}' |
||||
|
|
||||
|
# This object can be changed by other code updating this module's "brepr" |
||||
|
# variables. This is so that (a) code can use humanreadable with |
||||
|
# "from humanreadable import hr; hr(mything)", and (b) code can override |
||||
|
# humanreadable to provide application-specific human readable output |
||||
|
# (e.g. libbase32's base32id.AbbrevRepr). |
||||
|
brepr = BetterRepr() |
||||
|
|
||||
|
def hr(x): |
||||
|
return brepr.repr(x) |
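# Example (illustrative, not part of the original patch): hr() as the
# one-stop human-readable formatter.  The module name "humanreadable"
# is an assumption based on the comment above.
#
#   >>> from humanreadable import hr
#   >>> hr(ValueError('bad value'))
#   "<ValueError: 'bad value'>"
#   >>> hr(range(100))  # long lists are elided at maxlist items
#   '[0, 1, 2, 3, 4, 5, ...]'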
@ -0,0 +1,136 @@ |
|||||
|
# Copyright (c) 2001 Autonomous Zone Industries |
||||
|
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
""" |
||||
|
This module was invented when it was discovered that time.time() can return |
||||
|
decreasing answers, which was causing scheduled tasks to get executed out of |
||||
|
order. See python bug report `[ #447945 ] time.time() is not |
||||
|
non-decreasing', |
||||
|
http://sourceforge.net/tracker/index.php?func=detail&aid=447945&group_id=5470&atid=105470 |
||||
|
http://mail.python.org/pipermail/python-list/2001-August/thread.html#58296 |
||||
|
|
||||
|
After posting that bug report, I figured out that this isn't really a bug, |
||||
|
but a misunderstanding about the semantics of gettimeofday(). gettimeofday() |
||||
|
relies on the hardware clock, which is supposed to reflect the "real" time |
||||
|
i.e. the position and orientation of our planet with regard to our sun. But |
||||
|
the hardware clock gets adjusted, either for skew (because hardware clocks |
||||
|
always run a little faster or a little slower than they ought), or in order to |
||||
|
sync up with another clock e.g. through NTP. So it isn't really a bug in the |
||||
|
underlying platform (except perhaps a bug in the lack of a prominent warning |
||||
|
in the documentation), but if you depend on monotonically increasing |
||||
|
timestamps, you need to use IncreasingTimer.time() instead of the Python |
||||
|
standard library's time.time(). --Zooko 2001-08-04 |
||||
|
""" |
||||
|
|
||||
|
import time as standardtime |
||||
|
|
||||
|
# Here is a global reference to an IncreasingTimer. |
||||
|
# This singleton global IncreasingTimer instance gets created at module load time. |
||||
|
timer = None |
||||
|
|
||||
|
class IncreasingTimer: |
||||
|
def __init__(self, inittime=None): |
||||
|
""" |
||||
|
@param inittime starting time (in seconds) or None in which case it |
||||
|
will be initialized to standardtime.time() |
||||
|
""" |
||||
|
if inittime is None: |
||||
|
inittime = standardtime.time() |
||||
|
self.lasttime = inittime # This stores the most recent answer that we returned from time(). |
||||
|
self.delta = 0 # We add this to the result from the underlying standardtime.time(). |
||||
|
|
||||
|
# How big of an increment do we need to add in order to make the new float greater than the old float? |
||||
|
trye = 1.0 |
||||
|
while (self.lasttime + trye) > self.lasttime: |
||||
|
olde = trye |
||||
|
trye = trye / 2.0 |
||||
|
self._EPSILON = olde |
||||
|
|
||||
|
def time(self): |
||||
|
""" |
||||
|
This returns the current time as a float, with as much precision as |
||||
|
the underlying Python interpreter can muster. In addition, successive |
||||
|
calls to time() always return bigger numbers. (standardtime.time() |
||||
|
can sometimes return the same or even a *smaller* number!) |
||||
|
|
||||
|
On the other hand, calling time() is a bit slower than calling |
||||
|
standardtime.time(), so you might want to avoid it inside tight loops |
||||
|
and deal with decreasing or identical answers yourself. |
||||
|
|
||||
|
Now by definition you cannot "reset" this clock to an earlier state. |
||||
|
This means that if you start a Python interpreter and instantiate an |
||||
|
IncreasingTimer, and then you subsequently realize that your |
||||
|
computer's clock was set to next year, and you set it back to the |
||||
|
correct year, then subsequent calls to standardtime.time() will return |
||||
|
a number indicating this year and IncreasingTimer.time() will continue |
||||
|
to return a number indicating next year. Therefore, you should use |
||||
|
the answers from IncreasingTimer.time() in such a way that the only |
||||
|
things you depend on are correctness in the relative *order* of two |
||||
|
times, (and, with the following caveat, the relative *difference* |
||||
|
between two times as well), not the global "correctness" of the times |
||||
|
with respect to the rest of the world. |
||||
|
|
||||
|
The caveat is that if the underlying answers from standardtime.time() |
||||
|
jump *forward*, then this *does* distort the relative difference |
||||
|
between two answers from IncreasingTimer.time(). What |
||||
|
IncreasingTimer.time() does is if the underlying clock goes |
||||
|
*backwards*, then IncreasingTimer.time() still returns successively |
||||
|
higher numbers. Then if the underlying clock jumps *forwards*, |
||||
|
IncreasingTimer.time() also jumps forward the same amount. A weird |
||||
|
consequence of this is that if you were to set your system clock to |
||||
|
point to 10 years ago, and call: |
||||
|
|
||||
|
t1 = increasingtimer.time() |
||||
|
|
||||
|
and then set your system clock back to the present, and call: |
||||
|
|
||||
|
t2 = increasingtimer.time() |
||||
|
|
||||
|
, then there would be a 10-year difference between t2 and t1. |
||||
|
|
||||
|
In practice, adjustments to the underlying system time are rarely that |
||||
|
drastic, and for some systems (e.g. Mnet's DoQ, for which this module |
||||
|
was invented) it doesn't matter anyway if time jumps forward. |
||||
|
|
||||
|
Another note: Brian Warner has pointed out that there is another |
||||
|
caveat, which is due to there being a delay between successive calls |
||||
|
to IncreasingTimer.time(). When the underlying clock jumps backward, |
||||
|
then events which were scheduled before the jump and scheduled to go |
||||
|
off after the jump may be delayed by at most d, where d is the delay |
||||
|
between the two successive calls to IncreasingTimer which spanned the |
||||
|
jump. |
||||
|
|
||||
|
@singlethreaded You must guarantee that you never have more than one |
||||
|
thread in this function at a time. |
||||
|
""" |
||||
|
t = standardtime.time() + self.delta |
||||
|
lasttime = self.lasttime |
||||
|
|
||||
|
if t <= lasttime: |
||||
|
self.delta = self.delta + (lasttime - t) + self._EPSILON |
||||
|
t = lasttime + self._EPSILON |
||||
|
|
||||
|
# TODO: If you were sure that you could generate a bigger float in one |
||||
|
# pass, you could change this `while' to an `if' and optimize out a |
||||
|
# test. |
||||
|
while t <= lasttime: |
||||
|
# We can get into here only if self._EPSILON is too small to make |
||||
|
# the time float "tick over" to a new higher value.  So we |
||||
|
# (permanently) double self._EPSILON. |
||||
|
# TODO: Is doubling epsilon the best way to quickly get a |
||||
|
# minimally bigger float? |
||||
|
self._EPSILON = self._EPSILON * 2.0 |
||||
|
|
||||
|
# Delta, having smaller magnitude than t, can be incremented by |
||||
|
# more than t was incremented. (Up to the old epsilon more.) |
||||
|
# That's OK. |
||||
|
self.delta = self.delta + self._EPSILON |
||||
|
t = t + self._EPSILON |
||||
|
|
||||
|
self.lasttime = t |
||||
|
return t |
||||
|
|
||||
|
# create the global IncreasingTimer instance and `time' function |
||||
|
timer = IncreasingTimer() |
||||
|
time = timer.time |
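# Example (illustrative, not part of the original patch): successive
# calls never return a smaller or equal value, even if the underlying
# clock is stepped backwards in between.
#
#   t1 = time()
#   t2 = time()
#   assert t2 > t1  # guaranteed; standardtime.time() promises no such thing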
@ -0,0 +1,288 @@ |
|||||
|
# portions extracted from ipaddresslib by Autonomous Zone Industries, LGPL (author: Greg Smith) |
||||
|
# portions adapted from nattraverso.ipdiscover |
||||
|
# portions authored by Brian Warner, working for Allmydata |
||||
|
# most recent version authored by Zooko O'Whielacronx, working for Allmydata |
||||
|
|
||||
|
# from the Python Standard Library |
||||
|
import os, re, socket, sys |
||||
|
|
||||
|
# from Twisted |
||||
|
from twisted.internet import defer, reactor |
||||
|
from twisted.python import failure |
||||
|
from twisted.internet.protocol import DatagramProtocol |
||||
|
from twisted.internet.utils import getProcessOutput |
||||
|
from twisted.python.procutils import which |
||||
|
from twisted.python import log |
||||
|
|
||||
|
# from pyutil |
||||
|
import observer |
||||
|
|
||||
|
try: |
||||
|
import resource |
||||
|
def increase_rlimits(): |
||||
|
# We'd like to raise our soft resource.RLIMIT_NOFILE, since certain |
||||
|
# systems (OS-X, probably solaris) start with a relatively low limit |
||||
|
# (256), and some unit tests want to open up more sockets than this. |
||||
|
# Most linux systems start with both hard and soft limits at 1024, |
||||
|
# which is plenty. |
||||
|
|
||||
|
# unfortunately the values to pass to setrlimit() vary widely from |
||||
|
# one system to another. OS-X reports (256, HUGE), but the real hard |
||||
|
# limit is 10240, and accepts (-1,-1) to mean raise it to the |
||||
|
# maximum. Cygwin reports (256, -1), then ignores a request of |
||||
|
# (-1,-1): instead you have to guess at the hard limit (it appears to |
||||
|
# be 3200), so using (3200,-1) seems to work. Linux reports a |
||||
|
# sensible (1024,1024), then rejects (-1,-1) as trying to raise the |
||||
|
# maximum limit, so you could set it to (1024,1024) but you might as |
||||
|
# well leave it alone. |
||||
|
|
||||
|
try: |
||||
|
current = resource.getrlimit(resource.RLIMIT_NOFILE) |
||||
|
except AttributeError: |
||||
|
# we're probably missing RLIMIT_NOFILE |
||||
|
return |
||||
|
|
||||
|
if current[0] >= 1024: |
||||
|
# good enough, leave it alone |
||||
|
return |
||||
|
|
||||
|
try: |
||||
|
if current[1] > 0 and current[1] < 1000000: |
||||
|
# solaris reports (256, 65536) |
||||
|
resource.setrlimit(resource.RLIMIT_NOFILE, |
||||
|
(current[1], current[1])) |
||||
|
else: |
||||
|
# this one works on OS-X (bsd), and gives us 10240, but |
||||
|
# it doesn't work on linux (on which both the hard and |
||||
|
# soft limits are set to 1024 by default). |
||||
|
resource.setrlimit(resource.RLIMIT_NOFILE, (-1,-1)) |
||||
|
new = resource.getrlimit(resource.RLIMIT_NOFILE) |
||||
|
if new[0] == current[0]: |
||||
|
# probably cygwin, which ignores -1. Use a real value. |
||||
|
resource.setrlimit(resource.RLIMIT_NOFILE, (3200,-1)) |
||||
|
|
||||
|
except ValueError: |
||||
|
log.msg("unable to set RLIMIT_NOFILE: current value %s" |
||||
|
% (resource.getrlimit(resource.RLIMIT_NOFILE),)) |
||||
|
except: |
||||
|
# who knows what. It isn't very important, so log it and continue |
||||
|
log.err() |
||||
|
except ImportError: |
||||
|
def _increase_rlimits(): |
||||
|
# TODO: implement this for Windows. Although I suspect the |
||||
|
# solution might be "be running under the iocp reactor and |
||||
|
# make this function be a no-op". |
||||
|
pass |
||||
|
# pyflakes complains about two 'def FOO' statements in the same scope, |
||||
|
# since one might be shadowing the other. This hack appeases pyflakes. |
||||
|
increase_rlimits = _increase_rlimits |
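# Example (illustrative, not part of the original patch): call
# increase_rlimits() once at process startup, before opening many
# sockets; it is best-effort and logs rather than raises on failure.
#
#   increase_rlimits()
#   # e.g. on OS-X the soft RLIMIT_NOFILE should now be well above 256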
||||
|
|
||||
|
def get_local_addresses_async(target="198.41.0.4"): # A.ROOT-SERVERS.NET |
||||
|
""" |
||||
|
Return a Deferred that fires with a list of IPv4 addresses (as dotted-quad |
||||
|
strings) that are currently configured on this host, sorted in descending |
||||
|
order of how likely we think they are to work. |
||||
|
|
||||
|
@param target: we want to learn an IP address they could try using to |
||||
|
connect to us. The default value is fine, but it might help if you |
||||
|
pass the address of a host that you are actually trying to be |
||||
|
reachable by. |
||||
|
""" |
||||
|
addresses = [] |
||||
|
local_ip = get_local_ip_for(target) |
||||
|
if local_ip: |
||||
|
addresses.append(local_ip) |
||||
|
|
||||
|
if sys.platform == "cygwin": |
||||
|
d = _cygwin_hack_find_addresses(target) |
||||
|
else: |
||||
|
d = _find_addresses_via_config() |
||||
|
|
||||
|
def _collect(res): |
||||
|
for addr in res: |
||||
|
if addr != "0.0.0.0" and not addr in addresses: |
||||
|
addresses.append(addr) |
||||
|
return addresses |
||||
|
d.addCallback(_collect) |
||||
|
|
||||
|
return d |
||||
|
|
||||
|
def get_local_ip_for(target): |
||||
|
"""Find out what our IP address is for use by a given target. |
||||
|
|
||||
|
@return: the IP address as a dotted-quad string which could be used by |
||||
|
to connect to us. It might work for them, it might not. If |
||||
|
there is no suitable address (perhaps we don't currently have an |
||||
|
externally-visible interface), this will return None. |
||||
|
""" |
||||
|
|
||||
|
try: |
||||
|
target_ipaddr = socket.gethostbyname(target) |
||||
|
except socket.gaierror: |
||||
|
# DNS isn't running, or somehow we encountered an error |
||||
|
|
||||
|
# note: if an interface is configured and up, but nothing is connected to it, |
||||
|
# gethostbyname("A.ROOT-SERVERS.NET") will take 20 seconds to raise socket.gaierror |
||||
|
# . This is synchronous and occurs for each node being started, so users of certain unit |
||||
|
# tests will see something like 120s of delay, which may be enough to hit the default |
||||
|
# trial timeouts. For that reason, get_local_addresses_async() was changed to default to |
||||
|
# the numerical ip address for A.ROOT-SERVERS.NET, to avoid this DNS lookup. This also |
||||
|
# makes node startup a tad faster. |
||||
|
|
||||
|
return None |
||||
|
udpprot = DatagramProtocol() |
||||
|
port = reactor.listenUDP(0, udpprot) |
||||
|
try: |
||||
|
udpprot.transport.connect(target_ipaddr, 7) |
||||
|
localip = udpprot.transport.getHost().host |
||||
|
except socket.error: |
||||
|
# no route to that host |
||||
|
localip = None |
||||
|
port.stopListening() # note, this returns a Deferred |
||||
|
return localip |
||||
|
|
||||
|
# k: result of sys.platform, v: which kind of IP configuration reader we use |
||||
|
_platform_map = { |
||||
|
"linux-i386": "linux", # redhat |
||||
|
"linux-ppc": "linux", # redhat |
||||
|
"linux2": "linux", # debian |
||||
|
"win32": "win32", |
||||
|
"irix6-n32": "irix", |
||||
|
"irix6-n64": "irix", |
||||
|
"irix6": "irix", |
||||
|
"openbsd2": "bsd", |
||||
|
"darwin": "bsd", # Mac OS X |
||||
|
"freebsd4": "bsd", |
||||
|
"freebsd5": "bsd", |
||||
|
"freebsd6": "bsd", |
||||
|
"netbsd1": "bsd", |
||||
|
"sunos5": "sunos", |
||||
|
"cygwin": "cygwin", |
||||
|
} |
||||
|
|
||||
|
class UnsupportedPlatformError(Exception): |
||||
|
pass |
||||
|
|
||||
|
# Wow, I'm really amazed at how much mileage we've gotten out of calling |
||||
|
# the external route.exe program on windows... It appears to work on all |
||||
|
# versions so far. Still, the real system calls would be much preferred... |
||||
|
# ... thus wrote Greg Smith in time immemorial... |
||||
|
_win32_path = 'route.exe' |
||||
|
_win32_args = ('print',) |
||||
|
_win32_re = re.compile(r'^\s*\d+\.\d+\.\d+\.\d+\s.+\s(?P<address>\d+\.\d+\.\d+\.\d+)\s+(?P<metric>\d+)\s*$', flags=re.M|re.I|re.S) |
||||
|
|
||||
|
# These work in Redhat 6.x and Debian 2.2 potato |
||||
|
_linux_path = '/sbin/ifconfig' |
||||
|
_linux_re = re.compile(r'^\s*inet addr:(?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S) |
||||
|
|
||||
|
# originally NetBSD 1.4 (submitted by Rhialto), Darwin, Mac OS X, FreeBSD, OpenBSD |
||||
|
_bsd_path = '/sbin/ifconfig' |
||||
|
_bsd_args = ('-a',) |
||||
|
_bsd_re = re.compile(r'^\s+inet (?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S) |
||||
|
|
||||
|
# Irix 6.5 |
||||
|
_irix_path = '/usr/etc/ifconfig' |
||||
|
|
||||
|
# Solaris 2.x |
||||
|
_sunos_path = '/usr/sbin/ifconfig' |
||||
|
|
||||
|
class SequentialTrier(object): |
||||
|
""" I hold a list of executables to try and try each one in turn |
||||
|
until one gives me a list of IP addresses.""" |
||||
|
|
||||
|
def __init__(self, exebasename, args, regex): |
||||
|
assert not os.path.isabs(exebasename) |
||||
|
self.exes_left_to_try = which(exebasename) |
||||
|
self.exes_left_to_try.reverse() |
||||
|
self.args = args |
||||
|
self.regex = regex |
||||
|
self.o = observer.OneShotObserverList() |
||||
|
self._try_next() |
||||
|
|
||||
|
def _try_next(self): |
||||
|
if not self.exes_left_to_try: |
||||
|
self.o.fire(None) |
||||
|
else: |
||||
|
exe = self.exes_left_to_try.pop() |
||||
|
d2 = _query(exe, self.args, self.regex) |
||||
|
|
||||
|
def cb(res): |
||||
|
if res: |
||||
|
self.o.fire(res) |
||||
|
else: |
||||
|
self._try_next() |
||||
|
|
||||
|
def eb(why): |
||||
|
self._try_next() |
||||
|
|
||||
|
d2.addCallbacks(cb, eb) |
||||
|
|
||||
|
def when_tried(self): |
||||
|
return self.o.when_fired() |
||||
|
|
||||
|
# k: platform string as provided in the value of _platform_map |
||||
|
# v: tuple of (path_to_tool, args, regex,) |
||||
|
_tool_map = { |
||||
|
"linux": (_linux_path, (), _linux_re,), |
||||
|
"win32": (_win32_path, _win32_args, _win32_re,), |
||||
|
"cygwin": (_win32_path, _win32_args, _win32_re,), |
||||
|
"bsd": (_bsd_path, _bsd_args, _bsd_re,), |
||||
|
"irix": (_irix_path, _bsd_args, _bsd_re,), |
||||
|
"sunos": (_sunos_path, _bsd_args, _bsd_re,), |
||||
|
} |
||||
|
def _find_addresses_via_config(): |
||||
|
# originally by Greg Smith, hacked by Zooko to conform to Brian Warner's API. |
||||
|
|
||||
|
platform = _platform_map.get(sys.platform) |
||||
|
(pathtotool, args, regex,) = _tool_map.get(platform, ('ifconfig', _bsd_args, _bsd_re,)) |
||||
|
|
||||
|
# If the platform isn't known then we attempt BSD-style ifconfig. If it |
||||
|
# turns out that we don't get anything resembling a dotted quad IPv4 address |
||||
|
# out of it, then we'll raise UnsupportedPlatformError. |
||||
|
|
||||
|
# If pathtotool is a fully qualified path then we just try that. |
||||
|
# If it is merely an executable name then we use Twisted's |
||||
|
# "which()" utility and try each executable in turn until one |
||||
|
# gives us something that resembles a dotted-quad IPv4 address. |
||||
|
|
||||
|
if os.path.isabs(pathtotool): |
||||
|
d = _query(pathtotool, args, regex) |
||||
|
else: |
||||
|
d = SequentialTrier(pathtotool, args, regex).when_tried() |
||||
|
|
||||
|
d.addCallback(_check_result) |
||||
|
return d |
||||
|
|
||||
|
def _check_result(result): |
||||
|
if not result and sys.platform not in _platform_map: |
||||
|
return failure.Failure(UnsupportedPlatformError(sys.platform)) |
||||
|
else: |
||||
|
return result |
||||
|
|
||||
|
def _query(path, args, regex): |
||||
|
d = getProcessOutput(path, args) |
||||
|
def _parse(output): |
||||
|
addresses = [] |
||||
|
outputsplit = output.split('\n') |
||||
|
for outline in outputsplit: |
||||
|
m = regex.match(outline) |
||||
|
if m: |
||||
|
addr = m.groupdict()['address'] |
||||
|
if addr not in addresses: |
||||
|
addresses.append(addr) |
||||
|
|
||||
|
return addresses |
||||
|
d.addCallback(_parse) |
||||
|
return d |
||||
|
|
||||
|
def _cygwin_hack_find_addresses(target): |
||||
|
addresses = [] |
||||
|
for h in [target, "localhost", "127.0.0.1",]: |
||||
|
try: |
||||
|
addr = get_local_ip_for(h) |
||||
|
if addr not in addresses: |
||||
|
addresses.append(addr) |
||||
|
except socket.gaierror: |
||||
|
pass |
||||
|
|
||||
|
return defer.succeed(addresses) |
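# Example (illustrative sketch, not part of the original patch):
# printing this host's candidate addresses from a Twisted program; the
# reactor import at the top of this module is reused here.
#
#   def _show(addrs):
#       print "local addresses:", addrs
#       reactor.stop()
#   get_local_addresses_async().addCallback(_show)
#   reactor.run()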
@ -0,0 +1,456 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
# Copyright (c) 2010 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
# We require simplejson>= 2.1.0 and set its default behavior to |
||||
|
# use_decimal=True. This retains backwards compatibility with previous |
||||
|
# versions of jsonutil (although it means jsonutil now requires a recent |
||||
|
# version of simplejson). |
||||
|
|
||||
|
# http://code.google.com/p/simplejson/issues/detail?id=34 |
||||
|
|
||||
|
r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of |
||||
|
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data |
||||
|
interchange format. |
||||
|
|
||||
|
:mod:`simplejson` exposes an API familiar to users of the standard library |
||||
|
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained |
||||
|
version of the :mod:`json` library contained in Python 2.6, but maintains |
||||
|
compatibility with Python 2.4 and Python 2.5 and (currently) has |
||||
|
significant performance advantages, even without using the optional C |
||||
|
extension for speedups. |
||||
|
|
||||
|
Encoding basic Python object hierarchies:: |
||||
|
|
||||
|
>>> import simplejson as json |
||||
|
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) |
||||
|
'["foo", {"bar": ["baz", null, 1.0, 2]}]' |
||||
|
>>> print json.dumps("\"foo\bar") |
||||
|
"\"foo\bar" |
||||
|
>>> print json.dumps(u'\u1234') |
||||
|
"\u1234" |
||||
|
>>> print json.dumps('\\') |
||||
|
"\\" |
||||
|
>>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) |
||||
|
{"a": 0, "b": 0, "c": 0} |
||||
|
>>> from StringIO import StringIO |
||||
|
>>> io = StringIO() |
||||
|
>>> json.dump(['streaming API'], io) |
||||
|
>>> io.getvalue() |
||||
|
'["streaming API"]' |
||||
|
|
||||
|
Compact encoding:: |
||||
|
|
||||
|
>>> import simplejson as json |
||||
|
>>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) |
||||
|
'[1,2,3,{"4":5,"6":7}]' |
||||
|
|
||||
|
Pretty printing:: |
||||
|
|
||||
|
>>> import simplejson as json |
||||
|
>>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') |
||||
|
>>> print '\n'.join([l.rstrip() for l in s.splitlines()]) |
||||
|
{ |
||||
|
"4": 5, |
||||
|
"6": 7 |
||||
|
} |
||||
|
|
||||
|
Decoding JSON:: |
||||
|
|
||||
|
>>> import simplejson as json |
||||
|
>>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] |
||||
|
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj |
||||
|
True |
||||
|
>>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' |
||||
|
True |
||||
|
>>> from StringIO import StringIO |
||||
|
>>> io = StringIO('["streaming API"]') |
||||
|
>>> json.load(io)[0] == 'streaming API' |
||||
|
True |
||||
|
|
||||
|
Specializing JSON object decoding:: |
||||
|
|
||||
|
>>> import simplejson as json |
||||
|
>>> def as_complex(dct): |
||||
|
... if '__complex__' in dct: |
||||
|
... return complex(dct['real'], dct['imag']) |
||||
|
... return dct |
||||
|
... |
||||
|
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', |
||||
|
... object_hook=as_complex) |
||||
|
(1+2j) |
||||
|
>>> from decimal import Decimal |
||||
|
>>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') |
||||
|
True |
||||
|
|
||||
|
Specializing JSON object encoding:: |
||||
|
|
||||
|
>>> import simplejson as json |
||||
|
>>> def encode_complex(obj): |
||||
|
... if isinstance(obj, complex): |
||||
|
... return [obj.real, obj.imag] |
||||
|
... raise TypeError(repr(o) + " is not JSON serializable") |
||||
|
... |
||||
|
>>> json.dumps(2 + 1j, default=encode_complex) |
||||
|
'[2.0, 1.0]' |
||||
|
>>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) |
||||
|
'[2.0, 1.0]' |
||||
|
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) |
||||
|
'[2.0, 1.0]' |
||||
|
|
||||
|
|
||||
|
Using simplejson.tool from the shell to validate and pretty-print:: |
||||
|
|
||||
|
$ echo '{"json":"obj"}' | python -m simplejson.tool |
||||
|
{ |
||||
|
"json": "obj" |
||||
|
} |
||||
|
$ echo '{ 1.2:3.4}' | python -m simplejson.tool |
||||
|
Expecting property name: line 1 column 2 (char 2) |
||||
|
""" |
||||
|
|
||||
|
import pkg_resources |
||||
|
pkg_resources.require("simplejson>=2.1.0") |
||||
|
|
||||
|
# Now we just import all of the contents of the simplejson package and |
||||
|
# then overwrite it with a copy of the simplejson __init__.py edited |
||||
|
# to make use_decimal=True the default. |
||||
|
|
||||
|
import simplejson |
||||
|
__version__ = simplejson.__version__ |
||||
|
__all__ = simplejson.__all__ |
||||
|
# The unit tests rely on .encoder and .decoder, and although they are not |
||||
|
# included in simplejson.__all__ they are still attributes of the simplejson |
||||
|
# package since they are modules within it. |
||||
|
from simplejson import encoder, decoder, scanner |
||||
|
encoder, decoder, scanner # http://divmod.org/trac/ticket/1499 |
||||
|
__all__.extend(['encoder', 'decoder', 'scanner']) |
||||
|
__author__ = simplejson.__author__ |
||||
|
del simplejson |
||||
|
|
||||
|
from decimal import Decimal |
||||
|
|
||||
|
from simplejson.decoder import JSONDecoder, JSONDecodeError |
||||
|
JSONDecoder, JSONDecodeError # http://divmod.org/trac/ticket/1499 |
||||
|
from simplejson.encoder import JSONEncoder |
||||
|
def _import_OrderedDict(): |
||||
|
from pyutil.odict import OrderedDict |
||||
|
return OrderedDict |
||||
|
OrderedDict = _import_OrderedDict() |
||||
|
|
||||
|
def _import_c_make_encoder(): |
||||
|
    try: |
        from simplejson._speedups import make_encoder |
        return make_encoder |
    except ImportError: |
        return None |
||||
|
|
||||
|
_default_encoder = JSONEncoder( |
||||
|
skipkeys=False, |
||||
|
ensure_ascii=True, |
||||
|
check_circular=True, |
||||
|
allow_nan=True, |
||||
|
indent=None, |
||||
|
separators=None, |
||||
|
encoding='utf-8', |
||||
|
default=None, |
||||
|
use_decimal=True, |
||||
|
) |
||||
|
|
||||
|
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, |
||||
|
allow_nan=True, cls=None, indent=None, separators=None, |
||||
|
encoding='utf-8', default=None, use_decimal=True, **kw): |
||||
|
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a |
||||
|
``.write()``-supporting file-like object). |
||||
|
|
||||
|
If ``skipkeys`` is true then ``dict`` keys that are not basic types |
||||
|
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) |
||||
|
will be skipped instead of raising a ``TypeError``. |
||||
|
|
||||
|
If ``ensure_ascii`` is false, then some chunks written to ``fp`` |
||||
|
may be ``unicode`` instances, subject to normal Python ``str`` to |
||||
|
``unicode`` coercion rules. Unless ``fp.write()`` explicitly |
||||
|
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely |
||||
|
to cause an error. |
||||
|
|
||||
|
If ``check_circular`` is false, then the circular reference check |
||||
|
for container types will be skipped and a circular reference will |
||||
|
result in an ``OverflowError`` (or worse). |
||||
|
|
||||
|
If ``allow_nan`` is false, then it will be a ``ValueError`` to |
||||
|
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) |
||||
|
in strict compliance of the JSON specification, instead of using the |
||||
|
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). |
||||
|
|
||||
|
If *indent* is a string, then JSON array elements and object members |
||||
|
will be pretty-printed with a newline followed by that string repeated |
||||
|
for each level of nesting. ``None`` (the default) selects the most compact |
||||
|
representation without any newlines. For backwards compatibility with |
||||
|
versions of simplejson earlier than 2.1.0, an integer is also accepted |
||||
|
and is converted to a string with that many spaces. |
||||
|
|
||||
|
If ``separators`` is an ``(item_separator, dict_separator)`` tuple |
||||
|
then it will be used instead of the default ``(', ', ': ')`` separators. |
||||
|
``(',', ':')`` is the most compact JSON representation. |
||||
|
|
||||
|
``encoding`` is the character encoding for str instances, default is UTF-8. |
||||
|
|
||||
|
``default(obj)`` is a function that should return a serializable version |
||||
|
of obj or raise TypeError. The default simply raises TypeError. |
||||
|
|
||||
|
If *use_decimal* is true (default: ``True``) then decimal.Decimal |
||||
|
will be natively serialized to JSON with full precision. |
||||
|
|
||||
|
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the |
||||
|
``.default()`` method to serialize additional types), specify it with |
||||
|
the ``cls`` kwarg. |
||||
|
|
||||
|
""" |
||||
|
# cached encoder |
||||
|
if (not skipkeys and ensure_ascii and |
||||
|
check_circular and allow_nan and |
||||
|
cls is None and indent is None and separators is None and |
||||
|
encoding == 'utf-8' and default is None and use_decimal and not kw): |
||||
|
iterable = _default_encoder.iterencode(obj) |
||||
|
else: |
||||
|
if cls is None: |
||||
|
cls = JSONEncoder |
||||
|
iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, |
||||
|
check_circular=check_circular, allow_nan=allow_nan, indent=indent, |
||||
|
separators=separators, encoding=encoding, |
||||
|
default=default, use_decimal=use_decimal, **kw).iterencode(obj) |
||||
|
# could accelerate with writelines in some versions of Python, at |
||||
|
# a debuggability cost |
||||
|
for chunk in iterable: |
||||
|
fp.write(chunk) |
||||
|
|
||||
|
|
||||
|
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, |
||||
|
allow_nan=True, cls=None, indent=None, separators=None, |
||||
|
encoding='utf-8', default=None, use_decimal=True, **kw): |
||||
|
"""Serialize ``obj`` to a JSON formatted ``str``. |
||||
|
|
||||
|
If ``skipkeys`` is true then ``dict`` keys that are not basic types |
||||
|
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) |
||||
|
will be skipped instead of raising a ``TypeError``. |
||||
|
|
||||
|
If ``ensure_ascii`` is false, then the return value will be a |
||||
|
``unicode`` instance subject to normal Python ``str`` to ``unicode`` |
||||
|
coercion rules instead of being escaped to an ASCII ``str``. |
||||
|
|
||||
|
If ``check_circular`` is false, then the circular reference check |
||||
|
for container types will be skipped and a circular reference will |
||||
|
result in an ``OverflowError`` (or worse). |
||||
|
|
||||
|
If ``allow_nan`` is false, then it will be a ``ValueError`` to |
||||
|
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in |
||||
|
strict compliance of the JSON specification, instead of using the |
||||
|
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). |
||||
|
|
||||
|
If ``indent`` is a string, then JSON array elements and object members |
||||
|
will be pretty-printed with a newline followed by that string repeated |
||||
|
for each level of nesting. ``None`` (the default) selects the most compact |
||||
|
representation without any newlines. For backwards compatibility with |
||||
|
versions of simplejson earlier than 2.1.0, an integer is also accepted |
||||
|
and is converted to a string with that many spaces. |
||||
|
|
||||
|
If ``separators`` is an ``(item_separator, dict_separator)`` tuple |
||||
|
then it will be used instead of the default ``(', ', ': ')`` separators. |
||||
|
``(',', ':')`` is the most compact JSON representation. |
||||
|
|
||||
|
``encoding`` is the character encoding for str instances, default is UTF-8. |
||||
|
|
||||
|
``default(obj)`` is a function that should return a serializable version |
||||
|
of obj or raise TypeError. The default simply raises TypeError. |
||||
|
|
||||
|
If *use_decimal* is true (default: ``True``) then decimal.Decimal |
||||
|
will be natively serialized to JSON with full precision. |
||||
|
|
||||
|
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the |
||||
|
``.default()`` method to serialize additional types), specify it with |
||||
|
the ``cls`` kwarg. |
||||
|
|
||||
|
""" |
||||
|
# cached encoder |
||||
|
if (not skipkeys and ensure_ascii and |
||||
|
check_circular and allow_nan and |
||||
|
cls is None and indent is None and separators is None and |
||||
|
encoding == 'utf-8' and default is None and use_decimal |
||||
|
and not kw): |
||||
|
return _default_encoder.encode(obj) |
||||
|
if cls is None: |
||||
|
cls = JSONEncoder |
||||
|
return cls( |
||||
|
skipkeys=skipkeys, ensure_ascii=ensure_ascii, |
||||
|
check_circular=check_circular, allow_nan=allow_nan, indent=indent, |
||||
|
separators=separators, encoding=encoding, default=default, |
||||
|
use_decimal=use_decimal, **kw).encode(obj) |
||||
|
|
||||
|
|
||||
|
_default_decoder = JSONDecoder(encoding=None, object_hook=None, |
||||
|
object_pairs_hook=None, parse_float=Decimal) |
||||
|
|
||||
|
|
||||
|
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, |
||||
|
parse_int=None, parse_constant=None, object_pairs_hook=None, |
||||
|
use_decimal=True, **kw): |
||||
|
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing |
||||
|
a JSON document) to a Python object. |
||||
|
|
||||
|
*encoding* determines the encoding used to interpret any |
||||
|
:class:`str` objects decoded by this instance (``'utf-8'`` by |
||||
|
default). It has no effect when decoding :class:`unicode` objects. |
||||
|
|
||||
|
Note that currently only encodings that are a superset of ASCII work, |
||||
|
strings of other encodings should be passed in as :class:`unicode`. |
||||
|
|
||||
|
*object_hook*, if specified, will be called with the result of every |
||||
|
JSON object decoded and its return value will be used in place of the |
||||
|
given :class:`dict`. This can be used to provide custom |
||||
|
deserializations (e.g. to support JSON-RPC class hinting). |
||||
|
|
||||
|
*object_pairs_hook* is an optional function that will be called with |
||||
|
the result of any object literal decode with an ordered list of pairs. |
||||
|
The return value of *object_pairs_hook* will be used instead of the |
||||
|
:class:`dict`. This feature can be used to implement custom decoders |
||||
|
that rely on the order that the key and value pairs are decoded (for |
||||
|
example, :func:`collections.OrderedDict` will remember the order of |
||||
|
insertion). If *object_hook* is also defined, the *object_pairs_hook* |
||||
|
takes priority. |
||||
|
|
||||
|
*parse_float*, if specified, will be called with the string of every |
||||
|
JSON float to be decoded. By default, this is equivalent to |
||||
|
``float(num_str)``. This can be used to use another datatype or parser |
||||
|
for JSON floats (e.g. :class:`decimal.Decimal`). |
||||
|
|
||||
|
*parse_int*, if specified, will be called with the string of every |
||||
|
JSON int to be decoded. By default, this is equivalent to |
||||
|
``int(num_str)``. This can be used to use another datatype or parser |
||||
|
for JSON integers (e.g. :class:`float`). |
||||
|
|
||||
|
*parse_constant*, if specified, will be called with one of the |
||||
|
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This |
||||
|
can be used to raise an exception if invalid JSON numbers are |
||||
|
encountered. |
||||
|
|
||||
|
If *use_decimal* is true (default: ``True``) then it implies |
||||
|
parse_float=decimal.Decimal for parity with ``dump``. |
||||
|
|
||||
|
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` |
||||
|
kwarg. |
||||
|
|
||||
|
""" |
||||
|
return loads(fp.read(), |
||||
|
encoding=encoding, cls=cls, object_hook=object_hook, |
||||
|
parse_float=parse_float, parse_int=parse_int, |
||||
|
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, |
||||
|
use_decimal=use_decimal, **kw) |
||||
|
|
||||
|
|
||||
|
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, |
||||
|
parse_int=None, parse_constant=None, object_pairs_hook=None, |
||||
|
use_decimal=True, **kw): |
||||
|
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON |
||||
|
document) to a Python object. |
||||
|
|
||||
|
*encoding* determines the encoding used to interpret any |
||||
|
:class:`str` objects decoded by this instance (``'utf-8'`` by |
||||
|
default). It has no effect when decoding :class:`unicode` objects. |
||||
|
|
||||
|
Note that currently only encodings that are a superset of ASCII work, |
||||
|
strings of other encodings should be passed in as :class:`unicode`. |
||||
|
|
||||
|
*object_hook*, if specified, will be called with the result of every |
||||
|
JSON object decoded and its return value will be used in place of the |
||||
|
given :class:`dict`. This can be used to provide custom |
||||
|
deserializations (e.g. to support JSON-RPC class hinting). |
||||
|
|
||||
|
*object_pairs_hook* is an optional function that will be called with |
||||
|
the result of any object literal decode with an ordered list of pairs. |
||||
|
The return value of *object_pairs_hook* will be used instead of the |
||||
|
:class:`dict`. This feature can be used to implement custom decoders |
||||
|
that rely on the order that the key and value pairs are decoded (for |
||||
|
example, :func:`collections.OrderedDict` will remember the order of |
||||
|
insertion). If *object_hook* is also defined, the *object_pairs_hook* |
||||
|
takes priority. |
||||
|
|
||||
|
*parse_float*, if specified, will be called with the string of every |
||||
|
JSON float to be decoded. By default, this is equivalent to |
||||
|
``float(num_str)``. This can be used to use another datatype or parser |
||||
|
for JSON floats (e.g. :class:`decimal.Decimal`). |
||||
|
|
||||
|
*parse_int*, if specified, will be called with the string of every |
||||
|
JSON int to be decoded. By default, this is equivalent to |
||||
|
``int(num_str)``. This can be used to use another datatype or parser |
||||
|
for JSON integers (e.g. :class:`float`). |
||||
|
|
||||
|
*parse_constant*, if specified, will be called with one of the |
||||
|
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This |
||||
|
can be used to raise an exception if invalid JSON numbers are |
||||
|
encountered. |
||||
|
|
||||
|
If *use_decimal* is true (default: ``True``) then it implies |
||||
|
parse_float=decimal.Decimal for parity with ``dump``. |
||||
|
|
||||
|
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` |
||||
|
kwarg. |
||||
|
|
||||
|
""" |
||||
|
if (cls is None and encoding is None and object_hook is None and |
||||
|
parse_int is None and parse_float is None and |
||||
|
parse_constant is None and object_pairs_hook is None |
||||
|
and use_decimal and not kw): |
||||
|
return _default_decoder.decode(s) |
||||
|
if cls is None: |
||||
|
cls = JSONDecoder |
||||
|
if object_hook is not None: |
||||
|
kw['object_hook'] = object_hook |
||||
|
if object_pairs_hook is not None: |
||||
|
kw['object_pairs_hook'] = object_pairs_hook |
||||
|
if parse_float is not None: |
||||
|
kw['parse_float'] = parse_float |
||||
|
if parse_int is not None: |
||||
|
kw['parse_int'] = parse_int |
||||
|
if parse_constant is not None: |
||||
|
kw['parse_constant'] = parse_constant |
||||
|
if not use_decimal: |
||||
|
kw['use_decimal'] = use_decimal |
||||
|
return cls(encoding=encoding, **kw).decode(s) |
||||
|
|
||||
|
|
||||
|
def _toggle_speedups(enabled): |
||||
|
import simplejson.decoder as dec |
||||
|
import simplejson.encoder as enc |
||||
|
import simplejson.scanner as scan |
||||
|
c_make_encoder = _import_c_make_encoder() |
||||
|
if enabled: |
||||
|
dec.scanstring = dec.c_scanstring or dec.py_scanstring |
||||
|
enc.c_make_encoder = c_make_encoder |
||||
|
enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or |
||||
|
enc.py_encode_basestring_ascii) |
||||
|
scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner |
||||
|
else: |
||||
|
dec.scanstring = dec.py_scanstring |
||||
|
enc.c_make_encoder = None |
||||
|
enc.encode_basestring_ascii = enc.py_encode_basestring_ascii |
||||
|
scan.make_scanner = scan.py_make_scanner |
||||
|
dec.make_scanner = scan.make_scanner |
||||
|
global _default_decoder |
||||
|
_default_decoder = JSONDecoder( |
||||
|
encoding=None, |
||||
|
object_hook=None, |
||||
|
object_pairs_hook=None, |
||||
|
use_decimal=True, |
||||
|
) |
||||
|
global _default_encoder |
||||
|
_default_encoder = JSONEncoder( |
||||
|
skipkeys=False, |
||||
|
ensure_ascii=True, |
||||
|
check_circular=True, |
||||
|
allow_nan=True, |
||||
|
indent=None, |
||||
|
separators=None, |
||||
|
encoding='utf-8', |
||||
|
default=None, |
||||
|
use_decimal=True, |
||||
|
) |
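# Example (illustrative, not part of the original patch): with
# use_decimal=True as the default, numbers round-trip through
# decimal.Decimal instead of binary floats.
#
#   >>> from decimal import Decimal
#   >>> dumps(Decimal('1.1'))
#   '1.1'
#   >>> loads('{"price": 2.50}')['price']
#   Decimal('2.50')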
@ -0,0 +1,59 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import os, re |
||||
|
|
||||
|
def lineify_fileobjs(ifo, ofo, strip=False): |
||||
|
from pyutil.strutil import pop_trailing_newlines, split_on_newlines |
||||
|
for l in ifo: |
||||
|
for sl in split_on_newlines(pop_trailing_newlines(l)): |
||||
|
if strip: |
||||
|
sl = sl.strip() |
||||
|
ofo.write(pop_trailing_newlines(sl) + '\n') |
||||
|
|
||||
|
def lineify_file(fname, strip=False, nobak=True): |
||||
|
f = open(fname, "rU") |
||||
|
from pyutil.fileutil import ReopenableNamedTemporaryFile |
||||
|
rntf = ReopenableNamedTemporaryFile() |
||||
|
fo = open(rntf.name, "wb") |
||||
|
for l in f: |
||||
|
if strip: |
||||
|
l = l.strip() + '\n' |
||||
|
fo.write(l) |
||||
|
fo.close() |
||||
|
import shutil |
||||
|
if not nobak: |
||||
|
shutil.copyfile(fname, fname + ".lines.py-bak") |
||||
|
||||
|
try: |
||||
|
shutil.move(rntf.name, fname) |
||||
|
except EnvironmentError: |
||||
|
# Couldn't atomically overwrite, so just hope that this process doesn't die |
||||
|
# and the target file doesn't get recreated in between the following two |
||||
|
# operations: |
||||
|
if nobak: |
||||
|
os.remove(fname) |
||||
|
else: |
||||
|
shutil.move(fname, fname + ".lines.py-bak-2") |
||||
|
shutil.move(rntf.name, fname) |
||||
|
|
||||
|
def darcs_metadir_dirpruner(dirs): |
||||
|
if "_darcs" in dirs: |
||||
|
dirs.remove("_darcs") |
||||
|
|
||||
|
SCRE = re.compile(r"\.(py|php|c|h|cpp|hpp|txt|sh|pyx|pxi|html|htm)$|makefile$", re.IGNORECASE) |
||||
|
def source_code_filepruner(fname): |
||||
|
return SCRE.search(fname) |
||||
|
|
||||
|
def all_filepruner(fname): |
||||
|
return True |
||||
|
|
||||
|
def all_dirpruner(dirs): |
||||
|
return |
||||
|
|
||||
|
def lineify_all_files(dirname, strip=False, nobak=True, dirpruner=all_dirpruner, filepruner=all_filepruner): |
||||
|
for (root, dirs, files,) in os.walk(dirname): |
||||
|
dirpruner(dirs) |
||||
|
for fname in files: |
||||
|
fullfname = os.path.join(root, fname) |
||||
|
if filepruner(fullfname): |
||||
|
lineify_file(fullfname, strip=strip, nobak=nobak) |
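# Example (illustrative, not part of the original patch): normalize the
# line endings of all source files under a darcs working directory,
# skipping the _darcs metadata directory.
#
#   lineify_all_files('.', strip=True,
#                     dirpruner=darcs_metadir_dirpruner,
#                     filepruner=source_code_filepruner)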
@ -0,0 +1,19 @@ |
|||||
|
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
# This little file makes it so that we can use "log.msg()" and the contents |
||||
|
# get logged to the Twisted logger if present, else to the Python Standard |
||||
|
# Library logger. |
||||
|
|
||||
|
import warnings |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
try: |
||||
|
from twisted.python import log |
||||
|
log # http://divmod.org/trac/ticket/1499 |
||||
|
except ImportError: |
||||
|
import logging |
||||
|
class MinimalLogger: |
||||
|
def msg(self, m): |
||||
|
logging.info(m) # logging.log(0, m) would be filtered out by the stdlib logger |
||||
|
log = MinimalLogger() |
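# Example (illustrative, not part of the original patch): callers import
# log from this module and need not care which backend is in use.
#
#   log.msg("starting up")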
||||
|
|
@ -0,0 +1,106 @@ |
|||||
|
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
""" |
||||
|
A few commonly needed functions. |
||||
|
""" |
||||
|
|
||||
|
import math |
||||
|
|
||||
|
def div_ceil(n, d): |
||||
|
""" |
||||
|
The smallest integer k such that k*d >= n. |
||||
|
""" |
||||
|
return (n/d) + (n%d != 0) |
||||
|
|
||||
|
def next_multiple(n, k): |
||||
|
""" |
||||
|
The smallest multiple of k which is >= n. Note that if n is 0 then the |
||||
|
answer is 0. |
||||
|
""" |
||||
|
return div_ceil(n, k) * k |
||||
|
|
||||
|
def pad_size(n, k): |
||||
|
""" |
||||
|
The smallest number that has to be added to n to equal a multiple of k. |
||||
|
""" |
||||
|
if n%k: |
||||
|
return k - n%k |
||||
|
else: |
||||
|
return 0 |
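# Worked examples (illustrative, not part of the original patch) for the
# three helpers above:
#
#   div_ceil(10, 4)       # => 3   (smallest k with k*4 >= 10)
#   next_multiple(10, 4)  # => 12
#   pad_size(10, 4)       # => 2   (10 + 2 == 12)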
||||
|
|
||||
|
def is_power_of_k(n, k): |
||||
|
return k**int(math.log(n, k) + 0.5) == n |
||||
|
|
||||
|
def next_power_of_k(n, k): |
||||
|
p = 1 |
||||
|
while p < n: |
||||
|
p *= k |
||||
|
return p |
||||
|
|
||||
|
def ave(l): |
||||
|
return sum(l) / float(len(l)) # true division, even for integer inputs |
||||
|
|
||||
|
def log_ceil(n, b): |
||||
|
""" |
||||
|
The smallest integer k such that b^k >= n. |
||||
|
|
||||
|
log_ceil(n, 2) is the number of bits needed to store any of n values, e.g. |
||||
|
the number of bits needed to store any of 128 possible values is 7. |
||||
|
""" |
||||
|
p = 1 |
||||
|
k = 0 |
||||
|
while p < n: |
||||
|
p *= b |
||||
|
k += 1 |
||||
|
return k |
||||
|
|
||||
|
def log_floor(n, b): |
||||
|
""" |
||||
|
The largest integer k such that b^k <= n. |
||||
|
""" |
||||
|
p = 1 |
||||
|
k = 0 |
||||
|
while p <= n: |
||||
|
p *= b |
||||
|
k += 1 |
||||
|
return k - 1 |
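# Worked examples (illustrative, not part of the original patch):
#
#   log_ceil(100, 2)   # => 7   (2**7 == 128 >= 100)
#   log_floor(100, 2)  # => 6   (2**6 == 64 <= 100)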
||||
|
|
||||
|
def linear_fit_slope(ps): |
||||
|
""" |
||||
|
Single-independent-variable linear regression -- least squares method. |
||||
|
|
||||
|
At least, I *think* this function computes that answer. I no longer |
||||
|
remember where I learned this trick and at the moment I can't prove to |
||||
|
myself that this is correct. |
||||
|
|
||||
|
@param ps a sequence of tuples of (x, y) |
||||
|
""" |
||||
|
avex = ave([x for (x, y) in ps]) |
||||
|
avey = ave([y for (x, y) in ps]) |
||||
|
sxy = sum([ (x - avex) * (y - avey) for (x, y) in ps ]) |
||||
|
sxx = sum([ (x - avex) ** 2 for (x, y) in ps ]) |
||||
|
if sxx == 0: |
||||
|
return None |
||||
|
return sxy / sxx |
||||
|
|
||||
|
def permute(l): |
||||
|
""" |
||||
|
Return all possible permutations of l. |
||||
|
|
||||
|
@type l: sequence |
||||
|
@rtype a list of sequences |
||||
|
""" |
||||
|
if len(l) == 1: |
||||
|
return [l,] |
||||
|
|
||||
|
res = [] |
||||
|
for i in range(len(l)): |
||||
|
l2 = list(l[:]) |
||||
|
x = l2.pop(i) |
||||
|
for l3 in permute(l2): |
||||
|
l3.append(x) |
||||
|
res.append(l3) |
||||
|
|
||||
|
return res |
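# Example (illustrative, not part of the original patch):
#
#   permute([1, 2, 3])  # => a list of all 6 orderings of [1, 2, 3]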
||||
|
|
@ -0,0 +1,586 @@ |
|||||
|
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
# from the Python Standard Library |
||||
|
import exceptions, gc, math, operator, os, sys, types |
||||
|
|
||||
|
# from the pyutil library |
||||
|
from assertutil import precondition |
||||
|
import mathutil |
||||
|
|
||||
|
class Canary: |
||||
|
""" |
||||
|
Want to get a printout when your object is garbage collected? Then put "self.canary = Canary(self)" in your object's constructor. |
||||
|
""" |
||||
|
def __init__(self, owner): |
||||
|
self.ownerdesc = repr(owner) |
||||
|
|
||||
|
def __del__(self): |
||||
|
print "Canary says that %s is gone." % self.ownerdesc |
||||
|
|
||||
|
def estimate_mem_of_obj(o): |
||||
|
# assumes 32-bit CPUs... |
||||
|
PY_STRUCT_HEAD_LEN=4 |
||||
|
if hasattr(o, '__len__'): |
||||
|
if isinstance(o, str): |
||||
|
return PY_STRUCT_HEAD_LEN + o.__len__() * 1 |
||||
|
if isinstance(o, unicode): |
||||
|
return PY_STRUCT_HEAD_LEN + o.__len__() * 4 # 4 depends on implementation and is approximate |
||||
|
if isinstance(o, (tuple, list,)): |
||||
|
return PY_STRUCT_HEAD_LEN + o.__len__() * 4 |
||||
|
if isinstance(o, (dict, set,)): |
||||
|
return PY_STRUCT_HEAD_LEN + o.__len__() * 4 * 2 * 2 # approximate |
||||
|
if isinstance(o, int): |
||||
|
return PY_STRUCT_HEAD_LEN + 4 |
||||
|
    if isinstance(o, long): |
        if o < 1: |
            return PY_STRUCT_HEAD_LEN |
        else: |
            return PY_STRUCT_HEAD_LEN + math.log(o) / 5 # the 5 was empirically determined (it is approximate) |
||||
|
if isinstance(o, float): |
||||
|
return PY_STRUCT_HEAD_LEN + 8 |
||||
|
|
||||
|
# Uh-oh... I wonder what we are missing here... |
||||
|
return PY_STRUCT_HEAD_LEN |
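# Example (illustrative, not part of the original patch), using the
# assumed 32-bit layout above:
#
#   estimate_mem_of_obj("abcd")  # => 4 + 4*1 == 8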
||||
|
|
||||
|
def check_for_obj_leakage(f, *args, **kwargs): |
||||
|
""" |
||||
|
The idea is that I am going to invoke f(), then run gc.collect(), then run |
||||
|
gc.get_objects() to get a complete list of all objects in the system, then |
||||
|
invoke f() a second time, then run gc.collect(), then run gc.get_objects() |
||||
|
to get a list of all the objects *now* in the system. |
||||
|
|
||||
|
Then I return a tuple of two things: the first element of the tuple is the |
||||
|
difference between the number of objects in the second list and the number |
||||
|
of objects in the first list. |
||||
|
|
||||
|
I.e., if this number is zero then you can be pretty sure there is no memory |
||||
|
leak, unless f is deleting some objects and replacing them by exactly the |
||||
|
same number of objects but the new objects take up more memory. If this |
||||
|
number is greater than zero then you can be pretty sure there is a memory |
||||
|
leak, unless f is doing some memoization/caching behavior and it will |
||||
|
eventually stabilize, which you can detect by running |
||||
|
check_for_obj_leakage() more times and seeing if it stabilizes. |
||||
|
|
||||
|
(Actually we run f() followed by gc.collect() one time before we start in |
||||
|
order to account for any static objects which are created the first time |
||||
|
you run f() and then re-used after that.) |
||||
|
|
||||
|
The second element in the return value is the set of all objects which were |
||||
|
present in the second list and not in the first. Some of these objects |
||||
|
might be memory-leaked objects, or perhaps f deleted some objects and |
||||
|
replaced them with equivalent objects, in which case these objects are not |
||||
|
leaked. |
||||
|
|
||||
|
(We actually invoke gc.collect() three times in a row in case there are |
||||
|
objects which get collected in the first pass that have finalizers which |
||||
|
create new reference-cycled objects... "3" is a superstitious number -- we |
||||
|
figure most of the time the finalizers of the things produced by the first |
||||
|
round of finalizers won't themselves produce another round of |
||||
|
reference-cycled objects.) |
||||
|
""" |
||||
|
f() |
||||
|
gc.collect();gc.collect();gc.collect() |
||||
|
f() |
||||
|
gc.collect();gc.collect();gc.collect() |
||||
|
r1 = gc.get_objects() |
||||
|
f() |
||||
|
gc.collect();gc.collect();gc.collect() |
||||
|
r2 = gc.get_objects() |
||||
|
d2 = dict([(id(x), x) for x in r2]) |
||||
|
|
||||
|
# Now remove everything from r1, and r1 itself, from d2. |
||||
|
del d2[id(r1)] |
||||
|
for o in r1: |
||||
|
if id(o) in d2: |
||||
|
del d2[id(o)] |
||||
|
|
||||
|
return (len(r2) - len(r1) - 1, d2) |
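# Example (illustrative, not part of the original patch): checking a
# deliberately leaky function.  A dict is appended because
# gc.get_objects() only tracks container objects.
#
#   leaked = []
#   def suspect():
#       leaked.append({})  # leaks one gc-tracked dict per call
#   growth, new_objs = check_for_obj_leakage(suspect)
#   # growth should be about 1; about 0 suggests no leak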
||||
|
|
||||
|
def measure_obj_leakage(f, numsamples=2**7, iterspersample=2**4, *args, **kwargs): |
||||
|
""" |
||||
|
The idea is we are going to use count_all_objects() to see how many |
||||
|
objects are in use, and keep track of that number with respect to how |
||||
|
many times we've invoked f(), and return the slope of the best linear |
||||
|
fit. |
||||
|
|
||||
|
@param numsamples: recommended: 2**7 |
||||
|
|
||||
|
@param iterspersample: how many times f() should be invoked per sample; |
||||
|
Basically, choose iterspersample such that |
||||
|
iterspersample * numsamples * |
||||
|
how-long-it-takes-to-compute-f() is slightly less |
||||
|
than how long you are willing to wait for this |
||||
|
leak test. |
||||
|
|
||||
|
@return: the slope of the best linear fit, which can be interpreted as 'the |
||||
|
approximate number of Python objects created and not destroyed |
||||
|
per invocation of f()' |
||||
|
""" |
||||
|
precondition(numsamples > 0, "numsamples is required to be positive.", numsamples) |
||||
|
precondition(iterspersample > 0, "iterspersample is required to be positive.", iterspersample) |
||||
|
|
||||
|
resiters = [None]*numsamples # values: iters |
||||
|
resnumobjs = [None]*numsamples # values: numobjs |
||||
|
|
||||
|
totaliters = 0 |
||||
|
for i in range(numsamples): |
||||
|
for j in range(iterspersample): |
||||
|
f(*args, **kwargs) |
||||
|
totaliters = totaliters + iterspersample |
||||
|
resiters[i] = totaliters |
||||
|
gc.collect() |
||||
|
resnumobjs[i] = count_all_objects() |
||||
|
# print "totaliters: %s, numobjs: %s" % (resiters[-1], resnumobjs[-1],) |
||||
|
|
||||
|
avex = float(reduce(operator.__add__, resiters)) / len(resiters) |
||||
|
avey = float(reduce(operator.__add__, resnumobjs)) / len(resnumobjs) |
||||
|
sxy = reduce(operator.__add__, map(lambda a, avex=avex, avey=avey: (a[0] - avex) * (a[1] - avey), zip(resiters, resnumobjs))) |
||||
|
sxx = reduce(operator.__add__, map(lambda a, avex=avex: (a - avex) ** 2, resiters)) |
||||
|
return sxy / sxx |
||||
|
|
||||
|
def linear_fit_slope(xs, ys): |
||||
|
avex = float(reduce(operator.__add__, xs)) / len(xs) |
||||
|
avey = float(reduce(operator.__add__, ys)) / len(ys) |
||||
|
sxy = reduce(operator.__add__, map(lambda a, avex=avex, avey=avey: (a[0] - avex) * (a[1] - avey), zip(xs, ys))) |
||||
|
sxx = reduce(operator.__add__, map(lambda a, avex=avex: (a - avex) ** 2, xs)) |
||||
|
return sxy / sxx |
||||
|
|
||||
|
def measure_ref_leakage(f, numsamples=2**7, iterspersample=2**4, *args, **kwargs): |
||||
|
""" |
||||
|
The idea is we are going to use sys.gettotalrefcount() to see how many |
||||
|
references are extant, and keep track of that number with respect to how |
||||
|
many times we've invoked f(), and return the slope of the best linear |
||||
|
fit. |
||||
|
|
||||
|
@param numsamples: recommended: 2**7 |
||||
|
|
||||
|
@param iterspersample: how many times f() should be invoked per sample; |
||||
|
Basically, choose iterspersample such that |
||||
|
iterspersample * numsamples * |
||||
|
how-long-it-takes-to-compute-f() is slightly less |
||||
|
than how long you are willing to wait for this |
||||
|
leak test. |
||||
|
|
||||
|
@return: the slope of the best linear fit, which can be interpreted as 'the |
||||
|
approximate number of Python references created and not |
||||
|
nullified per invocation of f()' |
||||
|
""" |
||||
|
precondition(numsamples > 0, "numsamples is required to be positive.", numsamples) |
||||
|
precondition(iterspersample > 0, "iterspersample is required to be positive.", iterspersample) |
||||
|
|
||||
|
try: |
||||
|
sys.gettotalrefcount() |
||||
|
except AttributeError, le: |
||||
|
raise AttributeError(le, "Probably this is not a debug build of Python, so it doesn't have a sys.gettotalrefcount function.") |
||||
|
resiters = [None]*numsamples # values: iters |
||||
|
resnumrefs = [None]*numsamples # values: numrefs |
||||
|
|
||||
|
totaliters = 0 |
||||
|
for i in range(numsamples): |
||||
|
for j in range(iterspersample): |
||||
|
f(*args, **kwargs) |
||||
|
totaliters = totaliters + iterspersample |
||||
|
resiters[i] = totaliters |
||||
|
gc.collect() |
||||
|
resnumrefs[i] = sys.gettotalrefcount() |
||||
|
# print "totaliters: %s, numrefss: %s" % (resiters[-1], resnumrefs[-1],) |
||||
|
|
||||
|
avex = float(reduce(operator.__add__, resiters)) / len(resiters) |
||||
|
avey = float(reduce(operator.__add__, resnumrefs)) / len(resnumrefs) |
||||
|
sxy = reduce(operator.__add__, map(lambda a, avex=avex, avey=avey: (a[0] - avex) * (a[1] - avey), zip(resiters, resnumrefs))) |
||||
|
sxx = reduce(operator.__add__, map(lambda a, avex=avex: (a - avex) ** 2, resiters)) |
||||
|
return sxy / sxx |
||||
|
|
||||
|
class NotSupportedException(exceptions.StandardError): |
||||
|
""" |
||||
|
Just an exception class. It is thrown by get_mem_usage if the OS does |
||||
|
not support the operation. |
||||
|
""" |
||||
|
pass |
||||
|
|
||||
|
def get_mem_used(): |
||||
|
""" |
||||
|
This only works on Linux, and only if the /proc/$PID/statm output is the |
||||
|
same as that in linux kernel 2.6. Also `os.getpid()' must work. |
||||
|
|
||||
|
@return: tuple of (res, virt) used by this process |
||||
|
""" |
||||
|
try: |
||||
|
import resource |
||||
|
except ImportError: |
||||
|
raise NotSupportedException |
||||
|
# sample output from cat /proc/$PID/statm: |
||||
|
# 14317 3092 832 279 0 2108 0 |
||||
|
a = os.popen("cat /proc/%s/statm 2>/dev/null" % os.getpid()).read().split() |
||||
|
if not a: |
||||
|
raise NotSupportedException |
||||
|
return (int(a[1]) * resource.getpagesize(), int(a[0]) * resource.getpagesize(),) |
||||
|
|
||||
|
def get_mem_used_res(): |
||||
|
""" |
||||
|
This only works on Linux, and only if the /proc/$PID/statm output is the |
||||
|
same as that in linux kernel 2.6. Also `os.getpid()' must work. |
||||
|
""" |
||||
|
try: |
||||
|
import resource |
||||
|
except ImportError: |
||||
|
raise NotSupportedException |
||||
|
# sample output from cat /proc/$PID/statm: |
||||
|
# 14317 3092 832 279 0 2108 0 |
||||
|
a = os.popen("cat /proc/%s/statm" % os.getpid()).read().split() |
||||
|
if not len(a) > 1: |
||||
|
raise NotSupportedException |
||||
|
return int(a[1]) * resource.getpagesize() |
||||
|
|
||||
|
def get_mem_usage_virt_and_res(): |
||||
|
""" |
||||
|
This only works on Linux, and only if the /proc/$PID/statm output is the |
||||
|
same as that in linux kernel 2.6. Also `os.getpid()' must work. |
||||
|
""" |
||||
|
try: |
||||
|
import resource |
||||
|
except ImportError: |
||||
|
raise NotSupportedException |
||||
|
# sample output from cat /proc/$PID/statm: |
||||
|
# 14317 3092 832 279 0 2108 0 |
||||
|
a = os.popen("cat /proc/%s/statm" % os.getpid()).read().split() |
||||
|
if not len(a) > 1: |
||||
|
raise NotSupportedException |
||||
|
return (int(a[0]) * resource.getpagesize(), int(a[1]) * resource.getpagesize(),) |
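# Illustrative usage sketch (an assumption, not original code): these
# helpers only work on Linux with a 2.6-style /proc, so callers should be
# prepared for NotSupportedException.
def _demo_print_mem_usage():
    try:
        virt, res = get_mem_usage_virt_and_res()
    except NotSupportedException:
        print "memory introspection is not supported on this platform"
    else:
        print "virt: %d bytes, res: %d bytes" % (virt, res)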
||||
|
|
||||
|
class Measurer(object): |
||||
|
def __init__(self, f, numsamples=2**7, iterspersample=2**4, *args, **kwargs): |
||||
|
""" |
||||
|
        @param f: a callable; if it returns a Deferred then the memory will
            not be measured and the next iteration will not be started until
            the Deferred fires; otherwise the memory will be measured and the
            next iteration started when f returns.
        """
||||
|
self.f = f |
||||
|
self.numsamples = numsamples |
||||
|
self.iterspersample = iterspersample |
||||
|
self.args = args |
||||
|
self.kwargs = kwargs |
||||
|
# from twisted |
||||
|
from twisted.internet import defer |
||||
|
self.d = defer.Deferred() |
||||
|
|
||||
|
def when_complete(self): |
||||
|
return self.d |
||||
|
|
||||
|
def _invoke(self): |
||||
|
d = self.f(*self.args, **self.kwargs) |
||||
|
# from twisted |
||||
|
from twisted.internet import defer |
||||
|
if isinstance(d, defer.Deferred): |
||||
|
d.addCallback(self._after) |
||||
|
else: |
||||
|
self._after(None) |
||||
|
|
||||
|
def start(self): |
||||
|
self.resiters = [None]*self.numsamples # values: iters |
||||
|
self.resmemusage = [None]*self.numsamples # values: memusage |
||||
|
self.totaliters = 0 |
||||
|
self.i = 0 |
||||
|
self.j = 0 |
||||
|
self._invoke() |
||||
|
|
||||
|
    def _after(self, o):
        self.j += 1
        if self.j < self.iterspersample:
            self._invoke()
            return

        # One sample's worth of invocations has completed: record the
        # cumulative iteration count and the resident memory usage, then
        # advance the sample index. (Recording before incrementing avoids
        # skipping slot 0 and overrunning the end of the result lists.)
        self.totaliters += self.iterspersample
        self.resiters[self.i] = self.totaliters
        self.resmemusage[self.i] = get_mem_used_res()
        self.i += 1
        self.j = 0

        if self.i < self.numsamples:
            self._invoke()
            return

        self.d.callback(linear_fit_slope(self.resiters, self.resmemusage))
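# Illustrative usage sketch (an assumption, not original code): driving a
# Measurer under the Twisted reactor. Measurement uses get_mem_used_res(),
# so this is Linux-only.
def _demo_measurer():
    from twisted.internet import reactor
    m = Measurer(lambda: [None] * 10, numsamples=2**3, iterspersample=2**2)
    def done(slope):
        print "approx. bytes leaked per invocation: %s" % (slope,)
        reactor.stop()
    m.when_complete().addCallback(done)
    reactor.callWhenRunning(m.start)
    reactor.run()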
||||
|
|
||||
|
def measure_mem_leakage(f, numsamples=2**7, iterspersample=2**4, *args, **kwargs): |
||||
|
""" |
||||
|
This does the same thing as measure_obj_leakage() but instead of using |
||||
|
count_all_objects() it uses get_mem_usage(), which is currently |
||||
|
implemented for Linux and barely implemented for Mac OS X. |
||||
|
|
||||
|
    @param numsamples: recommended: 2**7

    @param iterspersample: how many times `f()' should be invoked per sample;
        basically, choose `iterspersample' such that
        (iterspersample * numsamples * how-long-it-takes-to-compute-`f()')
        is slightly less than how long you are willing to wait for this
        leak test.

    @return: the slope of the best linear fit, which can be interpreted as
        'the approximate number of system bytes allocated and not freed
        per invocation of f()'
    """
||||
|
precondition(numsamples > 0, "numsamples is required to be positive.", numsamples) |
||||
|
precondition(iterspersample > 0, "iterspersample is required to be positive.", iterspersample) |
||||
|
|
||||
|
resiters = [None]*numsamples # values: iters |
||||
|
resmemusage = [None]*numsamples # values: memusage |
||||
|
|
||||
|
totaliters = 0 |
||||
|
for i in range(numsamples): |
||||
|
for j in range(iterspersample): |
||||
|
f(*args, **kwargs) |
||||
|
totaliters = totaliters + iterspersample |
||||
|
resiters[i] = totaliters |
||||
|
gc.collect() |
||||
|
resmemusage[i] = get_mem_used_res() |
||||
|
# print "totaliters: %s, numobjs: %s" % (resiters[-1], resmemusage[-1],) |
||||
|
|
||||
|
avex = float(reduce(operator.__add__, resiters)) / len(resiters) |
||||
|
avey = float(reduce(operator.__add__, resmemusage)) / len(resmemusage) |
||||
|
sxy = reduce(operator.__add__, map(lambda a, avex=avex, avey=avey: (a[0] - avex) * (a[1] - avey), zip(resiters, resmemusage))) |
||||
|
sxx = reduce(operator.__add__, map(lambda a, avex=avex: (a - avex) ** 2, resiters)) |
||||
|
if sxx == 0: |
||||
|
return None |
||||
|
return sxy / sxx |
||||
|
|
||||
|
def describe_object(o, FunctionType=types.FunctionType, MethodType=types.MethodType, InstanceType=types.InstanceType): |
||||
|
""" |
||||
|
    For human analysis, when you are attempting to understand where all the
    memory is going. Argument o is an object; the return value is a string
    describing the object.
||||
|
""" |
||||
|
sl = [] |
||||
|
if isinstance(o, FunctionType): |
||||
|
try: |
||||
|
sl.append("<type 'function' %s>" % str(o.func_name)) |
||||
|
except: |
||||
|
pass |
||||
|
elif isinstance(o, MethodType): |
||||
|
try: |
||||
|
sl.append("<type 'method' %s>" % str(o.im_func.func_name)) |
||||
|
except: |
||||
|
pass |
||||
|
elif isinstance(o, InstanceType): |
||||
|
try: |
||||
|
sl.append("<type 'instance' %s>" % str(o.__class__.__name__)) |
||||
|
except: |
||||
|
pass |
||||
|
else: |
||||
|
sl.append(str(type(o))) |
||||
|
|
||||
|
try: |
||||
|
sl.append(str(len(o))) |
||||
|
except: |
||||
|
pass |
||||
|
return ''.join(sl) |
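# Illustrative examples (an assumption, not original code) of the compact
# descriptions produced above: a type tag, plus the length when o has one.
def _demo_describe_object():
    print describe_object([1, 2, 3])         # "<type 'list'>3"
    print describe_object(describe_object)   # "<type 'function' describe_object>"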
||||
|
|
||||
|
import dictutil |
||||
|
def describe_object_with_dict_details(o): |
||||
|
sl = [] |
||||
|
sl.append(str(type(o))) |
||||
|
if isinstance(o, types.FunctionType): |
||||
|
try: |
||||
|
sl.append(str(o.func_name)) |
||||
|
except: |
||||
|
pass |
||||
|
elif isinstance(o, types.MethodType): |
||||
|
try: |
||||
|
sl.append(str(o.im_func.func_name)) |
||||
|
except: |
||||
|
pass |
||||
|
try: |
||||
|
sl.append(str(len(o))) |
||||
|
except: |
||||
|
pass |
||||
|
if isinstance(o, dict) and o: |
||||
|
sl.append('-') |
||||
|
nd = dictutil.NumDict() |
||||
|
for k, v in o.iteritems(): |
||||
|
nd.inc((describe_object(k), describe_object(v),)) |
||||
|
k, v = nd.item_with_largest_value() |
||||
|
sl.append("-") |
||||
|
iterator = o.iteritems() |
||||
|
k,v = iterator.next() |
||||
|
sl.append(describe_object(k)) |
||||
|
sl.append(":") |
||||
|
sl.append(describe_object(v)) |
||||
|
return ''.join(sl) |
||||
|
|
||||
|
def describe_dict(o): |
||||
|
sl = ['<dict'] |
||||
|
l = len(o) |
||||
|
sl.append(str(l)) |
||||
|
if l: |
||||
|
sl.append("-") |
||||
|
iterator = o.iteritems() |
||||
|
firstitem=True |
||||
|
try: |
||||
|
while True: |
||||
|
if firstitem: |
||||
|
firstitem = False |
||||
|
else: |
||||
|
sl.append(", ") |
||||
|
k,v = iterator.next() |
||||
|
sl.append(describe_object(k)) |
||||
|
sl.append(": ") |
||||
|
sl.append(describe_object(v)) |
||||
|
except StopIteration: |
||||
|
pass |
||||
|
sl.append('>') |
||||
|
return ''.join(sl) |
||||
|
|
||||
|
def count_all_objects(): |
||||
|
ids = set() |
||||
|
ls = locals() |
||||
|
import inspect |
||||
|
cf = inspect.currentframe() |
||||
|
for o in gc.get_objects(): |
||||
|
if o is ids or o is ls or o is cf: |
||||
|
continue |
||||
|
if not id(o) in ids: |
||||
|
ids.add(id(o)) |
||||
|
for so in gc.get_referents(o): |
||||
|
if not id(so) in ids: |
||||
|
ids.add(id(so)) |
||||
|
return len(ids) |
||||
|
|
||||
|
def visit_all_objects(f): |
||||
|
""" |
||||
|
Brian and I *think* that this gets all objects. This is predicated on the |
||||
|
assumption that every object either participates in gc, or is at most one |
||||
|
hop from an object that participates in gc. This was Brian's clever idea. |
||||
|
""" |
||||
|
ids = set() |
||||
|
ls = locals() |
||||
|
import inspect |
||||
|
cf = inspect.currentframe() |
||||
|
for o in gc.get_objects(): |
||||
|
if o is ids or o is ls or o is cf: |
||||
|
continue |
||||
|
if not id(o) in ids: |
||||
|
ids.add(id(o)) |
||||
|
f(o) |
||||
|
for so in gc.get_referents(o): |
||||
|
if not id(so) in ids: |
||||
|
ids.add(id(so)) |
||||
|
f(so) |
||||
|
|
||||
|
def get_all_objects(): |
||||
|
objs = [] |
||||
|
def addit(o): |
||||
|
objs.append(o) |
||||
|
visit_all_objects(addit) |
||||
|
return objs |
||||
|
|
||||
|
def describe_all_objects(): |
||||
|
import dictutil |
||||
|
d = dictutil.NumDict() |
||||
|
for o in get_all_objects(): |
||||
|
d.inc(describe_object(o)) |
||||
|
return d |
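# Illustrative usage sketch (an assumption, not original code): the NumDict
# returned above maps description -> count, so its largest value is the most
# numerous kind of live object -- a first clue when hunting a leak.
def _demo_most_numerous_object():
    (desc, count,) = describe_all_objects().item_with_largest_value()
    print "most numerous: %s (%d instances)" % (desc, count)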
||||
|
|
||||
|
def dump_description_of_object(o, f): |
||||
|
f.write("%x" % (id(o),)) |
||||
|
f.write("-") |
||||
|
f.write(describe_object(o)) |
||||
|
f.write("\n") |
||||
|
|
||||
|
def dump_description_of_object_refs(o, f): |
||||
|
# This holds the ids of all referents that we've already dumped. |
||||
|
dumped = set() |
||||
|
|
||||
|
# First, any __dict__ items |
||||
|
try: |
||||
|
itemsiter = o.__dict__.iteritems() |
||||
|
except: |
||||
|
pass |
||||
|
else: |
||||
|
for k, v in itemsiter: |
||||
|
try: |
||||
|
idr = id(v) |
||||
|
if idr not in dumped: |
||||
|
dumped.add(idr) |
||||
|
f.write("%d:"%len(k)) |
||||
|
f.write(k) |
||||
|
f.write(",") |
||||
|
f.write("%0x,"%idr) |
||||
|
except: |
||||
|
pass |
||||
|
|
||||
|
# Then anything else that gc.get_referents() returns. |
||||
|
for r in gc.get_referents(o): |
||||
|
idr = id(r) |
||||
|
if idr not in dumped: |
||||
|
dumped.add(idr) |
||||
|
f.write("0:,%0x,"%idr) |
||||
|
|
||||
|
def dump_descriptions_of_all_objects(f): |
||||
|
ids = set() |
||||
|
ls = locals() |
||||
|
for o in gc.get_objects(): |
||||
|
if o is f or o is ids or o is ls: |
||||
|
continue |
||||
|
if not id(o) in ids: |
||||
|
ids.add(id(o)) |
||||
|
dump_description_of_object(o, f) |
||||
|
        for so in gc.get_referents(o):
            if so is f or so is ids or so is ls:
                continue
||||
|
if not id(so) in ids: |
||||
|
ids.add(id(so)) |
||||
|
dump_description_of_object(so, f) |
||||
|
ls = None # break reference cycle |
||||
|
return len(ids) |
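# Illustrative usage sketch (an assumption, not original code; the path is
# hypothetical): snapshotting the heap to a file for offline analysis.
def _demo_dump_heap(path="/tmp/heap-dump.txt"):
    f = open(path, "w")
    try:
        n = dump_descriptions_of_all_objects(f)
    finally:
        f.close()
    print "dumped %d object descriptions to %s" % (n, path)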
||||
|
|
||||
|
def dump_description_of_object_with_refs(o, f): |
||||
|
f.write("%0x" % (id(o),)) |
||||
|
f.write("-") |
||||
|
desc = describe_object(o) |
||||
|
f.write("%d:"%len(desc)) |
||||
|
f.write(desc) |
||||
|
f.write(",") |
||||
|
|
||||
|
dump_description_of_object_refs(o, f) |
||||
|
f.write("\n") |
||||
|
|
||||
|
def dump_descriptions_of_all_objects_with_refs(f): |
||||
|
ids = set() |
||||
|
ls = locals() |
||||
|
for o in gc.get_objects(): |
||||
|
if o is f or o is ids or o is ls: |
||||
|
continue |
||||
|
if not id(o) in ids: |
||||
|
ids.add(id(o)) |
||||
|
dump_description_of_object_with_refs(o, f) |
||||
|
        for so in gc.get_referents(o):
            if so is f or so is ids or so is ls:
                continue
||||
|
if not id(so) in ids: |
||||
|
ids.add(id(so)) |
||||
|
dump_description_of_object_with_refs(so, f) |
||||
|
ls = None # break reference cycle |
||||
|
return len(ids) |
||||
|
|
||||
|
import re |
||||
|
NRE = re.compile("[1-9][0-9]*$") |
||||
|
def undump_descriptions_of_all_objects(inf): |
||||
|
d = {} |
||||
|
for l in inf: |
||||
|
dash=l.find('-') |
||||
|
        if dash == -1:
            raise ValueError("expected '-' separator in dump line: %r" % (l,))
||||
|
mo = NRE.search(l) |
||||
|
if mo: |
||||
|
typstr = l[dash+1:mo.start(0)] |
||||
|
num=int(mo.group(0)) |
||||
|
            if str(num) != mo.group(0):
                raise ValueError("malformed length suffix in dump line: %r" % (mo.group(0),))
||||
|
else: |
||||
|
typstr = l[dash+1:] |
||||
|
num = None |
||||
|
d[l[:dash]] = (typstr, num,) |
||||
|
return d |
@ -0,0 +1,52 @@ |
|||||
|
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn |
||||
|
# mailto:zooko@zooko.com |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
import dictutil |
||||
|
|
||||
|
class NummedObj(object): |
||||
|
""" |
||||
|
This is useful for nicer debug printouts. Instead of objects of the same class being |
||||
|
distinguished from one another by their memory address, they each get a unique number, which |
||||
|
can be read as "the first object of this class", "the second object of this class", etc. This |
||||
|
    is especially useful because separate runs of a program will yield identical debug output
    (assuming that the objects get created in the same order in each run). This makes it possible
||||
|
to diff outputs from separate runs to see what changed, without having to ignore a difference |
||||
|
on every line due to different memory addresses of objects. |
||||
|
""" |
||||
|
objnums = dictutil.NumDict() # key: class names, value: highest used object number |
||||
|
|
||||
|
def __init__(self, klass=None): |
||||
|
""" |
||||
|
@param klass: in which class are you counted? If default value of `None', then self.__class__ will be used. |
||||
|
""" |
||||
|
if klass is None: |
||||
|
klass = self.__class__ |
||||
|
self._classname = klass.__name__ |
||||
|
|
||||
|
NummedObj.objnums.inc(self._classname) |
||||
|
self._objid = NummedObj.objnums[self._classname] |
||||
|
|
||||
|
def __repr__(self): |
||||
|
return "<%s #%d>" % (self._classname, self._objid,) |
||||
|
|
||||
|
def __lt__(self, other): |
||||
|
return (self._objid, self._classname,) < (other._objid, other._classname,) |
||||
|
|
||||
|
def __le__(self, other): |
||||
|
return (self._objid, self._classname,) <= (other._objid, other._classname,) |
||||
|
|
||||
|
def __eq__(self, other): |
||||
|
return (self._objid, self._classname,) == (other._objid, other._classname,) |
||||
|
|
||||
|
def __ne__(self, other): |
||||
|
return (self._objid, self._classname,) != (other._objid, other._classname,) |
||||
|
|
||||
|
def __gt__(self, other): |
||||
|
return (self._objid, self._classname,) > (other._objid, other._classname,) |
||||
|
|
||||
|
def __ge__(self, other): |
||||
|
return (self._objid, self._classname,) >= (other._objid, other._classname,) |
||||
|
|
||||
|
def __hash__(self): |
||||
|
return id(self) |
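# Illustrative usage sketch (an assumption, not original code; the class
# name is hypothetical):
#
#   class Connection(NummedObj):
#       def __init__(self):
#           NummedObj.__init__(self)
#
#   conn = Connection()
#   print repr(conn)   # e.g. "<Connection #1>"; the next one is "#2", etc.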
@ -0,0 +1,99 @@ |
|||||
|
# -*- test-case-name: allmydata.test.test_observer -*- |
||||
|
|
||||
|
from twisted.internet import defer |
||||
|
try: |
||||
|
from foolscap.eventual import eventually |
||||
|
eventually # http://divmod.org/trac/ticket/1499 |
||||
|
except ImportError: |
||||
|
from twisted.internet import reactor |
||||
|
def eventually(f, *args, **kwargs): |
||||
|
return reactor.callLater(0, f, *args, **kwargs) |
||||
|
|
||||
|
"""The idiom we use is for the observed object to offer a method named |
||||
|
'when_something', which returns a deferred. That deferred will be fired when |
||||
|
something happens. The way this is typically implemented is that the observed |
||||
|
has an ObserverList whose when_fired method is called in the observed's |
||||
|
'when_something'.""" |
||||
|
|
||||
|
class OneShotObserverList: |
||||
|
"""A one-shot event distributor.""" |
||||
|
def __init__(self): |
||||
|
self._fired = False |
||||
|
self._result = None |
||||
|
self._watchers = [] |
||||
|
self.__repr__ = self._unfired_repr |
||||
|
|
||||
|
def _unfired_repr(self): |
||||
|
return "<OneShotObserverList [%s]>" % (self._watchers, ) |
||||
|
|
||||
|
def _fired_repr(self): |
||||
|
return "<OneShotObserverList -> %s>" % (self._result, ) |
||||
|
|
||||
|
def _get_result(self): |
||||
|
return self._result |
||||
|
|
||||
|
def when_fired(self): |
||||
|
if self._fired: |
||||
|
return defer.succeed(self._get_result()) |
||||
|
d = defer.Deferred() |
||||
|
self._watchers.append(d) |
||||
|
return d |
||||
|
|
||||
|
def fire(self, result): |
||||
|
assert not self._fired |
||||
|
self._fired = True |
||||
|
self._result = result |
||||
|
self._fire(result) |
||||
|
|
||||
|
def _fire(self, result): |
||||
|
for w in self._watchers: |
||||
|
eventually(w.callback, result) |
||||
|
del self._watchers |
||||
|
self.__repr__ = self._fired_repr |
||||
|
|
||||
|
def fire_if_not_fired(self, result): |
||||
|
if not self._fired: |
||||
|
self.fire(result) |
||||
|
|
||||
|
class LazyOneShotObserverList(OneShotObserverList): |
||||
|
""" |
||||
|
    a variant of OneShotObserverList which does not retain
    the result it handles, but rather retains a callable
    through which it retrieves the data if and when needed.
||||
|
""" |
||||
|
def __init__(self): |
||||
|
OneShotObserverList.__init__(self) |
||||
|
|
||||
|
def _get_result(self): |
||||
|
return self._result_producer() |
||||
|
|
||||
|
def fire(self, result_producer): |
||||
|
""" |
||||
|
@param result_producer: a no-arg callable which |
||||
|
returns the data which is to be considered the |
||||
|
'result' for this observer list. note that this |
||||
|
function may be called multiple times - once |
||||
|
upon initial firing, and potentially once more |
||||
|
for each subsequent when_fired() deferred created |
||||
|
""" |
||||
|
assert not self._fired |
||||
|
self._fired = True |
||||
|
self._result_producer = result_producer |
||||
|
if self._watchers: # if not, don't call result_producer |
||||
|
self._fire(self._get_result()) |
||||
|
|
||||
|
class ObserverList: |
||||
|
"""A simple class to distribute events to a number of subscribers.""" |
||||
|
|
||||
|
def __init__(self): |
||||
|
self._watchers = [] |
||||
|
|
||||
|
def subscribe(self, observer): |
||||
|
self._watchers.append(observer) |
||||
|
|
||||
|
def unsubscribe(self, observer): |
||||
|
self._watchers.remove(observer) |
||||
|
|
||||
|
def notify(self, *args, **kwargs): |
||||
|
for o in self._watchers: |
||||
|
eventually(o, *args, **kwargs) |
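# Illustrative usage sketch (an assumption, not original code): notify()
# delivers to each subscriber via eventually(), i.e. on a later reactor
# turn, never synchronously from inside notify().
def _demo_observerlist():
    ol = ObserverList()
    def on_event(msg):
        print "got: %s" % (msg,)
    ol.subscribe(on_event)
    ol.notify("hello")   # on_event("hello") runs on a subsequent turn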
@ -0,0 +1,552 @@ |
|||||
|
# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn |
||||
|
|
||||
|
""" |
||||
|
This module offers an OrderedDict, which is a dict that preserves
insertion order. See PEP 372 for a description of the problem. This
implementation uses a linked list to get good O(1) asymptotic
performance. (Actually it is O(hashtable-update-cost), but whatever.)

Warning: if -O optimizations are not turned on then OrderedDict performs
extensive self-analysis in every function call, which can take many
minutes for a large dict. Turn on -O, or comment out the calls to
assert self._assert_invariants()
"""
||||
|
|
||||
|
import operator |
||||
|
|
||||
|
from assertutil import _assert, precondition |
||||
|
from humanreadable import hr |
||||
|
|
||||
|
class OrderedDict: |
||||
|
""" |
||||
|
An efficient ordered dict. |
||||
|
|
||||
|
    Adding an item that is already in the dict *does not* make it the
    most-recently-added item, although it may change the state of the
    dict itself (if the value is different than the previous value).
||||
|
|
||||
|
See also SmallOrderedDict (below), which is faster in some cases. |
||||
|
""" |
||||
|
class ItemIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = c.d[c.ts][1] |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
if self.i is self.c.hs: |
||||
|
raise StopIteration |
||||
|
k = self.i |
||||
|
precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c) |
||||
|
(v, p, n,) = self.c.d[k] |
||||
|
self.i = p |
||||
|
return (k, v,) |
||||
|
|
||||
|
class KeyIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = c.d[c.ts][1] |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
if self.i is self.c.hs: |
||||
|
raise StopIteration |
||||
|
k = self.i |
||||
|
precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c) |
||||
|
(v, p, n,) = self.c.d[k] |
||||
|
self.i = p |
||||
|
return k |
||||
|
|
||||
|
class ValIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = c.d[c.ts][1] |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
if self.i is self.c.hs: |
||||
|
raise StopIteration |
||||
|
precondition(self.c.d.has_key(self.i), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) |
||||
|
(v, p, n,) = self.c.d[self.i] |
||||
|
self.i = p |
||||
|
return v |
||||
|
|
||||
|
class Sentinel: |
||||
|
def __init__(self, msg): |
||||
|
self.msg = msg |
||||
|
def __repr__(self): |
||||
|
return "<%s %s>" % (self.__class__.__name__, self.msg,) |
||||
|
|
||||
|
def __init__(self, initialdata={}): |
||||
|
self.d = {} # k: k, v: [v, prev, next,] # the dict |
||||
|
self.hs = OrderedDict.Sentinel("hs") |
||||
|
self.ts = OrderedDict.Sentinel("ts") |
||||
|
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. |
||||
|
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. |
||||
|
self.update(initialdata) |
||||
|
|
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def __repr_n__(self, n=None): |
||||
|
s = ["{",] |
||||
|
try: |
||||
|
iter = self.iteritems() |
||||
|
x = iter.next() |
||||
|
s.append(str(x[0])); s.append(": "); s.append(str(x[1])) |
||||
|
i = 1 |
||||
|
while (n is None) or (i < n): |
||||
|
x = iter.next() |
||||
|
s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1])) |
||||
|
except StopIteration: |
||||
|
pass |
||||
|
s.append("}") |
||||
|
return ''.join(s) |
||||
|
|
||||
|
def __repr__(self): |
||||
|
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),) |
||||
|
|
||||
|
def __str__(self): |
||||
|
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),) |
||||
|
|
||||
|
def _assert_invariants(self): |
||||
|
_assert((len(self.d) > 2) == (self.d[self.hs][2] is not self.ts) == (self.d[self.ts][1] is not self.hs), "Head and tail point to something other than each other if and only if there is at least one element in the dictionary.", self.hs, self.ts, len(self.d)) |
||||
|
foundprevsentinel = 0 |
||||
|
foundnextsentinel = 0 |
||||
|
for (k, (v, p, n,)) in self.d.iteritems(): |
||||
|
_assert(v not in (self.hs, self.ts,)) |
||||
|
_assert(p is not self.ts, "A reference to the tail sentinel may not appear in prev.", k, v, p, n) |
||||
|
_assert(n is not self.hs, "A reference to the head sentinel may not appear in next.", k, v, p, n) |
||||
|
_assert(p in self.d, "Each prev is required to appear as a key in the dict.", k, v, p, n) |
||||
|
_assert(n in self.d, "Each next is required to appear as a key in the dict.", k, v, p, n) |
||||
|
if p is self.hs: |
||||
|
foundprevsentinel += 1 |
||||
|
_assert(foundprevsentinel <= 2, "No more than two references to the head sentinel may appear as a prev.", k, v, p, n) |
||||
|
            if n is self.ts:
                foundnextsentinel += 1
                _assert(foundnextsentinel <= 2, "No more than two references to the tail sentinel may appear as a next.", k, v, p, n)
        _assert(foundprevsentinel == 2, "A reference to the head sentinel is required to appear as a prev (plus the sentinel's self-referential entry).")
        _assert(foundnextsentinel == 2, "A reference to the tail sentinel is required to appear as a next (plus the sentinel's self-referential entry).")
||||
|
|
||||
|
count = 0 |
||||
|
for (k, v,) in self.iteritems(): |
||||
|
_assert(k not in (self.hs, self.ts,), k, self.hs, self.ts) |
||||
|
count += 1 |
||||
|
_assert(count == len(self.d)-2, count, len(self.d)) # -2 for the sentinels |
||||
|
|
||||
|
return True |
||||
|
|
||||
|
def move_to_most_recent(self, k, strictkey=False): |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
if not self.d.has_key(k): |
||||
|
if strictkey: |
||||
|
raise KeyError, k |
||||
|
return |
||||
|
|
||||
|
node = self.d[k] |
||||
|
|
||||
|
# relink |
||||
|
self.d[node[1]][2] = node[2] |
||||
|
self.d[node[2]][1] = node[1] |
||||
|
|
||||
|
# move to front |
||||
|
hnode = self.d[self.hs] |
||||
|
|
||||
|
node[1] = self.hs |
||||
|
node[2] = hnode[2] |
||||
|
hnode[2] = k |
||||
|
self.d[node[2]][1] = k |
||||
|
|
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def iteritems(self): |
||||
|
return OrderedDict.ItemIterator(self) |
||||
|
|
||||
|
def itervalues(self): |
||||
|
return OrderedDict.ValIterator(self) |
||||
|
|
||||
|
def iterkeys(self): |
||||
|
return self.__iter__() |
||||
|
|
||||
|
def __iter__(self): |
||||
|
return OrderedDict.KeyIterator(self) |
||||
|
|
||||
|
def __getitem__(self, key, default=None, strictkey=True): |
||||
|
node = self.d.get(key) |
||||
|
if not node: |
||||
|
if strictkey: |
||||
|
raise KeyError, key |
||||
|
return default |
||||
|
return node[0] |
||||
|
|
||||
|
def __setitem__(self, k, v=None): |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
node = self.d.get(k) |
||||
|
if node: |
||||
|
node[0] = v |
||||
|
return |
||||
|
|
||||
|
hnode = self.d[self.hs] |
||||
|
n = hnode[2] |
||||
|
self.d[k] = [v, self.hs, n,] |
||||
|
hnode[2] = k |
||||
|
self.d[n][1] = k |
||||
|
|
||||
|
assert self._assert_invariants() |
||||
|
return v |
||||
|
|
||||
|
def __delitem__(self, key, default=None, strictkey=True): |
||||
|
""" |
||||
|
@param strictkey: True if you want a KeyError in the case that |
||||
|
key is not there, False if you want a reference to default |
||||
|
in the case that key is not there |
||||
|
@param default: the object to return if key is not there; This |
||||
|
is ignored if strictkey. |
||||
|
|
||||
|
@return: the value removed or default if there is not item by |
||||
|
that key and strictkey is False |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
if self.d.has_key(key): |
||||
|
node = self.d[key] |
||||
|
# relink |
||||
|
self.d[node[1]][2] = node[2] |
||||
|
self.d[node[2]][1] = node[1] |
||||
|
del self.d[key] |
||||
|
assert self._assert_invariants() |
||||
|
return node[0] |
||||
|
elif strictkey: |
||||
|
assert self._assert_invariants() |
||||
|
raise KeyError, key |
||||
|
else: |
||||
|
assert self._assert_invariants() |
||||
|
return default |
||||
|
|
||||
|
def has_key(self, key): |
||||
|
assert self._assert_invariants() |
||||
|
if self.d.has_key(key): |
||||
|
assert self._assert_invariants() |
||||
|
return True |
||||
|
else: |
||||
|
assert self._assert_invariants() |
||||
|
return False |
||||
|
|
||||
|
def clear(self): |
||||
|
assert self._assert_invariants() |
||||
|
self.d.clear() |
||||
|
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. |
||||
|
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def update(self, otherdict): |
||||
|
""" |
||||
|
@return: self |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
for (k, v,) in otherdict.iteritems(): |
||||
|
assert self._assert_invariants() |
||||
|
self[k] = v |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def pop(self): |
||||
|
assert self._assert_invariants() |
||||
|
        if len(self.d) <= 2: # 2 == the number of sentinel entries
            raise KeyError, 'pop(): dictionary is empty'
||||
|
k = self.d[self.hs][2] |
||||
|
self.remove(k) |
||||
|
assert self._assert_invariants() |
||||
|
return k |
||||
|
|
||||
|
def popitem(self): |
||||
|
assert self._assert_invariants() |
||||
|
        if len(self.d) <= 2: # 2 == the number of sentinel entries
            raise KeyError, 'popitem(): dictionary is empty'
||||
|
k = self.d[self.hs][2] |
||||
|
val = self.remove(k) |
||||
|
assert self._assert_invariants() |
||||
|
return (k, val,) |
||||
|
|
||||
|
def keys_unsorted(self): |
||||
|
assert self._assert_invariants() |
||||
|
t = self.d.copy() |
||||
|
del t[self.hs] |
||||
|
del t[self.ts] |
||||
|
assert self._assert_invariants() |
||||
|
return t.keys() |
||||
|
|
||||
|
def keys(self): |
||||
|
res = [None] * len(self) |
||||
|
i = 0 |
||||
|
for k in self.iterkeys(): |
||||
|
res[i] = k |
||||
|
i += 1 |
||||
|
return res |
||||
|
|
||||
|
def values_unsorted(self): |
||||
|
assert self._assert_invariants() |
||||
|
t = self.d.copy() |
||||
|
del t[self.hs] |
||||
|
del t[self.ts] |
||||
|
assert self._assert_invariants() |
||||
|
return map(operator.__getitem__, t.values(), [0]*len(t)) |
||||
|
|
||||
|
def values(self): |
||||
|
res = [None] * len(self) |
||||
|
i = 0 |
||||
|
for v in self.itervalues(): |
||||
|
res[i] = v |
||||
|
i += 1 |
||||
|
return res |
||||
|
|
||||
|
def items(self): |
||||
|
res = [None] * len(self) |
||||
|
i = 0 |
||||
|
for it in self.iteritems(): |
||||
|
res[i] = it |
||||
|
i += 1 |
||||
|
return res |
||||
|
|
||||
|
def __len__(self): |
||||
|
return len(self.d) - 2 |
||||
|
|
||||
|
def insert(self, key, val=None): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__setitem__(key, val) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
||||
|
|
||||
|
def setdefault(self, key, default=None): |
||||
|
assert self._assert_invariants() |
||||
|
if not self.has_key(key): |
||||
|
self[key] = default |
||||
|
assert self._assert_invariants() |
||||
|
return self[key] |
||||
|
|
||||
|
def get(self, key, default=None): |
||||
|
return self.__getitem__(key, default, strictkey=False) |
||||
|
|
||||
|
def remove(self, key, default=None, strictkey=True): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__delitem__(key, default, strictkey) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
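# Illustrative usage sketch (an assumption, not original code): keys come
# back in insertion order, independent of how they hash.
def _demo_ordereddict():
    od = OrderedDict()
    for k in ("b", "a", "c"):
        od[k] = k.upper()
    assert od.keys() == ["b", "a", "c"]
    assert od.items() == [("b", "B"), ("a", "A"), ("c", "C")]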
||||
|
|
||||
|
class SmallOrderedDict(dict): |
||||
|
""" |
||||
|
SmallOrderedDict is faster than OrderedDict for small sets. How small? That |
||||
|
depends on your machine and which operations you use most often. Use |
||||
|
performance profiling to determine whether the ordered dict class that you are |
||||
|
using makes any difference to the performance of your program, and if it |
||||
|
does, then run "quick_bench()" in test/test_cache.py to see which cache |
||||
|
implementation is faster for the size of your datasets. |
||||
|
|
||||
|
A simple least-recently-used cache. It keeps an LRU queue, and |
||||
|
when the number of items in the cache reaches maxsize, it removes |
||||
|
the least recently used item. |
||||
|
|
||||
|
"Looking" at an item or a key such as with "has_key()" makes that |
||||
|
item become the most recently used item. |
||||
|
|
||||
|
You can also use "refresh()" to explicitly make an item become the most |
||||
|
recently used item. |
||||
|
|
||||
|
    Adding an item that is already in the dict *does* make it the
    most-recently-used item, although it does not change the state of
    the dict itself.
||||
|
""" |
||||
|
class ItemIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = 0 |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
        def next(self):
            precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
            if self.i == len(self.c._lru):
                raise StopIteration
            k = self.c._lru[self.i]
            precondition(dict.has_key(self.c, k), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, k, self.c)
            self.i += 1
            return (k, dict.__getitem__(self.c, k),)
||||
|
|
||||
|
class KeyIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = 0 |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
        def next(self):
            precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
            if self.i == len(self.c._lru):
                raise StopIteration
            k = self.c._lru[self.i]
            precondition(dict.has_key(self.c, k), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, k, self.c)
            self.i += 1
            return k
||||
|
|
||||
|
class ValueIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = 0 |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
        def next(self):
            precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
            if self.i == len(self.c._lru):
                raise StopIteration
            k = self.c._lru[self.i]
            precondition(dict.has_key(self.c, k), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, k, self.c)
            self.i += 1
            return dict.__getitem__(self.c, k)
||||
|
|
||||
|
def __init__(self, initialdata={}, maxsize=128): |
||||
|
dict.__init__(self, initialdata) |
||||
|
self._lru = initialdata.keys() # contains keys |
||||
|
self._maxsize = maxsize |
||||
|
over = len(self) - self._maxsize |
||||
|
if over > 0: |
||||
|
map(dict.__delitem__, [self]*over, self._lru[:over]) |
||||
|
del self._lru[:over] |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def _assert_invariants(self): |
||||
|
_assert(len(self._lru) <= self._maxsize, "Size is required to be <= maxsize.") |
||||
|
_assert(len(filter(lambda x: dict.has_key(self, x), self._lru)) == len(self._lru), "Each key in self._lru is required to be in dict.", filter(lambda x: not dict.has_key(self, x), self._lru), len(self._lru), self._lru, len(self), self) |
||||
|
_assert(len(filter(lambda x: x in self._lru, self.keys())) == len(self), "Each key in dict is required to be in self._lru.", filter(lambda x: x not in self._lru, self.keys()), len(self._lru), self._lru, len(self), self) |
||||
|
_assert(len(self._lru) == len(self), "internal consistency", filter(lambda x: x not in self.keys(), self._lru), len(self._lru), self._lru, len(self), self) |
||||
|
_assert(len(self._lru) <= self._maxsize, "internal consistency", len(self._lru), self._lru, self._maxsize) |
||||
|
return True |
||||
|
|
||||
|
def insert(self, key, item=None): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__setitem__(key, item) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
||||
|
|
||||
|
def setdefault(self, key, default=None): |
||||
|
assert self._assert_invariants() |
||||
|
if not self.has_key(key): |
||||
|
self[key] = default |
||||
|
assert self._assert_invariants() |
||||
|
return self[key] |
||||
|
|
||||
|
def __setitem__(self, key, item=None): |
||||
|
assert self._assert_invariants() |
||||
|
if dict.has_key(self, key): |
||||
|
self._lru.remove(key) |
||||
|
else: |
||||
|
if len(self._lru) == self._maxsize: |
||||
|
# If this insert is going to increase the size of the cache to bigger than maxsize: |
||||
|
killkey = self._lru.pop(0) |
||||
|
dict.__delitem__(self, killkey) |
||||
|
dict.__setitem__(self, key, item) |
||||
|
self._lru.append(key) |
||||
|
assert self._assert_invariants() |
||||
|
return item |
||||
|
|
||||
|
def remove(self, key, default=None, strictkey=True): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__delitem__(key, default, strictkey) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
||||
|
|
||||
|
def __delitem__(self, key, default=None, strictkey=True): |
||||
|
""" |
||||
|
@param strictkey: True if you want a KeyError in the case that |
||||
|
key is not there, False if you want a reference to default |
||||
|
in the case that key is not there |
||||
|
@param default: the object to return if key is not there; This |
||||
|
is ignored if strictkey. |
||||
|
|
||||
|
@return: the object removed or default if there is not item by |
||||
|
that key and strictkey is False |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
if dict.has_key(self, key): |
||||
|
val = dict.__getitem__(self, key) |
||||
|
dict.__delitem__(self, key) |
||||
|
self._lru.remove(key) |
||||
|
assert self._assert_invariants() |
||||
|
return val |
||||
|
elif strictkey: |
||||
|
assert self._assert_invariants() |
||||
|
raise KeyError, key |
||||
|
else: |
||||
|
assert self._assert_invariants() |
||||
|
return default |
||||
|
|
||||
|
def clear(self): |
||||
|
assert self._assert_invariants() |
||||
|
dict.clear(self) |
||||
|
self._lru = [] |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def update(self, otherdict): |
||||
|
""" |
||||
|
@return: self |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
if len(otherdict) > self._maxsize: |
||||
|
# Handling this special case here makes it possible to implement the |
||||
|
# other more common cases faster below. |
||||
|
dict.clear(self) |
||||
|
self._lru = [] |
||||
|
if self._maxsize > (len(otherdict) - self._maxsize): |
||||
|
dict.update(self, otherdict) |
||||
|
while len(self) > self._maxsize: |
||||
|
dict.popitem(self) |
||||
|
else: |
||||
|
for k, v, in otherdict.iteritems(): |
||||
|
if len(self) == self._maxsize: |
||||
|
break |
||||
|
dict.__setitem__(self, k, v) |
||||
|
self._lru = dict.keys(self) |
||||
|
assert self._assert_invariants() |
||||
|
return self |
||||
|
|
||||
|
for k in otherdict.iterkeys(): |
||||
|
if dict.has_key(self, k): |
||||
|
self._lru.remove(k) |
||||
|
self._lru.extend(otherdict.keys()) |
||||
|
dict.update(self, otherdict) |
||||
|
|
||||
|
over = len(self) - self._maxsize |
||||
|
if over > 0: |
||||
|
map(dict.__delitem__, [self]*over, self._lru[:over]) |
||||
|
del self._lru[:over] |
||||
|
|
||||
|
assert self._assert_invariants() |
||||
|
return self |
||||
|
|
||||
|
def has_key(self, key): |
||||
|
assert self._assert_invariants() |
||||
|
if dict.has_key(self, key): |
||||
|
assert key in self._lru, "key: %s, self._lru: %s" % tuple(map(hr, (key, self._lru,))) |
||||
|
self._lru.remove(key) |
||||
|
self._lru.append(key) |
||||
|
assert self._assert_invariants() |
||||
|
return True |
||||
|
else: |
||||
|
assert self._assert_invariants() |
||||
|
return False |
||||
|
|
||||
|
def refresh(self, key, strictkey=True): |
||||
|
""" |
||||
|
@param strictkey: raise a KeyError exception if key isn't present |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
if not dict.has_key(self, key): |
||||
|
if strictkey: |
||||
|
raise KeyError, key |
||||
|
return |
||||
|
self._lru.remove(key) |
||||
|
self._lru.append(key) |
||||
|
|
||||
|
def popitem(self): |
||||
|
if not self._lru: |
||||
|
raise KeyError, 'popitem(): dictionary is empty' |
||||
|
k = self._lru[-1] |
||||
|
obj = self.remove(k) |
||||
|
return (k, obj,) |
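# Illustrative usage sketch (an assumption, not original code): with a
# maxsize, SmallOrderedDict behaves as a small LRU cache -- inserting past
# maxsize silently evicts the least recently used key.
def _demo_small_ordereddict():
    c = SmallOrderedDict(maxsize=2)
    c["a"] = 1
    c["b"] = 2
    c["c"] = 3   # evicts "a", the least recently used key
    assert not dict.has_key(c, "a")   # dict.has_key avoids the LRU refresh
    assert sorted(c.keys()) == ["b", "c"]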
@ -0,0 +1,552 @@ |
|||||
|
# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn |
||||
|
|
||||
|
""" |
||||
|
This module offers a Ordered Dict, which is a dict that preserves |
||||
|
insertion order. See PEP 372 for description of the problem. This |
||||
|
implementation uses a linked-list to get good O(1) asymptotic |
||||
|
performance. (Actually it is O(hashtable-update-cost), but whatever.) |
||||
|
|
||||
|
Warning: if -O optimizations are not turned on then OrderedDict performs |
||||
|
extensive self-analysis in every function call, which can take minutes |
||||
|
and minutes for a large cache. Turn on -O, or comment out assert |
||||
|
self._assert_invariants() |
||||
|
""" |
||||
|
|
||||
|
import operator |
||||
|
|
||||
|
from assertutil import _assert, precondition |
||||
|
from humanreadable import hr |
||||
|
|
||||
|
class OrderedDict: |
||||
|
""" |
||||
|
An efficient ordered dict. |
||||
|
|
||||
|
Adding an item that is already in the dict *does not* make it the |
||||
|
most- recently-added item although it may change the state of the |
||||
|
dict itself (if the value is different than the previous value). |
||||
|
|
||||
|
See also SmallOrderedDict (below), which is faster in some cases. |
||||
|
""" |
||||
|
class ItemIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = c.d[c.ts][1] |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
if self.i is self.c.hs: |
||||
|
raise StopIteration |
||||
|
k = self.i |
||||
|
precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c) |
||||
|
(v, p, n,) = self.c.d[k] |
||||
|
self.i = p |
||||
|
return (k, v,) |
||||
|
|
||||
|
class KeyIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = c.d[c.ts][1] |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
if self.i is self.c.hs: |
||||
|
raise StopIteration |
||||
|
k = self.i |
||||
|
precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c) |
||||
|
(v, p, n,) = self.c.d[k] |
||||
|
self.i = p |
||||
|
return k |
||||
|
|
||||
|
class ValIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = c.d[c.ts][1] |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
if self.i is self.c.hs: |
||||
|
raise StopIteration |
||||
|
precondition(self.c.d.has_key(self.i), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) |
||||
|
(v, p, n,) = self.c.d[self.i] |
||||
|
self.i = p |
||||
|
return v |
||||
|
|
||||
|
class Sentinel: |
||||
|
def __init__(self, msg): |
||||
|
self.msg = msg |
||||
|
def __repr__(self): |
||||
|
return "<%s %s>" % (self.__class__.__name__, self.msg,) |
||||
|
|
||||
|
def __init__(self, initialdata={}): |
||||
|
self.d = {} # k: k, v: [v, prev, next,] # the dict |
||||
|
self.hs = OrderedDict.Sentinel("hs") |
||||
|
self.ts = OrderedDict.Sentinel("ts") |
||||
|
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. |
||||
|
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. |
||||
|
self.update(initialdata) |
||||
|
|
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def __repr_n__(self, n=None): |
||||
|
s = ["{",] |
||||
|
try: |
||||
|
iter = self.iteritems() |
||||
|
x = iter.next() |
||||
|
s.append(str(x[0])); s.append(": "); s.append(str(x[1])) |
||||
|
i = 1 |
||||
|
while (n is None) or (i < n): |
||||
|
x = iter.next() |
||||
|
s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1])) |
||||
|
except StopIteration: |
||||
|
pass |
||||
|
s.append("}") |
||||
|
return ''.join(s) |
||||
|
|
||||
|
def __repr__(self): |
||||
|
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),) |
||||
|
|
||||
|
def __str__(self): |
||||
|
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),) |
||||
|
|
||||
|
def _assert_invariants(self): |
||||
|
_assert((len(self.d) > 2) == (self.d[self.hs][2] is not self.ts) == (self.d[self.ts][1] is not self.hs), "Head and tail point to something other than each other if and only if there is at least one element in the dictionary.", self.hs, self.ts, len(self.d)) |
||||
|
foundprevsentinel = 0 |
||||
|
foundnextsentinel = 0 |
||||
|
for (k, (v, p, n,)) in self.d.iteritems(): |
||||
|
_assert(v not in (self.hs, self.ts,)) |
||||
|
_assert(p is not self.ts, "A reference to the tail sentinel may not appear in prev.", k, v, p, n) |
||||
|
_assert(n is not self.hs, "A reference to the head sentinel may not appear in next.", k, v, p, n) |
||||
|
_assert(p in self.d, "Each prev is required to appear as a key in the dict.", k, v, p, n) |
||||
|
_assert(n in self.d, "Each next is required to appear as a key in the dict.", k, v, p, n) |
||||
|
if p is self.hs: |
||||
|
foundprevsentinel += 1 |
||||
|
_assert(foundprevsentinel <= 2, "No more than two references to the head sentinel may appear as a prev.", k, v, p, n) |
||||
|
if n is self.ts: |
||||
|
foundnextsentinel += 1 |
||||
|
_assert(foundnextsentinel <= 2, "No more than one reference to the tail sentinel may appear as a next.", k, v, p, n) |
||||
|
_assert(foundprevsentinel == 2, "A reference to the head sentinel is required appear as a prev (plus a self-referential reference).") |
||||
|
_assert(foundnextsentinel == 2, "A reference to the tail sentinel is required appear as a next (plus a self-referential reference).") |
||||
|
|
||||
|
count = 0 |
||||
|
for (k, v,) in self.iteritems(): |
||||
|
_assert(k not in (self.hs, self.ts,), k, self.hs, self.ts) |
||||
|
count += 1 |
||||
|
_assert(count == len(self.d)-2, count, len(self.d)) # -2 for the sentinels |
||||
|
|
||||
|
return True |
||||
|
|
||||
|
def move_to_most_recent(self, k, strictkey=False): |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
if not self.d.has_key(k): |
||||
|
if strictkey: |
||||
|
raise KeyError, k |
||||
|
return |
||||
|
|
||||
|
node = self.d[k] |
||||
|
|
||||
|
# relink |
||||
|
self.d[node[1]][2] = node[2] |
||||
|
self.d[node[2]][1] = node[1] |
||||
|
|
||||
|
# move to front |
||||
|
hnode = self.d[self.hs] |
||||
|
|
||||
|
node[1] = self.hs |
||||
|
node[2] = hnode[2] |
||||
|
hnode[2] = k |
||||
|
self.d[node[2]][1] = k |
||||
|
|
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def iteritems(self): |
||||
|
return OrderedDict.ItemIterator(self) |
||||
|
|
||||
|
def itervalues(self): |
||||
|
return OrderedDict.ValIterator(self) |
||||
|
|
||||
|
def iterkeys(self): |
||||
|
return self.__iter__() |
||||
|
|
||||
|
def __iter__(self): |
||||
|
return OrderedDict.KeyIterator(self) |
||||
|
|
||||
|
def __getitem__(self, key, default=None, strictkey=True): |
||||
|
node = self.d.get(key) |
||||
|
if not node: |
||||
|
if strictkey: |
||||
|
raise KeyError, key |
||||
|
return default |
||||
|
return node[0] |
||||
|
|
||||
|
def __setitem__(self, k, v=None): |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
node = self.d.get(k) |
||||
|
if node: |
||||
|
node[0] = v |
||||
|
return |
||||
|
|
||||
|
hnode = self.d[self.hs] |
||||
|
n = hnode[2] |
||||
|
self.d[k] = [v, self.hs, n,] |
||||
|
hnode[2] = k |
||||
|
self.d[n][1] = k |
||||
|
|
||||
|
assert self._assert_invariants() |
||||
|
return v |
||||
|
|
||||
|
def __delitem__(self, key, default=None, strictkey=True): |
||||
|
""" |
||||
|
@param strictkey: True if you want a KeyError in the case that |
||||
|
key is not there, False if you want a reference to default |
||||
|
in the case that key is not there |
||||
|
@param default: the object to return if key is not there; This |
||||
|
is ignored if strictkey. |
||||
|
|
||||
|
@return: the value removed or default if there is not item by |
||||
|
that key and strictkey is False |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
if self.d.has_key(key): |
||||
|
node = self.d[key] |
||||
|
# relink |
||||
|
self.d[node[1]][2] = node[2] |
||||
|
self.d[node[2]][1] = node[1] |
||||
|
del self.d[key] |
||||
|
assert self._assert_invariants() |
||||
|
return node[0] |
||||
|
elif strictkey: |
||||
|
assert self._assert_invariants() |
||||
|
raise KeyError, key |
||||
|
else: |
||||
|
assert self._assert_invariants() |
||||
|
return default |
||||
|
|
||||
|
def has_key(self, key): |
||||
|
assert self._assert_invariants() |
||||
|
if self.d.has_key(key): |
||||
|
assert self._assert_invariants() |
||||
|
return True |
||||
|
else: |
||||
|
assert self._assert_invariants() |
||||
|
return False |
||||
|
|
||||
|
def clear(self): |
||||
|
assert self._assert_invariants() |
||||
|
self.d.clear() |
||||
|
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. |
||||
|
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def update(self, otherdict): |
||||
|
""" |
||||
|
@return: self |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
for (k, v,) in otherdict.iteritems(): |
||||
|
assert self._assert_invariants() |
||||
|
self[k] = v |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def pop(self): |
||||
|
assert self._assert_invariants() |
||||
|
if len(self.d) < 2: # the +2 is for the sentinels |
||||
|
raise KeyError, 'popitem(): dictionary is empty' |
||||
|
k = self.d[self.hs][2] |
||||
|
self.remove(k) |
||||
|
assert self._assert_invariants() |
||||
|
return k |
||||
|
|
||||
|
def popitem(self): |
||||
|
assert self._assert_invariants() |
||||
|
if len(self.d) < 2: # the +2 is for the sentinels |
||||
|
raise KeyError, 'popitem(): dictionary is empty' |
||||
|
k = self.d[self.hs][2] |
||||
|
val = self.remove(k) |
||||
|
assert self._assert_invariants() |
||||
|
return (k, val,) |
||||
|
|
||||
|
def keys_unsorted(self): |
||||
|
assert self._assert_invariants() |
||||
|
t = self.d.copy() |
||||
|
del t[self.hs] |
||||
|
del t[self.ts] |
||||
|
assert self._assert_invariants() |
||||
|
return t.keys() |
||||
|
|
||||
|
def keys(self): |
||||
|
res = [None] * len(self) |
||||
|
i = 0 |
||||
|
for k in self.iterkeys(): |
||||
|
res[i] = k |
||||
|
i += 1 |
||||
|
return res |
||||
|
|
||||
|
def values_unsorted(self): |
||||
|
assert self._assert_invariants() |
||||
|
t = self.d.copy() |
||||
|
del t[self.hs] |
||||
|
del t[self.ts] |
||||
|
assert self._assert_invariants() |
||||
|
return map(operator.__getitem__, t.values(), [0]*len(t)) |
||||
|
|
||||
|
def values(self): |
||||
|
res = [None] * len(self) |
||||
|
i = 0 |
||||
|
for v in self.itervalues(): |
||||
|
res[i] = v |
||||
|
i += 1 |
||||
|
return res |
||||
|
|
||||
|
def items(self): |
||||
|
res = [None] * len(self) |
||||
|
i = 0 |
||||
|
for it in self.iteritems(): |
||||
|
res[i] = it |
||||
|
i += 1 |
||||
|
return res |
||||
|
|
||||
|
def __len__(self): |
||||
|
return len(self.d) - 2 |
||||
|
|
||||
|
def insert(self, key, val=None): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__setitem__(key, val) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
||||
|
|
||||
|
def setdefault(self, key, default=None): |
||||
|
assert self._assert_invariants() |
||||
|
if not self.has_key(key): |
||||
|
self[key] = default |
||||
|
assert self._assert_invariants() |
||||
|
return self[key] |
||||
|
|
||||
|
def get(self, key, default=None): |
||||
|
return self.__getitem__(key, default, strictkey=False) |
||||
|
|
||||
|
def remove(self, key, default=None, strictkey=True): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__delitem__(key, default, strictkey) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
||||
|
|
||||
|
class SmallOrderedDict(dict): |
||||
|
""" |
||||
|
SmallOrderedDict is faster than OrderedDict for small sets. How small? That |
||||
|
depends on your machine and which operations you use most often. Use |
||||
|
performance profiling to determine whether the cache class that you are |
||||
|
using makes any difference to the performance of your program, and if it |
||||
|
does, then run "quick_bench()" in test/test_cache.py to see which cache |
||||
|
implementation is faster for the size of your datasets. |
||||
|
|
||||
|
A simple least-recently-used cache. It keeps an LRU queue, and |
||||
|
when the number of items in the cache reaches maxsize, it removes |
||||
|
the least recently used item. |
||||
|
|
||||
|
"Looking" at an item or a key such as with "has_key()" makes that |
||||
|
item become the most recently used item. |
||||
|
|
||||
|
You can also use "refresh()" to explicitly make an item become the most |
||||
|
recently used item. |
||||
|
|
||||
|
Adding an item that is already in the dict *does* make it the |
||||
|
most- recently-used item although it does not change the state of |
||||
|
the dict itself. |
||||
|
""" |
||||
|
class ItemIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = 0 |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) |
||||
|
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c) |
||||
|
if self.i == len(self.c._lru): |
||||
|
raise StopIteration |
||||
|
k = self.i |
||||
|
self.i += 1 |
||||
|
return (k, dict.__getitem__(self.c, k),) |
||||
|
|
||||
|
class KeyIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = 0 |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) |
||||
|
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c) |
||||
|
if self.i == len(self.c._lru): |
||||
|
raise StopIteration |
||||
|
k = self.i |
||||
|
self.i += 1 |
||||
|
return k |
||||
|
|
||||
|
class ValueIterator: |
||||
|
def __init__(self, c): |
||||
|
self.c = c |
||||
|
self.i = 0 |
||||
|
def __iter__(self): |
||||
|
return self |
||||
|
def next(self): |
||||
|
precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) |
||||
|
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c) |
||||
|
if self.i == len(self.c._lru): |
||||
|
raise StopIteration |
||||
|
k = self.i |
||||
|
self.i += 1 |
||||
|
return dict.__getitem__(self.c, k) |
||||
|
|
||||
|
def __init__(self, initialdata={}, maxsize=128): |
||||
|
dict.__init__(self, initialdata) |
||||
|
self._lru = initialdata.keys() # contains keys |
||||
|
self._maxsize = maxsize |
||||
|
over = len(self) - self._maxsize |
||||
|
if over > 0: |
||||
|
map(dict.__delitem__, [self]*over, self._lru[:over]) |
||||
|
del self._lru[:over] |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def _assert_invariants(self): |
||||
|
_assert(len(self._lru) <= self._maxsize, "Size is required to be <= maxsize.") |
||||
|
_assert(len(filter(lambda x: dict.has_key(self, x), self._lru)) == len(self._lru), "Each key in self._lru is required to be in dict.", filter(lambda x: not dict.has_key(self, x), self._lru), len(self._lru), self._lru, len(self), self) |
||||
|
_assert(len(filter(lambda x: x in self._lru, self.keys())) == len(self), "Each key in dict is required to be in self._lru.", filter(lambda x: x not in self._lru, self.keys()), len(self._lru), self._lru, len(self), self) |
||||
|
_assert(len(self._lru) == len(self), "internal consistency", filter(lambda x: x not in self.keys(), self._lru), len(self._lru), self._lru, len(self), self) |
||||
|
_assert(len(self._lru) <= self._maxsize, "internal consistency", len(self._lru), self._lru, self._maxsize) |
||||
|
return True |
||||
|
|
||||
|
def insert(self, key, item=None): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__setitem__(key, item) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
||||
|
|
||||
|
def setdefault(self, key, default=None): |
||||
|
assert self._assert_invariants() |
||||
|
if not self.has_key(key): |
||||
|
self[key] = default |
||||
|
assert self._assert_invariants() |
||||
|
return self[key] |
||||
|
|
||||
|
def __setitem__(self, key, item=None): |
||||
|
assert self._assert_invariants() |
||||
|
if dict.has_key(self, key): |
||||
|
self._lru.remove(key) |
||||
|
else: |
||||
|
if len(self._lru) == self._maxsize: |
||||
|
# If this insert would grow the cache past maxsize, evict the least-recently-used key first: |
||||
|
killkey = self._lru.pop(0) |
||||
|
dict.__delitem__(self, killkey) |
||||
|
dict.__setitem__(self, key, item) |
||||
|
self._lru.append(key) |
||||
|
assert self._assert_invariants() |
||||
|
return item |
||||
|
|
||||
|
def remove(self, key, default=None, strictkey=True): |
||||
|
assert self._assert_invariants() |
||||
|
result = self.__delitem__(key, default, strictkey) |
||||
|
assert self._assert_invariants() |
||||
|
return result |
||||
|
|
||||
|
def __delitem__(self, key, default=None, strictkey=True): |
||||
|
""" |
||||
|
@param strictkey: True if you want a KeyError in the case that |
||||
|
key is not there, False if you want a reference to default |
||||
|
in the case that key is not there |
||||
|
@param default: the object to return if key is not there; This |
||||
|
is ignored if strictkey. |
||||
|
|
||||
|
@return: the object removed or default if there is no item by |
||||
|
that key and strictkey is False |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
if dict.has_key(self, key): |
||||
|
val = dict.__getitem__(self, key) |
||||
|
dict.__delitem__(self, key) |
||||
|
self._lru.remove(key) |
||||
|
assert self._assert_invariants() |
||||
|
return val |
||||
|
elif strictkey: |
||||
|
assert self._assert_invariants() |
||||
|
raise KeyError, key |
||||
|
else: |
||||
|
assert self._assert_invariants() |
||||
|
return default |
||||
|
|
||||
|
def clear(self): |
||||
|
assert self._assert_invariants() |
||||
|
dict.clear(self) |
||||
|
self._lru = [] |
||||
|
assert self._assert_invariants() |
||||
|
|
||||
|
def update(self, otherdict): |
||||
|
""" |
||||
|
@return: self |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
if len(otherdict) > self._maxsize: |
||||
|
# Handling this special case here makes it possible to implement the |
||||
|
# other more common cases faster below. |
||||
|
dict.clear(self) |
||||
|
self._lru = [] |
||||
|
if self._maxsize > (len(otherdict) - self._maxsize): |
||||
|
dict.update(self, otherdict) |
||||
|
while len(self) > self._maxsize: |
||||
|
dict.popitem(self) |
||||
|
else: |
||||
|
for k, v, in otherdict.iteritems(): |
||||
|
if len(self) == self._maxsize: |
||||
|
break |
||||
|
dict.__setitem__(self, k, v) |
||||
|
self._lru = dict.keys(self) |
||||
|
assert self._assert_invariants() |
||||
|
return self |
||||
|
|
||||
|
for k in otherdict.iterkeys(): |
||||
|
if dict.has_key(self, k): |
||||
|
self._lru.remove(k) |
||||
|
self._lru.extend(otherdict.keys()) |
||||
|
dict.update(self, otherdict) |
||||
|
|
||||
|
over = len(self) - self._maxsize |
||||
|
if over > 0: |
||||
|
map(dict.__delitem__, [self]*over, self._lru[:over]) |
||||
|
del self._lru[:over] |
||||
|
|
||||
|
assert self._assert_invariants() |
||||
|
return self |
||||
|
|
||||
|
def has_key(self, key): |
||||
|
assert self._assert_invariants() |
||||
|
if dict.has_key(self, key): |
||||
|
assert key in self._lru, "key: %s, self._lru: %s" % tuple(map(hr, (key, self._lru,))) |
||||
|
self._lru.remove(key) |
||||
|
self._lru.append(key) |
||||
|
assert self._assert_invariants() |
||||
|
return True |
||||
|
else: |
||||
|
assert self._assert_invariants() |
||||
|
return False |
||||
|
|
||||
|
def refresh(self, key, strictkey=True): |
||||
|
""" |
||||
|
@param strictkey: raise a KeyError exception if key isn't present |
||||
|
""" |
||||
|
assert self._assert_invariants() |
||||
|
if not dict.has_key(self, key): |
||||
|
if strictkey: |
||||
|
raise KeyError, key |
||||
|
return |
||||
|
self._lru.remove(key) |
||||
|
self._lru.append(key) |
||||
|
|
||||
|
def popitem(self): |
||||
|
if not self._lru: |
||||
|
raise KeyError, 'popitem(): dictionary is empty' |
||||
|
k = self._lru[-1] |
||||
|
obj = self.remove(k) |
||||
|
return (k, obj,) |
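# A minimal usage sketch, assuming the class above is pyutil's SmallOrderedDict
# (the name is taken from its precondition messages). It shows the LRU eviction
# in __setitem__() and the order-refreshing side effect of has_key():
def _demo_small_ordered_dict():
    c = SmallOrderedDict(maxsize=2)
    c["a"] = 1
    c["b"] = 2
    c["c"] = 3                # cache is full, so the oldest key "a" is evicted
    assert sorted(c.keys()) == ["b", "c"]
    c.has_key("b")            # has_key() also moves "b" to the fresh end of the LRU list
    c["d"] = 4                # so "c", not "b", is now least recently used and gets evicted
    assert sorted(c.keys()) == ["b", "d"]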
@ -0,0 +1,100 @@ |
|||||
|
# Thanks to Daenyth for help porting this to Arch Linux. |
||||
|
|
||||
|
import os, platform, re, subprocess |
||||
|
_distributor_id_cmdline_re = re.compile("(?:Distributor ID:)\s*(.*)", re.I) |
||||
|
_release_cmdline_re = re.compile("(?:Release:)\s*(.*)", re.I) |
||||
|
|
||||
|
_distributor_id_file_re = re.compile("(?:DISTRIB_ID\s*=)\s*(.*)", re.I) |
||||
|
_release_file_re = re.compile("(?:DISTRIB_RELEASE\s*=)\s*(.*)", re.I) |
||||
|
|
||||
|
global _distname,_version |
||||
|
_distname = None |
||||
|
_version = None |
||||
|
|
||||
|
def get_linux_distro(): |
||||
|
""" Tries to determine the name of the Linux OS distribution name. |
||||
|
|
||||
|
First, try to parse a file named "/etc/lsb-release". If it exists, and |
||||
|
contains the "DISTRIB_ID=" line and the "DISTRIB_RELEASE=" line, then return |
||||
|
the strings parsed from that file. |
||||
|
|
||||
|
If that doesn't work, then invoke platform.dist(). |
||||
|
|
||||
|
If that doesn't work, then try to execute "lsb_release", as standardized in |
||||
|
2001: |
||||
|
|
||||
|
http://refspecs.freestandards.org/LSB_1.0.0/gLSB/lsbrelease.html |
||||
|
|
||||
|
The current version of the standard is here: |
||||
|
|
||||
|
http://refspecs.freestandards.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/lsbrelease.html |
||||
|
|
||||
|
If executing lsb_release succeeds, return the distributor id and release that lsb_release emitted, as strings. |
||||
|
|
||||
|
Returns a tuple (distname,version). Distname is what LSB calls a |
||||
|
"distributor id", e.g. "Ubuntu". Version is what LSB calls a "release", |
||||
|
e.g. "8.04". |
||||
|
|
||||
|
A version of this has been submitted to python as a patch for the standard |
||||
|
library module "platform": |
||||
|
|
||||
|
http://bugs.python.org/issue3937 |
||||
|
""" |
||||
|
global _distname,_version |
||||
|
if _distname and _version: |
||||
|
return (_distname, _version) |
||||
|
|
||||
|
try: |
||||
|
etclsbrel = open("/etc/lsb-release", "rU") |
||||
|
for line in etclsbrel: |
||||
|
m = _distributor_id_file_re.search(line) |
||||
|
if m: |
||||
|
_distname = m.group(1).strip() |
||||
|
if _distname and _version: |
||||
|
return (_distname, _version) |
||||
|
m = _release_file_re.search(line) |
||||
|
if m: |
||||
|
_version = m.group(1).strip() |
||||
|
if _distname and _version: |
||||
|
return (_distname, _version) |
||||
|
except EnvironmentError: |
||||
|
pass |
||||
|
|
||||
|
(_distname, _version) = platform.dist()[:2] |
||||
|
if _distname and _version: |
||||
|
return (_distname, _version) |
||||
|
|
||||
|
try: |
||||
|
p = subprocess.Popen(["lsb_release", "--all"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
||||
|
rc = p.wait() |
||||
|
if rc == 0: |
||||
|
for line in p.stdout.readlines(): |
||||
|
m = _distributor_id_cmdline_re.search(line) |
||||
|
if m: |
||||
|
_distname = m.group(1).strip() |
||||
|
if _distname and _version: |
||||
|
return (_distname, _version) |
||||
|
|
||||
|
m = _release_cmdline_re.search(p.stdout.read()) |
||||
|
if m: |
||||
|
_version = m.group(1).strip() |
||||
|
if _distname and _version: |
||||
|
return (_distname, _version) |
||||
|
except EnvironmentError: |
||||
|
pass |
||||
|
|
||||
|
if os.path.exists("/etc/arch-release"): |
||||
|
return ("Arch_Linux", "") |
||||
|
|
||||
|
return (_distname,_version) |
||||
|
|
||||
|
def get_platform(): |
||||
|
# Our version of platform.platform(), telling us both less and more than the |
||||
|
# Python Standard Library's version does. |
||||
|
# We omit details such as the Linux kernel version number, but we add a |
||||
|
# more detailed and correct rendition of the Linux distribution and |
||||
|
# distribution-version. |
||||
|
if "linux" in platform.system().lower(): |
||||
|
return platform.system()+"-"+"_".join(get_linux_distro())+"-"+platform.machine()+"-"+"_".join([x for x in platform.architecture() if x]) |
||||
|
else: |
||||
|
return platform.platform() |
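# Illustrative output only (actual values depend on the host); a sketch of what
# the two functions above return on a Linux box:
#
#   >>> get_linux_distro()
#   ('Ubuntu', '8.04')
#   >>> get_platform()
#   'Linux-Ubuntu_8.04-x86_64-64bit_ELF'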
@ -0,0 +1,85 @@ |
|||||
|
# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
import warnings |
||||
|
import os, random |
||||
|
|
||||
|
try: |
||||
|
import hashexpand |
||||
|
class SHA256Random(hashexpand.SHA256Expander, random.Random): |
||||
|
def __init__(self, seed=None, deterministic=True): |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
if not deterministic: |
||||
|
raise NotImplementedError, "SHA256Expander is always deterministic. For non-deterministic, try devurandomRandom." |
||||
|
|
||||
|
hashexpand.SHA256Expander.__init__(self) |
||||
|
random.Random.__init__(self, seed) |
||||
|
self.seed(seed) |
||||
|
|
||||
|
def seed(self, seed=None): |
||||
|
if seed is None: |
||||
|
import increasing_timer |
||||
|
seed = repr(increasing_timer.time()) |
||||
|
hashexpand.SHA256Expander.seed(self, seed) |
||||
|
|
||||
|
|
||||
|
class SHA256Random(hashexpand.SHA256Expander, random.Random): |
||||
|
def __init__(self, seed=""): |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
hashexpand.SHA256Expander.__init__(self) |
||||
|
self.seed(seed) |
||||
|
|
||||
|
def seed(self, seed=None): |
||||
|
if seed is None: |
||||
|
seed = os.urandom(32) |
||||
|
hashexpand.SHA256Expander.seed(self, seed) |
||||
|
except ImportError, le: |
||||
|
class InsecureSHA256Random: |
||||
|
def __init__(self, seed=None): |
||||
|
raise ImportError, le |
||||
|
class SHA256Random: |
||||
|
def __init__(self, seed=""): |
||||
|
raise ImportError, le |
||||
|
|
||||
|
class devrandomRandom(random.Random): |
||||
|
""" The problem with using this one, of course, is that it blocks. This |
||||
|
is, of course, a security flaw. (On Linux and probably on other |
||||
|
systems.) --Zooko 2005-03-04 |
||||
|
|
||||
|
Not repeatable. |
||||
|
""" |
||||
|
def __init__(self): |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
self.dr = open("/dev/random", "r") |
||||
|
|
||||
|
def get(self, bytes): |
||||
|
return self.dr.read(bytes) |
||||
|
|
||||
|
|
||||
|
class devurandomRandom(random.Random): |
||||
|
""" The problem with using this one is that it gives answers even when it |
||||
|
has never been properly seeded, e.g. when you are booting from CD and have |
||||
|
just started up and haven't yet gathered enough entropy to actually be |
||||
|
unguessable. (On Linux and probably on other systems.) --Zooko 2005-03-04 |
||||
|
|
||||
|
Not repeatable. |
||||
|
""" |
||||
|
def get(self, bytes): |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
return os.urandom(bytes) |
||||
|
|
||||
|
|
||||
|
randobj = devurandomRandom() |
||||
|
get = randobj.get |
||||
|
random = randobj.random |
||||
|
randrange = randobj.randrange |
||||
|
shuffle = randobj.shuffle |
||||
|
choice = randobj.choice |
||||
|
seed = randobj.seed |
||||
|
|
||||
|
def randstr(n): |
||||
|
return ''.join(map(chr, map(randrange, [0]*n, [256]*n))) |
||||
|
|
||||
|
import random as insecurerandom |
||||
|
def insecurerandstr(n): |
||||
|
return ''.join(map(chr, map(insecurerandom.randrange, [0]*n, [256]*n))) |
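# A short usage sketch (the import path pyutil.randutil is an assumption):
#
#   >>> from pyutil import randutil
#   >>> token = randutil.get(16)        # 16 bytes read from os.urandom
#   >>> s = randutil.randstr(16)        # 16 pseudorandom bytes via randrange
#   >>> len(token), len(s)
#   (16, 16)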
@ -0,0 +1,85 @@ |
|||||
|
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
import warnings |
||||
|
import os, random |
||||
|
|
||||
|
try: |
||||
|
import hashexpand |
||||
|
class SHA256Random(hashexpand.SHA256Expander, random.Random): |
||||
|
def __init__(self, seed=None, deterministic=True): |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
if not deterministic: |
||||
|
raise NotImplementedError, "SHA256Expander is always deterministic. For non-deterministic, try devurandomRandom." |
||||
|
|
||||
|
hashexpand.SHA256Expander.__init__(self) |
||||
|
random.Random.__init__(self, seed) |
||||
|
self.seed(seed) |
||||
|
|
||||
|
def seed(self, seed=None): |
||||
|
if seed is None: |
||||
|
import increasing_timer |
||||
|
seed = repr(increasing_timer.time()) |
||||
|
hashexpand.SHA256Expander.seed(self, seed) |
||||
|
|
||||
|
|
||||
|
class SHA256Random(hashexpand.SHA256Expander, random.Random): |
||||
|
def __init__(self, seed=""): |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
hashexpand.SHA256Expander.__init__(self) |
||||
|
self.seed(seed) |
||||
|
|
||||
|
def seed(self, seed=None): |
||||
|
if seed is None: |
||||
|
seed = os.urandom(32) |
||||
|
hashexpand.SHA256Expander.seed(self, seed) |
||||
|
except ImportError, le: |
||||
|
class InsecureSHA256Random: |
||||
|
def __init__(self, seed=None): |
||||
|
raise ImportError, le |
||||
|
class SHA256Random: |
||||
|
def __init__(self, seed=""): |
||||
|
raise ImportError, le |
||||
|
|
||||
|
class devrandomRandom(random.Random): |
||||
|
""" The problem with using this one, of course, is that it blocks. This |
||||
|
is, of course, a security flaw. (On Linux and probably on other |
||||
|
systems.) --Zooko 2005-03-04 |
||||
|
|
||||
|
Not repeatable. |
||||
|
""" |
||||
|
def __init__(self): |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
self.dr = open("/dev/random", "r") |
||||
|
|
||||
|
def get(self, bytes): |
||||
|
return self.dr.read(bytes) |
||||
|
|
||||
|
|
||||
|
class devurandomRandom(random.Random): |
||||
|
""" The problem with using this one is that it gives answers even when it |
||||
|
has never been properly seeded, e.g. when you are booting from CD and have |
||||
|
just started up and haven't yet gathered enough entropy to actually be |
||||
|
unguessable. (On Linux and probably on other systems.) --Zooko 2005-03-04 |
||||
|
|
||||
|
Not repeatable. |
||||
|
""" |
||||
|
def get(self, bytes): |
||||
|
warnings.warn("deprecated", DeprecationWarning) |
||||
|
return os.urandom(bytes) |
||||
|
|
||||
|
|
||||
|
randobj = devurandomRandom() |
||||
|
get = randobj.get |
||||
|
random = randobj.random |
||||
|
randrange = randobj.randrange |
||||
|
shuffle = randobj.shuffle |
||||
|
choice = randobj.choice |
||||
|
seed = randobj.seed |
||||
|
|
||||
|
def randstr(n): |
||||
|
return ''.join(map(chr, map(randrange, [0]*n, [256]*n))) |
||||
|
|
||||
|
import random as insecurerandom |
||||
|
def insecurerandstr(n): |
||||
|
return ''.join(map(chr, map(insecurerandom.randrange, [0]*n, [256]*n))) |
@ -0,0 +1,90 @@ |
|||||
|
""" |
||||
|
If you execute force_repeatability() then the following things are changed in the runtime: |
||||
|
|
||||
|
1. random.random() and its sibling functions, and random.Random.seed() in the random module are seeded with a known seed so that they will return the same sequence on each run. |
||||
|
2. os.urandom() is replaced by a fake urandom that returns a pseudorandom sequence. |
||||
|
3. time.time() is replaced by a fake time that returns an incrementing number. (Original time.time is available as time.realtime.) |
||||
|
|
||||
|
Which seed will be used? |
||||
|
|
||||
|
If the environment variable REPEATABLE_RANDOMNESS_SEED is set, then it will use that. Else, it will use the current real time. In either case it logs the seed that it used. |
||||
|
|
||||
|
Caveats: |
||||
|
|
||||
|
1. If some code has acquired a random.Random object before force_repeatability() is executed, then that Random object will produce non-reproducible results. For example, the tempfile module in the Python Standard Library does this. |
||||
|
2. Likewise if some code called time.time() before force_repeatability() was called, then it will have gotten a real time stamp. For example, trial does this. (Then it later subtracts that real timestamp from a faketime timestamp to calculate elapsed time, resulting in a large negative elapsed time.) |
||||
|
3. Fake urandom has an added constraint for performance reasons -- you can't ask it for more than 64 bytes of randomness at a time. (I couldn't figure out how to generate large fake random strings efficiently.) |
||||
|
""" |
||||
|
|
||||
|
import os, random, time |
||||
|
if not hasattr(time, "realtime"): |
||||
|
time.realtime = time.time |
||||
|
if not hasattr(os, "realurandom"): |
||||
|
os.realurandom = os.urandom |
||||
|
if not hasattr(random, "realseed"): |
||||
|
random.realseed = random.seed |
||||
|
|
||||
|
tdelta = 0 |
||||
|
seeded = False |
||||
|
def force_repeatability(): |
||||
|
now = 1043659734.0 |
||||
|
def faketime(): |
||||
|
global tdelta |
||||
|
tdelta += 1 |
||||
|
return now + tdelta |
||||
|
time.faketime = faketime |
||||
|
time.time = faketime |
||||
|
|
||||
|
from idlib import i2b |
||||
|
def fakeurandom(n): |
||||
|
if n > 64: |
||||
|
raise ("Can't produce more than 64 bytes of pseudorandomness efficiently.") |
||||
|
elif n == 0: |
||||
|
return '' |
||||
|
else: |
||||
|
z = i2b(random.getrandbits(n*8)) |
||||
|
x = z + "0" * (n-len(z)) |
||||
|
assert len(x) == n |
||||
|
return x |
||||
|
os.fakeurandom = fakeurandom |
||||
|
os.urandom = fakeurandom |
||||
|
|
||||
|
global seeded |
||||
|
if not seeded: |
||||
|
SEED = os.environ.get('REPEATABLE_RANDOMNESS_SEED', None) |
||||
|
|
||||
|
if SEED is None: |
||||
|
# Generate a seed which is integral and fairly short (to ease cut-and-paste, writing it down, etc.). |
||||
|
t = time.realtime() |
||||
|
subsec = t % 1 |
||||
|
t += (subsec * 1000000) |
||||
|
t %= 1000000 |
||||
|
SEED = long(t) |
||||
|
import sys |
||||
|
sys.stdout.write("REPEATABLE_RANDOMNESS_SEED: %s\n" % SEED) ; sys.stdout.flush() |
||||
|
sys.stdout.write("In order to reproduce this run of the code, set the environment variable \"REPEATABLE_RANDOMNESS_SEED\" to %s before executing.\n" % SEED) ; sys.stdout.flush() |
||||
|
random.seed(SEED) |
||||
|
|
||||
|
def seed_which_refuses(a): |
||||
|
sys.stdout.write("I refuse to reseed to %s. Go away!\n" % (a,)) ; sys.stdout.flush() |
||||
|
return |
||||
|
random.realseed = random.seed |
||||
|
random.seed = seed_which_refuses |
||||
|
seeded = True |
||||
|
|
||||
|
import setutil |
||||
|
setutil.RandomSet.DETERMINISTIC = True |
||||
|
|
||||
|
def restore_real_clock(): |
||||
|
time.time = time.realtime |
||||
|
|
||||
|
def restore_real_urandom(): |
||||
|
os.urandom = os.realurandom |
||||
|
|
||||
|
def restore_real_seed(): |
||||
|
random.seed = random.realseed |
||||
|
|
||||
|
def restore_non_repeatability(): |
||||
|
restore_real_seed() |
||||
|
restore_real_urandom() |
||||
|
restore_real_clock() |
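# A sketch of the intended workflow (the module name repeatable_random is an
# assumption; REPEATABLE_RANDOMNESS_SEED is the environment variable named above):
#
#   import repeatable_random, random, time, os
#   repeatable_random.force_repeatability()   # prints the seed in use
#   sample = (random.random(), time.time(), os.urandom(8))
#   # Re-running with REPEATABLE_RANDOMNESS_SEED set to the printed value
#   # reproduces `sample` exactly.
#   repeatable_random.restore_non_repeatability()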
@ -0,0 +1,36 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
from pyutil import lineutil |
||||
|
|
||||
|
import sys |
||||
|
|
||||
|
def main(): |
||||
|
if len(sys.argv) > 1 and "-s" in sys.argv[1:]: |
||||
|
strip = True |
||||
|
sys.argv.remove("-s") |
||||
|
else: |
||||
|
strip = False |
||||
|
|
||||
|
if len(sys.argv) > 1 and "-n" in sys.argv[1:]: |
||||
|
nobak = True |
||||
|
sys.argv.remove("-n") |
||||
|
else: |
||||
|
nobak = False |
||||
|
|
||||
|
if len(sys.argv) > 1: |
||||
|
pipe = False |
||||
|
else: |
||||
|
pipe = True |
||||
|
|
||||
|
if pipe: |
||||
|
lineutil.lineify_fileobjs(sys.stdin, sys.stdout) |
||||
|
else: |
||||
|
for fn in sys.argv[1:]: |
||||
|
lineutil.lineify_file(fn, strip, nobak) |
||||
|
|
||||
|
if __name__ == '__main__': |
||||
|
main() |
||||
|
|
@ -0,0 +1,65 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import bindann |
||||
|
bindann.install_exception_handler() |
||||
|
|
||||
|
import sys |
||||
|
|
||||
|
inf = open(sys.argv[1], "r") |
||||
|
|
||||
|
outf = open(sys.argv[1]+".dot", "w") |
||||
|
outf.write("digraph %s {\n" % sys.argv[1].replace(".","")) |
||||
|
|
||||
|
def parse_netstring(l, i): |
||||
|
try: |
||||
|
j = l.find(':', i) |
||||
|
if j == -1: |
||||
|
return (None, len(l),) |
||||
|
lenval = int(l[i:j]) |
||||
|
val = l[j+1:j+1+lenval] |
||||
|
# skip the comma |
||||
|
assert l[j+1+lenval] == "," |
||||
|
return (val, j+1+lenval+1,) |
||||
|
except Exception, le: |
||||
|
le.args = tuple(le.args + (l, i,)) |
||||
|
raise |
||||
|
|
||||
|
def parse_ref(l, i): |
||||
|
(attrname, i,) = parse_netstring(l, i) |
||||
|
j = l.find(",", i) |
||||
|
assert j != -1 |
||||
|
objid = l[i:j] |
||||
|
return (objid, attrname, j+1,) |
||||
|
|
||||
|
def parse_memdump_line(l): |
||||
|
result = [] |
||||
|
|
||||
|
i = l.find('-') |
||||
|
objid = l[:i] |
||||
|
(objdesc, i,) = parse_netstring(l, i+1) |
||||
|
|
||||
|
result.append((objid, objdesc,)) |
||||
|
|
||||
|
while i != -1 and i < len(l): |
||||
|
(objid, attrname, i,) = parse_ref(l, i) |
||||
|
result.append((objid, attrname,)) |
||||
|
|
||||
|
return result |
||||
|
|
||||
|
for l in inf: |
||||
|
if l[-1] != "\n": |
||||
|
raise "waht the HECK? %r" % l |
||||
|
res = parse_memdump_line(l.strip()) |
||||
|
# declare the node |
||||
|
outf.write("\"%s\" [label=\"%s\"];\n" % (res[0][0], res[0][1],)) |
||||
|
|
||||
|
# declare all the edges |
||||
|
for edge in res[1:]: |
||||
|
if edge[1]: |
||||
|
# a named edge |
||||
|
outf.write("\"%s\" -> \"%s\" [style=bold, label=\"%s\"];\n" % (res[0][0], edge[0], edge[1],)) |
||||
|
else: |
||||
|
# an anonymous edge |
||||
|
outf.write("\"%s\" -> \"%s\";\n" % (res[0][0], edge[0])) |
||||
|
|
||||
|
outf.write("}") |
@ -0,0 +1,30 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import os, sys |
||||
|
|
||||
|
import zbase32 |
||||
|
|
||||
|
def main(): |
||||
|
if len(sys.argv) > 1: |
||||
|
l = int(sys.argv[1]) |
||||
|
else: |
||||
|
l = 64 |
||||
|
|
||||
|
bl = (l + 7) / 8 |
||||
|
|
||||
|
s = zbase32.b2a_l(os.urandom(bl), l) |
||||
|
|
||||
|
# insert some hyphens for easier memorization |
||||
|
chs = 3 + (len(s)%8==0) |
||||
|
i = chs |
||||
|
while i < len(s)-1: |
||||
|
s = s[:i] + "-" + s[i:] |
||||
|
i += 1 |
||||
|
chs = 7-chs |
||||
|
i += chs |
||||
|
|
||||
|
print s |
||||
|
|
||||
|
if __name__ == '__main__': |
||||
|
main() |
||||
|
|
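# Example run (illustrative output; the actual characters are random):
#
#   $ randcookie 64
#   tyj-ogfe-gyu-bxb
#
# i.e. 64 random bits zbase32-encoded into 13 characters, hyphenated into
# roughly alternating groups of three and four characters for memorization.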
@ -0,0 +1,48 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import os, sys |
||||
|
|
||||
|
from random import randrange |
||||
|
|
||||
|
import argparse |
||||
|
|
||||
|
def main(): |
||||
|
CHUNKSIZE=2**20 |
||||
|
|
||||
|
parser = argparse.ArgumentParser(prog="randfile", description="Create a file of pseudorandom bytes (not cryptographically secure).") |
||||
|
|
||||
|
parser.add_argument('-b', '--num-bytes', help="how many bytes to write per output file (default 20)", type=int, metavar="BYTES", default=20) |
||||
|
parser.add_argument('-f', '--output-file-prefix', help="prefix of the name of the output file to create and fill with random bytes (default \"randfile\")", metavar="OUTFILEPRE", default="randfile") |
||||
|
parser.add_argument('-n', '--num-files', help="how many files to write (default 1)", type=int, metavar="FILES", default=1) |
||||
|
parser.add_argument('-F', '--force', help='overwrite any file already present', action='store_true') |
||||
|
parser.add_argument('-p', '--progress', help='write an "x" for every file completed and a "." for every %d bytes' % CHUNKSIZE, action='store_true') |
||||
|
args = parser.parse_args() |
||||
|
|
||||
|
for i in xrange(args.num_files): |
||||
|
bytesleft = args.num_bytes |
||||
|
outputfname = args.output_file_prefix + "." + str(i) |
||||
|
|
||||
|
if args.force: |
||||
|
f = open(outputfname, "wb") |
||||
|
else: |
||||
|
flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0) |
||||
|
fd = os.open(outputfname, flags) |
||||
|
f = os.fdopen(fd, "wb") |
||||
|
zs = [0]*CHUNKSIZE |
||||
|
ts = [256]*CHUNKSIZE |
||||
|
while bytesleft >= CHUNKSIZE: |
||||
|
f.write(''.join(map(chr, map(randrange, zs, ts)))) |
||||
|
bytesleft -= CHUNKSIZE |
||||
|
|
||||
|
if args.progress: |
||||
|
sys.stdout.write(".") ; sys.stdout.flush() |
||||
|
|
||||
|
zs = [0]*bytesleft |
||||
|
ts = [256]*bytesleft |
||||
|
f.write(''.join(map(chr, map(randrange, zs, ts)))) |
||||
|
|
||||
|
if args.progress: |
||||
|
sys.stdout.write("x") ; sys.stdout.flush() |
||||
|
|
||||
|
if __name__ == "__main__": |
||||
|
main() |
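# Example invocation (flag names as defined by the argparse setup above):
#
#   $ randfile -b 1048576 -n 2 -p
#   .x.x
#
# writes randfile.0 and randfile.1, each holding 2**20 pseudorandom bytes;
# "." marks each full chunk written and "x" marks each finished file.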
@ -0,0 +1,30 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
# output all but the first N lines of a file |
||||
|
|
||||
|
# Allen Short and Jp Calderone wrote this cool version: |
||||
|
import itertools, sys |
||||
|
|
||||
|
def main(): |
||||
|
K = int(sys.argv[1]) |
||||
|
if len(sys.argv) > 2: |
||||
|
fname = sys.argv[2] |
||||
|
inf = open(fname, 'r') |
||||
|
else: |
||||
|
inf = sys.stdin |
||||
|
|
||||
|
sys.stdout.writelines(itertools.islice(inf, K, None)) |
||||
|
|
||||
|
if __name__ == '__main__': |
||||
|
main() |
||||
|
|
||||
|
# thus replacing my dumb version: |
||||
|
# # from the Python Standard Library |
||||
|
# import sys |
||||
|
# |
||||
|
# i = K |
||||
|
# for l in sys.stdin.readlines(): |
||||
|
# if i: |
||||
|
# i -= 1 |
||||
|
# else: |
||||
|
# print l, |
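# Example: drop the first 3 lines of a file, equivalent to `tail -n +4`
# (the script name is whatever this file is installed as):
#
#   $ tail.py 3 notes.txt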
@ -0,0 +1,96 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import binascii, codecs, encodings, locale, os, sys, zlib |
||||
|
|
||||
|
import argparse |
||||
|
|
||||
|
def listcodecs(dir): |
||||
|
names = [] |
||||
|
for filename in os.listdir(dir): |
||||
|
if filename[-3:] != '.py': |
||||
|
continue |
||||
|
name = filename[:-3] |
||||
|
# Check whether we've found a true codec |
||||
|
try: |
||||
|
codecs.lookup(name) |
||||
|
except LookupError: |
||||
|
# Codec not found |
||||
|
continue |
||||
|
except Exception: |
||||
|
# Probably an error from importing the codec; still it's |
||||
|
# a valid codec name |
||||
|
pass |
||||
|
names.append(name) |
||||
|
return names |
||||
|
|
||||
|
def listem(): |
||||
|
return listcodecs(encodings.__path__[0]) |
||||
|
|
||||
|
def _canonical_encoding(encoding): |
||||
|
if encoding is None: |
||||
|
encoding = 'utf-8' |
||||
|
encoding = encoding.lower() |
||||
|
if encoding == "cp65001": |
||||
|
encoding = 'utf-8' |
||||
|
elif encoding == "us-ascii" or encoding == "646": |
||||
|
encoding = 'ascii' |
||||
|
|
||||
|
# sometimes Python returns an encoding name that it doesn't support for conversion |
||||
|
# fail early if this happens |
||||
|
try: |
||||
|
u"test".encode(encoding) |
||||
|
except (LookupError, AttributeError): |
||||
|
raise AssertionError("The character encoding '%s' is not supported for conversion." % (encoding,)) |
||||
|
|
||||
|
return encoding |
||||
|
|
||||
|
def get_output_encoding(): |
||||
|
return _canonical_encoding(sys.stdout.encoding or locale.getpreferredencoding()) |
||||
|
|
||||
|
def get_argv_encoding(): |
||||
|
if sys.platform == 'win32': |
||||
|
# Unicode arguments are not supported on Windows yet; see Tahoe-LAFS tickets #565 and #1074. |
||||
|
return 'ascii' |
||||
|
else: |
||||
|
return get_output_encoding() |
||||
|
|
||||
|
output_encoding = get_output_encoding() |
||||
|
argv_encoding = get_argv_encoding() |
||||
|
|
||||
|
def type_unicode(argstr): |
||||
|
return argstr.decode(argv_encoding) |
||||
|
|
||||
|
def main(): |
||||
|
parser = argparse.ArgumentParser(prog="try_decoding", description="Try decoding some bytes with all sorts of different codecs and print out any that decode.") |
||||
|
|
||||
|
parser.add_argument('inputfile', help='file to decode or "-" for stdin', type=argparse.FileType('rb'), metavar='INF') |
||||
|
parser.add_argument('-t', '--target', help='unicode string to match against (if any)', type=type_unicode, metavar='T') |
||||
|
parser.add_argument('-a', '--accept-bytes', help='include codecs which return bytes instead of returning unicode (they will be marked with "!!!" in the output)', action='store_true') |
||||
|
|
||||
|
args = parser.parse_args() |
||||
|
|
||||
|
inb = args.inputfile.read() |
||||
|
|
||||
|
for codec in listem(): |
||||
|
try: |
||||
|
u = inb.decode(codec) |
||||
|
except (UnicodeDecodeError, IOError, TypeError, IndexError, UnicodeError, ValueError, zlib.error, binascii.Error): |
||||
|
pass |
||||
|
else: |
||||
|
if isinstance(u, unicode): |
||||
|
if args.target: |
||||
|
if args.target != u: |
||||
|
continue |
||||
|
print "%19s" % codec, |
||||
|
print ':', |
||||
|
print u.encode(output_encoding) |
||||
|
else: |
||||
|
if not args.accept_bytes: |
||||
|
continue |
||||
|
print "%19s" % codec, |
||||
|
print "!!! ", |
||||
|
print ':', |
||||
|
print u |
||||
|
|
||||
|
if __name__ == "__main__": |
||||
|
main() |
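# A few concrete data points for _canonical_encoding(), as implemented above:
#
#   >>> _canonical_encoding(None)
#   'utf-8'
#   >>> _canonical_encoding("CP65001")
#   'utf-8'
#   >>> _canonical_encoding("US-ASCII")
#   'ascii'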
@ -0,0 +1,19 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
# randomize the lines of stdin or a file |
||||
|
|
||||
|
import random, sys |
||||
|
|
||||
|
def main(): |
||||
|
if len(sys.argv) > 1: |
||||
|
fname = sys.argv[1] |
||||
|
inf = open(fname, 'r') |
||||
|
else: |
||||
|
inf = sys.stdin |
||||
|
|
||||
|
lines = inf.readlines() |
||||
|
random.shuffle(lines) |
||||
|
sys.stdout.writelines(lines) |
||||
|
|
||||
|
if __name__ == '__main__': |
||||
|
main() |
@ -0,0 +1,26 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import exceptions |
||||
|
class UsageError(exceptions.Exception): pass |
||||
|
|
||||
|
import sys |
||||
|
import pkg_resources |
||||
|
|
||||
|
def main(): |
||||
|
if len(sys.argv) <= 1: |
||||
|
raise UsageError, "USAGE: verinfo DISTRIBUTIONNAME [PACKAGENAME]" |
||||
|
DISTNAME=sys.argv[1] |
||||
|
if len(sys.argv) >= 3: |
||||
|
PACKNAME=sys.argv[2] |
||||
|
else: |
||||
|
PACKNAME=DISTNAME |
||||
|
print "pkg_resources.require('%s') => " % (DISTNAME,), |
||||
|
print pkg_resources.require(DISTNAME) |
||||
|
print "import %s;print %s => " % (PACKNAME, PACKNAME,), |
||||
|
x = __import__(PACKNAME) |
||||
|
print x |
||||
|
print "import %s;print %s.__version__ => " % (PACKNAME, PACKNAME,), |
||||
|
print hasattr(x, '__version__') and x.__version__ |
||||
|
|
||||
|
if __name__ == "__main__": |
||||
|
main() |
@ -0,0 +1,48 @@ |
|||||
|
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
def commonprefix(l): |
||||
|
cp = [] |
||||
|
for i in range(min(map(len, l))): |
||||
|
c = l[0][i] |
||||
|
for s in l[1:]: |
||||
|
if s[i] != c: |
||||
|
return ''.join(cp) |
||||
|
cp.append(c) |
||||
|
return ''.join(cp) |
||||
|
|
||||
|
def commonsuffix(l): |
||||
|
cp = [] |
||||
|
for i in range(min(map(len, l))): |
||||
|
c = l[0][-i-1] |
||||
|
for s in l[1:]: |
||||
|
if s[-i-1] != c: |
||||
|
cp.reverse() |
||||
|
return ''.join(cp) |
||||
|
cp.append(c) |
||||
|
cp.reverse() |
||||
|
return ''.join(cp) |
||||
|
|
||||
|
def split_on_newlines(s): |
||||
|
""" |
||||
|
Splits s on all of the three newline sequences: "\r\n", "\r", or "\n". |
||||
|
""" |
||||
|
res = [] |
||||
|
for x in s.split('\r\n'): |
||||
|
for y in x.split('\r'): |
||||
|
res.extend(y.split('\n')) |
||||
|
return res |
||||
|
|
||||
|
def pop_trailing_newlines(s): |
||||
|
""" |
||||
|
@return a copy of s minus any trailing "\n"'s or "\r"'s |
||||
|
""" |
||||
|
i = len(s)-1 |
||||
|
if i < 0: |
||||
|
return '' |
||||
|
while s[i] in ('\n', '\r',): |
||||
|
i = i - 1 |
||||
|
if i < 0: |
||||
|
return '' |
||||
|
return s[:i+1] |
||||
|
|
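# Doctest-style examples for the helpers above (sample inputs are made up):
#
#   >>> commonprefix(["flower", "flow", "flight"])
#   'fl'
#   >>> commonsuffix(["testing", "running", "ing"])
#   'ing'
#   >>> split_on_newlines("a\r\nb\rc\nd")
#   ['a', 'b', 'c', 'd']
#   >>> pop_trailing_newlines("line\r\n")
#   'line'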
@ -0,0 +1,15 @@ |
|||||
|
import decimal |
||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
class TestDecode(TestCase): |
||||
|
def test_decimal(self): |
||||
|
rval = json.loads('1.1', parse_float=decimal.Decimal) |
||||
|
self.assert_(isinstance(rval, decimal.Decimal)) |
||||
|
self.assertEquals(rval, decimal.Decimal('1.1')) |
||||
|
|
||||
|
def test_float(self): |
||||
|
rval = json.loads('1', parse_int=float) |
||||
|
self.assert_(isinstance(rval, float)) |
||||
|
self.assertEquals(rval, 1.0) |
@ -0,0 +1,9 @@ |
|||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
class TestDefault(TestCase): |
||||
|
def test_default(self): |
||||
|
self.assertEquals( |
||||
|
json.dumps(type, default=repr), |
||||
|
json.dumps(repr(type))) |
@ -0,0 +1,13 @@ |
|||||
|
from unittest import TestCase |
||||
|
from cStringIO import StringIO |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
class TestDump(TestCase): |
||||
|
def test_dump(self): |
||||
|
sio = StringIO() |
||||
|
json.dump({}, sio) |
||||
|
self.assertEquals(sio.getvalue(), '{}') |
||||
|
|
||||
|
def test_dumps(self): |
||||
|
self.assertEquals(json.dumps({}), '{}') |
@ -0,0 +1,36 @@ |
|||||
|
from twisted.trial.unittest import SkipTest, TestCase |
||||
|
|
||||
|
from pyutil.jsonutil import encoder |
||||
|
|
||||
|
CASES = [ |
||||
|
(u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'), |
||||
|
(u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), |
||||
|
(u'controls', '"controls"'), |
||||
|
(u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), |
||||
|
(u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), |
||||
|
(u' s p a c e d ', '" s p a c e d "'), |
||||
|
(u'\U0001d120', '"\\ud834\\udd20"'), |
||||
|
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), |
||||
|
('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), |
||||
|
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), |
||||
|
('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'), |
||||
|
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), |
||||
|
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'), |
||||
|
(u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), |
||||
|
(u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), |
||||
|
(u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), |
||||
|
] |
||||
|
|
||||
|
class TestEncodeBaseStringAscii(TestCase): |
||||
|
def test_py_encode_basestring_ascii(self): |
||||
|
self._test_encode_basestring_ascii(encoder.py_encode_basestring_ascii) |
||||
|
|
||||
|
def test_c_encode_basestring_ascii(self): |
||||
|
if not encoder.c_encode_basestring_ascii: |
||||
|
raise SkipTest("no C extension speedups available to test") |
||||
|
self._test_encode_basestring_ascii(encoder.c_encode_basestring_ascii) |
||||
|
|
||||
|
def _test_encode_basestring_ascii(self, encode_basestring_ascii): |
||||
|
for input_string, expect in CASES: |
||||
|
result = encode_basestring_ascii(input_string) |
||||
|
self.assertEquals(result, expect) |
@ -0,0 +1,76 @@ |
|||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
# Fri Dec 30 18:57:26 2005 |
||||
|
JSONDOCS = [ |
||||
|
# http://json.org/JSON_checker/test/fail1.json |
||||
|
'"A JSON payload should be an object or array, not a string."', |
||||
|
# http://json.org/JSON_checker/test/fail2.json |
||||
|
'["Unclosed array"', |
||||
|
# http://json.org/JSON_checker/test/fail3.json |
||||
|
'{unquoted_key: "keys must be quoted"}', |
||||
|
# http://json.org/JSON_checker/test/fail4.json |
||||
|
'["extra comma",]', |
||||
|
# http://json.org/JSON_checker/test/fail5.json |
||||
|
'["double extra comma",,]', |
||||
|
# http://json.org/JSON_checker/test/fail6.json |
||||
|
'[ , "<-- missing value"]', |
||||
|
# http://json.org/JSON_checker/test/fail7.json |
||||
|
'["Comma after the close"],', |
||||
|
# http://json.org/JSON_checker/test/fail8.json |
||||
|
'["Extra close"]]', |
||||
|
# http://json.org/JSON_checker/test/fail9.json |
||||
|
'{"Extra comma": true,}', |
||||
|
# http://json.org/JSON_checker/test/fail10.json |
||||
|
'{"Extra value after close": true} "misplaced quoted value"', |
||||
|
# http://json.org/JSON_checker/test/fail11.json |
||||
|
'{"Illegal expression": 1 + 2}', |
||||
|
# http://json.org/JSON_checker/test/fail12.json |
||||
|
'{"Illegal invocation": alert()}', |
||||
|
# http://json.org/JSON_checker/test/fail13.json |
||||
|
'{"Numbers cannot have leading zeroes": 013}', |
||||
|
# http://json.org/JSON_checker/test/fail14.json |
||||
|
'{"Numbers cannot be hex": 0x14}', |
||||
|
# http://json.org/JSON_checker/test/fail15.json |
||||
|
'["Illegal backslash escape: \\x15"]', |
||||
|
# http://json.org/JSON_checker/test/fail16.json |
||||
|
'["Illegal backslash escape: \\\'"]', |
||||
|
# http://json.org/JSON_checker/test/fail17.json |
||||
|
'["Illegal backslash escape: \\017"]', |
||||
|
# http://json.org/JSON_checker/test/fail18.json |
||||
|
'[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', |
||||
|
# http://json.org/JSON_checker/test/fail19.json |
||||
|
'{"Missing colon" null}', |
||||
|
# http://json.org/JSON_checker/test/fail20.json |
||||
|
'{"Double colon":: null}', |
||||
|
# http://json.org/JSON_checker/test/fail21.json |
||||
|
'{"Comma instead of colon", null}', |
||||
|
# http://json.org/JSON_checker/test/fail22.json |
||||
|
'["Colon instead of comma": false]', |
||||
|
# http://json.org/JSON_checker/test/fail23.json |
||||
|
'["Bad value", truth]', |
||||
|
# http://json.org/JSON_checker/test/fail24.json |
||||
|
"['single quote']", |
||||
|
# http://code.google.com/p/simplejson/issues/detail?id=3 |
||||
|
u'["A\u001FZ control characters in string"]', |
||||
|
] |
||||
|
|
||||
|
SKIPS = { |
||||
|
1: "why not have a string payload?", |
||||
|
18: "spec doesn't specify any nesting limitations", |
||||
|
} |
||||
|
|
||||
|
class TestFail(TestCase): |
||||
|
def test_failures(self): |
||||
|
for idx, doc in enumerate(JSONDOCS): |
||||
|
idx = idx + 1 |
||||
|
if idx in SKIPS: |
||||
|
json.loads(doc) |
||||
|
continue |
||||
|
try: |
||||
|
json.loads(doc) |
||||
|
except ValueError: |
||||
|
pass |
||||
|
else: |
||||
|
self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) |
@ -0,0 +1,9 @@ |
|||||
|
import math |
||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
class TestFloat(TestCase): |
||||
|
def test_floats(self): |
||||
|
for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100]: |
||||
|
self.assertEquals(float(json.dumps(num)), num) |
@ -0,0 +1,41 @@ |
|||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
import textwrap |
||||
|
|
||||
|
class TestIndent(TestCase): |
||||
|
def test_indent(self): |
||||
|
h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', |
||||
|
{'nifty': 87}, {'field': 'yes', 'morefield': False} ] |
||||
|
|
||||
|
expect = textwrap.dedent("""\ |
||||
|
[ |
||||
|
[ |
||||
|
"blorpie" |
||||
|
], |
||||
|
[ |
||||
|
"whoops" |
||||
|
], |
||||
|
[], |
||||
|
"d-shtaeou", |
||||
|
"d-nthiouh", |
||||
|
"i-vhbjkhnth", |
||||
|
{ |
||||
|
"nifty": 87 |
||||
|
}, |
||||
|
{ |
||||
|
"field": "yes", |
||||
|
"morefield": false |
||||
|
} |
||||
|
]""") |
||||
|
|
||||
|
|
||||
|
d1 = json.dumps(h) |
||||
|
d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) |
||||
|
|
||||
|
h1 = json.loads(d1) |
||||
|
h2 = json.loads(d2) |
||||
|
|
||||
|
self.assertEquals(h1, h) |
||||
|
self.assertEquals(h2, h) |
||||
|
self.assertEquals(d2, expect) |
@ -0,0 +1,71 @@ |
|||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
# from http://json.org/JSON_checker/test/pass1.json |
||||
|
JSON = r''' |
||||
|
[ |
||||
|
"JSON Test Pattern pass1", |
||||
|
{"object with 1 member":["array with 1 element"]}, |
||||
|
{}, |
||||
|
[], |
||||
|
-42, |
||||
|
true, |
||||
|
false, |
||||
|
null, |
||||
|
{ |
||||
|
"integer": 1234567890, |
||||
|
"real": -9876.543210, |
||||
|
"e": 0.123456789e-12, |
||||
|
"E": 1.234567890E+34, |
||||
|
"": 23456789012E666, |
||||
|
"zero": 0, |
||||
|
"one": 1, |
||||
|
"space": " ", |
||||
|
"quote": "\"", |
||||
|
"backslash": "\\", |
||||
|
"controls": "\b\f\n\r\t", |
||||
|
"slash": "/ & \/", |
||||
|
"alpha": "abcdefghijklmnopqrstuvwyz", |
||||
|
"ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", |
||||
|
"digit": "0123456789", |
||||
|
"special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?", |
||||
|
"hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", |
||||
|
"true": true, |
||||
|
"false": false, |
||||
|
"null": null, |
||||
|
"array":[ ], |
||||
|
"object":{ }, |
||||
|
"address": "50 St. James Street", |
||||
|
"url": "http://www.JSON.org/", |
||||
|
"comment": "// /* <!-- --", |
||||
|
"# -- --> */": " ", |
||||
|
" s p a c e d " :[1,2 , 3 |
||||
|
|
||||
|
, |
||||
|
|
||||
|
4 , 5 , 6 ,7 ], |
||||
|
"compact": [1,2,3,4,5,6,7], |
||||
|
"jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", |
||||
|
"quotes": "" \u0022 %22 0x22 034 "", |
||||
|
"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" |
||||
|
: "A key can be any string" |
||||
|
}, |
||||
|
0.5 ,98.6 |
||||
|
, |
||||
|
99.44 |
||||
|
, |
||||
|
|
||||
|
1066 |
||||
|
|
||||
|
|
||||
|
,"rosebud"] |
||||
|
''' |
||||
|
|
||||
|
class TestPass1(TestCase): |
||||
|
def test_parse(self): |
||||
|
# test in/out equivalence and parsing |
||||
|
res = json.loads(JSON) |
||||
|
out = json.dumps(res) |
||||
|
self.assertEquals(res, json.loads(out)) |
||||
|
self.failUnless("2.3456789012E+676" in json.dumps(res, allow_nan=False)) |
@ -0,0 +1,14 @@ |
|||||
|
from unittest import TestCase |
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
# from http://json.org/JSON_checker/test/pass2.json |
||||
|
JSON = r''' |
||||
|
[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] |
||||
|
''' |
||||
|
|
||||
|
class TestPass2(TestCase): |
||||
|
def test_parse(self): |
||||
|
# test in/out equivalence and parsing |
||||
|
res = json.loads(JSON) |
||||
|
out = json.dumps(res) |
||||
|
self.assertEquals(res, json.loads(out)) |
@ -0,0 +1,20 @@ |
|||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
# from http://json.org/JSON_checker/test/pass3.json |
||||
|
JSON = r''' |
||||
|
{ |
||||
|
"JSON Test Pattern pass3": { |
||||
|
"The outermost value": "must be an object or array.", |
||||
|
"In this test": "It is an object." |
||||
|
} |
||||
|
} |
||||
|
''' |
||||
|
|
||||
|
class TestPass3(TestCase): |
||||
|
def test_parse(self): |
||||
|
# test in/out equivalence and parsing |
||||
|
res = json.loads(JSON) |
||||
|
out = json.dumps(res) |
||||
|
self.assertEquals(res, json.loads(out)) |
@ -0,0 +1,67 @@ |
|||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
class JSONTestObject: |
||||
|
pass |
||||
|
|
||||
|
|
||||
|
class RecursiveJSONEncoder(json.JSONEncoder): |
||||
|
recurse = False |
||||
|
def default(self, o): |
||||
|
if o is JSONTestObject: |
||||
|
if self.recurse: |
||||
|
return [JSONTestObject] |
||||
|
else: |
||||
|
return 'JSONTestObject' |
||||
|
return json.JSONEncoder.default(self, o) |
||||
|
|
||||
|
|
||||
|
class TestRecursion(TestCase): |
||||
|
def test_listrecursion(self): |
||||
|
x = [] |
||||
|
x.append(x) |
||||
|
try: |
||||
|
json.dumps(x) |
||||
|
except ValueError: |
||||
|
pass |
||||
|
else: |
||||
|
self.fail("didn't raise ValueError on list recursion") |
||||
|
x = [] |
||||
|
y = [x] |
||||
|
x.append(y) |
||||
|
try: |
||||
|
json.dumps(x) |
||||
|
except ValueError: |
||||
|
pass |
||||
|
else: |
||||
|
self.fail("didn't raise ValueError on alternating list recursion") |
||||
|
y = [] |
||||
|
x = [y, y] |
||||
|
# ensure that the marker is cleared |
||||
|
json.dumps(x) |
||||
|
|
||||
|
def test_dictrecursion(self): |
||||
|
x = {} |
||||
|
x["test"] = x |
||||
|
try: |
||||
|
json.dumps(x) |
||||
|
except ValueError: |
||||
|
pass |
||||
|
else: |
||||
|
self.fail("didn't raise ValueError on dict recursion") |
||||
|
x = {} |
||||
|
{"a": x, "b": x} |
||||
|
# ensure that the marker is cleared |
||||
|
json.dumps(x) |
||||
|
|
||||
|
def test_defaultrecursion(self): |
||||
|
enc = RecursiveJSONEncoder() |
||||
|
self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"') |
||||
|
enc.recurse = True |
||||
|
try: |
||||
|
enc.encode(JSONTestObject) |
||||
|
except ValueError: |
||||
|
pass |
||||
|
else: |
||||
|
self.fail("didn't raise ValueError on default recursion") |
@ -0,0 +1,42 @@ |
|||||
|
import textwrap |
||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
|
||||
|
class TestSeparators(TestCase): |
||||
|
def test_separators(self): |
||||
|
h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', |
||||
|
{'nifty': 87}, {'field': 'yes', 'morefield': False} ] |
||||
|
|
||||
|
expect = textwrap.dedent("""\ |
||||
|
[ |
||||
|
[ |
||||
|
"blorpie" |
||||
|
] , |
||||
|
[ |
||||
|
"whoops" |
||||
|
] , |
||||
|
[] , |
||||
|
"d-shtaeou" , |
||||
|
"d-nthiouh" , |
||||
|
"i-vhbjkhnth" , |
||||
|
{ |
||||
|
"nifty" : 87 |
||||
|
} , |
||||
|
{ |
||||
|
"field" : "yes" , |
||||
|
"morefield" : false |
||||
|
} |
||||
|
]""") |
||||
|
|
||||
|
|
||||
|
d1 = json.dumps(h) |
||||
|
d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : ')) |
||||
|
|
||||
|
h1 = json.loads(d1) |
||||
|
h2 = json.loads(d2) |
||||
|
|
||||
|
self.assertEquals(h1, h) |
||||
|
self.assertEquals(h2, h) |
||||
|
self.assertEquals(d2, expect) |
@ -0,0 +1,18 @@ |
|||||
|
from twisted.trial.unittest import SkipTest, TestCase |
||||
|
|
||||
|
from pyutil.jsonutil import decoder |
||||
|
from pyutil.jsonutil import encoder |
||||
|
|
||||
|
class TestSpeedups(TestCase): |
||||
|
def test_scanstring(self): |
||||
|
if not encoder.c_encode_basestring_ascii: |
||||
|
raise SkipTest("no C extension speedups available to test") |
||||
|
self.assertEquals(decoder.scanstring.__module__, "simplejson._speedups") |
||||
|
self.assert_(decoder.scanstring is decoder.c_scanstring) |
||||
|
|
||||
|
def test_encode_basestring_ascii(self): |
||||
|
if not encoder.c_encode_basestring_ascii: |
||||
|
raise SkipTest("no C extension speedups available to test") |
||||
|
self.assertEquals(encoder.encode_basestring_ascii.__module__, "simplejson._speedups") |
||||
|
self.assert_(encoder.encode_basestring_ascii is |
||||
|
encoder.c_encode_basestring_ascii) |
@ -0,0 +1,55 @@ |
|||||
|
from unittest import TestCase |
||||
|
|
||||
|
from pyutil import jsonutil as json |
||||
|
|
||||
|
class TestUnicode(TestCase): |
||||
|
def test_encoding1(self): |
||||
|
encoder = json.JSONEncoder(encoding='utf-8') |
||||
|
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' |
||||
|
s = u.encode('utf-8') |
||||
|
ju = encoder.encode(u) |
||||
|
js = encoder.encode(s) |
||||
|
self.assertEquals(ju, js) |
||||
|
|
||||
|
def test_encoding2(self): |
||||
|
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' |
||||
|
s = u.encode('utf-8') |
||||
|
ju = json.dumps(u, encoding='utf-8') |
||||
|
js = json.dumps(s, encoding='utf-8') |
||||
|
self.assertEquals(ju, js) |
||||
|
|
||||
|
def test_encoding3(self): |
||||
|
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' |
||||
|
j = json.dumps(u) |
||||
|
self.assertEquals(j, '"\\u03b1\\u03a9"') |
||||
|
|
||||
|
def test_encoding4(self): |
||||
|
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' |
||||
|
j = json.dumps([u]) |
||||
|
self.assertEquals(j, '["\\u03b1\\u03a9"]') |
||||
|
|
||||
|
def test_encoding5(self): |
||||
|
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' |
||||
|
j = json.dumps(u, ensure_ascii=False) |
||||
|
self.assertEquals(j, u'"%s"' % (u,)) |
||||
|
|
||||
|
def test_encoding6(self): |
||||
|
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' |
||||
|
j = json.dumps([u], ensure_ascii=False) |
||||
|
self.assertEquals(j, u'["%s"]' % (u,)) |
||||
|
|
||||
|
def test_big_unicode_encode(self): |
||||
|
u = u'\U0001d120' |
||||
|
self.assertEquals(json.dumps(u), '"\\ud834\\udd20"') |
||||
|
self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') |
||||
|
|
||||
|
def test_big_unicode_decode(self): |
||||
|
u = u'z\U0001d120x' |
||||
|
self.assertEquals(json.loads('"' + u + '"'), u) |
||||
|
self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u) |
||||
|
|
||||
|
def test_unicode_decode(self): |
||||
|
for i in range(0, 0xd7ff): |
||||
|
u = unichr(i) |
||||
|
js = '"\\u%04x"' % (i,) |
||||
|
self.assertEquals(json.loads(js), u) |
@ -0,0 +1,18 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn |
||||
|
# portions Copyright (c) 2001 Autonomous Zone Industries |
||||
|
# This file is part of pyutil; see README.rst for licensing terms. |
||||
|
|
||||
|
# Python Standard Library modules |
||||
|
import unittest |
||||
|
|
||||
|
from pyutil import assertutil |
||||
|
|
||||
|
class Testy(unittest.TestCase): |
||||
|
def test_bad_precond(self): |
||||
|
adict=23 |
||||
|
try: |
||||
|
assertutil.precondition(isinstance(adict, dict), "adict is required to be a dict.", 23, adict=adict, foo=None) |
||||
|
except AssertionError, le: |
||||
|
self.failUnless(le.args[0] == "precondition: 'adict is required to be a dict.' <type 'str'>, 23 <type 'int'>, foo: None <type 'NoneType'>, 'adict': 23 <type 'int'>") |
@ -0,0 +1,33 @@ |
|||||
|
import unittest |
||||
|
|
||||
|
import os |
||||
|
|
||||
|
from pyutil import fileutil |
||||
|
|
||||
|
class FileUtil(unittest.TestCase): |
||||
|
def mkdir(self, basedir, path, mode=0777): |
||||
|
fn = os.path.join(basedir, path) |
||||
|
fileutil.make_dirs(fn, mode) |
||||
|
|
||||
|
def touch(self, basedir, path, mode=None, data="touch\n"): |
||||
|
fn = os.path.join(basedir, path) |
||||
|
f = open(fn, "w") |
||||
|
f.write(data) |
||||
|
f.close() |
||||
|
if mode is not None: |
||||
|
os.chmod(fn, mode) |
||||
|
|
||||
|
def test_du(self): |
||||
|
basedir = "util/FileUtil/test_du" |
||||
|
fileutil.make_dirs(basedir) |
||||
|
d = os.path.join(basedir, "space-consuming") |
||||
|
self.mkdir(d, "a/b") |
||||
|
self.touch(d, "a/b/1.txt", data="a"*10) |
||||
|
self.touch(d, "a/b/2.txt", data="b"*11) |
||||
|
self.mkdir(d, "a/c") |
||||
|
self.touch(d, "a/c/1.txt", data="c"*12) |
||||
|
self.touch(d, "a/c/2.txt", data="d"*13) |
||||
|
|
||||
|
used = fileutil.du(basedir) |
||||
|
self.failUnlessEqual(10+11+12+13, used) |
||||
|
|
@ -0,0 +1,33 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
try: |
||||
|
from twisted.trial import unittest |
||||
|
unittest # http://divmod.org/trac/ticket/1499 |
||||
|
except ImportError, le: |
||||
|
print "Skipping test_iputil since it requires Twisted and Twisted could not be imported: %s" % (le,) |
||||
|
else: |
||||
|
from pyutil import iputil, testutil |
||||
|
import re |
||||
|
|
||||
|
DOTTED_QUAD_RE=re.compile("^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$") |
||||
|
|
||||
|
class ListAddresses(testutil.SignalMixin): |
||||
|
def test_get_local_ip_for(self): |
||||
|
addr = iputil.get_local_ip_for('127.0.0.1') |
||||
|
self.failUnless(DOTTED_QUAD_RE.match(addr)) |
||||
|
|
||||
|
def test_list_async(self): |
||||
|
try: |
||||
|
from twisted.trial import unittest |
||||
|
unittest # http://divmod.org/trac/ticket/1499 |
||||
|
from pyutil import iputil |
||||
|
except ImportError, le: |
||||
|
raise unittest.SkipTest("iputil could not be imported (probably because its dependency, Twisted, is not installed). %s" % (le,)) |
||||
|
|
||||
|
d = iputil.get_local_addresses_async() |
||||
|
def _check(addresses): |
||||
|
self.failUnless(len(addresses) >= 1) # always have localhost |
||||
|
self.failUnless("127.0.0.1" in addresses, addresses) |
||||
|
d.addCallbacks(_check) |
||||
|
return d |
||||
|
test_list_async.timeout=2 |
@ -0,0 +1,18 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import unittest |
||||
|
|
||||
|
from decimal import Decimal |
||||
|
|
||||
|
from pyutil import jsonutil |
||||
|
|
||||
|
zero_point_one = Decimal("0.1") |
||||
|
class TestDecimal(unittest.TestCase): |
||||
|
def test_encode(self): |
||||
|
self.failUnlessEqual(jsonutil.dumps(zero_point_one), "0.1") |
||||
|
|
||||
|
def test_decode(self): |
||||
|
self.failUnlessEqual(jsonutil.loads("0.1"), zero_point_one) |
||||
|
|
||||
|
def test_no_exception_on_convergent_parse_float(self): |
||||
|
self.failUnlessEqual(jsonutil.loads("0.1", parse_float=Decimal), zero_point_one) |
@ -0,0 +1,135 @@ |
|||||
|
#!/usr/bin/env python |
||||
|
|
||||
|
import unittest |
||||
|
|
||||
|
from pyutil import mathutil |
||||
|
from pyutil.assertutil import _assert |
||||
|
|
||||
|
class MathUtilTestCase(unittest.TestCase): |
||||
|
def _help_test_is_power_of_k(self, k): |
||||
|
for i in range(2, 40): |
||||
|
_assert(mathutil.is_power_of_k(k**i, k), k, i) |
||||
|
|
||||
|
def test_is_power_of_k(self): |
||||
|
for i in range(2, 5): |
||||
|
self._help_test_is_power_of_k(i) |
||||
|
|
||||
|
def test_log_ceil(self): |
||||
|
f = mathutil.log_ceil |
||||
|
self.failUnlessEqual(f(1, 2), 0) |
||||
|
        self.failUnlessEqual(f(1, 3), 0)
        self.failUnlessEqual(f(2, 2), 1)
        self.failUnlessEqual(f(2, 3), 1)
        self.failUnlessEqual(f(3, 2), 2)

    def test_log_floor(self):
        f = mathutil.log_floor
        self.failUnlessEqual(f(1, 2), 0)
        self.failUnlessEqual(f(1, 3), 0)
        self.failUnlessEqual(f(2, 2), 1)
        self.failUnlessEqual(f(2, 3), 0)
        self.failUnlessEqual(f(3, 2), 1)

    def test_div_ceil(self):
        f = mathutil.div_ceil
        self.failUnlessEqual(f(0, 1), 0)
        self.failUnlessEqual(f(0, 2), 0)
        self.failUnlessEqual(f(0, 3), 0)
        self.failUnlessEqual(f(1, 3), 1)
        self.failUnlessEqual(f(2, 3), 1)
        self.failUnlessEqual(f(3, 3), 1)
        self.failUnlessEqual(f(4, 3), 2)
        self.failUnlessEqual(f(5, 3), 2)
        self.failUnlessEqual(f(6, 3), 2)
        self.failUnlessEqual(f(7, 3), 3)

    def test_next_multiple(self):
        f = mathutil.next_multiple
        self.failUnlessEqual(f(5, 1), 5)
        self.failUnlessEqual(f(5, 2), 6)
        self.failUnlessEqual(f(5, 3), 6)
        self.failUnlessEqual(f(5, 4), 8)
        self.failUnlessEqual(f(5, 5), 5)
        self.failUnlessEqual(f(5, 6), 6)
        self.failUnlessEqual(f(32, 1), 32)
        self.failUnlessEqual(f(32, 2), 32)
        self.failUnlessEqual(f(32, 3), 33)
        self.failUnlessEqual(f(32, 4), 32)
        self.failUnlessEqual(f(32, 5), 35)
        self.failUnlessEqual(f(32, 6), 36)
        self.failUnlessEqual(f(32, 7), 35)
        self.failUnlessEqual(f(32, 8), 32)
        self.failUnlessEqual(f(32, 9), 36)
        self.failUnlessEqual(f(32, 10), 40)
        self.failUnlessEqual(f(32, 11), 33)
        self.failUnlessEqual(f(32, 12), 36)
        self.failUnlessEqual(f(32, 13), 39)
        self.failUnlessEqual(f(32, 14), 42)
        self.failUnlessEqual(f(32, 15), 45)
        self.failUnlessEqual(f(32, 16), 32)
        self.failUnlessEqual(f(32, 17), 34)
        self.failUnlessEqual(f(32, 18), 36)
        self.failUnlessEqual(f(32, 589), 589)

    def test_pad_size(self):
        f = mathutil.pad_size
        self.failUnlessEqual(f(0, 4), 0)
        self.failUnlessEqual(f(1, 4), 3)
        self.failUnlessEqual(f(2, 4), 2)
        self.failUnlessEqual(f(3, 4), 1)
        self.failUnlessEqual(f(4, 4), 0)
        self.failUnlessEqual(f(5, 4), 3)

    def test_is_power_of_k_part_2(self):
        f = mathutil.is_power_of_k
        for i in range(1, 100):
            if i in (1, 2, 4, 8, 16, 32, 64):
                self.failUnless(f(i, 2), "but %d *is* a power of 2" % i)
            else:
                self.failIf(f(i, 2), "but %d is *not* a power of 2" % i)
        for i in range(1, 100):
            if i in (1, 3, 9, 27, 81):
                self.failUnless(f(i, 3), "but %d *is* a power of 3" % i)
            else:
                self.failIf(f(i, 3), "but %d is *not* a power of 3" % i)

    def test_next_power_of_k(self):
        f = mathutil.next_power_of_k
        self.failUnlessEqual(f(0, 2), 1)
        self.failUnlessEqual(f(1, 2), 1)
        self.failUnlessEqual(f(2, 2), 2)
        self.failUnlessEqual(f(3, 2), 4)
        self.failUnlessEqual(f(4, 2), 4)
        for i in range(5, 8): self.failUnlessEqual(f(i, 2), 8, "%d" % i)
        for i in range(9, 16): self.failUnlessEqual(f(i, 2), 16, "%d" % i)
        for i in range(17, 32): self.failUnlessEqual(f(i, 2), 32, "%d" % i)
        for i in range(33, 64): self.failUnlessEqual(f(i, 2), 64, "%d" % i)
        for i in range(65, 100): self.failUnlessEqual(f(i, 2), 128, "%d" % i)

        self.failUnlessEqual(f(0, 3), 1)
        self.failUnlessEqual(f(1, 3), 1)
        self.failUnlessEqual(f(2, 3), 3)
        self.failUnlessEqual(f(3, 3), 3)
        for i in range(4, 9): self.failUnlessEqual(f(i, 3), 9, "%d" % i)
        for i in range(10, 27): self.failUnlessEqual(f(i, 3), 27, "%d" % i)
        for i in range(28, 81): self.failUnlessEqual(f(i, 3), 81, "%d" % i)
        for i in range(82, 200): self.failUnlessEqual(f(i, 3), 243, "%d" % i)

    def test_ave(self):
        f = mathutil.ave
        self.failUnlessEqual(f([1, 2, 3]), 2)
        self.failUnlessEqual(f([0, 0, 0, 4]), 1)
        self.failUnlessAlmostEqual(f([0.0, 1.0, 1.0]), .666666666666)

    def failUnlessEqualContents(self, a, b):
        self.failUnlessEqual(sorted(a), sorted(b))

    def test_permute(self):
        f = mathutil.permute
        self.failUnlessEqualContents(f([]), [])
        self.failUnlessEqualContents(f([1]), [[1]])
        self.failUnlessEqualContents(f([1, 2]), [[1, 2], [2, 1]])
        self.failUnlessEqualContents(f([1, 2, 3]),
                                     [[1, 2, 3], [1, 3, 2],
                                      [2, 1, 3], [2, 3, 1],
                                      [3, 1, 2], [3, 2, 1]])
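
The div_ceil, next_multiple, and pad_size assertions above all pin down the same integer arithmetic. A minimal sketch of those relationships, using only integer operations; this is the arithmetic the tests check, not pyutil's own implementation:

import operator  # stdlib only; nothing beyond integer ops is needed

def div_ceil(n, d):
    # Ceiling division using only integer arithmetic.
    return (n + d - 1) // d

def next_multiple(n, k):
    # Smallest multiple of k that is >= n.
    return div_ceil(n, k) * k

def pad_size(n, k):
    # How much must be added to n to reach a multiple of k.
    return next_multiple(n, k) - n

# Spot-check against the expectations the tests assert:
assert div_ceil(7, 3) == 3
assert next_multiple(32, 9) == 36
assert pad_size(5, 4) == 3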
@@ -0,0 +1,97 @@
#!/usr/bin/env python

"""\
Test time_format.py
"""

import os, time, unittest

from pyutil import time_format, increasing_timer

class TimeUtilTestCase(unittest.TestCase):
    def setUp(self):
        pass

    def tearDown(self):
        pass

    def test_iso8601_utc_time(self, timer=increasing_timer.timer):
        ts1 = time_format.iso_utc(timer.time() - 20)
        ts2 = time_format.iso_utc()
        assert ts1 < ts2, "failed: %s < %s" % (ts1, ts2)
        ts3 = time_format.iso_utc(timer.time() + 20)
        assert ts2 < ts3, "failed: %s < %s" % (ts2, ts3)

    def test_iso_utc_time_to_localseconds(self, timer=increasing_timer.timer):
        # Test three times of the year so that a DST problem would hopefully
        # be triggered.  (86400 is the number of seconds in a day, so these
        # offsets step back one third of a year at a time.)
        t1 = int(timer.time() - 365*86400/3)
        iso_utc_t1 = time_format.iso_utc(t1)
        t1_2 = time_format.iso_utc_time_to_seconds(iso_utc_t1)
        assert t1 == t1_2, (t1, t1_2)
        t1 = int(timer.time() - (365*86400*2/3))
        iso_utc_t1 = time_format.iso_utc(t1)
        t1_2 = time_format.iso_utc_time_to_seconds(iso_utc_t1)
        self.failUnlessEqual(t1, t1_2)
        t1 = int(timer.time())
        iso_utc_t1 = time_format.iso_utc(t1)
        t1_2 = time_format.iso_utc_time_to_seconds(iso_utc_t1)
        self.failUnlessEqual(t1, t1_2)

    def test_epoch(self):
        return self._help_test_epoch()

    def test_epoch_in_London(self):
        # Europe/London is a particularly troublesome timezone.  Nowadays, its
        # offset from GMT is 0.  But in 1970, its offset from GMT was +1 hour.
        # (Apparently in 1970 Britain had redefined standard time to be GMT+1
        # and stayed in standard time all year round, whereas today
        # Europe/London standard time is GMT and Europe/London Daylight
        # Savings Time is GMT+1.)  The current implementation of
        # time_format.iso_utc_time_to_seconds() breaks if the timezone is
        # Europe/London.  (As soon as this unit test is done, I'll change
        # that implementation to something that works even in this case...)
        origtz = os.environ.get('TZ')
        os.environ['TZ'] = "Europe/London"
        if hasattr(time, 'tzset'):
            time.tzset()
        try:
            return self._help_test_epoch()
        finally:
            if origtz is None:
                del os.environ['TZ']
            else:
                os.environ['TZ'] = origtz
            if hasattr(time, 'tzset'):
                time.tzset()

    def _help_test_epoch(self):
        origtzname = time.tzname
        s = time_format.iso_utc_time_to_seconds("1970-01-01T00:00:01Z")
        self.failUnlessEqual(s, 1.0)
        s = time_format.iso_utc_time_to_seconds("1970-01-01_00:00:01Z")
        self.failUnlessEqual(s, 1.0)
        s = time_format.iso_utc_time_to_seconds("1970-01-01 00:00:01Z")
        self.failUnlessEqual(s, 1.0)

        self.failUnlessEqual(time_format.iso_utc(1.0), "1970-01-01 00:00:01Z")
        self.failUnlessEqual(time_format.iso_utc(1.0, sep="_"),
                             "1970-01-01_00:00:01Z")

        now = time.time()
        isostr = time_format.iso_utc(now)
        timestamp = time_format.iso_utc_time_to_seconds(isostr)
        self.failUnlessEqual(int(timestamp), int(now))

        def my_time():
            return 1.0
        self.failUnlessEqual(time_format.iso_utc(t=my_time),
                             "1970-01-01 00:00:01Z")
        self.failUnlessRaises(ValueError,
                              time_format.iso_utc_time_to_seconds,
                              "invalid timestring")
        s = time_format.iso_utc_time_to_seconds("1970-01-01 00:00:01.500Z")
        self.failUnlessEqual(s, 1.5)

        # Look for daylight-savings-related errors.
        thatmomentinmarch = time_format.iso_utc_time_to_seconds("2009-03-20 21:49:02.226536Z")
        self.failUnlessEqual(thatmomentinmarch, 1237585742.226536)
        self.failUnlessEqual(origtzname, time.tzname)
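
The Europe/London test hinges on one requirement: parsing a UTC timestamp must never consult the local timezone. A minimal sketch of a timezone-independent round trip using only the standard library; this is the technique the test demands, not necessarily pyutil's implementation:

import calendar, time

def iso_utc_to_seconds(ts):
    # Parse "1970-01-01 00:00:01Z" (also with 'T' or '_' separators) as UTC.
    ts = ts.replace('T', ' ').replace('_', ' ')
    if ts.endswith('Z'):
        ts = ts[:-1]
    frac = 0.0
    if '.' in ts:
        ts, fracpart = ts.split('.')
        frac = float('0.' + fracpart)
    # calendar.timegm interprets the struct_time as UTC, so the result is
    # independent of the TZ environment variable -- unlike time.mktime,
    # which is what breaks under Europe/London.
    return calendar.timegm(time.strptime(ts, "%Y-%m-%d %H:%M:%S")) + frac

assert iso_utc_to_seconds("1970-01-01T00:00:01Z") == 1.0
assert iso_utc_to_seconds("1970-01-01 00:00:01.500Z") == 1.5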
@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
"""Tests for distutils.version."""
import unittest
import doctest

from pyutil.verlib import NormalizedVersion as V
from pyutil.verlib import IrrationalVersionError
from pyutil.verlib import suggest_normalized_version as suggest

class VersionTestCase(unittest.TestCase):

    versions = ((V('1.0'), '1.0'),
                (V('1.1'), '1.1'),
                (V('1.2.3'), '1.2.3'),
                (V('1.2'), '1.2'),
                (V('1.2.3a4'), '1.2.3a4'),
                (V('1.2c4'), '1.2c4'),
                (V('1.2.3.4'), '1.2.3.4'),
                (V('1.2.3.4.0b3'), '1.2.3.4b3'),
                (V('1.2.0.0.0'), '1.2'),
                (V('1.0.dev345'), '1.0.dev345'),
                (V('1.0.post456.dev623'), '1.0.post456.dev623'))

    def test_basic_versions(self):
        for v, s in self.versions:
            self.assertEquals(str(v), s)

    def test_from_parts(self):
        for v, s in self.versions:
            v2 = V.from_parts(*v.parts)
            self.assertEquals(v, v2)
            self.assertEquals(str(v), str(v2))

    def test_irrational_versions(self):
        irrational = ('1', '1.2a', '1.2.3b', '1.02', '1.2a03',
                      '1.2a3.04', '1.2.dev.2', '1.2dev', '1.2.dev',
                      '1.2.dev2.post2', '1.2.post2.dev3.post4')

        for s in irrational:
            self.assertRaises(IrrationalVersionError, V, s)

    def test_comparison(self):
        r"""
        >>> V('1.2.0') == '1.2'
        Traceback (most recent call last):
        ...
        TypeError: cannot compare NormalizedVersion and str

        >>> V('1.2.0') == V('1.2')
        True
        >>> V('1.2.0') == V('1.2.3')
        False
        >>> V('1.2.0') < V('1.2.3')
        True
        >>> (V('1.0') > V('1.0b2'))
        True
        >>> (V('1.0') > V('1.0c2') > V('1.0c1') > V('1.0b2') > V('1.0b1')
        ...  > V('1.0a2') > V('1.0a1'))
        True
        >>> (V('1.0.0') > V('1.0.0c2') > V('1.0.0c1') > V('1.0.0b2') > V('1.0.0b1')
        ...  > V('1.0.0a2') > V('1.0.0a1'))
        True

        >>> V('1.0') < V('1.0.post456.dev623')
        True

        >>> V('1.0.post456.dev623') < V('1.0.post456') < V('1.0.post1234')
        True

        >>> (V('1.0a1')
        ...  < V('1.0a2.dev456')
        ...  < V('1.0a2')
        ...  < V('1.0a2.1.dev456')  # e.g. need to do a quick post release on 1.0a2
        ...  < V('1.0a2.1')
        ...  < V('1.0b1.dev456')
        ...  < V('1.0b2')
        ...  < V('1.0c1.dev456')
        ...  < V('1.0c1')
        ...  < V('1.0.dev7')
        ...  < V('1.0.dev18')
        ...  < V('1.0.dev456')
        ...  < V('1.0.dev1234')
        ...  < V('1.0')
        ...  < V('1.0.post456.dev623')  # development version of a post release
        ...  < V('1.0.post456'))
        True
        """
        # There must be a simpler way to run the docstring examples.
        doctest.run_docstring_examples(self.test_comparison, globals(),
                                       name='test_comparison')

    def test_suggest_normalized_version(self):
        self.assertEquals(suggest('1.0'), '1.0')
        self.assertEquals(suggest('1.0-alpha1'), '1.0a1')
        self.assertEquals(suggest('1.0c2'), '1.0c2')
        self.assertEquals(suggest('walla walla washington'), None)
        self.assertEquals(suggest('2.4c1'), '2.4c1')

        # from setuptools
        self.assertEquals(suggest('0.4a1.r10'), '0.4a1.post10')
        self.assertEquals(suggest('0.7a1dev-r66608'), '0.7a1.dev66608')
        self.assertEquals(suggest('0.6a9.dev-r41475'), '0.6a9.dev41475')
        self.assertEquals(suggest('2.4preview1'), '2.4c1')
        self.assertEquals(suggest('2.4pre1'), '2.4c1')
        self.assertEquals(suggest('2.1-rc2'), '2.1c2')

        # from pypi
        self.assertEquals(suggest('0.1dev'), '0.1.dev0')
        self.assertEquals(suggest('0.1.dev'), '0.1.dev0')

        # we want to be able to parse Twisted;
        # development versions are like post releases in Twisted
        self.assertEquals(suggest('9.0.0+r2363'), '9.0.0.post2363')

        # pre-releases are using markers like "pre1"
        self.assertEquals(suggest('9.0.0pre1'), '9.0.0c1')

        # we want to be able to parse Tcl-Tk;
        # they use "p1", "p2" for post releases
        self.assertEquals(suggest('1.4p1'), '1.4.post1')
@@ -0,0 +1,23 @@
import unittest

from pyutil import version_class

V = version_class.Version

class T(unittest.TestCase):
    def test_rc_regex_rejects_rc_suffix(self):
        self.failUnlessRaises(ValueError, V, '9.9.9rc9')

    def test_rc_regex_rejects_trailing_garbage(self):
        self.failUnlessRaises(ValueError, V, '9.9.9c9HEYTHISISNTRIGHT')

    def test_comparisons(self):
        self.failUnless(V('1.0') < V('1.1'))
        self.failUnless(V('1.0a1') < V('1.0'))
        self.failUnless(V('1.0a1') < V('1.0b1'))
        self.failUnless(V('1.0b1') < V('1.0c1'))
        self.failUnless(V('1.0a1') < V('1.0a1-r99'))
        self.failUnlessEqual(V('1.0a1.post987'), V('1.0a1-r987'))
        self.failUnlessEqual(str(V('1.0a1.post999')), '1.0.0a1-r999')
        self.failUnlessEqual(str(V('1.0a1-r999')), '1.0.0a1-r999')
        self.failIfEqual(V('1.0a1'), V('1.0a1-r987'))
@@ -0,0 +1,115 @@
#!/usr/bin/env python

# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import random, sys, traceback, unittest

from pyutil.assertutil import _assert

from pyutil import dictutil

class EqButNotIs:
    """
    An object that compares equal to its wrapped value but is a distinct
    object with its own random hash -- useful for catching code that
    confuses equality with identity.
    """
    def __init__(self, x):
        self.x = x
        self.hash = int(random.randrange(0, 2**31))
    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.x,)
    def __hash__(self):
        return self.hash
    def __le__(self, other):
        return self.x <= other
    def __lt__(self, other):
        return self.x < other
    def __ge__(self, other):
        return self.x >= other
    def __gt__(self, other):
        return self.x > other
    def __ne__(self, other):
        return self.x != other
    def __eq__(self, other):
        return self.x == other

class Testy(unittest.TestCase):
    def _help_test_empty_dict(self, klass):
        d1 = klass()
        d2 = klass({})

        self.failUnless(d1 == d2, "d1: %r, d2: %r" % (d1, d2,))
        self.failUnless(len(d1) == 0)
        self.failUnless(len(d2) == 0)

    def _help_test_nonempty_dict(self, klass):
        d1 = klass({'a': 1, 'b': "eggs", 3: "spam",})
        d2 = klass({'a': 1, 'b': "eggs", 3: "spam",})

        self.failUnless(d1 == d2)
        self.failUnless(len(d1) == 3, "%s, %s" % (len(d1), d1,))
        self.failUnless(len(d2) == 3)

    def _help_test_eq_but_notis(self, klass):
        d = klass({'a': 3, 'b': EqButNotIs(3), 'c': 3})
        d.pop('b')

        d.clear()
        d['a'] = 3
        d['b'] = EqButNotIs(3)
        d['c'] = 3
        d.pop('b')

        d.clear()
        d['b'] = EqButNotIs(3)
        d['a'] = 3
        d['c'] = 3
        d.pop('b')

        d.clear()
        d['a'] = EqButNotIs(3)
        d['c'] = 3
        d['a'] = 3

        d.clear()
        fake3 = EqButNotIs(3)
        fake7 = EqButNotIs(7)
        d[fake3] = fake7
        d[3] = 7
        d[3] = 8
        # Note: the `is' checks below rely on CPython caching small ints, so
        # that the literal 8 here is the same object as the 8 stored above.
        _assert(filter(lambda x: x is 8, d.itervalues()))
        _assert(filter(lambda x: x is fake7, d.itervalues()))
        _assert(not filter(lambda x: x is 7, d.itervalues())) # The real 7 should have been ejected by the d[3] = 8.
        _assert(filter(lambda x: x is fake3, d.iterkeys()))
        _assert(filter(lambda x: x is 3, d.iterkeys()))
        d[fake3] = 8

        d.clear()
        d[3] = 7
        fake3 = EqButNotIs(3)
        fake7 = EqButNotIs(7)
        d[fake3] = fake7
        d[3] = 8
        _assert(filter(lambda x: x is 8, d.itervalues()))
        _assert(filter(lambda x: x is fake7, d.itervalues()))
        _assert(not filter(lambda x: x is 7, d.itervalues())) # The real 7 should have been ejected by the d[3] = 8.
        _assert(filter(lambda x: x is fake3, d.iterkeys()))
        _assert(filter(lambda x: x is 3, d.iterkeys()))
        d[fake3] = 8

    def test_em(self):
        for klass in (dictutil.UtilDict, dictutil.NumDict, dictutil.ValueOrderedDict,):
            # print "name of class: ", klass
            for helper in (self._help_test_empty_dict, self._help_test_nonempty_dict, self._help_test_eq_but_notis,):
                # print "name of test func: ", helper
                try:
                    helper(klass)
                except:
                    (etype, evalue, realtb) = sys.exc_info()
                    traceback.print_exception(etype, evalue, realtb)
                    self.fail(evalue)
                    del realtb

def suite():
    suite = unittest.makeSuite(Testy, 'test')
    return suite

if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,36 @@
#!/usr/bin/env python

# Copyright (c) 2002 Luke 'Artimage' Nelson
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import os

try:
    from twisted.trial import unittest
except ImportError, le:
    print "Skipping %s since it requires Twisted and Twisted could not be imported: %s" % (__name__, le,)
else:
    from pyutil import PickleSaver, fileutil

    class Thingie(PickleSaver.PickleSaver):
        def __init__(self, fname, delay=30):
            PickleSaver.PickleSaver.__init__(self, fname=fname, attrs={'tmp_store':'False'}, DELAY=delay)

    class PickleSaverTest(unittest.TestCase):
        def _test_save_now(self, fname):
            thingie = Thingie(fname, delay=0)
            thingie.tmp_store = 'True'
            thingie.lazy_save() # Note: it was constructed with a save delay of 0, so this "lazy" save happens immediately.

        def test_save_now(self):
            """
            Create a lazy-save object, save it with no delay, and check
            that the file exists.
            """
            tempdir = fileutil.NamedTemporaryDirectory()

            fname = os.path.join(tempdir.name, "picklesavertest")
            self._test_save_now(fname)
            self.failUnless(os.path.isfile(fname), "The file [%s] does not exist." % (fname,))

            tempdir.shutdown()
@@ -0,0 +1,24 @@
#!/usr/bin/env python

# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn
# portions Copyright (c) 2001 Autonomous Zone Industries
# This file is part of pyutil; see README.rst for licensing terms.
#
import unittest

from pyutil.xor import xor

# unit tests
def _help_test(xf):
    assert xf('\000', '\000') == '\000'
    assert xf('\001', '\000') == '\001'
    assert xf('\001', '\001') == '\000'
    assert xf('\000\001', '\000\001') == '\000\000'
    assert xf('\100\101', '\000\101') == '\100\000'

class Testy(unittest.TestCase):
    def test_em(self):
        # Some of these may be unavailable on a given platform, hence the
        # callable() guard.
        for xorfunc in (xor.py_xor, xor.py_xor_simple, xor.xor,):
            if callable(xorfunc):
                # print "testing xorfunc ", xorfunc
                _help_test(xorfunc)
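
For reference, one plausible shape of the pure-Python xor functions exercised above, assuming Python 2 byte strings of equal length (the real py_xor may differ in detail):

def py_xor_sketch(a, b):
    # XOR two equal-length Python 2 byte strings, byte by byte.
    assert len(a) == len(b)
    return ''.join(chr(ord(x) ^ ord(y)) for x, y in zip(a, b))

assert py_xor_sketch('\000\001', '\000\001') == '\000\000'
assert py_xor_sketch('\100\101', '\000\101') == '\100\000'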
@@ -0,0 +1,454 @@
#!/usr/bin/env python

# Copyright (c) 2002-2010 Zooko "Zooko" Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import random, unittest

from pyutil.assertutil import _assert

from pyutil.humanreadable import hr
from pyutil import memutil
from pyutil import cache

class Bencher:
    def __init__(self, klass, MAXREPS=2**8, MAXTIME=5):
        print klass
        self.klass = klass
        self.MAXREPS = MAXREPS
        self.MAXTIME = MAXTIME
        self.d = {}
        self.lrun = None

    def _generic_benchmarking_init(self, n):
        self.d.clear()
        self.lrun = self.klass(maxsize=n)
        for i in range(n):
            self.d[i] = i
            self.lrun[n+i] = n+i

    def _benchmark_init(self, n):
        MAXSIZE = n/2
        d2 = self.klass(initialdata=self.d, maxsize=MAXSIZE)
        assert len(d2) == min(len(self.d), MAXSIZE)
        return True

    def _benchmark_update(self, n):
        MAXSIZE = n/2
        d2 = self.klass(maxsize=MAXSIZE)
        assert len(d2) == 0
        d2.update(self.d)
        assert len(d2) == min(len(self.d), MAXSIZE)
        return True

    def _benchmark_insert(self, n):
        MAXSIZE = n/2
        d2 = self.klass(maxsize=MAXSIZE)
        assert len(d2) == 0
        for k, v, in self.d.iteritems():
            d2[k] = v
        assert len(d2) == min(len(self.d), MAXSIZE)
        return True

    def _benchmark_init_and_popitem(self, n):
        MAXSIZE = n/2
        d2 = self.klass(initialdata=self.d, maxsize=MAXSIZE)
        assert len(d2) == min(len(self.d), MAXSIZE)
        for i in range(len(d2), 0, -1):
            assert len(d2) == i
            d2.popitem()
        return True

    def _benchmark_init_and_has_key_and_del(self, n):
        MAXSIZE = n/2
        d2 = self.klass(initialdata=self.d, maxsize=MAXSIZE)
        assert len(d2) == min(len(self.d), MAXSIZE)
        for k in self.d.iterkeys():
            if d2.has_key(k):
                del d2[k]
        return True

    def _benchmark_init_and_remove(self, n):
        MAXSIZE = n/2
        d2 = self.klass(initialdata=self.d, maxsize=MAXSIZE)
        assert len(d2) == min(len(self.d), MAXSIZE)
        for k in self.d.iterkeys():
            d2.remove(k, strictkey=False)
        return True

    def bench(self, BSIZES=(128, 250, 2048, 5000, 2**13, 2**20,)):
        from pyutil import benchutil
        funcs = ("_benchmark_insert", "_benchmark_init_and_has_key_and_del", "_benchmark_init_and_remove", "_benchmark_init_and_popitem", "_benchmark_update", "_benchmark_init",)
        longest = 0
        for func in funcs:
            if len(func) > longest:
                longest = len(func)
        for func in funcs:
            print func + " " * (longest + 1 - len(func))
            for BSIZE in BSIZES:
                f = getattr(self, func)
                benchutil.rep_bench(f, BSIZE, self._generic_benchmarking_init, MAXREPS=self.MAXREPS, MAXTIME=self.MAXTIME)

def quick_bench():
    Bencher(cache.LRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15, 2**16,))
    Bencher(cache.LinkedListLRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15,))
    Bencher(cache.SmallLRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15,))

def slow_bench():
    Bencher(cache.LRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 21)])
    Bencher(cache.LinkedListLRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 21)])
    Bencher(cache.SmallLRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 17)])

MUCHADDINGSIZE = 2**4
MUCHADDINGNUM = 2**4

# The following parameters are for testing for memory leakage.
MIN_SLOPE = 512.0 # If it leaks less than 512.0 bytes per iteration, then it's probably just some kind of noise from the interpreter or something...
SAMPLES = 2**5
# MIN_SLOPE is high because SAMPLES is low, which is because taking a statistically useful number of samples takes too long.
# For a *good* test, turn SAMPLES up as high as you can stand (maybe 2**10) and set MIN_SLOPE to about 1.0.
# For a *really* good test, add a variance measure to memutil.measure_mem_leakage(), and only consider it to be leaking if the slope is > 0.1 *and* is a "pretty good" fit for the data.
# MIN_SLOPE = 1.0
# SAMPLES = 2**10

class Testy(unittest.TestCase):
    def _test_empty_lookup(self, d):
        self.failUnless(d.get('spam') is None)

    def _test_key_error(self, C):
        d = C()
        try:
            d['spam']
            self.fail(d)
        except KeyError:
            pass

    def _test_insert_and_get(self, d):
        d.insert("spam", "eggs")
        d["spam2"] = "eggs2"
        self.failUnless(d.get("spam") == "eggs", str(d))
        self.failUnless(d.get("spam2") == "eggs2")
        self.failUnless(d["spam"] == "eggs")
        self.failUnless(d["spam2"] == "eggs2")

    def _test_insert_and_remove(self, d):
        d.insert('spam', "eggs")
        self.failUnless(d.has_key('spam'))
        self.failUnless(d.get('spam') == "eggs")
        self.failUnless(d['spam'] == "eggs")
        x = d.remove('spam')
        self.failUnless(x == "eggs", "x: %s" % `x`)
        self.failUnless(not d.has_key('spam'))
        d['spam'] = "eggs"
        self.failUnless(d.has_key('spam'))
        self.failUnless(d.get('spam') == "eggs")
        self.failUnless(d['spam'] == "eggs")
        del d['spam']
        self.failUnless(not d.has_key('spam'))

    def _test_setdefault(self, d):
        d.setdefault('spam', "eggs")
        self.failUnless(d.has_key('spam'))
        self.failUnless(d.get('spam') == "eggs")
        self.failUnless(d['spam'] == "eggs")
        x = d.remove('spam')
        self.failUnless(x == "eggs", "x: %s" % `x`)
        self.failUnless(not d.has_key('spam'))

    def _test_extracted_bound_method(self, d):
        insmeth = d.insert
        insmeth('spammy', "eggsy")
        self.failUnless(d.get('spammy') == "eggsy")

    def _test_extracted_unbound_method(self, d):
        insumeth = d.__class__.insert
        insumeth(d, 'spammy', "eggsy")
        self.failUnless(d.get('spammy') == "eggsy")

    def _test_unbound_method(self, C, d):
        umeth = C.insert
        umeth(d, 'spammy', "eggsy")
        self.failUnless(d.get('spammy') == "eggsy")

    def _test_clear(self, d):
        d[11] = 11
        d._assert_invariants()
        self.failUnless(len(d) == 1)
        d.clear()
        d._assert_invariants()
        self.failUnless(len(d) == 0)

    def _test_update(self, d):
        self.failUnless(d._assert_invariants())
        d['b'] = 99
        self.failUnless(d._assert_invariants())
        d2 = { 'a': 0, 'b': 1, 'c': 2,}
        d.update(d2)
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('a') == 0, "d.get('a'): %s" % d.get('a'))
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('b') == 1, "d.get('b'): %s" % d.get('b'))
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('c') == 2)
        self.failUnless(d._assert_invariants())

    def _test_popitem(self, C):
        c = C({"a": 1})
        res = c.popitem()
        _assert(res == ("a", 1,), C, c, res)
        self.failUnless(res == ("a", 1,))

    def _test_iterate_items(self, C):
        c = C({"a": 1})
        i = c.iteritems()
        x = i.next()
        self.failUnless(x == ("a", 1,))
        try:
            i.next()
            self.fail() # Should have gotten StopIteration exception
        except StopIteration:
            pass

    def _test_iterate_keys(self, C):
        c = C({"a": 1})
        i = c.iterkeys()
        x = i.next()
        self.failUnless(x == "a")
        try:
            i.next()
            self.fail() # Should have gotten StopIteration exception
        except StopIteration:
            pass

    def _test_iterate_values(self, C):
        c = C({"a": 1})
        i = c.itervalues()
        x = i.next()
        self.failUnless(x == 1)
        try:
            i.next()
            self.fail() # Should have gotten StopIteration exception
        except StopIteration:
            pass

    def _test_LRU_much_adding_some_removing(self, C):
        c = C(maxsize=MUCHADDINGSIZE)
        for i in range(MUCHADDINGNUM):
            c[i] = i
            if (i % 400) == 0:
                # Note: with MUCHADDINGNUM = 2**4 this branch only fires for
                # i == 0; raise MUCHADDINGNUM to exercise random removal.
                k = random.choice(c.keys())
                del c[k]
        for i in range(MUCHADDINGSIZE):
            c[i] = i
        self.failUnless(len(c) == MUCHADDINGSIZE)

    def _test_LRU_1(self, C):
        c = C(maxsize=10)
        c[11] = 11
        c._assert_invariants()
        c[11] = 11
        c._assert_invariants()
        c[11] = 1001
        c._assert_invariants()
        c[11] = 11
        c._assert_invariants()
        c[11] = 1001
        c._assert_invariants()
        c[11] = 1001
        c._assert_invariants()
        c[11] = 1001
        c._assert_invariants()

    def _test_LRU_2(self, C):
        c = C(maxsize=10)
        c[11] = 11
        c._assert_invariants()
        del c[11]
        c._assert_invariants()
        c[11] = 11
        c._assert_invariants()
        c[11] = 11
        c._assert_invariants()

    def _test_LRU_3(self, C):
        c = C(maxsize=10)
        c[11] = 11
        c._assert_invariants()
        c[11] = 12
        c._assert_invariants()
        c[11] = 13
        c._assert_invariants()
        del c[11]
        c._assert_invariants()
        c[11] = 14
        c._assert_invariants()
        c[11] = 15
        c._assert_invariants()
        c[11] = 16
        c._assert_invariants()

    def _test_LRU_full(self, C):
        c = C(maxsize=10)
        c._assert_invariants()
        for i in xrange(11):
            c._assert_invariants()
            c[i] = i
            c._assert_invariants()
        self.failUnless(len(c) == 10)
        self.failUnless(10 in c.values(), c.values())
        self.failUnless(0 not in c.values())

        del c[1]
        c._assert_invariants()
        self.failUnless(1 not in c.values())
        self.failUnless(len(c) == 9)
        c[11] = 11
        c._assert_invariants()
        self.failUnless(len(c) == 10)
        self.failUnless(1 not in c.values())
        self.failUnless(11 in c.values())
        del c[11]
        c._assert_invariants()

        c[11] = 11
        c._assert_invariants()
        self.failUnless(len(c) == 10)
        self.failUnless(1 not in c.values())
        self.failUnless(11 in c.values())

        c[11] = 11
        c._assert_invariants()
        self.failUnless(len(c) == 10)
        self.failUnless(1 not in c.values())
        self.failUnless(11 in c.values())

        for i in xrange(200):
            c[i] = i
            c._assert_invariants()
        self.failUnless(199 in c.values())
        self.failUnless(190 in c.values())

    def _test_LRU_has_key(self, C):
        c = C(maxsize=10)
        c._assert_invariants()
        for i in xrange(11):
            c._assert_invariants()
            c[i] = i
            c._assert_invariants()
        self.failUnless(len(c) == 10)
        self.failUnless(10 in c.values())
        self.failUnless(0 not in c.values())

        # c.has_key(1) # this touches `1' and makes it fresher so that it will live and `2' will die next time we overfill.
        c[1] = 1 # this touches `1' and makes it fresher so that it will live and `2' will die next time we overfill.
        c._assert_invariants()

        c[99] = 99
        c._assert_invariants()
        self.failUnless(len(c) == 10)
        self.failUnless(1 in c.values(), "C: %s, c.values(): %s" % (hr(C), hr(c.values(),),))
        self.failUnless(not 2 in c.values())
        self.failUnless(99 in c.values())

    def _test_LRU_not_overfull_on_idempotent_add(self, C):
        c = C(maxsize=10)
        for i in xrange(11):
            c[i] = i
        c[1] = "spam"
        # Now 1 is the freshest, so 2 is the next one that would be removed *if* we went over limit.
        c[3] = "eggs"
        self.failUnless(c.has_key(2))
        self.failUnless(len(c) == 10)
        c._assert_invariants()

    def _test_LRU_overflow_on_update(self, C):
        d = C(maxsize=10)
        self.failUnless(d._assert_invariants())
        d2 = {}
        for i in range(12):
            d2[i] = i
        d.update(d2)
        self.failUnless(d._assert_invariants())
        self.failUnless(len(d) == 10)

    def _test_LRU_overflow_on_init(self, C):
        d2 = {}
        for i in range(12):
            d2[i] = i
        d = C(d2, maxsize=10)
        self.failUnless(d._assert_invariants())
        self.failUnless(len(d) == 10)

    def _test_em(self):
        for klass in (cache.LRUCache, cache.SmallLRUCache,):
            for testfunc in (self._test_empty_lookup, self._test_insert_and_get, self._test_insert_and_remove, self._test_extracted_bound_method, self._test_extracted_unbound_method, self._test_clear, self._test_update, self._test_setdefault,):
                testfunc(klass())

            for testfunc in (self._test_popitem, self._test_iterate_items, self._test_iterate_keys, self._test_iterate_values, self._test_key_error, ):
                testfunc(klass)

            self._test_unbound_method(klass, klass())

        for klass in (cache.LRUCache, cache.SmallLRUCache,):
            for testfunc in (self._test_LRU_1, self._test_LRU_2, self._test_LRU_3, self._test_LRU_full, self._test_LRU_has_key, self._test_LRU_not_overfull_on_idempotent_add, self._test_LRU_overflow_on_update, self._test_LRU_overflow_on_init,):
                testfunc(klass)

    def test_em(self):
        self._test_em()

    def _mem_test_LRU_much_adding_some_removing(self):
        for klass in (cache.LRUCache, cache.SmallLRUCache,):
            return self._test_LRU_much_adding_some_removing(klass)

    def test_mem_leakage(self):
        try:
            self._test_mem_leakage()
        except memutil.NotSupportedException:
            print "Skipping memory leak test since measurement of current mem usage isn't implemented on this platform."
            pass
    del test_mem_leakage # This test takes too long.

    def _test_mem_leakage(self):
        # Measure one and throw it away, in order to reach a "steady state" in terms of initialization of memory state.
        memutil.measure_mem_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
        slope = memutil.measure_mem_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)

        self.failUnless(slope <= MIN_SLOPE, "%s leaks memory at a rate of approximately %s system bytes per invocation" % (self.test_em, "%0.3f" % slope,))

    def test_mem_leakage_much_adding_some_removing(self):
        try:
            self._test_mem_leakage_much_adding_some_removing()
        except memutil.NotSupportedException:
            print "Skipping memory leak test since measurement of current mem usage isn't implemented on this platform."
            pass
    del test_mem_leakage_much_adding_some_removing # This test takes too long.

    def _test_mem_leakage_much_adding_some_removing(self):
        # Measure one and throw it away, in order to reach a "steady state" in terms of initialization of memory state.
        memutil.measure_mem_leakage(self._mem_test_LRU_much_adding_some_removing, SAMPLES, iterspersample=2**0)
        slope = memutil.measure_mem_leakage(self._mem_test_LRU_much_adding_some_removing, SAMPLES, iterspersample=2**0)

        self.failUnless(slope <= MIN_SLOPE, "%s leaks memory at a rate of approximately %s system bytes per invocation" % (self._mem_test_LRU_much_adding_some_removing, "%0.3f" % slope,))

    def test_obj_leakage(self):
        self._test_obj_leakage()
    del test_obj_leakage # This test takes too long.

    def _test_obj_leakage(self):
        # Measure one and throw it away, in order to reach a "steady state" in terms of initialization of object state.
        memutil.measure_obj_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
        slope = memutil.measure_obj_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)

        self.failUnless(slope <= MIN_SLOPE, "%s leaks objects at a rate of approximately %s system bytes per invocation" % (self.test_em, "%0.3f" % slope,))

    def test_obj_leakage_much_adding_some_removing(self):
        self._test_obj_leakage_much_adding_some_removing()
    del test_obj_leakage_much_adding_some_removing # This test takes too long.

    def _test_obj_leakage_much_adding_some_removing(self):
        # Measure one and throw it away, in order to reach a "steady state" in terms of initialization of object state.
        memutil.measure_obj_leakage(self._mem_test_LRU_much_adding_some_removing, SAMPLES, iterspersample=2**0)
        slope = memutil.measure_obj_leakage(self._mem_test_LRU_much_adding_some_removing, SAMPLES, iterspersample=2**0)

        self.failUnless(slope <= MIN_SLOPE, "%s leaks objects at a rate of approximately %s system bytes per invocation" % (self._mem_test_LRU_much_adding_some_removing, "%0.3f" % slope,))
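
The MIN_SLOPE/SAMPLES machinery above treats leak detection as a regression problem: sample memory use after each run of the workload, then test the fitted slope against a threshold. A rough sketch of that idea; get_mem is a stand-in for a platform-specific probe, and memutil.measure_mem_leakage is the real, more careful version:

def leak_slope(f, samples, get_mem):
    # Run f repeatedly, recording memory use after each run, then return
    # the least-squares slope in bytes per invocation.
    ys = []
    for _ in range(samples):
        f()
        ys.append(get_mem())
    n = len(ys)
    xbar = (n - 1) / 2.0
    ybar = sum(ys) / float(n)
    num = sum((i - xbar) * (y - ybar) for i, y in enumerate(ys))
    den = sum((i - xbar) ** 2 for i in range(n))
    return num / den

A slope below the noise floor (MIN_SLOPE) is treated as "not leaking"; a better test would also check how well the line fits, as the comments above suggest.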
@@ -0,0 +1,441 @@
#!/usr/bin/env python

# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import random, unittest

from pyutil.humanreadable import hr
from pyutil import memutil
from pyutil import odict

class Bencher:
    def __init__(self, klass, MAXREPS=2**8, MAXTIME=5):
        print klass
        self.klass = klass
        self.MAXREPS = MAXREPS
        self.MAXTIME = MAXTIME
        self.d = {}
        self.lrun = None

    def _generic_benchmarking_init(self, n):
        self.d.clear()
        self.lrun = self.klass()
        for i in range(n):
            self.d[i] = i
            self.lrun[n+i] = n+i

    def _benchmark_init(self, n):
        d2 = self.klass(initialdata=self.d)
        assert len(d2) == len(self.d)
        return True

    def _benchmark_update(self, n):
        d2 = self.klass()
        assert len(d2) == 0
        d2.update(self.d)
        assert len(d2) == len(self.d)
        return True

    def _benchmark_insert(self, n):
        d2 = self.klass()
        assert len(d2) == 0
        for k, v, in self.d.iteritems():
            d2[k] = v
        assert len(d2) == len(self.d)
        return True

    def _benchmark_init_and_popitem(self, n):
        d2 = self.klass(initialdata=self.d)
        assert len(d2) == len(self.d)
        for i in range(len(d2), 0, -1):
            assert len(d2) == i
            d2.popitem()
        return True

    def _benchmark_init_and_has_key_and_del(self, n):
        d2 = self.klass(initialdata=self.d)
        assert len(d2) == len(self.d)
        for k in self.d.iterkeys():
            if d2.has_key(k):
                del d2[k]
        return True

    def _benchmark_init_and_remove(self, n):
        d2 = self.klass(initialdata=self.d)
        assert len(d2) == len(self.d)
        for k in self.d.iterkeys():
            d2.remove(k, strictkey=False)
        return True

    def bench(self, BSIZES=(128, 250, 2048, 5000, 2**13, 2**20,)):
        from pyutil import benchutil
        funcs = ("_benchmark_insert", "_benchmark_init_and_has_key_and_del", "_benchmark_init_and_remove", "_benchmark_init_and_popitem", "_benchmark_update", "_benchmark_init",)
        longest = 0
        for func in funcs:
            if len(func) > longest:
                longest = len(func)
        for func in funcs:
            print func + " " * (longest + 1 - len(func))
            for BSIZE in BSIZES:
                f = getattr(self, func)
                benchutil.rep_bench(f, BSIZE, self._generic_benchmarking_init, MAXREPS=self.MAXREPS, MAXTIME=self.MAXTIME)

def quick_bench():
    Bencher(odict.LRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15, 2**16,))
    Bencher(odict.LinkedListLRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15,))
    Bencher(odict.SmallLRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15,))

def slow_bench():
    Bencher(odict.LRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 21)])
    Bencher(odict.LinkedListLRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 21)])
    Bencher(odict.SmallLRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 17)])

MUCHADDINGSIZE = 2**4

# The following parameters are for testing for memory leakage.
MIN_SLOPE = 512.0 # If it leaks less than 512.0 bytes per iteration, then it's probably just some kind of noise from the interpreter or something...
SAMPLES = 2**5
# MIN_SLOPE is high because SAMPLES is low, which is because taking a statistically useful number of samples takes too long.
# For a *good* test, turn SAMPLES up as high as you can stand (maybe 2**10) and set MIN_SLOPE to about 1.0.
# For a *really* good test, add a variance measure to memutil.measure_mem_leakage(), and only consider it to be leaking if the slope is > 0.1 *and* is a "pretty good" fit for the data.
# MIN_SLOPE = 1.0
# SAMPLES = 2**10

class Testy(unittest.TestCase):
    def _test_empty_lookup(self, d):
        self.failUnless(d.get('spam') is None)

    def _test_key_error(self, C):
        d = C()
        try:
            d['spam']
            self.fail(d)
        except KeyError:
            pass

    def _test_insert_and_get_and_items(self, d):
        d.insert("spam", "eggs")
        d["spam2"] = "eggs2"
        self.failUnless(d.get("spam") == "eggs", str(d))
        self.failUnless(d.get("spam2") == "eggs2")
        self.failUnless(d["spam"] == "eggs")
        self.failUnless(d["spam2"] == "eggs2")
        self.failUnlessEqual(d.items(), [("spam", "eggs"), ("spam2", "eggs2")], d)

    def _test_move_to_most_recent(self, d):
        d.insert("spam", "eggs")
        d["spam2"] = "eggs2"
        self.failUnless(d.get("spam") == "eggs", str(d))
        self.failUnless(d.get("spam2") == "eggs2")
        self.failUnless(d["spam"] == "eggs")
        self.failUnless(d["spam2"] == "eggs2")
        self.failUnlessEqual(d.items(), [("spam", "eggs"), ("spam2", "eggs2")])
        d.move_to_most_recent("spam")
        self.failUnlessEqual(d.items(), [("spam2", "eggs2"), ("spam", "eggs")])

    def _test_insert_and_remove(self, d):
        d.insert('spam', "eggs")
        self.failUnless(d.has_key('spam'))
        self.failUnless(d.get('spam') == "eggs")
        self.failUnless(d['spam'] == "eggs")
        self.failUnlessEqual(d.items(), [("spam", "eggs")])
        x = d.remove('spam')
        self.failUnless(x == "eggs", "x: %s" % `x`)
        self.failUnless(not d.has_key('spam'))
        self.failUnlessEqual(d.items(), [])
        d['spam'] = "eggsy"
        self.failUnless(d.has_key('spam'))
        self.failUnless(d.get('spam') == "eggsy")
        self.failUnless(d['spam'] == "eggsy")
        self.failUnlessEqual(d.items(), [("spam", "eggsy")])
        del d['spam']
        self.failUnless(not d.has_key('spam'))
        self.failUnlessEqual(d.items(), [])

    def _test_setdefault(self, d):
        d.setdefault('spam', "eggs")
        self.failUnless(d.has_key('spam'))
        self.failUnless(d.get('spam') == "eggs")
        self.failUnless(d['spam'] == "eggs")
        self.failUnlessEqual(d.items(), [("spam", "eggs")])
        x = d.remove('spam')
        self.failUnless(x == "eggs", "x: %s" % `x`)
        self.failUnless(not d.has_key('spam'))
        self.failUnlessEqual(d.items(), [])

    def _test_extracted_bound_method(self, d):
        insmeth = d.insert
        insmeth('spammy', "eggsy")
        self.failUnless(d.get('spammy') == "eggsy")

    def _test_extracted_unbound_method(self, d):
        insumeth = d.__class__.insert
        insumeth(d, 'spammy', "eggsy")
        self.failUnless(d.get('spammy') == "eggsy")

    def _test_unbound_method(self, C, d):
        umeth = C.insert
        umeth(d, 'spammy', "eggsy")
        self.failUnless(d.get('spammy') == "eggsy")

    def _test_clear(self, d):
        d[11] = 11
        d._assert_invariants()
        self.failUnless(len(d) == 1)
        d.clear()
        d._assert_invariants()
        self.failUnless(len(d) == 0)
        self.failUnlessEqual(d.items(), [])

    def _test_update_from_dict(self, d):
        self.failUnless(d._assert_invariants())
        d['b'] = 99
        self.failUnless(d._assert_invariants())
        d2 = { 'a': 0, 'b': 1, 'c': 2,}
        d.update(d2)
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('a') == 0, "d.get('a'): %s" % d.get('a'))
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('b') == 1, "d.get('b'): %s" % d.get('b'))
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('c') == 2)
        self.failUnless(d._assert_invariants())

    def _test_update_from_odict(self, d):
        self.failUnless(d._assert_invariants())
        d['b'] = 99
        self.failUnless(d._assert_invariants())
        d2 = odict.OrderedDict()
        d2['a'] = 0
        d2['b'] = 1
        d2['c'] = 2
        d.update(d2)
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('a') == 0, "d.get('a'): %s" % d.get('a'))
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('b') == 1, "d.get('b'): %s" % d.get('b'))
        self.failUnless(d._assert_invariants())
        self.failUnless(d.get('c') == 2)
        self.failUnless(d._assert_invariants())
        self.failUnlessEqual(d.items(), [("b", 1), ("a", 0), ("c", 2)])

    def _test_popitem(self, C):
        c = C({"a": 1})
        res = c.popitem()
        self.failUnlessEqual(res, ("a", 1,))

        c["a"] = 1
        c["b"] = 2

        res = c.popitem()
        self.failUnlessEqual(res, ("b", 2,))

    def _test_pop(self, C):
        c = C({"a": 1})
        res = c.pop()
        self.failUnlessEqual(res, "a")

        c["a"] = 1
        c["b"] = 2

        res = c.pop()
        self.failUnlessEqual(res, "b")

    def _test_iterate_items(self, C):
        c = C({"a": 1})
        c["b"] = 2
        i = c.iteritems()
        x = i.next()
        self.failUnlessEqual(x, ("a", 1,))
        x = i.next()
        self.failUnlessEqual(x, ("b", 2,))
        try:
            i.next()
            self.fail() # Should have gotten StopIteration exception
        except StopIteration:
            pass

    def _test_iterate_keys(self, C):
        c = C({"a": 1})
        c["b"] = 2
        i = c.iterkeys()
        x = i.next()
        self.failUnlessEqual(x, "a")
        x = i.next()
        self.failUnlessEqual(x, "b")
        try:
            i.next()
            self.fail() # Should have gotten StopIteration exception
        except StopIteration:
            pass

    def _test_iterate_values(self, C):
        c = C({"a": 1})
        c["b"] = 2
        i = c.itervalues()
        x = i.next()
        self.failUnless(x == 1)
        x = i.next()
        self.failUnless(x == 2)
        try:
            i.next()
            self.fail() # Should have gotten StopIteration exception
        except StopIteration:
            pass

    def _test_much_adding_some_removing(self, C):
        c = C()
        for i in range(MUCHADDINGSIZE):
            c[i] = i
            if (i % 4) == 0:
                k = random.choice(c.keys())
                del c[k]
        for i in range(MUCHADDINGSIZE):
            c[i] = i
        self.failUnlessEqual(len(c), MUCHADDINGSIZE)

    def _test_1(self, C):
        c = C()
        c[11] = 11
        c._assert_invariants()
        c[11] = 11
        c._assert_invariants()
        c[11] = 1001
        c._assert_invariants()
        c[11] = 11
        c._assert_invariants()
        c[11] = 1001
        c._assert_invariants()
        c[11] = 1001
        c._assert_invariants()
        c[11] = 1001
        c._assert_invariants()

    def _test_2(self, C):
        c = C()
        c[11] = 11
        c._assert_invariants()
        del c[11]
        c._assert_invariants()
        c[11] = 11
        c._assert_invariants()
        c[11] = 11
        c._assert_invariants()

    def _test_3(self, C):
        c = C()
        c[11] = 11
        c._assert_invariants()
        c[11] = 12
        c._assert_invariants()
        c[11] = 13
        c._assert_invariants()
        del c[11]
        c._assert_invariants()
        c[11] = 14
        c._assert_invariants()
        c[11] = 15
        c._assert_invariants()
        c[11] = 16
        c._assert_invariants()

    def _test_has_key(self, C):
        c = C()
        c._assert_invariants()
        for i in xrange(11):
            c._assert_invariants()
            c[i] = i
            c._assert_invariants()
        del c[0]
        self.failUnless(len(c) == 10)
        self.failUnless(10 in c.values())
        self.failUnless(0 not in c.values())

        c.has_key(1) # this touches `1' but does not make it fresher so that it will get popped next time we pop.
        c[1] = 1 # this touches `1' but does not make it fresher so that it will get popped.
        c._assert_invariants()

        x = c.pop()
        self.failUnlessEqual(x, 10)

        c[99] = 99
        c._assert_invariants()
        self.failUnless(len(c) == 10)
        self.failUnless(1 in c.values(), "C: %s, c.values(): %s" % (hr(C), hr(c.values(),),))
        self.failUnless(2 in c.values(), "C: %s, c.values(): %s" % (hr(C), hr(c.values(),),))
        self.failIf(10 in c.values(), "C: %s, c.values(): %s" % (hr(C), hr(c.values(),),))
        self.failUnless(99 in c.values())

    def _test_em(self):
        for klass in (odict.OrderedDict,):
            for testfunc in (self._test_empty_lookup, self._test_insert_and_get_and_items, self._test_insert_and_remove, self._test_extracted_bound_method, self._test_extracted_unbound_method, self._test_clear, self._test_update_from_dict, self._test_update_from_odict, self._test_setdefault,):
                testfunc(klass())

            for testfunc in (self._test_pop, self._test_popitem, self._test_iterate_items, self._test_iterate_keys, self._test_iterate_values, self._test_key_error, ):
                testfunc(klass)

            self._test_unbound_method(klass, klass())

        for klass in (odict.OrderedDict,):
            for testfunc in (self._test_1, self._test_2, self._test_3, self._test_has_key,):
                testfunc(klass)

    def test_em(self):
        self._test_em()

    def _mem_test_much_adding_some_removing(self):
        for klass in (odict.LRUCache, odict.SmallLRUCache,):
            return self._test_much_adding_some_removing(klass)

    def test_mem_leakage(self):
        try:
            self._test_mem_leakage()
        except memutil.NotSupportedException:
            print "Skipping memory leak test since measurement of current mem usage isn't implemented on this platform."
            pass
    del test_mem_leakage # This test takes too long.

    def _test_mem_leakage(self):
        # Measure one and throw it away, in order to reach a "steady state" in terms of initialization of memory state.
        memutil.measure_mem_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
        slope = memutil.measure_mem_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)

        self.failUnless(slope <= MIN_SLOPE, "%s leaks memory at a rate of approximately %s system bytes per invocation" % (self.test_em, "%0.3f" % slope,))

    def test_mem_leakage_much_adding_some_removing(self):
        try:
            self._test_mem_leakage_much_adding_some_removing()
        except memutil.NotSupportedException:
            print "Skipping memory leak test since measurement of current mem usage isn't implemented on this platform."
            pass
    del test_mem_leakage_much_adding_some_removing # This test takes too long.

    def _test_mem_leakage_much_adding_some_removing(self):
        # Measure one and throw it away, in order to reach a "steady state" in terms of initialization of memory state.
        memutil.measure_mem_leakage(self._mem_test_much_adding_some_removing, SAMPLES, iterspersample=2**0)
        slope = memutil.measure_mem_leakage(self._mem_test_much_adding_some_removing, SAMPLES, iterspersample=2**0)

        self.failUnless(slope <= MIN_SLOPE, "%s leaks memory at a rate of approximately %s system bytes per invocation" % (self._mem_test_much_adding_some_removing, "%0.3f" % slope,))

    def test_obj_leakage(self):
        self._test_obj_leakage()
    del test_obj_leakage # This test takes too long.

    def _test_obj_leakage(self):
        # Measure one and throw it away, in order to reach a "steady state" in terms of initialization of object state.
        memutil.measure_obj_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
        slope = memutil.measure_obj_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)

        self.failUnless(slope <= MIN_SLOPE, "%s leaks objects at a rate of approximately %s system bytes per invocation" % (self.test_em, "%0.3f" % slope,))

    def test_obj_leakage_much_adding_some_removing(self):
        self._test_obj_leakage_much_adding_some_removing()
    del test_obj_leakage_much_adding_some_removing # This test takes too long.

    def _test_obj_leakage_much_adding_some_removing(self):
        # Measure one and throw it away, in order to reach a "steady state" in terms of initialization of object state.
        memutil.measure_obj_leakage(self._mem_test_much_adding_some_removing, SAMPLES, iterspersample=2**0)
        slope = memutil.measure_obj_leakage(self._mem_test_much_adding_some_removing, SAMPLES, iterspersample=2**0)

        self.failUnless(slope <= MIN_SLOPE, "%s leaks objects at a rate of approximately %s system bytes per invocation" % (self._mem_test_much_adding_some_removing, "%0.3f" % slope,))
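
The ordering assertions in these odict tests pin down a simple contract: items() preserves insertion order, and move_to_most_recent reorders an existing key to the end. A toy model of that contract, assuming nothing about odict's internals (the real classes use a doubly linked list for O(1) reordering; this list-based sketch is O(n)):

class ToyOrderedDict:
    def __init__(self):
        self._d = {}
        self._order = []   # keys, oldest first

    def __setitem__(self, k, v):
        if k not in self._d:
            self._order.append(k)
        self._d[k] = v

    def items(self):
        return [(k, self._d[k]) for k in self._order]

    def move_to_most_recent(self, k):
        # Reorder an existing key to the most-recent end.
        self._order.remove(k)
        self._order.append(k)

d = ToyOrderedDict()
d['spam'] = 'eggs'
d['spam2'] = 'eggs2'
d.move_to_most_recent('spam')
assert d.items() == [('spam2', 'eggs2'), ('spam', 'eggs')]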
@@ -0,0 +1,30 @@
#!/usr/bin/env python

# Copyright (c) 2004-2009 Zooko "Zooko" Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import unittest

from pyutil.assertutil import _assert
from pyutil import strutil

class Teststrutil(unittest.TestCase):
    def test_short_input(self):
        self.failUnless(strutil.pop_trailing_newlines("\r\n") == "")
        self.failUnless(strutil.pop_trailing_newlines("\r") == "")
        self.failUnless(strutil.pop_trailing_newlines("x\r\n") == "x")
        self.failUnless(strutil.pop_trailing_newlines("x\r") == "x")

    def test_split(self):
        _assert(strutil.split_on_newlines("x\r\ny") == ["x", "y",], strutil.split_on_newlines("x\r\ny"))
        _assert(strutil.split_on_newlines("x\r\ny\r\n") == ["x", "y", '',], strutil.split_on_newlines("x\r\ny\r\n"))
        _assert(strutil.split_on_newlines("x\n\ny\n\n") == ["x", '', "y", '', '',], strutil.split_on_newlines("x\n\ny\n\n"))

    def test_commonprefix(self):
        _assert(strutil.commonprefix(["foo","foobarooo", "foosplat",]) == 'foo', strutil.commonprefix(["foo","foobarooo", "foosplat",]))
        _assert(strutil.commonprefix(["foo","afoobarooo", "foosplat",]) == '', strutil.commonprefix(["foo","afoobarooo", "foosplat",]))

    def test_commonsuffix(self):
        _assert(strutil.commonsuffix(["foo","foobarooo", "foosplat",]) == '', strutil.commonsuffix(["foo","foobarooo", "foosplat",]))
        _assert(strutil.commonsuffix(["foo","foobarooo", "foosplato",]) == 'o', strutil.commonsuffix(["foo","foobarooo", "foosplato",]))
        _assert(strutil.commonsuffix(["foo","foobarooofoo", "foosplatofoo",]) == 'foo', strutil.commonsuffix(["foo","foobarooofoo", "foosplatofoo",]))
@@ -0,0 +1,77 @@
#!/usr/bin/env python

import unittest

from pyutil import randutil
from pyutil import zlibutil

class Accumulator:
    def __init__(self):
        self.buf = ''

    def write(self, s):
        self.buf += s

def make_decomp(realdecomp):
    def decomp(data, maxlen, maxmem):
        d = Accumulator()
        realdecomp(data, d, maxlen, maxmem)
        return d.buf
    return decomp

def genrandstr(strlen):
    return randutil.insecurerandstr(strlen)

def genbombstr(strlen):
    return '0' * strlen

MAXMEM = 65*2**20

class ZlibTestCase(unittest.TestCase):
    def _help_test(self, genstring, decomp, strlen):
        s = genstring(strlen)
        cs = zlibutil.zlib.compress(s)
        s2 = decomp(cs, maxlen=strlen, maxmem=strlen*2**3 + zlibutil.MINMAXMEM)
        self.failUnless(s == s2)
        s2 = decomp(cs, maxlen=strlen, maxmem=strlen*2**6 + zlibutil.MINMAXMEM)
        self.failUnless(s == s2)
        self.failUnlessRaises(zlibutil.TooBigError, decomp, cs, maxlen=strlen-1, maxmem=strlen*2**3 + zlibutil.MINMAXMEM)

    def _help_test_inplace_minmaxmem(self, genstring, decomp, strlen):
        s = genstring(strlen)
        cs = zlibutil.zlib.compress(s)
        s2 = decomp(cs, maxlen=strlen, maxmem=zlibutil.MINMAXMEM)
        self.failUnless(s == s2)
        self.failUnlessRaises(zlibutil.TooBigError, decomp, cs, maxlen=strlen-1, maxmem=zlibutil.MINMAXMEM)

    def _help_test_inplace(self, genstring, decomp, strlen):
        # ### XXX self.failUnlessRaises(UnsafeDecompressError, decomp, zlib.compress(genstring(strlen)), maxlen=strlen, maxmem=strlen-1)
        s = genstring(strlen)
        cs = zlibutil.zlib.compress(s)
        s2 = decomp(cs, maxlen=strlen, maxmem=max(strlen*2**3, zlibutil.MINMAXMEM))
        self.failUnless(s == s2)
        s2 = decomp(cs, maxlen=strlen, maxmem=max(strlen*2**6, zlibutil.MINMAXMEM))
        self.failUnless(s == s2)
        s2 = decomp(cs, maxlen=strlen, maxmem=max(strlen-1, zlibutil.MINMAXMEM))
        self.failUnless(s == s2)
        s2 = decomp(cs, maxlen=strlen, maxmem=max(strlen/2, zlibutil.MINMAXMEM))
        self.failUnless(s == s2)
        self.failUnlessRaises(zlibutil.TooBigError, decomp, cs, maxlen=strlen-1, maxmem=max(strlen*2**3, zlibutil.MINMAXMEM))

    def testem(self):
        # for strlen in [2**1, 2**2, 2**10, 2**14, 2**21]: # a *real* test ought to include 2**21, which exercises different cases re: maxmem.  But it takes too long.
        for strlen in [2, 3, 4, 99,]:
            # print "strlen: %s\n" % (strlen,)
            for decomp in [zlibutil.decompress, make_decomp(zlibutil.decompress_to_fileobj), make_decomp(zlibutil.decompress_to_spool),]:
                # print "decomp: %s\n" % (decomp,)
                for genstring in [genrandstr, genbombstr,]:
                    # print "genstring: %s\n" % (genstring,)
                    self._help_test(genstring, decomp, strlen)

            for decomp in [make_decomp(zlibutil.decompress_to_spool),]:
                # print "decomp: %s\n" % (decomp,)
                for genstring in [genrandstr, genbombstr,]:
                    # print "genstring: %s\n" % (genstring,)
                    self._help_test_inplace(genstring, decomp, strlen)
                    self._help_test_inplace_minmaxmem(genstring, decomp, strlen)
@ -0,0 +1,121 @@
import os, signal, time

from twisted.internet import defer, reactor
from twisted.trial import unittest

import repeatable_random
repeatable_random # http://divmod.org/trac/ticket/1499

class SignalMixin:
    # This class is necessary for any code which wants to use Processes
    # outside the usual reactor.run() environment. It is copied from
    # Twisted's twisted.test.test_process. Note that Twisted-8.2.0 uses
    # something rather different.
    sigchldHandler = None

    def setUp(self):
        # Make sure the SIGCHLD handler is installed, as it would be on
        # reactor.run(). The problem is that the reactor may not have been
        # run by the time this test runs.
        if hasattr(reactor, "_handleSigchld") and hasattr(signal, "SIGCHLD"):
            self.sigchldHandler = signal.signal(signal.SIGCHLD,
                                                reactor._handleSigchld)

    def tearDown(self):
        if self.sigchldHandler:
            signal.signal(signal.SIGCHLD, self.sigchldHandler)

class PollMixin:

    def poll(self, check_f, pollinterval=0.01):
        # Return a Deferred, then call check_f periodically until it returns
        # True, at which point the Deferred will fire. If check_f raises an
        # exception, the Deferred will errback.
        d = defer.maybeDeferred(self._poll, None, check_f, pollinterval)
        return d

    def _poll(self, res, check_f, pollinterval):
        if check_f():
            return True
        d = defer.Deferred()
        d.addCallback(self._poll, check_f, pollinterval)
        reactor.callLater(pollinterval, d.callback, None)
        return d
|
|
||||
|
class TestMixin(SignalMixin): |
||||
|
def setUp(self, repeatable=False): |
||||
|
""" |
||||
|
@param repeatable: install the repeatable_randomness hacks to attempt |
||||
|
to without access to real randomness and real time.time from the |
||||
|
code under test |
||||
|
""" |
||||
|
self.repeatable = repeatable |
||||
|
if self.repeatable: |
||||
|
import repeatable_random |
||||
|
repeatable_random.force_repeatability() |
||||
|
if hasattr(time, 'realtime'): |
||||
|
self.teststarttime = time.realtime() |
||||
|
else: |
||||
|
self.teststarttime = time.time() |
||||
|
|
||||
|
def tearDown(self): |
||||
|
if self.repeatable: |
||||
|
repeatable_random.restore_non_repeatability() |
||||
|
self.clean_pending(required_to_quiesce=True) |
||||
|
|
||||
|
def clean_pending(self, dummy=None, required_to_quiesce=True): |
||||
|
""" |
||||
|
This handy method cleans all pending tasks from the reactor. |
||||
|
|
||||
|
When writing a unit test, consider the following question: |
||||
|
|
||||
|
Is the code that you are testing required to release control once it |
||||
|
has done its job, so that it is impossible for it to later come around |
||||
|
(with a delayed reactor task) and do anything further? |
||||
|
|
||||
|
If so, then trial will usefully test that for you -- if the code under |
||||
|
test leaves any pending tasks on the reactor then trial will fail it. |
||||
|
|
||||
|
On the other hand, some code is *not* required to release control -- some |
||||
|
code is allowed to continuously maintain control by rescheduling reactor |
||||
|
tasks in order to do ongoing work. Trial will incorrectly require that |
||||
|
code to clean up all its tasks from the reactor. |
||||
|
|
||||
|
Most people think that such code should be amended to have an optional |
||||
|
"shutdown" operation that releases all control, but on the contrary it is |
||||
|
good design for some code to *not* have a shutdown operation, but instead |
||||
|
to have a "crash-only" design in which it recovers from crash on startup. |
||||
|
|
||||
|
If the code under test is of the "long-running" kind, which is *not* |
||||
|
required to shutdown cleanly in order to pass tests, then you can simply |
||||
|
call testutil.clean_pending() at the end of the unit test, and trial will |
||||
|
be satisfied. |
||||
|
""" |
||||
|
pending = reactor.getDelayedCalls() |
||||
|
active = bool(pending) |
||||
|
for p in pending: |
||||
|
if p.active(): |
||||
|
p.cancel() |
||||
|
else: |
||||
|
print "WEIRDNESS! pending timed call not active!" |
||||
|
if required_to_quiesce and active: |
||||
|
self.fail("Reactor was still active when it was required to be quiescent.") |
||||
|
|
||||
|
try: |
||||
|
import win32file |
||||
|
import win32con |
||||
|
def w_make_readonly(path): |
||||
|
win32file.SetFileAttributes(path, win32con.FILE_ATTRIBUTE_READONLY) |
||||
|
def w_make_accessible(path): |
||||
|
win32file.SetFileAttributes(path, win32con.FILE_ATTRIBUTE_NORMAL) |
||||
|
# http://divmod.org/trac/ticket/1499 |
||||
|
make_readonly = w_make_readonly |
||||
|
make_accessible = w_make_accessible |
||||
|
except ImportError: |
||||
|
import stat |
||||
|
def make_readonly(path): |
||||
|
os.chmod(path, stat.S_IREAD) |
||||
|
os.chmod(os.path.dirname(path), stat.S_IREAD) |
||||
|
def make_accessible(path): |
||||
|
os.chmod(os.path.dirname(path), stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD) |
||||
|
os.chmod(path, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD) |
@ -0,0 +1,44 @@
from pyutil import benchutil

import hashlib, random, os

from decimal import Decimal
D=Decimal

p1 = 'a'*32
p1a = 'a'*32
p2 = 'a'*31+'b' # close, but no cigar
p3 = 'b'*32 # different in the first byte

def compare(n, f, a, b):
    for i in xrange(n):
        f(a, b)

def eqeqcomp(a, b):
    return a == b

def hashcomp(a, b):
    salt = os.urandom(32)
    return hashlib.md5(salt + a).digest() == hashlib.md5(salt + b).digest()

N=10**4
REPS=10**2

print "all times are in nanoseconds per comparison (scientific notation)"
print

for comparator in [eqeqcomp, hashcomp]:
    print "using comparator ", comparator

    # for (a, b, desc) in [(p1, p1a, 'same'), (p1, p2, 'close'), (p1, p3, 'far')]:
    trials = [(p1, p1a, 'same'), (p1, p2, 'close'), (p1, p3, 'far')]
    random.shuffle(trials)
    for (a, b, desc) in trials:
        print "comparing two strings that are %s to each other" % (desc,)

        def f(n):
            compare(n, comparator, a, b)

        benchutil.rep_bench(f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)

    print
@ -0,0 +1,72 @@
from pyutil import benchutil

import hashlib
import os

from decimal import Decimal
D=Decimal

p1 = 'a'*32
p1a = 'a'*32
p2 = 'a'*31+'b' # close, but no cigar
p3 = 'b'*32 # different in the first byte

def compare(n, f, a, b):
    for i in xrange(n):
        f(a, b)

def eqeq(a, b):
    return a == b

def equalsequals_s(n):
    # return compare(n, eqeq,
    for i in xrange(n):
        p1 == p1a

def equalsequals_c(n):
    for i in xrange(n):
        p1 == p2

def equalsequals_f(n):
    for i in xrange(n):
        p1 == p3

def hash_s(n):
    for i in xrange(n):
        salt = os.urandom(32)
        hashlib.md5(salt + p1).digest() == hashlib.md5(salt + p1a).digest()

def hash_c(n):
    for i in xrange(n):
        salt = os.urandom(32)
        hashlib.md5(salt + p1).digest() == hashlib.md5(salt + p2).digest()

def hash_f(n):
    for i in xrange(n):
        salt = os.urandom(32)
        hashlib.md5(salt + p1).digest() == hashlib.md5(salt + p3).digest()

N=10**4
REPS=10**2

print "using '=='"

print "same"
benchutil.rep_bench(equalsequals_s, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)

print "close"
benchutil.rep_bench(equalsequals_c, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)

print "far"
benchutil.rep_bench(equalsequals_f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)

print "using hash"

print "same"
benchutil.rep_bench(hash_s, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)

print "far"
benchutil.rep_bench(hash_f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)

print "close"
benchutil.rep_bench(hash_c, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)
@ -0,0 +1,72 @@
# ISO-8601:
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html

import calendar, datetime, re, time

def iso_utc_date(now=None, t=time.time):
    if now is None:
        now = t()
    return datetime.datetime.utcfromtimestamp(now).isoformat()[:10]

def iso_utc(now=None, sep=' ', t=time.time, suffix='Z'):
    if now is None:
        now = t()
    return datetime.datetime.utcfromtimestamp(now).isoformat(sep)+suffix

def iso_local(now=None, sep=' ', t=time.time):
    if now is None:
        now = t()
    return datetime.datetime.fromtimestamp(now).isoformat(sep)

def iso_utc_time_to_seconds(isotime, _conversion_re=re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})[T_ ](?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<subsecond>\.\d+)?Z?")):
    """
    The inverse of iso_utc().

    Real ISO-8601 is "2003-01-08T06:30:59Z". We also accept
    "2003-01-08 06:30:59Z" as suggested by RFC 3339. We also accept
    "2003-01-08_06:30:59Z". We also accept the trailing 'Z' to be omitted.
    """
    m = _conversion_re.match(isotime)
    if not m:
        raise ValueError, (isotime, "not a complete ISO8601 timestamp")

    year, month, day = int(m.group('year')), int(m.group('month')), int(m.group('day'))
    hour, minute, second = int(m.group('hour')), int(m.group('minute')), int(m.group('second'))
    subsecstr = m.group('subsecond')
    if subsecstr:
        subsecfloat = float(subsecstr)
    else:
        subsecfloat = 0

    return calendar.timegm( (year, month, day, hour, minute, second, 0, 1, 0) ) + subsecfloat
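
# Editor's sketch (not in the original): iso_utc() and
# iso_utc_time_to_seconds() are inverses, e.g.:
#
#     >>> iso_utc(0)
#     '1970-01-01 00:00:00Z'
#     >>> iso_utc_time_to_seconds('1970-01-01 00:00:00Z')
#     0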

def parse_duration(s):
    orig = s
    unit = None
    DAY = 24*60*60
    MONTH = 31*DAY
    YEAR = 365*DAY
    if s.endswith("s"):
        s = s[:-1]
    if s.endswith("day"):
        unit = DAY
        s = s[:-len("day")]
    elif s.endswith("month"):
        unit = MONTH
        s = s[:-len("month")]
    elif s.endswith("mo"):
        unit = MONTH
        s = s[:-len("mo")]
    elif s.endswith("year"):
        unit = YEAR
        s = s[:-len("year")]
    else:
        raise ValueError("no unit (like day, month, or year) in '%s'" % orig)
    s = s.strip()
    return int(s) * unit
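
# Editor's sketch (not in the original): parse_duration() turns strings like
# these into seconds:
#
#     >>> parse_duration("2 days")
#     172800
#     >>> parse_duration("3mo")    # 3 months of 31 days each
#     8035200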

def parse_date(s):
    # Return seconds-since-epoch for the UTC midnight that starts the given
    # day.
    return int(iso_utc_time_to_seconds(s + "T00:00:00"))
@ -0,0 +1,25 @@
# Copyright (c) 2005-2009 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import warnings

# from the Twisted library
from twisted.internet import reactor

# from the pyutil library
from weakutil import WeakMethod

def callLater_weakly(delay, func, *args, **kwargs):
    """
    Call func later, but if func is a bound method then make the reference it holds to its object a weak reference.

    Therefore, if this scheduled event is a bound method and it is the only thing keeping the object from being garbage collected, the object will be garbage collected and the event will be cancelled.
    """
    warnings.warn("deprecated", DeprecationWarning)

    def cleanup(weakmeth, thedeadweakref):
        if weakmeth.callId.active():
            weakmeth.callId.cancel()
    weakmeth = WeakMethod(func, callback=cleanup)
    weakmeth.callId = reactor.callLater(delay, weakmeth, *args, **kwargs)
    return weakmeth
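
# Editor's sketch (not part of the original file): scheduling a bound method
# weakly. The Ticker class here is hypothetical.
#
#     class Ticker:
#         def tick(self):
#             print "tick"
#
#     t = Ticker()
#     callLater_weakly(1.0, t.tick)
#     del t   # t can now be collected; the pending call is then cancelled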
@ -0,0 +1,337 @@
# -*- coding: utf-8 -*-
"""
"Rational" version definition and parsing for DistutilsVersionFight
discussion at PyCon 2009.

This was written by Tarek Ziadé.

Zooko copied it from http://bitbucket.org/tarek/distutilsversion/ on 2010-07-29.
"""

import re

class IrrationalVersionError(Exception):
    """This is an irrational version."""
    pass

class HugeMajorVersionNumError(IrrationalVersionError):
    """An irrational version because the major version number is huge
    (often because a year or date was used).

    See `error_on_huge_major_num` option in `NormalizedVersion` for details.
    This guard can be disabled by setting that option False.
    """
    pass

class PreconditionViolationException(Exception):
    pass

# A marker used in the second and third parts of the `parts` tuple, for
# versions that don't have those segments, to sort properly. An example
# of versions in sort order ('highest' last):
#   1.0b1                 ((1,0), ('b',1), ('f',))
#   1.0.dev345            ((1,0), ('f',),  ('dev', 345))
#   1.0                   ((1,0), ('f',),  ('f',))
#   1.0.post256.dev345    ((1,0), ('f',),  ('f', 'post', 256, 'dev', 345))
#   1.0.post345           ((1,0), ('f',),  ('f', 'post', 345, 'f'))
#                                   ^        ^                 ^
#   'b' < 'f' ---------------------/         |                 |
#                                            |                 |
#   'dev' < 'f' < 'post' -------------------/                  |
#                                                              |
#   'dev' < 'f' ----------------------------------------------/
# Other letters would do, but 'f' for 'final' is kind of nice.
FINAL_MARKER = ('f',)

VERSION_RE = re.compile(r'''
    ^
    (?P<version>\d+\.\d+)          # minimum 'N.N'
    (?P<extraversion>(?:\.\d+)*)   # any number of extra '.N' segments
    (?:
        (?P<prerel>[abc]|rc)       # 'a'=alpha, 'b'=beta, 'c'=release candidate
                                   # 'rc'= alias for release candidate
        (?P<prerelversion>\d+(?:\.\d+)*)
    )?
    (?P<postdev>(\.post(?P<post>\d+)|-r(?P<oldpost>\d+))?(\.dev(?P<dev>\d+))?)?
    $''', re.VERBOSE)

class NormalizedVersion(object):
    """A rational version.

    Good:
        1.2         # equivalent to "1.2.0"
        1.2.0
        1.2a1
        1.2.3a2
        1.2.3b1
        1.2.3c1
        1.2.3.4
        TODO: fill this out

    Bad:
        1           # minimum two numbers
        1.2a        # release level must have a release serial
        1.2.3b
    """
    def __init__(self, s, error_on_huge_major_num=True):
        """Create a NormalizedVersion instance from a version string.

        @param s {str} The version string.
        @param error_on_huge_major_num {bool} Whether to consider an
            apparent use of a year or full date as the major version number
            an error. Default True. One of the observed patterns on PyPI before
            the introduction of `NormalizedVersion` was version numbers like this:
                2009.01.03
                20040603
                2005.01
            This guard is here to strongly encourage the package author to
            use an alternate version, because a release deployed into PyPI
            and, e.g. downstream Linux package managers, will forever remove
            the possibility of using a version number like "1.0" (i.e.
            where the major number is less than that huge major number).
        """
        self._parse(s, error_on_huge_major_num)

    @classmethod
    def from_parts(cls, version, prerelease=FINAL_MARKER,
                   devpost=FINAL_MARKER):
        return cls(cls.parts_to_str((version, prerelease, devpost)))

    def _parse(self, s, error_on_huge_major_num=True):
        """Parses a string version into parts."""
        if not isinstance(s, basestring):
            raise PreconditionViolationException("s is required to be a string: %s :: %s" % (s, type(s)))

        match = VERSION_RE.search(s)
        if not match:
            raise IrrationalVersionError(s)

        groups = match.groupdict()
        parts = []

        # main version
        block = self._parse_numdots(groups['version'], s, False, 2)
        extraversion = groups.get('extraversion')
        if extraversion not in ('', None):
            block += self._parse_numdots(extraversion[1:], s)
        parts.append(tuple(block))

        # prerelease
        prerel = groups.get('prerel')
        if prerel is not None:
            block = [prerel]
            block += self._parse_numdots(groups.get('prerelversion'), s,
                                         pad_zeros_length=1)
            parts.append(tuple(block))
        else:
            parts.append(FINAL_MARKER)

        # postdev
        if groups.get('postdev'):
            post = groups.get('post') or groups.get('oldpost')
            dev = groups.get('dev')
            postdev = []
            if post is not None:
                postdev.extend([FINAL_MARKER[0], 'post', int(post)])
                if dev is None:
                    postdev.append(FINAL_MARKER[0])
            if dev is not None:
                postdev.extend(['dev', int(dev)])
            parts.append(tuple(postdev))
        else:
            parts.append(FINAL_MARKER)
        self.parts = tuple(parts)
        if error_on_huge_major_num and self.parts[0][0] > 1980:
            raise HugeMajorVersionNumError("huge major version number, %r, "
                "which might cause future problems: %r" % (self.parts[0][0], s))

    def _parse_numdots(self, s, full_ver_str, drop_trailing_zeros=True,
                       pad_zeros_length=0):
        """Parse 'N.N.N' sequences, return a list of ints.

        @param s {str} 'N.N.N...' sequence to be parsed
        @param full_ver_str {str} The full version string from which this
            comes. Used for error strings.
        @param drop_trailing_zeros {bool} Whether to drop trailing zeros
            from the returned list. Default True.
        @param pad_zeros_length {int} The length to which to pad the
            returned list with zeros, if necessary. Default 0.
        """
        nums = []
        for n in s.split("."):
            if len(n) > 1 and n[0] == '0':
                raise IrrationalVersionError("cannot have leading zero in "
                    "version number segment: '%s' in %r" % (n, full_ver_str))
            nums.append(int(n))
        if drop_trailing_zeros:
            while nums and nums[-1] == 0:
                nums.pop()
        while len(nums) < pad_zeros_length:
            nums.append(0)
        return nums

    def __str__(self):
        return self.parts_to_str(self.parts)

    @classmethod
    def parts_to_str(cls, parts):
        """Transforms a version expressed in tuple into its string
        representation."""
        # XXX This doesn't check for invalid tuples
        main, prerel, postdev = parts
        s = '.'.join(str(v) for v in main)
        if prerel is not FINAL_MARKER:
            s += prerel[0]
            s += '.'.join(str(v) for v in prerel[1:])
        if postdev and postdev is not FINAL_MARKER:
            if postdev[0] == 'f':
                postdev = postdev[1:]
            i = 0
            while i < len(postdev):
                if i % 2 == 0:
                    s += '.'
                s += str(postdev[i])
                i += 1
        return s

    def __repr__(self):
        return "%s('%s')" % (self.__class__.__name__, self)

    def _cannot_compare(self, other):
        raise TypeError("cannot compare %s and %s"
                        % (type(self).__name__, type(other).__name__))

    def __eq__(self, other):
        if not isinstance(other, NormalizedVersion):
            self._cannot_compare(other)
        return self.parts == other.parts

    def __lt__(self, other):
        if not isinstance(other, NormalizedVersion):
            self._cannot_compare(other)
        return self.parts < other.parts

    def __ne__(self, other):
        return not self.__eq__(other)

    def __gt__(self, other):
        return not (self.__lt__(other) or self.__eq__(other))

    def __le__(self, other):
        return self.__eq__(other) or self.__lt__(other)

    def __ge__(self, other):
        return self.__eq__(other) or self.__gt__(other)

def suggest_normalized_version(s):
    """Suggest a normalized version close to the given version string.

    If you have a version string that isn't rational (i.e. NormalizedVersion
    doesn't like it) then you might be able to get an equivalent (or close)
    rational version from this function.

    This does a number of simple normalizations to the given string, based
    on observation of versions currently in use on PyPI. Given a dump of
    those versions during PyCon 2009, 4287 of them:
    - 2312 (53.93%) match NormalizedVersion without change
    - with the automatic suggestion
    - 3474 (81.04%) match when using this suggestion method

    @param s {str} An irrational version string.
    @returns A rational version string, or None, if couldn't determine one.
    """
    try:
        NormalizedVersion(s)
        return s   # already rational
    except IrrationalVersionError:
        pass

    rs = s.lower()

    # part of this could use maketrans
    for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
                       ('beta', 'b'), ('rc', 'c'), ('-final', ''),
                       ('-pre', 'c'),
                       ('-release', ''), ('.release', ''), ('-stable', ''),
                       ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
                       ('final', '')):
        rs = rs.replace(orig, repl)

    # if something ends with dev or pre, we add a 0
    rs = re.sub(r"pre$", r"pre0", rs)
    rs = re.sub(r"dev$", r"dev0", rs)

    # if we have something like "b-2" or "a.2" at the end of the
    # version, that is probably beta, alpha, etc
    # let's remove the dash or dot
    rs = re.sub(r"([abc|rc])[\-\.](\d+)$", r"\1\2", rs)

    # 1.0-dev-r371 -> 1.0.dev371
    # 0.1-dev-r79 -> 0.1.dev79
    rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)

    # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
    rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)

    # Clean: v0.3, v1.0
    if rs.startswith('v'):
        rs = rs[1:]

    # Clean leading '0's on numbers.
    #TODO: unintended side-effect on, e.g., "2003.05.09"
    # PyPI stats: 77 (~2%) better
    rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)

    # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
    # zero.
    # PyPI stats: 245 (7.56%) better
    rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)

    # the 'dev-rNNN' tag is a dev tag
    rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)

    # clean the - when used as a pre delimiter
    rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)

    # a terminal "dev" or "devel" can be changed into ".dev0"
    rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)

    # a terminal "dev" can be changed into ".dev0"
    rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)

    # a terminal "final" or "stable" can be removed
    rs = re.sub(r"(final|stable)$", "", rs)

    # The 'r' and the '-' tags are post release tags
    #   0.4a1.r10       ->  0.4a1.post10
    #   0.9.33-17222    ->  0.9.33.post17222
    #   0.9.33-r17222   ->  0.9.33.post17222
    rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)

    # Clean 'r' instead of 'dev' usage:
    #   0.9.33+r17222   ->  0.9.33.dev17222
    #   1.0dev123       ->  1.0.dev123
    #   1.0.git123      ->  1.0.dev123
    #   1.0.bzr123      ->  1.0.dev123
    #   0.1a0dev.123    ->  0.1a0.dev123
    # PyPI stats: ~150 (~4%) better
    rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)

    # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
    #   0.2.pre1        ->  0.2c1
    #   0.2-c1          ->  0.2c1
    #   1.0preview123   ->  1.0c123
    # PyPI stats: ~21 (0.62%) better
    rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)

    # Tcl/Tk uses "px" for their post release markers
    rs = re.sub(r"p(\d+)$", r".post\1", rs)

    try:
        NormalizedVersion(rs)
        return rs   # now rational
    except IrrationalVersionError:
        pass
    return None
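
# Editor's sketch (not part of the original file): typical use of this module.
#
#     >>> NormalizedVersion('1.2.3b1') < NormalizedVersion('1.2.3')
#     True
#     >>> suggest_normalized_version('1.0-beta2')
#     '1.0b2'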
@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2004-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
extended version number class
"""

# verlib a.k.a. distutils.version by Tarek Ziadé.
from pyutil.verlib import NormalizedVersion
def cmp_version(v1, v2):
    return cmp(NormalizedVersion(str(v1)), NormalizedVersion(str(v2)))

# Python Standard Library
import re

# End users see version strings like this:

# "1.0.0"
#  ^ ^ ^
#  | | |
#  | | '- micro version number
#  | '- minor version number
#  '- major version number

# The first number is the "major version number". The second number is the
# "minor version number" -- it gets bumped whenever we make a new release that
# adds or changes functionality. The third number is the "micro version
# number" -- it gets bumped whenever we make a new release that doesn't add or
# change functionality, but just fixes bugs (including performance issues).

# Early-adopter end users see version strings like this:

# "1.0.0a1"
#  ^ ^ ^^^
#  | | |||
#  | | ||'- release number
#  | | |'- a=alpha, b=beta, c=release candidate, or none
#  | | '- micro version number
#  | '- minor version number
#  '- major version number

# The optional "a" or "b" stands for "alpha release" or "beta release"
# respectively. The number after "a" or "b" gets bumped every time we
# make a new alpha or beta release. This has the same form and the same
# meaning as version numbers of releases of Python.

# Developers see "full version strings", like this:

# "1.0.0a1-55"
#  ^ ^ ^^^  ^
#  | | |||  |
#  | | |||  '- nano version number
#  | | ||'- release number
#  | | |'- a=alpha, b=beta, c=release candidate or none
#  | | '- micro version number
#  | '- minor version number
#  '- major version number

# or else like this:

# "1.0.0a1-r22155"
#  ^ ^ ^^^   ^
#  | | |||   |
#  | | |||   '- revision number
#  | | ||'- release number
#  | | |'- a=alpha, b=beta, c=release candidate or none
#  | | '- micro version number
#  | '- minor version number
#  '- major version number

# The presence of the nano version number means that this is a development
# version. There are no guarantees about compatibility, etc. This version is
# considered to be more recent than the version without this field
# (e.g. "1.0.0a1").

# The nano version number or revision number is meaningful only to developers.
# It gets generated automatically from darcs revision control history by
# "darcsver.py". The nano version number is the count of patches that have been
# applied since the last version number tag was applied. The revision number is
# the count of all patches that have been applied in the history.

VERSION_BASE_RE_STR="(\d+)(\.(\d+)(\.(\d+))?)?((a|b|c)(\d+))?(\.dev(\d+))?"
VERSION_SUFFIX_RE_STR="(-(\d+|r\d+)|.post\d+)?"
VERSION_RE_STR=VERSION_BASE_RE_STR + VERSION_SUFFIX_RE_STR
VERSION_RE=re.compile("^" + VERSION_RE_STR + "$")

class Version(object):
    def __init__(self, vstring=None):
        self.major = None
        self.minor = None
        self.micro = None
        self.prereleasetag = None
        self.prerelease = None
        self.nano = None
        self.revision = None
        if vstring:
            try:
                self.parse(vstring)
            except ValueError, le:
                le.args = tuple(le.args + ('vstring:', vstring,))
                raise

    def parse(self, vstring):
        mo = VERSION_RE.search(vstring)
        if not mo:
            raise ValueError, "Not a valid version string for pyutil.version_class.Version(): %r" % (vstring,)

        self.major = int(mo.group(1))
        self.minor = mo.group(3) and int(mo.group(3)) or 0
        self.micro = mo.group(5) and int(mo.group(5)) or 0
        reltag = mo.group(6)
        if reltag:
            reltagnum = int(mo.group(8))
            self.prereleasetag = mo.group(7)
            self.prerelease = reltagnum

        if mo.group(11):
            if mo.group(11)[0] == '-':
                if mo.group(12)[0] == 'r':
                    self.revision = int(mo.group(12)[1:])
                else:
                    self.nano = int(mo.group(12))
            else:
                assert mo.group(11).startswith('.post'), mo.group(11)
                self.revision = int(mo.group(11)[5:])

        # XXX in the future, to be compatible with the Python "rational version numbering" scheme, we should move to using .post$REV instead of -r$REV:
        # self.fullstr = "%d.%d.%d%s%s" % (self.major, self.minor, self.micro, self.prereleasetag and "%s%d" % (self.prereleasetag, self.prerelease,) or "", self.nano and "-%d" % (self.nano,) or self.revision and ".post%d" % (self.revision,) or "",)
        self.fullstr = "%d.%d.%d%s%s" % (self.major, self.minor, self.micro, self.prereleasetag and "%s%d" % (self.prereleasetag, self.prerelease,) or "", self.nano and "-%d" % (self.nano,) or self.revision and "-r%d" % (self.revision,) or "",)

    def user_str(self):
        return self.full_str()

    def full_str(self):
        if hasattr(self, 'fullstr'):
            return self.fullstr
        else:
            return 'None'

    def __str__(self):
        return self.full_str()

    def __repr__(self):
        return self.__str__()

    def __cmp__(self, other):
        return cmp_version(self, other)
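
# Editor's sketch (not part of the original file): parsing a full development
# version string with this class.
#
#     >>> v = Version('1.0.0a1-55')
#     >>> (v.major, v.minor, v.micro, v.prereleasetag, v.prerelease, v.nano)
#     (1, 0, 0, 'a', 1, 55)
#     >>> str(v)
#     '1.0.0a1-55'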
@ -0,0 +1,42 @@
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

import warnings

# from the Python Standard Library
from weakref import ref

# from the pyutil library
from assertutil import precondition

# Thanks to Thomas Wouters, JP Calderone and the authors from the Python Cookbook.

# class WeakMethod copied from The Python Cookbook and simplified.

class WeakMethod:
    """ Wraps a function or, more importantly, a bound method, in
    a way that allows a bound method's object to be GC'd """
    def __init__(self, fn, callback=None):
        warnings.warn("deprecated", DeprecationWarning)
        precondition(hasattr(fn, 'im_self'), "fn is required to be a bound method.")
        self._cleanupcallback = callback
        self._obj = ref(fn.im_self, self.call_cleanup_cb)
        self._meth = fn.im_func

    def __call__(self, *args, **kws):
        s = self._obj()
        if s:
            return self._meth(s, *args, **kws)

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self._obj, self._meth,)

    def call_cleanup_cb(self, thedeadweakref):
        if self._cleanupcallback is not None:
            self._cleanupcallback(self, thedeadweakref)

def factory_function_name_here(o):
    if hasattr(o, 'im_self'):
        return WeakMethod(o)
    else:
        return o
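
# Editor's sketch (not part of the original file): a WeakMethod forwards calls
# while its object is alive and returns None afterwards. Thing is hypothetical.
#
#     class Thing:
#         def hello(self):
#             return "hello"
#
#     t = Thing()
#     wm = WeakMethod(t.hello)
#     wm()    # -> "hello"
#     del t
#     wm()    # -> None, because the object has been garbage-collected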
@ -0,0 +1,50 @@
# Copyright © 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
What word has three letters and an 'x' in it?

Not that one, silly.
"""

import warnings
import array, operator

from pyutil.assertutil import precondition

def py_xor(str1, str2):
    warnings.warn("deprecated", DeprecationWarning)
    precondition(len(str1) == len(str2), "str1 and str2 are required to be of the same length.", str1=str1, str2=str2)

    if len(str1)%4 == 0:
        a1 = array.array('i', str1)
        a2 = array.array('i', str2)
        for i in range(len(a1)):
            a2[i] = a2[i]^a1[i]
    elif len(str1)%2 == 0:
        a1 = array.array('h', str1)
        a2 = array.array('h', str2)
        for i in range(len(a1)):
            a2[i] = a2[i]^a1[i]
    else:
        a1 = array.array('c', str1)
        a2 = array.array('c', str2)
        for i in range(len(a1)):
            a2[i] = chr(ord(a2[i])^ord(a1[i]))

    return a2.tostring()

def py_xor_simple(str1, str2):
    """
    Benchmarks show that this is the same speed as py_xor() for small strings
    and much slower for large strings, so don't use it. --Zooko 2002-04-29
    """
    warnings.warn("deprecated", DeprecationWarning)
    precondition(len(str1) == len(str2), "str1 and str2 are required to be of the same length.", str1=str1, str2=str2)

    return ''.join(map(chr, map(operator.__xor__, map(ord, str1), map(ord, str2))))

# Now make "xor.xor()" be the best xor we've got:
xor = py_xor

# for unit tests, see pyutil/test/test_xor.py. For benchmarks, see pyutil/test/bench_xor.py.
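
# Editor's sketch (not part of the original file): xor is its own inverse, so
# applying the same equal-length key twice round-trips the plaintext.
#
#     >>> key = 'K' * 4
#     >>> ct = xor('abcd', key)
#     >>> xor(ct, key)
#     'abcd'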
@ -0,0 +1,261 @@
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
Making your zlib experience that much nicer!

Most importantly, this offers protection from "zlib bomb" attacks, where the
original data was maximally compressible, and a naive use of zlib would
consume all of your RAM while trying to decompress it.
"""

import exceptions, string, zlib

from humanreadable import hr
from pyutil.assertutil import precondition

class DecompressError(exceptions.StandardError, zlib.error): pass
class UnsafeDecompressError(DecompressError): pass # This means it would take more memory to decompress than we can spare.
class TooBigError(DecompressError): pass # This means the resulting uncompressed text would exceed the maximum allowed length.
class ZlibError(DecompressError): pass # internal error, probably due to the input not being zlib compressed text

# The smallest limit that you can impose on zlib decompression and still have
# a chance of succeeding at decompression.

# constant memory overhead of zlib (76 KB), plus minbite (128 bytes) times
# maxexpansion (1032) times buffer-copying duplication (2), plus 2063 so as
# to reach the ceiling of div (2*1032)

MINMAXMEM=76*2**10 + 128 * 1032 * 2 + 2063 - 1

# You should really specify a maxmem which is much higher than MINMAXMEM. If
# maxmem=MINMAXMEM, we will be reduced to decompressing the input in
# 128-byte bites, and furthermore unless the decompressed text is quite small,
# we will be forced to give up and spuriously raise UnsafeDecompressError!
# You really ought to pass a maxmem argument equal to the maximum possible
# memory that your app should ever allocate (for a short-term use).
# I typically set it to 65 MB.

def decompress(zbuf, maxlen=(65 * (2**20)), maxmem=(65 * (2**20))):
    """
    Decompress zbuf so that it decompresses to <= maxlen bytes, while using
    <= maxmem memory, or else raise an exception. If zbuf contains
    uncompressed data an exception will be raised.

    This function guards against memory allocation attacks.

    @param maxlen the resulting text must not be greater than this
    @param maxmem the execution of this function must not use more than this
        amount of memory in bytes; The higher this number is (optimally
        1032 * maxlen, or even greater), the faster this function can
        complete. (Actually I don't fully understand the workings of zlib, so
        this function might use a *little* more than this memory, but not a
        lot more.) (Also, this function will raise an exception if the amount
        of memory required even *approaches* maxmem. Another reason to make
        it large.) (Hence the default value which would seem to be
        exceedingly large until you realize that it means you can decompress
        64 KB chunks of compressiontext at a bite.)
    """
    assert isinstance(maxlen, (int, long,)) and maxlen > 0, "maxlen is required to be a real maxlen, geez!"
    assert isinstance(maxmem, (int, long,)) and maxmem > 0, "maxmem is required to be a real maxmem, geez!"
    assert maxlen <= maxmem, "maxlen is required to be <= maxmem. All data that is included in the return value is counted against maxmem as well as against maxlen, so it is impossible to return a result bigger than maxmem, even if maxlen is bigger than maxmem. See decompress_to_spool() if you want to spool a large text out while limiting the amount of memory used during the process."

    lenzbuf = len(zbuf)
    offset = 0
    decomplen = 0
    availmem = maxmem - (76 * 2**10) # zlib can take around 76 KB RAM to do decompression
    availmem = availmem / 2 # generating the result string from the intermediate strings will require using the same amount of memory again, briefly. If you care about this kind of thing, then let's rewrite this module in C.

    decompstrlist = []

    decomp = zlib.decompressobj()
    while offset < lenzbuf:
        # How much compressedtext can we safely attempt to decompress now without going over `maxmem'? zlib docs say that the theoretical maximum for the zlib format would be 1032:1.
        lencompbite = availmem / 1032 # XXX TODO: The biggest compression ratio zlib can have for whole files is 1032:1. Unfortunately I don't know if small chunks of compressiontext *within* a file can expand to more than that. I'll assume not... --Zooko 2001-05-12
        if lencompbite < 128:
            # If we can't safely attempt even a few bytes of compression text, let us give up. Either `maxmem' was too small or this compressiontext is actually a decompression bomb.
            raise UnsafeDecompressError, "used up roughly maxmem memory. maxmem: %s, len(zbuf): %s, offset: %s, decomplen: %s, lencompbite: %s" % tuple(map(hr, [maxmem, len(zbuf), offset, decomplen, lencompbite,]))
        # I wish the following were a local function like this:
        # def proc_decomp_bite(tmpstr, lencompbite=0, decomplen=decomplen, maxlen=maxlen, availmem=availmem, decompstrlist=decompstrlist, offset=offset, zbuf=zbuf):
        # ...but we can't conveniently and efficiently update the integer variables like offset in the outer scope. Oh well. --Zooko 2003-06-26
        try:
            if (offset == 0) and (lencompbite >= lenzbuf):
                tmpstr = decomp.decompress(zbuf)
            else:
                tmpstr = decomp.decompress(zbuf[offset:offset+lencompbite])
        except zlib.error, le:
            raise ZlibError, (offset, lencompbite, decomplen, hr(le), )

        lentmpstr = len(tmpstr)
        decomplen = decomplen + lentmpstr
        if decomplen > maxlen:
            raise TooBigError, "length of resulting data > maxlen. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
        availmem = availmem - lentmpstr
        offset = offset + lencompbite
        decompstrlist.append(tmpstr)
        tmpstr = ''

    try:
        tmpstr = decomp.flush()
    except zlib.error, le:
        raise ZlibError, (offset, lencompbite, decomplen, le, )

    lentmpstr = len(tmpstr)
    decomplen = decomplen + lentmpstr
    if decomplen > maxlen:
        raise TooBigError, "length of resulting data > maxlen. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
    availmem = availmem - lentmpstr
    offset = offset + lencompbite
    if lentmpstr > 0:
        decompstrlist.append(tmpstr)
        tmpstr = ''

    if len(decompstrlist) > 0:
        return string.join(decompstrlist, '')
    else:
        return ''   # bug fix: the original returned decompstrlist[0] here, which would raise IndexError on an empty list
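
# Editor's sketch (not part of the original file): bounded decompression
# refuses a zlib bomb instead of eating RAM. The sizes here are arbitrary.
#
#     import zlib
#     bomb = zlib.compress('0' * (100 * 2**20))   # ~100 MB of zeros compresses to ~100 KB
#     try:
#         decompress(bomb, maxlen=2**20, maxmem=65 * 2**20)
#     except TooBigError:
#         print "refused: output would exceed maxlen"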
def decompress_to_fileobj(zbuf, fileobj, maxlen=(65 * (2**20)), maxmem=(65 * (2**20))):
    """
    Decompress zbuf so that it decompresses to <= maxlen bytes, while using
    <= maxmem memory, or else raise an exception. If zbuf contains
    uncompressed data an exception will be raised.

    This function guards against memory allocation attacks.

    Note that this assumes that data written to fileobj still occupies memory,
    so such data counts against maxmem as well as against maxlen.

    @param maxlen the resulting text must not be greater than this
    @param maxmem the execution of this function must not use more than this
        amount of memory in bytes; The higher this number is (optimally
        1032 * maxlen, or even greater), the faster this function can
        complete. (Actually I don't fully understand the workings of zlib, so
        this function might use a *little* more than this memory, but not a
        lot more.) (Also, this function will raise an exception if the amount
        of memory required even *approaches* maxmem. Another reason to make
        it large.) (Hence the default value which would seem to be
        exceedingly large until you realize that it means you can decompress
        64 KB chunks of compressiontext at a bite.)
    @param fileobj a file object to which the decompressed text will be written
    """
    precondition(hasattr(fileobj, 'write') and callable(fileobj.write), "fileobj is required to have a write() method.", fileobj=fileobj)
    precondition(isinstance(maxlen, (int, long,)) and maxlen > 0, "maxlen is required to be a real maxlen, geez!", maxlen=maxlen)
    precondition(isinstance(maxmem, (int, long,)) and maxmem > 0, "maxmem is required to be a real maxmem, geez!", maxmem=maxmem)
    precondition(maxlen <= maxmem, "maxlen is required to be <= maxmem. All data that is written out to fileobj is counted against maxmem as well as against maxlen, so it is impossible to return a result bigger than maxmem, even if maxlen is bigger than maxmem. See decompress_to_spool() if you want to spool a large text out while limiting the amount of memory used during the process.", maxlen=maxlen, maxmem=maxmem)

    lenzbuf = len(zbuf)
    offset = 0
    decomplen = 0
    availmem = maxmem - (76 * 2**10) # zlib can take around 76 KB RAM to do decompression

    decomp = zlib.decompressobj()
    while offset < lenzbuf:
        # How much compressedtext can we safely attempt to decompress now without going over maxmem? zlib docs say that the theoretical maximum for the zlib format would be 1032:1.
        lencompbite = availmem / 1032 # XXX TODO: The biggest compression ratio zlib can have for whole files is 1032:1. Unfortunately I don't know if small chunks of compressiontext *within* a file can expand to more than that. I'll assume not... --Zooko 2001-05-12
        if lencompbite < 128:
            # If we can't safely attempt even a few bytes of compression text, let us give up. Either maxmem was too small or this compressiontext is actually a decompression bomb.
            raise UnsafeDecompressError, "used up roughly maxmem memory. maxmem: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxmem, len(zbuf), offset, decomplen,]))
        # I wish the following were a local function like this:
        # def proc_decomp_bite(tmpstr, lencompbite=0, decomplen=decomplen, maxlen=maxlen, availmem=availmem, decompstrlist=decompstrlist, offset=offset, zbuf=zbuf):
        # ...but we can't conveniently and efficiently update the integer variables like offset in the outer scope. Oh well. --Zooko 2003-06-26
        try:
            if (offset == 0) and (lencompbite >= lenzbuf):
                tmpstr = decomp.decompress(zbuf)
            else:
                tmpstr = decomp.decompress(zbuf[offset:offset+lencompbite])
        except zlib.error, le:
            raise ZlibError, (offset, lencompbite, decomplen, le, )
        lentmpstr = len(tmpstr)
        decomplen = decomplen + lentmpstr
        if decomplen > maxlen:
            raise TooBigError, "length of resulting data > maxlen. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
        availmem = availmem - lentmpstr
        offset = offset + lencompbite
        fileobj.write(tmpstr)
        tmpstr = ''

    try:
        tmpstr = decomp.flush()
    except zlib.error, le:
        raise ZlibError, (offset, lencompbite, decomplen, le, )
    lentmpstr = len(tmpstr)
    decomplen = decomplen + lentmpstr
    if decomplen > maxlen:
        raise TooBigError, "length of resulting data > maxlen. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
    availmem = availmem - lentmpstr
    offset = offset + lencompbite
    fileobj.write(tmpstr)
    tmpstr = ''

def decompress_to_spool(zbuf, fileobj, maxlen=(65 * (2**20)), maxmem=(65 * (2**20))):
    """
    Decompress zbuf so that it decompresses to <= maxlen bytes, while using
    <= maxmem memory, or else raise an exception. If zbuf contains
    uncompressed data an exception will be raised.

    This function guards against memory allocation attacks.

    Note that this assumes that data written to fileobj does *not* continue to
    occupy memory, so such data doesn't count against maxmem, although of
    course it still counts against maxlen.

    @param maxlen the resulting text must not be greater than this
    @param maxmem the execution of this function must not use more than this
        amount of memory in bytes; The higher this number is (optimally
        1032 * maxlen, or even greater), the faster this function can
        complete. (Actually I don't fully understand the workings of zlib, so
        this function might use a *little* more than this memory, but not a
        lot more.) (Also, this function will raise an exception if the amount
        of memory required even *approaches* maxmem. Another reason to make
        it large.) (Hence the default value which would seem to be
        exceedingly large until you realize that it means you can decompress
        64 KB chunks of compressiontext at a bite.)
    @param fileobj the decompressed text will be written to it
    """
    precondition(hasattr(fileobj, 'write') and callable(fileobj.write), "fileobj is required to have a write() method.", fileobj=fileobj)
    precondition(isinstance(maxlen, (int, long,)) and maxlen > 0, "maxlen is required to be a real maxlen, geez!", maxlen=maxlen)
    precondition(isinstance(maxmem, (int, long,)) and maxmem > 0, "maxmem is required to be a real maxmem, geez!", maxmem=maxmem)

    tmpstr = ''
    lenzbuf = len(zbuf)
    offset = 0
    decomplen = 0
    availmem = maxmem - (76 * 2**10) # zlib can take around 76 KB RAM to do decompression

    decomp = zlib.decompressobj()
    while offset < lenzbuf:
        # How much compressedtext can we safely attempt to decompress now without going over `maxmem'? zlib docs say that the theoretical maximum for the zlib format would be 1032:1.
        lencompbite = availmem / 1032 # XXX TODO: The biggest compression ratio zlib can have for whole files is 1032:1. Unfortunately I don't know if small chunks of compressiontext *within* a file can expand to more than that. I'll assume not... --Zooko 2001-05-12
        if lencompbite < 128:
            # If we can't safely attempt even a few bytes of compression text, let us give up. Either `maxmem' was too small or this compressiontext is actually a decompression bomb.
            raise UnsafeDecompressError, "used up roughly `maxmem' memory. maxmem: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxmem, len(zbuf), offset, decomplen,]))
        # I wish the following were a local function like this:
        # def proc_decomp_bite(tmpstr, lencompbite=0, decomplen=decomplen, maxlen=maxlen, availmem=availmem, decompstrlist=decompstrlist, offset=offset, zbuf=zbuf):
        # ...but we can't conveniently and efficiently update the integer variables like offset in the outer scope. Oh well. --Zooko 2003-06-26
        try:
            if (offset == 0) and (lencompbite >= lenzbuf):
                tmpstr = decomp.decompress(zbuf)
            else:
                tmpstr = decomp.decompress(zbuf[offset:offset+lencompbite])
        except zlib.error, le:
            raise ZlibError, (offset, lencompbite, decomplen, le, )
        lentmpstr = len(tmpstr)
        decomplen = decomplen + lentmpstr
        if decomplen > maxlen:
            raise TooBigError, "length of resulting data > `maxlen'. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
        offset = offset + lencompbite
        fileobj.write(tmpstr)
        tmpstr = ''

    try:
        tmpstr = decomp.flush()
    except zlib.error, le:
        raise ZlibError, (offset, lencompbite, decomplen, le, )
    lentmpstr = len(tmpstr)
    decomplen = decomplen + lentmpstr
    if decomplen > maxlen:
        raise TooBigError, "length of resulting data > `maxlen'. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
    offset = offset + lencompbite
    fileobj.write(tmpstr)
    tmpstr = ''