Include module for desktop build

Branch: pull/834/head
Author: Ruud, 13 years ago
Commit: 36f653f65b
88 changed files (lines changed in parentheses):

   1. couchpotato/core/_base/desktop/main.py (1)
   2. couchpotato/core/downloaders/base.py (2)
   3. couchpotato/core/plugins/base.py (1)
   4. libs/pyutil/PickleSaver.py (201)
   5. libs/pyutil/__init__.py (23)
   6. libs/pyutil/_version.py (17)
   7. libs/pyutil/assertutil.py (60)
   8. libs/pyutil/benchmarks/bench_json.py (55)
   9. libs/pyutil/benchmarks/bench_xor.py (53)
  10. libs/pyutil/benchutil.py (243)
  11. libs/pyutil/benchutil.py~ (225)
  12. libs/pyutil/cache.py (735)
  13. libs/pyutil/dictutil.py (605)
  14. libs/pyutil/fileutil.py (271)
  15. libs/pyutil/fileutil.py~ (271)
  16. libs/pyutil/find_exe.py (30)
  17. libs/pyutil/hashexpand.py (81)
  18. libs/pyutil/humanreadable.py (115)
  19. libs/pyutil/increasing_timer.py (136)
  20. libs/pyutil/iputil.py (288)
  21. libs/pyutil/jsonutil.py (456)
  22. libs/pyutil/lineutil.py (59)
  23. libs/pyutil/logutil.py (19)
  24. libs/pyutil/mathutil.py (106)
  25. libs/pyutil/memutil.py (586)
  26. libs/pyutil/nummedobj.py (52)
  27. libs/pyutil/observer.py (99)
  28. libs/pyutil/odict.py (552)
  29. libs/pyutil/odict.py~ (552)
  30. libs/pyutil/platformutil.py (100)
  31. libs/pyutil/randutil.py (85)
  32. libs/pyutil/randutil.py~ (85)
  33. libs/pyutil/repeatable_random.py (90)
  34. libs/pyutil/scripts/__init__.py (0)
  35. libs/pyutil/scripts/lines.py (36)
  36. libs/pyutil/scripts/memdump2dot.py (65)
  37. libs/pyutil/scripts/randcookie.py (30)
  38. libs/pyutil/scripts/randfile.py (48)
  39. libs/pyutil/scripts/tailx.py (30)
  40. libs/pyutil/scripts/try_decoding.py (96)
  41. libs/pyutil/scripts/unsort.py (19)
  42. libs/pyutil/scripts/verinfo.py (26)
  43. libs/pyutil/strutil.py (48)
  44. libs/pyutil/test/__init__.py (0)
  45. libs/pyutil/test/current/__init__.py (0)
  46. libs/pyutil/test/current/json_tests/__init__.py (0)
  47. libs/pyutil/test/current/json_tests/test_decode.py (15)
  48. libs/pyutil/test/current/json_tests/test_default.py (9)
  49. libs/pyutil/test/current/json_tests/test_dump.py (13)
  50. libs/pyutil/test/current/json_tests/test_encode_basestring_ascii.py (36)
  51. libs/pyutil/test/current/json_tests/test_fail.py (76)
  52. libs/pyutil/test/current/json_tests/test_float.py (9)
  53. libs/pyutil/test/current/json_tests/test_indent.py (41)
  54. libs/pyutil/test/current/json_tests/test_pass1.py (71)
  55. libs/pyutil/test/current/json_tests/test_pass2.py (14)
  56. libs/pyutil/test/current/json_tests/test_pass3.py (20)
  57. libs/pyutil/test/current/json_tests/test_recursion.py (67)
  58. libs/pyutil/test/current/json_tests/test_separators.py (42)
  59. libs/pyutil/test/current/json_tests/test_speedups.py (18)
  60. libs/pyutil/test/current/json_tests/test_unicode.py (55)
  61. libs/pyutil/test/current/test_assertutil.py (18)
  62. libs/pyutil/test/current/test_fileutil.py (33)
  63. libs/pyutil/test/current/test_iputil.py (33)
  64. libs/pyutil/test/current/test_jsonutil.py (18)
  65. libs/pyutil/test/current/test_mathutil.py (135)
  66. libs/pyutil/test/current/test_time_format.py (97)
  67. libs/pyutil/test/current/test_verlib.py (124)
  68. libs/pyutil/test/current/test_version_class.py (23)
  69. libs/pyutil/test/deprecated/__init__.py (0)
  70. libs/pyutil/test/deprecated/test_dictutil.py (115)
  71. libs/pyutil/test/deprecated/test_picklesaver.py (36)
  72. libs/pyutil/test/deprecated/test_xor.py (24)
  73. libs/pyutil/test/out_of_shape/__init__.py (0)
  74. libs/pyutil/test/out_of_shape/test_cache.py (454)
  75. libs/pyutil/test/out_of_shape/test_odict.py (441)
  76. libs/pyutil/test/out_of_shape/test_strutil.py (30)
  77. libs/pyutil/test/out_of_shape/test_zlibutil.py (77)
  78. libs/pyutil/testutil.py (121)
  79. libs/pyutil/time_comparisons.py (44)
  80. libs/pyutil/time_comparisons.py~ (72)
  81. libs/pyutil/time_format.py (72)
  82. libs/pyutil/twistedutil.py (25)
  83. libs/pyutil/verlib.py (337)
  84. libs/pyutil/version_class.py (148)
  85. libs/pyutil/weakutil.py (42)
  86. libs/pyutil/xor/__init__.py (0)
  87. libs/pyutil/xor/xor.py (50)
  88. libs/pyutil/zlibutil.py (261)

couchpotato/core/_base/desktop/main.py (1)

@@ -25,6 +25,7 @@ if Env.get('desktop'):
# Events to desktop
addEvent('app.after_shutdown', desktop.afterShutdown)
addEvent('app.load', desktop.onAppLoad, priority = 110)
def onClose(self, event):
return fireEvent('app.shutdown', single = True)
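
As a usage illustration (not part of the hunk above): the change hooks the desktop window-close callback into CouchPotato's event bus, where handlers are registered with addEvent() and triggered with fireEvent(). A minimal sketch of that pattern follows, assuming the helpers live in couchpotato.core.event as elsewhere in the code base; the 'example.ping' event name and handler are invented for illustration.

# Sketch only -- event name and handler are made up, not part of this commit.
from couchpotato.core.event import addEvent, fireEvent

def onPing(message):
    return 'pong: %s' % message

addEvent('example.ping', onPing)

# single = True asks for the lone handler's return value instead of a list of results
print fireEvent('example.ping', 'hello', single = True)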

couchpotato/core/downloaders/base.py (2)

@@ -29,7 +29,7 @@ class Downloader(Plugin):
pass
def getDownloadStatus(self, data = {}, movie = {}):
pass
return False
def createNzbName(self, data, movie):
tag = self.cpTag(movie)

couchpotato/core/plugins/base.py (1)

@@ -192,6 +192,7 @@ class Plugin(object):
def doShutdown(self):
self.shuttingDown(True)
return True
def shuttingDown(self, value = None):
if value is None:

libs/pyutil/PickleSaver.py (201)

@@ -0,0 +1,201 @@
# Copyright (c) 2001 Autonomous Zone Industries
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
An object that makes some of the attributes of your class persistent, pickling
them and lazily writing them to a file.
"""
# from the Python Standard Library
import os
import cPickle as pickle
import warnings
# from the pyutil library
import fileutil
import nummedobj
import twistedutil
# from the Twisted library
from twisted.python import log
class PickleSaver(nummedobj.NummedObj):
"""
This makes some of the attributes of your class persistent, saving
them in a pickle and saving them lazily.
The general idea: You are going to tell PickleSaver which of your
attributes ought to be persistently saved, and the name of a file to
save them in. Those attributes will get saved to disk, and when
your object is instantiated those attributes will get set to the
values loaded from the file.
Usage: inherit from PickleSaver and call PickleSaver.__init__() in your
constructor. You will pass arguments to PickleSaver.__init__()
telling it which attributes to save, which file to save them in, and
what values they should have if there is no value stored for them in
the file.
Note: do *not* assign values to your persistent attributes in your
constructor, because you might thus overwrite their persistent
values.
Then whenever you change one of the persistent attributes, call
self.lazy_save() (it won't *really* save -- it'll just schedule a
save for DELAY minutes later.) If you update an attribute and
forget to call self.lazy_save() then the change will not be saved,
unless you later call self.lazy_save() before you shut down.
Data could be lost if the Python interpreter were to die
unexpectedly (for example, due to a segfault in a compiled machine
code module or due to the Python process being killed without
warning via SIGKILL) before the delay passes. However if the Python
interpreter shuts down cleanly (i.e., if it garbage collects and
invokes the __del__ methods of the collected objects), then the data
will be saved at that time (unless your class has the "not-collectable"
problem: http://python.org/doc/current/lib/module-gc.html -- search
in text for "uncollectable").
Note: you can pass DELAY=0 to make PickleSaver a not-so-lazy saver.
The advantage of laziness is that you don't touch the disk as
often -- touching disk is a performance cost.
To cleanly shutdown, invoke shutdown(). Further operations after that
will result in exceptions.
"""
class ExtRes:
"""
This is for holding things (external resources) that PickleSaver needs
to finalize after PickleSaver is killed. (post-mortem finalization)
In particular, this holds the names and values of all attributes
that have been changed, so that after the PickleSaver is
garbage-collected those values will be saved to the persistent file.
"""
def __init__(self, fname, objname):
self.fname = fname
self.objname = objname
self.dirty = False # True iff the attrs have been changed and need to be saved to disk; When you change this flag from False to True, you schedule a save task for 10 minutes later. When the save task goes off it changes the flag from True to False.
self.savertask = None
self.valstr = None # the pickled (serialized, string) contents of the attributes that should be saved
def _save_to_disk(self):
if self.valstr is not None:
log.msg("%s._save_to_disk(): fname: %s" % (self.objname, self.fname,))
of = open(self.fname + ".tmp", "wb")
of.write(self.valstr)
of.flush()
of.close()
of = None
fileutil.remove_if_possible(self.fname)
fileutil.rename(self.fname + ".tmp", self.fname)
log.msg("%s._save_to_disk(): now, having finished write(), os.path.isfile(%s): %s" % (self, self.fname, os.path.isfile(self.fname),))
self.valstr = None
self.dirty = False
try:
self.savertask.callId.cancel()
except:
pass
self.savertask = None
def shutdown(self):
if self.dirty:
self._save_to_disk()
if self.savertask:
try:
self.savertask.callId.cancel()
except:
pass
self.savertask = None
def __del__(self):
self.shutdown()
def __init__(self, fname, attrs, DELAY=60*60, savecb=None):
"""
@param attrs: a dict whose keys are the names of all the attributes to be persistently stored and whose values are the initial default value that the attribute gets set to the first time it is ever used; After this first initialization, the value will be persistent so the initial default value will never be used again.
@param savecb: if not None, then it is a callable that will be called after each save completes (useful for unit tests) (savecb doesn't get called after a shutdown-save, only after a scheduled save)
"""
warnings.warn("deprecated", DeprecationWarning)
nummedobj.NummedObj.__init__(self)
self._DELAY = DELAY
self._attrnames = attrs.keys()
self._extres = PickleSaver.ExtRes(fname=fname, objname=self.__repr__())
self._savecb = savecb
for attrname, defaultval in attrs.items():
setattr(self, attrname, defaultval)
try:
attrdict = pickle.loads(open(self._extres.fname, "rb").read())
for attrname, attrval in attrdict.items():
if not hasattr(self, attrname):
log.msg("WARNING: %s has no attribute named %s on load from disk, value: %s." % (self, attrname, attrval,))
setattr(self, attrname, attrval)
except (pickle.UnpicklingError, IOError, EOFError,), le:
try:
attrdict = pickle.loads(open(self._extres.fname + ".tmp", "rb").read())
for attrname, attrval in attrdict.items():
if not hasattr(self, attrname):
log.msg("WARNING: %s has no attribute named %s on load from disk, value: %s." % (self, attrname, attrval,))
setattr(self, attrname, attrval)
except (pickle.UnpicklingError, IOError, EOFError,), le2:
log.msg("Got exception attempting to load attrs. (This is normal if this is the first time you've used this persistent %s object.) fname: %s, le: %s, le2: %s" % (self.__class__, self._extres.fname, le, le2,))
self.lazy_save()
def _store_attrs_in_extres(self):
d = {}
for attrname in self._attrnames:
d[attrname] = getattr(self, attrname)
# log.msg("%s._store_attrs_in_extres: attrname: %s, val: %s" % (self, attrname, getattr(self, attrname),))
# pickle the attrs now, to ensure that there are no reference cycles
self._extres.valstr = pickle.dumps(d, True)
# log.msg("%s._store_attrs_in_extres: valstr: %s" % (self, self._extres.valstr,))
self._extres.dirty = True
def _save_to_disk(self):
log.msg("%s._save_to_disk()" % (self,))
self._extres._save_to_disk()
if self._savecb:
self._savecb()
def _lazy_save(self, delay=None):
""" @deprecated: use lazy_save() instead """
return self.lazy_save(delay)
def lazy_save(self, delay=None):
"""
@param delay: how long from now before the data gets saved to disk, or `None' in order to use the default value provided in the constructor
"""
if delay is None:
delay=self._DELAY
# copy the values into extres so that if `self' gets garbage-collected the values will be written to disk during post-mortem finalization. (This also marks it as dirty.)
self._store_attrs_in_extres()
newsavetask = twistedutil.callLater_weakly(delay, self._save_to_disk)
if self._extres.savertask:
if self._extres.savertask.callId.getTime() < newsavetask.callId.getTime():
try:
newsavetask.callId.cancel()
except:
pass
else:
try:
self._extres.savertask.callId.cancel()
except:
pass
self._extres.savertask = newsavetask
else:
self._extres.savertask = newsavetask
def shutdown(self):
self._extres.shutdown()
self._extres = None
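
As a usage illustration (not part of the diff): the docstring above describes how to subclass PickleSaver but ships no example. Below is a minimal sketch in the same Python 2 style as the vendored code; the Counter class, the 'counter.pickle' filename and the DELAY value are invented.

from pyutil.PickleSaver import PickleSaver

class Counter(PickleSaver):
    def __init__(self):
        # 'count' is the persistent attribute; 0 is used only the very first time,
        # before any pickle file exists on disk.
        PickleSaver.__init__(self, fname='counter.pickle', attrs={'count': 0}, DELAY=60)

    def increment(self):
        self.count += 1
        self.lazy_save()   # schedules a save for later rather than writing immediately

c = Counter()
c.increment()
print c.count
c.shutdown()   # flush any unsaved state to disk before exiting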

libs/pyutil/__init__.py (23)

@@ -0,0 +1,23 @@
"""
Library of useful Python functions and classes.
Projects that have contributed substantial portions to pyutil:
U{Mojo Nation<http://mojonation.net/>}
U{Mnet<http://sf.net/projects/mnet>}
U{Allmydata<http://allmydata.com/>}
U{Tahoe-LAFS<http://tahoe-lafs.org/>}
mailto:zooko@zooko.com
pyutil web site: U{http://tahoe-lafs.org/trac/pyutil}
"""
__version__ = "unknown"
try:
from _version import __version__
except ImportError:
# We're running in a tree that hasn't run "./setup.py darcsver", and didn't
# come with a _version.py, so we don't know what our version is. This should
# not happen very often.
pass
__version__ # hush pyflakes

libs/pyutil/_version.py (17)

@@ -0,0 +1,17 @@
# This is the version of this tree, as created by setup.py darcsver from the darcs patch
# information: the main version number is taken from the most recent release
# tag. If some patches have been added since the last release, this will have a
# -NN "build number" suffix, or else a -rNN "revision number" suffix. Please see
# pyutil.version_class for a description of what the different fields mean.
__pkgname__ = "pyutil"
verstr = "1.9.3"
try:
from pyutil.version_class import Version as pyutil_Version
__version__ = pyutil_Version(verstr)
except (ImportError, ValueError):
# Maybe there is no pyutil installed, or this may be an older version of
# pyutil.version_class which does not support SVN-alike revision numbers.
from distutils.version import LooseVersion as distutils_Version
__version__ = distutils_Version(verstr)

libs/pyutil/assertutil.py (60)

@@ -0,0 +1,60 @@
# Copyright (c) 2003-2009 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
Tests useful in assertion checking, prints out nicely formatted messages too.
"""
from humanreadable import hr
def _assert(___cond=False, *___args, **___kwargs):
if ___cond:
return True
msgbuf=[]
if ___args:
msgbuf.append("%s %s" % tuple(map(hr, (___args[0], type(___args[0]),))))
msgbuf.extend([", %s %s" % tuple(map(hr, (arg, type(arg),))) for arg in ___args[1:]])
if ___kwargs:
msgbuf.append(", %s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
else:
if ___kwargs:
msgbuf.append("%s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
msgbuf.extend([", %s: %s %s" % tuple(map(hr, (k, v, type(v),))) for k, v in ___kwargs.items()[1:]])
raise AssertionError, "".join(msgbuf)
def precondition(___cond=False, *___args, **___kwargs):
if ___cond:
return True
msgbuf=["precondition", ]
if ___args or ___kwargs:
msgbuf.append(": ")
if ___args:
msgbuf.append("%s %s" % tuple(map(hr, (___args[0], type(___args[0]),))))
msgbuf.extend([", %s %s" % tuple(map(hr, (arg, type(arg),))) for arg in ___args[1:]])
if ___kwargs:
msgbuf.append(", %s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
else:
if ___kwargs:
msgbuf.append("%s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
msgbuf.extend([", %s: %s %s" % tuple(map(hr, (k, v, type(v),))) for k, v in ___kwargs.items()[1:]])
raise AssertionError, "".join(msgbuf)
def postcondition(___cond=False, *___args, **___kwargs):
if ___cond:
return True
msgbuf=["postcondition", ]
if ___args or ___kwargs:
msgbuf.append(": ")
if ___args:
msgbuf.append("%s %s" % tuple(map(hr, (___args[0], type(___args[0]),))))
msgbuf.extend([", %s %s" % tuple(map(hr, (arg, type(arg),))) for arg in ___args[1:]])
if ___kwargs:
msgbuf.append(", %s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
else:
if ___kwargs:
msgbuf.append("%s: %s %s" % ((___kwargs.items()[0][0],) + tuple(map(hr, (___kwargs.items()[0][1], type(___kwargs.items()[0][1]),)))))
msgbuf.extend([", %s: %s %s" % tuple(map(hr, (k, v, type(v),))) for k, v in ___kwargs.items()[1:]])
raise AssertionError, "".join(msgbuf)
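
As a usage illustration (not part of the diff), here is how these helpers read at a call site; the divide() function is invented, and on failure the offending values are rendered via hr() into the AssertionError message.

from pyutil.assertutil import precondition, _assert

def divide(numerator, denominator):
    precondition(denominator != 0, "denominator must be non-zero", denominator=denominator)
    result = numerator / denominator
    _assert(isinstance(result, (int, float)), result=result)
    return result

print divide(6, 3)   # passes both checks
# divide(6, 0)       # would raise AssertionError naming the offending argument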

libs/pyutil/benchmarks/bench_json.py (55)

@@ -0,0 +1,55 @@
from pyutil import randutil
import random
from decimal import Decimal
l = []
s = None
def data_strings(N):
assert isinstance(N, int), (N, type(N))
del l[:]
for i in range(N):
l.append(repr(randutil.insecurerandstr(4)))
global s
s = json.dumps(l)
def data_Decimals(N):
del l[:]
for i in range(N):
l.append(Decimal(str(random.randrange(0, 1000000000)))/random.randrange(1, 1000000000))
global s
s = jsonutil.dumps(l)
def data_floats(N):
del l[:]
for i in range(N):
l.append(float(random.randrange(0, 1000000000))/random.randrange(1, 1000000000))
global s
s = json.dumps(l)
import json
from pyutil import jsonutil
def je(N):
return json.dumps(l)
def ue(N):
return jsonutil.dumps(l)
def jd(N):
return json.loads(s)
def ud(N):
return jsonutil.loads(s)
from pyutil import benchutil
for i in (data_strings, data_floats, data_Decimals):
for e in (ud, ue, jd, je):
# for e in (ue,):
print "i: %s, e: %s" % (i, e,)
try:
benchutil.bench(e, initfunc=i, TOPXP=5, profile=False)
except TypeError, e:
print "skipping due to %s" % (e,)
benchutil.print_bench_footer()

libs/pyutil/benchmarks/bench_xor.py (53)

@@ -0,0 +1,53 @@
#!/usr/bin/env python
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import hmac, sys, random
from pyutil.assertutil import _assert
from pyutil.xor import xor
from pyutil import benchfunc
from pyutil import randutil
SFUNCS = [hmac._strxor, xor.py_xor,]
SFNAMES = ["hmac", "pyutil py",]
inputs = {}
def _help_init_string(N):
global inputs
if not inputs.has_key(N):
inputs[N] = [randutil.insecurerandstr(N), randutil.insecurerandstr(N),]
def _help_make_bench_xor(f):
def g(n):
assert inputs.has_key(n)
_assert(isinstance(inputs[n][0], str), "Required to be a string.", inputs[n][0])
assert len(inputs[n][0]) == n
_assert(isinstance(inputs[n][1], str), "Required to be a string.", inputs[n][1])
assert len(inputs[n][1]) == n
for SF in SFUNCS:
assert f(inputs[n][0], inputs[n][1]) == SF(inputs[n][0], inputs[n][1])
return f(inputs[n][0], inputs[n][1])
return g
def bench(SETSIZES=[2**x for x in range(0, 22, 3)]):
random.seed(0)
if len(SFUNCS) <= 1: print ""
maxnamel = max(map(len, SFNAMES))
for SETSIZE in SETSIZES:
seed = random.random()
# print "seed: ", seed
random.seed(seed)
i = 0
if len(SFUNCS) > 1: print ""
for FUNC in SFUNCS:
funcname = SFNAMES[i] + " " * (maxnamel - len(SFNAMES[i]))
print "%s" % funcname,
sys.stdout.flush()
benchfunc.rep_bench(_help_make_bench_xor(FUNC), SETSIZE, initfunc=_help_init_string, MAXREPS=2**9, MAXTIME=30)
i = i + 1
bench()

libs/pyutil/benchutil.py (243)

@@ -0,0 +1,243 @@
# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
Benchmark a function for its behavior with respect to N.
How to use this module:
1. Define a function which runs the code that you want to benchmark. The
function takes a single argument which is the size of the task (i.e. the "N"
parameter). Pass this function as the first argument to rep_bench(), and N as
the second, e.g.:
>>> from pyutil.benchutil import rep_bench
>>> def fib(N):
... if N <= 1:
... return 1
... else:
... return fib(N-1) + fib(N-2)
...
>>> rep_bench(fib, 25, UNITS_PER_SECOND=1000)
best: 1.968e+00, 3th-best: 1.987e+00, mean: 2.118e+00, 3th-worst: 2.175e+00, worst: 2.503e+00 (of 10)
The output is reporting the number of milliseconds that executing the function
took, divided by N, from ten different invocations of fib(). It reports the
best, worst, M-th best, M-th worst, and mean, where "M" is the natural log of
the number of invocations (in this case 10).
2. Now run it with different values of N and look for patterns:
>>> for N in 1, 5, 9, 13, 17, 21:
... print "%2d" % N,
... rep_bench(fib, N, UNITS_PER_SECOND=1000000)
...
1 best: 9.537e-01, 3th-best: 9.537e-01, mean: 1.121e+00, 3th-worst: 1.192e+00, worst: 2.146e+00 (of 10)
5 best: 5.722e-01, 3th-best: 6.199e-01, mean: 7.200e-01, 3th-worst: 8.106e-01, worst: 8.106e-01 (of 10)
9 best: 2.437e+00, 3th-best: 2.464e+00, mean: 2.530e+00, 3th-worst: 2.570e+00, worst: 2.676e+00 (of 10)
13 best: 1.154e+01, 3th-best: 1.168e+01, mean: 5.638e+01, 3th-worst: 1.346e+01, worst: 4.478e+02 (of 10)
17 best: 6.230e+01, 3th-best: 6.247e+01, mean: 6.424e+01, 3th-worst: 6.460e+01, worst: 7.294e+01 (of 10)
21 best: 3.376e+02, 3th-best: 3.391e+02, mean: 3.521e+02, 3th-worst: 3.540e+02, worst: 3.963e+02 (of 10)
>>> print_bench_footer(UNITS_PER_SECOND=1000000)
all results are in time units per N
time units per second: 1000000; seconds per time unit: 0.000001
(The pattern here is that as N grows, the time per N grows.)
2. If you need to do some setting up before the code can run, then put the
setting-up code into a separate function so that it won't be included in the
timing measurements. A good way to share state between the setting-up function
and the main function is to make them be methods of the same object, e.g.:
>>> import random
>>> class O:
... def __init__(self):
... self.l = []
... def setup(self, N):
... del self.l[:]
... self.l.extend(range(N))
... random.shuffle(self.l)
... def sort(self, N):
... self.l.sort()
...
>>> o = O()
>>> for N in 1000, 10000, 100000, 1000000:
... print "%7d" % N,
... rep_bench(o.sort, N, o.setup)
...
1000 best: 4.830e+02, 3th-best: 4.950e+02, mean: 5.730e+02, 3th-worst: 5.858e+02, worst: 7.451e+02 (of 10)
10000 best: 6.342e+02, 3th-best: 6.367e+02, mean: 6.678e+02, 3th-worst: 6.851e+02, worst: 7.848e+02 (of 10)
100000 best: 8.309e+02, 3th-best: 8.338e+02, mean: 8.435e+02, 3th-worst: 8.540e+02, worst: 8.559e+02 (of 10)
1000000 best: 1.327e+03, 3th-best: 1.339e+03, mean: 1.349e+03, 3th-worst: 1.357e+03, worst: 1.374e+03 (of 10)
3. Useful fact! rep_bench() returns a dict containing the numbers.
4. Things to fix:
a. I used to have it hooked up to use the "hotshot" profiler on the code being
measured. I recently tried to change it to use the newer cProfile profiler
instead, but I don't understand the interface to cProfiler so it just gives an
exception if you pass profile=True. Please fix this and send me a patch.
b. Wouldn't it be great if this script emitted results in a json format that
was understood by a tool to make pretty interactive explorable graphs? The
pretty graphs could look like those on http://speed.pypy.org/ . Please make
this work and send me a patch!
"""
import cProfile, operator, time
from decimal import Decimal as D
#from pyutil import jsonutil as json
import platform
if 'windows' in platform.system().lower():
clock = time.clock
else:
clock = time.time
from assertutil import _assert
def makeg(func):
def blah(n, func=func):
for i in xrange(n):
func()
return blah
def to_decimal(x):
"""
See if D(x) returns something. If instead it raises TypeError, x must have been a float, so convert it to Decimal by way of string. (In Python >= 2.7, D(x) does this automatically.)
"""
try:
return D(x)
except TypeError:
return D("%0.54f" % (x,))
def mult(a, b):
"""
If we get TypeError from * (possibly because one is float and the other is Decimal), then promote them both to Decimal.
"""
try:
return a * b
except TypeError:
return to_decimal(a) * to_decimal(b)
def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False):
"""
Will run the func up to MAXREPS times, but won't start a new run if MAXTIME
(wall-clock time) has already elapsed (unless MAXTIME is None).
@param quiet Don't print anything--just return the results dict.
"""
assert isinstance(n, int), (n, type(n))
startwallclocktime = time.time()
tls = [] # elapsed time in seconds
bmes = []
while ((len(tls) < MAXREPS) or (MAXREPS is None)) and ((MAXTIME is None) or ((time.time() - startwallclocktime) < MAXTIME)):
if initfunc:
initfunc(n)
try:
tl = bench_it(func, n, profile=profile, profresults=profresults)
except BadMeasure, bme:
bmes.append(bme)
else:
tls.append(tl)
if len(tls) == 0:
raise Exception("Couldn't get any measurements within time limits or number-of-attempts limits. Maybe something is wrong with your clock? %s" % (bmes,))
sumtls = reduce(operator.__add__, tls)
mean = sumtls / len(tls)
tls.sort()
worst = tls[-1]
best = tls[0]
_assert(best > worstemptymeasure*MARGINOFERROR, "%s(n=%s) took %0.10f seconds, but we cannot measure times much less than about %0.10f seconds. Try a more time-consuming variant (such as higher n)." % (func, n, best, worstemptymeasure*MARGINOFERROR,))
m = len(tls)/4
if m > 0:
mthbest = tls[m-1]
mthworst = tls[-m]
else:
mthbest = tls[0]
mthworst = tls[-1]
# The +/-0 index is the best/worst, the +/-1 index is the 2nd-best/worst,
# etc, so we use mp1 to name it.
mp1 = m+1
res = {
'worst': mult(worst, UNITS_PER_SECOND)/n,
'best': mult(best, UNITS_PER_SECOND)/n,
'mp1': mp1,
'mth-best': mult(mthbest, UNITS_PER_SECOND)/n,
'mth-worst': mult(mthworst, UNITS_PER_SECOND)/n,
'mean': mult(mean, UNITS_PER_SECOND)/n,
'num': len(tls),
}
if not quiet:
print "best: %(best)#8.03e, %(mp1)3dth-best: %(mth-best)#8.03e, mean: %(mean)#8.03e, %(mp1)3dth-worst: %(mth-worst)#8.03e, worst: %(worst)#8.03e (of %(num)6d)" % res
return res
MARGINOFERROR = 10
worstemptymeasure = 0
class BadMeasure(Exception):
""" Either the clock wrapped (which happens with time.clock()) or
it went backwards (which happens with time.time() on rare
occasions), (or the code being measured completed before a single
clock tick). """
def __init__(self, startt, stopt, clock):
self.startt = startt
self.stopt = stopt
self.clock = clock
def __repr__(self):
return "<%s %s - %s (%s)>" % (self.__class__.__name__, self.startt, self.stopt, self.clock)
def do_nothing(n):
pass
def bench_it(func, n, profile=False, profresults="pyutil-benchutil.prof"):
if profile:
st = clock()
cProfile.run('func(n)', profresults)
sto = clock()
else:
st = clock()
func(n)
sto = clock()
timeelapsed = sto - st
if timeelapsed <= 0:
raise BadMeasure(st, sto, clock)
global worstemptymeasure
emsta = clock()
do_nothing(2**32)
emstop = clock()
empty = emstop - emsta
if empty > worstemptymeasure:
worstemptymeasure = empty
return timeelapsed
def bench(func, initfunc=None, TOPXP=21, MAXREPS=5, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", outputjson=False, jsonresultsfname="pyutil-benchutil-results.json", UNITS_PER_SECOND=1):
BSIZES = []
for i in range(TOPXP-6, TOPXP+1, 2):
n = int(2 ** i)
if n < 1:
n = 1
if BSIZES and n <= BSIZES[-1]:
n *= 2
BSIZES.append(n)
res = {}
for BSIZE in BSIZES:
print "N: %7d," % BSIZE,
r = rep_bench(func, BSIZE, initfunc=initfunc, MAXREPS=MAXREPS, MAXTIME=MAXTIME, profile=profile, profresults=profresults, UNITS_PER_SECOND=UNITS_PER_SECOND)
res[BSIZE] = r
#if outputjson:
# write_file(jsonresultsfname, json.dumps(res))
return res
def print_bench_footer(UNITS_PER_SECOND=1):
print "all results are in time units per N"
print "time units per second: %s; seconds per time unit: %s" % (UNITS_PER_SECOND, D(1)/UNITS_PER_SECOND)

libs/pyutil/benchutil.py~ (225)

@@ -0,0 +1,225 @@
# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
Benchmark a function for its behavior with respect to N.
How to use this module:
1. Define a function which runs the code that you want to benchmark. The
function takes a single argument which is the size of the task (i.e. the "N"
parameter). Pass this function as the first argument to rep_bench(), and N as
the second, e.g.:
>>> from pyutil.benchutil import rep_bench
>>> def fib(N):
... if N <= 1:
... return 1
... else:
... return fib(N-1) + fib(N-2)
...
>>> rep_bench(fib, 25, UNITS_PER_SECOND=1000)
best: 1.968e+00, 3th-best: 1.987e+00, mean: 2.118e+00, 3th-worst: 2.175e+00, worst: 2.503e+00 (of 10)
The output is reporting the number of milliseconds that executing the function
took, divided by N, from ten different invocations of fib(). It reports the
best, worst, M-th best, M-th worst, and mean, where "M" is the natural log of
the number of invocations (in this case 10).
2. Now run it with different values of N and look for patterns:
>>> for N in 1, 5, 9, 13, 17, 21:
... print "%2d" % N,
... rep_bench(fib, N, UNITS_PER_SECOND=1000000)
...
1 best: 9.537e-01, 3th-best: 9.537e-01, mean: 1.121e+00, 3th-worst: 1.192e+00, worst: 2.146e+00 (of 10)
5 best: 5.722e-01, 3th-best: 6.199e-01, mean: 7.200e-01, 3th-worst: 8.106e-01, worst: 8.106e-01 (of 10)
9 best: 2.437e+00, 3th-best: 2.464e+00, mean: 2.530e+00, 3th-worst: 2.570e+00, worst: 2.676e+00 (of 10)
13 best: 1.154e+01, 3th-best: 1.168e+01, mean: 5.638e+01, 3th-worst: 1.346e+01, worst: 4.478e+02 (of 10)
17 best: 6.230e+01, 3th-best: 6.247e+01, mean: 6.424e+01, 3th-worst: 6.460e+01, worst: 7.294e+01 (of 10)
21 best: 3.376e+02, 3th-best: 3.391e+02, mean: 3.521e+02, 3th-worst: 3.540e+02, worst: 3.963e+02 (of 10)
>>> print_bench_footer(UNITS_PER_SECOND=1000000)
all results are in time units per N
time units per second: 1000000; seconds per time unit: 0.000001
(The pattern here is that as N grows, the time per N grows.)
2. If you need to do some setting up before the code can run, then put the
setting-up code into a separate function so that it won't be included in the
timing measurements. A good way to share state between the setting-up function
and the main function is to make them be methods of the same object, e.g.:
>>> import random
>>> class O:
... def __init__(self):
... self.l = []
... def setup(self, N):
... del self.l[:]
... self.l.extend(range(N))
... random.shuffle(self.l)
... def sort(self, N):
... self.l.sort()
...
>>> o = O()
>>> for N in 1000, 10000, 100000, 1000000:
... print "%7d" % N,
... rep_bench(o.sort, N, o.setup)
...
1000 best: 4.830e+02, 3th-best: 4.950e+02, mean: 5.730e+02, 3th-worst: 5.858e+02, worst: 7.451e+02 (of 10)
10000 best: 6.342e+02, 3th-best: 6.367e+02, mean: 6.678e+02, 3th-worst: 6.851e+02, worst: 7.848e+02 (of 10)
100000 best: 8.309e+02, 3th-best: 8.338e+02, mean: 8.435e+02, 3th-worst: 8.540e+02, worst: 8.559e+02 (of 10)
1000000 best: 1.327e+03, 3th-best: 1.339e+03, mean: 1.349e+03, 3th-worst: 1.357e+03, worst: 1.374e+03 (of 10)
3. Useful fact! rep_bench() returns a dict containing the numbers.
4. Things to fix:
a. I used to have it hooked up to use the "hotshot" profiler on the code being
measured. I recently tried to change it to use the newer cProfile profiler
instead, but I don't understand the interface to cProfiler so it just gives an
exception if you pass profile=True. Please fix this and send me a patch.
b. Wouldn't it be great if this script emitted results in a json format that
was understood by a tool to make pretty interactive explorable graphs? The
pretty graphs could look like those on http://speed.pypy.org/ . Please make
this work and send me a patch!
"""
import cProfile, operator, time
from decimal import Decimal as D
#from pyutil import jsonutil as json
import platform
if 'windows' in platform.system().lower():
clock = time.clock
else:
clock = time.time
from assertutil import _assert
def makeg(func):
def blah(n, func=func):
for i in xrange(n):
func()
return blah
def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False):
"""
Will run the func up to MAXREPS times, but won't start a new run if MAXTIME
(wall-clock time) has already elapsed (unless MAXTIME is None).
@param quiet Don't print anything--just return the results dict.
"""
assert isinstance(n, int), (n, type(n))
startwallclocktime = time.time()
tls = [] # elapsed time in seconds
bmes = []
while ((len(tls) < MAXREPS) or (MAXREPS is None)) and ((MAXTIME is None) or ((time.time() - startwallclocktime) < MAXTIME)):
if initfunc:
initfunc(n)
try:
tl = bench_it(func, n, profile=profile, profresults=profresults)
except BadMeasure, bme:
bmes.append(bme)
else:
tls.append(tl)
if len(tls) == 0:
raise Exception("Couldn't get any measurements within time limits or number-of-attempts limits. Maybe something is wrong with your clock? %s" % (bmes,))
sumtls = reduce(operator.__add__, tls)
mean = sumtls / len(tls)
tls.sort()
worst = tls[-1]
best = tls[0]
_assert(best > worstemptymeasure*MARGINOFERROR, "%s(n=%s) took %0.10f seconds, but we cannot measure times much less than about %0.10f seconds. Try a more time-consuming variant (such as higher n)." % (func, n, best, worstemptymeasure*MARGINOFERROR,))
m = len(tls)/4
if m > 0:
mthbest = tls[m-1]
mthworst = tls[-m]
else:
mthbest = tls[0]
mthworst = tls[-1]
# The +/-0 index is the best/worst, the +/-1 index is the 2nd-best/worst,
# etc, so we use mp1 to name it.
mp1 = m+1
res = {
'worst': (worst*UNITS_PER_SECOND)/n,
'best': (best*UNITS_PER_SECOND)/n,
'mp1': mp1,
'mth-best': (mthbest*UNITS_PER_SECOND)/n,
'mth-worst': (mthworst*UNITS_PER_SECOND)/n,
'mean': (mean*UNITS_PER_SECOND)/n,
'num': len(tls),
}
if not quiet:
print "best: %(best)#8.03e, %(mp1)3dth-best: %(mth-best)#8.03e, mean: %(mean)#8.03e, %(mp1)3dth-worst: %(mth-worst)#8.03e, worst: %(worst)#8.03e (of %(num)6d)" % res
return res
MARGINOFERROR = 10
worstemptymeasure = 0
class BadMeasure(Exception):
""" Either the clock wrapped (which happens with time.clock()) or
it went backwards (which happens with time.time() on rare
occasions), (or the code being measured completed before a single
clock tick). """
def __init__(self, startt, stopt, clock):
self.startt = startt
self.stopt = stopt
self.clock = clock
def __repr__(self):
return "<%s %s - %s (%s)>" % (self.__class__.__name__, self.startt, self.stopt, self.clock)
def do_nothing(n):
pass
def bench_it(func, n, profile=False, profresults="pyutil-benchutil.prof"):
if profile:
st = clock()
cProfile.run('func(n)', profresults)
sto = clock()
else:
st = clock()
func(n)
sto = clock()
timeelapsed = sto - st
if timeelapsed <= 0:
raise BadMeasure(timeelapsed)
global worstemptymeasure
emsta = clock()
do_nothing(2**32)
emstop = clock()
empty = emstop - emsta
if empty > worstemptymeasure:
worstemptymeasure = empty
return timeelapsed
def bench(func, initfunc=None, TOPXP=21, MAXREPS=5, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", outputjson=False, jsonresultsfname="pyutil-benchutil-results.json", UNITS_PER_SECOND=1):
BSIZES = []
for i in range(TOPXP-6, TOPXP+1, 2):
n = int(2 ** i)
if n < 1:
n = 1
if BSIZES and n <= BSIZES[-1]:
n *= 2
BSIZES.append(n)
res = {}
for BSIZE in BSIZES:
print "N: %7d," % BSIZE,
r = rep_bench(func, BSIZE, initfunc=initfunc, MAXREPS=MAXREPS, MAXTIME=MAXTIME, profile=profile, profresults=profresults, UNITS_PER_SECOND=UNITS_PER_SECOND)
res[BSIZE] = r
#if outputjson:
# write_file(jsonresultsfname, json.dumps(res))
return res
def print_bench_footer(UNITS_PER_SECOND=1):
print "all results are in time units per N"
print "time units per second: %s; seconds per time unit: %s" % (UNITS_PER_SECOND, D(1)/UNITS_PER_SECOND)

libs/pyutil/cache.py (735)

@@ -0,0 +1,735 @@
# Copyright (c) 2002-2010 Zooko "Zooko" Wilcox-O'Hearn
"""
This module offers three implementations of an LRUCache, which is a dict that
drops items according to a Least-Recently-Used policy if the dict exceeds a
fixed maximum size.
Warning: if -O optimizations are not turned on then LRUCache performs
extensive self-analysis in every function call, which can take minutes
and minutes for a large cache. Turn on -O, or comment out ``assert self._assert_invariants()``
"""
import operator
from assertutil import _assert, precondition
from humanreadable import hr
class LRUCache:
"""
An efficient least-recently-used cache. It keeps an LRU queue, and when
the number of items in the cache reaches maxsize, it removes the least
recently used item.
"Looking" at an item, key, or value such as with "has_key()" makes that
item become the most recently used item.
You can also use "refresh()" to explicitly make an item become the most
recently used item.
Adding an item that is already in the dict *does* make it the most-
recently-used item although it does not change the state of the dict
itself.
See also SmallLRUCache (below), which is faster in some cases.
"""
class ItemIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.hs][2]
def __iter__(self):
return self
def next(self):
if self.i is self.c.ts:
raise StopIteration
k = self.i
precondition(self.c.d.has_key(k), "The iterated LRUCache doesn't have the next key. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", k, self.c)
(v, p, n,) = self.c.d[k]
self.i = n
return (k, v,)
class KeyIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.hs][2]
def __iter__(self):
return self
def next(self):
if self.i is self.c.ts:
raise StopIteration
k = self.i
precondition(self.c.d.has_key(k), "The iterated LRUCache doesn't have the next key. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", k, self.c)
(v, p, n,) = self.c.d[k]
self.i = n
return k
class ValIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.hs][2]
def __iter__(self):
return self
def next(self):
if self.i is self.c.ts:
raise StopIteration
precondition(self.c.d.has_key(self.i), "The iterated LRUCache doesn't have the next key. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c)
(v, p, n,) = self.c.d[self.i]
self.i = n
return v
class Sentinel:
def __init__(self, msg):
self.msg = msg
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.msg,)
def __init__(self, initialdata={}, maxsize=128):
precondition(maxsize > 0)
self.m = maxsize+2 # The +2 is for the head and tail nodes.
self.d = {} # k: k, v: [v, prev, next,] # the dict
self.hs = LRUCache.Sentinel("hs")
self.ts = LRUCache.Sentinel("ts")
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.update(initialdata)
assert self._assert_invariants()
def __repr_n__(self, n=None):
s = ["{",]
try:
iter = self.iteritems()
x = iter.next()
s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
i = 1
while (n is None) or (i < n):
x = iter.next()
s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
except StopIteration:
pass
s.append("}")
return ''.join(s)
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),)
def __str__(self):
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),)
def _assert_invariants(self):
_assert(len(self.d) <= self.m, "Size is required to be <= maxsize.", len(self.d), self.m)
_assert((len(self.d) > 2) == (self.d[self.hs][2] is not self.ts) == (self.d[self.ts][1] is not self.hs), "Head and tail point to something other than each other if and only if there is at least one element in the dictionary.", self.hs, self.ts, len(self.d))
foundprevsentinel = 0
foundnextsentinel = 0
for (k, (v, p, n,)) in self.d.iteritems():
_assert(v not in (self.hs, self.ts,))
_assert(p is not self.ts, "A reference to the tail sentinel may not appear in prev.", k, v, p, n)
_assert(n is not self.hs, "A reference to the head sentinel may not appear in next.", k, v, p, n)
_assert(p in self.d, "Each prev is required to appear as a key in the dict.", k, v, p, n)
_assert(n in self.d, "Each next is required to appear as a key in the dict.", k, v, p, n)
if p is self.hs:
foundprevsentinel += 1
_assert(foundprevsentinel <= 2, "No more than two references to the head sentinel may appear as a prev.", k, v, p, n)
if n is self.ts:
foundnextsentinel += 1
_assert(foundnextsentinel <= 2, "No more than one reference to the tail sentinel may appear as a next.", k, v, p, n)
_assert(foundprevsentinel == 2, "A reference to the head sentinel is required to appear as a prev (plus a self-referential reference).")
_assert(foundnextsentinel == 2, "A reference to the tail sentinel is required to appear as a next (plus a self-referential reference).")
count = 0
for (k, v,) in self.iteritems():
_assert(k not in (self.hs, self.ts,))
count += 1
_assert(count == len(self.d)-2, count, len(self.d)) # -2 for the sentinels
return True
def freshen(self, k, strictkey=False):
assert self._assert_invariants()
if not self.d.has_key(k):
if strictkey:
raise KeyError, k
return
node = self.d[k]
# relink
self.d[node[1]][2] = node[2]
self.d[node[2]][1] = node[1]
# move to front
hnode = self.d[self.hs]
node[1] = self.hs
node[2] = hnode[2]
hnode[2] = k
self.d[node[2]][1] = k
assert self._assert_invariants()
def iteritems(self):
return LRUCache.ItemIterator(self)
def itervalues(self):
return LRUCache.ValIterator(self)
def iterkeys(self):
return self.__iter__()
def __iter__(self):
return LRUCache.KeyIterator(self)
def __getitem__(self, key, default=None, strictkey=True):
node = self.d.get(key)
if not node:
if strictkey:
raise KeyError, key
return default
self.freshen(key)
return node[0]
def __setitem__(self, k, v=None):
assert self._assert_invariants()
node = self.d.get(k)
if node:
node[0] = v
self.freshen(k)
return
if len(self.d) == self.m:
# If this insert is going to increase the size of the cache to
# bigger than maxsize.
self.pop()
hnode = self.d[self.hs]
n = hnode[2]
self.d[k] = [v, self.hs, n,]
hnode[2] = k
self.d[n][1] = k
assert self._assert_invariants()
return v
def __delitem__(self, key, default=None, strictkey=True):
"""
@param strictkey: True if you want a KeyError in the case that
key is not there, False if you want a reference to default
in the case that key is not there
@param default: the object to return if key is not there; This
is ignored if strictkey.
@return: the value removed or default if there is not item by
that key and strictkey is False
"""
assert self._assert_invariants()
if self.d.has_key(key):
node = self.d[key]
# relink
self.d[node[1]][2] = node[2]
self.d[node[2]][1] = node[1]
del self.d[key]
assert self._assert_invariants()
return node[0]
elif strictkey:
assert self._assert_invariants()
raise KeyError, key
else:
assert self._assert_invariants()
return default
def has_key(self, key):
assert self._assert_invariants()
if self.d.has_key(key):
self.freshen(key)
assert self._assert_invariants()
return True
else:
assert self._assert_invariants()
return False
def clear(self):
assert self._assert_invariants()
self.d.clear()
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
assert self._assert_invariants()
def update(self, otherdict):
"""
@return: self
"""
assert self._assert_invariants()
if len(otherdict) >= (self.m-2): # -2 for the sentinel nodes
# optimization
self.clear()
assert self._assert_invariants()
i = otherdict.iteritems()
try:
while len(self.d) < self.m:
(k, v,) = i.next()
assert self._assert_invariants()
self[k] = v
assert self._assert_invariants()
return self
except StopIteration:
_assert(False, "Internal error -- this should never have happened since the while loop should have terminated first.")
return self
for (k, v,) in otherdict.iteritems():
assert self._assert_invariants()
self[k] = v
assert self._assert_invariants()
def pop(self):
assert self._assert_invariants()
if len(self.d) < 2: # the +2 is for the sentinels
raise KeyError, 'popitem(): dictionary is empty'
k = self.d[self.ts][1]
self.remove(k)
assert self._assert_invariants()
return k
def popitem(self):
assert self._assert_invariants()
if len(self.d) < 2: # the +2 is for the sentinels
raise KeyError, 'popitem(): dictionary is empty'
k = self.d[self.ts][1]
val = self.remove(k)
assert self._assert_invariants()
return (k, val,)
def keys_unsorted(self):
assert self._assert_invariants()
t = self.d.copy()
del t[self.hs]
del t[self.ts]
assert self._assert_invariants()
return t.keys()
def keys(self):
res = [None] * len(self)
i = 0
for k in self.iterkeys():
res[i] = k
i += 1
return res
def values_unsorted(self):
assert self._assert_invariants()
t = self.d.copy()
del t[self.hs]
del t[self.ts]
assert self._assert_invariants()
return map(operator.__getitem__, t.values(), [0]*len(t))
def values(self):
res = [None] * len(self)
i = 0
for v in self.itervalues():
res[i] = v
i += 1
return res
def items(self):
res = [None] * len(self)
i = 0
for it in self.iteritems():
res[i] = it
i += 1
return res
def __len__(self):
return len(self.d) - 2
def insert(self, key, val=None):
assert self._assert_invariants()
result = self.__setitem__(key, val)
assert self._assert_invariants()
return result
def setdefault(self, key, default=None):
assert self._assert_invariants()
if not self.has_key(key):
self[key] = default
assert self._assert_invariants()
return self[key]
def get(self, key, default=None):
return self.__getitem__(key, default, strictkey=False)
def remove(self, key, default=None, strictkey=True):
assert self._assert_invariants()
result = self.__delitem__(key, default, strictkey)
assert self._assert_invariants()
return result
class SmallLRUCache(dict):
"""
SmallLRUCache is faster than LRUCache for small sets. How small? That
depends on your machine and which operations you use most often. Use
performance profiling to determine whether the cache class that you are
using makes any difference to the performance of your program, and if it
does, then run "quick_bench()" in test/test_cache.py to see which cache
implementation is faster for the size of your datasets.
A simple least-recently-used cache. It keeps an LRU queue, and
when the number of items in the cache reaches maxsize, it removes
the least recently used item.
"Looking" at an item or a key such as with "has_key()" makes that
item become the most recently used item.
You can also use "refresh()" to explicitly make an item become the most
recently used item.
Adding an item that is already in the dict *does* make it the
most- recently-used item although it does not change the state of
the dict itself.
"""
class ItemIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c._lru), "The iterated SmallLRUCache doesn't have this many elements. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c)
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallLRUCache doesn't have this key. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c._lru[self.i], self.c)
if self.i == len(self.c._lru):
raise StopIteration
k = self.i
self.i += 1
return (k, dict.__getitem__(self.c, k),)
class KeyIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c._lru), "The iterated SmallLRUCache doesn't have this many elements. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c)
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallLRUCache doesn't have this key. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c._lru[self.i], self.c)
if self.i == len(self.c._lru):
raise StopIteration
k = self.i
self.i += 1
return k
class ValueIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c._lru), "The iterated SmallLRUCache doesn't have this many elements. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c)
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallLRUCache doesn't have this key. Most likely this is because someone altered the contents of the LRUCache while the iteration was in progress.", self.i, self.c._lru[self.i], self.c)
if self.i == len(self.c._lru):
raise StopIteration
k = self.i
self.i += 1
return dict.__getitem__(self.c, k)
def __init__(self, initialdata={}, maxsize=128):
dict.__init__(self, initialdata)
self._lru = initialdata.keys() # contains keys
self._maxsize = maxsize
over = len(self) - self._maxsize
if over > 0:
map(dict.__delitem__, [self]*over, self._lru[:over])
del self._lru[:over]
assert self._assert_invariants()
def _assert_invariants(self):
_assert(len(self._lru) <= self._maxsize, "Size is required to be <= maxsize.")
_assert(len(filter(lambda x: dict.has_key(self, x), self._lru)) == len(self._lru), "Each key in self._lru is required to be in dict.", filter(lambda x: not dict.has_key(self, x), self._lru), len(self._lru), self._lru, len(self), self)
_assert(len(filter(lambda x: x in self._lru, self.keys())) == len(self), "Each key in dict is required to be in self._lru.", filter(lambda x: x not in self._lru, self.keys()), len(self._lru), self._lru, len(self), self)
_assert(len(self._lru) == len(self), "internal consistency", filter(lambda x: x not in self.keys(), self._lru), len(self._lru), self._lru, len(self), self)
_assert(len(self._lru) <= self._maxsize, "internal consistency", len(self._lru), self._lru, self._maxsize)
return True
def insert(self, key, item=None):
assert self._assert_invariants()
result = self.__setitem__(key, item)
assert self._assert_invariants()
return result
def setdefault(self, key, default=None):
assert self._assert_invariants()
if not self.has_key(key):
self[key] = default
assert self._assert_invariants()
return self[key]
def __setitem__(self, key, item=None):
assert self._assert_invariants()
if dict.has_key(self, key):
self._lru.remove(key)
else:
if len(self._lru) == self._maxsize:
# If this insert is going to increase the size of the cache to bigger than maxsize:
killkey = self._lru.pop(0)
dict.__delitem__(self, killkey)
dict.__setitem__(self, key, item)
self._lru.append(key)
assert self._assert_invariants()
return item
def remove(self, key, default=None, strictkey=True):
assert self._assert_invariants()
result = self.__delitem__(key, default, strictkey)
assert self._assert_invariants()
return result
def __delitem__(self, key, default=None, strictkey=True):
"""
@param strictkey: True if you want a KeyError in the case that
key is not there, False if you want a reference to default
in the case that key is not there
@param default: the object to return if key is not there; This
is ignored if strictkey.
@return: the object removed or default if there is not item by
that key and strictkey is False
"""
assert self._assert_invariants()
if dict.has_key(self, key):
val = dict.__getitem__(self, key)
dict.__delitem__(self, key)
self._lru.remove(key)
assert self._assert_invariants()
return val
elif strictkey:
assert self._assert_invariants()
raise KeyError, key
else:
assert self._assert_invariants()
return default
def clear(self):
assert self._assert_invariants()
dict.clear(self)
self._lru = []
assert self._assert_invariants()
def update(self, otherdict):
"""
@return: self
"""
assert self._assert_invariants()
if len(otherdict) > self._maxsize:
# Handling this special case here makes it possible to implement the
# other more common cases faster below.
dict.clear(self)
self._lru = []
if self._maxsize > (len(otherdict) - self._maxsize):
dict.update(self, otherdict)
while len(self) > self._maxsize:
dict.popitem(self)
else:
for k, v, in otherdict.iteritems():
if len(self) == self._maxsize:
break
dict.__setitem__(self, k, v)
self._lru = dict.keys(self)
assert self._assert_invariants()
return self
for k in otherdict.iterkeys():
if dict.has_key(self, k):
self._lru.remove(k)
self._lru.extend(otherdict.keys())
dict.update(self, otherdict)
over = len(self) - self._maxsize
if over > 0:
map(dict.__delitem__, [self]*over, self._lru[:over])
del self._lru[:over]
assert self._assert_invariants()
return self
def has_key(self, key):
assert self._assert_invariants()
if dict.has_key(self, key):
assert key in self._lru, "key: %s, self._lru: %s" % tuple(map(hr, (key, self._lru,)))
self._lru.remove(key)
self._lru.append(key)
assert self._assert_invariants()
return True
else:
assert self._assert_invariants()
return False
def refresh(self, key, strictkey=True):
"""
@param strictkey: raise a KeyError exception if key isn't present
"""
assert self._assert_invariants()
if not dict.has_key(self, key):
if strictkey:
raise KeyError, key
return
self._lru.remove(key)
self._lru.append(key)
def popitem(self):
if not self._lru:
raise KeyError, 'popitem(): dictionary is empty'
k = self._lru[-1]
obj = self.remove(k)
return (k, obj,)
class LinkedListLRUCache:
"""
This is slower and less featureful than LRUCache. It is included
here for comparison purposes.
Implementation of a length-limited O(1) LRU queue.
Built for and used by PyPE:
http://pype.sourceforge.net
original Copyright 2003 Josiah Carlson.
useful methods and _assert_invariant added by Zooko for testing and benchmarking purposes
"""
class Node:
def __init__(self, prev, me):
self.prev = prev
self.me = me
self.next = None
def __init__(self, initialdata={}, maxsize=128):
self._maxsize = max(maxsize, 1)
self.d = {}
self.first = None
self.last = None
for key, value in initialdata.iteritems():
self[key] = value
def clear(self):
self.d = {}
self.first = None
self.last = None
def update(self, otherdict):
for (k, v,) in otherdict.iteritems():
self[k] = v
def setdefault(self, key, default=None):
if not self.has_key(key):
self[key] = default
return self[key]
def _assert_invariants(self):
def lliterkeys(self):
cur = self.first
while cur != None:
cur2 = cur.next
yield cur.me[0]
cur = cur2
def lllen(self):
# Ugh.
acc = 0
for x in lliterkeys(self):
acc += 1
return acc
def llhaskey(self, key):
# Ugh.
for x in lliterkeys(self):
if x is key:
return True
return False
for k in lliterkeys(self):
_assert(self.d.has_key(k), "Each key in the linked list is required to be in the dict.", k)
for k in self.d.iterkeys():
_assert(llhaskey(self, k), "Each key in the dict is required to be in the linked list.", k)
_assert(lllen(self) == len(self.d), "internal consistency", self, self.d)
_assert(len(self.d) <= self._maxsize, "Size is required to be <= maxsize.")
return True
def __contains__(self, obj):
return obj in self.d
def has_key(self, key):
return self.__contains__(key)
def __getitem__(self, obj):
a = self.d[obj].me
self[a[0]] = a[1]
return a[1]
def get(self, key, default=None, strictkey=False):
if not self.has_key(key) and strictkey:
raise KeyError, key
if self.has_key(key):
return self.__getitem__(key)
else:
return default
def __setitem__(self, obj, val):
if obj in self.d:
del self[obj]
nobj = self.Node(self.last, (obj, val))
if self.first is None:
self.first = nobj
if self.last:
self.last.next = nobj
self.last = nobj
self.d[obj] = nobj
if len(self.d) > self._maxsize:
if self.first == self.last:
self.first = None
self.last = None
return
a = self.first
a.next.prev = None
self.first = a.next
a.next = None
del self.d[a.me[0]]
del a
def insert(self, key, item=None):
return self.__setitem__(key, item)
def __delitem__(self, obj, default=None, strictkey=True):
if self.d.has_key(obj):
nobj = self.d[obj]
if nobj.prev:
nobj.prev.next = nobj.next
else:
self.first = nobj.next
if nobj.next:
nobj.next.prev = nobj.prev
else:
self.last = nobj.prev
val = self.d[obj]
del self.d[obj]
return val.me[1]
elif strictkey:
raise KeyError, obj
else:
return default
def remove(self, obj, default=None, strictkey=True):
return self.__delitem__(obj, default=default, strictkey=strictkey)
def __iter__(self):
cur = self.first
while cur != None:
cur2 = cur.next
yield cur.me[1]
cur = cur2
def iteritems(self):
cur = self.first
while cur != None:
cur2 = cur.next
yield cur.me
cur = cur2
def iterkeys(self):
return iter(self.d)
def itervalues(self):
for i,j in self.iteritems():
yield j
def values(self):
l = []
for v in self.itervalues():
l.append(v)
return l
def keys(self):
return self.d.keys()
def __len__(self):
return self.d.__len__()
def popitem(self):
i = self.last.me
obj = self.remove(i[0])
return obj
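
As a usage illustration (not part of the diff): the class docstrings above explain eviction and freshening; the sketch below exercises both on a tiny cache, with invented keys and sizes. Note the warning at the top of the file: without -O the invariant checks make large caches very slow.

from pyutil.cache import LRUCache

c = LRUCache(maxsize=3)
c['a'] = 1
c['b'] = 2
c['c'] = 3
c['a']           # looking at 'a' freshens it, leaving 'b' as the least recently used
c['d'] = 4       # the cache is full, so the LRU item ('b') is evicted
print c.has_key('b')   # False -- 'b' was dropped
print c.keys()         # most recently used first: ['d', 'a', 'c']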

libs/pyutil/dictutil.py (605)

@@ -0,0 +1,605 @@
"""
Tools to mess with dicts.
"""
import warnings
import copy, operator
from bisect import bisect_left, insort_left
from pyutil.assertutil import _assert, precondition
def move(k, d1, d2, strict=False):
"""
Move item with key k from d1 to d2.
"""
warnings.warn("deprecated", DeprecationWarning)
if strict and not d1.has_key(k):
raise KeyError, k
d2[k] = d1[k]
del d1[k]
def subtract(d1, d2):
"""
Remove all items from d1 whose key occurs in d2.
@returns d1
"""
warnings.warn("deprecated", DeprecationWarning)
if len(d1) > len(d2):
for k in d2.keys():
if d1.has_key(k):
del d1[k]
else:
for k in d1.keys():
if d2.has_key(k):
del d1[k]
return d1
class DictOfSets(dict):
def add(self, key, value):
warnings.warn("deprecated", DeprecationWarning)
if key in self:
self[key].add(value)
else:
self[key] = set([value])
def discard(self, key, value):
warnings.warn("deprecated", DeprecationWarning)
if not key in self:
return
self[key].discard(value)
if not self[key]:
del self[key]
class UtilDict:
def __init__(self, initialdata={}):
warnings.warn("deprecated", DeprecationWarning)
self.d = {}
self.update(initialdata)
def del_if_present(self, key):
if self.has_key(key):
del self[key]
def items_sorted_by_value(self):
"""
@return a sequence of (key, value,) pairs sorted according to value
"""
l = [(x[1], x[0],) for x in self.d.iteritems()]
l.sort()
return [(x[1], x[0],) for x in l]
def items_sorted_by_key(self):
"""
@return a sequence of (key, value,) pairs sorted according to key
"""
l = self.d.items()
l.sort()
return l
def __repr__(self, *args, **kwargs):
return self.d.__repr__(*args, **kwargs)
def __str__(self, *args, **kwargs):
return self.d.__str__(*args, **kwargs)
def __contains__(self, *args, **kwargs):
return self.d.__contains__(*args, **kwargs)
def __len__(self, *args, **kwargs):
return self.d.__len__(*args, **kwargs)
def __cmp__(self, other):
try:
return self.d.__cmp__(other)
except TypeError, le:
# maybe we should look for a .d member in other. I know this is insanely kludgey, but the Right Way To Do It is for dict.__cmp__ to use structural typing ("duck typing")
try:
return self.d.__cmp__(other.d)
except:
raise le
def __eq__(self, *args, **kwargs):
return self.d.__eq__(*args, **kwargs)
def __ne__(self, *args, **kwargs):
return self.d.__ne__(*args, **kwargs)
def __gt__(self, *args, **kwargs):
return self.d.__gt__(*args, **kwargs)
def __ge__(self, *args, **kwargs):
return self.d.__ge__(*args, **kwargs)
def __le__(self, *args, **kwargs):
return self.d.__le__(*args, **kwargs)
def __lt__(self, *args, **kwargs):
return self.d.__lt__(*args, **kwargs)
def __getitem__(self, *args, **kwargs):
return self.d.__getitem__(*args, **kwargs)
def __setitem__(self, *args, **kwargs):
return self.d.__setitem__(*args, **kwargs)
def __delitem__(self, *args, **kwargs):
return self.d.__delitem__(*args, **kwargs)
def __iter__(self, *args, **kwargs):
return self.d.__iter__(*args, **kwargs)
def clear(self, *args, **kwargs):
return self.d.clear(*args, **kwargs)
def copy(self, *args, **kwargs):
return self.__class__(self.d.copy(*args, **kwargs))
def fromkeys(self, *args, **kwargs):
return self.__class__(self.d.fromkeys(*args, **kwargs))
def get(self, key, default=None):
return self.d.get(key, default)
def has_key(self, *args, **kwargs):
return self.d.has_key(*args, **kwargs)
def items(self, *args, **kwargs):
return self.d.items(*args, **kwargs)
def iteritems(self, *args, **kwargs):
return self.d.iteritems(*args, **kwargs)
def iterkeys(self, *args, **kwargs):
return self.d.iterkeys(*args, **kwargs)
def itervalues(self, *args, **kwargs):
return self.d.itervalues(*args, **kwargs)
def keys(self, *args, **kwargs):
return self.d.keys(*args, **kwargs)
def pop(self, *args, **kwargs):
return self.d.pop(*args, **kwargs)
def popitem(self, *args, **kwargs):
return self.d.popitem(*args, **kwargs)
def setdefault(self, *args, **kwargs):
return self.d.setdefault(*args, **kwargs)
def update(self, *args, **kwargs):
self.d.update(*args, **kwargs)
def values(self, *args, **kwargs):
return self.d.values(*args, **kwargs)
class NumDict:
def __init__(self, initialdict={}):
warnings.warn("deprecated", DeprecationWarning)
self.d = copy.deepcopy(initialdict)
def add_num(self, key, val, default=0):
"""
If the key doesn't appear in self then it is created with value default
(before addition).
"""
self.d[key] = self.d.get(key, default) + val
def subtract_num(self, key, val, default=0):
self.d[key] = self.d.get(key, default) - val
def sum(self):
"""
@return: the sum of all values
"""
return reduce(operator.__add__, self.d.values())
def inc(self, key, default=0):
"""
Increment the value associated with key in dict. If there is no such
key, then one will be created with initial value 0 (before inc() --
therefore value 1 after inc).
"""
self.add_num(key, 1, default)
def dec(self, key, default=0):
"""
Decrement the value associated with key in dict. If there is no such
key, then one will be created with initial value 0 (before dec() --
therefore value -1 after dec).
"""
self.subtract_num(key, 1, default)
def items_sorted_by_value(self):
"""
@return a sequence of (key, value,) pairs sorted according to value
"""
l = [(x[1], x[0],) for x in self.d.iteritems()]
l.sort()
return [(x[1], x[0],) for x in l]
def item_with_largest_value(self):
it = self.d.iteritems()
(winner, winnerval,) = it.next()
try:
while True:
n, nv = it.next()
if nv > winnerval:
winner = n
winnerval = nv
except StopIteration:
pass
return (winner, winnerval,)
def items_sorted_by_key(self):
"""
@return a sequence of (key, value,) pairs sorted according to key
"""
l = self.d.items()
l.sort()
return l
def __repr__(self, *args, **kwargs):
return self.d.__repr__(*args, **kwargs)
def __str__(self, *args, **kwargs):
return self.d.__str__(*args, **kwargs)
def __contains__(self, *args, **kwargs):
return self.d.__contains__(*args, **kwargs)
def __len__(self, *args, **kwargs):
return self.d.__len__(*args, **kwargs)
def __cmp__(self, other):
try:
return self.d.__cmp__(other)
except TypeError, le:
# maybe we should look for a .d member in other. I know this is insanely kludgey, but the Right Way To Do It is for dict.__cmp__ to use structural typing ("duck typing")
try:
return self.d.__cmp__(other.d)
except:
raise le
def __eq__(self, *args, **kwargs):
return self.d.__eq__(*args, **kwargs)
def __ne__(self, *args, **kwargs):
return self.d.__ne__(*args, **kwargs)
def __gt__(self, *args, **kwargs):
return self.d.__gt__(*args, **kwargs)
def __ge__(self, *args, **kwargs):
return self.d.__ge__(*args, **kwargs)
def __le__(self, *args, **kwargs):
return self.d.__le__(*args, **kwargs)
def __lt__(self, *args, **kwargs):
return self.d.__lt__(*args, **kwargs)
def __getitem__(self, *args, **kwargs):
return self.d.__getitem__(*args, **kwargs)
def __setitem__(self, *args, **kwargs):
return self.d.__setitem__(*args, **kwargs)
def __delitem__(self, *args, **kwargs):
return self.d.__delitem__(*args, **kwargs)
def __iter__(self, *args, **kwargs):
return self.d.__iter__(*args, **kwargs)
def clear(self, *args, **kwargs):
return self.d.clear(*args, **kwargs)
def copy(self, *args, **kwargs):
return self.__class__(self.d.copy(*args, **kwargs))
def fromkeys(self, *args, **kwargs):
return self.__class__(self.d.fromkeys(*args, **kwargs))
def get(self, key, default=0):
return self.d.get(key, default)
def has_key(self, *args, **kwargs):
return self.d.has_key(*args, **kwargs)
def items(self, *args, **kwargs):
return self.d.items(*args, **kwargs)
def iteritems(self, *args, **kwargs):
return self.d.iteritems(*args, **kwargs)
def iterkeys(self, *args, **kwargs):
return self.d.iterkeys(*args, **kwargs)
def itervalues(self, *args, **kwargs):
return self.d.itervalues(*args, **kwargs)
def keys(self, *args, **kwargs):
return self.d.keys(*args, **kwargs)
def pop(self, *args, **kwargs):
return self.d.pop(*args, **kwargs)
def popitem(self, *args, **kwargs):
return self.d.popitem(*args, **kwargs)
def setdefault(self, *args, **kwargs):
return self.d.setdefault(*args, **kwargs)
def update(self, *args, **kwargs):
return self.d.update(*args, **kwargs)
def values(self, *args, **kwargs):
return self.d.values(*args, **kwargs)
def del_if_present(d, k):
if d.has_key(k):
del d[k]
class ValueOrderedDict:
"""
Note: this implementation assumes that the values do not mutate and change
their sort order. That is, it stores the values in a sorted list and
as items are added and removed from the dict, it makes updates to the list
which will keep the list sorted. But if a value that is currently sitting
in the list changes its sort order, then the internal consistency of this
object will be lost.
If that happens, and if assertion checking is turned on, then you will get
an assertion failure the very next time you try to do anything with this
ValueOrderedDict. However, those internal consistency checks are very slow
and almost certainly unacceptable to leave turned on in production code.
"""
class ItemIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c.l), "The iterated ValueOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, self.c)
precondition((self.i == len(self.c.l)) or self.c.d.has_key(self.c.l[self.i][1]), "The iterated ValueOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, (self.i < len(self.c.l)) and self.c.l[self.i], self.c)
if self.i == len(self.c.l):
raise StopIteration
le = self.c.l[self.i]
self.i += 1
return (le[1], le[0],)
def iteritems(self):
return ValueOrderedDict.ItemIterator(self)
def items(self):
return zip(map(operator.__getitem__, self.l, [1]*len(self.l)), map(operator.__getitem__, self.l, [0]*len(self.l)))
def values(self):
return map(operator.__getitem__, self.l, [0]*len(self.l))
def keys(self):
return map(operator.__getitem__, self.l, [1]*len(self.l))
class KeyIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c.l), "The iterated ValueOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, self.c)
precondition((self.i == len(self.c.l)) or self.c.d.has_key(self.c.l[self.i][1]), "The iterated ValueOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, (self.i < len(self.c.l)) and self.c.l[self.i], self.c)
if self.i == len(self.c.l):
raise StopIteration
le = self.c.l[self.i]
self.i += 1
return le[1]
def iterkeys(self):
return ValueOrderedDict.KeyIterator(self)
class ValueIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c.l), "The iterated ValueOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, self.c)
precondition((self.i == len(self.c.l)) or self.c.d.has_key(self.c.l[self.i][1]), "The iterated ValueOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the ValueOrderedDict while the iteration was in progress.", self.i, (self.i < len(self.c.l)) and self.c.l[self.i], self.c)
if self.i == len(self.c.l):
raise StopIteration
le = self.c.l[self.i]
self.i += 1
return le[0]
def itervalues(self):
return ValueOrderedDict.ValueIterator(self)
def __init__(self, initialdata={}):
warnings.warn("deprecated", DeprecationWarning)
self.d = {} # k: key, v: val
self.l = [] # sorted list of tuples of (val, key,)
self.update(initialdata)
assert self._assert_invariants()
def __len__(self):
return len(self.l)
def __repr_n__(self, n=None):
s = ["{",]
try:
iter = self.iteritems()
x = iter.next()
s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
i = 1
while (n is None) or (i < n):
x = iter.next()
s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
except StopIteration:
pass
s.append("}")
return ''.join(s)
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),)
def __str__(self):
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),)
def __eq__(self, other):
for (k, v,) in other.iteritems():
if not self.d.has_key(k) or self.d[k] != v:
return False
return True
def __ne__(self, other):
return not self.__eq__(other)
def _assert_invariants(self):
iter = self.l.__iter__()
try:
oldx = iter.next()
while True:
x = iter.next()
# self.l is required to be sorted
_assert(x >= oldx, x, oldx)
# every element of self.l is required to appear in self.d
_assert(self.d.has_key(x[1]), x)
oldx =x
except StopIteration:
pass
for (k, v,) in self.d.iteritems():
i = bisect_left(self.l, (v, k,))
while (self.l[i][0] is not v) or (self.l[i][1] is not k):
i += 1
_assert(i < len(self.l), i, len(self.l), k, v, self.l)
_assert(self.l[i][0] is v, i, v, l=self.l, d=self.d)
_assert(self.l[i][1] is k, i, k, l=self.l, d=self.d)
return True
def insert(self, key, val=None):
assert self._assert_invariants()
result = self.__setitem__(key, val)
assert self._assert_invariants()
return result
def setdefault(self, key, default=None):
assert self._assert_invariants()
if not self.has_key(key):
self[key] = default
assert self._assert_invariants()
return self[key]
def __setitem__(self, key, val=None):
assert self._assert_invariants()
if self.d.has_key(key):
oldval = self.d[key]
if oldval != val:
# re-sort
i = bisect_left(self.l, (oldval, key,))
while (self.l[i][0] is not oldval) or (self.l[i][1] is not key):
i += 1
self.l.pop(i)
insort_left(self.l, (val, key,))
elif oldval is not val:
# replace
i = bisect_left(self.l, (oldval, key,))
while (self.l[i][0] is not oldval) or (self.l[i][1] is not key):
i += 1
self.l[i] = (val, key,)
else:
insort_left(self.l, (val, key,))
self.d[key] = val
assert self._assert_invariants()
return val
def remove(self, key, default=None, strictkey=True):
assert self._assert_invariants()
result = self.__delitem__(key, default, strictkey)
assert self._assert_invariants()
return result
def __getitem__(self, key, default=None, strictkey=True):
if not self.d.has_key(key):
if strictkey:
raise KeyError, key
else:
return default
return self.d[key]
def __delitem__(self, key, default=None, strictkey=True):
"""
@param strictkey: True if you want a KeyError in the case that
key is not there, False if you want a reference to default
in the case that key is not there
@param default: the object to return if key is not there; This
is ignored if strictkey.
@return: the object removed or default if there is not item by
that key and strictkey is False
"""
assert self._assert_invariants()
if self.d.has_key(key):
val = self.d.pop(key)
i = bisect_left(self.l, (val, key,))
while (self.l[i][0] is not val) or (self.l[i][1] is not key):
i += 1
self.l.pop(i)
assert self._assert_invariants()
return val
elif strictkey:
assert self._assert_invariants()
raise KeyError, key
else:
assert self._assert_invariants()
return default
def clear(self):
assert self._assert_invariants()
self.d.clear()
del self.l[:]
assert self._assert_invariants()
def update(self, otherdict):
"""
@return: self
"""
assert self._assert_invariants()
for (k, v,) in otherdict.iteritems():
self.insert(k, v)
assert self._assert_invariants()
return self
def has_key(self, key):
assert self._assert_invariants()
return self.d.has_key(key)
def popitem(self):
if not self.l:
raise KeyError, 'popitem(): dictionary is empty'
le = self.l.pop(0)
del self.d[le[1]]
return (le[1], le[0],)
def pop(self, k, default=None, strictkey=False):
if not self.d.has_key(k):
if strictkey:
raise KeyError, k
else:
return default
v = self.d.pop(k)
i = bisect_left(self.l, (v, k,))
while (self.l[i][0] is not v) or (self.l[i][1] is not k):
i += 1
self.l.pop(i)
return v
def pop_from_list(self, i=0):
le = self.l.pop(i)
del self.d[le[1]]
return le[1]
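A short sketch of the two dictutil classes most likely to be reused elsewhere, assuming the vendored package imports as pyutil.dictutil (both constructors emit a DeprecationWarning, which Python ignores by default):

from pyutil.dictutil import NumDict, ValueOrderedDict
counts = NumDict()
counts.inc('hits')
counts.inc('hits')
counts.add_num('misses', 5)
print(counts.items_sorted_by_value())   # [('hits', 2), ('misses', 5)]
vod = ValueOrderedDict()
vod['carol'] = 3
vod['alice'] = 1
vod['bob'] = 2
print(vod.items())     # [('alice', 1), ('bob', 2), ('carol', 3)] -- kept sorted by value
print(vod.popitem())   # ('alice', 1) -- the entry with the smallest value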

271
libs/pyutil/fileutil.py

@@ -0,0 +1,271 @@
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
Futz with files like a pro.
"""
import errno, exceptions, os, stat, tempfile
try:
import bsddb
except ImportError:
DBNoSuchFileError = None
else:
DBNoSuchFileError = bsddb.db.DBNoSuchFileError
# read_file() and write_file() copied from Mark Seaborn's blog post. Please
# read it for complete rationale:
# http://lackingrhoticity.blogspot.com/2009/12/readfile-and-writefile-in-python.html
def read_file(filename, mode='rb'):
""" Read the contents of the file named filename and return it in
a string. This function closes the file handle before it returns
(even if the underlying Python implementation's garbage collector
doesn't). """
fh = open(filename, mode)
try:
return fh.read()
finally:
fh.close()
def write_file(filename, data, mode='wb'):
""" Write the string data into a file named filename. This
function closes the file handle (ensuring that the written data is
flushed from the perspective of the Python implementation) before
it returns (even if the underlying Python implementation's garbage
collector doesn't)."""
fh = open(filename, mode)
try:
fh.write(data)
finally:
fh.close()
# For backwards-compatibility in case someone is using these names. We used to
# have a superkludge in fileutil.py under these names.
def rename(src, dst, tries=4, basedelay=0.1):
return os.rename(src, dst)
def remove(f, tries=4, basedelay=0.1):
return os.remove(f)
def rmdir(f, tries=4, basedelay=0.1):
return os.rmdir(f)
class _Dir(object):
"""
Hold a set of files and subdirs and clean them all up when asked to.
"""
def __init__(self, name, cleanup=True):
self.name = name
self.cleanup = cleanup
self.files = []
self.subdirs = set()
def file(self, fname, mode=None):
"""
Create a file in the tempdir and remember it so as to close() it
before attempting to cleanup the temp dir.
@rtype: file
"""
ffn = os.path.join(self.name, fname)
if mode is not None:
fo = open(ffn, mode)
else:
fo = open(ffn)
self.register_file(fo)
return fo
def subdir(self, dirname):
"""
Create a subdirectory in the tempdir and remember it so as to call
shutdown() on it before attempting to clean up.
@rtype: _Dir instance
"""
ffn = os.path.join(self.name, dirname)
sd = _Dir(ffn, self.cleanup)
self.register_subdir(sd)
make_dirs(sd.name)
return sd
def register_file(self, fileobj):
"""
Remember the file object and call close() on it before attempting to
clean up.
"""
self.files.append(fileobj)
def register_subdir(self, dirobj):
"""
Remember the _Dir object and call shutdown() on it before attempting
to clean up.
"""
self.subdirs.add(dirobj)
def shutdown(self):
if self.cleanup:
for subdir in hasattr(self, 'subdirs') and self.subdirs or []:
subdir.shutdown()
for fileobj in hasattr(self, 'files') and self.files or []:
if DBNoSuchFileError is None:
fileobj.close() # "close()" is idempotent so we don't need to catch exceptions here
else:
try:
fileobj.close()
except DBNoSuchFileError:
# Ah, except that the bsddb module's file-like object (a DB object) has a non-idempotent close...
pass
if hasattr(self, 'name'):
rm_dir(self.name)
def __repr__(self):
return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name)
def __str__(self):
return self.__repr__()
def __del__(self):
try:
self.shutdown()
except:
import traceback
traceback.print_exc()
class NamedTemporaryDirectory(_Dir):
"""
Call tempfile.mkdtemp(), store the name of the dir in self.name, and
rm_dir() when it gets garbage collected or "shutdown()".
Also keep track of file objects for files within the tempdir and call
close() on them before rm_dir(). This is a convenient way to open temp
files within the directory, and it is very helpful on Windows because you
can't delete a directory which contains a file which is currently open.
"""
def __init__(self, cleanup=True, *args, **kwargs):
""" If cleanup, then the directory will be rmrf'ed when the object is shutdown. """
name = tempfile.mkdtemp(*args, **kwargs)
_Dir.__init__(self, name, cleanup)
class ReopenableNamedTemporaryFile:
"""
This uses tempfile.mkstemp() to generate a secure temp file. It then closes
the file, leaving a zero-length file as a placeholder. You can get the
filename with ReopenableNamedTemporaryFile.name. When the
ReopenableNamedTemporaryFile instance is garbage collected or its shutdown()
method is called, it deletes the file.
"""
def __init__(self, *args, **kwargs):
fd, self.name = tempfile.mkstemp(*args, **kwargs)
os.close(fd)
def __repr__(self):
return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name)
def __str__(self):
return self.__repr__()
def __del__(self):
self.shutdown()
def shutdown(self):
remove(self.name)
def make_dirs(dirname, mode=0777):
"""
An idempotent version of os.makedirs(). If the dir already exists, do
nothing and return without raising an exception. If this call creates the
dir, return without raising an exception. If there is an error that
prevents creation or if the directory gets deleted after make_dirs() creates
it and before make_dirs() checks that it exists, raise an exception.
"""
tx = None
try:
os.makedirs(dirname, mode)
except OSError, x:
tx = x
if not os.path.isdir(dirname):
if tx:
raise tx
raise exceptions.IOError, "unknown error prevented creation of directory, or deleted the directory immediately after creation: %s" % dirname # careful not to construct an IOError with a 2-tuple, as that has a special meaning...
def rmtree(dirname):
"""
A threadsafe and idempotent version of shutil.rmtree(). If the dir is
already gone, do nothing and return without raising an exception. If this
call removes the dir, return without raising an exception. If there is an
error that prevents deletion or if the directory gets created again after
rm_dir() deletes it and before rm_dir() checks that it is gone, raise an
exception.
"""
excs = []
try:
os.chmod(dirname, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD)
for f in os.listdir(dirname):
fullname = os.path.join(dirname, f)
if os.path.isdir(fullname):
rm_dir(fullname)
else:
remove(fullname)
os.rmdir(dirname)
except EnvironmentError, le:
# Ignore "No such file or directory", collect any other exception.
if (le.args[0] != 2 and le.args[0] != 3) or (le.args[0] != errno.ENOENT):
excs.append(le)
except Exception, le:
excs.append(le)
# Okay, now we've recursively removed everything, ignoring any "No
# such file or directory" errors, and collecting any other errors.
if os.path.exists(dirname):
if len(excs) == 1:
raise excs[0]
if len(excs) == 0:
raise OSError, "Failed to remove dir for unknown reason."
raise OSError, excs
def rm_dir(dirname):
# Renamed to be like shutil.rmtree and unlike rmdir.
return rmtree(dirname)
def remove_if_possible(f):
try:
remove(f)
except EnvironmentError:
pass
def remove_if_present(f):
try:
remove(f)
except EnvironmentError, le:
# Ignore "No such file or directory", re-raise any other exception.
if (le.args[0] != 2 and le.args[0] != 3) or (le.args[0] != errno.ENOENT):
raise
def rmdir_if_possible(f):
try:
rmdir(f)
except EnvironmentError:
pass
def open_or_create(fname, binarymode=True):
try:
f = open(fname, binarymode and "r+b" or "r+")
except EnvironmentError:
f = open(fname, binarymode and "w+b" or "w+")
return f
def du(basedir):
size = 0
for root, dirs, files in os.walk(basedir):
for f in files:
fn = os.path.join(root, f)
size += os.path.getsize(fn)
return size
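A minimal sketch of the temp-dir helpers above, assuming the package imports as pyutil.fileutil:

import os
from pyutil.fileutil import NamedTemporaryDirectory, write_file, read_file
tmp = NamedTemporaryDirectory()             # tempfile.mkdtemp() under the hood
p = os.path.join(tmp.name, 'hello.txt')
write_file(p, 'hello world\n')              # writes the bytes and closes the handle
print(read_file(p))                         # returns 'hello world\n'
tmp.shutdown()                              # closes registered files, then removes the whole tree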

30
libs/pyutil/find_exe.py

@@ -0,0 +1,30 @@
import warnings
import os, sys
from twisted.python.procutils import which
def find_exe(exename):
"""
Look for something named exename or exename + ".py".
This is a kludge.
@return: a list containing one element which is the path to the exename
(if it is thought to be executable), or else the first element being
sys.executable and the second element being the path to the
exename + ".py", or else return False if one can't be found
"""
warnings.warn("deprecated", DeprecationWarning)
exes = which(exename)
exe = exes and exes[0]
if not exe:
exe = os.path.join(sys.prefix, 'scripts', exename + '.py')
if os.path.exists(exe):
path, ext = os.path.splitext(exe)
if ext.lower() in [".exe", ".bat",]:
cmd = [exe,]
else:
cmd = [sys.executable, exe,]
return cmd
else:
return False
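A hedged example of the kludge above; 'trial' is only an illustrative executable name, and the commented result is hypothetical:

from pyutil.find_exe import find_exe
cmd = find_exe('trial')   # e.g. ['/usr/bin/trial'], or [sys.executable, '<prefix>/scripts/trial.py'], or False
print(cmd)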

81
libs/pyutil/hashexpand.py

@@ -0,0 +1,81 @@
# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import warnings
"""
Cryptographically strong pseudo-random number generator based on SHA256.
"""
class SHA256Expander:
"""
Provide a cryptographically strong pseudo-random number generator based on
SHA256. Hopefully this means that no attacker will be able to predict any
bit of output that he hasn't seen, given that he doesn't know anything about
the seed and given that he can see as many bits of output as he desires
except for the bit that he is trying to predict. Further it is hoped that
an attacker will not even be able to determine whether a given stream of
random bytes was generated by this PRNG or by flipping a coin repeatedly.
The safety of this technique has not been verified by a Real Cryptographer.
... but it is similar to the PRNG in FIPS-186...
The seed and counter are encoded in DJB's netstring format so that I
don't have to think about the possibility of ambiguity.
Note: I've since learned more about the theory of secure hash functions
and the above is a strong assumption about a secure hash function. Use
of this class should be considered deprecated and you should use a more
well-analyzed KDF (such as the nascent standard HKDF) or stream cipher or
whatever it is that you need.
"""
def __init__(self, seed=None):
warnings.warn("deprecated", DeprecationWarning)
if seed is not None:
self.seed(seed)
def seed(self, seed):
import hashlib
self.starth = hashlib.sha256('24:pyutil hash expansion v2,10:algorithm:,6:SHA256,6:value:,')
seedlen = len(seed)
seedlenstr = str(seedlen)
self.starth.update(seedlenstr)
self.starth.update(':')
self.starth.update(seed)
self.starth.update(',')
self.avail = ""
self.counter = 0
def get(self, bytes):
bytesleft = bytes
res = []
while bytesleft > 0:
if len(self.avail) == 0:
h = self.starth.copy()
counterstr = str(self.counter)
counterstrlen = len(counterstr)
counterstrlenstr = str(counterstrlen)
h.update(counterstrlenstr)
h.update(':')
h.update(counterstr)
h.update(',')
self.avail = h.digest()
self.counter += 1
numb = min(len(self.avail), bytesleft)
(chunk, self.avail,) = (self.avail[:numb], self.avail[numb:],)
res.append(chunk)
bytesleft = bytesleft - numb
resstr = ''.join(res)
assert len(resstr) == bytes
return resstr
def sha256expand(inpstr, expbytes):
return SHA256Expander(inpstr).get(expbytes)
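A small sketch of the (deprecated) expander above, assuming it imports as pyutil.hashexpand; the seed string is arbitrary:

from pyutil.hashexpand import sha256expand
out = sha256expand("my seed", 100)             # 100 pseudo-random bytes derived from the seed
print(len(out))                                # 100
print(out == sha256expand("my seed", 100))     # True -- same seed, same byte stream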

115
libs/pyutil/humanreadable.py

@@ -0,0 +1,115 @@
# Copyright (c) 2001 Autonomous Zone Industries
# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import exceptions, os
from repr import Repr
class BetterRepr(Repr):
def __init__(self):
Repr.__init__(self)
# Note: These levels can get adjusted dynamically! My goal is to get more info when printing important debug stuff like exceptions and stack traces and less info when logging normal events. --Zooko 2000-10-14
self.maxlevel = 6
self.maxdict = 6
self.maxlist = 6
self.maxtuple = 6
self.maxstring = 300
self.maxother = 300
def repr_function(self, obj, level):
if hasattr(obj, 'func_code'):
return '<' + obj.func_name + '() at ' + os.path.basename(obj.func_code.co_filename) + ':' + str(obj.func_code.co_firstlineno) + '>'
else:
return '<' + obj.func_name + '() at (builtin)'
def repr_instance_method(self, obj, level):
if hasattr(obj, 'func_code'):
return '<' + obj.im_class.__name__ + '.' + obj.im_func.__name__ + '() at ' + os.path.basename(obj.im_func.func_code.co_filename) + ':' + str(obj.im_func.func_code.co_firstlineno) + '>'
else:
return '<' + obj.im_class.__name__ + '.' + obj.im_func.__name__ + '() at (builtin)'
def repr_long(self, obj, level):
s = `obj` # XXX Hope this isn't too slow...
if len(s) > self.maxlong:
i = max(0, (self.maxlong-3)/2)
j = max(0, self.maxlong-3-i)
s = s[:i] + '...' + s[len(s)-j:]
if s[-1] == 'L':
return s[:-1]
return s
def repr_instance(self, obj, level):
"""
If it is an instance of Exception, format it nicely (trying to emulate
the format that you see when an exception is actually raised, plus
bracketing '<''s). If it is an instance of dict call self.repr_dict()
on it. If it is an instance of list call self.repr_list() on it. Else
call Repr.repr_instance().
"""
if isinstance(obj, exceptions.Exception):
# Don't cut down exception strings so much.
tms = self.maxstring
self.maxstring = max(512, tms * 4)
tml = self.maxlist
self.maxlist = max(12, tml * 4)
try:
if hasattr(obj, 'args'):
if len(obj.args) == 1:
return '<' + obj.__class__.__name__ + ': ' + self.repr1(obj.args[0], level-1) + '>'
else:
return '<' + obj.__class__.__name__ + ': ' + self.repr1(obj.args, level-1) + '>'
else:
return '<' + obj.__class__.__name__ + '>'
finally:
self.maxstring = tms
self.maxlist = tml
if isinstance(obj, dict):
return self.repr_dict(obj, level)
if isinstance(obj, list):
return self.repr_list(obj, level)
return Repr.repr_instance(self, obj, level)
def repr_list(self, obj, level):
"""
copied from standard repr.py and fixed to work on multithreadedly mutating lists.
"""
if level <= 0: return '[...]'
n = len(obj)
myl = obj[:min(n, self.maxlist)]
s = ''
for item in myl:
entry = self.repr1(item, level-1)
if s: s = s + ', '
s = s + entry
if n > self.maxlist: s = s + ', ...'
return '[' + s + ']'
def repr_dict(self, obj, level):
"""
copied from standard repr.py and fixed to work on multithreadedly mutating dicts.
"""
if level <= 0: return '{...}'
s = ''
n = len(obj)
items = obj.items()[:min(n, self.maxdict)]
items.sort()
for key, val in items:
entry = self.repr1(key, level-1) + ':' + self.repr1(val, level-1)
if s: s = s + ', '
s = s + entry
if n > self.maxdict: s = s + ', ...'
return '{' + s + '}'
# This object can be changed by other code updating this module's "brepr"
# variables. This is so that (a) code can use humanreadable with
# "from humanreadable import hr; hr(mything)", and (b) code can override
# humanreadable to provide application-specific human readable output
# (e.g. libbase32's base32id.AbbrevRepr).
brepr = BetterRepr()
def hr(x):
return brepr.repr(x)
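A quick sketch of hr(). The exact truncation points depend on the limits set in BetterRepr.__init__ above, so the comments show only approximate output:

from pyutil.humanreadable import hr
print(hr(range(20)))                       # long lists are elided, roughly [0, 1, 2, 3, 4, 5, ...]
print(hr(dict.fromkeys(range(20), 0)))     # dicts likewise, roughly {0:0, 1:0, 2:0, 3:0, 4:0, 5:0, ...}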

136
libs/pyutil/increasing_timer.py

@@ -0,0 +1,136 @@
# Copyright (c) 2001 Autonomous Zone Industries
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
This module was invented when it was discovered that time.time() can return
decreasing answers, which was causing scheduled tasks to get executed out of
order. See python bug report `[ #447945 ] time.time() is not
non-decreasing',
http://sourceforge.net/tracker/index.php?func=detail&aid=447945&group_id=5470&atid=105470
http://mail.python.org/pipermail/python-list/2001-August/thread.html#58296
After posting that bug report, I figured out that this isn't really a bug,
but a misunderstanding about the semantics of gettimeofday(). gettimeofday()
relies on the hardware clock, which is supposed to reflect the "real" time
i.e. the position and orientation of our planet with regard to our sun. But
the hardware clock gets adjusted, either for skew (because hardware clocks
always run a little faster or a little slower than they ought), or in order to
sync up with another clock e.g. through NTP. So it isn't really a bug in the
underlying platform (except perhaps a bug in the lack of a prominent warning
in the documentation), but if you depend on a monotonically increasing
timestamps, you need to use IncreasingTimer.time() instead of the Python
standard library's time.time(). --Zooko 2001-08-04
"""
import time as standardtime
# Here is a global reference to an IncreasingTimer.
# This singleton global IncreasingTimer instance gets created at module load time.
timer = None
class IncreasingTimer:
def __init__(self, inittime=None):
"""
@param inittime starting time (in seconds) or None in which case it
will be initialized to standardtime.time()
"""
if inittime is None:
inittime = standardtime.time()
self.lasttime = inittime # This stores the most recent answer that we returned from time().
self.delta = 0 # We add this to the result from the underlying standardtime.time().
# How big of an increment do we need to add in order to make the new float greater than the old float?
trye = 1.0
while (self.lasttime + trye) > self.lasttime:
olde = trye
trye = trye / 2.0
self._EPSILON = olde
def time(self):
"""
This returns the current time as a float, with as much precision as
the underlying Python interpreter can muster. In addition, successive
calls to time() always return bigger numbers. (standardtime.time()
can sometimes return the same or even a *smaller* number!)
On the other hand, calling time() is a bit slower than calling
standardtime.time(), so you might want to avoid it inside tight loops
and deal with decreasing or identical answers yourself.
Now by definition you cannot "reset" this clock to an earlier state.
This means that if you start a Python interpreter and instantiate an
IncreasingTimer, and then you subsequently realize that your
computer's clock was set to next year, and you set it back to the
correct year, that subsequent calls to standardtime.time() will return
a number indicating this year and IncreasingTimer.time() will continue
to return a number indicating next year. Therefore, you should use
the answers from IncreasingTimer.time() in such a way that the only
things you depend on are correctness in the relative *order* of two
times, (and, with the following caveat, the relative *difference*
between two times as well), not the global "correctness" of the times
with respect to the rest of the world.
The caveat is that if the underlying answers from standardtime.time()
jump *forward*, then this *does* distort the relative difference
between two answers from IncreasingTimer.time(). What
IncreasingTimer.time() does is if the underlying clock goes
*backwards*, then IncreasingTimer.time() still returns successively
higher numbers. Then if the underlying clock jumps *forwards*,
IncreasingTimer.time() also jumps forward the same amount. A weird
consequence of this is that if you were to set your system clock to
point to 10 years ago, and call:
t1 = increasingtimer.time()
and then set your system clock back to the present, and call:
t2 = increasingtimer.time()
, then there would be a 10-year difference between t2 and t1.
In practice, adjustments to the underlying system time are rarely that
drastic, and for some systems (e.g. Mnet's DoQ, for which this module
was invented) it doesn't matter anyway if time jumps forward.
Another note: Brian Warner has pointed out that there is another
caveat, which is due to there being a delay between successive calls
to IncreasingTimer.time(). When the underlying clock jumps backward,
then events which were scheduled before the jump and scheduled to go
off after the jump may be delayed by at most d, where d is the delay
between the two successive calls to IncreasingTimer which spanned the
jump.
@singlethreaded You must guarantee that you never have more than one
thread in this function at a time.
"""
t = standardtime.time() + self.delta
lasttime = self.lasttime
if t <= lasttime:
self.delta = self.delta + (lasttime - t) + self._EPSILON
t = lasttime + self._EPSILON
# TODO: If you were sure that you could generate a bigger float in one
# pass, you could change this `while' to an `if' and optimize out a
# test.
while t <= lasttime:
# We can get into here only if self._EPSILON is too small to make
# the time float "tick over" to a new higher value. So we
# (permanently) double self._EPSILON.
# TODO: Is doubling epsilon the best way to quickly get a
# minimally bigger float?
self._EPSILON = self._EPSILON * 2.0
# Delta, having smaller magnitude than t, can be incremented by
# more than t was incremented. (Up to the old epsilon more.)
# That's OK.
self.delta = self.delta + self._EPSILON
t = t + self._EPSILON
self.lasttime = t
return t
# create the global IncreasingTimer instance and `time' function
timer = IncreasingTimer()
time = timer.time
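A tiny sketch of the module-level convenience created above:

from pyutil import increasing_timer
t1 = increasing_timer.time()
t2 = increasing_timer.time()
assert t2 > t1   # strictly increasing, even if the system clock steps backwards in between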

288
libs/pyutil/iputil.py

@@ -0,0 +1,288 @@
# portions extracted from ipaddresslib by Autonomous Zone Industries, LGPL (author: Greg Smith)
# portions adapted from nattraverso.ipdiscover
# portions authored by Brian Warner, working for Allmydata
# most recent version authored by Zooko O'Whielacronx, working for Allmydata
# from the Python Standard Library
import os, re, socket, sys
# from Twisted
from twisted.internet import defer, reactor
from twisted.python import failure
from twisted.internet.protocol import DatagramProtocol
from twisted.internet.utils import getProcessOutput
from twisted.python.procutils import which
from twisted.python import log
# from pyutil
import observer
try:
import resource
def increase_rlimits():
# We'd like to raise our soft resource.RLIMIT_NOFILE, since certain
# systems (OS-X, probably solaris) start with a relatively low limit
# (256), and some unit tests want to open up more sockets than this.
# Most linux systems start with both hard and soft limits at 1024,
# which is plenty.
# unfortunately the values to pass to setrlimit() vary widely from
# one system to another. OS-X reports (256, HUGE), but the real hard
# limit is 10240, and accepts (-1,-1) to mean raise it to the
# maximum. Cygwin reports (256, -1), then ignores a request of
# (-1,-1): instead you have to guess at the hard limit (it appears to
# be 3200), so using (3200,-1) seems to work. Linux reports a
# sensible (1024,1024), then rejects (-1,-1) as trying to raise the
# maximum limit, so you could set it to (1024,1024) but you might as
# well leave it alone.
try:
current = resource.getrlimit(resource.RLIMIT_NOFILE)
except AttributeError:
# we're probably missing RLIMIT_NOFILE
return
if current[0] >= 1024:
# good enough, leave it alone
return
try:
if current[1] > 0 and current[1] < 1000000:
# solaris reports (256, 65536)
resource.setrlimit(resource.RLIMIT_NOFILE,
(current[1], current[1]))
else:
# this one works on OS-X (bsd), and gives us 10240, but
# it doesn't work on linux (on which both the hard and
# soft limits are set to 1024 by default).
resource.setrlimit(resource.RLIMIT_NOFILE, (-1,-1))
new = resource.getrlimit(resource.RLIMIT_NOFILE)
if new[0] == current[0]:
# probably cygwin, which ignores -1. Use a real value.
resource.setrlimit(resource.RLIMIT_NOFILE, (3200,-1))
except ValueError:
log.msg("unable to set RLIMIT_NOFILE: current value %s"
% (resource.getrlimit(resource.RLIMIT_NOFILE),))
except:
# who knows what. It isn't very important, so log it and continue
log.err()
except ImportError:
def _increase_rlimits():
# TODO: implement this for Windows. Although I suspect the
# solution might be "be running under the iocp reactor and
# make this function be a no-op".
pass
# pyflakes complains about two 'def FOO' statements at the same time,
# since one might be shadowing the other. This hack appeases pyflakes.
increase_rlimits = _increase_rlimits
def get_local_addresses_async(target="198.41.0.4"): # A.ROOT-SERVERS.NET
"""
Return a Deferred that fires with a list of IPv4 addresses (as dotted-quad
strings) that are currently configured on this host, sorted in descending
order of how likely we think they are to work.
@param target: we want to learn an IP address they could try using to
connect to us; The default value is fine, but it might help if you
pass the address of a host that you are actually trying to be
reachable to.
"""
addresses = []
local_ip = get_local_ip_for(target)
if local_ip:
addresses.append(local_ip)
if sys.platform == "cygwin":
d = _cygwin_hack_find_addresses(target)
else:
d = _find_addresses_via_config()
def _collect(res):
for addr in res:
if addr != "0.0.0.0" and not addr in addresses:
addresses.append(addr)
return addresses
d.addCallback(_collect)
return d
def get_local_ip_for(target):
"""Find out what our IP address is for use by a given target.
@return: the IP address as a dotted-quad string which could be used by
to connect to us. It might work for them, it might not. If
there is no suitable address (perhaps we don't currently have an
externally-visible interface), this will return None.
"""
try:
target_ipaddr = socket.gethostbyname(target)
except socket.gaierror:
# DNS isn't running, or somehow we encountered an error
# note: if an interface is configured and up, but nothing is connected to it,
# gethostbyname("A.ROOT-SERVERS.NET") will take 20 seconds to raise socket.gaierror
# . This is synchronous and occurs for each node being started, so users of certain unit
# tests will see something like 120s of delay, which may be enough to hit the default
# trial timeouts. For that reason, get_local_addresses_async() was changed to default to
# the numerical ip address for A.ROOT-SERVERS.NET, to avoid this DNS lookup. This also
# makes node startup a tad faster.
return None
udpprot = DatagramProtocol()
port = reactor.listenUDP(0, udpprot)
try:
udpprot.transport.connect(target_ipaddr, 7)
localip = udpprot.transport.getHost().host
except socket.error:
# no route to that host
localip = None
port.stopListening() # note, this returns a Deferred
return localip
# k: result of sys.platform, v: which kind of IP configuration reader we use
_platform_map = {
"linux-i386": "linux", # redhat
"linux-ppc": "linux", # redhat
"linux2": "linux", # debian
"win32": "win32",
"irix6-n32": "irix",
"irix6-n64": "irix",
"irix6": "irix",
"openbsd2": "bsd",
"darwin": "bsd", # Mac OS X
"freebsd4": "bsd",
"freebsd5": "bsd",
"freebsd6": "bsd",
"netbsd1": "bsd",
"sunos5": "sunos",
"cygwin": "cygwin",
}
class UnsupportedPlatformError(Exception):
pass
# Wow, I'm really amazed at how much mileage we've gotten out of calling
# the external route.exe program on windows... It appears to work on all
# versions so far. Still, the real system calls would be much preferred...
# ... thus wrote Greg Smith in time immemorial...
_win32_path = 'route.exe'
_win32_args = ('print',)
_win32_re = re.compile('^\s*\d+\.\d+\.\d+\.\d+\s.+\s(?P<address>\d+\.\d+\.\d+\.\d+)\s+(?P<metric>\d+)\s*$', flags=re.M|re.I|re.S)
# These work in Redhat 6.x and Debian 2.2 potato
_linux_path = '/sbin/ifconfig'
_linux_re = re.compile('^\s*inet addr:(?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S)
# originally NetBSD 1.4 (submitted by Rhialto), Darwin, Mac OS X, FreeBSD, OpenBSD
_bsd_path = '/sbin/ifconfig'
_bsd_args = ('-a',)
_bsd_re = re.compile('^\s+inet (?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S)
# Irix 6.5
_irix_path = '/usr/etc/ifconfig'
# Solaris 2.x
_sunos_path = '/usr/sbin/ifconfig'
class SequentialTrier(object):
""" I hold a list of executables to try and try each one in turn
until one gives me a list of IP addresses."""
def __init__(self, exebasename, args, regex):
assert not os.path.isabs(exebasename)
self.exes_left_to_try = which(exebasename)
self.exes_left_to_try.reverse()
self.args = args
self.regex = regex
self.o = observer.OneShotObserverList()
self._try_next()
def _try_next(self):
if not self.exes_left_to_try:
self.o.fire(None)
else:
exe = self.exes_left_to_try.pop()
d2 = _query(exe, self.args, self.regex)
def cb(res):
if res:
self.o.fire(res)
else:
self._try_next()
def eb(why):
self._try_next()
d2.addCallbacks(cb, eb)
def when_tried(self):
return self.o.when_fired()
# k: platform string as provided in the value of _platform_map
# v: tuple of (path_to_tool, args, regex,)
_tool_map = {
"linux": (_linux_path, (), _linux_re,),
"win32": (_win32_path, _win32_args, _win32_re,),
"cygwin": (_win32_path, _win32_args, _win32_re,),
"bsd": (_bsd_path, _bsd_args, _bsd_re,),
"irix": (_irix_path, _bsd_args, _bsd_re,),
"sunos": (_sunos_path, _bsd_args, _bsd_re,),
}
def _find_addresses_via_config():
# originally by Greg Smith, hacked by Zooko to conform to Brian Warner's API.
platform = _platform_map.get(sys.platform)
(pathtotool, args, regex,) = _tool_map.get(platform, ('ifconfig', _bsd_args, _bsd_re,))
# If the platform isn't known then we attempt BSD-style ifconfig. If it
# turns out that we don't get anything resembling a dotted quad IPv4 address
# out of it, then we'll raise UnsupportedPlatformError.
# If pathtotool is a fully qualified path then we just try that.
# If it is merely an executable name then we use Twisted's
# "which()" utility and try each executable in turn until one
# gives us something that resembles a dotted-quad IPv4 address.
if os.path.isabs(pathtotool):
d = _query(pathtotool, args, regex)
else:
d = SequentialTrier(pathtotool, args, regex).when_tried()
d.addCallback(_check_result)
return d
def _check_result(result):
if not result and not _platform_map.has_key(sys.platform):
return failure.Failure(UnsupportedPlatformError(sys.platform))
else:
return result
def _query(path, args, regex):
d = getProcessOutput(path, args)
def _parse(output):
addresses = []
outputsplit = output.split('\n')
for outline in outputsplit:
m = regex.match(outline)
if m:
addr = m.groupdict()['address']
if addr not in addresses:
addresses.append(addr)
return addresses
d.addCallback(_parse)
return d
def _cygwin_hack_find_addresses(target):
addresses = []
for h in [target, "localhost", "127.0.0.1",]:
try:
addr = get_local_ip_for(h)
if addr not in addresses:
addresses.append(addr)
except socket.gaierror:
pass
return defer.succeed(addresses)
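A hedged sketch of the async address lookup above, assuming a Twisted reactor is available; the printed list is only illustrative:

import sys
from twisted.internet import reactor
from pyutil import iputil
d = iputil.get_local_addresses_async()
d.addCallback(lambda addrs: sys.stdout.write('%r\n' % (addrs,)))   # e.g. ['192.168.1.10', '127.0.0.1'] (hypothetical)
d.addBoth(lambda ign: reactor.stop())
reactor.run()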

456
libs/pyutil/jsonutil.py

@@ -0,0 +1,456 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
# We require simplejson>= 2.1.0 and set its default behavior to
# use_decimal=True. This retains backwards compatibility with previous
# versions of jsonutil (although it means jsonutil now requires a recent
# version of simplejson).
# http://code.google.com/p/simplejson/issues/detail?id=34
r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
interchange format.
:mod:`simplejson` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
version of the :mod:`json` library contained in Python 2.6, but maintains
compatibility with Python 2.4 and Python 2.5 and (currently) has
significant performance advantages, even without using the optional C
extension for speedups.
Encoding basic Python object hierarchies::
>>> import simplejson as json
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
>>> print json.dumps("\"foo\bar")
"\"foo\bar"
>>> print json.dumps(u'\u1234')
"\u1234"
>>> print json.dumps('\\')
"\\"
>>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
{"a": 0, "b": 0, "c": 0}
>>> from StringIO import StringIO
>>> io = StringIO()
>>> json.dump(['streaming API'], io)
>>> io.getvalue()
'["streaming API"]'
Compact encoding::
>>> import simplejson as json
>>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
'[1,2,3,{"4":5,"6":7}]'
Pretty printing::
>>> import simplejson as json
>>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')
>>> print '\n'.join([l.rstrip() for l in s.splitlines()])
{
"4": 5,
"6": 7
}
Decoding JSON::
>>> import simplejson as json
>>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
True
>>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
True
>>> from StringIO import StringIO
>>> io = StringIO('["streaming API"]')
>>> json.load(io)[0] == 'streaming API'
True
Specializing JSON object decoding::
>>> import simplejson as json
>>> def as_complex(dct):
... if '__complex__' in dct:
... return complex(dct['real'], dct['imag'])
... return dct
...
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
... object_hook=as_complex)
(1+2j)
>>> from decimal import Decimal
>>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
True
Specializing JSON object encoding::
>>> import simplejson as json
>>> def encode_complex(obj):
... if isinstance(obj, complex):
... return [obj.real, obj.imag]
... raise TypeError(repr(obj) + " is not JSON serializable")
...
>>> json.dumps(2 + 1j, default=encode_complex)
'[2.0, 1.0]'
>>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
'[2.0, 1.0]'
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
'[2.0, 1.0]'
Using simplejson.tool from the shell to validate and pretty-print::
$ echo '{"json":"obj"}' | python -m simplejson.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m simplejson.tool
Expecting property name: line 1 column 2 (char 2)
"""
import pkg_resources
pkg_resources.require("simplejson>=2.1.0")
# Now we just import all of the contents of the simplejson package and
# then overwrite it with a copy of the simplejson __init__.py edited
# to make use_decimal=True the default.
import simplejson
__version__ = simplejson.__version__
__all__ = simplejson.__all__
# The unit tests rely on .encoder and .decoder, and although they are not
# included in simplejson.__all__ they are still attributes of the simplejson
# package since they are modules within it.
from simplejson import encoder, decoder, scanner
encoder, decoder, scanner # http://divmod.org/trac/ticket/1499
__all__.extend(['encoder', 'decoder', 'scanner'])
__author__ = simplejson.__author__
del simplejson
from decimal import Decimal
from simplejson.decoder import JSONDecoder, JSONDecodeError
JSONDecoder, JSONDecodeError # http://divmod.org/trac/ticket/1499
from simplejson.encoder import JSONEncoder
def _import_OrderedDict():
from pyutil.odict import OrderedDict
return OrderedDict
OrderedDict = _import_OrderedDict()
def _import_c_make_encoder():
    try:
        from simplejson._speedups import make_encoder
        return make_encoder
    except ImportError:
        return None
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
use_decimal=True,
)
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True, **kw):
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object).
If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then some of the chunks written to ``fp``
may be ``unicode`` instances, subject to normal Python ``str`` to
``unicode`` coercion rules. Unless ``fp.write()`` explicitly
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
to cause an error.
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance with the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If *indent* is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
for each level of nesting. ``None`` (the default) selects the most compact
representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators.
``(',', ':')`` is the most compact JSON representation.
``encoding`` is the character encoding for str instances, default is UTF-8.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
If *use_decimal* is true (default: ``True``) then decimal.Decimal
will be natively serialized to JSON with full precision.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal and not kw):
iterable = _default_encoder.iterencode(obj)
else:
if cls is None:
cls = JSONEncoder
iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, encoding=encoding,
default=default, use_decimal=use_decimal, **kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at
# a debuggability cost
for chunk in iterable:
fp.write(chunk)
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the return value will be a
``unicode`` instance subject to normal Python ``str`` to ``unicode``
coercion rules instead of being escaped to an ASCII ``str``.
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
strict compliance with the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
for each level of nesting. ``None`` (the default) selects the most compact
representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators.
``(',', ':')`` is the most compact JSON representation.
``encoding`` is the character encoding for str instances, default is UTF-8.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
If *use_decimal* is true (default: ``True``) then decimal.Decimal
will be natively serialized to JSON with full precision.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and not kw):
return _default_encoder.encode(obj)
if cls is None:
cls = JSONEncoder
return cls(
skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, encoding=encoding, default=default,
use_decimal=use_decimal, **kw).encode(obj)
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
object_pairs_hook=None, parse_float=Decimal)
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=True, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object.
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
Note that currently only encodings that are a superset of ASCII work;
strings of other encodings should be passed in as :class:`unicode`.
*object_hook*, if specified, will be called with the result of every
JSON object decoded and its return value will be used in place of the
given :class:`dict`. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
*object_pairs_hook* is an optional function that will be called with
the result of any object literal decode with an ordered list of pairs.
The return value of *object_pairs_hook* will be used instead of the
:class:`dict`. This feature can be used to implement custom decoders
that rely on the order that the key and value pairs are decoded (for
example, :func:`collections.OrderedDict` will remember the order of
insertion). If *object_hook* is also defined, the *object_pairs_hook*
takes priority.
*parse_float*, if specified, will be called with the string of every
JSON float to be decoded. By default, this is equivalent to
``float(num_str)``. This can be used to use another datatype or parser
for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
If *use_decimal* is true (default: ``True``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg.
"""
return loads(fp.read(),
encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
use_decimal=use_decimal, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=True, **kw):
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object.
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
Note that currently only encodings that are a superset of ASCII work;
strings of other encodings should be passed in as :class:`unicode`.
*object_hook*, if specified, will be called with the result of every
JSON object decoded and its return value will be used in place of the
given :class:`dict`. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
*object_pairs_hook* is an optional function that will be called with
the result of any object literal decode with an ordered list of pairs.
The return value of *object_pairs_hook* will be used instead of the
:class:`dict`. This feature can be used to implement custom decoders
that rely on the order that the key and value pairs are decoded (for
example, :func:`collections.OrderedDict` will remember the order of
insertion). If *object_hook* is also defined, the *object_pairs_hook*
takes priority.
*parse_float*, if specified, will be called with the string of every
JSON float to be decoded. By default, this is equivalent to
``float(num_str)``. This can be used to use another datatype or parser
for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
If *use_decimal* is true (default: ``True``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg.
"""
if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None
and use_decimal and not kw):
return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
if object_hook is not None:
kw['object_hook'] = object_hook
if object_pairs_hook is not None:
kw['object_pairs_hook'] = object_pairs_hook
if parse_float is not None:
kw['parse_float'] = parse_float
if parse_int is not None:
kw['parse_int'] = parse_int
if parse_constant is not None:
kw['parse_constant'] = parse_constant
if not use_decimal:
kw['use_decimal'] = use_decimal
return cls(encoding=encoding, **kw).decode(s)
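A short round-trip sketch of the behaviour this wrapper adds (illustrative, not part of the module; it assumes pyutil is importable and, as the module itself requires, simplejson >= 2.1.0 is installed). Because use_decimal=True is the default on both sides, decimal.Decimal values keep their exact digits through a dump/load cycle::

>>> from decimal import Decimal
>>> from pyutil import jsonutil as json
>>> json.dumps({'price': Decimal('1.10')})
'{"price": 1.10}'
>>> json.loads('{"price": 1.10}')['price'] == Decimal('1.10')
True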
def _toggle_speedups(enabled):
import simplejson.decoder as dec
import simplejson.encoder as enc
import simplejson.scanner as scan
c_make_encoder = _import_c_make_encoder()
if enabled:
dec.scanstring = dec.c_scanstring or dec.py_scanstring
enc.c_make_encoder = c_make_encoder
enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or
enc.py_encode_basestring_ascii)
scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner
else:
dec.scanstring = dec.py_scanstring
enc.c_make_encoder = None
enc.encode_basestring_ascii = enc.py_encode_basestring_ascii
scan.make_scanner = scan.py_make_scanner
dec.make_scanner = scan.make_scanner
global _default_decoder
_default_decoder = JSONDecoder(
encoding=None,
object_hook=None,
object_pairs_hook=None,
use_decimal=True,
)
global _default_encoder
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
use_decimal=True,
)

59
libs/pyutil/lineutil.py

@ -0,0 +1,59 @@
#!/usr/bin/env python
import os, re
def lineify_fileobjs(ifo, ofo, strip=False):
from pyutil.strutil import pop_trailing_newlines, split_on_newlines
for l in ifo:
for sl in split_on_newlines(pop_trailing_newlines(l)):
if strip:
sl = sl.strip()
ofo.write(pop_trailing_newlines(sl) + '\n')
def lineify_file(fname, strip=False, nobak=True):
f = open(fname, "rU")
from pyutil.fileutil import ReopenableNamedTemporaryFile
rntf = ReopenableNamedTemporaryFile()
fo = open(rntf.name, "wb")
for l in f:
if strip:
l = l.strip() + '\n'
fo.write(l)
fo.close()
import shutil
if not nobak:
shutil.copyfile(fname, fname + ".lines.py-bak")
try:
shutil.move(rntf.name, fname)
except EnvironmentError:
# Couldn't atomically overwrite, so just hope that this process doesn't die
# and the target file doesn't get recreated in between the following two
# operations:
if nobak:
os.remove(fname)
else:
shutil.move(fname, fname + ".lines.py-bak-2")
shutil.move(rntf.name, fname)
def darcs_metadir_dirpruner(dirs):
if "_darcs" in dirs:
dirs.remove("_darcs")
SCRE=re.compile("\\.(py|php|c|h|cpp|hpp|txt|sh|pyx|pxi|html|htm)$|makefile$", re.IGNORECASE)
def source_code_filepruner(fname):
return SCRE.search(fname)
def all_filepruner(fname):
return True
def all_dirpruner(dirs):
return
def lineify_all_files(dirname, strip=False, nobak=True, dirpruner=all_dirpruner, filepruner=all_filepruner):
for (root, dirs, files,) in os.walk(dirname):
dirpruner(dirs)
for fname in files:
fullfname = os.path.join(root, fname)
if filepruner(fullfname):
lineify_file(fullfname, strip=strip, nobak=nobak)
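A usage sketch for the walker above (illustrative, not part of lineutil.py; 'src' is just an example directory name):

from pyutil.lineutil import (lineify_all_files, darcs_metadir_dirpruner,
                             source_code_filepruner)

# Normalize line endings (and strip trailing whitespace) in recognized source
# files only, skipping any _darcs metadata directory and keeping
# .lines.py-bak backups of the originals.
lineify_all_files("src", strip=True, nobak=False,
                  dirpruner=darcs_metadir_dirpruner,
                  filepruner=source_code_filepruner)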

19
libs/pyutil/logutil.py

@ -0,0 +1,19 @@
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
# This little file makes it so that we can use "log.msg()" and the contents
# get logged to the Twisted logger if present, else to the Python Standard
# Library logger.
import warnings
warnings.warn("deprecated", DeprecationWarning)
try:
from twisted.python import log
log # http://divmod.org/trac/ticket/1499
except ImportError:
import logging
class MinimalLogger:
def msg(self, m):
logging.log(0, m)
log = MinimalLogger()
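A minimal sketch of how callers use this shim (illustrative, not part of logutil.py; note that importing the module emits a DeprecationWarning):

from pyutil import logutil

# Goes to Twisted's logger when twisted is importable, otherwise to the
# standard-library logging module via the MinimalLogger fallback above.
logutil.log.msg("hello from pyutil.logutil")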

106
libs/pyutil/mathutil.py

@ -0,0 +1,106 @@
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
A few commonly needed functions.
"""
import math
def div_ceil(n, d):
"""
The smallest integer k such that k*d >= n.
"""
return (n/d) + (n%d != 0)
def next_multiple(n, k):
"""
The smallest multiple of k which is >= n. Note that if n is 0 then the
answer is 0.
"""
return div_ceil(n, k) * k
def pad_size(n, k):
"""
The smallest number that has to be added to n to equal a multiple of k.
"""
if n%k:
return k - n%k
else:
return 0
def is_power_of_k(n, k):
return k**int(math.log(n, k) + 0.5) == n
def next_power_of_k(n, k):
p = 1
while p < n:
p *= k
return p
def ave(l):
return sum(l) / len(l)
def log_ceil(n, b):
"""
The smallest integer k such that b^k >= n.
log_ceil(n, 2) is the number of bits needed to store any of n values, e.g.
the number of bits needed to store any of 128 possible values is 7.
"""
p = 1
k = 0
while p < n:
p *= b
k += 1
return k
def log_floor(n, b):
"""
The largest integer k such that b^k <= n.
"""
p = 1
k = 0
while p <= n:
p *= b
k += 1
return k - 1
def linear_fit_slope(ps):
"""
Single-independent-variable linear regression -- least squares method.
At least, I *think* this function computes that answer. I no longer
remember where I learned this trick and at the moment I can't prove to
myself that this is correct.
@param ps a sequence of tuples of (x, y)
"""
avex = ave([x for (x, y) in ps])
avey = ave([y for (x, y) in ps])
sxy = sum([ (x - avex) * (y - avey) for (x, y) in ps ])
sxx = sum([ (x - avex) ** 2 for (x, y) in ps ])
if sxx == 0:
return None
return sxy / sxx
def permute(l):
"""
Return all possible permutations of l.
@type l: sequence
@rtype a set of sequences
"""
if len(l) == 1:
return [l,]
res = []
for i in range(len(l)):
l2 = list(l[:])
x = l2.pop(i)
for l3 in permute(l2):
l3.append(x)
res.append(l3)
return res
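A few worked examples for the helpers above (illustrative, not part of the module; the numbers are chosen only to make the definitions concrete)::

>>> from pyutil import mathutil
>>> mathutil.div_ceil(7, 3)
3
>>> mathutil.next_multiple(7, 4)
8
>>> mathutil.pad_size(10, 4)
2
>>> mathutil.log_ceil(128, 2)
7
>>> mathutil.linear_fit_slope([(0, 1), (1, 3), (2, 5)])
2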

586
libs/pyutil/memutil.py

@ -0,0 +1,586 @@
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
# from the Python Standard Library
import exceptions, gc, math, operator, os, sys, types
# from the pyutil library
from assertutil import precondition
import mathutil
class Canary:
"""
Want to get a printout when your object is garbage collected? Then put "self.canary = Canary(self)" in your object's constructor.
"""
def __init__(self, owner):
self.ownerdesc = repr(owner)
def __del__(self):
print "Canary says that %s is gone." % self.ownerdesc
def estimate_mem_of_obj(o):
# assumes 32-bit CPUs...
PY_STRUCT_HEAD_LEN=4
if hasattr(o, '__len__'):
if isinstance(o, str):
return PY_STRUCT_HEAD_LEN + o.__len__() * 1
if isinstance(o, unicode):
return PY_STRUCT_HEAD_LEN + o.__len__() * 4 # 4 depends on implementation and is approximate
if isinstance(o, (tuple, list,)):
return PY_STRUCT_HEAD_LEN + o.__len__() * 4
if isinstance(o, (dict, set,)):
return PY_STRUCT_HEAD_LEN + o.__len__() * 4 * 2 * 2 # approximate
if isinstance(o, int):
return PY_STRUCT_HEAD_LEN + 4
if isinstance(o, long):
return PY_STRUCT_HEAD_LEN + 4
if o < 1:
return PY_STRUCT_HEAD_LEN
else:
return PY_STRUCT_HEAD_LEN + math.log(o) / 5 # the 5 was empirically determined (it is approximate)
if isinstance(o, float):
return PY_STRUCT_HEAD_LEN + 8
# Uh-oh... I wonder what we are missing here...
return PY_STRUCT_HEAD_LEN
def check_for_obj_leakage(f, *args, **kwargs):
"""
The idea is that I am going to invoke f(), then run gc.collect(), then run
gc.get_objects() to get a complete list of all objects in the system, then
invoke f() a second time, then run gc.collect(), then run gc.get_objects()
to get a list of all the objects *now* in the system.
Then I return a tuple of two things: the first element of the tuple is the
difference between the number of objects in the second list and the number
of objects in the first list.
I.e., if this number is zero then you can be pretty sure there is no memory
leak, unless f is deleting some objects and replacing them by exactly the
same number of objects but the new objects take up more memory. If this
number is greater than zero then you can be pretty sure there is a memory
leak, unless f is doing some memoization/caching behavior and it will
eventually stabilize, which you can detect by running
check_for_obj_leakage() more times and seeing if it stabilizes.
(Actually we run f() followed by gc.collect() one time before we start in
order to account for any static objects which are created the first time
you run f() and then re-used after that.)
The second element in the return value is the set of all objects which were
present in the second list and not in the first. Some of these objects
might be memory-leaked objects, or perhaps f deleted some objects and
replaced them with equivalent objects, in which case these objects are not
leaked.
(We actually invoke gc.collect() three times in a row in case there are
objects which get collected in the first pass that have finalizers which
create new reference-cycled objects... "3" is a superstitious number -- we
figure most of the time the finalizers of the things produced by the first
round of finalizers won't themselves produce another round of
reference-cycled objects.)
"""
f()
gc.collect();gc.collect();gc.collect()
f()
gc.collect();gc.collect();gc.collect()
r1 = gc.get_objects()
f()
gc.collect();gc.collect();gc.collect()
r2 = gc.get_objects()
d2 = dict([(id(x), x) for x in r2])
# Now remove everything from r1, and r1 itself, from d2.
del d2[id(r1)]
for o in r1:
if id(o) in d2:
del d2[id(o)]
return (len(r2) - len(r1) - 1, d2)
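A usage sketch for check_for_obj_leakage() (illustrative, not part of memutil.py; the leaky() function here is invented to make the difference visibly non-zero):

from pyutil import memutil

_kept = []
def leaky():
    _kept.append(object())   # every call keeps one more object alive

delta, new_objects = memutil.check_for_obj_leakage(leaky)
# delta should be positive here, since each invocation retains another object
print delta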
def measure_obj_leakage(f, numsamples=2**7, iterspersample=2**4, *args, **kwargs):
"""
The idea is we are going to use count_all_objects() to see how many
objects are in use, and keep track of that number with respect to how
many times we've invoked f(), and return the slope of the best linear
fit.
@param numsamples: recommended: 2**7
@param iterspersample: how many times f() should be invoked per sample;
Basically, choose iterspersample such that
iterspersample * numsamples *
how-long-it-takes-to-compute-f() is slightly less
than how long you are willing to wait for this
leak test.
@return: the slope of the best linear fit, which can be interpreted as 'the
approximate number of Python objects created and not destroyed
per invocation of f()'
"""
precondition(numsamples > 0, "numsamples is required to be positive.", numsamples)
precondition(iterspersample > 0, "iterspersample is required to be positive.", iterspersample)
resiters = [None]*numsamples # values: iters
resnumobjs = [None]*numsamples # values: numobjs
totaliters = 0
for i in range(numsamples):
for j in range(iterspersample):
f(*args, **kwargs)
totaliters = totaliters + iterspersample
resiters[i] = totaliters
gc.collect()
resnumobjs[i] = count_all_objects()
# print "totaliters: %s, numobjs: %s" % (resiters[-1], resnumobjs[-1],)
avex = float(reduce(operator.__add__, resiters)) / len(resiters)
avey = float(reduce(operator.__add__, resnumobjs)) / len(resnumobjs)
sxy = reduce(operator.__add__, map(lambda a, avex=avex, avey=avey: (a[0] - avex) * (a[1] - avey), zip(resiters, resnumobjs)))
sxx = reduce(operator.__add__, map(lambda a, avex=avex: (a - avex) ** 2, resiters))
return sxy / sxx
def linear_fit_slope(xs, ys):
avex = float(reduce(operator.__add__, xs)) / len(xs)
avey = float(reduce(operator.__add__, ys)) / len(ys)
sxy = reduce(operator.__add__, map(lambda a, avex=avex, avey=avey: (a[0] - avex) * (a[1] - avey), zip(xs, ys)))
sxx = reduce(operator.__add__, map(lambda a, avex=avex: (a - avex) ** 2, xs))
return sxy / sxx
def measure_ref_leakage(f, numsamples=2**7, iterspersample=2**4, *args, **kwargs):
"""
The idea is we are going to use sys.gettotalrefcount() to see how many
references are extant, and keep track of that number with respect to how
many times we've invoked f(), and return the slope of the best linear
fit.
@param numsamples: recommended: 2**7
@param iterspersample: how many times f() should be invoked per sample;
Basically, choose iterspersample such that
iterspersample * numsamples *
how-long-it-takes-to-compute-f() is slightly less
than how long you are willing to wait for this
leak test.
@return: the slope of the best linear fit, which can be interpreted as 'the
approximate number of Python references created and not
nullified per invocation of f()'
"""
precondition(numsamples > 0, "numsamples is required to be positive.", numsamples)
precondition(iterspersample > 0, "iterspersample is required to be positive.", iterspersample)
try:
sys.gettotalrefcount()
except AttributeError, le:
raise AttributeError(le, "Probably this is not a debug build of Python, so it doesn't have a sys.gettotalrefcount function.")
resiters = [None]*numsamples # values: iters
resnumrefs = [None]*numsamples # values: numrefs
totaliters = 0
for i in range(numsamples):
for j in range(iterspersample):
f(*args, **kwargs)
totaliters = totaliters + iterspersample
resiters[i] = totaliters
gc.collect()
resnumrefs[i] = sys.gettotalrefcount()
# print "totaliters: %s, numrefss: %s" % (resiters[-1], resnumrefs[-1],)
avex = float(reduce(operator.__add__, resiters)) / len(resiters)
avey = float(reduce(operator.__add__, resnumrefs)) / len(resnumrefs)
sxy = reduce(operator.__add__, map(lambda a, avex=avex, avey=avey: (a[0] - avex) * (a[1] - avey), zip(resiters, resnumrefs)))
sxx = reduce(operator.__add__, map(lambda a, avex=avex: (a - avex) ** 2, resiters))
return sxy / sxx
class NotSupportedException(exceptions.StandardError):
"""
Just an exception class. It is raised by the get_mem_used*() functions if the OS does
not support the operation.
"""
pass
def get_mem_used():
"""
This only works on Linux, and only if the /proc/$PID/statm output is the
same as that in linux kernel 2.6. Also `os.getpid()' must work.
@return: tuple of (res, virt) used by this process
"""
try:
import resource
except ImportError:
raise NotSupportedException
# sample output from cat /proc/$PID/statm:
# 14317 3092 832 279 0 2108 0
a = os.popen("cat /proc/%s/statm 2>/dev/null" % os.getpid()).read().split()
if not a:
raise NotSupportedException
return (int(a[1]) * resource.getpagesize(), int(a[0]) * resource.getpagesize(),)
def get_mem_used_res():
"""
This only works on Linux, and only if the /proc/$PID/statm output is the
same as that in linux kernel 2.6. Also `os.getpid()' must work.
"""
try:
import resource
except ImportError:
raise NotSupportedException
# sample output from cat /proc/$PID/statm:
# 14317 3092 832 279 0 2108 0
a = os.popen("cat /proc/%s/statm" % os.getpid()).read().split()
if not len(a) > 1:
raise NotSupportedException
return int(a[1]) * resource.getpagesize()
def get_mem_usage_virt_and_res():
"""
This only works on Linux, and only if the /proc/$PID/statm output is the
same as that in linux kernel 2.6. Also `os.getpid()' must work.
"""
try:
import resource
except ImportError:
raise NotSupportedException
# sample output from cat /proc/$PID/statm:
# 14317 3092 832 279 0 2108 0
a = os.popen("cat /proc/%s/statm" % os.getpid()).read().split()
if not len(a) > 1:
raise NotSupportedException
return (int(a[0]) * resource.getpagesize(), int(a[1]) * resource.getpagesize(),)
class Measurer(object):
def __init__(self, f, numsamples=2**7, iterspersample=2**4, *args, **kwargs):
"""
@param f a callable; If it returns a deferred then the memory will not
be measured and the next iteration will not be started until the
deferred fires; else the memory will be measured and the next
iteration started when f returns.
"""
self.f = f
self.numsamples = numsamples
self.iterspersample = iterspersample
self.args = args
self.kwargs = kwargs
# from twisted
from twisted.internet import defer
self.d = defer.Deferred()
def when_complete(self):
return self.d
def _invoke(self):
d = self.f(*self.args, **self.kwargs)
# from twisted
from twisted.internet import defer
if isinstance(d, defer.Deferred):
d.addCallback(self._after)
else:
self._after(None)
def start(self):
self.resiters = [None]*self.numsamples # values: iters
self.resmemusage = [None]*self.numsamples # values: memusage
self.totaliters = 0
self.i = 0
self.j = 0
self._invoke()
def _after(self, o):
self.j += 1
if self.j < self.iterspersample:
self._invoke()
return
if self.i < self.numsamples:
self.j = 0
self.i += 1
self.totaliters += self.iterspersample
self.resiters[self.i] = self.totaliters
self.resmemusage[self.i] = get_mem_used_res()
self._invoke()
return
self.d.callback(mathutil.linear_fit_slope(zip(self.resiters, self.resmemusage)))
def measure_mem_leakage(f, numsamples=2**7, iterspersample=2**4, *args, **kwargs):
"""
This does the same thing as measure_obj_leakage() but instead of using
count_all_objects() it uses get_mem_used_res(), which is currently
implemented for Linux and barely implemented for Mac OS X.
@param numsamples: recommended: 2**7
@param iterspersample: how many times `f()' should be invoked per sample;
Basically, choose `iterspersample' such that
(iterspersample * numsamples *
how-long-it-takes-to-compute-`f()') is slightly
less than how long you are willing to wait for
this leak test.
@return: the slope of the best linear fit, which can be interpreted as
'the approximate number of system bytes allocated and not freed
per invocation of f()'
"""
precondition(numsamples > 0, "numsamples is required to be positive.", numsamples)
precondition(iterspersample > 0, "iterspersample is required to be positive.", iterspersample)
resiters = [None]*numsamples # values: iters
resmemusage = [None]*numsamples # values: memusage
totaliters = 0
for i in range(numsamples):
for j in range(iterspersample):
f(*args, **kwargs)
totaliters = totaliters + iterspersample
resiters[i] = totaliters
gc.collect()
resmemusage[i] = get_mem_used_res()
# print "totaliters: %s, numobjs: %s" % (resiters[-1], resmemusage[-1],)
avex = float(reduce(operator.__add__, resiters)) / len(resiters)
avey = float(reduce(operator.__add__, resmemusage)) / len(resmemusage)
sxy = reduce(operator.__add__, map(lambda a, avex=avex, avey=avey: (a[0] - avex) * (a[1] - avey), zip(resiters, resmemusage)))
sxx = reduce(operator.__add__, map(lambda a, avex=avex: (a - avex) ** 2, resiters))
if sxx == 0:
return None
return sxy / sxx
def describe_object(o, FunctionType=types.FunctionType, MethodType=types.MethodType, InstanceType=types.InstanceType):
"""
For human analysis, when humans are attempting to understand where all the
memory is going. Argument o is an object, return value is a string
describing the object.
"""
sl = []
if isinstance(o, FunctionType):
try:
sl.append("<type 'function' %s>" % str(o.func_name))
except:
pass
elif isinstance(o, MethodType):
try:
sl.append("<type 'method' %s>" % str(o.im_func.func_name))
except:
pass
elif isinstance(o, InstanceType):
try:
sl.append("<type 'instance' %s>" % str(o.__class__.__name__))
except:
pass
else:
sl.append(str(type(o)))
try:
sl.append(str(len(o)))
except:
pass
return ''.join(sl)
import dictutil
def describe_object_with_dict_details(o):
sl = []
sl.append(str(type(o)))
if isinstance(o, types.FunctionType):
try:
sl.append(str(o.func_name))
except:
pass
elif isinstance(o, types.MethodType):
try:
sl.append(str(o.im_func.func_name))
except:
pass
try:
sl.append(str(len(o)))
except:
pass
if isinstance(o, dict) and o:
sl.append('-')
nd = dictutil.NumDict()
for k, v in o.iteritems():
nd.inc((describe_object(k), describe_object(v),))
k, v = nd.item_with_largest_value()
sl.append("-")
iterator = o.iteritems()
k,v = iterator.next()
sl.append(describe_object(k))
sl.append(":")
sl.append(describe_object(v))
return ''.join(sl)
def describe_dict(o):
sl = ['<dict']
l = len(o)
sl.append(str(l))
if l:
sl.append("-")
iterator = o.iteritems()
firstitem=True
try:
while True:
if firstitem:
firstitem = False
else:
sl.append(", ")
k,v = iterator.next()
sl.append(describe_object(k))
sl.append(": ")
sl.append(describe_object(v))
except StopIteration:
pass
sl.append('>')
return ''.join(sl)
def count_all_objects():
ids = set()
ls = locals()
import inspect
cf = inspect.currentframe()
for o in gc.get_objects():
if o is ids or o is ls or o is cf:
continue
if not id(o) in ids:
ids.add(id(o))
for so in gc.get_referents(o):
if not id(so) in ids:
ids.add(id(so))
return len(ids)
def visit_all_objects(f):
"""
Brian and I *think* that this gets all objects. This is predicated on the
assumption that every object either participates in gc, or is at most one
hop from an object that participates in gc. This was Brian's clever idea.
"""
ids = set()
ls = locals()
import inspect
cf = inspect.currentframe()
for o in gc.get_objects():
if o is ids or o is ls or o is cf:
continue
if not id(o) in ids:
ids.add(id(o))
f(o)
for so in gc.get_referents(o):
if not id(so) in ids:
ids.add(id(so))
f(so)
def get_all_objects():
objs = []
def addit(o):
objs.append(o)
visit_all_objects(addit)
return objs
def describe_all_objects():
import dictutil
d = dictutil.NumDict()
for o in get_all_objects():
d.inc(describe_object(o))
return d
def dump_description_of_object(o, f):
f.write("%x" % (id(o),))
f.write("-")
f.write(describe_object(o))
f.write("\n")
def dump_description_of_object_refs(o, f):
# This holds the ids of all referents that we've already dumped.
dumped = set()
# First, any __dict__ items
try:
itemsiter = o.__dict__.iteritems()
except:
pass
else:
for k, v in itemsiter:
try:
idr = id(v)
if idr not in dumped:
dumped.add(idr)
f.write("%d:"%len(k))
f.write(k)
f.write(",")
f.write("%0x,"%idr)
except:
pass
# Then anything else that gc.get_referents() returns.
for r in gc.get_referents(o):
idr = id(r)
if idr not in dumped:
dumped.add(idr)
f.write("0:,%0x,"%idr)
def dump_descriptions_of_all_objects(f):
ids = set()
ls = locals()
for o in gc.get_objects():
if o is f or o is ids or o is ls:
continue
if not id(o) in ids:
ids.add(id(o))
dump_description_of_object(o, f)
for so in gc.get_referents(o):
if o is f or o is ids or o is ls:
continue
if not id(so) in ids:
ids.add(id(so))
dump_description_of_object(so, f)
ls = None # break reference cycle
return len(ids)
def dump_description_of_object_with_refs(o, f):
f.write("%0x" % (id(o),))
f.write("-")
desc = describe_object(o)
f.write("%d:"%len(desc))
f.write(desc)
f.write(",")
dump_description_of_object_refs(o, f)
f.write("\n")
def dump_descriptions_of_all_objects_with_refs(f):
ids = set()
ls = locals()
for o in gc.get_objects():
if o is f or o is ids or o is ls:
continue
if not id(o) in ids:
ids.add(id(o))
dump_description_of_object_with_refs(o, f)
for so in gc.get_referents(o):
if o is f or o is ids or o is ls:
continue
if not id(so) in ids:
ids.add(id(so))
dump_description_of_object_with_refs(so, f)
ls = None # break reference cycle
return len(ids)
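A sketch of driving the dump helpers above (illustrative, not part of memutil.py; the output filename is arbitrary):

from pyutil import memutil

f = open("objdump.txt", "w")
try:
    # one line per live object: "<id>-<description>"
    count = memutil.dump_descriptions_of_all_objects(f)
finally:
    f.close()
print "described %d objects" % count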
import re
NRE = re.compile("[1-9][0-9]*$")
def undump_descriptions_of_all_objects(inf):
d = {}
for l in inf:
dash=l.find('-')
if dash == -1:
raise l
mo = NRE.search(l)
if mo:
typstr = l[dash+1:mo.start(0)]
num=int(mo.group(0))
if str(num) != mo.group(0):
raise mo.group(0)
else:
typstr = l[dash+1:]
num = None
d[l[:dash]] = (typstr, num,)
return d

52
libs/pyutil/nummedobj.py

@ -0,0 +1,52 @@
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn
# mailto:zooko@zooko.com
# This file is part of pyutil; see README.rst for licensing terms.
import dictutil
class NummedObj(object):
"""
This is useful for nicer debug printouts. Instead of objects of the same class being
distinguished from one another by their memory address, they each get a unique number, which
can be read as "the first object of this class", "the second object of this class", etc. This
is especially useful because separate runs of a program will yield identical debug output,
(assuming that the objects get created in the same order in each run). This makes it possible
to diff outputs from separate runs to see what changed, without having to ignore a difference
on every line due to different memory addresses of objects.
"""
objnums = dictutil.NumDict() # key: class names, value: highest used object number
def __init__(self, klass=None):
"""
@param klass: in which class are you counted? If default value of `None', then self.__class__ will be used.
"""
if klass is None:
klass = self.__class__
self._classname = klass.__name__
NummedObj.objnums.inc(self._classname)
self._objid = NummedObj.objnums[self._classname]
def __repr__(self):
return "<%s #%d>" % (self._classname, self._objid,)
def __lt__(self, other):
return (self._objid, self._classname,) < (other._objid, other._classname,)
def __le__(self, other):
return (self._objid, self._classname,) <= (other._objid, other._classname,)
def __eq__(self, other):
return (self._objid, self._classname,) == (other._objid, other._classname,)
def __ne__(self, other):
return (self._objid, self._classname,) != (other._objid, other._classname,)
def __gt__(self, other):
return (self._objid, self._classname,) > (other._objid, other._classname,)
def __ge__(self, other):
return (self._objid, self._classname,) >= (other._objid, other._classname,)
def __hash__(self):
return id(self)
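A short sketch of the intended use (illustrative, not part of the module; the Worker class is invented for the example, and the numbers assume no earlier Worker instances)::

>>> from pyutil.nummedobj import NummedObj
>>> class Worker(NummedObj):
...     pass
>>> Worker()
<Worker #1>
>>> Worker()
<Worker #2>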

99
libs/pyutil/observer.py

@ -0,0 +1,99 @@
# -*- test-case-name: allmydata.test.test_observer -*-
from twisted.internet import defer
try:
from foolscap.eventual import eventually
eventually # http://divmod.org/trac/ticket/1499
except ImportError:
from twisted.internet import reactor
def eventually(f, *args, **kwargs):
return reactor.callLater(0, f, *args, **kwargs)
"""The idiom we use is for the observed object to offer a method named
'when_something', which returns a deferred. That deferred will be fired when
something happens. The way this is typically implemented is that the observed
has an ObserverList whose when_fired method is called in the observed's
'when_something'."""
class OneShotObserverList:
"""A one-shot event distributor."""
def __init__(self):
self._fired = False
self._result = None
self._watchers = []
self.__repr__ = self._unfired_repr
def _unfired_repr(self):
return "<OneShotObserverList [%s]>" % (self._watchers, )
def _fired_repr(self):
return "<OneShotObserverList -> %s>" % (self._result, )
def _get_result(self):
return self._result
def when_fired(self):
if self._fired:
return defer.succeed(self._get_result())
d = defer.Deferred()
self._watchers.append(d)
return d
def fire(self, result):
assert not self._fired
self._fired = True
self._result = result
self._fire(result)
def _fire(self, result):
for w in self._watchers:
eventually(w.callback, result)
del self._watchers
self.__repr__ = self._fired_repr
def fire_if_not_fired(self, result):
if not self._fired:
self.fire(result)
class LazyOneShotObserverList(OneShotObserverList):
"""
a variant of OneShotObserverList which does not retain
the result it handles, but rather retains a callable()
through which it retrieves the data if and when needed.
"""
def __init__(self):
OneShotObserverList.__init__(self)
def _get_result(self):
return self._result_producer()
def fire(self, result_producer):
"""
@param result_producer: a no-arg callable which
returns the data which is to be considered the
'result' for this observer list. note that this
function may be called multiple times - once
upon initial firing, and potentially once more
for each subsequent when_fired() deferred created
"""
assert not self._fired
self._fired = True
self._result_producer = result_producer
if self._watchers: # if not, don't call result_producer
self._fire(self._get_result())
class ObserverList:
"""A simple class to distribute events to a number of subscribers."""
def __init__(self):
self._watchers = []
def subscribe(self, observer):
self._watchers.append(observer)
def unsubscribe(self, observer):
self._watchers.remove(observer)
def notify(self, *args, **kwargs):
for o in self._watchers:
eventually(o, *args, **kwargs)
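A sketch of the 'when_something' idiom described in the module string above (illustrative, not part of observer.py; the Download class and its method names are invented for the example, and Twisted must be installed, as the module itself requires):

from pyutil.observer import OneShotObserverList

class Download:
    def __init__(self):
        self._finished = OneShotObserverList()
    def when_finished(self):
        # the observed object offers a when_*() method returning a deferred
        return self._finished.when_fired()
    def _finish(self, result):
        # fires every outstanding deferred (and any requested afterwards)
        self._finished.fire(result)

dl = Download()
dl.when_finished().addCallback(lambda r: r)  # subscribe before the event
dl._finish("done")  # callbacks are delivered via eventually(), i.e. on a later reactor turn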

552
libs/pyutil/odict.py

@ -0,0 +1,552 @@
# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn
"""
This module offers an Ordered Dict, which is a dict that preserves
insertion order. See PEP 372 for a description of the problem. This
implementation uses a linked-list to get good O(1) asymptotic
performance. (Actually it is O(hashtable-update-cost), but whatever.)
Warning: if -O optimizations are not turned on then OrderedDict performs
extensive self-analysis in every function call, which can take minutes
and minutes for a large cache. Turn on -O, or comment out assert
self._assert_invariants()
"""
import operator
from assertutil import _assert, precondition
from humanreadable import hr
class OrderedDict:
"""
An efficient ordered dict.
Adding an item that is already in the dict *does not* make it the
most-recently-added item, although it may change the state of the
dict itself (if the value is different from the previous value).
See also SmallOrderedDict (below), which is faster in some cases.
"""
class ItemIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.ts][1]
def __iter__(self):
return self
def next(self):
if self.i is self.c.hs:
raise StopIteration
k = self.i
precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c)
(v, p, n,) = self.c.d[k]
self.i = p
return (k, v,)
class KeyIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.ts][1]
def __iter__(self):
return self
def next(self):
if self.i is self.c.hs:
raise StopIteration
k = self.i
precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c)
(v, p, n,) = self.c.d[k]
self.i = p
return k
class ValIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.ts][1]
def __iter__(self):
return self
def next(self):
if self.i is self.c.hs:
raise StopIteration
precondition(self.c.d.has_key(self.i), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
(v, p, n,) = self.c.d[self.i]
self.i = p
return v
class Sentinel:
def __init__(self, msg):
self.msg = msg
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.msg,)
def __init__(self, initialdata={}):
self.d = {} # k: k, v: [v, prev, next,] # the dict
self.hs = OrderedDict.Sentinel("hs")
self.ts = OrderedDict.Sentinel("ts")
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.update(initialdata)
assert self._assert_invariants()
def __repr_n__(self, n=None):
s = ["{",]
try:
iter = self.iteritems()
x = iter.next()
s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
i = 1
while (n is None) or (i < n):
x = iter.next()
s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
except StopIteration:
pass
s.append("}")
return ''.join(s)
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),)
def __str__(self):
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),)
def _assert_invariants(self):
_assert((len(self.d) > 2) == (self.d[self.hs][2] is not self.ts) == (self.d[self.ts][1] is not self.hs), "Head and tail point to something other than each other if and only if there is at least one element in the dictionary.", self.hs, self.ts, len(self.d))
foundprevsentinel = 0
foundnextsentinel = 0
for (k, (v, p, n,)) in self.d.iteritems():
_assert(v not in (self.hs, self.ts,))
_assert(p is not self.ts, "A reference to the tail sentinel may not appear in prev.", k, v, p, n)
_assert(n is not self.hs, "A reference to the head sentinel may not appear in next.", k, v, p, n)
_assert(p in self.d, "Each prev is required to appear as a key in the dict.", k, v, p, n)
_assert(n in self.d, "Each next is required to appear as a key in the dict.", k, v, p, n)
if p is self.hs:
foundprevsentinel += 1
_assert(foundprevsentinel <= 2, "No more than two references to the head sentinel may appear as a prev.", k, v, p, n)
if n is self.ts:
foundnextsentinel += 1
_assert(foundnextsentinel <= 2, "No more than two references to the tail sentinel may appear as a next.", k, v, p, n)
_assert(foundprevsentinel == 2, "A reference to the head sentinel is required to appear as a prev (plus a self-referential reference).")
_assert(foundnextsentinel == 2, "A reference to the tail sentinel is required to appear as a next (plus a self-referential reference).")
count = 0
for (k, v,) in self.iteritems():
_assert(k not in (self.hs, self.ts,), k, self.hs, self.ts)
count += 1
_assert(count == len(self.d)-2, count, len(self.d)) # -2 for the sentinels
return True
def move_to_most_recent(self, k, strictkey=False):
assert self._assert_invariants()
if not self.d.has_key(k):
if strictkey:
raise KeyError, k
return
node = self.d[k]
# relink
self.d[node[1]][2] = node[2]
self.d[node[2]][1] = node[1]
# move to front
hnode = self.d[self.hs]
node[1] = self.hs
node[2] = hnode[2]
hnode[2] = k
self.d[node[2]][1] = k
assert self._assert_invariants()
def iteritems(self):
return OrderedDict.ItemIterator(self)
def itervalues(self):
return OrderedDict.ValIterator(self)
def iterkeys(self):
return self.__iter__()
def __iter__(self):
return OrderedDict.KeyIterator(self)
def __getitem__(self, key, default=None, strictkey=True):
node = self.d.get(key)
if not node:
if strictkey:
raise KeyError, key
return default
return node[0]
def __setitem__(self, k, v=None):
assert self._assert_invariants()
node = self.d.get(k)
if node:
node[0] = v
return
hnode = self.d[self.hs]
n = hnode[2]
self.d[k] = [v, self.hs, n,]
hnode[2] = k
self.d[n][1] = k
assert self._assert_invariants()
return v
def __delitem__(self, key, default=None, strictkey=True):
"""
@param strictkey: True if you want a KeyError in the case that
key is not there, False if you want a reference to default
in the case that key is not there
@param default: the object to return if key is not there; This
is ignored if strictkey.
@return: the value removed or default if there is not item by
that key and strictkey is False
"""
assert self._assert_invariants()
if self.d.has_key(key):
node = self.d[key]
# relink
self.d[node[1]][2] = node[2]
self.d[node[2]][1] = node[1]
del self.d[key]
assert self._assert_invariants()
return node[0]
elif strictkey:
assert self._assert_invariants()
raise KeyError, key
else:
assert self._assert_invariants()
return default
def has_key(self, key):
assert self._assert_invariants()
if self.d.has_key(key):
assert self._assert_invariants()
return True
else:
assert self._assert_invariants()
return False
def clear(self):
assert self._assert_invariants()
self.d.clear()
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
assert self._assert_invariants()
def update(self, otherdict):
"""
@return: self
"""
assert self._assert_invariants()
for (k, v,) in otherdict.iteritems():
assert self._assert_invariants()
self[k] = v
assert self._assert_invariants()
def pop(self):
assert self._assert_invariants()
if len(self.d) <= 2: # self.d always holds the two sentinels, so <= 2 means empty
raise KeyError, 'pop(): dictionary is empty'
k = self.d[self.hs][2]
self.remove(k)
assert self._assert_invariants()
return k
def popitem(self):
assert self._assert_invariants()
if len(self.d) <= 2: # self.d always holds the two sentinels, so <= 2 means empty
raise KeyError, 'popitem(): dictionary is empty'
k = self.d[self.hs][2]
val = self.remove(k)
assert self._assert_invariants()
return (k, val,)
def keys_unsorted(self):
assert self._assert_invariants()
t = self.d.copy()
del t[self.hs]
del t[self.ts]
assert self._assert_invariants()
return t.keys()
def keys(self):
res = [None] * len(self)
i = 0
for k in self.iterkeys():
res[i] = k
i += 1
return res
def values_unsorted(self):
assert self._assert_invariants()
t = self.d.copy()
del t[self.hs]
del t[self.ts]
assert self._assert_invariants()
return map(operator.__getitem__, t.values(), [0]*len(t))
def values(self):
res = [None] * len(self)
i = 0
for v in self.itervalues():
res[i] = v
i += 1
return res
def items(self):
res = [None] * len(self)
i = 0
for it in self.iteritems():
res[i] = it
i += 1
return res
def __len__(self):
return len(self.d) - 2
def insert(self, key, val=None):
assert self._assert_invariants()
result = self.__setitem__(key, val)
assert self._assert_invariants()
return result
def setdefault(self, key, default=None):
assert self._assert_invariants()
if not self.has_key(key):
self[key] = default
assert self._assert_invariants()
return self[key]
def get(self, key, default=None):
return self.__getitem__(key, default, strictkey=False)
def remove(self, key, default=None, strictkey=True):
assert self._assert_invariants()
result = self.__delitem__(key, default, strictkey)
assert self._assert_invariants()
return result
class SmallOrderedDict(dict):
"""
SmallOrderedDict is faster than OrderedDict for small sets. How small? That
depends on your machine and which operations you use most often. Use
performance profiling to determine whether the ordered dict class that you are
using makes any difference to the performance of your program, and if it
does, then run "quick_bench()" in test/test_cache.py to see which cache
implementation is faster for the size of your datasets.
A simple least-recently-used cache. It keeps an LRU queue, and
when the number of items in the cache reaches maxsize, it removes
the least recently used item.
"Looking" at an item or a key such as with "has_key()" makes that
item become the most recently used item.
You can also use "refresh()" to explicitly make an item become the most
recently used item.
Adding an item that is already in the dict *does* make it the
most- recently-used item although it does not change the state of
the dict itself.
"""
class ItemIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
if self.i == len(self.c._lru):
raise StopIteration
precondition(self.i < len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c)
k = self.c._lru[self.i] # self.i is an index into the LRU list, not a key
self.i += 1
return (k, dict.__getitem__(self.c, k),)
class KeyIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
if self.i == len(self.c._lru):
raise StopIteration
precondition(self.i < len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c)
k = self.c._lru[self.i] # self.i is an index into the LRU list, not a key
self.i += 1
return k
class ValueIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
if self.i == len(self.c._lru):
raise StopIteration
precondition(self.i < len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c)
k = self.c._lru[self.i] # self.i is an index into the LRU list, not a key
self.i += 1
return dict.__getitem__(self.c, k)
def __init__(self, initialdata={}, maxsize=128):
dict.__init__(self, initialdata)
self._lru = initialdata.keys() # contains keys
self._maxsize = maxsize
over = len(self) - self._maxsize
if over > 0:
map(dict.__delitem__, [self]*over, self._lru[:over])
del self._lru[:over]
assert self._assert_invariants()
def _assert_invariants(self):
_assert(len(self._lru) <= self._maxsize, "Size is required to be <= maxsize.")
_assert(len(filter(lambda x: dict.has_key(self, x), self._lru)) == len(self._lru), "Each key in self._lru is required to be in dict.", filter(lambda x: not dict.has_key(self, x), self._lru), len(self._lru), self._lru, len(self), self)
_assert(len(filter(lambda x: x in self._lru, self.keys())) == len(self), "Each key in dict is required to be in self._lru.", filter(lambda x: x not in self._lru, self.keys()), len(self._lru), self._lru, len(self), self)
_assert(len(self._lru) == len(self), "internal consistency", filter(lambda x: x not in self.keys(), self._lru), len(self._lru), self._lru, len(self), self)
_assert(len(self._lru) <= self._maxsize, "internal consistency", len(self._lru), self._lru, self._maxsize)
return True
def insert(self, key, item=None):
assert self._assert_invariants()
result = self.__setitem__(key, item)
assert self._assert_invariants()
return result
def setdefault(self, key, default=None):
assert self._assert_invariants()
if not self.has_key(key):
self[key] = default
assert self._assert_invariants()
return self[key]
def __setitem__(self, key, item=None):
assert self._assert_invariants()
if dict.has_key(self, key):
self._lru.remove(key)
else:
if len(self._lru) == self._maxsize:
# If this insert is going to increase the size of the cache to bigger than maxsize:
killkey = self._lru.pop(0)
dict.__delitem__(self, killkey)
dict.__setitem__(self, key, item)
self._lru.append(key)
assert self._assert_invariants()
return item
def remove(self, key, default=None, strictkey=True):
assert self._assert_invariants()
result = self.__delitem__(key, default, strictkey)
assert self._assert_invariants()
return result
def __delitem__(self, key, default=None, strictkey=True):
"""
@param strictkey: True if you want a KeyError in the case that
key is not there, False if you want a reference to default
in the case that key is not there
@param default: the object to return if key is not there; This
is ignored if strictkey.
@return: the object removed or default if there is not item by
that key and strictkey is False
"""
assert self._assert_invariants()
if dict.has_key(self, key):
val = dict.__getitem__(self, key)
dict.__delitem__(self, key)
self._lru.remove(key)
assert self._assert_invariants()
return val
elif strictkey:
assert self._assert_invariants()
raise KeyError, key
else:
assert self._assert_invariants()
return default
def clear(self):
assert self._assert_invariants()
dict.clear(self)
self._lru = []
assert self._assert_invariants()
def update(self, otherdict):
"""
@return: self
"""
assert self._assert_invariants()
if len(otherdict) > self._maxsize:
# Handling this special case here makes it possible to implement the
# other more common cases faster below.
dict.clear(self)
self._lru = []
if self._maxsize > (len(otherdict) - self._maxsize):
dict.update(self, otherdict)
while len(self) > self._maxsize:
dict.popitem(self)
else:
for k, v, in otherdict.iteritems():
if len(self) == self._maxsize:
break
dict.__setitem__(self, k, v)
self._lru = dict.keys(self)
assert self._assert_invariants()
return self
for k in otherdict.iterkeys():
if dict.has_key(self, k):
self._lru.remove(k)
self._lru.extend(otherdict.keys())
dict.update(self, otherdict)
over = len(self) - self._maxsize
if over > 0:
map(dict.__delitem__, [self]*over, self._lru[:over])
del self._lru[:over]
assert self._assert_invariants()
return self
def has_key(self, key):
assert self._assert_invariants()
if dict.has_key(self, key):
assert key in self._lru, "key: %s, self._lru: %s" % tuple(map(hr, (key, self._lru,)))
self._lru.remove(key)
self._lru.append(key)
assert self._assert_invariants()
return True
else:
assert self._assert_invariants()
return False
def refresh(self, key, strictkey=True):
"""
@param strictkey: raise a KeyError exception if key isn't present
"""
assert self._assert_invariants()
if not dict.has_key(self, key):
if strictkey:
raise KeyError, key
return
self._lru.remove(key)
self._lru.append(key)
def popitem(self):
if not self._lru:
raise KeyError, 'popitem(): dictionary is empty'
k = self._lru[-1]
obj = self.remove(k)
return (k, obj,)
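A small usage sketch for OrderedDict (illustrative, not part of the module; note, per the warning at the top of the file, that the invariant checks make large dicts slow unless -O is used). Iteration runs oldest-first, while popitem() removes the most recently added/moved entry::

>>> from pyutil.odict import OrderedDict
>>> d = OrderedDict()
>>> d['a'] = 1
>>> d['b'] = 2
>>> d['c'] = 3
>>> d.keys()
['a', 'b', 'c']
>>> d.move_to_most_recent('a')
>>> d.popitem()
('a', 1)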

552
libs/pyutil/odict.py~

@ -0,0 +1,552 @@
# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn
"""
This module offers an Ordered Dict, which is a dict that preserves
insertion order. See PEP 372 for a description of the problem. This
implementation uses a linked-list to get good O(1) asymptotic
performance. (Actually it is O(hashtable-update-cost), but whatever.)
Warning: if -O optimizations are not turned on then OrderedDict performs
extensive self-analysis in every function call, which can take minutes
and minutes for a large cache. Turn on -O, or comment out assert
self._assert_invariants()
"""
import operator
from assertutil import _assert, precondition
from humanreadable import hr
class OrderedDict:
"""
An efficient ordered dict.
Adding an item that is already in the dict *does not* make it the
most- recently-added item although it may change the state of the
dict itself (if the value is different than the previous value).
See also SmallOrderedDict (below), which is faster in some cases.
"""
class ItemIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.ts][1]
def __iter__(self):
return self
def next(self):
if self.i is self.c.hs:
raise StopIteration
k = self.i
precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c)
(v, p, n,) = self.c.d[k]
self.i = p
return (k, v,)
class KeyIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.ts][1]
def __iter__(self):
return self
def next(self):
if self.i is self.c.hs:
raise StopIteration
k = self.i
precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c)
(v, p, n,) = self.c.d[k]
self.i = p
return k
class ValIterator:
def __init__(self, c):
self.c = c
self.i = c.d[c.ts][1]
def __iter__(self):
return self
def next(self):
if self.i is self.c.hs:
raise StopIteration
precondition(self.c.d.has_key(self.i), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
(v, p, n,) = self.c.d[self.i]
self.i = p
return v
class Sentinel:
def __init__(self, msg):
self.msg = msg
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.msg,)
def __init__(self, initialdata={}):
self.d = {} # k: k, v: [v, prev, next,] # the dict
self.hs = OrderedDict.Sentinel("hs")
self.ts = OrderedDict.Sentinel("ts")
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.update(initialdata)
assert self._assert_invariants()
def __repr_n__(self, n=None):
s = ["{",]
try:
iter = self.iteritems()
x = iter.next()
s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
i = 1
while (n is None) or (i < n):
x = iter.next()
s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1]))
except StopIteration:
pass
s.append("}")
return ''.join(s)
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),)
def __str__(self):
return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),)
def _assert_invariants(self):
_assert((len(self.d) > 2) == (self.d[self.hs][2] is not self.ts) == (self.d[self.ts][1] is not self.hs), "Head and tail point to something other than each other if and only if there is at least one element in the dictionary.", self.hs, self.ts, len(self.d))
foundprevsentinel = 0
foundnextsentinel = 0
for (k, (v, p, n,)) in self.d.iteritems():
_assert(v not in (self.hs, self.ts,))
_assert(p is not self.ts, "A reference to the tail sentinel may not appear in prev.", k, v, p, n)
_assert(n is not self.hs, "A reference to the head sentinel may not appear in next.", k, v, p, n)
_assert(p in self.d, "Each prev is required to appear as a key in the dict.", k, v, p, n)
_assert(n in self.d, "Each next is required to appear as a key in the dict.", k, v, p, n)
if p is self.hs:
foundprevsentinel += 1
_assert(foundprevsentinel <= 2, "No more than two references to the head sentinel may appear as a prev.", k, v, p, n)
if n is self.ts:
foundnextsentinel += 1
_assert(foundnextsentinel <= 2, "No more than two references to the tail sentinel may appear as a next.", k, v, p, n)
_assert(foundprevsentinel == 2, "A reference to the head sentinel is required to appear as a prev (plus a self-referential reference).")
_assert(foundnextsentinel == 2, "A reference to the tail sentinel is required to appear as a next (plus a self-referential reference).")
count = 0
for (k, v,) in self.iteritems():
_assert(k not in (self.hs, self.ts,), k, self.hs, self.ts)
count += 1
_assert(count == len(self.d)-2, count, len(self.d)) # -2 for the sentinels
return True
def move_to_most_recent(self, k, strictkey=False):
assert self._assert_invariants()
if not self.d.has_key(k):
if strictkey:
raise KeyError, k
return
node = self.d[k]
# relink
self.d[node[1]][2] = node[2]
self.d[node[2]][1] = node[1]
# move to front
hnode = self.d[self.hs]
node[1] = self.hs
node[2] = hnode[2]
hnode[2] = k
self.d[node[2]][1] = k
assert self._assert_invariants()
def iteritems(self):
return OrderedDict.ItemIterator(self)
def itervalues(self):
return OrderedDict.ValIterator(self)
def iterkeys(self):
return self.__iter__()
def __iter__(self):
return OrderedDict.KeyIterator(self)
def __getitem__(self, key, default=None, strictkey=True):
node = self.d.get(key)
if not node:
if strictkey:
raise KeyError, key
return default
return node[0]
def __setitem__(self, k, v=None):
assert self._assert_invariants()
node = self.d.get(k)
if node:
node[0] = v
return
hnode = self.d[self.hs]
n = hnode[2]
self.d[k] = [v, self.hs, n,]
hnode[2] = k
self.d[n][1] = k
assert self._assert_invariants()
return v
def __delitem__(self, key, default=None, strictkey=True):
"""
@param strictkey: True if you want a KeyError in the case that
key is not there, False if you want a reference to default
in the case that key is not there
@param default: the object to return if key is not there; This
is ignored if strictkey.
@return: the value removed or default if there is not item by
that key and strictkey is False
"""
assert self._assert_invariants()
if self.d.has_key(key):
node = self.d[key]
# relink
self.d[node[1]][2] = node[2]
self.d[node[2]][1] = node[1]
del self.d[key]
assert self._assert_invariants()
return node[0]
elif strictkey:
assert self._assert_invariants()
raise KeyError, key
else:
assert self._assert_invariants()
return default
def has_key(self, key):
assert self._assert_invariants()
if self.d.has_key(key):
assert self._assert_invariants()
return True
else:
assert self._assert_invariants()
return False
def clear(self):
assert self._assert_invariants()
self.d.clear()
self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes.
assert self._assert_invariants()
def update(self, otherdict):
"""
@return: self
"""
assert self._assert_invariants()
for (k, v,) in otherdict.iteritems():
assert self._assert_invariants()
self[k] = v
assert self._assert_invariants()
def pop(self):
assert self._assert_invariants()
if len(self.d) <= 2: # self.d always holds the two sentinels, so <= 2 means no real items
raise KeyError, 'pop(): dictionary is empty'
k = self.d[self.hs][2]
self.remove(k)
assert self._assert_invariants()
return k
def popitem(self):
assert self._assert_invariants()
if len(self.d) <= 2: # self.d always holds the two sentinels, so <= 2 means no real items
raise KeyError, 'popitem(): dictionary is empty'
k = self.d[self.hs][2]
val = self.remove(k)
assert self._assert_invariants()
return (k, val,)
def keys_unsorted(self):
assert self._assert_invariants()
t = self.d.copy()
del t[self.hs]
del t[self.ts]
assert self._assert_invariants()
return t.keys()
def keys(self):
res = [None] * len(self)
i = 0
for k in self.iterkeys():
res[i] = k
i += 1
return res
def values_unsorted(self):
assert self._assert_invariants()
t = self.d.copy()
del t[self.hs]
del t[self.ts]
assert self._assert_invariants()
return map(operator.__getitem__, t.values(), [0]*len(t))
def values(self):
res = [None] * len(self)
i = 0
for v in self.itervalues():
res[i] = v
i += 1
return res
def items(self):
res = [None] * len(self)
i = 0
for it in self.iteritems():
res[i] = it
i += 1
return res
def __len__(self):
return len(self.d) - 2
def insert(self, key, val=None):
assert self._assert_invariants()
result = self.__setitem__(key, val)
assert self._assert_invariants()
return result
def setdefault(self, key, default=None):
assert self._assert_invariants()
if not self.has_key(key):
self[key] = default
assert self._assert_invariants()
return self[key]
def get(self, key, default=None):
return self.__getitem__(key, default, strictkey=False)
def remove(self, key, default=None, strictkey=True):
assert self._assert_invariants()
result = self.__delitem__(key, default, strictkey)
assert self._assert_invariants()
return result
class SmallOrderedDict(dict):
"""
SmallOrderedDict is faster than OrderedDict for small sets. How small? That
depends on your machine and which operations you use most often. Use
performance profiling to determine whether the cache class that you are
using makes any difference to the performance of your program, and if it
does, then run "quick_bench()" in test/test_cache.py to see which cache
implementation is faster for the size of your datasets.
A simple least-recently-used cache. It keeps an LRU queue, and
when the number of items in the cache reaches maxsize, it removes
the least recently used item.
"Looking" at an item or a key such as with "has_key()" makes that
item become the most recently used item.
You can also use "refresh()" to explicitly make an item become the most
recently used item.
Adding an item that is already in the dict *does* make it the
most-recently-used item although it does not change the state of
the dict itself.
"""
class ItemIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
if self.i == len(self.c._lru):
raise StopIteration
k = self.c._lru[self.i]
precondition(dict.has_key(self.c, k), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, k, self.c)
self.i += 1
return (k, dict.__getitem__(self.c, k),)
class KeyIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
if self.i == len(self.c._lru):
raise StopIteration
k = self.c._lru[self.i]
precondition(dict.has_key(self.c, k), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, k, self.c)
self.i += 1
return k
class ValueIterator:
def __init__(self, c):
self.c = c
self.i = 0
def __iter__(self):
return self
def next(self):
precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c)
if self.i == len(self.c._lru):
raise StopIteration
k = self.c._lru[self.i]
precondition(dict.has_key(self.c, k), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, k, self.c)
self.i += 1
return dict.__getitem__(self.c, k)
def __init__(self, initialdata={}, maxsize=128):
dict.__init__(self, initialdata)
self._lru = initialdata.keys() # contains keys
self._maxsize = maxsize
over = len(self) - self._maxsize
if over > 0:
map(dict.__delitem__, [self]*over, self._lru[:over])
del self._lru[:over]
assert self._assert_invariants()
def _assert_invariants(self):
_assert(len(self._lru) <= self._maxsize, "Size is required to be <= maxsize.")
_assert(len(filter(lambda x: dict.has_key(self, x), self._lru)) == len(self._lru), "Each key in self._lru is required to be in dict.", filter(lambda x: not dict.has_key(self, x), self._lru), len(self._lru), self._lru, len(self), self)
_assert(len(filter(lambda x: x in self._lru, self.keys())) == len(self), "Each key in dict is required to be in self._lru.", filter(lambda x: x not in self._lru, self.keys()), len(self._lru), self._lru, len(self), self)
_assert(len(self._lru) == len(self), "internal consistency", filter(lambda x: x not in self.keys(), self._lru), len(self._lru), self._lru, len(self), self)
_assert(len(self._lru) <= self._maxsize, "internal consistency", len(self._lru), self._lru, self._maxsize)
return True
def insert(self, key, item=None):
assert self._assert_invariants()
result = self.__setitem__(key, item)
assert self._assert_invariants()
return result
def setdefault(self, key, default=None):
assert self._assert_invariants()
if not self.has_key(key):
self[key] = default
assert self._assert_invariants()
return self[key]
def __setitem__(self, key, item=None):
assert self._assert_invariants()
if dict.has_key(self, key):
self._lru.remove(key)
else:
if len(self._lru) == self._maxsize:
# If this insert is going to increase the size of the cache to bigger than maxsize:
killkey = self._lru.pop(0)
dict.__delitem__(self, killkey)
dict.__setitem__(self, key, item)
self._lru.append(key)
assert self._assert_invariants()
return item
def remove(self, key, default=None, strictkey=True):
assert self._assert_invariants()
result = self.__delitem__(key, default, strictkey)
assert self._assert_invariants()
return result
def __delitem__(self, key, default=None, strictkey=True):
"""
@param strictkey: True if you want a KeyError in the case that
key is not there, False if you want a reference to default
in the case that key is not there
@param default: the object to return if key is not there; This
is ignored if strictkey.
@return: the object removed or default if there is not item by
that key and strictkey is False
"""
assert self._assert_invariants()
if dict.has_key(self, key):
val = dict.__getitem__(self, key)
dict.__delitem__(self, key)
self._lru.remove(key)
assert self._assert_invariants()
return val
elif strictkey:
assert self._assert_invariants()
raise KeyError, key
else:
assert self._assert_invariants()
return default
def clear(self):
assert self._assert_invariants()
dict.clear(self)
self._lru = []
assert self._assert_invariants()
def update(self, otherdict):
"""
@return: self
"""
assert self._assert_invariants()
if len(otherdict) > self._maxsize:
# Handling this special case here makes it possible to implement the
# other more common cases faster below.
dict.clear(self)
self._lru = []
if self._maxsize > (len(otherdict) - self._maxsize):
dict.update(self, otherdict)
while len(self) > self._maxsize:
dict.popitem(self)
else:
for k, v, in otherdict.iteritems():
if len(self) == self._maxsize:
break
dict.__setitem__(self, k, v)
self._lru = dict.keys(self)
assert self._assert_invariants()
return self
for k in otherdict.iterkeys():
if dict.has_key(self, k):
self._lru.remove(k)
self._lru.extend(otherdict.keys())
dict.update(self, otherdict)
over = len(self) - self._maxsize
if over > 0:
map(dict.__delitem__, [self]*over, self._lru[:over])
del self._lru[:over]
assert self._assert_invariants()
return self
def has_key(self, key):
assert self._assert_invariants()
if dict.has_key(self, key):
assert key in self._lru, "key: %s, self._lru: %s" % tuple(map(hr, (key, self._lru,)))
self._lru.remove(key)
self._lru.append(key)
assert self._assert_invariants()
return True
else:
assert self._assert_invariants()
return False
def refresh(self, key, strictkey=True):
"""
@param strictkey: raise a KeyError exception if key isn't present
"""
assert self._assert_invariants()
if not dict.has_key(self, key):
if strictkey:
raise KeyError, key
return
self._lru.remove(key)
self._lru.append(key)
def popitem(self):
if not self._lru:
raise KeyError, 'popitem(): dictionary is empty'
k = self._lru[-1]
obj = self.remove(k)
return (k, obj,)

100
libs/pyutil/platformutil.py

@ -0,0 +1,100 @@
# Thanks to Daenyth for help porting this to Arch Linux.
import os, platform, re, subprocess
_distributor_id_cmdline_re = re.compile("(?:Distributor ID:)\s*(.*)", re.I)
_release_cmdline_re = re.compile("(?:Release:)\s*(.*)", re.I)
_distributor_id_file_re = re.compile("(?:DISTRIB_ID\s*=)\s*(.*)", re.I)
_release_file_re = re.compile("(?:DISTRIB_RELEASE\s*=)\s*(.*)", re.I)
global _distname,_version
_distname = None
_version = None
def get_linux_distro():
""" Tries to determine the name of the Linux OS distribution name.
First, try to parse a file named "/etc/lsb-release". If it exists, and
contains the "DISTRIB_ID=" line and the "DISTRIB_RELEASE=" line, then return
the strings parsed from that file.
If that doesn't work, then invoke platform.dist().
If that doesn't work, then try to execute "lsb_release", as standardized in
2001:
http://refspecs.freestandards.org/LSB_1.0.0/gLSB/lsbrelease.html
The current version of the standard is here:
http://refspecs.freestandards.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/lsbrelease.html
and parse the "Distributor ID" and "Release" values that lsb_release emitted, as strings.
Returns a tuple (distname,version). Distname is what LSB calls a
"distributor id", e.g. "Ubuntu". Version is what LSB calls a "release",
e.g. "8.04".
A version of this has been submitted to python as a patch for the standard
library module "platform":
http://bugs.python.org/issue3937
"""
global _distname,_version
if _distname and _version:
return (_distname, _version)
try:
etclsbrel = open("/etc/lsb-release", "rU")
for line in etclsbrel:
m = _distributor_id_file_re.search(line)
if m:
_distname = m.group(1).strip()
if _distname and _version:
return (_distname, _version)
m = _release_file_re.search(line)
if m:
_version = m.group(1).strip()
if _distname and _version:
return (_distname, _version)
except EnvironmentError:
pass
(_distname, _version) = platform.dist()[:2]
if _distname and _version:
return (_distname, _version)
try:
p = subprocess.Popen(["lsb_release", "--all"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
rc = p.wait()
if rc == 0:
for line in p.stdout.readlines():
m = _distributor_id_cmdline_re.search(line)
if m:
_distname = m.group(1).strip()
if _distname and _version:
return (_distname, _version)
m = _release_cmdline_re.search(line)
if m:
_version = m.group(1).strip()
if _distname and _version:
return (_distname, _version)
except EnvironmentError:
pass
if os.path.exists("/etc/arch-release"):
return ("Arch_Linux", "")
return (_distname,_version)
def get_platform():
# Our version of platform.platform(), telling us both less and more than the
# Python Standard Library's version does.
# We omit details such as the Linux kernel version number, but we add a
# more detailed and correct rendition of the Linux distribution and
# distribution-version.
if "linux" in platform.system().lower():
return platform.system()+"-"+"_".join(get_linux_distro())+"-"+platform.machine()+"-"+"_".join([x for x in platform.architecture() if x])
else:
return platform.platform()

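A quick sanity check for the two helpers above; it assumes Python 2 and that libs/ is on sys.path, and simply prints whatever distribution and platform string are detected on the current machine:

import sys
sys.path.insert(0, "libs")  # assumption: run from the repository root

from pyutil import platformutil

print "distro:  ", platformutil.get_linux_distro()
print "platform:", platformutil.get_platform()
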
85
libs/pyutil/randutil.py

@ -0,0 +1,85 @@
# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import warnings
import os, random
try:
import hashexpand
class SHA256Random(hashexpand.SHA256Expander, random.Random):
def __init__(self, seed=None, deterministic=True):
warnings.warn("deprecated", DeprecationWarning)
if not deterministic:
raise NotImplementedError, "SHA256Expander is always deterministic. For non-deterministic, try urandomRandom."
hashexpand.SHA256Expander.__init__(self)
random.Random.__init__(self, seed)
self.seed(seed)
def seed(self, seed=None):
if seed is None:
import increasing_timer
seed = repr(increasing_timer.time())
hashexpand.SHA256Expander.seed(self, seed)
class SHA256Random(hashexpand.SHA256Expander, random.Random):
def __init__(self, seed=""):
warnings.warn("deprecated", DeprecationWarning)
hashexpand.SHA256Expander.__init__(self)
self.seed(seed)
def seed(self, seed=None):
if seed is None:
seed = os.urandom(32)
hashexpand.SHA256Expander.seed(self, seed)
except ImportError, le:
class InsecureSHA256Random:
def __init__(self, seed=None):
raise ImportError, le
class SHA256Random:
def __init__(self, seed=""):
raise ImportError, le
class devrandomRandom(random.Random):
""" The problem with using this one, of course, is that it blocks. This
is, of course, a security flaw. (On Linux and probably on other
systems.) --Zooko 2005-03-04
Not repeatable.
"""
def __init__(self):
warnings.warn("deprecated", DeprecationWarning)
self.dr = open("/dev/random", "r")
def get(self, bytes):
return self.dr.read(bytes)
class devurandomRandom(random.Random):
""" The problem with using this one is that it gives answers even when it
has never been properly seeded, e.g. when you are booting from CD and have
just started up and haven't yet gathered enough entropy to actually be
unguessable. (On Linux and probably on other systems.) --Zooko 2005-03-04
Not repeatable.
"""
def get(self, bytes):
warnings.warn("deprecated", DeprecationWarning)
return os.urandom(bytes)
randobj = devurandomRandom()
get = randobj.get
random = randobj.random
randrange = randobj.randrange
shuffle = randobj.shuffle
choice = randobj.choice
seed = randobj.seed
def randstr(n):
return ''.join(map(chr, map(randrange, [0]*n, [256]*n)))
import random as insecurerandom
def insecurerandstr(n):
return ''.join(map(chr, map(insecurerandom.randrange, [0]*n, [256]*n)))

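A short usage sketch for the module above (Python 2, libs/ on sys.path assumed): get() and randstr() draw bytes from os.urandom via devurandomRandom, while insecurerandstr() uses the stdlib Mersenne Twister and must not be used for secrets:

import sys
sys.path.insert(0, "libs")  # assumption: run from the repository root

from pyutil import randutil

print repr(randutil.get(8))              # 8 bytes from os.urandom
print repr(randutil.randstr(8))          # same source, built one byte at a time
print repr(randutil.insecurerandstr(8))  # pseudorandom only, NOT for secrets
print randutil.randrange(0, 10)          # module-level convenience wrapper
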
85
libs/pyutil/randutil.py~

@ -0,0 +1,85 @@
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import warnings
import os, random
try:
import hashexpand
class SHA256Random(hashexpand.SHA256Expander, random.Random):
def __init__(self, seed=None, deterministic=True):
warnings.warn("deprecated", DeprecationWarning)
if not deterministic:
raise NotImplementedError, "SHA256Expander is always deterministic. For non-deterministic, try urandomRandom."
hashexpand.SHA256Expander.__init__(self)
random.Random.__init__(self, seed)
self.seed(seed)
def seed(self, seed=None):
if seed is None:
import increasing_timer
seed = repr(increasing_timer.time())
hashexpand.SHA256Expander.seed(self, seed)
class SHA256Random(hashexpand.SHA256Expander, random.Random):
def __init__(self, seed=""):
warnings.warn("deprecated", DeprecationWarning)
hashexpand.SHA256Expander.__init__(self)
self.seed(seed)
def seed(self, seed=None):
if seed is None:
seed = os.urandom(32)
hashexpand.SHA256Expander.seed(self, seed)
except ImportError, le:
class InsecureSHA256Random:
def __init__(self, seed=None):
raise ImportError, le
class SHA256Random:
def __init__(self, seed=""):
raise ImportError, le
class devrandomRandom(random.Random):
""" The problem with using this one, of course, is that it blocks. This
is, of course, a security flaw. (On Linux and probably on other
systems.) --Zooko 2005-03-04
Not repeatable.
"""
def __init__(self):
warnings.warn("deprecated", DeprecationWarning)
self.dr = open("/dev/random", "r")
def get(self, bytes):
return self.dr.read(bytes)
class devurandomRandom(random.Random):
""" The problem with using this one is that it gives answers even when it
has never been properly seeded, e.g. when you are booting from CD and have
just started up and haven't yet gathered enough entropy to actually be
unguessable. (On Linux and probably on other systems.) --Zooko 2005-03-04
Not repeatable.
"""
def get(self, bytes):
warnings.warn("deprecated", DeprecationWarning)
return os.urandom(bytes)
randobj = devurandomRandom()
get = randobj.get
random = randobj.random
randrange = randobj.randrange
shuffle = randobj.shuffle
choice = randobj.choice
seed = randobj.seed
def randstr(n):
return ''.join(map(chr, map(randrange, [0]*n, [256]*n)))
import random as insecurerandom
def insecurerandstr(n):
return ''.join(map(chr, map(insecurerandom.randrange, [0]*n, [256]*n)))

90
libs/pyutil/repeatable_random.py

@ -0,0 +1,90 @@
"""
If you execute force_repeatability() then the following things are changed in the runtime:
1. random.random() and its sibling functions, and random.Random.seed() in the random module are seeded with a known seed so that they will return the same sequence on each run.
2. os.urandom() is replaced by a fake urandom that returns a pseudorandom sequence.
3. time.time() is replaced by a fake time that returns an incrementing number. (Original time.time is available as time.realtime.)
Which seed will be used?
If the environment variable REPEATABLE_RANDOMNESS_SEED is set, then it will use that. Else, it will use the current real time. In either case it logs the seed that it used.
Caveats:
1. If some code has acquired a random.Random object before force_repeatability() is executed, then that Random object will produce non-reproducible results. For example, the tempfile module in the Python Standard Library does this.
2. Likewise if some code called time.time() before force_repeatability() was called, then it will have gotten a real time stamp. For example, trial does this. (Then it later subtracts that real timestamp from a faketime timestamp to calculate elapsed time, resulting in a large negative elapsed time.)
3. Fake urandom has an added constraint for performance reasons -- you can't ask it for more than 64 bytes of randomness at a time. (I couldn't figure out how to generate large fake random strings efficiently.)
"""
import os, random, time
if not hasattr(time, "realtime"):
time.realtime = time.time
if not hasattr(os, "realurandom"):
os.realurandom = os.urandom
if not hasattr(random, "realseed"):
random.realseed = random.seed
tdelta = 0
seeded = False
def force_repeatability():
now = 1043659734.0
def faketime():
global tdelta
tdelta += 1
return now + tdelta
time.faketime = faketime
time.time = faketime
from idlib import i2b
def fakeurandom(n):
if n > 64:
raise ("Can't produce more than 64 bytes of pseudorandomness efficiently.")
elif n == 0:
return ''
else:
z = i2b(random.getrandbits(n*8))
x = z + "0" * (n-len(z))
assert len(x) == n
return x
os.fakeurandom = fakeurandom
os.urandom = fakeurandom
global seeded
if not seeded:
SEED = os.environ.get('REPEATABLE_RANDOMNESS_SEED', None)
if SEED is None:
# Generate a seed which is integral and fairly short (to ease cut-and-paste, writing it down, etc.).
t = time.realtime()
subsec = t % 1
t += (subsec * 1000000)
t %= 1000000
SEED = long(t)
import sys
sys.stdout.write("REPEATABLE_RANDOMNESS_SEED: %s\n" % SEED) ; sys.stdout.flush()
sys.stdout.write("In order to reproduce this run of the code, set the environment variable \"REPEATABLE_RANDOMNESS_SEED\" to %s before executing.\n" % SEED) ; sys.stdout.flush()
random.seed(SEED)
def seed_which_refuses(a):
sys.stdout.write("I refuse to reseed to %s. Go away!\n" % (a,)) ; sys.stdout.flush()
return
random.realseed = random.seed
random.seed = seed_which_refuses
seeded = True
import setutil
setutil.RandomSet.DETERMINISTIC = True
def restore_real_clock():
time.time = time.realtime
def restore_real_urandom():
os.urandom = os.realurandom
def restore_real_seed():
random.seed = random.realseed
def restore_non_repeatability():
restore_real_seed()
restore_real_urandom()
restore_real_clock()

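force_repeatability() imports idlib and setutil when it is called, neither of which is bundled here, so the following is a self-contained sketch of the same idea rather than a call into the module above: seed the stdlib generator from an environment variable so that separate runs reproduce the same sequence (Python 2 assumed):

import os, random

SEED = int(os.environ.get("REPEATABLE_RANDOMNESS_SEED", "12345"))
print "REPEATABLE_RANDOMNESS_SEED:", SEED
random.seed(SEED)
print [random.randrange(0, 100) for _ in range(5)]  # identical on every run with the same seed
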
0
libs/pyutil/scripts/__init__.py

36
libs/pyutil/scripts/lines.py

@ -0,0 +1,36 @@
#!/usr/bin/env python
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
from pyutil import lineutil
import sys
def main():
if len(sys.argv) > 1 and "-s" in sys.argv[1:]:
strip = True
sys.argv.remove("-s")
else:
strip = False
if len(sys.argv) > 1 and "-n" in sys.argv[1:]:
nobak = True
sys.argv.remove("-n")
else:
nobak = False
if len(sys.argv) > 1:
pipe = False
else:
pipe = True
if pipe:
lineutil.lineify_fileobjs(sys.stdin, sys.stdout)
else:
for fn in sys.argv[1:]:
lineutil.lineify_file(fn, strip, nobak)
if __name__ == '__main__':
main()

65
libs/pyutil/scripts/memdump2dot.py

@ -0,0 +1,65 @@
#!/usr/bin/env python
import bindann
bindann.install_exception_handler()
import sys
inf = open(sys.argv[1], "r")
outf = open(sys.argv[1]+".dot", "w")
outf.write("digraph %s {\n" % sys.argv[1].replace(".",""))
def parse_netstring(l, i):
try:
j = l.find(':', i)
if j == -1:
return (None, len(l),)
lenval = int(l[i:j])
val = l[j+1:j+1+lenval]
# skip the comma
assert l[j+1+lenval] == ","
return (val, j+1+lenval+1,)
except Exception, le:
le.args = tuple(le.args + (l, i,))
raise
def parse_ref(l, i):
(attrname, i,) = parse_netstring(l, i)
j = l.find(",", i)
assert j != -1
objid = l[i:j]
return (objid, attrname, j+1,)
def parse_memdump_line(l):
result = []
i = l.find('-')
objid = l[:i]
(objdesc, i,) = parse_netstring(l, i+1)
result.append((objid, objdesc,))
while i != -1 and i < len(l):
(objid, attrname, i,) = parse_ref(l, i)
result.append((objid, attrname,))
return result
for l in inf:
if l[-1] != "\n":
raise "waht the HECK? %r" % l
res = parse_memdump_line(l.strip())
# declare the node
outf.write("\"%s\" [label=\"%s\"];\n" % (res[0][0], res[0][1],))
# declare all the edges
for edge in res[1:]:
if edge[1]:
# a named edge
outf.write("\"%s\" -> \"%s\" [style=bold, label=\"%s\"];\n" % (res[0][0], edge[0], edge[1],))
else:
# an anonymous edge
outf.write("\"%s\" -> \"%s\";\n" % (res[0][0], edge[0]))
outf.write("}")

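The script above cannot be imported as a module (it reads sys.argv and requires bindann at import time), so the record format it parses is illustrated here with a hypothetical standalone copy of parse_netstring; the sample line and object ids are made up for the illustration:

def parse_netstring(l, i):
    # "<length>:<payload>," starting at offset i; returns (payload, offset after the comma)
    j = l.find(':', i)
    lenval = int(l[i:j])
    val = l[j + 1:j + 1 + lenval]
    assert l[j + 1 + lenval] == ","
    return (val, j + 1 + lenval + 1)

line = "obj1-4:desc,3:foo,obj2,"
objid = line[:line.find('-')]               # "obj1", the object being described
desc, i = parse_netstring(line, len(objid) + 1)
attr, i = parse_netstring(line, i)          # "foo", the attribute labelling the edge
target = line[i:line.index(",", i)]         # "obj2", the referenced object id
print "%s -[%s]-> %s" % (objid, attr, target)
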
30
libs/pyutil/scripts/randcookie.py

@ -0,0 +1,30 @@
#!/usr/bin/env python
import os, sys
import zbase32
def main():
if len(sys.argv) > 1:
l = int(sys.argv[1])
else:
l = 64
bl = (l + 7) / 8
s = zbase32.b2a_l(os.urandom(bl), l)
# insert some hyphens for easier memorization
chs = 3 + (len(s)%8==0)
i = chs
while i < len(s)-1:
s = s[:i] + "-" + s[i:]
i += 1
chs = 7-chs
i += chs
print s
if __name__ == '__main__':
main()

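The hyphen-insertion loop above just groups the zbase32 characters for readability; the same grouping is shown standalone below on a fixed stand-in string, so zbase32 is not needed to run it:

s = "abcdefghijkl"           # stand-in for the zbase32-encoded cookie
chs = 3 + (len(s) % 8 == 0)  # first group is 3 or 4 characters
i = chs
while i < len(s) - 1:
    s = s[:i] + "-" + s[i:]
    i += 1
    chs = 7 - chs            # alternate between groups of roughly 3 and 4
    i += chs
print s                      # e.g. "abc-defg-hij-kl"
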
48
libs/pyutil/scripts/randfile.py

@ -0,0 +1,48 @@
#!/usr/bin/env python
import os, sys
from random import randrange
import argparse
def main():
CHUNKSIZE=2**20
parser = argparse.ArgumentParser(prog="randfile", description="Create a file of pseudorandom bytes (not cryptographically secure).")
parser.add_argument('-b', '--num-bytes', help="how many bytes to write per output file (default 20)", type=int, metavar="BYTES", default=20)
parser.add_argument('-f', '--output-file-prefix', help="prefix of the name of the output file to create and fill with random bytes (default \"randfile\")", metavar="OUTFILEPRE", default="randfile")
parser.add_argument('-n', '--num-files', help="how many files to write (default 1)", type=int, metavar="FILES", default=1)
parser.add_argument('-F', '--force', help='overwrite any file already present', action='store_true')
parser.add_argument('-p', '--progress', help='write an "x" for every file completed and a "." for every %d bytes' % CHUNKSIZE, action='store_true')
args = parser.parse_args()
for i in xrange(args.num_files):
bytesleft = args.num_bytes
outputfname = args.output_file_prefix + "." + str(i)
if args.force:
f = open(outputfname, "wb")
else:
flags = os.O_WRONLY|os.O_CREAT|os.O_EXCL | (hasattr(os, 'O_BINARY') and os.O_BINARY)
fd = os.open(outputfname, flags)
f = os.fdopen(fd, "wb")
zs = [0]*CHUNKSIZE
ts = [256]*CHUNKSIZE
while bytesleft >= CHUNKSIZE:
f.write(''.join(map(chr, map(randrange, zs, ts))))
bytesleft -= CHUNKSIZE
if args.progress:
sys.stdout.write(".") ; sys.stdout.flush()
zs = [0]*bytesleft
ts = [256]*bytesleft
f.write(''.join(map(chr, map(randrange, zs, ts))))
if args.progress:
sys.stdout.write("x") ; sys.stdout.flush()
if __name__ == "__main__":
main()

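The byte-generation idiom used above, shown standalone (Python 2; the output is pseudorandom and not suitable for keys or other secrets):

from random import randrange

def pseudorandom_bytes(n):
    # one randrange(0, 256) call per output byte, the same idiom as randfile.py
    return ''.join(map(chr, map(randrange, [0] * n, [256] * n)))

print repr(pseudorandom_bytes(16))
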
30
libs/pyutil/scripts/tailx.py

@ -0,0 +1,30 @@
#!/usr/bin/env python
# output all but the first N lines of a file
# Allen Short and Jp Calderone wrote this cool version:
import itertools, sys
def main():
K = int(sys.argv[1])
if len(sys.argv) > 2:
fname = sys.argv[2]
inf = open(fname, 'r')
else:
inf = sys.stdin
sys.stdout.writelines(itertools.islice(inf, K, None))
if __name__ == '__main__':
main()
# thus replacing my dumb version:
# # from the Python Standard Library
# import sys
#
# i = K
# for l in sys.stdin.readlines():
# if i:
# i -= 1
# else:
# print l,

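The itertools.islice(inf, K, None) call above lazily skips the first K lines of any iterable of lines; a tiny standalone check (Python 2):

import itertools
from StringIO import StringIO

inf = StringIO("one\ntwo\nthree\nfour\n")
print ''.join(itertools.islice(inf, 2, None)),  # prints "three" and "four"
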
96
libs/pyutil/scripts/try_decoding.py

@ -0,0 +1,96 @@
#!/usr/bin/env python
import binascii, codecs, encodings, locale, os, sys, zlib
import argparse
def listcodecs(dir):
names = []
for filename in os.listdir(dir):
if filename[-3:] != '.py':
continue
name = filename[:-3]
# Check whether we've found a true codec
try:
codecs.lookup(name)
except LookupError:
# Codec not found
continue
except Exception:
# Probably an error from importing the codec; still it's
# a valid code name
pass
names.append(name)
return names
def listem():
return listcodecs(encodings.__path__[0])
def _canonical_encoding(encoding):
if encoding is None:
encoding = 'utf-8'
encoding = encoding.lower()
if encoding == "cp65001":
encoding = 'utf-8'
elif encoding == "us-ascii" or encoding == "646":
encoding = 'ascii'
# sometimes Python returns an encoding name that it doesn't support for conversion
# fail early if this happens
try:
u"test".encode(encoding)
except (LookupError, AttributeError):
raise AssertionError("The character encoding '%s' is not supported for conversion." % (encoding,))
return encoding
def get_output_encoding():
return _canonical_encoding(sys.stdout.encoding or locale.getpreferredencoding())
def get_argv_encoding():
if sys.platform == 'win32':
# Unicode arguments are not supported on Windows yet; see Tahoe-LAFS tickets #565 and #1074.
return 'ascii'
else:
return get_output_encoding()
output_encoding = get_output_encoding()
argv_encoding = get_argv_encoding()
def type_unicode(argstr):
return argstr.decode(argv_encoding)
def main():
parser = argparse.ArgumentParser(prog="try_decoding", description="Try decoding some bytes with all sorts of different codecs and print out any that decode.")
parser.add_argument('inputfile', help='file to decode or "-" for stdin', type=argparse.FileType('rb'), metavar='INF')
parser.add_argument('-t', '--target', help='unicode string to match against (if any)', type=type_unicode, metavar='T')
parser.add_argument('-a', '--accept-bytes', help='include codecs which return bytes instead of returning unicode (they will be marked with "!!!" in the output)', action='store_true')
args = parser.parse_args()
inb = args.inputfile.read()
for codec in listem():
try:
u = inb.decode(codec)
except (UnicodeDecodeError, IOError, TypeError, IndexError, UnicodeError, ValueError, zlib.error, binascii.Error):
pass
else:
if isinstance(u, unicode):
if args.target:
if args.target != u:
continue
print "%19s" % codec,
print ':',
print u.encode(output_encoding)
else:
if not args.accept_bytes:
continue
print "%19s" % codec,
print "!!! ",
print ':',
print u
if __name__ == "__main__":
main()

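A reduced version of the decoding loop above, trying a handful of codecs by hand against one byte string; the full script enumerates every codec shipped in the encodings package (Python 2):

data = '\xce\xb1\xce\xa9'            # UTF-8 encoding of Greek alpha + omega
for codec in ('ascii', 'latin-1', 'utf-8', 'utf-16'):
    try:
        u = data.decode(codec)
    except (UnicodeDecodeError, UnicodeError, ValueError):
        print "%8s: (does not decode)" % codec
    else:
        print "%8s: %r" % (codec, u)
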
19
libs/pyutil/scripts/unsort.py

@ -0,0 +1,19 @@
#!/usr/bin/env python
# randomize the lines of stdin or a file
import random, sys
def main():
if len(sys.argv) > 1:
fname = sys.argv[1]
inf = open(fname, 'r')
else:
inf = sys.stdin
lines = inf.readlines()
random.shuffle(lines)
sys.stdout.writelines(lines)
if __name__ == '__main__':
main()

26
libs/pyutil/scripts/verinfo.py

@ -0,0 +1,26 @@
#!/usr/bin/env python
import exceptions
class UsageError(exceptions.Exception): pass
import sys
import pkg_resources
def main():
if len(sys.argv) <= 1:
raise UsageError, "USAGE: verinfo DISTRIBUTIONNAME [PACKAGENAME]"
DISTNAME=sys.argv[1]
if len(sys.argv) >= 3:
PACKNAME=sys.argv[2]
else:
PACKNAME=DISTNAME
print "pkg_resources.require('%s') => " % (DISTNAME,),
print pkg_resources.require(DISTNAME)
print "import %s;print %s => " % (PACKNAME, PACKNAME,),
x = __import__(PACKNAME)
print x
print "import %s;print %s.__version__ => " % (PACKNAME, PACKNAME,),
print hasattr(x, '__version__') and x.__version__
if __name__ == "__main__":
main()

48
libs/pyutil/strutil.py

@ -0,0 +1,48 @@
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
def commonprefix(l):
cp = []
for i in range(min(map(len, l))):
c = l[0][i]
for s in l[1:]:
if s[i] != c:
return ''.join(cp)
cp.append(c)
return ''.join(cp)
def commonsuffix(l):
cp = []
for i in range(min(map(len, l))):
c = l[0][-i-1]
for s in l[1:]:
if s[-i-1] != c:
cp.reverse()
return ''.join(cp)
cp.append(c)
cp.reverse()
return ''.join(cp)
def split_on_newlines(s):
"""
Splits s on all of the three newline sequences: "\r\n", "\r", or "\n".
"""
res = []
for x in s.split('\r\n'):
for y in x.split('\r'):
res.extend(y.split('\n'))
return res
def pop_trailing_newlines(s):
"""
@return a copy of s minus any trailing "\n"'s or "\r"'s
"""
i = len(s)-1
if i < 0:
return ''
while s[i] in ('\n', '\r',):
i = i - 1
if i < 0:
return ''
return s[:i+1]

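A few example calls for the helpers above (Python 2, libs/ on sys.path assumed); the expected results are noted in the comments:

import sys
sys.path.insert(0, "libs")  # assumption: run from the repository root

from pyutil import strutil

print strutil.commonprefix(["flower", "flow", "flourish"])    # flo
print strutil.commonsuffix(["testing", "running", "going"])   # ing
print strutil.split_on_newlines("a\r\nb\rc\nd")               # ['a', 'b', 'c', 'd']
print repr(strutil.pop_trailing_newlines("line\r\n"))         # 'line'
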
0
libs/pyutil/test/__init__.py

0
libs/pyutil/test/current/__init__.py

0
libs/pyutil/test/current/json_tests/__init__.py

15
libs/pyutil/test/current/json_tests/test_decode.py

@ -0,0 +1,15 @@
import decimal
from unittest import TestCase
from pyutil import jsonutil as json
class TestDecode(TestCase):
def test_decimal(self):
rval = json.loads('1.1', parse_float=decimal.Decimal)
self.assert_(isinstance(rval, decimal.Decimal))
self.assertEquals(rval, decimal.Decimal('1.1'))
def test_float(self):
rval = json.loads('1', parse_int=float)
self.assert_(isinstance(rval, float))
self.assertEquals(rval, 1.0)

9
libs/pyutil/test/current/json_tests/test_default.py

@ -0,0 +1,9 @@
from unittest import TestCase
from pyutil import jsonutil as json
class TestDefault(TestCase):
def test_default(self):
self.assertEquals(
json.dumps(type, default=repr),
json.dumps(repr(type)))

13
libs/pyutil/test/current/json_tests/test_dump.py

@ -0,0 +1,13 @@
from unittest import TestCase
from cStringIO import StringIO
from pyutil import jsonutil as json
class TestDump(TestCase):
def test_dump(self):
sio = StringIO()
json.dump({}, sio)
self.assertEquals(sio.getvalue(), '{}')
def test_dumps(self):
self.assertEquals(json.dumps({}), '{}')

36
libs/pyutil/test/current/json_tests/test_encode_basestring_ascii.py

@ -0,0 +1,36 @@
from twisted.trial.unittest import SkipTest, TestCase
from pyutil.jsonutil import encoder
CASES = [
(u'/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
(u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
(u'controls', '"controls"'),
(u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
(u'{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
(u' s p a c e d ', '" s p a c e d "'),
(u'\U0001d120', '"\\ud834\\udd20"'),
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
('\xce\xb1\xce\xa9', '"\\u03b1\\u03a9"'),
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
(u'\u03b1\u03a9', '"\\u03b1\\u03a9"'),
(u"`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
(u'\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
(u'\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
]
class TestEncodeBaseStringAscii(TestCase):
def test_py_encode_basestring_ascii(self):
self._test_encode_basestring_ascii(encoder.py_encode_basestring_ascii)
def test_c_encode_basestring_ascii(self):
if not encoder.c_encode_basestring_ascii:
raise SkipTest("no C extension speedups available to test")
self._test_encode_basestring_ascii(encoder.c_encode_basestring_ascii)
def _test_encode_basestring_ascii(self, encode_basestring_ascii):
for input_string, expect in CASES:
result = encode_basestring_ascii(input_string)
self.assertEquals(result, expect)

76
libs/pyutil/test/current/json_tests/test_fail.py

@ -0,0 +1,76 @@
from unittest import TestCase
from pyutil import jsonutil as json
# Fri Dec 30 18:57:26 2005
JSONDOCS = [
# http://json.org/JSON_checker/test/fail1.json
'"A JSON payload should be an object or array, not a string."',
# http://json.org/JSON_checker/test/fail2.json
'["Unclosed array"',
# http://json.org/JSON_checker/test/fail3.json
'{unquoted_key: "keys must be quoted}',
# http://json.org/JSON_checker/test/fail4.json
'["extra comma",]',
# http://json.org/JSON_checker/test/fail5.json
'["double extra comma",,]',
# http://json.org/JSON_checker/test/fail6.json
'[ , "<-- missing value"]',
# http://json.org/JSON_checker/test/fail7.json
'["Comma after the close"],',
# http://json.org/JSON_checker/test/fail8.json
'["Extra close"]]',
# http://json.org/JSON_checker/test/fail9.json
'{"Extra comma": true,}',
# http://json.org/JSON_checker/test/fail10.json
'{"Extra value after close": true} "misplaced quoted value"',
# http://json.org/JSON_checker/test/fail11.json
'{"Illegal expression": 1 + 2}',
# http://json.org/JSON_checker/test/fail12.json
'{"Illegal invocation": alert()}',
# http://json.org/JSON_checker/test/fail13.json
'{"Numbers cannot have leading zeroes": 013}',
# http://json.org/JSON_checker/test/fail14.json
'{"Numbers cannot be hex": 0x14}',
# http://json.org/JSON_checker/test/fail15.json
'["Illegal backslash escape: \\x15"]',
# http://json.org/JSON_checker/test/fail16.json
'["Illegal backslash escape: \\\'"]',
# http://json.org/JSON_checker/test/fail17.json
'["Illegal backslash escape: \\017"]',
# http://json.org/JSON_checker/test/fail18.json
'[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]',
# http://json.org/JSON_checker/test/fail19.json
'{"Missing colon" null}',
# http://json.org/JSON_checker/test/fail20.json
'{"Double colon":: null}',
# http://json.org/JSON_checker/test/fail21.json
'{"Comma instead of colon", null}',
# http://json.org/JSON_checker/test/fail22.json
'["Colon instead of comma": false]',
# http://json.org/JSON_checker/test/fail23.json
'["Bad value", truth]',
# http://json.org/JSON_checker/test/fail24.json
"['single quote']",
# http://code.google.com/p/simplejson/issues/detail?id=3
u'["A\u001FZ control characters in string"]',
]
SKIPS = {
1: "why not have a string payload?",
18: "spec doesn't specify any nesting limitations",
}
class TestFail(TestCase):
def test_failures(self):
for idx, doc in enumerate(JSONDOCS):
idx = idx + 1
if idx in SKIPS:
json.loads(doc)
continue
try:
json.loads(doc)
except ValueError:
pass
else:
self.fail("Expected failure for fail%d.json: %r" % (idx, doc))

9
libs/pyutil/test/current/json_tests/test_float.py

@ -0,0 +1,9 @@
import math
from unittest import TestCase
from pyutil import jsonutil as json
class TestFloat(TestCase):
def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100]:
self.assertEquals(float(json.dumps(num)), num)

41
libs/pyutil/test/current/json_tests/test_indent.py

@ -0,0 +1,41 @@
from unittest import TestCase
from pyutil import jsonutil as json
import textwrap
class TestIndent(TestCase):
def test_indent(self):
h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
{'nifty': 87}, {'field': 'yes', 'morefield': False} ]
expect = textwrap.dedent("""\
[
[
"blorpie"
],
[
"whoops"
],
[],
"d-shtaeou",
"d-nthiouh",
"i-vhbjkhnth",
{
"nifty": 87
},
{
"field": "yes",
"morefield": false
}
]""")
d1 = json.dumps(h)
d2 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
h1 = json.loads(d1)
h2 = json.loads(d2)
self.assertEquals(h1, h)
self.assertEquals(h2, h)
self.assertEquals(d2, expect)

71
libs/pyutil/test/current/json_tests/test_pass1.py

@ -0,0 +1,71 @@
from unittest import TestCase
from pyutil import jsonutil as json
# from http://json.org/JSON_checker/test/pass1.json
JSON = r'''
[
"JSON Test Pattern pass1",
{"object with 1 member":["array with 1 element"]},
{},
[],
-42,
true,
false,
null,
{
"integer": 1234567890,
"real": -9876.543210,
"e": 0.123456789e-12,
"E": 1.234567890E+34,
"": 23456789012E666,
"zero": 0,
"one": 1,
"space": " ",
"quote": "\"",
"backslash": "\\",
"controls": "\b\f\n\r\t",
"slash": "/ & \/",
"alpha": "abcdefghijklmnopqrstuvwyz",
"ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
"digit": "0123456789",
"special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
"hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
"true": true,
"false": false,
"null": null,
"array":[ ],
"object":{ },
"address": "50 St. James Street",
"url": "http://www.JSON.org/",
"comment": "// /* <!-- --",
"# -- --> */": " ",
" s p a c e d " :[1,2 , 3
,
4 , 5 , 6 ,7 ],
"compact": [1,2,3,4,5,6,7],
"jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
"quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
"\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
: "A key can be any string"
},
0.5 ,98.6
,
99.44
,
1066
,"rosebud"]
'''
class TestPass1(TestCase):
def test_parse(self):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
self.assertEquals(res, json.loads(out))
self.failUnless("2.3456789012E+676" in json.dumps(res, allow_nan=False))

14
libs/pyutil/test/current/json_tests/test_pass2.py

@ -0,0 +1,14 @@
from unittest import TestCase
from pyutil import jsonutil as json
# from http://json.org/JSON_checker/test/pass2.json
JSON = r'''
[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
'''
class TestPass2(TestCase):
def test_parse(self):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
self.assertEquals(res, json.loads(out))

20
libs/pyutil/test/current/json_tests/test_pass3.py

@ -0,0 +1,20 @@
from unittest import TestCase
from pyutil import jsonutil as json
# from http://json.org/JSON_checker/test/pass3.json
JSON = r'''
{
"JSON Test Pattern pass3": {
"The outermost value": "must be an object or array.",
"In this test": "It is an object."
}
}
'''
class TestPass3(TestCase):
def test_parse(self):
# test in/out equivalence and parsing
res = json.loads(JSON)
out = json.dumps(res)
self.assertEquals(res, json.loads(out))

67
libs/pyutil/test/current/json_tests/test_recursion.py

@ -0,0 +1,67 @@
from unittest import TestCase
from pyutil import jsonutil as json
class JSONTestObject:
pass
class RecursiveJSONEncoder(json.JSONEncoder):
recurse = False
def default(self, o):
if o is JSONTestObject:
if self.recurse:
return [JSONTestObject]
else:
return 'JSONTestObject'
return json.JSONEncoder.default(o)
class TestRecursion(TestCase):
def test_listrecursion(self):
x = []
x.append(x)
try:
json.dumps(x)
except ValueError:
pass
else:
self.fail("didn't raise ValueError on list recursion")
x = []
y = [x]
x.append(y)
try:
json.dumps(x)
except ValueError:
pass
else:
self.fail("didn't raise ValueError on alternating list recursion")
y = []
x = [y, y]
# ensure that the marker is cleared
json.dumps(x)
def test_dictrecursion(self):
x = {}
x["test"] = x
try:
json.dumps(x)
except ValueError:
pass
else:
self.fail("didn't raise ValueError on dict recursion")
x = {}
{"a": x, "b": x}
# ensure that the marker is cleared
json.dumps(x)
def test_defaultrecursion(self):
enc = RecursiveJSONEncoder()
self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"')
enc.recurse = True
try:
enc.encode(JSONTestObject)
except ValueError:
pass
else:
self.fail("didn't raise ValueError on default recursion")

42
libs/pyutil/test/current/json_tests/test_separators.py

@ -0,0 +1,42 @@
import textwrap
from unittest import TestCase
from pyutil import jsonutil as json
class TestSeparators(TestCase):
def test_separators(self):
h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
{'nifty': 87}, {'field': 'yes', 'morefield': False} ]
expect = textwrap.dedent("""\
[
[
"blorpie"
] ,
[
"whoops"
] ,
[] ,
"d-shtaeou" ,
"d-nthiouh" ,
"i-vhbjkhnth" ,
{
"nifty" : 87
} ,
{
"field" : "yes" ,
"morefield" : false
}
]""")
d1 = json.dumps(h)
d2 = json.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : '))
h1 = json.loads(d1)
h2 = json.loads(d2)
self.assertEquals(h1, h)
self.assertEquals(h2, h)
self.assertEquals(d2, expect)

18
libs/pyutil/test/current/json_tests/test_speedups.py

@ -0,0 +1,18 @@
from twisted.trial.unittest import SkipTest, TestCase
from pyutil.jsonutil import decoder
from pyutil.jsonutil import encoder
class TestSpeedups(TestCase):
def test_scanstring(self):
if not encoder.c_encode_basestring_ascii:
raise SkipTest("no C extension speedups available to test")
self.assertEquals(decoder.scanstring.__module__, "simplejson._speedups")
self.assert_(decoder.scanstring is decoder.c_scanstring)
def test_encode_basestring_ascii(self):
if not encoder.c_encode_basestring_ascii:
raise SkipTest("no C extension speedups available to test")
self.assertEquals(encoder.encode_basestring_ascii.__module__, "simplejson._speedups")
self.assert_(encoder.encode_basestring_ascii is
encoder.c_encode_basestring_ascii)

55
libs/pyutil/test/current/json_tests/test_unicode.py

@ -0,0 +1,55 @@
from unittest import TestCase
from pyutil import jsonutil as json
class TestUnicode(TestCase):
def test_encoding1(self):
encoder = json.JSONEncoder(encoding='utf-8')
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
s = u.encode('utf-8')
ju = encoder.encode(u)
js = encoder.encode(s)
self.assertEquals(ju, js)
def test_encoding2(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
s = u.encode('utf-8')
ju = json.dumps(u, encoding='utf-8')
js = json.dumps(s, encoding='utf-8')
self.assertEquals(ju, js)
def test_encoding3(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps(u)
self.assertEquals(j, '"\\u03b1\\u03a9"')
def test_encoding4(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps([u])
self.assertEquals(j, '["\\u03b1\\u03a9"]')
def test_encoding5(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps(u, ensure_ascii=False)
self.assertEquals(j, u'"%s"' % (u,))
def test_encoding6(self):
u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
j = json.dumps([u], ensure_ascii=False)
self.assertEquals(j, u'["%s"]' % (u,))
def test_big_unicode_encode(self):
u = u'\U0001d120'
self.assertEquals(json.dumps(u), '"\\ud834\\udd20"')
self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"')
def test_big_unicode_decode(self):
u = u'z\U0001d120x'
self.assertEquals(json.loads('"' + u + '"'), u)
self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u)
def test_unicode_decode(self):
for i in range(0, 0xd7ff):
u = unichr(i)
js = '"\\u%04x"' % (i,)
self.assertEquals(json.loads(js), u)

18
libs/pyutil/test/current/test_assertutil.py

@ -0,0 +1,18 @@
#!/usr/bin/env python
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn
# portions Copyright (c) 2001 Autonomous Zone Industries
# This file is part of pyutil; see README.rst for licensing terms.
# Python Standard Library modules
import unittest
from pyutil import assertutil
class Testy(unittest.TestCase):
def test_bad_precond(self):
adict=23
try:
assertutil.precondition(isinstance(adict, dict), "adict is required to be a dict.", 23, adict=adict, foo=None)
except AssertionError, le:
self.failUnless(le.args[0] == "precondition: 'adict is required to be a dict.' <type 'str'>, 23 <type 'int'>, foo: None <type 'NoneType'>, 'adict': 23 <type 'int'>")

33
libs/pyutil/test/current/test_fileutil.py

@ -0,0 +1,33 @@
import unittest
import os
from pyutil import fileutil
class FileUtil(unittest.TestCase):
def mkdir(self, basedir, path, mode=0777):
fn = os.path.join(basedir, path)
fileutil.make_dirs(fn, mode)
def touch(self, basedir, path, mode=None, data="touch\n"):
fn = os.path.join(basedir, path)
f = open(fn, "w")
f.write(data)
f.close()
if mode is not None:
os.chmod(fn, mode)
def test_du(self):
basedir = "util/FileUtil/test_du"
fileutil.make_dirs(basedir)
d = os.path.join(basedir, "space-consuming")
self.mkdir(d, "a/b")
self.touch(d, "a/b/1.txt", data="a"*10)
self.touch(d, "a/b/2.txt", data="b"*11)
self.mkdir(d, "a/c")
self.touch(d, "a/c/1.txt", data="c"*12)
self.touch(d, "a/c/2.txt", data="d"*13)
used = fileutil.du(basedir)
self.failUnlessEqual(10+11+12+13, used)

33
libs/pyutil/test/current/test_iputil.py

@ -0,0 +1,33 @@
#!/usr/bin/env python
try:
from twisted.trial import unittest
unittest # http://divmod.org/trac/ticket/1499
except ImportError, le:
print "Skipping test_iputil since it requires Twisted and Twisted could not be imported: %s" % (le,)
else:
from pyutil import iputil, testutil
import re
DOTTED_QUAD_RE=re.compile("^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$")
class ListAddresses(testutil.SignalMixin):
def test_get_local_ip_for(self):
addr = iputil.get_local_ip_for('127.0.0.1')
self.failUnless(DOTTED_QUAD_RE.match(addr))
def test_list_async(self):
try:
from twisted.trial import unittest
unittest # http://divmod.org/trac/ticket/1499
from pyutil import iputil
except ImportError, le:
raise unittest.SkipTest("iputil could not be imported (probably because its dependency, Twisted, is not installed). %s" % (le,))
d = iputil.get_local_addresses_async()
def _check(addresses):
self.failUnless(len(addresses) >= 1) # always have localhost
self.failUnless("127.0.0.1" in addresses, addresses)
d.addCallbacks(_check)
return d
test_list_async.timeout=2

18
libs/pyutil/test/current/test_jsonutil.py

@ -0,0 +1,18 @@
#!/usr/bin/env python
import unittest
from decimal import Decimal
from pyutil import jsonutil
zero_point_one = Decimal("0.1")
class TestDecimal(unittest.TestCase):
def test_encode(self):
self.failUnlessEqual(jsonutil.dumps(zero_point_one), "0.1")
def test_decode(self):
self.failUnlessEqual(jsonutil.loads("0.1"), zero_point_one)
def test_no_exception_on_convergent_parse_float(self):
self.failUnlessEqual(jsonutil.loads("0.1", parse_float=Decimal), zero_point_one)

135
libs/pyutil/test/current/test_mathutil.py

@ -0,0 +1,135 @@
#!/usr/bin/env python
import unittest
from pyutil import mathutil
from pyutil.assertutil import _assert
class MathUtilTestCase(unittest.TestCase):
def _help_test_is_power_of_k(self, k):
for i in range(2, 40):
_assert(mathutil.is_power_of_k(k**i, k), k, i)
def test_is_power_of_k(self):
for i in range(2, 5):
self._help_test_is_power_of_k(i)
def test_log_ceil(self):
f = mathutil.log_ceil
self.failUnlessEqual(f(1, 2), 0)
self.failUnlessEqual(f(1, 3), 0)
self.failUnlessEqual(f(2, 2), 1)
self.failUnlessEqual(f(2, 3), 1)
self.failUnlessEqual(f(3, 2), 2)
def test_log_floor(self):
f = mathutil.log_floor
self.failUnlessEqual(f(1, 2), 0)
self.failUnlessEqual(f(1, 3), 0)
self.failUnlessEqual(f(2, 2), 1)
self.failUnlessEqual(f(2, 3), 0)
self.failUnlessEqual(f(3, 2), 1)
def test_div_ceil(self):
f = mathutil.div_ceil
self.failUnlessEqual(f(0, 1), 0)
self.failUnlessEqual(f(0, 2), 0)
self.failUnlessEqual(f(0, 3), 0)
self.failUnlessEqual(f(1, 3), 1)
self.failUnlessEqual(f(2, 3), 1)
self.failUnlessEqual(f(3, 3), 1)
self.failUnlessEqual(f(4, 3), 2)
self.failUnlessEqual(f(5, 3), 2)
self.failUnlessEqual(f(6, 3), 2)
self.failUnlessEqual(f(7, 3), 3)
def test_next_multiple(self):
f = mathutil.next_multiple
self.failUnlessEqual(f(5, 1), 5)
self.failUnlessEqual(f(5, 2), 6)
self.failUnlessEqual(f(5, 3), 6)
self.failUnlessEqual(f(5, 4), 8)
self.failUnlessEqual(f(5, 5), 5)
self.failUnlessEqual(f(5, 6), 6)
self.failUnlessEqual(f(32, 1), 32)
self.failUnlessEqual(f(32, 2), 32)
self.failUnlessEqual(f(32, 3), 33)
self.failUnlessEqual(f(32, 4), 32)
self.failUnlessEqual(f(32, 5), 35)
self.failUnlessEqual(f(32, 6), 36)
self.failUnlessEqual(f(32, 7), 35)
self.failUnlessEqual(f(32, 8), 32)
self.failUnlessEqual(f(32, 9), 36)
self.failUnlessEqual(f(32, 10), 40)
self.failUnlessEqual(f(32, 11), 33)
self.failUnlessEqual(f(32, 12), 36)
self.failUnlessEqual(f(32, 13), 39)
self.failUnlessEqual(f(32, 14), 42)
self.failUnlessEqual(f(32, 15), 45)
self.failUnlessEqual(f(32, 16), 32)
self.failUnlessEqual(f(32, 17), 34)
self.failUnlessEqual(f(32, 18), 36)
self.failUnlessEqual(f(32, 589), 589)
def test_pad_size(self):
f = mathutil.pad_size
self.failUnlessEqual(f(0, 4), 0)
self.failUnlessEqual(f(1, 4), 3)
self.failUnlessEqual(f(2, 4), 2)
self.failUnlessEqual(f(3, 4), 1)
self.failUnlessEqual(f(4, 4), 0)
self.failUnlessEqual(f(5, 4), 3)
def test_is_power_of_k_part_2(self):
f = mathutil.is_power_of_k
for i in range(1, 100):
if i in (1, 2, 4, 8, 16, 32, 64):
self.failUnless(f(i, 2), "but %d *is* a power of 2" % i)
else:
self.failIf(f(i, 2), "but %d is *not* a power of 2" % i)
for i in range(1, 100):
if i in (1, 3, 9, 27, 81):
self.failUnless(f(i, 3), "but %d *is* a power of 3" % i)
else:
self.failIf(f(i, 3), "but %d is *not* a power of 3" % i)
def test_next_power_of_k(self):
f = mathutil.next_power_of_k
self.failUnlessEqual(f(0,2), 1)
self.failUnlessEqual(f(1,2), 1)
self.failUnlessEqual(f(2,2), 2)
self.failUnlessEqual(f(3,2), 4)
self.failUnlessEqual(f(4,2), 4)
for i in range(5, 8): self.failUnlessEqual(f(i,2), 8, "%d" % i)
for i in range(9, 16): self.failUnlessEqual(f(i,2), 16, "%d" % i)
for i in range(17, 32): self.failUnlessEqual(f(i,2), 32, "%d" % i)
for i in range(33, 64): self.failUnlessEqual(f(i,2), 64, "%d" % i)
for i in range(65, 100): self.failUnlessEqual(f(i,2), 128, "%d" % i)
self.failUnlessEqual(f(0,3), 1)
self.failUnlessEqual(f(1,3), 1)
self.failUnlessEqual(f(2,3), 3)
self.failUnlessEqual(f(3,3), 3)
for i in range(4, 9): self.failUnlessEqual(f(i,3), 9, "%d" % i)
for i in range(10, 27): self.failUnlessEqual(f(i,3), 27, "%d" % i)
for i in range(28, 81): self.failUnlessEqual(f(i,3), 81, "%d" % i)
for i in range(82, 200): self.failUnlessEqual(f(i,3), 243, "%d" % i)
def test_ave(self):
f = mathutil.ave
self.failUnlessEqual(f([1,2,3]), 2)
self.failUnlessEqual(f([0,0,0,4]), 1)
self.failUnlessAlmostEqual(f([0.0, 1.0, 1.0]), .666666666666)
def failUnlessEqualContents(self, a, b):
self.failUnlessEqual(sorted(a), sorted(b))
def test_permute(self):
f = mathutil.permute
self.failUnlessEqualContents(f([]), [])
self.failUnlessEqualContents(f([1]), [[1]])
self.failUnlessEqualContents(f([1,2]), [[1,2], [2,1]])
self.failUnlessEqualContents(f([1,2,3]),
[[1,2,3], [1,3,2],
[2,1,3], [2,3,1],
[3,1,2], [3,2,1]])

97
libs/pyutil/test/current/test_time_format.py

@ -0,0 +1,97 @@
#!/usr/bin/env python
"""\
Test time_format.py
"""
import os, time, unittest
from pyutil import time_format, increasing_timer
class TimeUtilTestCase(unittest.TestCase):
def setUp(self):
pass
def tearDown(self):
pass
def test_iso8601_utc_time(self, timer=increasing_timer.timer):
ts1 = time_format.iso_utc(timer.time() - 20)
ts2 = time_format.iso_utc()
assert ts1 < ts2, "failed: %s < %s" % (ts1, ts2)
ts3 = time_format.iso_utc(timer.time() + 20)
assert ts2 < ts3, "failed: %s < %s" % (ts2, ts3)
def test_iso_utc_time_to_localseconds(self, timer=increasing_timer.timer):
# test three times of the year so that a DST problem would hopefully be triggered
t1 = int(timer.time() - 365*3600/3)
iso_utc_t1 = time_format.iso_utc(t1)
t1_2 = time_format.iso_utc_time_to_seconds(iso_utc_t1)
assert t1 == t1_2, (t1, t1_2)
t1 = int(timer.time() - (365*3600*2/3))
iso_utc_t1 = time_format.iso_utc(t1)
t1_2 = time_format.iso_utc_time_to_seconds(iso_utc_t1)
self.failUnlessEqual(t1, t1_2)
t1 = int(timer.time())
iso_utc_t1 = time_format.iso_utc(t1)
t1_2 = time_format.iso_utc_time_to_seconds(iso_utc_t1)
self.failUnlessEqual(t1, t1_2)
def test_epoch(self):
return self._help_test_epoch()
def test_epoch_in_London(self):
# Europe/London is a particularly troublesome timezone. Nowadays, its
# offset from GMT is 0. But in 1970, its offset from GMT was 1.
# (Apparently in 1970 Britain had redefined standard time to be GMT+1
# and stayed in standard time all year round, whereas today
# Europe/London standard time is GMT and Europe/London Daylight
# Savings Time is GMT+1.) The current implementation of
# time_format.iso_utc_time_to_seconds() breaks if the timezone is
# Europe/London. (As soon as this unit test is done then I'll change
# that implementation to something that works even in this case...)
origtz = os.environ.get('TZ')
os.environ['TZ'] = "Europe/London"
if hasattr(time, 'tzset'):
time.tzset()
try:
return self._help_test_epoch()
finally:
if origtz is None:
del os.environ['TZ']
else:
os.environ['TZ'] = origtz
if hasattr(time, 'tzset'):
time.tzset()
def _help_test_epoch(self):
origtzname = time.tzname
s = time_format.iso_utc_time_to_seconds("1970-01-01T00:00:01Z")
self.failUnlessEqual(s, 1.0)
s = time_format.iso_utc_time_to_seconds("1970-01-01_00:00:01Z")
self.failUnlessEqual(s, 1.0)
s = time_format.iso_utc_time_to_seconds("1970-01-01 00:00:01Z")
self.failUnlessEqual(s, 1.0)
self.failUnlessEqual(time_format.iso_utc(1.0), "1970-01-01 00:00:01Z")
self.failUnlessEqual(time_format.iso_utc(1.0, sep="_"),
"1970-01-01_00:00:01Z")
now = time.time()
isostr = time_format.iso_utc(now)
timestamp = time_format.iso_utc_time_to_seconds(isostr)
self.failUnlessEqual(int(timestamp), int(now))
def my_time():
return 1.0
self.failUnlessEqual(time_format.iso_utc(t=my_time),
"1970-01-01 00:00:01Z")
self.failUnlessRaises(ValueError,
time_format.iso_utc_time_to_seconds,
"invalid timestring")
s = time_format.iso_utc_time_to_seconds("1970-01-01 00:00:01.500Z")
self.failUnlessEqual(s, 1.5)
# Look for daylight-savings-related errors.
thatmomentinmarch = time_format.iso_utc_time_to_seconds("2009-03-20 21:49:02.226536Z")
self.failUnlessEqual(thatmomentinmarch, 1237585742.226536)
self.failUnlessEqual(origtzname, time.tzname)
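For reference, the epoch round trip that _help_test_epoch exercises, written as a standalone sketch (same calls and expected values as the assertions above):

from pyutil import time_format

iso = time_format.iso_utc(1.0)                                   # '1970-01-01 00:00:01Z'
assert time_format.iso_utc_time_to_seconds(iso) == 1.0
# '_' and 'T' separators and fractional seconds are also accepted
assert time_format.iso_utc_time_to_seconds("1970-01-01_00:00:01Z") == 1.0
assert time_format.iso_utc_time_to_seconds("1970-01-01 00:00:01.500Z") == 1.5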

124
libs/pyutil/test/current/test_verlib.py

@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
"""Tests for distutils.version."""
import unittest
import doctest
from pyutil.verlib import NormalizedVersion as V
from pyutil.verlib import IrrationalVersionError
from pyutil.verlib import suggest_normalized_version as suggest
class VersionTestCase(unittest.TestCase):
versions = ((V('1.0'), '1.0'),
(V('1.1'), '1.1'),
(V('1.2.3'), '1.2.3'),
(V('1.2'), '1.2'),
(V('1.2.3a4'), '1.2.3a4'),
(V('1.2c4'), '1.2c4'),
(V('1.2.3.4'), '1.2.3.4'),
(V('1.2.3.4.0b3'), '1.2.3.4b3'),
(V('1.2.0.0.0'), '1.2'),
(V('1.0.dev345'), '1.0.dev345'),
(V('1.0.post456.dev623'), '1.0.post456.dev623'))
def test_basic_versions(self):
for v, s in self.versions:
self.assertEquals(str(v), s)
def test_from_parts(self):
for v, s in self.versions:
v2 = V.from_parts(*v.parts)
self.assertEquals(v, v2)
self.assertEquals(str(v), str(v2))
def test_irrational_versions(self):
irrational = ('1', '1.2a', '1.2.3b', '1.02', '1.2a03',
'1.2a3.04', '1.2.dev.2', '1.2dev', '1.2.dev',
'1.2.dev2.post2', '1.2.post2.dev3.post4')
for s in irrational:
self.assertRaises(IrrationalVersionError, V, s)
def test_comparison(self):
r"""
>>> V('1.2.0') == '1.2'
Traceback (most recent call last):
...
TypeError: cannot compare NormalizedVersion and str
>>> V('1.2.0') == V('1.2')
True
>>> V('1.2.0') == V('1.2.3')
False
>>> V('1.2.0') < V('1.2.3')
True
>>> (V('1.0') > V('1.0b2'))
True
>>> (V('1.0') > V('1.0c2') > V('1.0c1') > V('1.0b2') > V('1.0b1')
... > V('1.0a2') > V('1.0a1'))
True
>>> (V('1.0.0') > V('1.0.0c2') > V('1.0.0c1') > V('1.0.0b2') > V('1.0.0b1')
... > V('1.0.0a2') > V('1.0.0a1'))
True
>>> V('1.0') < V('1.0.post456.dev623')
True
>>> V('1.0.post456.dev623') < V('1.0.post456') < V('1.0.post1234')
True
>>> (V('1.0a1')
... < V('1.0a2.dev456')
... < V('1.0a2')
... < V('1.0a2.1.dev456') # e.g. need to do a quick post release on 1.0a2
... < V('1.0a2.1')
... < V('1.0b1.dev456')
... < V('1.0b2')
... < V('1.0c1.dev456')
... < V('1.0c1')
... < V('1.0.dev7')
... < V('1.0.dev18')
... < V('1.0.dev456')
... < V('1.0.dev1234')
... < V('1.0')
... < V('1.0.post456.dev623') # development version of a post release
... < V('1.0.post456'))
True
"""
# must be a simpler way to call the docstrings
doctest.run_docstring_examples(self.test_comparison, globals(),
name='test_comparison')
def test_suggest_normalized_version(self):
self.assertEquals(suggest('1.0'), '1.0')
self.assertEquals(suggest('1.0-alpha1'), '1.0a1')
self.assertEquals(suggest('1.0c2'), '1.0c2')
self.assertEquals(suggest('walla walla washington'), None)
self.assertEquals(suggest('2.4c1'), '2.4c1')
# from setuptools
self.assertEquals(suggest('0.4a1.r10'), '0.4a1.post10')
self.assertEquals(suggest('0.7a1dev-r66608'), '0.7a1.dev66608')
self.assertEquals(suggest('0.6a9.dev-r41475'), '0.6a9.dev41475')
self.assertEquals(suggest('2.4preview1'), '2.4c1')
self.assertEquals(suggest('2.4pre1') , '2.4c1')
self.assertEquals(suggest('2.1-rc2'), '2.1c2')
# from pypi
self.assertEquals(suggest('0.1dev'), '0.1.dev0')
self.assertEquals(suggest('0.1.dev'), '0.1.dev0')
# we want to be able to parse Twisted
# development versions are like post releases in Twisted
self.assertEquals(suggest('9.0.0+r2363'), '9.0.0.post2363')
# pre-releases are using markers like "pre1"
self.assertEquals(suggest('9.0.0pre1'), '9.0.0c1')
# we want to be able to parse Tcl-TK
# they us "p1" "p2" for post releases
self.assertEquals(suggest('1.4p1'), '1.4.post1')
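A condensed usage sketch of the two entry points these tests cover, NormalizedVersion ordering and suggest_normalized_version coercion (values taken from the assertions above):

from pyutil.verlib import NormalizedVersion as V
from pyutil.verlib import suggest_normalized_version as suggest

# versions compare by their parsed parts, not as strings
assert V('1.0a1') < V('1.0b2') < V('1.0c1') < V('1.0') < V('1.0.post456')

# messy version strings can often be coerced into the normalized form
assert suggest('1.0-alpha1') == '1.0a1'
assert suggest('2.4pre1') == '2.4c1'
assert suggest('walla walla washington') is None    # hopeless strings give None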

23
libs/pyutil/test/current/test_version_class.py

@ -0,0 +1,23 @@
import unittest
from pyutil import version_class
V = version_class.Version
class T(unittest.TestCase):
def test_rc_regex_rejects_rc_suffix(self):
self.failUnlessRaises(ValueError, V, '9.9.9rc9')
def test_rc_regex_rejects_trailing_garbage(self):
self.failUnlessRaises(ValueError, V, '9.9.9c9HEYTHISISNTRIGHT')
def test_comparisons(self):
self.failUnless(V('1.0') < V('1.1'))
self.failUnless(V('1.0a1') < V('1.0'))
self.failUnless(V('1.0a1') < V('1.0b1'))
self.failUnless(V('1.0b1') < V('1.0c1'))
self.failUnless(V('1.0a1') < V('1.0a1-r99'))
self.failUnlessEqual(V('1.0a1.post987'), V('1.0a1-r987'))
self.failUnlessEqual(str(V('1.0a1.post999')), '1.0.0a1-r999')
self.failUnlessEqual(str(V('1.0a1-r999')), '1.0.0a1-r999')
self.failIfEqual(V('1.0a1'), V('1.0a1-r987'))

0
libs/pyutil/test/deprecated/__init__.py

115
libs/pyutil/test/deprecated/test_dictutil.py

@ -0,0 +1,115 @@
#!/usr/bin/env python
# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import random, sys, traceback, unittest
from pyutil.assertutil import _assert
from pyutil import dictutil
class EqButNotIs:
def __init__(self, x):
self.x = x
self.hash = int(random.randrange(0, 2**31))
def __repr__(self):
return "<%s %s>" % (self.__class__.__name__, self.x,)
def __hash__(self):
return self.hash
def __le__(self, other):
return self.x <= other
def __lt__(self, other):
return self.x < other
def __ge__(self, other):
return self.x >= other
def __gt__(self, other):
return self.x > other
def __ne__(self, other):
return self.x != other
def __eq__(self, other):
return self.x == other
class Testy(unittest.TestCase):
def _help_test_empty_dict(self, klass):
d1 = klass()
d2 = klass({})
self.failUnless(d1 == d2, "d1: %r, d2: %r" % (d1, d2,))
self.failUnless(len(d1) == 0)
self.failUnless(len(d2) == 0)
def _help_test_nonempty_dict(self, klass):
d1 = klass({'a': 1, 'b': "eggs", 3: "spam",})
d2 = klass({'a': 1, 'b': "eggs", 3: "spam",})
self.failUnless(d1 == d2)
self.failUnless(len(d1) == 3, "%s, %s" % (len(d1), d1,))
self.failUnless(len(d2) == 3)
def _help_test_eq_but_notis(self, klass):
d = klass({'a': 3, 'b': EqButNotIs(3), 'c': 3})
d.pop('b')
d.clear()
d['a'] = 3
d['b'] = EqButNotIs(3)
d['c'] = 3
d.pop('b')
d.clear()
d['b'] = EqButNotIs(3)
d['a'] = 3
d['c'] = 3
d.pop('b')
d.clear()
d['a'] = EqButNotIs(3)
d['c'] = 3
d['a'] = 3
d.clear()
fake3 = EqButNotIs(3)
fake7 = EqButNotIs(7)
d[fake3] = fake7
d[3] = 7
d[3] = 8
_assert(filter(lambda x: x is 8, d.itervalues()))
_assert(filter(lambda x: x is fake7, d.itervalues()))
_assert(not filter(lambda x: x is 7, d.itervalues())) # The real 7 should have been ejected by the d[3] = 8.
_assert(filter(lambda x: x is fake3, d.iterkeys()))
_assert(filter(lambda x: x is 3, d.iterkeys()))
d[fake3] = 8
d.clear()
d[3] = 7
fake3 = EqButNotIs(3)
fake7 = EqButNotIs(7)
d[fake3] = fake7
d[3] = 8
_assert(filter(lambda x: x is 8, d.itervalues()))
_assert(filter(lambda x: x is fake7, d.itervalues()))
_assert(not filter(lambda x: x is 7, d.itervalues())) # The real 7 should have been ejected by the d[3] = 8.
_assert(filter(lambda x: x is fake3, d.iterkeys()))
_assert(filter(lambda x: x is 3, d.iterkeys()))
d[fake3] = 8
def test_em(self):
for klass in (dictutil.UtilDict, dictutil.NumDict, dictutil.ValueOrderedDict,):
# print "name of class: ", klass
for helper in (self._help_test_empty_dict, self._help_test_nonempty_dict, self._help_test_eq_but_notis,):
# print "name of test func: ", helper
try:
helper(klass)
except:
(etype, evalue, realtb) = sys.exc_info()
traceback.print_exception(etype, evalue, realtb)
self.fail(evalue)
del realtb
def suite():
suite = unittest.makeSuite(Testy, 'test')
return suite
if __name__ == '__main__':
unittest.main()

36
libs/pyutil/test/deprecated/test_picklesaver.py

@ -0,0 +1,36 @@
#!/usr/bin/env python
# Copyright (c) 2002 Luke 'Artimage' Nelson
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import os
try:
from twisted.trial import unittest
except ImportError, le:
print "Skipping %s since it requires Twisted and Twisted could not be imported: %s" % (__name__, le,)
else:
from pyutil import PickleSaver, fileutil
class Thingie(PickleSaver.PickleSaver):
def __init__(self, fname, delay=30):
PickleSaver.PickleSaver.__init__(self, fname=fname, attrs={'tmp_store':'False'}, DELAY=delay)
class PickleSaverTest(unittest.TestCase):
def _test_save_now(self, fname):
thingie = Thingie(fname, delay=0)
thingie.tmp_store = 'True'
thingie.lazy_save() # Note: this instance was constructed with a save delay of 0.
def test_save_now(self):
"""
This test should create a lazy save object, save it with no delay and check if the file exists.
"""
tempdir = fileutil.NamedTemporaryDirectory()
fname = os.path.join(tempdir.name, "picklesavertest")
self._test_save_now(fname)
self.failUnless(os.path.isfile(fname), "The file [%s] does not exist." %(fname,))
tempdir.shutdown()
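A minimal sketch of the same pattern outside the test harness; the Config name is hypothetical, but the constructor arguments and the lazy_save() call mirror the Thingie class above:

import os
from pyutil import PickleSaver, fileutil

class Config(PickleSaver.PickleSaver):            # hypothetical subclass, mirroring Thingie
    def __init__(self, fname):
        PickleSaver.PickleSaver.__init__(self, fname=fname,
                                         attrs={'tmp_store': 'False'}, DELAY=0)

tempdir = fileutil.NamedTemporaryDirectory()
fname = os.path.join(tempdir.name, "config.pickle")
cfg = Config(fname)
cfg.tmp_store = 'True'
cfg.lazy_save()                                   # DELAY=0, so the pickle is written immediately
assert os.path.isfile(fname)
tempdir.shutdown()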

24
libs/pyutil/test/deprecated/test_xor.py

@ -0,0 +1,24 @@
#!/usr/bin/env python
# Copyright (c) 2002-2009 Zooko Wilcox-O'Hearn
# portions Copyright (c) 2001 Autonomous Zone Industries
# This file is part of pyutil; see README.rst for licensing terms.
#
import unittest
from pyutil.xor import xor
# unit tests
def _help_test(xf):
assert xf('\000', '\000') == '\000'
assert xf('\001', '\000') == '\001'
assert xf('\001', '\001') == '\000'
assert xf('\000\001', '\000\001') == '\000\000'
assert xf('\100\101', '\000\101') == '\100\000'
class Testy(unittest.TestCase):
def test_em(self):
for xorfunc in (xor.py_xor, xor.py_xor_simple, xor.xor,):
if callable(xorfunc):
# print "testing xorfunc ", xorfunc
_help_test(xorfunc)
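The helper below is a hypothetical pure-Python reference (not part of pyutil) that satisfies the same assertions _help_test makes of xor.py_xor, xor.py_xor_simple, and xor.xor:

def ref_xor(a, b):
    # byte-wise XOR of two equal-length Python 2 byte strings
    assert len(a) == len(b)
    return ''.join(chr(ord(x) ^ ord(y)) for x, y in zip(a, b))

assert ref_xor('\x00', '\x00') == '\x00'
assert ref_xor('\x01', '\x01') == '\x00'
assert ref_xor('\x40\x41', '\x00\x41') == '\x40\x00'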

0
libs/pyutil/test/out_of_shape/__init__.py

454
libs/pyutil/test/out_of_shape/test_cache.py

@ -0,0 +1,454 @@
#!/usr/bin/env python
# Copyright (c) 2002-2010 Zooko "Zooko" Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import random, unittest
from pyutil.assertutil import _assert
from pyutil.humanreadable import hr
from pyutil import memutil
from pyutil import cache
class Bencher:
def __init__(self, klass, MAXREPS=2**8, MAXTIME=5):
print klass
self.klass = klass
self.MAXREPS = MAXREPS
self.MAXTIME = MAXTIME
self.d = {}
self.lrun = None
def _generic_benchmarking_init(self, n):
self.d.clear()
global lrun
self.lrun = self.klass(maxsize=n)
for i in range(n):
self.d[i] = i
self.lrun[n+i] = n+i
def _benchmark_init(self, n):
MAXSIZE=n/2
d2 = self.klass(initialdata=self.d, maxsize=MAXSIZE)
assert len(d2) == min(len(self.d), MAXSIZE)
return True
def _benchmark_update(self, n):
MAXSIZE=n/2
d2 = self.klass(maxsize=MAXSIZE)
assert len(d2) == 0
d2.update(self.d)
assert len(d2) == min(len(self.d), MAXSIZE)
return True
def _benchmark_insert(self, n):
MAXSIZE=n/2
d2 = self.klass(maxsize=MAXSIZE)
assert len(d2) == 0
for k, v, in self.d.iteritems():
d2[k] = v
assert len(d2) == min(len(self.d), MAXSIZE)
return True
def _benchmark_init_and_popitem(self, n):
MAXSIZE=n/2
d2 = self.klass(initialdata=self.d, maxsize=MAXSIZE)
assert len(d2) == min(len(self.d), MAXSIZE)
for i in range(len(d2), 0, -1):
assert len(d2) == i
d2.popitem()
return True
def _benchmark_init_and_has_key_and_del(self, n):
MAXSIZE=n/2
d2 = self.klass(initialdata=self.d, maxsize=MAXSIZE)
assert len(d2) == min(len(self.d), MAXSIZE)
for k in self.d.iterkeys():
if d2.has_key(k):
del d2[k]
return True
def _benchmark_init_and_remove(self, n):
MAXSIZE=n/2
d2 = self.klass(initialdata=self.d, maxsize=MAXSIZE)
assert len(d2) == min(len(self.d), MAXSIZE)
for k in self.d.iterkeys():
d2.remove(k, strictkey=False)
return True
def bench(self, BSIZES=(128, 250, 2048, 5000, 2**13, 2**20,)):
from pyutil import benchutil
funcs = ("_benchmark_insert", "_benchmark_init_and_has_key_and_del", "_benchmark_init_and_remove", "_benchmark_init_and_popitem", "_benchmark_update", "_benchmark_init",)
max = 0
for func in funcs:
if len(func) > max:
max = len(func)
for func in funcs:
print func + " " * (max + 1 - len(func))
for BSIZE in BSIZES:
f = getattr(self, func)
benchutil.rep_bench(f, BSIZE, self._generic_benchmarking_init, MAXREPS=self.MAXREPS, MAXTIME=self.MAXTIME)
def quick_bench():
Bencher(cache.LRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15, 2**16,))
Bencher(cache.LinkedListLRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15,))
Bencher(cache.SmallLRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15,))
def slow_bench():
Bencher(cache.LRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 21)])
Bencher(cache.LinkedListLRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 21)])
Bencher(cache.SmallLRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 17)])
MUCHADDINGSIZE=2**4
MUCHADDINGNUM = 2**4
# The following parameters are for testing for memory leakage.
MIN_SLOPE = 512.0 # If it leaks less than 512.0 bytes per iteration, then it's probably just some kind of noise from the interpreter or something...
SAMPLES = 2**5
# MIN_SLOPE is high because SAMPLES is low, which is because taking a statistically useful number of samples takes too long.
# For a *good* test, turn samples up as high as you can stand (maybe 2**10) and set MIN_SLOPE to about 1.0.
# For a *really* good test, add a variance measure to memutil.measure_mem_leakage(), and only consider it to be leaking if the slope is > 0.1 *and* is a "pretty good" fit for the data.
# MIN_SLOPE = 1.0
# SAMPLES = 2**10
class Testy(unittest.TestCase):
def _test_empty_lookup(self, d) :
self.failUnless(d.get('spam') is None)
def _test_key_error(self, C) :
d = C()
try:
d['spam']
self.fail(d)
except KeyError :
pass
def _test_insert_and_get(self, d) :
d.insert("spam", "eggs")
d["spam2"] = "eggs2"
self.failUnless(d.get("spam") == "eggs", str(d))
self.failUnless(d.get("spam2") == "eggs2")
self.failUnless(d["spam"] == "eggs")
self.failUnless(d["spam2"] == "eggs2")
def _test_insert_and_remove(self, d):
d.insert('spam', "eggs")
self.failUnless(d.has_key('spam'))
self.failUnless(d.get('spam') == "eggs")
self.failUnless(d['spam'] == "eggs")
x = d.remove('spam')
self.failUnless(x == "eggs", "x: %s" % `x`)
self.failUnless(not d.has_key('spam'))
d['spam'] = "eggs"
self.failUnless(d.has_key('spam'))
self.failUnless(d.get('spam') == "eggs")
self.failUnless(d['spam'] == "eggs")
del d['spam']
self.failUnless(not d.has_key('spam'))
def _test_setdefault(self, d):
d.setdefault('spam', "eggs")
self.failUnless(d.has_key('spam'))
self.failUnless(d.get('spam') == "eggs")
self.failUnless(d['spam'] == "eggs")
x = d.remove('spam')
self.failUnless(x == "eggs", "x: %s" % `x`)
self.failUnless(not d.has_key('spam'))
def _test_extracted_bound_method(self, d):
insmeth = d.insert
insmeth('spammy', "eggsy")
self.failUnless(d.get('spammy') == "eggsy")
def _test_extracted_unbound_method(self, d):
insumeth = d.__class__.insert
insumeth(d, 'spammy', "eggsy")
self.failUnless(d.get('spammy') == "eggsy")
def _test_unbound_method(self, C, d):
umeth = C.insert
umeth(d, 'spammy', "eggsy")
self.failUnless(d.get('spammy') == "eggsy")
def _test_clear(self, d):
d[11] = 11
d._assert_invariants()
self.failUnless(len(d) == 1)
d.clear()
d._assert_invariants()
self.failUnless(len(d) == 0)
def _test_update(self, d):
self.failUnless(d._assert_invariants())
d['b'] = 99
self.failUnless(d._assert_invariants())
d2={ 'a': 0, 'b': 1, 'c': 2,}
d.update(d2)
self.failUnless(d._assert_invariants())
self.failUnless(d.get('a') == 0, "d.get('a'): %s" % d.get('a'))
self.failUnless(d._assert_invariants())
self.failUnless(d.get('b') == 1, "d.get('b'): %s" % d.get('b'))
self.failUnless(d._assert_invariants())
self.failUnless(d.get('c') == 2)
self.failUnless(d._assert_invariants())
def _test_popitem(self, C):
c = C({"a": 1})
res = c.popitem()
_assert(res == ("a", 1,), C, c, res)
self.failUnless(res == ("a", 1,))
def _test_iterate_items(self, C):
c = C({"a": 1})
i = c.iteritems()
x = i.next()
self.failUnless(x == ("a", 1,))
try:
i.next()
self.fail() # Should have gotten StopIteration exception
except StopIteration:
pass
def _test_iterate_keys(self, C):
c = C({"a": 1})
i = c.iterkeys()
x = i.next()
self.failUnless(x == "a")
try:
i.next()
self.fail() # Should have gotten StopIteration exception
except StopIteration:
pass
def _test_iterate_values(self, C):
c = C({"a": 1})
i = c.itervalues()
x = i.next()
self.failUnless(x == 1)
try:
i.next()
self.fail() # Should have gotten StopIteration exception
except StopIteration:
pass
def _test_LRU_much_adding_some_removing(self, C):
c = C(maxsize=MUCHADDINGSIZE)
for i in range(MUCHADDINGNUM):
c[i] = i
if (i % 400) == 0:
k = random.choice(c.keys())
del c[k]
for i in range(MUCHADDINGSIZE):
c[i] = i
self.failUnless(len(c) == MUCHADDINGSIZE)
def _test_LRU_1(self, C):
c = C(maxsize=10)
c[11] = 11
c._assert_invariants()
c[11] = 11
c._assert_invariants()
c[11] = 1001
c._assert_invariants()
c[11] = 11
c._assert_invariants()
c[11] = 1001
c._assert_invariants()
c[11] = 1001
c._assert_invariants()
c[11] = 1001
c._assert_invariants()
def _test_LRU_2(self, C):
c = C(maxsize=10)
c[11] = 11
c._assert_invariants()
del c[11]
c._assert_invariants()
c[11] = 11
c._assert_invariants()
c[11] = 11
c._assert_invariants()
def _test_LRU_3(self, C):
c = C(maxsize=10)
c[11] = 11
c._assert_invariants()
c[11] = 12
c._assert_invariants()
c[11] = 13
c._assert_invariants()
del c[11]
c._assert_invariants()
c[11] = 14
c._assert_invariants()
c[11] = 15
c._assert_invariants()
c[11] = 16
c._assert_invariants()
def _test_LRU_full(self, C):
c = C(maxsize=10)
c._assert_invariants()
for i in xrange(11):
c._assert_invariants()
c[i] = i
c._assert_invariants()
self.failUnless(len(c) == 10)
self.failUnless(10 in c.values(), c.values())
self.failUnless(0 not in c.values())
del c[1]
c._assert_invariants()
self.failUnless(1 not in c.values())
self.failUnless(len(c) == 9)
c[11] = 11
c._assert_invariants()
self.failUnless(len(c) == 10)
self.failUnless(1 not in c.values())
self.failUnless(11 in c.values())
del c[11]
c._assert_invariants()
c[11] = 11
c._assert_invariants()
self.failUnless(len(c) == 10)
self.failUnless(1 not in c.values())
self.failUnless(11 in c.values())
c[11] = 11
c._assert_invariants()
self.failUnless(len(c) == 10)
self.failUnless(1 not in c.values())
self.failUnless(11 in c.values())
for i in xrange(200):
c[i] = i
c._assert_invariants()
self.failUnless(199 in c.values())
self.failUnless(190 in c.values())
def _test_LRU_has_key(self, C):
c = C(maxsize=10)
c._assert_invariants()
for i in xrange(11):
c._assert_invariants()
c[i] = i
c._assert_invariants()
self.failUnless(len(c) == 10)
self.failUnless(10 in c.values())
self.failUnless(0 not in c.values())
# c.has_key(1) # this touches `1' and makes it fresher so that it will live and `2' will die next time we overfill.
c[1] = 1 # this touches `1' and makes it fresher so that it will live and `2' will die next time we overfill.
c._assert_invariants()
c[99] = 99
c._assert_invariants()
self.failUnless(len(c) == 10)
self.failUnless(1 in c.values(), "C: %s, c.values(): %s" % (hr(C), hr(c.values(),),))
self.failUnless(not 2 in c.values())
self.failUnless(99 in c.values())
def _test_LRU_not_overfull_on_idempotent_add(self, C):
c = C(maxsize=10)
for i in xrange(11):
c[i] = i
c[1] = "spam"
# Now 1 is the freshest, so 2 is the next one that would be removed *if* we went over limit.
c[3] = "eggs"
self.failUnless(c.has_key(2))
self.failUnless(len(c) == 10)
c._assert_invariants()
def _test_LRU_overflow_on_update(self, C):
d = C(maxsize=10)
self.failUnless(d._assert_invariants())
d2 = {}
for i in range(12):
d2[i] = i
d.update(d2)
self.failUnless(d._assert_invariants())
self.failUnless(len(d) == 10)
def _test_LRU_overflow_on_init(self, C):
d2 = {}
for i in range(12):
d2[i] = i
d = C(d2, maxsize=10)
self.failUnless(d._assert_invariants())
self.failUnless(len(d) == 10)
def _test_em(self):
for klass in (cache.LRUCache, cache.SmallLRUCache,):
for testfunc in (self._test_empty_lookup, self._test_insert_and_get, self._test_insert_and_remove, self._test_extracted_bound_method, self._test_extracted_unbound_method, self._test_clear, self._test_update, self._test_setdefault,):
testfunc(klass())
for testfunc in (self._test_popitem, self._test_iterate_items, self._test_iterate_keys, self._test_iterate_values, self._test_key_error, ):
testfunc(klass)
self._test_unbound_method(klass, klass())
for klass in (cache.LRUCache, cache.SmallLRUCache,):
for testfunc in (self._test_LRU_1, self._test_LRU_2, self._test_LRU_3, self._test_LRU_full, self._test_LRU_has_key, self._test_LRU_not_overfull_on_idempotent_add, self._test_LRU_overflow_on_update, self._test_LRU_overflow_on_init,):
testfunc(klass)
def test_em(self):
self._test_em()
def _mem_test_LRU_much_adding_some_removing(self):
for klass in (cache.LRUCache, cache.SmallLRUCache,):
return self._test_LRU_much_adding_some_removing(klass)
def test_mem_leakage(self):
try:
self._test_mem_leakage()
except memutil.NotSupportedException:
print "Skipping memory leak test since measurement of current mem usage isn't implemented on this platform."
pass
del test_mem_leakage # This test takes too long.
def _test_mem_leakage(self):
# measure one and throw it away, in order to reach a "steady state" in terms of initialization of memory state.
memutil.measure_mem_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
slope = memutil.measure_mem_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
self.failUnless(slope <= MIN_SLOPE, "%s leaks memory at a rate of approximately %s system bytes per invocation" % (self.test_em, "%0.3f" % slope,))
def test_mem_leakage_much_adding_some_removing(self):
try:
self._test_mem_leakage_much_adding_some_removing()
except memutil.NotSupportedException:
print "Skipping memory leak test since measurement of current mem usage isn't implemented on this platform."
pass
del test_mem_leakage_much_adding_some_removing # This test takes too long.
def _test_mem_leakage_much_adding_some_removing(self):
# measure one and throw it away, in order to reach a "steady state" in terms of initialization of memory state.
memutil.measure_mem_leakage(self._mem_test_LRU_much_adding_some_removing, SAMPLES, iterspersample=2**0)
slope = memutil.measure_mem_leakage(self._mem_test_LRU_much_adding_some_removing, SAMPLES, iterspersample=2**0)
self.failUnless(slope <= MIN_SLOPE, "%s leaks memory at a rate of approximately %s system bytes per invocation" % (self._mem_test_LRU_much_adding_some_removing, "%0.3f" % slope,))
def test_obj_leakage(self):
self._test_obj_leakage()
del test_obj_leakage # This test takes too long.
def _test_obj_leakage(self):
# measure one and throw it away, in order to reach a "steady state" in terms of initialization of objects state.
memutil.measure_obj_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
slope = memutil.measure_obj_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
self.failUnless(slope <= MIN_SLOPE, "%s leaks objects at a rate of approximately %s system bytes per invocation" % (self.test_em, "%0.3f" % slope,))
def test_obj_leakage_much_adding_some_removing(self):
self._test_obj_leakage_much_adding_some_removing()
del test_obj_leakage_much_adding_some_removing # This test takes too long.
def _test_obj_leakage_much_adding_some_removing(self):
# measure one and throw it away, in order to reach a "steady state" in terms of initialization of objects state.
memutil.measure_obj_leakage(self._mem_test_LRU_much_adding_some_removing, SAMPLES, iterspersample=2**0)
slope = memutil.measure_obj_leakage(self._mem_test_LRU_much_adding_some_removing, SAMPLES, iterspersample=2**0)
self.failUnless(slope <= MIN_SLOPE, "%s leaks objects at a rate of approximately %s system bytes per invocation" % (self._mem_test_LRU_much_adding_some_removing, "%0.3f" % slope,))

441
libs/pyutil/test/out_of_shape/test_odict.py

@ -0,0 +1,441 @@
#!/usr/bin/env python
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import random, unittest
from pyutil.humanreadable import hr
from pyutil import memutil
from pyutil import odict
class Bencher:
def __init__(self, klass, MAXREPS=2**8, MAXTIME=5):
print klass
self.klass = klass
self.MAXREPS = MAXREPS
self.MAXTIME = MAXTIME
self.d = {}
self.lrun = None
def _generic_benchmarking_init(self, n):
self.d.clear()
self.lrun = self.klass()
for i in range(n):
self.d[i] = i
self.lrun[n+i] = n+i
def _benchmark_init(self, n):
d2 = self.klass(initialdata=self.d)
assert len(d2) == len(self.d)
return True
def _benchmark_update(self, n):
d2 = self.klass()
assert len(d2) == 0
d2.update(self.d)
assert len(d2) == len(self.d)
return True
def _benchmark_insert(self, n):
d2 = self.klass()
assert len(d2) == 0
for k, v, in self.d.iteritems():
d2[k] = v
assert len(d2) == len(self.d)
return True
def _benchmark_init_and_popitem(self, n):
d2 = self.klass(initialdata=self.d)
assert len(d2) == len(self.d)
for i in range(len(d2), 0, -1):
assert len(d2) == i
d2.popitem()
return True
def _benchmark_init_and_has_key_and_del(self, n):
d2 = self.klass(initialdata=self.d)
assert len(d2) == len(self.d)
for k in self.d.iterkeys():
if d2.has_key(k):
del d2[k]
return True
def _benchmark_init_and_remove(self, n):
d2 = self.klass(initialdata=self.d)
assert len(d2) == len(self.d)
for k in self.d.iterkeys():
d2.remove(k, strictkey=False)
return True
def bench(self, BSIZES=(128, 250, 2048, 5000, 2**13, 2**20,)):
from pyutil import benchutil
funcs = ("_benchmark_insert", "_benchmark_init_and_has_key_and_del", "_benchmark_init_and_remove", "_benchmark_init_and_popitem", "_benchmark_update", "_benchmark_init",)
max = 0
for func in funcs:
if len(func) > max:
max = len(func)
for func in funcs:
print func + " " * (max + 1 - len(func))
for BSIZE in BSIZES:
f = getattr(self, func)
benchutil.rep_bench(f, BSIZE, self._generic_benchmarking_init, MAXREPS=self.MAXREPS, MAXTIME=self.MAXTIME)
def quick_bench():
Bencher(odict.LRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15, 2**16,))
Bencher(odict.LinkedListLRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15,))
Bencher(odict.SmallLRUCache, MAXTIME=2).bench(BSIZES=(2**7, 2**12, 2**14, 2**15,))
def slow_bench():
Bencher(odict.LRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 21)])
Bencher(odict.LinkedListLRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 21)])
Bencher(odict.SmallLRUCache, MAXTIME=5).bench(BSIZES=[2**x for x in range(7, 17)])
MUCHADDINGSIZE=2**4
# The following parameters are for testing for memory leakage.
MIN_SLOPE = 512.0 # If it leaks less than 512.0 bytes per iteration, then it's probably just some kind of noise from the interpreter or something...
SAMPLES = 2**5
# MIN_SLOPE is high because SAMPLES is low, which is because taking a statistically useful number of samples takes too long.
# For a *good* test, turn samples up as high as you can stand (maybe 2**10) and set MIN_SLOPE to about 1.0.
# For a *really* good test, add a variance measure to memutil.measure_mem_leakage(), and only consider it to be leaking if the slope is > 0.1 *and* is a "pretty good" fit for the data.
# MIN_SLOPE = 1.0
# SAMPLES = 2**10
class Testy(unittest.TestCase):
def _test_empty_lookup(self, d) :
self.failUnless(d.get('spam') is None)
def _test_key_error(self, C) :
d = C()
try:
d['spam']
self.fail(d)
except KeyError :
pass
def _test_insert_and_get_and_items(self, d) :
d.insert("spam", "eggs")
d["spam2"] = "eggs2"
self.failUnless(d.get("spam") == "eggs", str(d))
self.failUnless(d.get("spam2") == "eggs2")
self.failUnless(d["spam"] == "eggs")
self.failUnless(d["spam2"] == "eggs2")
self.failUnlessEqual(d.items(), [("spam", "eggs"), ("spam2", "eggs2")], d)
def _test_move_to_most_recent(self, d) :
d.insert("spam", "eggs")
d["spam2"] = "eggs2"
self.failUnless(d.get("spam") == "eggs", str(d))
self.failUnless(d.get("spam2") == "eggs2")
self.failUnless(d["spam"] == "eggs")
self.failUnless(d["spam2"] == "eggs2")
self.failUnlessEqual(d.items(), [("spam", "eggs"), ("spam2", "eggs2")])
d.move_to_most_recent("spam")
self.failUnlessEqual(d.items(), [("spam2", "eggs2"), ("spam", "eggs")])
def _test_insert_and_remove(self, d):
d.insert('spam', "eggs")
self.failUnless(d.has_key('spam'))
self.failUnless(d.get('spam') == "eggs")
self.failUnless(d['spam'] == "eggs")
self.failUnlessEqual(d.items(), [("spam", "eggs")])
x = d.remove('spam')
self.failUnless(x == "eggs", "x: %s" % `x`)
self.failUnless(not d.has_key('spam'))
self.failUnlessEqual(d.items(), [])
d['spam'] = "eggsy"
self.failUnless(d.has_key('spam'))
self.failUnless(d.get('spam') == "eggsy")
self.failUnless(d['spam'] == "eggsy")
self.failUnlessEqual(d.items(), [("spam", "eggsy")])
del d['spam']
self.failUnless(not d.has_key('spam'))
self.failUnlessEqual(d.items(), [])
def _test_setdefault(self, d):
d.setdefault('spam', "eggs")
self.failUnless(d.has_key('spam'))
self.failUnless(d.get('spam') == "eggs")
self.failUnless(d['spam'] == "eggs")
self.failUnlessEqual(d.items(), [("spam", "eggs")])
x = d.remove('spam')
self.failUnless(x == "eggs", "x: %s" % `x`)
self.failUnless(not d.has_key('spam'))
self.failUnlessEqual(d.items(), [])
def _test_extracted_bound_method(self, d):
insmeth = d.insert
insmeth('spammy', "eggsy")
self.failUnless(d.get('spammy') == "eggsy")
def _test_extracted_unbound_method(self, d):
insumeth = d.__class__.insert
insumeth(d, 'spammy', "eggsy")
self.failUnless(d.get('spammy') == "eggsy")
def _test_unbound_method(self, C, d):
umeth = C.insert
umeth(d, 'spammy', "eggsy")
self.failUnless(d.get('spammy') == "eggsy")
def _test_clear(self, d):
d[11] = 11
d._assert_invariants()
self.failUnless(len(d) == 1)
d.clear()
d._assert_invariants()
self.failUnless(len(d) == 0)
self.failUnlessEqual(d.items(), [])
def _test_update_from_dict(self, d):
self.failUnless(d._assert_invariants())
d['b'] = 99
self.failUnless(d._assert_invariants())
d2={ 'a': 0, 'b': 1, 'c': 2,}
d.update(d2)
self.failUnless(d._assert_invariants())
self.failUnless(d.get('a') == 0, "d.get('a'): %s" % d.get('a'))
self.failUnless(d._assert_invariants())
self.failUnless(d.get('b') == 1, "d.get('b'): %s" % d.get('b'))
self.failUnless(d._assert_invariants())
self.failUnless(d.get('c') == 2)
self.failUnless(d._assert_invariants())
def _test_update_from_odict(self, d):
self.failUnless(d._assert_invariants())
d['b'] = 99
self.failUnless(d._assert_invariants())
d2 = odict.OrderedDict()
d2['a'] = 0
d2['b'] = 1
d2['c'] = 2
d.update(d2)
self.failUnless(d._assert_invariants())
self.failUnless(d.get('a') == 0, "d.get('a'): %s" % d.get('a'))
self.failUnless(d._assert_invariants())
self.failUnless(d.get('b') == 1, "d.get('b'): %s" % d.get('b'))
self.failUnless(d._assert_invariants())
self.failUnless(d.get('c') == 2)
self.failUnless(d._assert_invariants())
self.failUnlessEqual(d.items(), [("b", 1), ("a", 0), ("c", 2)])
def _test_popitem(self, C):
c = C({"a": 1})
res = c.popitem()
self.failUnlessEqual(res, ("a", 1,))
c["a"] = 1
c["b"] = 2
res = c.popitem()
self.failUnlessEqual(res, ("b", 2,))
def _test_pop(self, C):
c = C({"a": 1})
res = c.pop()
self.failUnlessEqual(res, "a")
c["a"] = 1
c["b"] = 2
res = c.pop()
self.failUnlessEqual(res, "b")
def _test_iterate_items(self, C):
c = C({"a": 1})
c["b"] = 2
i = c.iteritems()
x = i.next()
self.failUnlessEqual(x, ("a", 1,))
x = i.next()
self.failUnlessEqual(x, ("b", 2,))
try:
i.next()
self.fail() # Should have gotten StopIteration exception
except StopIteration:
pass
def _test_iterate_keys(self, C):
c = C({"a": 1})
c["b"] = 2
i = c.iterkeys()
x = i.next()
self.failUnlessEqual(x, "a")
x = i.next()
self.failUnlessEqual(x, "b")
try:
i.next()
self.fail() # Should have gotten StopIteration exception
except StopIteration:
pass
def _test_iterate_values(self, C):
c = C({"a": 1})
c["b"] = 2
i = c.itervalues()
x = i.next()
self.failUnless(x == 1)
x = i.next()
self.failUnless(x == 2)
try:
i.next()
self.fail() # Should have gotten StopIteration exception
except StopIteration:
pass
def _test_much_adding_some_removing(self, C):
c = C()
for i in range(MUCHADDINGSIZE):
c[i] = i
if (i % 4) == 0:
k = random.choice(c.keys())
del c[k]
for i in range(MUCHADDINGSIZE):
c[i] = i
self.failUnlessEqual(len(c), MUCHADDINGSIZE)
def _test_1(self, C):
c = C()
c[11] = 11
c._assert_invariants()
c[11] = 11
c._assert_invariants()
c[11] = 1001
c._assert_invariants()
c[11] = 11
c._assert_invariants()
c[11] = 1001
c._assert_invariants()
c[11] = 1001
c._assert_invariants()
c[11] = 1001
c._assert_invariants()
def _test_2(self, C):
c = C()
c[11] = 11
c._assert_invariants()
del c[11]
c._assert_invariants()
c[11] = 11
c._assert_invariants()
c[11] = 11
c._assert_invariants()
def _test_3(self, C):
c = C()
c[11] = 11
c._assert_invariants()
c[11] = 12
c._assert_invariants()
c[11] = 13
c._assert_invariants()
del c[11]
c._assert_invariants()
c[11] = 14
c._assert_invariants()
c[11] = 15
c._assert_invariants()
c[11] = 16
c._assert_invariants()
def _test_has_key(self, C):
c = C()
c._assert_invariants()
for i in xrange(11):
c._assert_invariants()
c[i] = i
c._assert_invariants()
del c[0]
self.failUnless(len(c) == 10)
self.failUnless(10 in c.values())
self.failUnless(0 not in c.values())
c.has_key(1) # this touches `1' but does not make it fresher so that it will get popped next time we pop.
c[1] = 1 # this touches `1' but does not make it fresher so that it will get popped.
c._assert_invariants()
x = c.pop()
self.failUnlessEqual(x, 10)
c[99] = 99
c._assert_invariants()
self.failUnless(len(c) == 10)
self.failUnless(1 in c.values(), "C: %s, c.values(): %s" % (hr(C), hr(c.values(),),))
self.failUnless(2 in c.values(), "C: %s, c.values(): %s" % (hr(C), hr(c.values(),),))
self.failIf(10 in c.values(), "C: %s, c.values(): %s" % (hr(C), hr(c.values(),),))
self.failUnless(99 in c.values())
def _test_em(self):
for klass in (odict.OrderedDict,):
for testfunc in (self._test_empty_lookup, self._test_insert_and_get_and_items, self._test_insert_and_remove, self._test_extracted_bound_method, self._test_extracted_unbound_method, self._test_clear, self._test_update_from_dict, self._test_update_from_odict, self._test_setdefault,):
testfunc(klass())
for testfunc in (self._test_pop, self._test_popitem, self._test_iterate_items, self._test_iterate_keys, self._test_iterate_values, self._test_key_error, ):
testfunc(klass)
self._test_unbound_method(klass, klass())
for klass in (odict.OrderedDict,):
for testfunc in (self._test_1, self._test_2, self._test_3, self._test_has_key,):
testfunc(klass)
def test_em(self):
self._test_em()
def _mem_test_much_adding_some_removing(self):
for klass in (odict.LRUCache, odict.SmallLRUCache,):
return self._test_much_adding_some_removing(klass)
def test_mem_leakage(self):
try:
self._test_mem_leakage()
except memutil.NotSupportedException:
print "Skipping memory leak test since measurement of current mem usage isn't implemented on this platform."
pass
del test_mem_leakage # This test takes too long.
def _test_mem_leakage(self):
# measure one and throw it away, in order to reach a "steady state" in terms of initialization of memory state.
memutil.measure_mem_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
slope = memutil.measure_mem_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
self.failUnless(slope <= MIN_SLOPE, "%s leaks memory at a rate of approximately %s system bytes per invocation" % (self.test_em, "%0.3f" % slope,))
def test_mem_leakage_much_adding_some_removing(self):
try:
self._test_mem_leakage_much_adding_some_removing()
except memutil.NotSupportedException:
print "Skipping memory leak test since measurement of current mem usage isn't implemented on this platform."
pass
del test_mem_leakage_much_adding_some_removing # This test takes too long.
def _test_mem_leakage_much_adding_some_removing(self):
# measure one and throw it away, in order to reach a "steady state" in terms of initialization of memory state.
memutil.measure_mem_leakage(self._mem_test_much_adding_some_removing, SAMPLES, iterspersample=2**0)
slope = memutil.measure_mem_leakage(self._mem_test_much_adding_some_removing, SAMPLES, iterspersample=2**0)
self.failUnless(slope <= MIN_SLOPE, "%s leaks memory at a rate of approximately %s system bytes per invocation" % (self._mem_test_much_adding_some_removing, "%0.3f" % slope,))
def test_obj_leakage(self):
self._test_obj_leakage()
del test_obj_leakage # This test takes too long.
def _test_obj_leakage(self):
# measure one and throw it away, in order to reach a "steady state" in terms of initialization of objects state.
memutil.measure_obj_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
slope = memutil.measure_obj_leakage(self.test_em, max(2**3, SAMPLES/2**3), iterspersample=2**0)
self.failUnless(slope <= MIN_SLOPE, "%s leaks objects at a rate of approximately %s system bytes per invocation" % (self.test_em, "%0.3f" % slope,))
def test_obj_leakage_much_adding_some_removing(self):
self._test_obj_leakage_much_adding_some_removing()
del test_obj_leakage_much_adding_some_removing # This test takes too long.
def _test_obj_leakage_much_adding_some_removing(self):
# measure one and throw it away, in order to reach a "steady state" in terms of initialization of objects state.
memutil.measure_obj_leakage(self._mem_test_much_adding_some_removing, SAMPLES, iterspersample=2**0)
slope = memutil.measure_obj_leakage(self._mem_test_much_adding_some_removing, SAMPLES, iterspersample=2**0)
self.failUnless(slope <= MIN_SLOPE, "%s leaks objects at a rate of approximately %s system bytes per invocation" % (self._mem_test_much_adding_some_removing, "%0.3f" % slope,))

30
libs/pyutil/test/out_of_shape/test_strutil.py

@ -0,0 +1,30 @@
#!/usr/bin/env python
# Copyright (c) 2004-2009 Zooko "Zooko" Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import unittest
from pyutil.assertutil import _assert
from pyutil import strutil
class Teststrutil(unittest.TestCase):
def test_short_input(self):
self.failUnless(strutil.pop_trailing_newlines("\r\n") == "")
self.failUnless(strutil.pop_trailing_newlines("\r") == "")
self.failUnless(strutil.pop_trailing_newlines("x\r\n") == "x")
self.failUnless(strutil.pop_trailing_newlines("x\r") == "x")
def test_split(self):
_assert(strutil.split_on_newlines("x\r\ny") == ["x", "y",], strutil.split_on_newlines("x\r\ny"))
_assert(strutil.split_on_newlines("x\r\ny\r\n") == ["x", "y", '',], strutil.split_on_newlines("x\r\ny\r\n"))
_assert(strutil.split_on_newlines("x\n\ny\n\n") == ["x", '', "y", '', '',], strutil.split_on_newlines("x\n\ny\n\n"))
def test_commonprefix(self):
_assert(strutil.commonprefix(["foo","foobarooo", "foosplat",]) == 'foo', strutil.commonprefix(["foo","foobarooo", "foosplat",]))
_assert(strutil.commonprefix(["foo","afoobarooo", "foosplat",]) == '', strutil.commonprefix(["foo","afoobarooo", "foosplat",]))
def test_commonsuffix(self):
_assert(strutil.commonsuffix(["foo","foobarooo", "foosplat",]) == '', strutil.commonsuffix(["foo","foobarooo", "foosplat",]))
_assert(strutil.commonsuffix(["foo","foobarooo", "foosplato",]) == 'o', strutil.commonsuffix(["foo","foobarooo", "foosplato",]))
_assert(strutil.commonsuffix(["foo","foobarooofoo", "foosplatofoo",]) == 'foo', strutil.commonsuffix(["foo","foobarooofoo", "foosplatofoo",]))

77
libs/pyutil/test/out_of_shape/test_zlibutil.py

@ -0,0 +1,77 @@
#!/usr/bin/env python
import unittest
from pyutil import randutil
from pyutil import zlibutil
class Accumulator:
def __init__(self):
self.buf = ''
def write(self, str):
self.buf += str
def make_decomp(realdecomp):
def decomp(str, maxlen, maxmem):
d = Accumulator()
realdecomp(str, d, maxlen, maxmem)
return d.buf
return decomp
def genrandstr(strlen):
return randutil.insecurerandstr(strlen)
def genbombstr(strlen):
return '0' * strlen
MAXMEM=65*2**20
class ZlibTestCase(unittest.TestCase):
def _help_test(self, genstring, decomp, strlen):
s = genstring(strlen)
cs = zlibutil.zlib.compress(s)
s2 = decomp(cs, maxlen=strlen, maxmem=strlen*2**3 + zlibutil.MINMAXMEM)
self.failUnless(s == s2)
s2 = decomp(cs, maxlen=strlen, maxmem=strlen*2**6 + zlibutil.MINMAXMEM)
self.failUnless(s == s2)
self.failUnlessRaises(zlibutil.TooBigError, decomp, cs, maxlen=strlen-1, maxmem=strlen*2**3 + zlibutil.MINMAXMEM)
def _help_test_inplace_minmaxmem(self, genstring, decomp, strlen):
s = genstring(strlen)
cs = zlibutil.zlib.compress(s)
s2 = decomp(cs, maxlen=strlen, maxmem=zlibutil.MINMAXMEM)
self.failUnless(s == s2)
self.failUnlessRaises(zlibutil.TooBigError, decomp, cs, maxlen=strlen-1, maxmem=zlibutil.MINMAXMEM)
def _help_test_inplace(self, genstring, decomp, strlen):
# ### XXX self.failUnlessRaises(UnsafeDecompressError, decomp, zlib.compress(genstring(strlen)), maxlen=strlen, maxmem=strlen-1)
s = genstring(strlen)
cs = zlibutil.zlib.compress(s)
s2 = decomp(cs, maxlen=strlen, maxmem=max(strlen*2**3, zlibutil.MINMAXMEM))
self.failUnless(s == s2)
s2 = decomp(cs, maxlen=strlen, maxmem=max(strlen*2**6, zlibutil.MINMAXMEM))
self.failUnless(s == s2)
s2 = decomp(cs, maxlen=strlen, maxmem=max(strlen-1, zlibutil.MINMAXMEM))
self.failUnless(s == s2)
s2 = decomp(cs, maxlen=strlen, maxmem=max(strlen/2, zlibutil.MINMAXMEM))
self.failUnless(s == s2)
self.failUnlessRaises(zlibutil.TooBigError, decomp, cs, maxlen=strlen-1, maxmem=max(strlen*2**3, zlibutil.MINMAXMEM))
def testem(self):
# for strlen in [2**1, 2**2, 2**10, 2**14, 2**21]: # a *real* test ought to include 2**21, which exercises different cases re: maxmem. But it takes too long.
for strlen in [2, 3, 4, 99,]:
# print "strlen: %s\n" % (strlen,)
for decomp in [zlibutil.decompress, make_decomp(zlibutil.decompress_to_fileobj), make_decomp(zlibutil.decompress_to_spool),]:
# print "decomp: %s\n" % (decomp,)
for genstring in [genrandstr, genbombstr,]:
# print "genstring: %s\n" % (genstring,)
self._help_test(genstring, decomp, strlen)
for decomp in [make_decomp(zlibutil.decompress_to_spool),]:
# print "decomp: %s\n" % (decomp,)
for genstring in [genrandstr, genbombstr,]:
# print "genstring: %s\n" % (genstring,)
self._help_test_inplace(genstring, decomp, strlen)
self._help_test_inplace_minmaxmem(genstring, decomp, strlen)
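The bounded-decompression behaviour under test, as a standalone sketch (same maxlen/maxmem arithmetic as _help_test, using the '0'-filled "bomb"-style payload from genbombstr):

import zlib
from pyutil import zlibutil

data = '0' * 99                                   # highly compressible payload
compressed = zlib.compress(data)
maxmem = len(data) * 2**3 + zlibutil.MINMAXMEM

assert zlibutil.decompress(compressed, maxlen=len(data), maxmem=maxmem) == data
try:
    # refusing to expand past maxlen is the zip-bomb defence
    zlibutil.decompress(compressed, maxlen=len(data) - 1, maxmem=maxmem)
except zlibutil.TooBigError:
    pass
else:
    raise AssertionError("expected TooBigError")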

121
libs/pyutil/testutil.py

@ -0,0 +1,121 @@
import os, signal, time
from twisted.internet import defer, reactor
from twisted.trial import unittest
import repeatable_random
repeatable_random # http://divmod.org/trac/ticket/1499
class SignalMixin:
# This class is necessary for any code which wants to use Processes
# outside the usual reactor.run() environment. It is copied from
# Twisted's twisted.test.test_process . Note that Twisted-8.2.0 uses
# something rather different.
sigchldHandler = None
def setUp(self):
# make sure SIGCHLD handler is installed, as it should be on
# reactor.run(). problem is reactor may not have been run when this
# test runs.
if hasattr(reactor, "_handleSigchld") and hasattr(signal, "SIGCHLD"):
self.sigchldHandler = signal.signal(signal.SIGCHLD,
reactor._handleSigchld)
def tearDown(self):
if self.sigchldHandler:
signal.signal(signal.SIGCHLD, self.sigchldHandler)
class PollMixin:
def poll(self, check_f, pollinterval=0.01):
# Return a Deferred, then call check_f periodically until it returns
# True, at which point the Deferred will fire. If check_f raises an
# exception, the Deferred will errback.
d = defer.maybeDeferred(self._poll, None, check_f, pollinterval)
return d
def _poll(self, res, check_f, pollinterval):
if check_f():
return True
d = defer.Deferred()
d.addCallback(self._poll, check_f, pollinterval)
reactor.callLater(pollinterval, d.callback, None)
return d
class TestMixin(SignalMixin):
def setUp(self, repeatable=False):
"""
@param repeatable: install the repeatable_random hacks to attempt to keep
real randomness and the real time.time away from the code under test
"""
self.repeatable = repeatable
if self.repeatable:
import repeatable_random
repeatable_random.force_repeatability()
if hasattr(time, 'realtime'):
self.teststarttime = time.realtime()
else:
self.teststarttime = time.time()
def tearDown(self):
if self.repeatable:
repeatable_random.restore_non_repeatability()
self.clean_pending(required_to_quiesce=True)
def clean_pending(self, dummy=None, required_to_quiesce=True):
"""
This handy method cleans all pending tasks from the reactor.
When writing a unit test, consider the following question:
Is the code that you are testing required to release control once it
has done its job, so that it is impossible for it to later come around
(with a delayed reactor task) and do anything further?
If so, then trial will usefully test that for you -- if the code under
test leaves any pending tasks on the reactor then trial will fail it.
On the other hand, some code is *not* required to release control -- some
code is allowed to continuously maintain control by rescheduling reactor
tasks in order to do ongoing work. Trial will incorrectly require that
code to clean up all its tasks from the reactor.
Most people think that such code should be amended to have an optional
"shutdown" operation that releases all control, but on the contrary it is
good design for some code to *not* have a shutdown operation, but instead
to have a "crash-only" design in which it recovers from crash on startup.
If the code under test is of the "long-running" kind, which is *not*
required to shutdown cleanly in order to pass tests, then you can simply
call testutil.clean_pending() at the end of the unit test, and trial will
be satisfied.
"""
pending = reactor.getDelayedCalls()
active = bool(pending)
for p in pending:
if p.active():
p.cancel()
else:
print "WEIRDNESS! pending timed call not active!"
if required_to_quiesce and active:
self.fail("Reactor was still active when it was required to be quiescent.")
try:
import win32file
import win32con
def w_make_readonly(path):
win32file.SetFileAttributes(path, win32con.FILE_ATTRIBUTE_READONLY)
def w_make_accessible(path):
win32file.SetFileAttributes(path, win32con.FILE_ATTRIBUTE_NORMAL)
# http://divmod.org/trac/ticket/1499
make_readonly = w_make_readonly
make_accessible = w_make_accessible
except ImportError:
import stat
def make_readonly(path):
os.chmod(path, stat.S_IREAD)
os.chmod(os.path.dirname(path), stat.S_IREAD)
def make_accessible(path):
os.chmod(os.path.dirname(path), stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD)
os.chmod(path, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD)
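A minimal sketch of PollMixin in a trial test case (the MyPollingTest class and its check function are hypothetical; poll() and its Deferred semantics are as documented above):

from twisted.internet import reactor
from twisted.trial import unittest
from pyutil import testutil

class MyPollingTest(testutil.PollMixin, unittest.TestCase):
    def test_eventually_true(self):
        state = {'done': False}
        reactor.callLater(0.1, state.__setitem__, 'done', True)
        # poll() returns a Deferred that fires once the check function returns True
        return self.poll(lambda: state['done'], pollinterval=0.01)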

44
libs/pyutil/time_comparisons.py

@ -0,0 +1,44 @@
from pyutil import benchutil
import hashlib, random, os
from decimal import Decimal
D=Decimal
p1 = 'a'*32
p1a = 'a'*32
p2 = 'a'*31+'b' # close, but no cigar
p3 = 'b'*32 # different in the first byte
def compare(n, f, a, b):
for i in xrange(n):
f(a, b)
def eqeqcomp(a, b):
return a == b
def hashcomp(a, b):
salt = os.urandom(32)
return hashlib.md5(salt+ a).digest() == hashlib.md5(salt+b).digest()
N=10**4
REPS=10**2
print "all times are in nanoseconds per comparison (scientific notation)"
print
for comparator in [eqeqcomp, hashcomp]:
print "using comparator ", comparator
# for (a, b, desc) in [(p1, p1a, 'same'), (p1, p2, 'close'), (p1, p3, 'far')]:
trials = [(p1, p1a, 'same'), (p1, p2, 'close'), (p1, p3, 'far')]
random.shuffle(trials)
for (a, b, desc) in trials:
print "comparing two strings that are %s to each other" % (desc,)
def f(n):
compare(n, comparator, a, b)
benchutil.rep_bench(f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)
print

72
libs/pyutil/time_comparisons.py~

@ -0,0 +1,72 @@
from pyutil import benchutil
import hashlib
import os
from decimal import Decimal
D=Decimal
p1 = 'a'*32
p1a = 'a'*32
p2 = 'a'*31+'b' # close, but no cigar
p3 = 'b'*32 # different in the first byte
def compare(n, f, a, b):
for i in xrange(n):
f(a, b)
def eqeq(a, b):
return a == b
def equalsequals_s(n):
# return compare(n, eqeq,
for i in xrange(n):
p1 == p1a
def equalsequals_c(n):
for i in xrange(n):
p1 == p2
def equalsequals_f(n):
for i in xrange(n):
p1 == p3
def hash_s(n):
for i in xrange(n):
salt = os.urandom(32)
hashlib.md5(salt+ p1).digest() == hashlib.md5(salt+p1a).digest()
def hash_c(n):
for i in xrange(n):
salt = os.urandom(32)
hashlib.md5(salt+ p1).digest() == hashlib.md5(salt+p2).digest()
def hash_f(n):
for i in xrange(n):
salt = os.urandom(32)
hashlib.md5(salt+ p1).digest() == hashlib.md5(salt+p3).digest()
N=10**4
REPS=10**2
print "using '=='"
print "same"
benchutil.rep_bench(equalsequals_s, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)
print "close"
benchutil.rep_bench(equalsequals_c, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)
print "far"
benchutil.rep_bench(equalsequals_f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)
print "using hash"
print "same"
benchutil.rep_bench(hash_s, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)
print "far"
benchutil.rep_bench(hash_f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)
print "close"
benchutil.rep_bench(hash_c, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS)

72
libs/pyutil/time_format.py

@ -0,0 +1,72 @@
# ISO-8601:
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html
import calendar, datetime, re, time
def iso_utc_date(now=None, t=time.time):
if now is None:
now = t()
return datetime.datetime.utcfromtimestamp(now).isoformat()[:10]
def iso_utc(now=None, sep=' ', t=time.time, suffix='Z'):
if now is None:
now = t()
return datetime.datetime.utcfromtimestamp(now).isoformat(sep)+suffix
def iso_local(now=None, sep=' ', t=time.time):
if now is None:
now = t()
return datetime.datetime.fromtimestamp(now).isoformat(sep)
def iso_utc_time_to_seconds(isotime, _conversion_re=re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})[T_ ](?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<subsecond>\.\d+)?Z?")):
"""
The inverse of iso_utc().
Real ISO-8601 is "2003-01-08T06:30:59Z". We also accept
"2003-01-08 06:30:59Z" as suggested by RFC 3339. We also accept
"2003-01-08_06:30:59Z". We also accept the trailing 'Z' to be omitted.
"""
m = _conversion_re.match(isotime)
if not m:
raise ValueError, (isotime, "not a complete ISO8601 timestamp")
year, month, day = int(m.group('year')), int(m.group('month')), int(m.group('day'))
hour, minute, second = int(m.group('hour')), int(m.group('minute')), int(m.group('second'))
subsecstr = m.group('subsecond')
if subsecstr:
subsecfloat = float(subsecstr)
else:
subsecfloat = 0
return calendar.timegm( (year, month, day, hour, minute, second, 0, 1, 0) ) + subsecfloat
def parse_duration(s):
orig = s
unit = None
DAY = 24*60*60
MONTH = 31*DAY
YEAR = 365*DAY
if s.endswith("s"):
s = s[:-1]
if s.endswith("day"):
unit = DAY
s = s[:-len("day")]
elif s.endswith("month"):
unit = MONTH
s = s[:-len("month")]
elif s.endswith("mo"):
unit = MONTH
s = s[:-len("mo")]
elif s.endswith("year"):
unit = YEAR
s = s[:-len("YEAR")]
else:
raise ValueError("no unit (like day, month, or year) in '%s'" % orig)
s = s.strip()
return int(s) * unit
def parse_date(s):
# return seconds-since-epoch for the UTC midnight that starts the given
# day
return int(iso_utc_time_to_seconds(s + "T00:00:00"))
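Beyond the ISO-8601 round trip shown earlier, the two small parsers at the bottom of this module behave like this (values derived from the code above):

from pyutil import time_format

# durations are "<N> <unit>" with unit day/month/year; a trailing 's' is allowed
assert time_format.parse_duration("2 days") == 2 * 24 * 60 * 60
assert time_format.parse_duration("1 month") == 31 * 24 * 60 * 60

# parse_date returns seconds-since-epoch for the UTC midnight starting that day
assert time_format.parse_date("1970-01-02") == 86400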

25
libs/pyutil/twistedutil.py

@ -0,0 +1,25 @@
# Copyright (c) 2005-2009 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import warnings
# from the Twisted library
from twisted.internet import reactor
# from the pyutil library
from weakutil import WeakMethod
def callLater_weakly(delay, func, *args, **kwargs):
"""
Call func later, but if func is a bound method then make the reference it holds to its object a weak reference.
Therefore, if this scheduled event is a bound method and it is the only thing keeping the object from being garbage collected, the object will be garbage collected and the event will be cancelled.
"""
warnings.warn("deprecated", DeprecationWarning)
def cleanup(weakmeth, thedeadweakref):
if weakmeth.callId.active():
weakmeth.callId.cancel()
weakmeth = WeakMethod(func, callback=cleanup)
weakmeth.callId = reactor.callLater(delay, weakmeth, *args, **kwargs)
return weakmeth
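A minimal sketch of the weak scheduling described in the docstring (the Pinger class is hypothetical, and callLater_weakly itself emits a DeprecationWarning):

from twisted.internet import reactor
from pyutil.twistedutil import callLater_weakly

class Pinger:
    def ping(self):
        print "ping"

p = Pinger()
callLater_weakly(1.0, p.ping)           # the reactor holds only a weak reference to p
del p                                   # no strong references remain, so the call is cancelled
reactor.callLater(2.0, reactor.stop)
reactor.run()                           # exits after ~2 seconds; the cancelled ping never prints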

337
libs/pyutil/verlib.py

@ -0,0 +1,337 @@
# -*- coding: utf-8 -*-
"""
"Rational" version definition and parsing for DistutilsVersionFight
discussion at PyCon 2009.
This was written by Tarek Ziadé.
Zooko copied it from http://bitbucket.org/tarek/distutilsversion/ on 2010-07-29.
"""
import re
class IrrationalVersionError(Exception):
"""This is an irrational version."""
pass
class HugeMajorVersionNumError(IrrationalVersionError):
"""An irrational version because the major version number is huge
(often because a year or date was used).
See `error_on_huge_major_num` option in `NormalizedVersion` for details.
This guard can be disabled by setting that option False.
"""
pass
class PreconditionViolationException(Exception):
pass
# A marker used in the second and third parts of the `parts` tuple, for
# versions that don't have those segments, to sort properly. An example
# of versions in sort order ('highest' last):
# 1.0b1 ((1,0), ('b',1), ('f',))
# 1.0.dev345 ((1,0), ('f',), ('dev', 345))
# 1.0 ((1,0), ('f',), ('f',))
# 1.0.post256.dev345 ((1,0), ('f',), ('f', 'post', 256, 'dev', 345))
# 1.0.post345 ((1,0), ('f',), ('f', 'post', 345, 'f'))
# ^ ^ ^
# 'b' < 'f' ---------------------/ | |
# | |
# 'dev' < 'f' < 'post' -------------------/ |
# |
# 'dev' < 'f' ----------------------------------------------/
# Other letters would do, but 'f' for 'final' is kind of nice.
FINAL_MARKER = ('f',)
VERSION_RE = re.compile(r'''
^
(?P<version>\d+\.\d+) # minimum 'N.N'
(?P<extraversion>(?:\.\d+)*) # any number of extra '.N' segments
(?:
(?P<prerel>[abc]|rc) # 'a'=alpha, 'b'=beta, 'c'=release candidate
# 'rc'= alias for release candidate
(?P<prerelversion>\d+(?:\.\d+)*)
)?
(?P<postdev>(\.post(?P<post>\d+)|-r(?P<oldpost>\d+))?(\.dev(?P<dev>\d+))?)?
$''', re.VERBOSE)
class NormalizedVersion(object):
"""A rational version.
Good:
1.2 # equivalent to "1.2.0"
1.2.0
1.2a1
1.2.3a2
1.2.3b1
1.2.3c1
1.2.3.4
TODO: fill this out
Bad:
1 # minimum of two numbers
1.2a # release level must have a release serial
1.2.3b
"""
def __init__(self, s, error_on_huge_major_num=True):
"""Create a NormalizedVersion instance from a version string.
@param s {str} The version string.
@param error_on_huge_major_num {bool} Whether to consider an
apparent use of a year or full date as the major version number
an error. Default True. One of the observed patterns on PyPI before
the introduction of `NormalizedVersion` was version numbers like this:
2009.01.03
20040603
2005.01
This guard is here to strongly encourage the package author to
use an alternate version, because a release deployed into PyPI
and, e.g. downstream Linux package managers, will forever remove
the possibility of using a version number like "1.0" (i.e.
where the major number is less than that huge major number).
"""
self._parse(s, error_on_huge_major_num)
@classmethod
def from_parts(cls, version, prerelease=FINAL_MARKER,
devpost=FINAL_MARKER):
return cls(cls.parts_to_str((version, prerelease, devpost)))
def _parse(self, s, error_on_huge_major_num=True):
"""Parses a string version into parts."""
if not isinstance(s, basestring):
raise PreconditionViolationException("s is required to be a string: %s :: %s" % (s, type(s)))
match = VERSION_RE.search(s)
if not match:
raise IrrationalVersionError(s)
groups = match.groupdict()
parts = []
# main version
block = self._parse_numdots(groups['version'], s, False, 2)
extraversion = groups.get('extraversion')
if extraversion not in ('', None):
block += self._parse_numdots(extraversion[1:], s)
parts.append(tuple(block))
# prerelease
prerel = groups.get('prerel')
if prerel is not None:
block = [prerel]
block += self._parse_numdots(groups.get('prerelversion'), s,
pad_zeros_length=1)
parts.append(tuple(block))
else:
parts.append(FINAL_MARKER)
# postdev
if groups.get('postdev'):
post = groups.get('post') or groups.get('oldpost')
dev = groups.get('dev')
postdev = []
if post is not None:
postdev.extend([FINAL_MARKER[0], 'post', int(post)])
if dev is None:
postdev.append(FINAL_MARKER[0])
if dev is not None:
postdev.extend(['dev', int(dev)])
parts.append(tuple(postdev))
else:
parts.append(FINAL_MARKER)
self.parts = tuple(parts)
if error_on_huge_major_num and self.parts[0][0] > 1980:
raise HugeMajorVersionNumError("huge major version number, %r, "
"which might cause future problems: %r" % (self.parts[0][0], s))
def _parse_numdots(self, s, full_ver_str, drop_trailing_zeros=True,
pad_zeros_length=0):
"""Parse 'N.N.N' sequences, return a list of ints.
@param s {str} 'N.N.N...' sequence to be parsed
@param full_ver_str {str} The full version string from which this
comes. Used for error strings.
@param drop_trailing_zeros {bool} Whether to drop trailing zeros
from the returned list. Default True.
@param pad_zeros_length {int} The length to which to pad the
returned list with zeros, if necessary. Default 0.
"""
nums = []
for n in s.split("."):
if len(n) > 1 and n[0] == '0':
raise IrrationalVersionError("cannot have leading zero in "
"version number segment: '%s' in %r" % (n, full_ver_str))
nums.append(int(n))
if drop_trailing_zeros:
while nums and nums[-1] == 0:
nums.pop()
while len(nums) < pad_zeros_length:
nums.append(0)
return nums
def __str__(self):
return self.parts_to_str(self.parts)
@classmethod
def parts_to_str(cls, parts):
"""Transforms a version expressed in tuple into its string
representation."""
# XXX This doesn't check for invalid tuples
main, prerel, postdev = parts
s = '.'.join(str(v) for v in main)
if prerel is not FINAL_MARKER:
s += prerel[0]
s += '.'.join(str(v) for v in prerel[1:])
if postdev and postdev is not FINAL_MARKER:
if postdev[0] == 'f':
postdev = postdev[1:]
i = 0
while i < len(postdev):
if i % 2 == 0:
s += '.'
s += str(postdev[i])
i += 1
return s
def __repr__(self):
return "%s('%s')" % (self.__class__.__name__, self)
def _cannot_compare(self, other):
raise TypeError("cannot compare %s and %s"
% (type(self).__name__, type(other).__name__))
def __eq__(self, other):
if not isinstance(other, NormalizedVersion):
self._cannot_compare(other)
return self.parts == other.parts
def __lt__(self, other):
if not isinstance(other, NormalizedVersion):
self._cannot_compare(other)
return self.parts < other.parts
def __ne__(self, other):
return not self.__eq__(other)
def __gt__(self, other):
return not (self.__lt__(other) or self.__eq__(other))
def __le__(self, other):
return self.__eq__(other) or self.__lt__(other)
def __ge__(self, other):
return self.__eq__(other) or self.__gt__(other)
def suggest_normalized_version(s):
"""Suggest a normalized version close to the given version string.
If you have a version string that isn't rational (i.e. NormalizedVersion
doesn't like it) then you might be able to get an equivalent (or close)
rational version from this function.
This does a number of simple normalizations to the given string, based
on observation of versions currently in use on PyPI. Given a dump of
those versions during PyCon 2009, 4287 of them:
- 2312 (53.93%) match NormalizedVersion without change
- 3474 (81.04%) match when this suggestion method is applied first
@param s {str} An irrational version string.
@returns A rational version string, or None if one couldn't be determined.
"""
try:
NormalizedVersion(s)
return s # already rational
except IrrationalVersionError:
pass
rs = s.lower()
# part of this could use maketrans
for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
('beta', 'b'), ('rc', 'c'), ('-final', ''),
('-pre', 'c'),
('-release', ''), ('.release', ''), ('-stable', ''),
('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
('final', '')):
rs = rs.replace(orig, repl)
# if something ends with dev or pre, we add a 0
rs = re.sub(r"pre$", r"pre0", rs)
rs = re.sub(r"dev$", r"dev0", rs)
# if we have something like "b-2" or "a.2" at the end of the
# version, that is probably beta, alpha, etc
# let's remove the dash or dot
rs = re.sub(r"([abc|rc])[\-\.](\d+)$", r"\1\2", rs)
# 1.0-dev-r371 -> 1.0.dev371
# 0.1-dev-r79 -> 0.1.dev79
rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)
# Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)
# Clean: v0.3, v1.0
if rs.startswith('v'):
rs = rs[1:]
# Clean leading '0's on numbers.
#TODO: unintended side-effect on, e.g., "2003.05.09"
# PyPI stats: 77 (~2%) better
rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)
# Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
# zero.
# PyPI stats: 245 (7.56%) better
rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)
# the 'dev-rNNN' tag is a dev tag
rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)
# clean the - when used as a pre delimiter
rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)
# a terminal "dev" or "devel" can be changed into ".dev0"
rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)
# a terminal "dev" can be changed into ".dev0"
rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)
# a terminal "final" or "stable" can be removed
rs = re.sub(r"(final|stable)$", "", rs)
# The 'r' and the '-' tags are post release tags
# 0.4a1.r10 -> 0.4a1.post10
# 0.9.33-17222 -> 0.9.3.post17222
# 0.9.33-r17222 -> 0.9.3.post17222
rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)
# Clean 'r' instead of 'dev' usage:
# 0.9.33+r17222 -> 0.9.3.dev17222
# 1.0dev123 -> 1.0.dev123
# 1.0.git123 -> 1.0.dev123
# 1.0.bzr123 -> 1.0.dev123
# 0.1a0dev.123 -> 0.1a0.dev123
# PyPI stats: ~150 (~4%) better
rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)
# Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
# 0.2.pre1 -> 0.2c1
# 0.2-c1 -> 0.2c1
# 1.0preview123 -> 1.0c123
# PyPI stats: ~21 (0.62%) better
rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)
# Tcl/Tk uses "px" for their post release markers
rs = re.sub(r"p(\d+)$", r".post\1", rs)
try:
NormalizedVersion(rs)
return rs # already rational
except IrrationalVersionError:
pass
return None
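A short usage sketch, assuming the vendored libs/ directory is on sys.path: NormalizedVersion instances compare by their parsed parts tuples, and suggest_normalized_version() tries to rescue strings that NormalizedVersion rejects.

from pyutil.verlib import NormalizedVersion, suggest_normalized_version

assert NormalizedVersion("1.2a1") < NormalizedVersion("1.2")
assert NormalizedVersion("1.0.post345") > NormalizedVersion("1.0")
assert suggest_normalized_version("1.0-rc2") == "1.0c2"
assert suggest_normalized_version("not a version") is None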

148
libs/pyutil/version_class.py

@@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2004-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
extended version number class
"""
# verlib a.k.a. distutils.version by Tarek Ziadé.
from pyutil.verlib import NormalizedVersion
def cmp_version(v1, v2):
return cmp(NormalizedVersion(str(v1)), NormalizedVersion(str(v2)))
# Python Standard Library
import re
# End users see version strings like this:
# "1.0.0"
# ^ ^ ^
# | | |
# | | '- micro version number
# | '- minor version number
# '- major version number
# The first number is the "major version number". The second number is the "minor
# version number" -- it gets bumped whenever we make a new release that adds or
# changes functionality. The third number is the "micro version number" -- it
# gets bumped whenever we make a new release that doesn't add or change
# functionality, but just fixes bugs (including performance issues).
# Early-adopter end users see version strings like this:
# "1.0.0a1"
# ^ ^ ^^^
# | | |||
# | | ||'- release number
# | | |'- a=alpha, b=beta, c=release candidate, or none
# | | '- micro version number
# | '- minor version number
# '- major version number
# The optional "a" or "b" stands for "alpha release" or "beta release"
# respectively. The number after "a" or "b" gets bumped every time we
# make a new alpha or beta release. This has the same form and the same
# meaning as version numbers of releases of Python.
# Developers see "full version strings", like this:
# "1.0.0a1-55"
# ^ ^ ^^^ ^
# | | ||| |
# | | ||| '- nano version number
# | | ||'- release number
# | | |'- a=alpha, b=beta, c=release candidate or none
# | | '- micro version number
# | '- minor version number
# '- major version number
# or else like this:
# "1.0.0a1-r22155"
# ^ ^ ^^^ ^
# | | ||| |
# | | ||| '- revision number
# | | ||'- release number
# | | |'- a=alpha, b=beta, c=release candidate or none
# | | '- micro version number
# | '- minor version number
# '- major version number
# The presence of the nano version number means that this is a development
# version. There are no guarantees about compatibility, etc. This version is
# considered to be more recent than the version without this field
# (e.g. "1.0.0a1").
# The nano version number or revision number is meaningful only to developers.
# It gets generated automatically from darcs revision control history by
# "darcsver.py". The nano version number is the count of patches that have been
# applied since the last version number tag was applied. The revision number is
# the count of all patches that have been applied in the history.
VERSION_BASE_RE_STR="(\d+)(\.(\d+)(\.(\d+))?)?((a|b|c)(\d+))?(\.dev(\d+))?"
VERSION_SUFFIX_RE_STR="(-(\d+|r\d+)|.post\d+)?"
VERSION_RE_STR=VERSION_BASE_RE_STR + VERSION_SUFFIX_RE_STR
VERSION_RE=re.compile("^" + VERSION_RE_STR + "$")
class Version(object):
def __init__(self, vstring=None):
self.major = None
self.minor = None
self.micro = None
self.prereleasetag = None
self.prerelease = None
self.nano = None
self.revision = None
if vstring:
try:
self.parse(vstring)
except ValueError, le:
le.args = tuple(le.args + ('vstring:', vstring,))
raise
def parse(self, vstring):
mo = VERSION_RE.search(vstring)
if not mo:
raise ValueError, "Not a valid version string for pyutil.version_class.Version(): %r" % (vstring,)
self.major = int(mo.group(1))
self.minor = mo.group(3) and int(mo.group(3)) or 0
self.micro = mo.group(5) and int(mo.group(5)) or 0
reltag = mo.group(6)
if reltag:
reltagnum = int(mo.group(8))
self.prereleasetag = mo.group(7)
self.prerelease = reltagnum
if mo.group(11):
if mo.group(11)[0] == '-':
if mo.group(12)[0] == 'r':
self.revision = int(mo.group(12)[1:])
else:
self.nano = int(mo.group(12))
else:
assert mo.group(11).startswith('.post'), mo.group(11)
self.revision = int(mo.group(11)[5:])
# XXX in the future, to be compatible with the Python "rational version numbering" scheme, we should move to using .post$REV instead of -r$REV:
# self.fullstr = "%d.%d.%d%s%s" % (self.major, self.minor, self.micro, self.prereleasetag and "%s%d" % (self.prereleasetag, self.prerelease,) or "", self.nano and "-%d" % (self.nano,) or self.revision and ".post%d" % (self.revision,) or "",)
self.fullstr = "%d.%d.%d%s%s" % (self.major, self.minor, self.micro, self.prereleasetag and "%s%d" % (self.prereleasetag, self.prerelease,) or "", self.nano and "-%d" % (self.nano,) or self.revision and "-r%d" % (self.revision,) or "",)
def user_str(self):
return self.full_str()
def full_str(self):
if hasattr(self, 'fullstr'):
return self.fullstr
else:
return 'None'
def __str__(self):
return self.full_str()
def __repr__(self):
return self.__str__()
def __cmp__ (self, other):
return cmp_version(self, other)
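A brief usage sketch, assuming libs/ is on sys.path: parsing a developer "full version string" and comparing two releases; comparison delegates to NormalizedVersion via cmp_version().

from pyutil.version_class import Version

v = Version("1.0.0a1-r22155")
assert (v.major, v.minor, v.micro) == (1, 0, 0)
assert (v.prereleasetag, v.prerelease) == ('a', 1)
assert v.revision == 22155
assert Version("1.0.0a1") < Version("1.0.0")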

42
libs/pyutil/weakutil.py

@@ -0,0 +1,42 @@
# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
import warnings
# from the Python Standard Library
from weakref import ref
# from the pyutil library
from assertutil import precondition
# Thanks to Thomas Wouters, JP Calderone and the authors from the Python Cookbook.
# class WeakMethod copied from The Python Cookbook and simplified.
class WeakMethod:
""" Wraps a function or, more importantly, a bound method, in
a way that allows a bound method's object to be GC'd """
def __init__(self, fn, callback=None):
warnings.warn("deprecated", DeprecationWarning)
precondition(hasattr(fn, 'im_self'), "fn is required to be a bound method.")
self._cleanupcallback = callback
self._obj = ref(fn.im_self, self.call_cleanup_cb)
self._meth = fn.im_func
def __call__(self, *args, **kws):
s = self._obj()
if s:
return self._meth(s, *args,**kws)
def __repr__(self):
return "<%s %s %s>" % (self.__class__.__name__, self._obj, self._meth,)
def call_cleanup_cb(self, thedeadweakref):
if self._cleanupcallback is not None:
self._cleanupcallback(self, thedeadweakref)
def factory_function_name_here(o):
if hasattr(o, 'im_self'):
return WeakMethod(o)
else:
return o
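A minimal sketch, assuming libs/ is on sys.path: a WeakMethod forwards calls to the live object but holds only a weak reference to it, so it does not keep the object alive; once the object is collected, calling the wrapper is a no-op that returns None.

from pyutil.weakutil import WeakMethod

class Counter(object):
    def __init__(self):
        self.n = 0
    def bump(self):
        self.n += 1
        return self.n

c = Counter()
wm = WeakMethod(c.bump)
assert wm() == 1      # forwarded to the live instance
del c                 # drop the only strong reference (collected immediately in CPython)
assert wm() is None   # the instance is gone; the call does nothing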

0
libs/pyutil/xor/__init__.py

50
libs/pyutil/xor/xor.py

@@ -0,0 +1,50 @@
# Copyright © 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
What word has three letters and an 'x' in it?
Not that one, silly.
"""
import warnings
import array, operator
from pyutil.assertutil import precondition
def py_xor(str1, str2):
warnings.warn("deprecated", DeprecationWarning)
precondition(len(str1) == len(str2), "str1 and str2 are required to be of the same length.", str1=str1, str2=str2)
if len(str1)%4 == 0:
a1 = array.array('i', str1)
a2 = array.array('i', str2)
for i in range(len(a1)):
a2[i] = a2[i]^a1[i]
elif len(str1)%2 == 0:
a1 = array.array('h', str1)
a2 = array.array('h', str2)
for i in range(len(a1)):
a2[i] = a2[i]^a1[i]
else:
a1 = array.array('c', str1)
a2 = array.array('c', str2)
for i in range(len(a1)):
a2[i] = chr(ord(a2[i])^ord(a1[i]))
return a2.tostring()
def py_xor_simple(str1, str2):
"""
Benchmarks show that this is the same speed as py_xor() for small strings
and much slower for large strings, so don't use it. --Zooko 2002-04-29
"""
warnings.warn("deprecated", DeprecationWarning)
precondition(len(str1) == len(str2), "str1 and str2 are required to be of the same length.", str1=str1, str2=str2)
return ''.join(map(chr, map(operator.__xor__, map(ord, str1), map(ord, str2))))
# Now make "xor.xor()" be the best xor we've got:
xor = py_xor
# for unit tests, see pyutil/test/test_xor.py. For benchmarks, see pyutil/test/bench_xor.py.
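A tiny usage sketch, assuming libs/ is on sys.path: xor-ing two equal-length strings and recovering the original, since xor is its own inverse. The 16-byte inputs take the fast 4-byte ('i') code path above.

from pyutil.xor.xor import xor

plaintext = "attack at dawn!!"
key       = "0123456789abcdef"
ciphertext = xor(plaintext, key)
assert xor(ciphertext, key) == plaintext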

261
libs/pyutil/zlibutil.py

@@ -0,0 +1,261 @@
# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.
"""
Making your zlib experience that much nicer!
Most importantly, this offers protection from "zlib bomb" attacks, where the
original data was maximally compressible, and a naive use of zlib would
consume all of your RAM while trying to decompress it.
"""
import exceptions, string, zlib
from humanreadable import hr
from pyutil.assertutil import precondition
class DecompressError(exceptions.StandardError, zlib.error): pass
class UnsafeDecompressError(DecompressError): pass # This means it would take more memory to decompress than we can spare.
class TooBigError(DecompressError): pass # This means the resulting uncompressed text would exceed the maximum allowed length.
class ZlibError(DecompressError): pass # internal error, probably due to the input not being zlib compressed text
# The smallest limit that you can impose on zlib decompression and still have
# a chance of succeeding at decompression.
# constant memory overhead of zlib (76 KB), plus minbite (128 bytes) times
# maxexpansion (1032) times buffer-copying duplication (2), plus 2063 so as
# to reach the ceiling of div (2*1032)
MINMAXMEM=76*2**10 + 128 * 1032 * 2 + 2063 - 1
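# (for reference, that works out to 344078 bytes, i.e. roughly 336 KiB)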
# You should really specify a maxmem which is much higher than MINMAXMEM. If
# maxmem=MINMAXMEM, we will be reduced to decompressing the input in
# 128-byte bites, and furthermore unless the decompressed text is quite small,
# we will be forced to give up and spuriously raise UnsafeDecompressError!
# You really ought to pass a maxmem argument equal to the maximum possible
# memory that your app should ever allocate (for a short-term use).
# I typically set it to 65 MB.
def decompress(zbuf, maxlen=(65 * (2**20)), maxmem=(65 * (2**20))):
"""
Decompress zbuf so that it decompresses to <= maxlen bytes, while using
<= maxmem memory, or else raise an exception. If zbuf contains
uncompressed data an exception will be raised.
This function guards against memory allocation attacks.
@param maxlen the resulting text must not be greater than this
@param maxmem the execution of this function must not use more than this
amount of memory in bytes; The higher this number is (optimally
1032 * maxlen, or even greater), the faster this function can
complete. (Actually I don't fully understand the workings of zlib, so
this function might use a *little* more than this memory, but not a
lot more.) (Also, this function will raise an exception if the amount
of memory required even *approaches* maxmem. Another reason to make
it large.) (Hence the default value which would seem to be
exceedingly large until you realize that it means you can decompress
64 KB chunks of compressiontext at a bite.)
"""
assert isinstance(maxlen, (int, long,)) and maxlen > 0, "maxlen is required to be a real maxlen, geez!"
assert isinstance(maxmem, (int, long,)) and maxmem > 0, "maxmem is required to be a real maxmem, geez!"
assert maxlen <= maxmem, "maxlen is required to be <= maxmem. All data that is included in the return value is counted against maxmem as well as against maxlen, so it is impossible to return a result bigger than maxmem, even if maxlen is bigger than maxmem. See decompress_to_spool() if you want to spool a large text out while limiting the amount of memory used during the process."
lenzbuf = len(zbuf)
offset = 0
decomplen = 0
availmem = maxmem - (76 * 2**10) # zlib can take around 76 KB RAM to do decompression
availmem = availmem / 2 # generating the result string from the intermediate strings will require using the same amount of memory again, briefly. If you care about this kind of thing, then let's rewrite this module in C.
decompstrlist = []
decomp = zlib.decompressobj()
while offset < lenzbuf:
# How much compressedtext can we safely attempt to decompress now without going over `maxmem'? zlib docs say that theoretical maximum for the zlib format would be 1032:1.
lencompbite = availmem / 1032 # XXX TODO: The biggest compression ratio zlib can have for whole files is 1032:1. Unfortunately I don't know if small chunks of compressiontext *within* a file can expand to more than that. I'll assume not... --Zooko 2001-05-12
if lencompbite < 128:
# If we can't safely attempt even a few bytes of compression text, let us give up. Either `maxmem' was too small or this compressiontext is actually a decompression bomb.
raise UnsafeDecompressError, "used up roughly maxmem memory. maxmem: %s, len(zbuf): %s, offset: %s, decomplen: %s, lencompbite: %s" % tuple(map(hr, [maxmem, len(zbuf), offset, decomplen, lencompbite,]))
# I wish the following were a local function like this:
# def proc_decomp_bite(tmpstr, lencompbite=0, decomplen=decomplen, maxlen=maxlen, availmem=availmem, decompstrlist=decompstrlist, offset=offset, zbuf=zbuf):
# ...but we can't conveniently and efficiently update the integer variables like offset in the outer scope. Oh well. --Zooko 2003-06-26
try:
if (offset == 0) and (lencompbite >= lenzbuf):
tmpstr = decomp.decompress(zbuf)
else:
tmpstr = decomp.decompress(zbuf[offset:offset+lencompbite])
except zlib.error, le:
raise ZlibError, (offset, lencompbite, decomplen, hr(le), )
lentmpstr = len(tmpstr)
decomplen = decomplen + lentmpstr
if decomplen > maxlen:
raise TooBigError, "length of resulting data > maxlen. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
availmem = availmem - lentmpstr
offset = offset + lencompbite
decompstrlist.append(tmpstr)
tmpstr = ''
try:
tmpstr = decomp.flush()
except zlib.error, le:
raise ZlibError, (offset, lencompbite, decomplen, le, )
lentmpstr = len(tmpstr)
decomplen = decomplen + lentmpstr
if decomplen > maxlen:
raise TooBigError, "length of resulting data > maxlen. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
availmem = availmem - lentmpstr
offset = offset + lencompbite
if lentmpstr > 0:
decompstrlist.append(tmpstr)
tmpstr = ''
if len(decompstrlist) > 0:
return string.join(decompstrlist, '')
else:
return ''
def decompress_to_fileobj(zbuf, fileobj, maxlen=(65 * (2**20)), maxmem=(65 * (2**20))):
"""
Decompress zbuf so that it decompresses to <= maxlen bytes, while using
<= maxmem memory, or else raise an exception. If zbuf contains
uncompressed data an exception will be raised.
This function guards against memory allocation attacks.
Note that this assumes that data written to fileobj still occupies memory,
so such data counts against maxmem as well as against maxlen.
@param maxlen the resulting text must not be greater than this
@param maxmem the execution of this function must not use more than this
amount of memory in bytes; The higher this number is (optimally
1032 * maxlen, or even greater), the faster this function can
complete. (Actually I don't fully understand the workings of zlib, so
this function might use a *little* more than this memory, but not a
lot more.) (Also, this function will raise an exception if the amount
of memory required even *approaches* maxmem. Another reason to make
it large.) (Hence the default value which would seem to be
exceedingly large until you realize that it means you can decompress
64 KB chunks of compressiontext at a bite.)
@param fileobj a file object to which the decompressed text will be written
"""
precondition(hasattr(fileobj, 'write') and callable(fileobj.write), "fileobj is required to have a write() method.", fileobj=fileobj)
precondition(isinstance(maxlen, (int, long,)) and maxlen > 0, "maxlen is required to be a real maxlen, geez!", maxlen=maxlen)
precondition(isinstance(maxmem, (int, long,)) and maxmem > 0, "maxmem is required to be a real maxmem, geez!", maxmem=maxmem)
precondition(maxlen <= maxmem, "maxlen is required to be <= maxmem. All data that is written out to fileobj is counted against maxmem as well as against maxlen, so it is impossible to return a result bigger than maxmem, even if maxlen is bigger than maxmem. See decompress_to_spool() if you want to spool a large text out while limiting the amount of memory used during the process.", maxlen=maxlen, maxmem=maxmem)
lenzbuf = len(zbuf)
offset = 0
decomplen = 0
availmem = maxmem - (76 * 2**10) # zlib can take around 76 KB RAM to do decompression
decomp = zlib.decompressobj()
while offset < lenzbuf:
# How much compressedtext can we safely attempt to decompress now without going over maxmem? zlib docs say that theoretical maximum for the zlib format would be 1032:1.
lencompbite = availmem / 1032 # XXX TODO: The biggest compression ratio zlib can have for whole files is 1032:1. Unfortunately I don't know if small chunks of compressiontext *within* a file can expand to more than that. I'll assume not... --Zooko 2001-05-12
if lencompbite < 128:
# If we can't safely attempt even a few bytes of compression text, let us give up. Either maxmem was too small or this compressiontext is actually a decompression bomb.
raise UnsafeDecompressError, "used up roughly maxmem memory. maxmem: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxmem, len(zbuf), offset, decomplen,]))
# I wish the following were a local function like this:
# def proc_decomp_bite(tmpstr, lencompbite=0, decomplen=decomplen, maxlen=maxlen, availmem=availmem, decompstrlist=decompstrlist, offset=offset, zbuf=zbuf):
# ...but we can't conveniently and efficiently update the integer variables like offset in the outer scope. Oh well. --Zooko 2003-06-26
try:
if (offset == 0) and (lencompbite >= lenzbuf):
tmpstr = decomp.decompress(zbuf)
else:
tmpstr = decomp.decompress(zbuf[offset:offset+lencompbite])
except zlib.error, le:
raise ZlibError, (offset, lencompbite, decomplen, le, )
lentmpstr = len(tmpstr)
decomplen = decomplen + lentmpstr
if decomplen > maxlen:
raise TooBigError, "length of resulting data > maxlen. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
availmem = availmem - lentmpstr
offset = offset + lencompbite
fileobj.write(tmpstr)
tmpstr = ''
try:
tmpstr = decomp.flush()
except zlib.error, le:
raise ZlibError, (offset, lencompbite, decomplen, le, )
lentmpstr = len(tmpstr)
decomplen = decomplen + lentmpstr
if decomplen > maxlen:
raise TooBigError, "length of resulting data > maxlen. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
availmem = availmem - lentmpstr
offset = offset + lencompbite
fileobj.write(tmpstr)
tmpstr = ''
def decompress_to_spool(zbuf, fileobj, maxlen=(65 * (2**20)), maxmem=(65 * (2**20))):
"""
Decompress zbuf so that it decompresses to <= maxlen bytes, while using
<= maxmem memory, or else raise an exception. If zbuf contains
uncompressed data an exception will be raised.
This function guards against memory allocation attacks.
Note that this assumes that data written to fileobj does *not* continue to
occupy memory, so such data doesn't count against maxmem, although of
course it still counts against maxlen.
@param maxlen the resulting text must not be greater than this
@param maxmem the execution of this function must not use more than this
amount of memory in bytes; The higher this number is (optimally
1032 * maxlen, or even greater), the faster this function can
complete. (Actually I don't fully understand the workings of zlib, so
this function might use a *little* more than this memory, but not a
lot more.) (Also, this function will raise an exception if the amount
of memory required even *approaches* maxmem. Another reason to make
it large.) (Hence the default value which would seem to be
exceedingly large until you realize that it means you can decompress
64 KB chunks of compressiontext at a bite.)
@param fileobj the decompressed text will be written to it
"""
precondition(hasattr(fileobj, 'write') and callable(fileobj.write), "fileobj is required to have a write() method.", fileobj=fileobj)
precondition(isinstance(maxlen, (int, long,)) and maxlen > 0, "maxlen is required to be a real maxlen, geez!", maxlen=maxlen)
precondition(isinstance(maxmem, (int, long,)) and maxmem > 0, "maxmem is required to be a real maxmem, geez!", maxmem=maxmem)
tmpstr = ''
lenzbuf = len(zbuf)
offset = 0
decomplen = 0
availmem = maxmem - (76 * 2**10) # zlib can take around 76 KB RAM to do decompression
decomp = zlib.decompressobj()
while offset < lenzbuf:
# How much compressedtext can we safely attempt to decompress now without going over `maxmem'? zlib docs say that theoretical maximum for the zlib format would be 1032:1.
lencompbite = availmem / 1032 # XXX TODO: The biggest compression ratio zlib can have for whole files is 1032:1. Unfortunately I don't know if small chunks of compressiontext *within* a file can expand to more than that. I'll assume not... --Zooko 2001-05-12
if lencompbite < 128:
# If we can't safely attempt even a few bytes of compression text, let us give up. Either `maxmem' was too small or this compressiontext is actually a decompression bomb.
raise UnsafeDecompressError, "used up roughly `maxmem' memory. maxmem: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxmem, len(zbuf), offset, decomplen,]))
# I wish the following were a local function like this:
# def proc_decomp_bite(tmpstr, lencompbite=0, decomplen=decomplen, maxlen=maxlen, availmem=availmem, decompstrlist=decompstrlist, offset=offset, zbuf=zbuf):
# ...but we can't conveniently and efficiently update the integer variables like offset in the outer scope. Oh well. --Zooko 2003-06-26
try:
if (offset == 0) and (lencompbite >= lenzbuf):
tmpstr = decomp.decompress(zbuf)
else:
tmpstr = decomp.decompress(zbuf[offset:offset+lencompbite])
except zlib.error, le:
raise ZlibError, (offset, lencompbite, decomplen, le, )
lentmpstr = len(tmpstr)
decomplen = decomplen + lentmpstr
if decomplen > maxlen:
raise TooBigError, "length of resulting data > `maxlen'. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
offset = offset + lencompbite
fileobj.write(tmpstr)
tmpstr = ''
try:
tmpstr = decomp.flush()
except zlib.error, le:
raise ZlibError, (offset, lencompbite, decomplen, le, )
lentmpstr = len(tmpstr)
decomplen = decomplen + lentmpstr
if decomplen > maxlen:
raise TooBigError, "length of resulting data > `maxlen'. maxlen: %s, len(zbuf): %s, offset: %s, decomplen: %s" % tuple(map(hr, [maxlen, len(zbuf), offset, decomplen,]))
offset = offset + lencompbite
fileobj.write(tmpstr)
tmpstr = ''
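A short usage sketch, assuming libs/ is on sys.path: round-tripping a payload through decompress() with caps on both output size and working memory, and seeing an over-large payload rejected instead of exhausting RAM.

import zlib
from pyutil import zlibutil

data = "hello, world! " * 1000
assert zlibutil.decompress(zlib.compress(data), maxlen=2**20, maxmem=65 * 2**20) == data

bomb = zlib.compress("\x00" * (8 * 2**20))   # ~8 MB of zeros compresses to a few KB
try:
    zlibutil.decompress(bomb, maxlen=2**20, maxmem=65 * 2**20)
    assert False, "expected TooBigError"
except zlibutil.TooBigError:
    pass   # refused: it would decompress to more than maxlen bytes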