# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
Benchmark a function for its behavior with respect to N.

How to use this module:

1. Define a function which runs the code that you want to benchmark. The
function takes a single argument which is the size of the task (i.e. the "N"
parameter). Pass this function as the first argument to rep_bench(), and N as
the second, e.g.:

>>> from pyutil.benchutil import rep_bench
>>> def fib(N):
...     if N <= 1:
...         return 1
...     else:
...         return fib(N-1) + fib(N-2)
...
>>> rep_bench(fib, 25, UNITS_PER_SECOND=1000)
best: 1.968e+00,   3th-best: 1.987e+00, mean: 2.118e+00,   3th-worst: 2.175e+00, worst: 2.503e+00 (of     10)

The output reports the number of milliseconds that executing the function
took, divided by N, from ten separate invocations of fib(). It reports the
best, worst, M-th best, M-th worst, and mean, where "M" is one more than a
quarter of the number of invocations (with 10 invocations, M is 3).

2. Now run it with different values of N and look for patterns:

>>> for N in 1, 5, 9, 13, 17, 21:
...     print "%2d" % N,
...     rep_bench(fib, N, UNITS_PER_SECOND=1000000)
...
 1 best: 9.537e-01,   3th-best: 9.537e-01, mean: 1.121e+00,   3th-worst: 1.192e+00, worst: 2.146e+00 (of     10)
 5 best: 5.722e-01,   3th-best: 6.199e-01, mean: 7.200e-01,   3th-worst: 8.106e-01, worst: 8.106e-01 (of     10)
 9 best: 2.437e+00,   3th-best: 2.464e+00, mean: 2.530e+00,   3th-worst: 2.570e+00, worst: 2.676e+00 (of     10)
13 best: 1.154e+01,   3th-best: 1.168e+01, mean: 5.638e+01,   3th-worst: 1.346e+01, worst: 4.478e+02 (of     10)
17 best: 6.230e+01,   3th-best: 6.247e+01, mean: 6.424e+01,   3th-worst: 6.460e+01, worst: 7.294e+01 (of     10)
21 best: 3.376e+02,   3th-best: 3.391e+02, mean: 3.521e+02,   3th-worst: 3.540e+02, worst: 3.963e+02 (of     10)
>>> print_bench_footer(UNITS_PER_SECOND=1000000)
all results are in time units per N
time units per second: 1000000; seconds per time unit: 0.000001

(The pattern here is that as N grows, the time per N grows.)

3. If you need to do some setting up before the code can run, then put the
setting-up code into a separate function so that it won't be included in the
timing measurements. A good way to share state between the setting-up function
and the main function is to make them methods of the same object, e.g.:

>>> import random
>>> class O:
...     def __init__(self):
...         self.l = []
...     def setup(self, N):
...         del self.l[:]
...         self.l.extend(range(N))
...         random.shuffle(self.l)
...     def sort(self, N):
...         self.l.sort()
...
>>> o = O()
>>> for N in 1000, 10000, 100000, 1000000:
...     print "%7d" % N,
...     rep_bench(o.sort, N, o.setup)
...
   1000 best: 4.830e+02,   3th-best: 4.950e+02, mean: 5.730e+02,   3th-worst: 5.858e+02, worst: 7.451e+02 (of     10)
  10000 best: 6.342e+02,   3th-best: 6.367e+02, mean: 6.678e+02,   3th-worst: 6.851e+02, worst: 7.848e+02 (of     10)
 100000 best: 8.309e+02,   3th-best: 8.338e+02, mean: 8.435e+02,   3th-worst: 8.540e+02, worst: 8.559e+02 (of     10)
1000000 best: 1.327e+03,   3th-best: 1.339e+03, mean: 1.349e+03,   3th-worst: 1.357e+03, worst: 1.374e+03 (of     10)

4. Useful fact! rep_bench() returns a dict containing the numbers.
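
For example, to get at the raw numbers without printing anything (the keys
below are exactly the ones rep_bench() puts into its result dict):

>>> d = rep_bench(fib, 25, UNITS_PER_SECOND=1000, quiet=True)
>>> sorted(d.keys())
['best', 'mean', 'mp1', 'mth-best', 'mth-worst', 'num', 'worst']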

5. Things to fix:

a. I used to have it hooked up to use the "hotshot" profiler on the code
being measured. I recently tried to change it to use the newer cProfile
profiler instead, but I don't understand the interface to cProfile, so it
just raises an exception if you pass profile=True. Please fix this and send
me a patch.

b. Wouldn't it be great if this script emitted results in a JSON format that
a tool could read to make pretty, interactive, explorable graphs? The graphs
could look like those on http://speed.pypy.org/ . Please make this work and
send me a patch!
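
A minimal sketch of what that might look like, using only the standard
library's json module (the float() calls are there because the result
values can be Decimals, which json will not serialize):

    import json
    results = bench(fib, TOPXP=10)
    with open("pyutil-benchutil-results.json", "w") as f:
        json.dump(dict((str(N), dict((k, float(v)) for (k, v) in r.items()))
                       for (N, r) in results.items()), f)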
"""

import cProfile, operator, time
from decimal import Decimal as D

#from pyutil import jsonutil as json

import platform
if 'windows' in platform.system().lower():
    # On Windows (under Python 2), time.clock() is the highest-resolution
    # wall-clock timer; on other platforms it measures CPU time, so
    # time.time() is the right choice there.
    clock = time.clock
else:
    clock = time.time

from pyutil.assertutil import _assert

def makeg(func):
    """
    Return a function taking a single argument n which calls func() n
    times, so that zero-argument functions can be benchmarked with
    rep_bench() and bench().
    """
    def blah(n, func=func):
        for i in xrange(n):
            func()
    return blah
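
# For example, with some zero-argument callable f of your own,
# rep_bench(makeg(f), 10000) reports the per-call cost of f().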

def to_decimal(x):
    """
    See if D(x) returns something. If instead it raises TypeError, x must
    have been a float, so convert it to Decimal by way of a string. (In
    Python >= 2.7, D(x) does this automatically.)
    """
    try:
        return D(x)
    except TypeError:
        return D("%0.54f" % (x,))

def mult(a, b):
    """
    If we get TypeError from * (possibly because one is float and the other
    is Decimal), then promote them both to Decimal.
    """
    try:
        return a * b
    except TypeError:
        return to_decimal(a) * to_decimal(b)

def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False):
    """
    Will run the func up to MAXREPS times, but won't start a new run if
    MAXTIME (wall-clock time) has already elapsed (unless MAXTIME is None).

    @param quiet Don't print anything--just return the results dict.
    """
    assert isinstance(n, int), (n, type(n))
    startwallclocktime = time.time()
    tls = [] # elapsed time in seconds
    bmes = []
    while ((MAXREPS is None) or (len(tls) < MAXREPS)) and ((MAXTIME is None) or ((time.time() - startwallclocktime) < MAXTIME)):
        if initfunc:
            initfunc(n)
        try:
            tl = bench_it(func, n, profile=profile, profresults=profresults)
        except BadMeasure, bme:
            bmes.append(bme)
        else:
            tls.append(tl)
    if len(tls) == 0:
        raise Exception("Couldn't get any measurements within time limits or number-of-attempts limits. Maybe something is wrong with your clock? %s" % (bmes,))
    sumtls = reduce(operator.__add__, tls)
    mean = sumtls / len(tls)
    tls.sort()
    worst = tls[-1]
    best = tls[0]
    _assert(best > worstemptymeasure*MARGINOFERROR, "%s(n=%s) took %0.10f seconds, but we cannot measure times much less than about %0.10f seconds. Try a more time-consuming variant (such as a higher n)." % (func, n, best, worstemptymeasure*MARGINOFERROR,))
    m = len(tls)/4
    if m > 0:
        mthbest = tls[m-1]
        mthworst = tls[-m]
    else:
        mthbest = tls[0]
        mthworst = tls[-1]

    # The +/-0 index is the best/worst, the +/-1 index is the 2nd-best/worst,
    # etc, so we use mp1 to name it.
    mp1 = m+1
    res = {
        'worst': mult(worst, UNITS_PER_SECOND)/n,
        'best': mult(best, UNITS_PER_SECOND)/n,
        'mp1': mp1,
        'mth-best': mult(mthbest, UNITS_PER_SECOND)/n,
        'mth-worst': mult(mthworst, UNITS_PER_SECOND)/n,
        'mean': mult(mean, UNITS_PER_SECOND)/n,
        'num': len(tls),
        }

    if not quiet:
        print "best: %(best)#8.03e, %(mp1)3dth-best: %(mth-best)#8.03e, mean: %(mean)#8.03e, %(mp1)3dth-worst: %(mth-worst)#8.03e, worst: %(worst)#8.03e (of %(num)6d)" % res

    return res

MARGINOFERROR = 10

# The worst (largest) overhead ever observed when measuring a do-nothing
# function; bench_it() keeps this up to date, and rep_bench() insists that
# real measurements exceed it by a factor of MARGINOFERROR.
worstemptymeasure = 0

class BadMeasure(Exception):
    """ Either the clock wrapped (which happens with time.clock()), or it
    went backwards (which happens with time.time() on rare occasions), or
    the code being measured completed before a single clock tick. """
    def __init__(self, startt, stopt, clock):
        self.startt = startt
        self.stopt = stopt
        self.clock = clock

    def __repr__(self):
        return "<%s %s - %s (%s)>" % (self.__class__.__name__, self.startt, self.stopt, self.clock)

def do_nothing(n):
    pass

def bench_it(func, n, profile=False, profresults="pyutil-benchutil.prof"):
    if profile:
        st = clock()
        # Note: cProfile.run() evaluates its statement in the __main__
        # namespace, where "func" and "n" are not defined, which is probably
        # why profile=True currently fails (see item 5a in the module
        # docstring); cProfile.runctx('func(n)', globals(), locals(),
        # profresults) would be one way to fix it.
        cProfile.run('func(n)', profresults)
        sto = clock()
    else:
        st = clock()
        func(n)
        sto = clock()
    timeelapsed = sto - st
    if timeelapsed <= 0:
        raise BadMeasure(st, sto, clock)
    global worstemptymeasure
    emsta = clock()
    do_nothing(2**32)
    emstop = clock()
    empty = emstop - emsta
    if empty > worstemptymeasure:
        worstemptymeasure = empty
    return timeelapsed

def bench(func, initfunc=None, TOPXP=21, MAXREPS=5, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", outputjson=False, jsonresultsfname="pyutil-benchutil-results.json", UNITS_PER_SECOND=1):
    """
    Run rep_bench() on a range of power-of-two task sizes up to 2**TOPXP
    and return a dict mapping each N to its rep_bench() results dict.
    """
    BSIZES = []
    for i in range(TOPXP-6, TOPXP+1, 2):
        n = int(2 ** i)
        if n < 1:
            n = 1
        if BSIZES and n <= BSIZES[-1]:
            n *= 2
        BSIZES.append(n)

    res = {}
    for BSIZE in BSIZES:
        print "N: %7d," % BSIZE,
        r = rep_bench(func, BSIZE, initfunc=initfunc, MAXREPS=MAXREPS, MAXTIME=MAXTIME, profile=profile, profresults=profresults, UNITS_PER_SECOND=UNITS_PER_SECOND)
        res[BSIZE] = r

    #if outputjson:
    #    write_file(jsonresultsfname, json.dumps(res))

    return res

def print_bench_footer(UNITS_PER_SECOND=1):
    print "all results are in time units per N"
    print "time units per second: %s; seconds per time unit: %s" % (UNITS_PER_SECOND, D(1)/UNITS_PER_SECOND)