# Copyright (c) 2002-2013 Zooko Wilcox-O'Hearn
# This file is part of pyutil; see README.rst for licensing terms.

"""
Benchmark a function for its behavior with respect to N.

How to use this module:

1. Define a function which runs the code that you want to benchmark. The
function takes a single argument which is the size of the task (i.e. the "N"
parameter). Pass this function as the first argument to rep_bench(), and N as
the second, e.g.:

>>> from pyutil.benchutil import rep_bench
>>> def fib(N):
...     if N <= 1:
...         return 1
...     else:
...         return fib(N-1) + fib(N-2)
...
>>> rep_bench(fib, 25, UNITS_PER_SECOND=1000)
best: 1.968e+00, 3th-best: 1.987e+00, mean: 2.118e+00, 3th-worst: 2.175e+00, worst: 2.503e+00 (of 10)

The output reports the number of milliseconds it took to execute the
function, divided by N, from ten different invocations of fib(). It reports
the best, worst, M-th best, M-th worst, and mean, where "M" is 1/4 of the
number of invocations (there were 10 invocations in this case).

2. Now run it with different values of N and look for patterns:

>>> for N in 1, 5, 9, 13, 17, 21:
...     print "%2d" % N,
...     rep_bench(fib, N, UNITS_PER_SECOND=1000000)
...
 1 best: 9.537e-01, 3th-best: 9.537e-01, mean: 1.121e+00, 3th-worst: 1.192e+00, worst: 2.146e+00 (of 10)
 5 best: 5.722e-01, 3th-best: 6.199e-01, mean: 7.200e-01, 3th-worst: 8.106e-01, worst: 8.106e-01 (of 10)
 9 best: 2.437e+00, 3th-best: 2.464e+00, mean: 2.530e+00, 3th-worst: 2.570e+00, worst: 2.676e+00 (of 10)
13 best: 1.154e+01, 3th-best: 1.168e+01, mean: 5.638e+01, 3th-worst: 1.346e+01, worst: 4.478e+02 (of 10)
17 best: 6.230e+01, 3th-best: 6.247e+01, mean: 6.424e+01, 3th-worst: 6.460e+01, worst: 7.294e+01 (of 10)
21 best: 3.376e+02, 3th-best: 3.391e+02, mean: 3.521e+02, 3th-worst: 3.540e+02, worst: 3.963e+02 (of 10)
>>> print_bench_footer(UNITS_PER_SECOND=1000000)
all results are in time units per N
time units per second: 1000000; seconds per time unit: 0.000001

(The pattern here is that as N grows, the time per N grows.)

3. If you need to do some setting up before the code can run, then put the
setting-up code into a separate function so that it won't be included in the
timing measurements. A good way to share state between the setting-up
function and the main function is to make them be methods of the same
object, e.g.:

>>> import random
>>> class O:
...     def __init__(self):
...         self.l = []
...     def setup(self, N):
...         del self.l[:]
...         self.l.extend(range(N))
...         random.shuffle(self.l)
...     def sort(self, N):
...         self.l.sort()
...
>>> o = O()
>>> for N in 1000, 10000, 100000, 1000000:
...     print "%7d" % N,
...     rep_bench(o.sort, N, initfunc=o.setup)
...
   1000 best: 4.830e+02, 3th-best: 4.950e+02, mean: 5.730e+02, 3th-worst: 5.858e+02, worst: 7.451e+02 (of 10)
  10000 best: 6.342e+02, 3th-best: 6.367e+02, mean: 6.678e+02, 3th-worst: 6.851e+02, worst: 7.848e+02 (of 10)
 100000 best: 8.309e+02, 3th-best: 8.338e+02, mean: 8.435e+02, 3th-worst: 8.540e+02, worst: 8.559e+02 (of 10)
1000000 best: 1.327e+03, 3th-best: 1.339e+03, mean: 1.349e+03, 3th-worst: 1.357e+03, worst: 1.374e+03 (of 10)

4. Useful fact! rep_bench() returns a dict containing the numbers.
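
For example, the dict from the run in step 1 contains the keys 'best',
'mth-best', 'mean', 'mth-worst', 'worst', and 'num' (the same figures as in
the printed line, expressed in time units per N), plus 'mp1' (the ordinal
used in the "...th-best"/"...th-worst" labels). An illustrative sketch -- the
values will of course differ from run to run, but the keys look like this:

>>> d = rep_bench(fib, 25, UNITS_PER_SECOND=1000, quiet=True)
>>> sorted(d.keys())
['best', 'mean', 'mp1', 'mth-best', 'mth-worst', 'num', 'worst']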

5. Things to fix:

 a. I used to have it hooked up to use the "hotshot" profiler on the code
 being measured. I recently tried to change it to use the newer cProfile
 profiler instead, but I don't understand the interface to cProfile, so it
 just raises an exception if you pass profile=True. Please fix this and send
 me a patch.

 xxx change it to statprof

 b. Wouldn't it be great if this script emitted results in a json format
 that was understood by a tool to make pretty interactive explorable graphs?
 The pretty graphs could look like those on http://speed.pypy.org/ . Please
 make this work and send me a patch!
"""

import cProfile, operator, time
from decimal import Decimal as D

#from pyutil import jsonutil as json

import platform
if 'windows' in platform.system().lower():
    clock = time.clock
else:
    clock = time.time

from assertutil import _assert

def makeg(func):
    """ Wrap a zero-argument callable so it can be benchmarked: the returned
    function takes N and simply calls func() N times. """
    def blah(n, func=func):
        for i in xrange(n):
            func()
    return blah

def to_decimal(x):
    """ See if D(x) returns something. If instead it raises TypeError, x must
    have been a float, so convert it to Decimal by way of string. (In Python
    >= 2.7, D(x) does this automatically.) """
    try:
        return D(x)
    except TypeError:
        return D("%0.54f" % (x,))

def mult(a, b):
    """ If we get TypeError from * (possibly because one is float and the
    other is Decimal), then promote them both to Decimal. """
    try:
        return a * b
    except TypeError:
        return to_decimal(a) * to_decimal(b)
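
# A quick illustration of the two helpers above (a hypothetical interactive
# session, not executed on import; the exact form of the repr can vary with
# the Python version, but the values are as stated):
#
#   >>> mult(D(3), 2)    # Decimal * int is supported directly
#   Decimal('6')
#   >>> mult(2.5, D(3))  # float * Decimal raises TypeError, so both operands
#   ...                  # get promoted via to_decimal(); the result == D('7.5')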

def rep_bench(func, n, runtime=1.0, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False):
    """
    Will run the func up to MAXREPS times, but won't start a new run if
    MAXTIME (wall-clock time) has already elapsed (unless MAXTIME is None).

    @param quiet Don't print anything--just return the results dict.
    """
    assert isinstance(n, int), (n, type(n))
    global worstemptymeasure
    emsta = clock()
    do_nothing(2**32)
    emstop = clock()
    empty = emstop - emsta
    if empty > worstemptymeasure:
        worstemptymeasure = empty
    if (worstemptymeasure*2) >= runtime:
        raise BadMeasure("Apparently simply invoking an empty Python function can take as long as %0.10f seconds, and we were running iterations for only about %0.10f seconds. So the measurement of the runtime of the code under benchmark is not reliable. Please pass a higher number for the 'runtime' argument to bench_it()." % (worstemptymeasure, runtime))

    startwallclocktime = time.time()
    tls = [] # (elapsed time per iter in seconds, iters)
    bmes = []
    while ((MAXREPS is None) or (len(tls) < MAXREPS)) and ((MAXTIME is None) or ((time.time() - startwallclocktime) < MAXTIME)):
        if initfunc:
            initfunc(n)
        try:
            tl, iters = bench_it(func, n, runtime=runtime, profile=profile, profresults=profresults)
        except BadMeasure, bme:
            bmes.append(bme)
        else:
            tls.append((tl, iters))
    if len(tls) == 0:
        raise Exception("Couldn't get any measurements within time limits or number-of-attempts limits. Maybe something is wrong with your clock? %s" % (bmes,))

    sumtls = sum([tl for (tl, iters) in tls])
    mean = sumtls / len(tls)
    tls.sort()
    worst = tls[-1][0]
    best = tls[0][0]

    m = len(tls)/4
    if m > 0:
        mthbest = tls[m-1][0]
        mthworst = tls[-m][0]
    else:
        mthbest = tls[0][0]
        mthworst = tls[-1][0]

    # The +/-0 index is the best/worst, the +/-1 index is the 2nd-best/worst,
    # etc, so we use mp1 to name it.
    mp1 = m+1
    res = {
        'worst': mult(worst, UNITS_PER_SECOND)/n,
        'best': mult(best, UNITS_PER_SECOND)/n,
        'mp1': mp1,
        'mth-best': mult(mthbest, UNITS_PER_SECOND)/n,
        'mth-worst': mult(mthworst, UNITS_PER_SECOND)/n,
        'mean': mult(mean, UNITS_PER_SECOND)/n,
        'num': len(tls),
        }

    if not quiet:
        print "best: %(best)#8.03e, %(mp1)3dth-best: %(mth-best)#8.03e, mean: %(mean)#8.03e, %(mp1)3dth-worst: %(mth-worst)#8.03e, worst: %(worst)#8.03e (of %(num)6d)" % res

    return res

MARGINOFERROR = 10

worstemptymeasure = 0

class BadMeasure(Exception):
    """ Either the clock wrapped (which happens with time.clock()) or it went
    backwards (which happens with time.time() on rare occasions), or the code
    being measured completed before a single clock tick. """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.msg)

def do_nothing(n):
    pass

def bench_it(func, n, runtime=1.0, profile=False, profresults="pyutil-benchutil.prof"):
    if profile:
        raise NotImplementedError("profiling is not currently hooked up; see the 'Things to fix' notes in the module docstring")

    iters = 0
    st = clock()
    deadline = st + runtime
    sto = clock()
    while sto < deadline:
        func(n)
        iters += 1
        sto = clock()
    timeelapsed = sto - st
    if (timeelapsed <= 0) or (iters == 0):
        raise BadMeasure("no clock ticks elapsed over %d iterations (measured %s seconds)" % (iters, timeelapsed))
    return (timeelapsed / iters, iters)

def bench(func, initfunc=None, TOPXP=21, MAXREPS=5, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", outputjson=False, jsonresultsfname="pyutil-benchutil-results.json", UNITS_PER_SECOND=1):
    BSIZES = []
    for i in range(TOPXP-6, TOPXP+1, 2):
        n = int(2 ** i)
        if n < 1:
            n = 1
        if BSIZES and n <= BSIZES[-1]:
            n *= 2
        BSIZES.append(n)

    res = {}
    for BSIZE in BSIZES:
        print "N: %7d," % BSIZE,
        r = rep_bench(func, BSIZE, initfunc=initfunc, MAXREPS=MAXREPS, MAXTIME=MAXTIME, profile=profile, profresults=profresults, UNITS_PER_SECOND=UNITS_PER_SECOND)
        res[BSIZE] = r

    #if outputjson:
    #    write_file(jsonresultsfname, json.dumps(res))

    return res

def print_bench_footer(UNITS_PER_SECOND=1):
    print "all results are in time units per N"
    print "time units per second: %s; seconds per time unit: %s" % (UNITS_PER_SECOND, D(1)/UNITS_PER_SECOND)
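
# A minimal usage sketch for running this module directly: benchmark a
# trivial function of N with bench() and then print the footer. The sample
# function and the TOPXP/MAXREPS values are arbitrary illustrative choices;
# each measurement runs for about a second, so the whole sweep takes several
# seconds. (For a zero-argument callable f, makeg(f) adapts it to the
# required one-argument form.)
if __name__ == '__main__':
    def _sample(N):
        # Build and sum a list of N integers.
        sum(range(N))

    bench(_sample, TOPXP=16, MAXREPS=3)
    print_bench_footer()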