ref: 4d86a3839e7b85b288e3b7b32fe1a10b9c4f3727
dir: /bench.c/
#include <u.h>
#include <tos.h>
#include <libc.h>
#include <bench.h>

#define Nsec 1000000000ULL
#define BENCHTIME (Nsec) /* 1s in ns */

/* upper bound on the iteration count tried by benchrun() */
enum {
	Maxiter = 1000000000,
};

/* set by benchinit() from the NPROC environment variable */
int NPROC;

/*
 * nsec() is wallclock and can be adjusted by timesync
 * so need to use cycles() instead, but fall back to
 * nsec() in case we can't
 *
 * The first call only records the starting point and returns 0;
 * later calls return the nanoseconds elapsed since that first call.
 */
uvlong
nanosec(void)
{
	static uvlong fasthz, xstart;
	uvlong x, q, r;

	/* fallback mode: no cycle frequency was available */
	if(fasthz == ~0ULL)
		return nsec() - xstart;

	/* first call: pick the clock source and remember the origin */
	if(fasthz == 0){
		if(_tos->cyclefreq){
			fasthz = _tos->cyclefreq;
			cycles(&xstart);
		} else {
			fasthz = ~0ULL;
			xstart = nsec();
		}
		return 0;
	}

	cycles(&x);
	x -= xstart;

	/*
	 * split into whole seconds and remainder so that the
	 * multiplication by Nsec is only applied to values < fasthz
	 */
	q = x / fasthz;
	r = x % fasthz;

	return q*Nsec + r*Nsec/fasthz;
}

/* smaller of x and y */
static int
min(int x, int y)
{
	if(x > y)
		return y;
	return x;
}

/* larger of x and y */
static int
max(int x, int y)
{
	if(x < y)
		return y;
	return x;
}

// run the benchmarking function once, looping n times.
// leaves the elapsed time in b->ns and the elapsed cycles in
// b->bcycles, each reduced by the measured overhead unless the
// overhead field still holds the "not measured yet" marker -1.
static void
benchrunn(B *b, int n)
{
	b->N = n;

	// reset: every run starts from zero so that ns and cycles
	// both describe this run only
	b->start = nanosec();
	b->ns = 0;
	b->bcycles = 0;
	cycles(&b->scycles);

	b->item.fn(b);

	// stop
	cycles(&b->ecycles);
	b->ns += nanosec() - b->start;
	if(b->overheadns != -1)
		b->ns -= b->overheadns;

	b->bcycles += b->ecycles - b->scycles;
	if(b->overheadcy != -1){
		// never let the cycle count wrap below zero
		if(b->bcycles > b->overheadcy)
			b->bcycles -= b->overheadcy;
		else
			b->bcycles = 0;
	}
}

/* nanoseconds per iteration of the last run, 0 if it had no iterations */
static vlong
nsperop(B *b)
{
	if(b->N <= 0)
		return 0;

	return b->ns / (vlong)b->N;
}

/* cycles per iteration of the last run, 0 if it had no iterations */
static uvlong
cyperop(B *b)
{
	if(b->N <= 0)
		return 0;

	return b->bcycles / (uvlong)b->N;
}

/* largest power of ten that is <= n; 1 for n < 10 */
static int
rounddown10(int n)
{
	int tens, result, i;

	tens = 0;
	while(n >= 10){
		n = n / 10;
		tens++;
	}

	result = 1;
	for(i = 0; i < tens; i++)
		result *= 10;

	return result;
}

/* round n up to the next value of the form 1, 2 or 5 times a power of ten */
static int
roundup(int n)
{
	int base;

	base = rounddown10(n);

	if(n <= base)
		return base;
	if(n <= 2*base)
		return 2*base;
	if(n <= 5*base)
		return 5*base;

	return 10*base;
}

// run the benchmark for one function: measure the overhead of an
// empty run, then grow the iteration count until a run lasts at
// least BENCHTIME or the count reaches Maxiter.
static BResult
benchrun(B *b)
{
	int n, last;
	vlong d, perop, want;
	BResult res;

	// two zero-iteration runs with overhead subtraction disabled;
	// the second one is taken as the fixed per-run overhead
	b->overheadns = -1;
	b->overheadcy = -1;
	benchrunn(b, 0);
	benchrunn(b, 0);
	b->overheadns = b->ns;
	b->overheadcy = b->bcycles;

	n = 1;
	benchrunn(b, n);

	d = BENCHTIME;
	while(b->ns < d && n < Maxiter){
		last = n;

		// predict the iteration count needed to fill BENCHTIME.
		// a per-op time that is zero (or negative after overhead
		// subtraction) is too small to extrapolate from.
		perop = nsperop(b);
		if(perop <= 0)
			want = Maxiter;
		else
			want = d / perop;

		// run 1.5x the prediction, but grow by at most 100x and
		// by at least 1. done in vlong: 100*last does not fit in
		// an int once last exceeds about 21 million.
		want += want/2;
		if(want > (vlong)100*last)
			want = (vlong)100*last;
		if(want < (vlong)last+1)
			want = (vlong)last+1;

		// want is at most 1.5*Maxiter here, which fits in an int
		n = roundup((int)want);
		benchrunn(b, n);
	}

	res.N = b->N;
	res.ns = b->ns;
	res.cycles = b->bcycles;
	res.overhead = b->overheadns;

	return res;
}

// print one result line: iteration count, ns/op, cy/op and total time.
// per-op values below 100 are printed as decimals instead of integers.
static void
benchres(BResult *res)
{
	char nsop[32];
	char cyop[32];
	vlong nsper;
	uvlong cyper;

	if(res->N <= 0){
		nsper = 0;
		cyper = 0;
	} else {
		nsper = res->ns / (vlong)res->N;
		cyper = res->cycles / (uvlong)res->N;
	}

	snprint(nsop, sizeof(nsop), "%10lld ns/op", nsper);
	snprint(cyop, sizeof(cyop), "%10ulld cy/op", cyper);

	if(res->N > 0 && nsper < 100){
		if(nsper < 10)
			snprint(nsop, sizeof(nsop), "%13.2f ns/op", (double)res->ns / (double)res->N);
		else
			snprint(nsop, sizeof(nsop), "%12.1f ns/op", (double)res->ns / (double)res->N);
	}

	if(res->N > 0 && cyper < 100){
		if(cyper < 10)
			snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->cycles / (double)res->N);
		else
			snprint(cyop, sizeof(cyop), "%12.1f cy/op", (double)res->cycles / (double)res->N);
	}

	print("%10d N %.16s\t%s (total %f s)\n", res->N, nsop, cyop, (double)res->ns / Nsec);
}

/*
 * public api
 */

// setup. reads the NPROC environment variable into the global NPROC;
// a missing, unparsable or non-positive value yields 1.
// the arguments are currently unused.
void
benchinit(int, char **)
{
	char *e;

	NPROC = 1;
	if((e = getenv("NPROC")) != nil){
		NPROC = atoi(e);
		if(NPROC < 1)
			NPROC = 1;
	}

	free(e);
}

// bench a single function and print its result line.
// a leading "bench" in name is dropped from the printed label.
void
bench(char *name, void (*fn)(B*))
{
	B b;
	BResult res;

	memset(&b, 0, sizeof(B));

	b.item.name = name;
	b.item.fn = fn;

	if(strncmp(name, "bench", 5) == 0)
		name += 5;

	print("%16s\t", name);

	res = benchrun(&b);

	benchres(&res);
}

// like bench, but call init once before benchmarking.
// a nil init is skipped.
void
xbench(char *name, void (*fn)(B*), void (*init)(void))
{
	if(init != nil)
		init();
	bench(name, fn);
}

// bench an array of functions, in order
void
benchitems(BItem items[], int len)
{
	int i;

	for(i = 0; i < len; i++)
		bench(items[i].name, items[i].fn);
}