ref: 16a2cc57f9cd49f84db08e9c46478d8d0acfdf45
dir: /bench.c/
#include <u.h>
#include <tos.h>
#include <libc.h>
#include <bench.h>

#define Nsec 1000000000ULL
#define BENCHTIME (Nsec)	/* 1s in ns */

enum {
	Maxiter	= 1000000000,	/* stop growing N once it reaches this */
	Maxint	= 0x7fffffff,
};

int NPROC;

/*
 * Nanoseconds elapsed since the first call.
 *
 * nsec() is wallclock and can be adjusted by timesync,
 * so cycles() is used instead; nsec() is the fallback
 * when _tos->cyclefreq is zero.
 * The first call only records the starting point and returns 0.
 */
uvlong
nanosec(void)
{
	static uvlong fasthz, xstart;
	uvlong x, q, r;

	/* ~0 marks "no cycle counter, use nsec()" */
	if(fasthz == ~0ULL)
		return nsec() - xstart;

	if(fasthz == 0){
		if(_tos->cyclefreq){
			fasthz = _tos->cyclefreq;
			cycles(&xstart);
		} else {
			fasthz = ~0ULL;
			xstart = nsec();
		}
		return 0;
	}

	cycles(&x);
	x -= xstart;

	/*
	 * convert whole seconds and the remainder separately
	 * rather than multiplying the full cycle count by Nsec
	 */
	q = x / fasthz;
	r = x % fasthz;
	return q*Nsec + r*Nsec/fasthz;
}

/* smaller of x and y */
static int
min(int x, int y)
{
	if(x > y)
		return y;
	return x;
}

/* larger of x and y */
static int
max(int x, int y)
{
	if(x < y)
		return y;
	return x;
}

/*
 * run the benchmarking function once, looping n times.
 * leaves the elapsed time in b->ns and the elapsed cycles in
 * b->bcycles, each reduced by the recorded overhead unless the
 * overhead field is still -1 (i.e. it is being measured).
 */
static void
benchrunn(B *b, int n)
{
	b->N = n;

	// reset
	b->ns = 0;
	/*
	 * bcycles must be cleared like ns, otherwise cycles from the
	 * calibration runs and all earlier runs pile up in the result
	 */
	b->bcycles = 0;
	b->start = nanosec();
	cycles(&b->scycles);

	b->item.fn(b);

	// stop
	cycles(&b->ecycles);
	b->ns += nanosec() - b->start;
	if(b->overheadns != -1)
		b->ns -= b->overheadns;
	b->bcycles += b->ecycles - b->scycles;
	if(b->overheadcy != -1)
		b->bcycles -= b->overheadcy;
}

/* nanoseconds per iteration of the last run; 0 if N <= 0 */
static vlong
nsperop(B *b)
{
	if(b->N <= 0)
		return 0;
	return b->ns / (vlong)b->N;
}

/* cycles per iteration of the last run; 0 if N <= 0 */
static uvlong
cyperop(B *b)
{
	if(b->N <= 0)
		return 0;
	return b->bcycles / (uvlong)b->N;
}

/* largest power of ten that is <= n (1 for n < 10) */
static int
rounddown10(int n)
{
	int tens, result, i;

	tens = 0;
	while(n >= 10){
		n = n / 10;
		tens++;
	}

	result = 1;
	for(i = 0; i < tens; i++)
		result *= 10;
	return result;
}

/* round n up to the next value of the form 1eX, 2eX or 5eX */
static int
roundup(int n)
{
	int base;

	base = rounddown10(n);

	if(n <= base)
		return base;
	if(n <= 2*base)
		return 2*base;
	if(n <= 5*base)
		return 5*base;
	return 10*base;
}

/*
 * run the benchmark for one function.
 * measures the overhead with N = 0, then keeps increasing N
 * until a run lasts at least BENCHTIME or N reaches Maxiter.
 */
static BResult
benchrun(B *b)
{
	int n, last, cap;
	vlong d, perop;
	BResult res;

	/* -1 stops benchrunn from subtracting overhead while it is measured */
	b->overheadns = -1;
	b->overheadcy = -1;

	/* two runs with N = 0; the figures of the second are kept */
	benchrunn(b, 0);
	benchrunn(b, 0);

	b->overheadns = b->ns;
	b->overheadcy = b->bcycles;

	n = 1;
	benchrunn(b, n);

	d = BENCHTIME;
	while(b->ns < d && n < Maxiter){
		last = n;

		/* estimate the N needed to fill BENCHTIME */
		perop = nsperop(b);
		if(perop == 0)
			n = Maxiter;
		else
			n = (int)(d / perop);

		/*
		 * overshoot by 50%, but grow by at most 100x and at least 1.
		 * 100*last overflows int once last passes Maxint/100;
		 * n+n/2 never exceeds 1.5e9, so saturating the cap is exact.
		 */
		if(last > Maxint/100)
			cap = Maxint;
		else
			cap = 100*last;
		n = max(min(n + n/2, cap), last + 1);
		n = roundup(n);

		benchrunn(b, n);
	}

	res.N = b->N;
	res.ns = b->ns;
	res.cycles = b->bcycles;
	res.overhead = b->overheadns;

	return res;
}

/*
 * scale the per-operation time ns/n to a readable unit.
 * picks the first unit in which the integer per-op time is below
 * 1000 (hours if none is), stores its name in *unit and returns
 * the per-op time in that unit.  returns 0 ns when n is 0.
 */
double
scaletime(vlong ns, vlong n, char **unit)
{
	/* LL keeps the large divisors from being computed in int */
	static const struct {
		char *name;
		vlong div;
	} units[] = {
		{"ns", 1},
		{"μs", 1000},
		{"ms", 1000*1000},
		{"s", 1000*1000*1000},
		{"m", 60LL*1000*1000*1000},
		{"h", 3600LL*1000*1000*1000},
	};
	int i;

	/* nothing was run; avoid dividing by zero */
	if(n == 0){
		*unit = units[0].name;
		return 0;
	}

	for(i = 0; i < nelem(units)-1; i++)
		if(ns / (n * units[i].div) < 1000)
			break;

	*unit = units[i].name;
	return (double)ns / (double)(n*units[i].div);
}

/*
 * print one result line: iteration count, time per op and
 * cycles per op.  prints "skipped" when no iterations ran.
 */
static void
benchres(BResult *res)
{
	char *unit;
	char tmop[64];
	char cyop[32];
	double tm;
	uvlong cy;

	if(res->N <= 0){
		print("skipped\n");
		return;
	}

	tm = scaletime(res->ns, (vlong)res->N, &unit);
	snprint(tmop, sizeof(tmop), "%12.2f %s/op", tm, unit);

	/* show fractional cycles only while the count is small */
	cy = res->cycles / (uvlong)res->N;
	if(cy < 10)
		snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->cycles / (double)res->N);
	else if(cy < 100)
		snprint(cyop, sizeof(cyop), "%12.1f cy/op", (double)res->cycles / (double)res->N);
	else
		snprint(cyop, sizeof(cyop), "%10ulld cy/op", cy);

	print("%10d N %s\t%s\n", res->N, tmop, cyop);
}

/*
 * public api
 */

// setup. sets the global NPROC from the NPROC environment
// variable; NPROC is 1 when the variable is unset or does
// not hold a positive number. the arguments are unused.
void
benchinit(int, char **)
{
	char *e;

	NPROC = 1;
	if((e = getenv("NPROC")) != nil){
		NPROC = atoi(e);
		if(NPROC < 1)
			NPROC = 1;
	}
	free(e);
}

// bench a single function.
// prints the name (without a leading "bench") followed by the result.
void
bench(char *name, void (*fn)(B*))
{
	B b;
	BResult res;

	memset(&b, 0, sizeof(B));
	memset(&res, 0, sizeof(BResult));

	b.item.name = name;
	b.item.fn = fn;

	/* b.item.name keeps the full name; only the printed label is shortened */
	if(strncmp(name, "bench", 5) == 0)
		name += 5;

	print("%24s\t", name);

	res = benchrun(&b);

	benchres(&res);
}

// bench an array of functions
void
benchitems(BItem items[], int len)
{
	int i;

	for(i = 0; i < len; i++)
		bench(items[i].name, items[i].fn);
}