ref: 721724916347c46df7cfa3105ac6fd1f6da652ed
dir: /bench.c/
#include <u.h>
#include <tos.h>
#include <libc.h>
#include <bench.h>

#define Nsec 1000000000ULL
#define BENCHTIME (Nsec)	/* 1s in ns */

/*
 * Power-of-two histogram of per-run durations.
 * bkt[i] counts samples that needed i right-shifts to drop to <= 1ns;
 * min/max delimit the used bucket range [min, max) (-1 means empty),
 * bktmax is the largest count held by any bucket.
 */
typedef struct Hist Hist;
struct Hist {
	int min;
	int max;
	int bktmax;
	int bkt[64];
};

int NPROC;
int showhist;

/*
 * nsec() is wallclock and can be adjusted by timesync
 * so need to use cycles() instead, but fall back to
 * nsec() in case we can't.
 *
 * The first call only latches the starting point and returns 0;
 * later calls return nanoseconds elapsed since that first call.
 */
uvlong
nanosec(void)
{
	static uvlong fasthz, xstart;
	uvlong x, q, r;

	/* ~0 marks "no cycle counter frequency, use nsec()" */
	if(fasthz == ~0ULL)
		return nsec() - xstart;

	if(fasthz == 0){
		if(_tos->cyclefreq){
			fasthz = _tos->cyclefreq;
			cycles(&xstart);
		} else {
			fasthz = ~0ULL;
			xstart = nsec();
		}
		return 0;
	}
	cycles(&x);
	x -= xstart;

	/* split into whole seconds and remainder so x*Nsec cannot overflow */
	q = x / fasthz;
	r = x % fasthz;
	return q*Nsec + r*Nsec/fasthz;
}

/* smaller of two ints */
static int
min(int x, int y)
{
	if(x > y) {
		return y;
	}
	return x;
}

/* larger of two ints */
static int
max(int x, int y)
{
	if(x < y) {
		return y;
	}
	return x;
}

/*
 * Add the duration currently stored in b->ns to histogram h.
 * Does nothing when h is nil. Durations <= 1 (including negative
 * ones) land in bucket 0; the last bucket absorbs everything that
 * does not fit.
 */
void
histrecord(Hist *h, B *b)
{
	vlong ns;
	int i;

	if(h == nil)
		return;
	ns = b->ns;
	for(i = 0; i < nelem(h->bkt) - 1 && ns > 1; i++)
		ns >>= 1;
	h->bkt[i]++;
	if(h->bkt[i] > h->bktmax)
		h->bktmax = h->bkt[i];
	if(i < h->min || h->min == -1)
		h->min = i;
	if(i >= h->max || h->max == -1)
		h->max = i+1;
}

/*
 * Run the benchmarking function once with b->N set to n.
 * b->ns is reset and then holds the elapsed time minus b->overheadns;
 * b->bcycles is NOT reset here: the elapsed cycles minus b->overheadcy
 * are added to whatever it already holds.
 * The resulting b->ns is recorded in h when h is not nil.
 */
static void
benchrunn(B *b, int n, Hist *h)
{
	vlong now;

	// reset
	b->ns = 0;
	b->start = nanosec();
	cycles(&b->scycles);

	b->N = n;
	b->item.fn(b);

	// count: take both end stamps before doing any arithmetic
	cycles(&b->ecycles);
	now = nanosec();
	b->ns += now - b->start - b->overheadns;
	b->bcycles += b->ecycles - b->scycles - b->overheadcy;

	histrecord(h, b);
}

/* nanoseconds per operation for a finished run, 0 if nothing ran */
static vlong
nsperop(B *b)
{
	if(b->N <= 0)
		return 0;

	return b->ns / (vlong)b->N;
}

/* cycles per operation for a finished run, 0 if nothing ran */
static uvlong
cyperop(B *b)
{
	if(b->N <= 0)
		return 0;

	return b->bcycles / (uvlong)b->N;
}

/* largest power of ten that is <= n (1 for n < 10) */
static int
rounddown10(vlong n)
{
	int tens, result, i;

	tens = 0;

	while(n >= 10) {
		n = n / 10;
		tens++;
	}

	result = 1;

	for(i = 0; i < tens; i++) {
		result *= 10;
	}

	return result;
}

/*
 * Pick an iteration count for a time budget of ns when one operation
 * takes about div nanoseconds: ns/div rounded up to the next value of
 * the form 1, 2 or 5 times a power of ten, never below 5.
 * A non-positive div (operation too fast to measure) or a quotient of
 * BENCHTIME or more yields BENCHTIME iterations.
 */
static int
roundup(vlong ns, vlong div)
{
	vlong n;
	int base;

	if(div <= 0 || ns/div >= BENCHTIME)
		return BENCHTIME;

	n = ns / div;
	base = rounddown10(n);

	if(n <= 5)
		return 5;
	if(n <= base)
		return base;
	if(n <= 2*base)
		return 2*base;
	if(n <= 5*base)
		return 5*base;

	return 10*base;
}

/*
 * Run the benchmark for one function.
 * On return b->N is the number of measured operations and b->ns /
 * b->bcycles are the totals for exactly those operations.
 * With a histogram (h != nil) the function is called n times with
 * N == 1 so every call yields one sample; otherwise it is called
 * once with N == n.
 */
static void
benchrun(B *b, Hist *h)
{
	vlong minns, totns;
	uvlong mincy;
	int i, n;

	b->overheadns = 0;
	b->overheadcy = 0;

	/* warm caches */
	benchrunn(b, 0, nil);

	/*
	 * measure overhead: benchrunn subtracts b->overheadns/overheadcy,
	 * so they must stay zero until all samples are taken.
	 */
	minns = 0;
	mincy = 0;
	for(i = 0; i < 20; i++){
		b->ns = 0;
		b->bcycles = 0;
		benchrunn(b, 0, nil);
		if(i == 0 || b->ns < minns)
			minns = b->ns;
		if(i == 0 || b->bcycles < mincy)
			mincy = b->bcycles;
	}
	b->overheadns = minns;
	b->overheadcy = mincy;

	/* do the run; h is nil unless a histogram was requested */
	if(h != nil){
		h->min = -1;
		h->max = -1;
	}

	/* estimate (this sample is also recorded in the histogram) */
	benchrunn(b, 1, h);
	n = roundup(BENCHTIME, nsperop(b));
	print("%10d N ", n);

	/* drop cycles left over from the overhead and estimate runs */
	b->bcycles = 0;
	if(h != nil){
		/* benchrunn resets b->ns on every call, so sum it up here */
		totns = 0;
		for(i = 0; i < n; i++){
			benchrunn(b, 1, h);
			totns += b->ns;
		}
		b->ns = totns;
		b->N = n;
	}else
		benchrunn(b, n, nil);
}

/*
 * Scale the per-operation time ns/n to a readable unit.
 * Picks the first unit in which the value is below 1000 (the last
 * unit is used if none qualifies), stores its name in *unit and
 * returns the scaled value. Returns 0 in "ns" when n is not positive.
 */
double
scaletime(vlong ns, vlong n, char **unit)
{
	/* LL suffixes: the larger divisors do not fit in int arithmetic */
	static const struct {
		char *name;
		vlong div;
	} units[] = {
		{"ns", 1LL},
		{"μs", 1000LL},
		{"ms", 1000LL*1000},
		{"s", 1000LL*1000*1000},
		{"m", 60LL*1000*1000*1000},
		{"h", 3600LL*1000*1000*1000},
	};
	vlong perop;
	int i;

	if(n <= 0){
		*unit = units[0].name;
		return 0.0;
	}

	/* divide step by step; n * div could overflow a vlong */
	perop = ns / n;
	for(i = 0; i < nelem(units)-1; i++)
		if(perop / units[i].div < 1000)
			break;

	*unit = units[i].name;

	return (double)ns / ((double)n * (double)units[i].div);
}

/*
 * Print the result line for a finished benchmark: time per op, cycles
 * per op and, when h is not nil, one bar per used histogram bucket
 * (bars are scaled to at most 30 stars).
 */
static void
benchres(B *res, Hist *h)
{
	char *unit;
	char bar[64];
	char tmop[64];
	char cyop[32];
	int i, j, lim;
	double t;
	uvlong cpo;

	if(res->N <= 0) {
		print("skipped\n");
		return;
	}

	t = scaletime(res->ns, (vlong)res->N, &unit);
	snprint(tmop, sizeof(tmop), "%12.2f %s/op", t, unit);

	/* show more decimals the smaller the cycle count is */
	cpo = cyperop(res);
	if(cpo < 10)
		snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->bcycles / (double)res->N);
	else if(cpo < 100)
		snprint(cyop, sizeof(cyop), "%12.1f cy/op", (double)res->bcycles / (double)res->N);
	else
		snprint(cyop, sizeof(cyop), "%10ulld cy/op", cpo);

	print("%s\t%s\n", tmop, cyop);
	if(h != nil){
		for(i = h->min; i < h->max; i++){
			if(h->bktmax < 30)
				lim = h->bkt[i];
			else	/* vlong math: 30*count overflows int for big counts */
				lim = (int)(30LL*h->bkt[i] / h->bktmax);
			/* never hide a non-empty bucket */
			if(lim == 0 && h->bkt[i] > 0)
				lim = 1;
			for(j = 0; j < lim; j++)
				bar[j] = '*';
			bar[j] = 0;
			print("\t%12lld | %8d %s\n", 1LL<<i, h->bkt[i], bar);
		}
	}
}

/* print the command line synopsis and exit */
static void
usage(void)
{
	fprint(2, "usage: %s [-h]\n", argv0);
	exits("usage");
}

/*
 * Initialise the benchmark library: NPROC is taken from the
 * environment variable of the same name (1 if unset or not a
 * positive number) and -h on the command line enables histograms.
 */
void
benchinit(int argc, char **argv)
{
	char *e;

	if((e = getenv("NPROC")) == nil)
		NPROC = 1;
	else
		NPROC = max(atoi(e), 1);
	free(e);

	ARGBEGIN{
	case 'h':
		showhist++;
		break;
	default:
		usage();
	}ARGEND;
}

/*
 * Bench a single function and print its result line.
 * A leading "bench" in name is stripped for display only.
 */
void
bench(char *name, void (*fn)(B*))
{
	Hist h;
	B b;

	memset(&b, 0, sizeof(B));
	memset(&h, 0, sizeof(Hist));

	b.item.name = name;
	b.item.fn = fn;

	if(strncmp(name, "bench", 5) == 0)
		name += 5;

	print("%24s\t", name);

	benchrun(&b, showhist ? &h : nil);
	benchres(&b, showhist ? &h : nil);
}

/* bench an array of functions, in order */
void
benchitems(BItem items[], int len)
{
	int i;

	for(i = 0; i < len; i++) {
		bench(items[i].name, items[i].fn);
	}
}