ref: a1c8da91f18c8bd3cdc7ce36c70538969e0c5602
dir: /bench.c/
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #include <sys/time.h> #include <unistd.h> #include "bench.h" #define Nsec 1000000000ULL #define nelem(x) (sizeof(x)/sizeof((x)[0])) #define BENCHTIME (Nsec) /* 1s in ns */ typedef struct Hist Hist; struct Hist { int min; int max; int bktmax; int bkt[64]; }; int NPROC; int showhist; /* * nsec() is wallclock and can be adjusted by timesync * so need to use cycles() instead, but fall back to * nsec() in case we can't */ uvlong nanosec(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (uvlong)ts.tv_sec * Nsec + (uvlong)ts.tv_nsec; } static void cycles(uvlong *x) { #if defined(__x86_64__) || defined(__i386__) unsigned int lo, hi; __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi)); *x = ((uvlong)hi << 32) | lo; #else *x = 0; // No cycle counter support on this architecture #endif } static int min(int x, int y) { if(x > y) { return y; } return x; } static int max(int x, int y) { if(x < y) { return y; } return x; } void histrecord(Hist *h, B *b) { vlong ns; int i; if(h == NULL) return; ns = b->ns; for(i = 0; i < nelem(h->bkt) - 1 && ns > 1; i++) ns >>= 1; h->bkt[i]++; if(h->bkt[i] > h->bktmax) h->bktmax = h->bkt[i]; if(i < h->min || h->min == -1) h->min = i; if(i >= h->max || h->max == -1) h->max = i+1; } // run the benchmarking function once, looping n times static void benchrunn(B *b, int n, Hist *h) { vlong now; // reset b->ns = 0; b->start = nanosec(); cycles(&b->scycles); b->N = n; b->item.fn(b); // count cycles(&b->ecycles); now = nanosec(); b->ns += nanosec() - b->start - b->overheadns; b->bcycles += b->ecycles - b->scycles - b->overheadcy; histrecord(h, b); } static vlong nsperop(B *b) { if(b->N <= 0) return 0; return b->ns / (vlong)b->N; } static uvlong cyperop(B *b) { if(b->N <= 0) return 0; return b->bcycles / (uvlong)b->N; } static int rounddown10(vlong n) { int tens, result, i; tens = 0; while(n >= 10) { n = n / 10; tens++; } result = 1; for(i = 0; i < tens; i++) { result *= 10; } return result; } static int roundup(vlong ns, vlong div) { vlong n; int base; if(div == 0 || ns/div >= BENCHTIME) return BENCHTIME; n = ns / div; base = rounddown10(n); if(n <= 5) return 5; if(n <= base) return base; if(n <= 2*base) return 2*base; if(n <= 5*base) return 5*base; return 10*base; } // run the benchmark for one function static void benchrun(B *b, Hist *h) { int i, n; b->overheadns = 0; b->overheadcy = 0; /* warm caches */ benchrunn(b, 0, NULL); /* measure overhead */ for(i = 0; i < 20; i++){ b->ns = 0; b->bcycles = 0; benchrunn(b, 0, NULL); if(i == 0 || b->ns < b->overheadns) b->overheadns = b->ns; if(i == 0 || b->bcycles < b->overheadcy) b->overheadcy = b->bcycles; } /* estimate */ benchrunn(b, 1, h); n = roundup(BENCHTIME, b->ns); printf("%10d N ", n); if(h != NULL){ for(i = 0; i < n; i++) benchrunn(b, 1, h); }else benchrunn(b, n, NULL); } double scaletime(vlong ns, vlong n, char **unit) { static const struct { char *name; vlong div; } units[] = { {"ns", 1}, {"μs", 1000}, {"ms", 1000*1000}, {"s", 1000*1000*1000}, {"m", 60LL*1000*1000*1000}, {"h", 3600LL*1000*1000*1000}, }; int i; for(i = 0; i < nelem(units)-1; i++) if(ns / (n * units[i].div) < 1000) break; *unit = units[i].name; return (double)ns / (double)(n*units[i].div); } static void benchres(B *res, Hist *h) { char *unit; char bar[64]; char tmop[64]; char cyop[32]; int i, j, lim; double nsperop; uvlong cyperop; if(res->N <= 0) { printf("skipped\n"); return; } nsperop = scaletime(res->ns, (vlong)res->N, &unit); snprintf(tmop, sizeof(tmop), "%12.2f %s/op", nsperop, unit); cyperop = res->bcycles / (uvlong)res->N; if(cyperop < 10) snprintf(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->bcycles / (double)res->N); else if(cyperop < 100) snprintf(cyop, sizeof(cyop), "%12.1f cy/op", (double)res->bcycles / (double)res->N); else snprintf(cyop, sizeof(cyop), "%10lu cy/op", cyperop); printf("%s\t%s\n", tmop, cyop); if(h != NULL){ for(i = h->min; i < h->max; i++){ if(h->bktmax < 30) lim = h->bkt[i]; else lim = 30*h->bkt[i] / h->bktmax; if(lim == 0 && h->bkt[i] > 0) lim = 1; for(j = 0; j < lim; j++) bar[j] = '*'; bar[j] = 0; printf("\t%12lld | %8d %s\n", 1LL<<i, h->bkt[i], bar); } } } static void usage(void) { fprintf(stderr, "usage: benchmark [-h]\n"); exit(1); } void benchinit(int argc, char **argv) { char *e; if((e = getenv("NPROC")) == NULL) NPROC = sysconf(_SC_NPROCESSORS_ONLN); else NPROC = atoi(e); if(e != NULL) free(e); int c; while((c = getopt(argc, argv, "h")) != -1) { switch(c) { case 'h': showhist++; break; default: usage(); } } } // bench a single function void bench(char *name, void (*fn)(B*)) { Hist h; B b; memset(&b, 0, sizeof(B)); memset(&h, 0, sizeof(Hist)); b.item.name = name; b.item.fn = fn; if(strncmp(name, "bench", 5) == 0) name += 5; printf("%24s\t", name); h.min = -1; h.max = -1; benchrun(&b, showhist ? &h : NULL); benchres(&b, showhist ? &h : NULL); } // bench an array of functions void benchitems(BItem items[], int len) { int i; for(i = 0; i < len; i++) { bench(items[i].name, items[i].fn); } }