ref: 721724916347c46df7cfa3105ac6fd1f6da652ed
parent: 2869a292caf121ee26d4fc5f0594c051210e9e2f
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Jul 14 13:22:37 EDT 2025
bench: histogram support
--- a/bench.c
+++ b/bench.c
@@ -6,8 +6,17 @@
#define Nsec 1000000000ULL
#define BENCHTIME (Nsec) /* 1s in ns */
-int NPROC;
+typedef struct Hist Hist;
+struct Hist {
+ int min;
+ int max;
+ int bktmax;
+ int bkt[64];
+};
+
+int NPROC;
+int showhist;
/*
* nsec() is wallclock and can be adjusted by timesync
* so need to use cycles() instead, but fall back to
@@ -59,28 +68,46 @@
return x;
}
+void
+histrecord(Hist *h, B *b)
+{
+ vlong ns;
+ int i;
+
+ if(h == nil)
+ return;
+ ns = b->ns;
+ for(i = 0; i < nelem(h->bkt) - 1 && ns > 1; i++)
+ ns >>= 1;
+ h->bkt[i]++;
+ if(h->bkt[i] > h->bktmax)
+ h->bktmax = h->bkt[i];
+ if(i < h->min || h->min == -1)
+ h->min = i;
+ if(i >= h->max || h->max == -1)
+ h->max = i+1;
+}
+
// run the benchmarking function once, looping n times
static void
-benchrunn(B *b, int n)
+benchrunn(B *b, int n, Hist *h)
{
+ vlong now;
- b->N = n;
-
// reset
- b->start = nanosec();
b->ns = 0;
+ b->start = nanosec();
cycles(&b->scycles);
+ b->N = n;
b->item.fn(b);
- // stop
+ // count
cycles(&b->ecycles);
- b->ns += nanosec() - b->start;
- if(b->overheadns != -1)
- b->ns -= b->overheadns;
- b->bcycles += b->ecycles - b->scycles;
- if(b->overheadcy != -1)
- b->bcycles -= b->overheadcy;
+ now = nanosec();
+ b->ns += nanosec() - b->start - b->overheadns;
+ b->bcycles += b->ecycles - b->scycles - b->overheadcy;
+ histrecord(h, b);
}
static vlong
@@ -102,7 +129,7 @@
}
static int
-rounddown10(int n)
+rounddown10(vlong n)
{
int tens, result, i;
@@ -123,12 +150,17 @@
}
static int
-roundup(int n)
+roundup(vlong ns, vlong div)
{
+ vlong n;
int base;
+ if(div == 0 || ns/div >= BENCHTIME)
+ return BENCHTIME;
+ n = ns / div;
base = rounddown10(n);
-
+ if(n <= 5)
+ return 5;
if(n <= base)
return base;
if(n <= 2*base)
@@ -135,47 +167,43 @@
return 2*base;
if(n <= 5*base)
return 5*base;
-
return 10*base;
}
// run the benchmark for one function
-static BResult
-benchrun(B *b)
+static void
+benchrun(B *b, Hist *h)
{
- int n, last;
- vlong d;
- BResult res;
+ int i, n;
- b->overheadns = -1;
- b->overheadcy = -1;
- benchrunn(b, 0);
- benchrunn(b, 0);
- b->overheadns = b->ns;
- b->overheadcy = b->bcycles;
+ b->overheadns = 0;
+ b->overheadcy = 0;
+ /* warm caches */
+ benchrunn(b, 0, nil);
- n = 1;
- benchrunn(b, n);
- d = BENCHTIME;
- while(b->ns < d && n < 1000000000) {
- last = n;
- if(nsperop(b) == 0) {
- n = 1000000000;
- } else {
- n = (int) d/nsperop(b);
- }
-
- n = max(min(n+n/2, 100*last), last+1);
-
- n = roundup(n);
- benchrunn(b, n);
+ /* measure overhead */
+ for(i = 0; i < 20; i++){
+ b->ns = 0;
+ b->bcycles = 0;
+ benchrunn(b, 0, nil);
+ if(i == 0 || b->ns < b->overheadns)
+ b->overheadns = b->ns;
+ if(i == 0 || b->bcycles < b->overheadcy)
+ b->overheadcy = b->bcycles;
}
- res.N = b->N;
- res.ns = b->ns;
- res.cycles = b->bcycles;
- res.overhead = b->overheadns;
- return res;
+ /* do the run */
+ h->min = -1;
+ h->max = -1;
+ /* estimate */
+ benchrunn(b, 1, h);
+ n = roundup(BENCHTIME, b->ns);
+ print("%10d N ", n);
+ if(h != nil)
+ for(i = 0; i < n; i++)
+ benchrunn(b, 1, h);
+ else
+ benchrunn(b, n, nil);
}
double
@@ -202,11 +230,13 @@
}
static void
-benchres(BResult *res)
+benchres(B *res, Hist *h)
{
char *unit;
+ char bar[64];
char tmop[64];
char cyop[32];
+ int i, j, lim;
double nsperop;
uvlong cyperop;
@@ -218,25 +248,40 @@
nsperop = scaletime(res->ns, (vlong)res->N, &unit);
snprint(tmop, sizeof(tmop), "%12.2f %s/op", nsperop, unit);
- cyperop = res->cycles / (uvlong)res->N;
+ cyperop = res->bcycles / (uvlong)res->N;
if(cyperop < 10)
- snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->cycles / (double)res->N);
+ snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->bcycles / (double)res->N);
else if(cyperop < 100)
- snprint(cyop, sizeof(cyop), "%12.1f cy/op", (double)res->cycles / (double)res->N);
+ snprint(cyop, sizeof(cyop), "%12.1f cy/op", (double)res->bcycles / (double)res->N);
else
snprint(cyop, sizeof(cyop), "%10ulld cy/op", cyperop);
- print("%10d N %s\t%s\n", res->N, tmop, cyop);
+ print("%s\t%s\n", tmop, cyop);
+ if(h != nil){
+ for(i = h->min; i < h->max; i++){
+ if(h->bktmax < 30)
+ lim = h->bkt[i];
+ else
+ lim = 30*h->bkt[i] / h->bktmax;
+ if(lim == 0 && h->bkt[i] > 0)
+ lim = 1;
+ for(j = 0; j < lim; j++)
+ bar[j] = '*';
+ bar[j] = 0;
+ print("\t%12lld | %8d %s\n", 1LL<<i, h->bkt[i], bar);
+ }
+ }
}
-/*
- * public api
-*/
+static void
+usage(void)
+{
+ fprint(2, "usage: %s [-s]\n", argv0);
+ exits("usage");
+}
-// setup. currently only calculates cycles() overhead.
-// not strictly necessary, but will give better cycle counts.
void
-benchinit(int, char **)
+benchinit(int argc, char **argv)
{
char *e;
@@ -245,6 +290,13 @@
else
NPROC = atoi(e);
free(e);
+ ARGBEGIN{
+ case 'h':
+ showhist++;
+ break;
+ default:
+ usage();
+ }ARGEND;
}
// bench a single function
@@ -251,11 +303,11 @@
void
bench(char *name, void (*fn)(B*))
{
+ Hist h;
B b;
- BResult res;
memset(&b, 0, sizeof(B));
- memset(&res, 0, sizeof(BResult));
+ memset(&h, 0, sizeof(Hist));
b.item.name = name;
b.item.fn = fn;
@@ -263,9 +315,8 @@
if(strncmp(name, "bench", 5) == 0)
name += 5;
print("%24s\t", name);
- res = benchrun(&b);
-
- benchres(&res);
+ benchrun(&b, showhist ? &h : nil);
+ benchres(&b, showhist ? &h : nil);
}
// bench an array of functions
--- a/bench.h
+++ b/bench.h
@@ -1,5 +1,4 @@
typedef struct BItem BItem;
-typedef struct BResult BResult;
typedef struct B B;
// single benchmark function
@@ -9,15 +8,6 @@
void (*fn)(B*);
};
-// result of benchmarking
-struct BResult
-{
- int N;
- vlong ns;
- uvlong cycles;
- vlong overhead;
-};
-
// type passed to bench functions
struct B
{
@@ -27,6 +17,8 @@
uvlong scycles; /* start cycles */
uvlong ecycles; /* end cycles */
uvlong bcycles; /* best cycles */
+ long *histo; /* histogram */
+ int nhisto; /* histogram size */
vlong overheadns; /* cost of doing 0 iters */
vlong overheadcy; /* cost of doing 0 iters, cycles */
BItem item;
@@ -45,5 +37,4 @@
// public api
void benchinit(int, char**);
void bench(char *name, void (*)(B*));
-void xbench(char *name, void(*)(B*), void (*)(void));
void benchitems(BItem[], int);
--
⑨