shithub: sysbench

Download patch

ref: 721724916347c46df7cfa3105ac6fd1f6da652ed
parent: 2869a292caf121ee26d4fc5f0594c051210e9e2f
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Jul 14 13:22:37 EDT 2025

bench: histogram support

--- a/bench.c
+++ b/bench.c
@@ -6,8 +6,17 @@
 #define Nsec 1000000000ULL
 #define BENCHTIME (Nsec)	/* 1s in ns */
 
-int	NPROC;
+typedef struct Hist	Hist;
 
+struct Hist {
+	int	min;
+	int	max;
+	int	bktmax;
+	int	bkt[64];
+};
+
+int	NPROC;
+int	showhist;
 /*
  * nsec() is wallclock and can be adjusted by timesync
  * so need to use cycles() instead, but fall back to
@@ -59,28 +68,46 @@
         return x;
 }
 
+void
+histrecord(Hist *h, B *b)
+{
+	vlong ns;
+	int i;
+
+	if(h == nil)
+		return;
+	ns = b->ns;
+	for(i = 0; i < nelem(h->bkt) - 1 && ns > 1; i++)
+		ns >>= 1;
+	h->bkt[i]++;
+	if(h->bkt[i] > h->bktmax)
+		h->bktmax = h->bkt[i];
+	if(i < h->min || h->min == -1)
+		h->min = i;
+	if(i >= h->max || h->max == -1)
+		h->max = i+1;
+}
+
 // run the benchmarking function once, looping n times
 static void
-benchrunn(B *b, int n)
+benchrunn(B *b, int n, Hist *h)
 {
+	vlong now;
 
-	b->N = n;
-
 	// reset
-	b->start = nanosec();
 	b->ns = 0;
+	b->start = nanosec();
 	cycles(&b->scycles);
 
+	b->N = n;
 	b->item.fn(b);
 
-	// stop
+	// count
 	cycles(&b->ecycles);
-	b->ns += nanosec() - b->start;
-	if(b->overheadns != -1)
-		b->ns -= b->overheadns;
-	b->bcycles += b->ecycles - b->scycles;
-	if(b->overheadcy != -1)
-		b->bcycles -= b->overheadcy;
+	now = nanosec();
+	b->ns += nanosec() - b->start - b->overheadns;
+	b->bcycles += b->ecycles - b->scycles - b->overheadcy;
+	histrecord(h, b);
 }
 
 static vlong
@@ -102,7 +129,7 @@
 }
 
 static int
-rounddown10(int n)
+rounddown10(vlong n)
 {
 	int tens, result, i;
 
@@ -123,12 +150,17 @@
 }
 
 static int
-roundup(int n)
+roundup(vlong ns, vlong div)
 {
+	vlong n;
 	int base;
 
+	if(div == 0 || ns/div >= BENCHTIME)
+		return BENCHTIME;
+	n = ns / div;
 	base = rounddown10(n);
-
+	if(n <= 5)
+		return 5;
 	if(n <= base)
 		return base;
 	if(n <= 2*base)
@@ -135,47 +167,43 @@
 		return 2*base;
 	if(n <= 5*base)
 		return 5*base;
-
 	return 10*base;
 }
 
 // run the benchmark for one function
-static BResult
-benchrun(B *b)
+static void
+benchrun(B *b, Hist *h)
 {
-	int n, last;
-	vlong d;
-	BResult res;
+	int i, n;
 
-	b->overheadns = -1;
-	b->overheadcy = -1;
-	benchrunn(b, 0);
-	benchrunn(b, 0);
-	b->overheadns = b->ns;
-	b->overheadcy = b->bcycles;
+	b->overheadns = 0;
+	b->overheadcy = 0;
+	/* warm caches */
+	benchrunn(b, 0, nil);
 
-	n = 1;
-	benchrunn(b, n);
-	d = BENCHTIME;
-	while(b->ns < d && n < 1000000000) {
-		last = n;
-		if(nsperop(b) == 0) {
-			n = 1000000000;
-		} else {
-			n = (int) d/nsperop(b);
-		}
-
-		n = max(min(n+n/2, 100*last), last+1);
-
-		n = roundup(n);
-		benchrunn(b, n);
+	/* measure overhead */
+	for(i = 0; i < 20; i++){
+		b->ns = 0;
+		b->bcycles = 0;
+		benchrunn(b, 0, nil);
+		if(i == 0 || b->ns < b->overheadns)
+			b->overheadns = b->ns;
+		if(i == 0 || b->bcycles < b->overheadcy)
+			b->overheadcy = b->bcycles;
 	}
 
-	res.N = b->N;
-	res.ns = b->ns;
-	res.cycles = b->bcycles;
-	res.overhead = b->overheadns;
-	return res;
+	/* do the run */
+	h->min = -1;
+	h->max = -1;
+	/* estimate */
+	benchrunn(b, 1, h);
+	n = roundup(BENCHTIME, b->ns);
+	print("%10d N ", n);
+	if(h != nil)
+		for(i = 0; i < n; i++)
+			benchrunn(b, 1, h);
+	else
+		benchrunn(b, n, nil);
 }
 
 double
@@ -202,11 +230,13 @@
 }
 
 static void
-benchres(BResult *res)
+benchres(B *res, Hist *h)
 {
 	char *unit;
+	char bar[64];
 	char tmop[64];
 	char cyop[32];
+	int i, j, lim;
 	double nsperop;
 	uvlong cyperop;
 
@@ -218,25 +248,40 @@
 	nsperop = scaletime(res->ns, (vlong)res->N, &unit);
 	snprint(tmop, sizeof(tmop), "%12.2f %s/op", nsperop, unit);
 
-	cyperop = res->cycles / (uvlong)res->N;
+	cyperop = res->bcycles / (uvlong)res->N;
 	if(cyperop < 10)
-		snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->cycles / (double)res->N);
+		snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->bcycles / (double)res->N);
 	else if(cyperop < 100)
-		snprint(cyop, sizeof(cyop), "%12.1f  cy/op", (double)res->cycles / (double)res->N);
+		snprint(cyop, sizeof(cyop), "%12.1f  cy/op", (double)res->bcycles / (double)res->N);
 	else
 		snprint(cyop, sizeof(cyop), "%10ulld  cy/op", cyperop);
 
-	print("%10d N %s\t%s\n", res->N, tmop, cyop);
+	print("%s\t%s\n", tmop, cyop);
+	if(h != nil){
+		for(i = h->min; i < h->max; i++){
+			if(h->bktmax < 30)
+				lim = h->bkt[i];
+			else
+				lim = 30*h->bkt[i] / h->bktmax;
+			if(lim == 0 && h->bkt[i] > 0)
+				lim = 1;
+			for(j = 0; j < lim; j++)
+				bar[j] = '*';
+			bar[j] = 0;
+			print("\t%12lld | %8d %s\n", 1LL<<i, h->bkt[i], bar);
+		}
+	}
 }
 
-/*
- * public api
-*/
+static void
+usage(void)
+{
+	fprint(2, "usage: %s [-s]\n", argv0);
+	exits("usage");
+}
 
-// setup. currently only calculates cycles() overhead.
-// not strictly necessary, but will give better cycle counts.
 void
-benchinit(int, char **)
+benchinit(int argc, char **argv)
 {
 	char *e;
 
@@ -245,6 +290,13 @@
 	else
 		NPROC = atoi(e);
 	free(e);
+	ARGBEGIN{
+	case 'h':
+		showhist++;
+		break;
+	default:
+		usage();
+	}ARGEND;
 }
 
 // bench a single function
@@ -251,11 +303,11 @@
 void
 bench(char *name, void (*fn)(B*))
 {
+	Hist h;
 	B b;
-	BResult res;
 
 	memset(&b, 0, sizeof(B));
-	memset(&res, 0, sizeof(BResult));
+	memset(&h, 0, sizeof(Hist));
 
 	b.item.name = name;
 	b.item.fn = fn;
@@ -263,9 +315,8 @@
 	if(strncmp(name, "bench", 5) == 0)
 		name += 5;
 	print("%24s\t", name);
-	res = benchrun(&b);
-
-	benchres(&res);
+	benchrun(&b, showhist ? &h : nil);
+	benchres(&b, showhist ? &h : nil);
 }
 
 // bench an array of functions
--- a/bench.h
+++ b/bench.h
@@ -1,5 +1,4 @@
 typedef struct BItem BItem;
-typedef struct BResult BResult;
 typedef struct B B;
 
 // single benchmark function
@@ -9,15 +8,6 @@
 	void (*fn)(B*);
 };
 
-// result of benchmarking
-struct BResult
-{
-	int N;
-	vlong ns;
-	uvlong cycles;
-	vlong overhead;
-};
-
 // type passed to bench functions
 struct B
 {
@@ -27,6 +17,8 @@
 	uvlong scycles;	/* start cycles */
 	uvlong ecycles;	/* end cycles */
 	uvlong bcycles;	/* best cycles */
+	long	*histo;	/* histogram */
+	int	nhisto;	/* histogram size */
 	vlong overheadns;	/* cost of doing 0 iters */
 	vlong overheadcy;	/* cost of doing 0 iters, cycles */
 	BItem item;
@@ -45,5 +37,4 @@
 // public api
 void benchinit(int, char**);
 void bench(char *name, void (*)(B*));
-void xbench(char *name, void(*)(B*), void (*)(void));
 void benchitems(BItem[], int);
--