shithub: sysbench

--- a/bench.c

+++ b/bench.c

@@ -6,8 +6,17 @@

 #define Nsec 1000000000ULL

 #define BENCHTIME (Nsec)	/* 1s in ns */

-int	NPROC;

+/* log2 histogram of per-run times, filled in by histrecord() */

+typedef struct Hist	Hist;

+struct Hist {

+	int	min;	/* lowest bucket index recorded; -1 means nothing recorded yet */

+	int	max;	/* one past the highest bucket index recorded; -1 means nothing recorded yet */

+	int	bktmax;	/* largest count held by any single bucket */

+	int	bkt[64];	/* bkt[i] counts runs whose ns needed i halvings to reach 1 or less */

+};

+int	NPROC;	/* assigned in benchinit() */

+int	showhist;	/* incremented by the -h flag; when nonzero bench() passes a Hist to benchrun() and benchres() */

/*

  * nsec() is wallclock and can be adjusted by timesync

  * so need to use cycles() instead, but fall back to

@@ -59,28 +68,46 @@

         return x;

+/* add the b->ns of the run just finished to histogram h; a nil h is ignored */

+void

+histrecord(Hist *h, B *b)

+{

+	vlong t;

+	int slot, last;

+	if(h == nil)

+		return;

+	/* slot = number of halvings needed to bring ns down to 1 or less, capped at the last bucket */

+	last = nelem(h->bkt) - 1;

+	slot = 0;

+	for(t = b->ns; slot < last && t > 1; t >>= 1)

+		slot++;

+	if(++h->bkt[slot] > h->bktmax)

+		h->bktmax = h->bkt[slot];

+	/* min is the lowest slot seen, max is one past the highest; -1 marks unset */

+	if(h->min == -1 || slot < h->min)

+		h->min = slot;

+	if(h->max == -1 || slot >= h->max)

+		h->max = slot+1;

+}

 // run the benchmarking function once, looping n times

+// when h is non-nil the resulting b->ns is recorded in it

 static void

-benchrunn(B *b, int n)

+benchrunn(B *b, int n, Hist *h)

+	vlong now;

-	b->N = n;

 	// reset

-	b->start = nanosec();

 	b->ns = 0;

+	b->start = nanosec();

 	cycles(&b->scycles);

+	b->N = n;

 	b->item.fn(b);

-	// stop

+	// count

 	cycles(&b->ecycles);

-	b->ns += nanosec() - b->start;

-	if(b->overheadns != -1)

-		b->ns -= b->overheadns;

-	b->bcycles += b->ecycles - b->scycles;

-	if(b->overheadcy != -1)

-		b->bcycles -= b->overheadcy;

+	// read the clock once and reuse it; then take off the cost of a 0-iteration run

+	now = nanosec();

+	b->ns += now - b->start - b->overheadns;

+	b->bcycles += b->ecycles - b->scycles - b->overheadcy;

+	// histrecord() does nothing when h is nil

+	histrecord(h, b);

 static vlong

@@ -102,7 +129,7 @@

 static int

-rounddown10(int n)

+rounddown10(vlong n)

 	int tens, result, i;

@@ -123,12 +150,17 @@

+/* turn ns/div into a rounded count; benchrun() uses the result as its iteration count */

 static int

-roundup(int n)

+roundup(vlong ns, vlong div)

+	vlong n;

 	int base;

+	/* a zero divisor, or a quotient of BENCHTIME or more, yields BENCHTIME */

+	if(div == 0 || ns/div >= BENCHTIME)

+		return BENCHTIME;

+	n = ns / div;

 	base = rounddown10(n);

+	/* quotients of 5 or less are raised to 5 */

+	if(n <= 5)

+		return 5;

+	/* otherwise return the first of base, 2*base, 5*base, 10*base that is not below n */

 	if(n <= base)

 		return base;

 	if(n <= 2*base)

@@ -135,47 +167,43 @@

 		return 2*base;

 	if(n <= 5*base)

 		return 5*base;

 	return 10*base;

 // run the benchmark for one function

+// h may be nil, in which case no histogram is collected

-static BResult

-benchrun(B *b)

+static void

+benchrun(B *b, Hist *h)

-	int n, last;

-	vlong d;

-	BResult res;

+	int i, n;

+	vlong minns;

+	uvlong mincy;

-	b->overheadns = -1;

-	b->overheadcy = -1;

-	benchrunn(b, 0);

-	benchrunn(b, 0);

-	b->overheadns = b->ns;

-	b->overheadcy = b->bcycles;

+	b->overheadns = 0;

+	b->overheadcy = 0;

+	/* warm caches */

+	benchrunn(b, 0, nil);

-	n = 1;

-	benchrunn(b, n);

-	d = BENCHTIME;

-	while(b->ns < d && n < 1000000000) {

-		last = n;

-		if(nsperop(b) == 0) {

-			n = 1000000000;

-		} else {

-			n = (int) d/nsperop(b);

-		}

-		n = max(min(n+n/2, 100*last), last+1);

-		n = roundup(n);

-		benchrunn(b, n);

+	/* measure overhead: the minimum over 20 runs of 0 iterations */

+	/* kept in locals because benchrunn() subtracts b->overheadns and b->overheadcy, which must stay 0 while sampling */

+	minns = 0;

+	mincy = 0;

+	for(i = 0; i < 20; i++){

+		b->ns = 0;

+		b->bcycles = 0;

+		benchrunn(b, 0, nil);

+		if(i == 0 || b->ns < minns)

+			minns = b->ns;

+		if(i == 0 || b->bcycles < mincy)

+			mincy = b->bcycles;

-	res.N = b->N;

-	res.ns = b->ns;

-	res.cycles = b->bcycles;

-	res.overhead = b->overheadns;

-	return res;

+	b->overheadns = minns;

+	b->overheadcy = mincy;

+	/* NOTE(review): b->bcycles is not cleared here, so cycles from the runs above carry into the result; confirm intended */

+	/* do the run */

+	/* -1 marks min and max as unset for histrecord(); h is nil when no histogram was requested */

+	if(h != nil){

+		h->min = -1;

+		h->max = -1;

+	}

+	/* estimate */

+	benchrunn(b, 1, h);

+	n = roundup(BENCHTIME, b->ns);

+	print("%10d N ", n);

+	/* with a histogram every iteration is timed separately so each one lands in a bucket */

+	if(h != nil)

+		for(i = 0; i < n; i++)

+			benchrunn(b, 1, h);

+	else

+		benchrunn(b, n, nil);

 double

@@ -202,11 +230,13 @@

 static void

-benchres(BResult *res)

+benchres(B *res, Hist *h)

 	char *unit;

+	char bar[64];

 	char tmop[64];

 	char cyop[32];

+	int i, j, lim;

 	double nsperop;

 	uvlong cyperop;

@@ -218,25 +248,40 @@

 	nsperop = scaletime(res->ns, (vlong)res->N, &unit);

 	snprint(tmop, sizeof(tmop), "%12.2f %s/op", nsperop, unit);

-	cyperop = res->cycles / (uvlong)res->N;

+	cyperop = res->bcycles / (uvlong)res->N;

 	if(cyperop < 10)

-		snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->cycles / (double)res->N);

+		snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->bcycles / (double)res->N);

 	else if(cyperop < 100)

-		snprint(cyop, sizeof(cyop), "%12.1f  cy/op", (double)res->cycles / (double)res->N);

+		snprint(cyop, sizeof(cyop), "%12.1f  cy/op", (double)res->bcycles / (double)res->N);

 	else

 		snprint(cyop, sizeof(cyop), "%10ulld  cy/op", cyperop);

-	print("%10d N %s\t%s\n", res->N, tmop, cyop);

+	print("%s\t%s\n", tmop, cyop);

+	if(h != nil){

+		for(i = h->min; i < h->max; i++){

+			if(h->bktmax < 30)

+				lim = h->bkt[i];

+			else

+				lim = 30*h->bkt[i] / h->bktmax;

+			if(lim == 0 && h->bkt[i] > 0)

+				lim = 1;

+			for(j = 0; j < lim; j++)

+				bar[j] = '*';

+			bar[j] = 0;

+			print("\t%12lld | %8d %s\n", 1LL<<i, h->bkt[i], bar);

+		}

+	}

-/*

- * public api

-*/

+/* print the synopsis on fd 2 and exit with status "usage"; -h is the flag benchinit() accepts */

+static void

+usage(void)

+{

+	fprint(2, "usage: %s [-h]\n", argv0);

+	exits("usage");

+}

-// setup. currently only calculates cycles() overhead.

-// not strictly necessary, but will give better cycle counts.

 void

-benchinit(int, char **)

+benchinit(int argc, char **argv)

 	char *e;

@@ -245,6 +290,13 @@

 	else

 		NPROC = atoi(e);

 	free(e);

+	ARGBEGIN{

+	case 'h':

+		showhist++;

+		break;

+	default:

+		usage();

+	}ARGEND;

 // bench a single function

@@ -251,11 +303,11 @@

 void

 bench(char *name, void (*fn)(B*))

+	Hist h;

 	B b;

-	BResult res;

 	memset(&b, 0, sizeof(B));

-	memset(&res, 0, sizeof(BResult));

+	memset(&h, 0, sizeof(Hist));

 	b.item.name = name;

 	b.item.fn = fn;

@@ -263,9 +315,8 @@

 	if(strncmp(name, "bench", 5) == 0)

 		name += 5;

 	print("%24s\t", name);

-	res = benchrun(&b);

-	benchres(&res);

+	/* the histogram is handed on only when showhist is set; otherwise nil is passed */

+	benchrun(&b, showhist ? &h : nil);

+	benchres(&b, showhist ? &h : nil);

 // bench an array of functions

--- a/bench.h

+++ b/bench.h

@@ -1,5 +1,4 @@

 typedef struct BItem BItem;

-typedef struct BResult BResult;

 typedef struct B B;

 // single benchmark function

@@ -9,15 +8,6 @@

 	void (*fn)(B*);

};

-// result of benchmarking

-struct BResult

-{

-	int N;

-	vlong ns;

-	uvlong cycles;

-	vlong overhead;

-};

 // type passed to bench functions

 struct B

@@ -27,6 +17,8 @@

 	uvlong scycles;	/* start cycles */

 	uvlong ecycles;	/* end cycles */

 	uvlong bcycles;	/* best cycles */

+	long	*histo;	/* histogram */

+	int	nhisto;	/* histogram size */

 	vlong overheadns;	/* cost of doing 0 iters */

 	vlong overheadcy;	/* cost of doing 0 iters, cycles */

 	BItem item;

@@ -45,5 +37,4 @@

 // public api

 void benchinit(int, char**);

 void bench(char *name, void (*)(B*));

-void xbench(char *name, void(*)(B*), void (*)(void));

 void benchitems(BItem[], int);

--

⑨