shithub: sysbench

ref: 721724916347c46df7cfa3105ac6fd1f6da652ed
dir: /bench.c/

View raw version
#include <u.h>
#include <tos.h>
#include <libc.h>
#include <bench.h>

/* nanoseconds in one second */
#define Nsec 1000000000ULL
/* time budget handed to roundup() when choosing the iteration count */
#define BENCHTIME (Nsec)	/* 1s in ns */

typedef struct Hist	Hist;

/*
 * Histogram of per-run times with power-of-two buckets:
 * histrecord halves the time until it is <= 1 and counts the
 * sample in the bucket indexed by the number of halvings.
 */
struct Hist {
	int	min;		/* lowest bucket index used; -1 while empty */
	int	max;		/* one past the highest bucket index used; -1 while empty */
	int	bktmax;		/* largest count held by any single bucket */
	int	bkt[64];	/* number of samples per bucket */
};

int	NPROC;		/* set by benchinit from $NPROC; 1 when the variable is unset */
int	showhist;	/* incremented for each -h flag; nonzero enables the histogram */
/*
 * Nanoseconds elapsed since the first call; the first call
 * itself only records the origin and returns 0.
 *
 * nsec() is wall-clock time and can be adjusted by timesync,
 * so the cycle counter is used when a cycle frequency is
 * published in _tos; nsec() is the fallback otherwise.
 */
uvlong
nanosec(void)
{
	/* fasthz: 0 = not initialised, ~0 = nsec() fallback, else cycles per second */
	static uvlong fasthz, xstart;
	uvlong ticks, whole, part;

	if(fasthz == 0){
		/* first call: choose the clock and remember where it started */
		if(_tos->cyclefreq != 0){
			fasthz = _tos->cyclefreq;
			cycles(&xstart);
		}else{
			xstart = nsec();
			fasthz = ~0ULL;
		}
		return 0;
	}

	if(fasthz == ~0ULL)
		return nsec() - xstart;

	cycles(&ticks);
	ticks -= xstart;

	/*
	 * convert whole seconds and the sub-second remainder
	 * separately instead of multiplying the full tick count by Nsec
	 */
	whole = ticks / fasthz;
	part = ticks % fasthz;
	return whole*Nsec + part*Nsec/fasthz;
}

/* Return the smaller of x and y. */
static int
min(int x, int y)
{
	return x > y ? y : x;
}

/* Return the larger of x and y. */
static int
max(int x, int y)
{
	return x < y ? y : x;
}

/*
 * Add the time of the run just finished (b->ns) to histogram h.
 * Does nothing when h is nil.
 */
void
histrecord(Hist *h, B *b)
{
	vlong v;
	int slot;

	if(h == nil)
		return;

	/*
	 * slot is the number of halvings needed to bring the time
	 * down to <= 1, capped at the last bucket; times <= 1
	 * (including negative ones) land in slot 0
	 */
	slot = 0;
	v = b->ns;
	while(slot < nelem(h->bkt) - 1 && v > 1){
		v >>= 1;
		slot++;
	}

	if(++h->bkt[slot] > h->bktmax)
		h->bktmax = h->bkt[slot];

	/* keep [min, max) tight around the slots in use; -1 means unset */
	if(h->min == -1 || slot < h->min)
		h->min = slot;
	if(h->max == -1 || slot >= h->max)
		h->max = slot + 1;
}

// run the benchmarking function once, looping n times
//
// b->ns is cleared and then set to the elapsed time of this call
// minus b->overheadns; b->bcycles is NOT cleared, the elapsed
// cycles minus b->overheadcy are added to whatever it held.
// When h is not nil the resulting b->ns is recorded in it.
static void
benchrunn(B *b, int n, Hist *h)
{
	vlong now;

	// reset
	b->ns = 0;
	b->start = nanosec();
	cycles(&b->scycles);

	b->N = n;
	b->item.fn(b);

	// count: read both clocks once, right after the function returns
	cycles(&b->ecycles);
	now = nanosec();
	// use the timestamp taken above rather than reading the clock a second time
	b->ns += now - b->start - b->overheadns;
	b->bcycles += b->ecycles - b->scycles - b->overheadcy;
	histrecord(h, b);
}

/* Nanoseconds per operation for b, or 0 when no operations were run. */
static vlong
nsperop(B *b)
{
	return b->N > 0 ? b->ns / (vlong)b->N : 0;
}

/* Cycles per operation for b, or 0 when no operations were run. */
static uvlong
cyperop(B *b)
{
	return b->N > 0 ? b->bcycles / (uvlong)b->N : 0;
}

/*
 * Largest power of ten that does not exceed n.
 * Values below 10 (including zero and negatives) yield 1.
 */
static int
rounddown10(vlong n)
{
	int pow10;

	/* one factor of ten per decimal digit stripped from n */
	for(pow10 = 1; n >= 10; n /= 10)
		pow10 *= 10;
	return pow10;
}

/*
 * Round ns/div up to the next value of the form 1, 2 or 5 times
 * a power of ten, with a floor of 5 and a ceiling of BENCHTIME.
 * A zero divisor also yields BENCHTIME.
 */
static int
roundup(vlong ns, vlong div)
{
	static const int step[] = {1, 2, 5};
	vlong q;
	int unit, k;

	if(div == 0)
		return BENCHTIME;
	q = ns / div;
	/*
	 * BENCHTIME is an unsigned long long constant, so this
	 * comparison is unsigned: a negative quotient is capped too
	 */
	if(q >= BENCHTIME)
		return BENCHTIME;
	if(q <= 5)
		return 5;

	unit = rounddown10(q);
	for(k = 0; k < nelem(step); k++)
		if(q <= step[k]*unit)
			return step[k]*unit;
	return 10*unit;
}

// run the benchmark for one function
//
// Measures the fixed cost of an empty (N = 0) run, estimates how
// many iterations fit in BENCHTIME, prints that count, then does
// the measured run. h may be nil; when it is not, every iteration
// is timed on its own and recorded in h, and b->ns, b->bcycles
// and b->N are left describing the sum of those runs.
static void
benchrun(B *b, Hist *h)
{
	enum { Noverhead = 20 };	/* empty runs sampled for the overhead */
	vlong ovns, total;
	uvlong ovcy;
	int i, n;

	b->overheadns = 0;
	b->overheadcy = 0;
	/* warm caches */
	benchrunn(b, 0, nil);

	/*
	 * measure overhead: keep the minimum in locals and leave
	 * b->overhead* at zero until the loop is done, because
	 * benchrunn subtracts b->overhead* from every sample and
	 * would otherwise distort the samples being measured
	 */
	ovns = 0;
	ovcy = 0;
	for(i = 0; i < Noverhead; i++){
		/* benchrunn clears b->ns itself but only adds to b->bcycles */
		b->bcycles = 0;
		benchrunn(b, 0, nil);
		if(i == 0 || b->ns < ovns)
			ovns = b->ns;
		if(i == 0 || b->bcycles < ovcy)
			ovcy = b->bcycles;
	}
	b->overheadns = ovns;
	b->overheadcy = ovcy;

	/* do the run; h is nil unless a histogram was requested */
	if(h != nil){
		h->min = -1;
		h->max = -1;
	}
	/* estimate */
	benchrunn(b, 1, h);
	n = roundup(BENCHTIME, b->ns);
	print("%10d N ", n);

	/* drop the cycles of the overhead and estimate runs */
	b->bcycles = 0;
	if(h != nil){
		/*
		 * one sample per iteration for the histogram; sum the
		 * times so that ns and cycles cover the same n runs
		 */
		total = 0;
		for(i = 0; i < n; i++){
			benchrunn(b, 1, h);
			total += b->ns;
		}
		b->ns = total;
		b->N = n;
	}else
		benchrunn(b, n, nil);
}

/*
 * Express the time per operation, ns/n, in a readable unit.
 *
 * Picks the first unit in which the per-operation time is below
 * 1000 (hours being the last resort), stores the unit's name in
 * *unit and returns the time per operation in that unit.
 * An n of zero is treated as one to avoid dividing by zero.
 */
double
scaletime(vlong ns, vlong n, char **unit)
{
	/*
	 * the LL suffixes matter: the products for minutes and hours
	 * do not fit in an int and would overflow without them
	 */
	static const struct {
		char *name;
		vlong div;
	} units[] = {
		{"ns", 1LL},
		{"μs", 1000LL},
		{"ms", 1000LL*1000},
		{"s",  1000LL*1000*1000},
		{"m",  60LL*1000*1000*1000},
		{"h",  3600LL*1000*1000*1000},
	};
	int i;

	if(n == 0)
		n = 1;

	for(i = 0; i < nelem(units)-1; i++)
		if(ns / (n * units[i].div) < 1000)
			break;
	*unit = units[i].name;
	return (double)ns / (double)(n*units[i].div);
}

/*
 * Print the result line for one benchmark: time per operation
 * and cycles per operation, followed, when h is not nil, by one
 * histogram row per bucket in [h->min, h->max).
 * Prints "skipped" when no operations were run.
 */
static void
benchres(B *res, Hist *h)
{
	enum { Barmax = 30 };	/* longest histogram bar, in characters */
	char *unit;
	char bar[64];
	char tmop[64];
	char cyop[32];
	int i, j, lim;
	double tm, cyfrac;
	uvlong cy;

	if(res->N <= 0) {
		print("skipped\n");
		return;
	}

	tm = scaletime(res->ns, (vlong)res->N, &unit);
	snprint(tmop, sizeof(tmop), "%12.2f %s/op", tm, unit);

	/* the fewer cycles an operation takes, the more decimals are shown */
	cy = res->bcycles / (uvlong)res->N;
	cyfrac = (double)res->bcycles / (double)res->N;
	if(cy < 10)
		snprint(cyop, sizeof(cyop), "%13.2f cy/op", cyfrac);
	else if(cy < 100)
		snprint(cyop, sizeof(cyop), "%12.1f  cy/op", cyfrac);
	else
		snprint(cyop, sizeof(cyop), "%10ulld  cy/op", cy);

	print("%s\t%s\n", tmop, cyop);
	if(h == nil)
		return;

	for(i = h->min; i < h->max; i++){
		/* bars are drawn 1:1 until some bucket would exceed Barmax, then scaled */
		if(h->bktmax < Barmax)
			lim = h->bkt[i];
		else
			/* widen first: Barmax*count overflows an int for large counts */
			lim = (vlong)Barmax*h->bkt[i] / h->bktmax;
		/* never let a non-empty bucket vanish from the chart */
		if(lim == 0 && h->bkt[i] > 0)
			lim = 1;
		for(j = 0; j < lim; j++)
			bar[j] = '*';
		bar[j] = 0;
		print("\t%12lld | %8d %s\n", 1LL<<i, h->bkt[i], bar);
	}
}

/*
 * Print the usage line on file descriptor 2 and exit with status "usage".
 * The only flag benchinit accepts is -h (show histogram).
 */
static void
usage(void)
{
	fprint(2, "usage: %s [-h]\n", argv0);
	exits("usage");
}

/*
 * Set up the benchmark globals from the environment and the
 * command line: NPROC comes from $NPROC (1 when unset), and
 * each -h flag increments showhist. Any other flag calls usage().
 */
void
benchinit(int argc, char **argv)
{
	char *e;

	e = getenv("NPROC");
	NPROC = e != nil ? atoi(e) : 1;
	/* the string handed back by getenv is released here */
	free(e);

	ARGBEGIN{
	case 'h':
		showhist++;
		break;
	default:
		usage();
	}ARGEND;
}

// bench a single function
//
// Prints the name (without a leading "bench" prefix), then runs
// fn and prints its results; a histogram is collected and shown
// only when showhist is set.
void
bench(char *name, void (*fn)(B*))
{
	Hist h, *hp;
	B b;

	memset(&h, 0, sizeof h);
	memset(&b, 0, sizeof b);

	/* the item keeps the full name; only the printed label is shortened */
	b.item.fn = fn;
	b.item.name = name;
	hp = showhist ? &h : nil;

	if(strncmp(name, "bench", 5) == 0)
		name += 5;
	print("%24s\t", name);
	benchrun(&b, hp);
	benchres(&b, hp);
}

// bench an array of functions
//
// Runs bench() on the first len entries of items, in order.
void
benchitems(BItem items[], int len)
{
	BItem *it;

	it = items;
	while(len-- > 0){
		bench(it->name, it->fn);
		it++;
	}
}