ref: d818e0fc9c7b36b6cc1494bb2253792ea2c35f4b
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Jul 13 11:09:02 EDT 2025
initial ocmmit
--- /dev/null
+++ b/bench.c
@@ -1,0 +1,263 @@
+#include <u.h>
+#include <tos.h>
+#include <libc.h>
+#include <bench.h>
+
+#define Nsec 1000000000ULL
+#define BENCHTIME (Nsec) /* 1s in ns */
+
+uvlong boverhead;
+
+/*
+ * nsec() is wallclock and can be adjusted by timesync
+ * so need to use cycles() instead, but fall back to
+ * nsec() in case we can't
+ */
+uvlong
+nanosec(void)
+{
+ static uvlong fasthz, xstart;
+ uvlong x;
+
+ if(fasthz == ~0ULL)
+ return nsec() - xstart;
+
+ if(fasthz == 0){
+ if(_tos->cyclefreq){
+ fasthz = _tos->cyclefreq;
+ cycles(&xstart);
+ } else {
+ fasthz = ~0ULL;
+ xstart = nsec();
+ }
+ return 0;
+ }
+ cycles(&x);
+ x -= xstart;
+
+ uvlong q = x / fasthz;
+ uvlong r = x % fasthz;
+
+ return q*Nsec + r*Nsec/fasthz;
+}
+
+static int
+min(int x, int y)
+{
+ if(x > y) {
+ return y;
+ }
+ return x;
+}
+
+static int
+max(int x, int y)
+{
+ if(x < y) {
+ return y;
+ }
+ return x;
+}
+
+// run the benchmarking function once, looping n times
+static void
+benchrunn(B *b, int n)
+{
+
+ b->N = n;
+
+ // reset
+ b->start = nanosec();
+ b->ns = 0;
+ cycles(&b->scycles);
+
+ b->item.fn(b);
+
+ // stop
+ cycles(&b->ecycles);
+ b->ns += nanosec() - b->start;
+ b->bcycles += b->ecycles - b->scycles - boverhead;
+}
+
+static vlong
+nsperop(B *b)
+{
+ if(b->N <= 0)
+ return 0;
+
+ return b->ns / (vlong)b->N;
+}
+
+static uvlong
+cyperop(B *b)
+{
+ if(b->N <= 0)
+ return 0;
+
+ return b->bcycles / (uvlong)b->N;
+}
+
+static int
+rounddown10(int n)
+{
+ int tens, result, i;
+
+ tens = 0;
+
+ while(n >= 10) {
+ n = n / 10;
+ tens++;
+ }
+
+ result = 1;
+
+ for(i = 0; i < tens; i++) {
+ result *= 10;
+ }
+
+ return result;
+}
+
+static int
+roundup(int n)
+{
+ int base;
+
+ base = rounddown10(n);
+
+ if(n <= base)
+ return base;
+ if(n <= 2*base)
+ return 2*base;
+ if(n <= 5*base)
+ return 5*base;
+
+ return 10*base;
+}
+
+// run the benchmark for one function
+static BResult
+benchrun(B *b)
+{
+ int n, last;
+ vlong d;
+ BResult res;
+
+ n = 1;
+
+ benchrunn(b, n);
+
+ d = BENCHTIME;
+
+ while(b->ns < d && n < 1000000000) {
+ last = n;
+ if(nsperop(b) == 0) {
+ n = 1000000000;
+ } else {
+ n = (int) d/nsperop(b);
+ }
+
+ n = max(min(n+n/2, 100*last), last+1);
+
+ n = roundup(n);
+ benchrunn(b, n);
+ }
+
+ res.N = b->N;
+ res.ns = b->ns;
+ res.cycles = b->bcycles;
+
+ return res;
+}
+
+static void
+benchres(BResult *res)
+{
+ char nsop[32];
+ char cyop[32];
+ vlong nsperop;
+ uvlong cyperop;
+
+ if(res->N <= 0) {
+ nsperop = 0;
+ cyperop = 0;
+ } else {
+ nsperop = res->ns / (vlong)res->N;
+ cyperop = res->cycles / (uvlong)res->N;
+ }
+
+ snprint(nsop, sizeof(nsop), "%10lld ns/op", nsperop);
+ snprint(cyop, sizeof(cyop), "%10ulld cy/op", cyperop);
+
+ if(res->N > 0 && nsperop < 100) {
+ if(nsperop < 10)
+ snprint(nsop, sizeof(nsop), "%13.2f ns/op", (double)res->ns / (double)res->N);
+ else
+ snprint(nsop, sizeof(nsop), "%12.1f ns/op", (double)res->ns / (double)res->N);
+ }
+
+ if(res->N > 0 && cyperop < 100) {
+ if(cyperop < 10)
+ snprint(cyop, sizeof(cyop), "%13.2f cy/op", (double)res->cycles / (double)res->N);
+ else
+ snprint(cyop, sizeof(cyop), "%12.1f cy/op", (double)res->cycles / (double)res->N);
+ }
+
+ print("%10d N %.16s\t%s (total %f s)\n", res->N, nsop, cyop, (double)res->ns / Nsec);
+}
+
+/*
+ * public api
+*/
+
+// setup. currently only calculates cycles() overhead.
+// not strictly necessary, but will give better cycle counts.
+void
+benchinit(void)
+{
+ int at;
+ uvlong a, b;
+
+ /* figure out cycles overhead */
+ boverhead = -1;
+
+ for(at = 0; at < 1000000; at++) {
+ cycles(&a);
+ cycles(&b);
+ if(boverhead > b - a)
+ boverhead = b - a;
+ }
+
+}
+
+// bench a single function
+void
+bench(char *name, BFn fn)
+{
+ B b;
+ BResult res;
+
+ memset(&b, 0, sizeof(B));
+ memset(&res, 0, sizeof(BResult));
+
+ b.item.name = name;
+ b.item.fn = fn;
+
+ if(strncmp(name, "bench", 5) == 0)
+ name += 5;
+ print("%16s\t", name);
+ res = benchrun(&b);
+
+ benchres(&res);
+}
+
+// bench an array of functions
+void
+benchitems(BItem items[], int len)
+{
+ int i;
+
+ for(i = 0; i < len; i++) {
+ bench(items[i].name, items[i].fn);
+ }
+}
--- /dev/null
+++ b/bench.h
@@ -1,0 +1,40 @@
+
+typedef struct BItem BItem;
+typedef struct BResult BResult;
+typedef struct B B;
+
+typedef void (*BFn)(B* b);
+
+// single benchmark function
+struct BItem
+{
+ char *name;
+ BFn fn;
+};
+
+// result of benchmarking
+struct BResult
+{
+ int N;
+ vlong ns;
+ uvlong cycles;
+};
+
+// type passed to bench functions
+struct B
+{
+ int N;
+ vlong start; /* start ns */
+ vlong ns; /* duration */
+ uvlong scycles; /* start cycles */
+ uvlong ecycles; /* end cycles */
+ uvlong bcycles; /* best cycles */
+ BItem item;
+};
+
+extern int NPROC;
+
+// public api
+void benchinit(void);
+void bench(char *name, BFn);
+void benchitems(BItem[], int);
--- /dev/null
+++ b/fcall.c
@@ -1,0 +1,42 @@
+#include <u.h>
+#include <libc.h>
+
+#include "bench.h"
+#include "fns.h"
+
+void
+f0(void)
+{
+}
+
+void
+f1(int)
+{
+}
+
+void
+f16(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int)
+{
+}
+
+void
+fcall0(B *b)
+{
+ int i;
+ for(i = 0; i < b->N; i++)
+ f0();
+}
+void
+fcall1(B *b)
+{
+ int i;
+ for(i = 0; i < b->N; i++)
+ f1(i);
+}
+void
+fcall16(B *b)
+{
+ int i;
+ for(i = 0; i < b->N; i++)
+ f16(i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i);
+}
\ No newline at end of file
--- /dev/null
+++ b/fns.h
@@ -1,0 +1,73 @@
+
+#define BMARKS \
+ \
+ H("== fork/exec ==\n")\
+ BM(benchfork1)\
+ BM(benchrforkm1)\
+ BM(benchexec1)\
+ BM(benchexecm1)\
+ BM(benchforkN)\
+ BM(benchrforkmN)\
+ BM(benchexecN)\
+ BM(benchexecmN)\
+ \
+ H("== nop io ==\n") \
+ BM(benchsysr1)\
+ BM(benchreadzero)\
+ BM(benchwritenull)\
+ BM(benchreadmordor)\
+ BM(benchwritemordor)\
+ \
+ H("== pipe io ==\n") \
+ BM(benchpipe1)\
+ BM(benchpipe16)\
+ BM(benchpipe256)\
+ BM(benchpipe4096)\
+ BM(benchpipe4097)\
+ BM(benchpipe32768)\
+ \
+ H("== locking (fast work) ==\n") \
+ BM(benchlock1)\
+ BM(benchqlock1)\
+ BM(benchslock1)\
+ BM(benchlock4)\
+ BM(benchqlock4)\
+ BM(benchslock4)\
+ BM(benchlock16)\
+ BM(benchqlock16)\
+ BM(benchslock16)\
+ BM(benchlock64)\
+ BM(benchqlock64)\
+ BM(benchslock64)\
+ BM(benchlock512)\
+ BM(benchqlock512)\
+ BM(benchslock512)\
+ \
+ H("== locking (slow work) ==\n") \
+ BM(benchlock1_w)\
+ BM(benchqlock1_w)\
+ BM(benchslock1_w)\
+ BM(benchlock4_w)\
+ BM(benchqlock4_w)\
+ BM(benchslock4_w)\
+ BM(benchlock16_w)\
+ BM(benchqlock16_w)\
+ BM(benchslock16_w)\
+ BM(benchlock64_w)\
+ BM(benchqlock64_w)\
+ BM(benchslock64_w)\
+ BM(benchlock512_w)\
+ BM(benchqlock512_w)\
+ BM(benchslock512_w)\
+ \
+ H("== function call overhead ==\n") \
+ BM(fcall0)\
+ BM(fcall1)\
+ BM(fcall16)\
+
+#define H(x)
+#define BM(n) void n(B *b);
+BMARKS
+#undef BM
+#undef H
+
--- /dev/null
+++ b/lock.c
@@ -1,0 +1,147 @@
+#include <u.h>
+#include <libc.h>
+
+#include "bench.h"
+#include "fns.h"
+
+#define NLockIters 1
+
+typedef struct SLock SLock;
+struct SLock {
+ long state;
+ long sem;
+};
+
+int casl(long *, long, long);
+
+Lock l;
+QLock q;
+SLock s;
+static int count;
+
+void
+nop(void)
+{
+}
+
+void
+work(void)
+{
+ int i;
+
+ for(i = 0; i < 100*1000; i++)
+ count++;
+}
+
+void
+slock(SLock *s)
+{
+ int i;
+
+ for(i = 0; i < 1000; i++){
+ if(casl(&s->state, 0, 1))
+ return;
+ sleep(0);
+ }
+ if(ainc(&s->state) == 1)
+ return;
+ while(semacquire(&s->sem, 1) == -1)
+ /* retry */;
+}
+
+void
+sunlock(SLock *s)
+{
+ if(adec(&s->state) == 0)
+ return;
+ semrelease(&s->sem, 1);
+}
+
+void
+dolock(void (*work)(void), int n)
+{
+ int i;
+
+ for(i = 0; i < n; i++){
+ lock(&l);
+ work();
+ unlock(&l);
+ }
+}
+
+void
+doqlock(void (*work)(void), int n)
+{
+ int i;
+
+ for(i = 0; i < n; i++){
+ qlock(&q);
+ work();
+ qunlock(&q);
+ }
+}
+
+void
+doslock(void (*work)(void), int n)
+{
+ int i;
+
+ for(i = 0; i < n; i++){
+ slock(&s);
+ work();
+ sunlock(&s);
+ }
+}
+
+void
+lockbench(void (*fn)(void(*)(void), int), int nthr, void (*work)(void), int ninc)
+{
+ static long thrwait;
+ int i, p;
+
+ thrwait = 0;
+ for(i = 0; i < nthr; i++){
+ if((p = rfork(RFPROC|RFMEM)) == -1)
+ sysfatal("rfork: %r");
+ if(p == 0){
+ semacquire(&thrwait, 1);
+ (*fn)(work, ninc);
+ exits(nil);
+ }
+ }
+ semrelease(&thrwait, nthr);
+ for(i = 0; i < nthr; i++)
+ free(wait());
+}
+
+void benchlock1(B *b) { lockbench(dolock, 1, nop, b->N); }
+void benchqlock1(B *b) { lockbench(doqlock, 1, nop, b->N); }
+void benchslock1(B *b) { lockbench(doslock, 1, nop, b->N); }
+void benchlock4(B *b) { lockbench(dolock, 4, nop, b->N); }
+void benchqlock4(B *b) { lockbench(doqlock, 4, nop, b->N); }
+void benchslock4(B *b) { lockbench(doslock, 4, nop, b->N); }
+void benchlock16(B *b) { lockbench(dolock, 16, nop, b->N); }
+void benchqlock16(B *b) { lockbench(doqlock, 16, nop, b->N); }
+void benchslock16(B *b) { lockbench(doslock, 16, nop, b->N); }
+void benchlock64(B *b) { lockbench(dolock, 64, nop, b->N); }
+void benchqlock64(B *b) { lockbench(doqlock, 64, nop, b->N); }
+void benchslock64(B *b) { lockbench(doslock, 64, nop, b->N); }
+void benchlock512(B *b) { lockbench(dolock, 512, nop, b->N); }
+void benchqlock512(B *b) { lockbench(doqlock, 512, nop, b->N); }
+void benchslock512(B *b) { lockbench(doslock, 512, nop, b->N); }
+
+void benchlock1_w(B *b) { lockbench(dolock, 1, work, b->N); }
+void benchqlock1_w(B *b) { lockbench(doqlock, 1, work, b->N); }
+void benchslock1_w(B *b) { lockbench(doslock, 1, work, b->N); }
+void benchlock4_w(B *b) { lockbench(dolock, 4, work, b->N); }
+void benchqlock4_w(B *b) { lockbench(doqlock, 4, work, b->N); }
+void benchslock4_w(B *b) { lockbench(doslock, 4, work, b->N); }
+void benchlock16_w(B *b) { lockbench(dolock, 16, work, b->N); }
+void benchqlock16_w(B *b) { lockbench(doqlock, 16, work, b->N); }
+void benchslock16_w(B *b) { lockbench(doslock, 16, work, b->N); }
+void benchlock64_w(B *b) { lockbench(dolock, 64, work, b->N); }
+void benchqlock64_w(B *b) { lockbench(doqlock, 64, work, b->N); }
+void benchslock64_w(B *b) { lockbench(doslock, 64, work, b->N); }
+void benchlock512_w(B *b) { lockbench(dolock, 512, work, b->N); }
+void benchqlock512_w(B *b) { lockbench(doqlock, 512, work, b->N); }
+void benchslock512_w(B *b) { lockbench(doslock, 512, work, b->N); }
--- /dev/null
+++ b/main.c
@@ -1,0 +1,30 @@
+#include <u.h>
+#include <libc.h>
+#include <regexp.h>
+#include "bench.h"
+#include "fns.h"
+
+int NPROC;
+
+
+void
+main(void)
+{
+ char *e;
+
+ if((e = getenv("NPROC")) == nil)
+ NPROC = 1;
+ else
+ NPROC = atoi(e);
+ free(e);
+
+
+
+#define S(n) n();
+#define H(x) print(x);
+#define BM(n) bench("n", n);
+ BMARKS
+#undef BM
+#undef H
+ exits(nil);
+}
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,28 @@
+</$objtype/mkfile
+
+TARG=bench
+
+OFILES=\
+ main.$O\
+ bench.$O\
+ \
+ rdwr.$O\
+ lock.$O\
+ spawn.$O\
+ fcall.$O\
+
+HFILES=\
+ bench.h\
+ fns.h
+
+bench:V: all $O.nop
+ $O.out
+
+</sys/src/cmd/mkone
+
+nop.$O: nop.c
+ $CC $CFLAGS nop.c
+
+$O.nop: nop.$O
+ $LD -o $target $prereq
+
--- /dev/null
+++ b/nop.c
@@ -1,0 +1,8 @@
+#include <u.h>
+#include <libc.h>
+
+void
+main(void)
+{
+ exits(nil);
+}
--- /dev/null
+++ b/rdwr.c
@@ -1,0 +1,82 @@
+#include <u.h>
+#include <libc.h>
+
+#include "bench.h"
+#include "fns.h"
+
+void
+benchsysr1(B *b)
+{
+ int i;
+
+ extern int sysr1(void);
+ for(i = 0; i < b->N; i++)
+ sysr1();
+}
+
+void
+benchread(B *b, char *path, int n)
+{
+ int i, fd;
+ char buf[128*IOUNIT];
+
+ if((fd = open(path, OREAD)) == -1)
+ sysfatal("open %s: %r", path);
+ for(i = 0; i < b->N; i++)
+ pread(fd, buf, n, 0);
+ close(fd);
+}
+
+void
+benchwrite(B *b, char *path, int n)
+{
+ int i, fd;
+ char buf[128*IOUNIT];
+
+ if((fd = open(path, OWRITE)) == -1)
+ sysfatal("open %s: %r", path);
+ for(i = 0; i < b->N; i++)
+ pwrite(fd, buf, n, 0);
+ close(fd);
+}
+
+void
+benchpipe(B *b, int n)
+{
+ char buf[128*IOUNIT];
+ int i, pfd[2];
+
+ if(pipe(pfd) == -1)
+ sysfatal("pipe: %r");
+
+ switch(rfork(RFPROC)){
+ case -1:
+ sysfatal("fork: %r");
+ case 0:
+ for(i = 0; i < b->N; i++)
+ if(write(pfd[0], buf, n) == -1)
+ sysfatal("write: %r");
+ exits(nil);
+ break;
+ default:
+ for(i = 0; i < b->N; i++)
+ if(read(pfd[1], buf, n) == -1)
+ sysfatal("read: %r");
+ free(wait());
+ close(pfd[0]);
+ close(pfd[1]);
+ break;
+ }
+}
+
+
+void benchreadzero(B *b) { benchread(b, "/dev/zero", 4); }
+void benchwritenull(B *b) { benchwrite(b, "/dev/null", 4); }
+void benchreadmordor(B *b) { benchread(b, "/dev/mordor", 4); }
+void benchwritemordor(B *b) { benchwrite(b, "/dev/mordor", 4); }
+void benchpipe1(B *b) { benchpipe(b, 1); }
+void benchpipe16(B *b) { benchpipe(b, 16); }
+void benchpipe256(B *b) { benchpipe(b, 256); }
+void benchpipe4096(B *b) { benchpipe(b, 4096); }
+void benchpipe4097(B *b) { benchpipe(b, 4097); }
+void benchpipe32768(B *b) { benchpipe(b, 32768); }
--- /dev/null
+++ b/spawn.c
@@ -1,0 +1,149 @@
+#include <u.h>
+#include <libc.h>
+
+#include "bench.h"
+#include "fns.h"
+
+#define DirtySz 1024*1024*1024
+void *junk;
+
+#ifdef NOPE
+void
+setupfork(void)
+{
+ if(DirtySz != 0){
+ if((junk = malloc(DirtySz)) == nil)
+ sysfatal("malloc: %r");
+ memset(junk, 42, DirtySz);
+ }
+}
+#endif
+
+void
+doit(void (*fn)(int), int n)
+{
+ int r, i;
+
+ for(i = 0; i < NPROC; i++){
+ r = fork();
+ if(r == -1)
+ sysfatal("rfork: %r");
+ if(r == 0){
+ sleep(1000);
+ fn(n/NPROC);
+ exits(nil);
+ }
+ }
+ for(i = 0; i < NPROC; i++)
+ waitpid();
+}
+
+void
+benchfork(int N)
+{
+ int i;
+
+ for(i = 0; i < N; i++){
+ if(!fork())
+ exits(nil);
+ waitpid();
+ }
+}
+
+void
+benchrforkm(int N)
+{
+ int i;
+
+ for(i = 0; i < N; i++){
+ if(!rfork(RFPROC|RFMEM))
+ exits(nil);
+ waitpid();
+ }
+}
+
+void
+benchexec(int N)
+{
+ int i;
+
+ for(i = 0; i < N; i++){
+ switch(fork()){
+ case -1:
+ abort();
+ case 0:
+ execl("./6.nop", "6.nop", nil);
+ print("exec: %r");
+ abort();
+ default:
+ waitpid();
+ }
+ }
+}
+
+void
+benchexecm(int N)
+{
+ int i;
+
+ for(i = 0; i < N; i++){
+ switch(rfork(RFPROC|RFMEM)){
+ case -1:
+ abort();
+ case 0:
+ execl("./6.nop", "6.nop", nil);
+ print("exec: %r");
+ abort();
+ default:
+ waitpid();
+ }
+ }
+}
+
+void
+benchfork1(B *b)
+{
+ benchfork(b->N);
+}
+
+void
+benchforkN(B *b)
+{
+ doit(benchfork, b->N);
+}
+
+void
+benchrforkm1(B *b)
+{
+ benchrforkm(b->N);
+}
+
+void
+benchrforkmN(B *b)
+{
+ doit(benchrforkm, b->N);
+}
+
+void
+benchexec1(B *b)
+{
+ benchexec(b->N);
+}
+
+void
+benchexecN(B *b)
+{
+ doit(benchexec, b->N);
+}
+
+void
+benchexecm1(B *b)
+{
+ benchexecm(b->N);
+}
+
+void
+benchexecmN(B *b)
+{
+ doit(benchexecm, b->N);
+}
--
⑨