ref: c0e7c83c5eb13de7e799f7bf0b3359b6c1569b24
parent: 015bcdb965ed6055a2075ce1acdd94b4bb81990e
author: Ori Bernstein <ori@eigenstate.org>
date: Thu Dec 28 22:48:40 EST 2023
all: add tracing facility, on by default
--- a/blk.c
+++ b/blk.c
@@ -249,6 +249,7 @@
initblk(lb, o, -1, Tlog);
finalize(lb);
syncblk(lb);
+ traceb("logblk" , lb->bp);
return lb;
}
@@ -335,6 +336,7 @@
dprint("loadlog %B\n", bp);
+ traceb("loadlog", bp);
while(1){
b = getblk(bp, 0);
dprint("\tload %B chain %B\n", bp, b->logp);
@@ -401,8 +403,11 @@
Bptr hd;
char *p;
- if(a->logtl != nil && checkflag(a->logtl, Bdirty))
+ tracem("compresslog");
+ if(a->logtl != nil){
+ finalize(a->logtl);
syncblk(a->logtl);
+ }
/*
* Prepare what we're writing back.
* Arenas must be sized so that we can
@@ -650,6 +655,7 @@
b = cachepluck();
initblk(b, bp, t->memgen, ty);
b->alloced = getcallerpc(&t);
+ tracex("newblk" , b->bp, ty, -1);
return b;
}
@@ -661,6 +667,7 @@
if((r = newblk(t, b->type, 0)) == nil)
return nil;
+ tracex("dup" , b->bp, b->type, t->gen);
setflag(r, Bdirty);
r->bp.hash = -1;
r->nval = b->nval;
@@ -720,6 +727,7 @@
int i;
i = ihash(bp.addr) % nelem(fs->blklk);
+ tracex("get" , bp, getcallerpc(&bp), -1);
qlock(&fs->blklk[i]);
if(waserror()){
qunlock(&fs->blklk[i]);
@@ -822,6 +830,7 @@
return;
}
+ tracex("freeb", bp, getcallerpc(&t), -1);
f = emalloc(sizeof(Bfree), 0);
f->op = DFblk;
f->bp = bp;
@@ -936,6 +945,7 @@
a = getarena(b->bp.addr);
holdblk(b);
finalize(b);
+ traceb("queueb", b->bp);
setflag(b, Bqueued);
b->queued = getcallerpc(&b);
qe.op = Qwrite;
@@ -1045,6 +1055,7 @@
qe = qpop(q);
switch(qe.op){
case Qfree:
+ tracex("qfreeb", qe.bp, qe.qgen, -1);
a = getarena(qe.bp.addr);
qlock(a);
cachedel(qe.bp.addr);
@@ -1054,6 +1065,7 @@
qunlock(a);
break;
case Qfence:
+ tracev("qfence", qe.qgen);
qlock(&fs->synclk);
if(--fs->syncing == 0)
rwakeupall(&fs->syncrz);
@@ -1060,6 +1072,7 @@
qunlock(&fs->synclk);
break;
case Qwrite:
+ tracex("qsyncb", qe.bp, qe.qgen, -1);
if(checkflag(qe.b, Bfreed) == 0)
syncblk(qe.b);
dropblk(qe.b);
--- a/cache.c
+++ b/cache.c
@@ -83,6 +83,7 @@
h = ihash(b->bp.addr);
bkt = &fs->bcache[h % fs->cmax];
qlock(&fs->lrulk);
+ traceb("cache", b->bp);
lock(bkt);
if(checkflag(b, Bcached)){
unlock(bkt);
@@ -110,6 +111,7 @@
if(addr == -1)
return;
+ tracex("uncache", Zb, addr, getcallerpc(&addr));
h = ihash(addr);
bkt = &fs->bcache[h % fs->cmax];
lock(bkt);
@@ -130,6 +132,7 @@
cachedel(vlong addr)
{
qlock(&fs->lrulk);
+ tracex("uncachelk", Zb, addr, getcallerpc(&addr));
cachedel_lk(addr);
qunlock(&fs->lrulk);
}
--- a/cons.c
+++ b/cons.c
@@ -2,6 +2,7 @@
#include <libc.h>
#include <fcall.h>
#include <avl.h>
+#include <bio.h>
#include "dat.h"
#include "fns.h"
@@ -296,6 +297,37 @@
}
static void
+savetrace(int fd, char **ap, int na)
+{
+ Biobuf *bfd;
+ Trace *t;
+ int i;
+
+ if(na == 0)
+ bfd = Bfdopen(dup(fd, -1), OWRITE);
+ else
+ bfd = Bopen(ap[0], OWRITE);
+ if(bfd == nil){
+ fprint(fd, "error opening output");
+ return;
+ }
+ for(i = 0; i < fs->tracesz; i++){
+ t = &fs->trace[(fs->traceidx + i) % fs->tracesz];
+ if(t->msg[0] == 0)
+ continue;
+ Bprint(bfd, "[%d@%d] %s", t->tid, t->qgen, t->msg);
+ if(t->bp.addr != -1)
+ Bprint(bfd, " %B", t->bp);
+ if(t->v0 != -1)
+ Bprint(bfd, " %llx", t->v0);
+ if(t->v1 != -1)
+ Bprint(bfd, " %llx", t->v1);
+ Bprint(bfd, "\n");
+ }
+ fprint(fd, "saved\n");
+}
+
+static void
unreserve(int fd, char **ap, int)
{
if(strcmp(ap[0], "on") == 0)
@@ -306,6 +338,7 @@
fprint(2, "unknown reserve %s\n", ap[0]);
fprint(fd, "reserve: %d → %d\n", !permissive, permissive);
}
+
static void
help(int fd, char**, int)
{
@@ -344,7 +377,7 @@
{.name="show", .sub="users", .minarg=0, .maxarg=0, .fn=showusers},
{.name="show", .sub="bstate", .minarg=0, .maxarg=0, .fn=showbstate},
{.name="debug", .sub=nil, .minarg=0, .maxarg=1, .fn=setdbg},
-
+ {.name="save", .sub="trace", .minarg=0, .maxarg=1, .fn=savetrace},
{.name=nil, .sub=nil},
};
--- a/dat.h
+++ b/dat.h
@@ -20,6 +20,7 @@
typedef struct Chan Chan;
typedef struct Syncq Syncq;
typedef struct Qent Qent;
+typedef struct Trace Trace;
typedef struct Tree Tree;
typedef struct Dlist Dlist;
typedef struct Mount Mount;
@@ -361,6 +362,7 @@
};
struct Errctx {
+ long tid;
char err[128];
jmp_buf errlab[Estacksz];
int nerrlab;
@@ -479,6 +481,15 @@
int heapsz;
};
+struct Trace {
+ int tid;
+ int qgen;
+ char msg[16];
+ Bptr bp;
+ vlong v0;
+ vlong v1;
+};
+
/*
* Overall state of the file sytem.
* Shadows the superblock contents.
@@ -562,6 +573,10 @@
RWLock flushq[Nflushtab];
int flushop[Nflushtab];
+
+ Trace *trace;
+ long traceidx;
+ long tracesz;
};
struct Arena {
--- a/fns.h
+++ b/fns.h
@@ -12,7 +12,7 @@
extern int permissive;
extern int usereserve;
extern char* reamuser;
-extern void** errctx;
+extern Errctx** errctx;
extern Blk* blkbuf;
extern int noneid;
extern int nogroupid;
@@ -131,6 +131,7 @@
do{ \
if(debug) fprint(2, __VA_ARGS__); \
}while(0)
+
#define fatal(...) \
do{ \
fprint(2, __VA_ARGS__); \
@@ -137,15 +138,25 @@
abort(); \
}while(0)
+#define tracex(msg, bp, v0, v1) \
+ do{ \
+ if(fs->trace != nil) \
+ _trace(msg, bp, v0, v1); \
+ } while(0)
+
+#define traceb(msg, bp) tracex(msg, bp, -1, -1)
+#define tracev(msg, v0) tracex(msg, Zb, v0, -1)
+#define tracem(msg) tracex(msg, Zb, -1, -1)
+
jmp_buf* _waserror(void);
_Noreturn void error(char*, ...);
_Noreturn void broke(char*, ...);
_Noreturn void nexterror(void);
#define waserror() (setjmp(*_waserror()))
-#define errmsg() (((Errctx*)*errctx)->err)
-#define poperror() assert(((Errctx*)*errctx)->nerrlab-- > 0)
-#define estacksz() (((Errctx*)*errctx)->nerrlab)
-
+#define errmsg() ((*errctx)->err)
+#define poperror() assert((*errctx)->nerrlab-- > 0)
+#define estacksz() ((*errctx)->nerrlab)
+void _trace(char*, Bptr, vlong, vlong);
char* packstr(char*, char*, char*);
void dir2kv(vlong, Xdir*, Kvp*, char*, int);
--- a/fs.c
+++ b/fs.c
@@ -38,6 +38,7 @@
int i;
aincv(&fs->qgen, 1);
+ tracev("barrier", fs->qgen);
fs->syncing = fs->nsyncers;
for(i = 0; i < fs->nsyncers; i++){
qe.op = Qfence;
@@ -50,6 +51,7 @@
aincv(&fs->qgen, 1);
while(fs->syncing != 0)
rsleep(&fs->syncrz);
+ tracev("flushed", fs->qgen);
}
static void
@@ -76,6 +78,7 @@
/*
* Wait for data that we're syncing to hit disk
*/
+ tracem("flush1");
wrbarrier();
/*
* pass 0: Update all open snapshots, and
@@ -86,6 +89,7 @@
* can take a consistent snapshot.
*/
qlock(&fs->mutlk);
+ tracem("packb");
dlsync();
for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next)
updatesnap(&mnt->root, mnt->root, mnt->name);
@@ -124,6 +128,7 @@
* the block footers are consistent, and we can
* use them.
*/
+ tracem("arenas0");
for(i = 0; i < fs->narena; i++)
enqueue(fs->arenas[i].h0);
wrbarrier();
@@ -136,6 +141,7 @@
* time around.
*/
qlock(&fs->mutlk);
+ tracem("supers");
syncblk(fs->sb0);
syncblk(fs->sb1);
@@ -144,6 +150,7 @@
* the block headers are consistent, and we can
* use them.
*/
+ tracem("arenas1");
for(i = 0; i < fs->narena; i++)
enqueue(fs->arenas[i].h1);
@@ -150,6 +157,7 @@
/*
* Pass 4: clean up the old snap tree's deadlist
*/
+ tracem("snapdl");
freedl(&fs->snapdl, 1);
fs->snapdl.hd.addr = -1;
fs->snapdl.hd.hash = -1;
@@ -161,6 +169,7 @@
wrbarrier();
qunlock(&fs->mutlk);
qunlock(&fs->synclk);
+ tracem("synced");
poperror();
}
@@ -2337,6 +2346,7 @@
}
switch(am->op){
case AOsync:
+ tracem("syncreq");
if(waserror()){
fprint(2, "sync error: %s\n", errmsg());
ainc(&fs->rdonly);
@@ -2352,7 +2362,7 @@
if(a->nlog < a->reserve/(10*Blksz)){
oldhd[i].addr = -1;
oldhd[i].hash = -1;
- oldhd[i].gen = -1; //(Bptr){-1, -1, -1};
+ oldhd[i].gen = -1;
qunlock(a);
continue;
}
@@ -2390,6 +2400,7 @@
break;
case AOsnap:
+ tracem("snapreq");
if(waserror()){
fprint(2, "taking snap: %s\n", errmsg());
ainc(&fs->rdonly);
@@ -2418,6 +2429,7 @@
break;
case AOclear:
+ tracem("bgclear");
if(waserror()){
fprint(2, "clear file %llx: %s\n", am->qpath, errmsg());
ainc(&fs->rdonly);
--- a/main.c
+++ b/main.c
@@ -19,7 +19,9 @@
int permissive;
int usereserve;
char *reamuser;
+int notrace;
char *dev;
+vlong tracesz = 16*MiB;
vlong cachesz = 512*MiB;
char *srvname = "gefs";
int noneid = 0;
@@ -26,8 +28,24 @@
int nogroupid = 9999;
int admid = -1;
Blk *blkbuf;
-void **errctx;
+Errctx **errctx;
+void
+_trace(char *msg, Bptr bp, vlong v0, vlong v1)
+{
+ Trace *t;
+ int idx;
+
+ idx = aincl(&fs->traceidx, 1);
+ t = &fs->trace[(idx-1) % fs->tracesz];
+ strecpy(t->msg, t->msg+sizeof(t->msg), msg);
+ t->tid = (*errctx)->tid;
+ t->qgen = agetv(&fs->qgen);
+ t->bp = bp;
+ t->v0 = v0;
+ t->v1 = v1;
+}
+
static void
nokill(void)
{
@@ -143,6 +161,10 @@
if((fs = mallocz(sizeof(Gefs), 1)) == nil)
sysfatal("malloc: %r");
+ if(!notrace){
+ fs->trace = emalloc(tracesz, 1);
+ fs->tracesz = tracesz/sizeof(Trace);
+ }
fs->lrurz.l = &fs->lrulk;
fs->syncrz.l = &fs->synclk;
fs->noauth = noauth;
@@ -184,6 +206,7 @@
id = aincl(&fs->nworker, 1);
if((*errctx = mallocz(sizeof(Errctx), 1)) == nil)
sysfatal("malloc: %r");
+ (*errctx)->tid = id;
procsetname("%s.%ld", text, id);
(*f)(id, arg);
exits("child returned");
@@ -272,6 +295,10 @@
case 'g':
grow = 1;
break;
+ case 't':
+ tracesz = strtoll(EARGF(usage()), &e, 0);
+ tracesz *= MiB;
+ break;
case 'm':
v = strtoll(EARGF(usage()), &e, 0);
switch(*e){
@@ -308,6 +335,9 @@
break;
case 'C':
check = 1;
+ break;
+ case 'T':
+ notrace=1;
break;
default:
usage();
--- a/snap.c
+++ b/snap.c
@@ -18,6 +18,7 @@
if(!checkflag(dl->ins, Bdirty))
return;
+ traceb("dlflush", dl->ins->bp);
enqueue(dl->ins);
m.op = Oinsert;
dlist2kv(dl, &m, kvbuf, sizeof(kvbuf));
@@ -165,10 +166,12 @@
fb.addr = UNPACK64(p);
fb.hash = -1;
fb.gen = -1;
+ traceb("dlclear", fb);
freeblk(nil, nil, fb);
}
}
bp = b->logp;
+ traceb("dlfreeb", b->bp);
freeblk(&fs->snap, b, b->bp);
dropblk(b);
}
@@ -402,6 +405,7 @@
if(!o->dirty)
return;
+ traceb("updatesnap", o->bp);
/* update the old kvp */
o->nlbl--;
o->nref++;
@@ -518,6 +522,7 @@
{
Dlist *dl, *n;
+ tracem("dlsync");
for(dl = fs->dlhead; dl != nil; dl = n){
n = dl->cnext;
dlflush(dl);
@@ -546,6 +551,7 @@
*/
if(bp.gen <= t->base)
return;
+ traceb("killb", bp);
if(t == &fs->snap)
dl = &fs->snapdl;
else
--- a/tree.c
+++ b/tree.c
@@ -1282,6 +1282,7 @@
assert(rb->bp.addr != 0);
lock(&t->lk);
+ traceb("setroot", rb->bp);
t->ht += dh;
t->bp = rb->bp;
t->dirty = 1;
@@ -1291,8 +1292,10 @@
freepath(t, path, npath);
poperror();
- if(npull != nmsg)
+ if(npull != nmsg){
+ tracem("short pull");
goto Again;
+ }
}
Blk*