shithub: gefs

Download patch

ref: 24ad9103062c541fca712b51889cde8297e8a406
parent: 681429107c5f459c7bb9e4cd3f4c5405ac3fb70a
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Dec 10 00:28:01 EST 2021

fs: first cut at qsbr.

--- a/blk.c
+++ b/blk.c
@@ -570,6 +570,7 @@
 	b->bp.hash = -1;
 	b->bp.gen = fs->nextgen;
 	b->data = b->buf + Hdrsz;
+	b->fnext = nil;
 
 	b->flag = Bdirty;
 	b->nval = 0;
@@ -735,18 +736,23 @@
 void
 putblk(Blk *b)
 {
-	if(b == nil)
+	if(b == nil || adec(&b->ref) != 0)
 		return;
-	if(adec(&b->ref) == 0){
-		fprint(2, "wat: free %B @ %ld\n", b->bp, b->ref);
-		assert(0);
-		assert((b->flag & Bqueued) || !(b->flag & Bdirty));
-		free(b);
-	}
+	assert((b->flag & Bqueued) || !(b->flag & Bdirty));
+	free(b);
 }
 
 void
 freeblk(Blk *b)
+{
+	lock(&fs->freelk);
+	b->fnext = fs->freehd;
+	fs->freehd = b;
+	unlock(&fs->freelk);
+}
+
+void
+reclaimblk(Blk *b)
 {
 	Arena *a;
 
--- a/cons.c
+++ b/cons.c
@@ -7,7 +7,7 @@
 #include "fns.h"
 
 void
-runcons(void *pfd)
+runcons(int, void *pfd)
 {
 	char buf[256], *arg[4];
 	int fd, n, narg;
--- a/dat.h
+++ b/dat.h
@@ -34,6 +34,7 @@
 	Nsec	= 1000*1000*1000,	/* nanoseconds to the second */
 	Maxname	= 256,			/* maximum size of a name element */
 	Maxent	= 9+Maxname+1,		/* maximum size of ent key, with terminator */
+	Maxproc	= 8,			/* maximum number of worker procs */
 
 	/*
 	 * Kpmax must be no more than 1/4 of pivspc, or
@@ -68,7 +69,9 @@
 	 */
 	Kdat,	/* qid[8] off[8] => ptr[16]:	pointer to data page */
 	Kent,	/* pqid[8] name[n] => dir[n]:	serialized Dir */
-	Ksnap,	/* name[n] => dent[16] ptr[16]:	snapshot root */
+	Ksnap,	/* id[8] => tree[]:		snapshot */
+	Ksnapid,	/* qid[8] => tree[]:		snapshot for exec, transient */
+	
 	Ksuper,	/* qid[8] => pqid[8]:		parent dir */
 };
 
@@ -81,7 +84,7 @@
 };
 
 //#define Efs	"i will not buy this fs, it is scratched"
-#define Efs (abort(), "nope")
+#define Efs	(abort(), "broken")
 #define Eio	"i/o error"
 #define Efid	"bad fid"
 #define Edscan	"invalid dir scan offset"
@@ -164,7 +167,7 @@
  *
  *	nval[2]
  *	valsz[2]
- *	_pad[4]sure, 
+ *	pad[4]sure, 
  *
  * Within these nodes, pointers have the following
  * layout:
@@ -268,7 +271,15 @@
 
 	Chan	*wrchan;
 	Chan	*rdchan;
+	int	nproc;
 
+	Lock	activelk;
+	int	active[Maxproc];
+	int	lastactive[Maxproc];
+	Lock	freelk;
+	Blk	*freep;
+	Blk	*freehd;
+
 	int	fd;
 	long	broken;
 
@@ -356,6 +367,7 @@
 };
 
 struct Mount {
+	long	ref;
 	Msg	m;
 	char	kbuf[Keymax];
 	char	vbuf[Rootsz+Ptrsz];
@@ -373,7 +385,6 @@
 	 */
 	char	snap[64];
 	Mount	*mnt;
-//	Tree	root;
 
 	u32int	fid;
 	vlong	qpath;
@@ -438,6 +449,9 @@
 	Blk	*cnext;
 	Blk	*cprev;
 	Blk	*hnext;
+
+	/* Freelist entry */
+	Blk	*fnext;
 
 	short	flag;
 
--- a/fns.h
+++ b/fns.h
@@ -21,13 +21,15 @@
 void	putblk(Blk*);
 int	syncblk(Blk*);
 void	enqueue(Blk*);
+void	quiesce(int);
 void	freeblk(Blk*);
+void	reclaimblk(Blk*);
 ushort	blkfill(Blk*);
 uvlong	blkhash(Blk*);
 u32int	ihash(vlong);
 void	finalize(Blk*);
 char*	fillsuper(Blk*);
-int	snapshot(Mount*);
+char*	snapshot(Mount*);
 uvlong	siphash(void*, usize);
 void	reamfs(char*);
 int	loadarena(Arena*, vlong);
@@ -38,10 +40,10 @@
 int	compresslog(Arena*);
 void	setval(Blk*, int, Kvp*);
 
-int	btupsert(Tree*, Msg*, int);
-char	*btlookup(Tree*, Key*, Kvp*, char*, int);
-char	*btscan(Tree*, Scan*, char*, int);
-char	*btnext(Scan*, Kvp*, int*);
+char*	btupsert(Tree*, Msg*, int);
+char*	btlookup(Tree*, Key*, Kvp*, char*, int);
+char*	btscan(Tree*, Scan*, char*, int);
+char*	btnext(Scan*, Kvp*, int*);
 void	btdone(Scan*);
 
 
@@ -106,10 +108,10 @@
 Chan	*mkchan(int);
 Fmsg	*chrecv(Chan*);
 void	chsend(Chan*, Fmsg*);
-void	runfs(void*);
-void	runwrite(void*);
-void	runread(void*);
-void	runcons(void*);
+void	runfs(int, void*);
+void	runwrite(int, void*);
+void	runread(int, void*);
+void	runcons(int, void*);
 
 /* it's in libc... */
 extern int cas(long *, long, long);
--- a/fs.c
+++ b/fs.c
@@ -91,6 +91,13 @@
 }
 
 static void
+clunkmount(Mount *mnt)
+{
+	if(mnt != nil && adec(&mnt->ref) == 0)
+		free(mnt);
+}
+
+static void
 clunkdent(Dent *de)
 {
 	Dent *e, **pe;
@@ -150,10 +157,11 @@
 void
 putfid(Fid *f)
 {
-	if(adec(&f->ref) == 0){
-		clunkdent(f->dent);
-		free(f);
-	}
+	if(adec(&f->ref) != 0)
+		return;
+	clunkmount(f->mnt);
+	clunkdent(f->dent);
+	free(f);
 }
 
 Fid*
@@ -171,7 +179,8 @@
 	n->ref = 2; /* one for dup, one for clunk */
 	n->mode = -1;
 	n->next = nil;
-	n->mnt = f->mnt;
+	if(n->mnt != nil)
+		ainc(&n->mnt->ref);
 
 	lock(&fs->fidtablk);
 	ainc(&n->dent->ref);
@@ -375,7 +384,7 @@
 	Fid f;
 	Dir d;
 
-	if((mnt = malloc(sizeof(Mount))) == nil){
+	if((mnt = mallocz(sizeof(Mount), 1)) == nil){
 		rerror(m, Emem);
 		return;
 	}
@@ -605,7 +614,7 @@
 void
 fscreate(Fmsg *m)
 {
-	char buf[Kvmax];
+	char *e, buf[Kvmax];
 	Dent *dent;
 	Fcall r;
 	Msg mb;
@@ -650,8 +659,8 @@
 		putfid(f);
 		return;
 	}
-	if(btupsert(&f->mnt->root, &mb, 1) == -1){
-		rerror(m, "%r");
+	if((e = btupsert(&f->mnt->root, &mb, 1)) != nil){
+		rerror(m, e);
 		putfid(f);
 		return;
 	}
@@ -684,8 +693,8 @@
 	r.type = Rcreate;
 	r.qid = d.qid;
 	r.iounit = f->iounit;
-	if(snapshot(f->mnt) == -1){
-		rerror(m, Efs);
+	if((e = snapshot(f->mnt)) != nil){
+		rerror(m, e);
 		putfid(f);
 		return;
 	}
@@ -699,6 +708,7 @@
 	Fcall r;
 	Msg mb;
 	Fid *f;
+	char *e;
 
 	if((f = getfid(m->fid)) == nil){
 		rerror(m, "no such fid");
@@ -711,9 +721,9 @@
 	mb.nk = f->dent->nk;
 	mb.nv = 0;
 //showfs("preremove");
-	if(btupsert(&f->mnt->root, &mb, 1) == -1){
+	if((e = btupsert(&f->mnt->root, &mb, 1)) != nil){
 		runlock(f->dent);
-		rerror(m, "remove: %r");
+		rerror(m, e);
 		putfid(f);
 		return;
 	}
@@ -720,8 +730,8 @@
 	runlock(f->dent);
 	clunkfid(f);
 
-	if(snapshot(f->mnt) == -1){
-		rerror(m, Efs);
+	if((e = snapshot(f->mnt)) != nil){
+		rerror(m, e);
 		putfid(f);
 		return;
 	}
@@ -1019,7 +1029,8 @@
 void
 fswrite(Fmsg *m)
 {
-	char sbuf[8], offbuf[4][Ptrsz+Offksz], *p;
+	char sbuf[8], offbuf[4][Ptrsz+Offksz];
+	char *p, *e;
 	vlong n, o, c;
 	Msg kv[4];
 	Fcall r;
@@ -1074,8 +1085,8 @@
 		PBIT64(kv[i].v, m->offset+m->count);
 		f->dent->length = m->offset+m->count;
 	}
-	if(btupsert(&f->mnt->root, kv, i+1) == -1){
-		fprint(2, "upsert: %r\n");
+	if((e = btupsert(&f->mnt->root, kv, i+1)) != nil){
+		rerror(m, e);
 		putfid(f);
 		abort();
 		return;
@@ -1082,8 +1093,8 @@
 	}
 	wunlock(f->dent);
 
-	if(snapshot(f->mnt) == -1){
-		rerror(m, Efs);
+	if((e = snapshot(f->mnt)) != nil){
+		rerror(m, e);
 		putfid(f);
 		return;
 	}
@@ -1095,7 +1106,7 @@
 }
 
 void
-runfs(void *pfd)
+runfs(int wid, void *pfd)
 {
 	int fd, msgmax, versioned;
 	char err[128];
@@ -1113,6 +1124,7 @@
 			fshangup(fd, "truncated message: %r");
 			return;
 		}
+		quiesce(wid);
 		if(convM2S(m->buf, m->sz, m) == 0){
 			fshangup(fd, "invalid message: %r");
 			return;
@@ -1151,16 +1163,18 @@
 			respond(m, &r);
 			break;
 		}
+		quiesce(wid);
 	}
 }
 
 void
-runwrite(void *)
+runwrite(int wid, void *)
 {
 	Fmsg *m;
 
 	while(1){
 		m = chrecv(fs->wrchan);
+		quiesce(wid);
 		switch(m->type){
 		case Tflush:	rerror(m, "unimplemented flush");	break;
 		case Tcreate:	fscreate(m);	break;
@@ -1168,20 +1182,68 @@
 		case Twstat:	fswstat(m);	break;
 		case Tremove:	fsremove(m);	break;
 		}
+		quiesce(wid);
 	}
 }
 
 void
-runread(void *)
+runread(int wid, void *)
 {
 	Fmsg *m;
 
 	while(1){
 		m = chrecv(fs->rdchan);
+		quiesce(wid);
 		switch(m->type){
 		case Twalk:	fswalk(m);	break;
 		case Tread:	fsread(m);	break;
 		case Tstat:	fsstat(m);	break;
 		}
+		quiesce(wid);
 	}
+}
+
+void
+quiesce(int tid)
+{
+	int i, allquiesced;
+	Blk *p, *n;
+
+	lock(&fs->activelk);
+	allquiesced = 1;
+	fs->active[tid]++;
+	for(i = 0; i < fs->nproc; i++){
+		/*
+		 * Odd parity on quiescence implies
+		 * that we're between the exit from
+		 * and waiting for the next message
+		 * that enters us into the critical
+		 * section.
+		 */
+		if((fs->active[i] & 1) == 0)
+			continue;
+		if(fs->active[i] == fs->lastactive[i])
+			allquiesced = 0;
+	}
+	if(allquiesced)
+		for(i = 0; i < fs->nproc; i++)
+			fs->lastactive[i] = fs->active[i];
+	unlock(&fs->activelk);
+	if(!allquiesced)
+		return;
+
+	lock(&fs->freelk);
+	p = nil;
+	if(fs->freep != nil){
+		p = fs->freep->fnext;
+		fs->freep->fnext = nil;
+	}
+	unlock(&fs->freelk);
+
+	while(p != nil){
+		n = p->fnext;
+		reclaimblk(p);
+		p = n;
+	}
+	fs->freep = fs->freehd;
 }
--- a/main.c
+++ b/main.c
@@ -27,7 +27,7 @@
 }
 
 void
-launch(void (*f)(void *), void *arg, char *text)
+launch(void (*f)(int, void *), int wid, void *arg, char *text)
 {
 	int pid;
 
@@ -37,7 +37,7 @@
 		sysfatal("can't fork: %r");
 	if (pid == 0) {
 		procsetname("%s", text);
-		(*f)(arg);
+		(*f)(wid, arg);
 		exits("child returned");
 	}
 }
@@ -121,16 +121,14 @@
 		srvfd = postfd(srvname, "");
 		ctlfd = postfd(srvname, ".cmd");
 		loadfs(argv[0]);
-		launch(runcons, (void*)ctlfd, "ctl");
-		launch(runwrite, nil, "writeio");
-		launch(runread, nil, "readio");
-//		launch(runfs, (void*)srvfd, "fs");
-		runfs((void*)srvfd);
-//		launch(syncproc, nil, "sync");
-//		launch(flushproc, &fs->flushev, "flush");
-//		for(i = 1; i < argc; i++)
-//			if(test(argv[i]) == -1)
-//				sysfatal("test %s: %r\n", argv[i]);
+		launch(runcons, fs->nproc++, (void*)ctlfd, "ctl");
+		launch(runwrite, fs->nproc++, nil, "writeio");
+		launch(runread, fs->nproc++, nil, "readio");
+//		launch(runfs, fs->nproc++, (void*)srvfd, "fs");
+//		launch(taskproc, fs->nproc++, nil, "tasks");
+//		launch(syncproc, fs->nproc++, &fs->flushev, "sync");
+		assert(fs->nproc < Maxproc);
+		runfs(fs->nproc++, (void*)srvfd);
 		exits(nil);
 	}
 }
--- a/tree.c
+++ b/tree.c
@@ -1100,7 +1100,7 @@
 	return keycmp((Msg*)a, (Msg*)b);
 }
 
-int
+char*
 btupsert(Tree *t, Msg *msg, int nmsg)
 {
 	int i, npath, redo, dh, sz, height;
@@ -1111,18 +1111,14 @@
 	sz = 0;
 	qsort(msg, nmsg, sizeof(Msg), msgcmp);
 	for(i = 0; i < nmsg; i++){
-		if(msg[i].nk + 2 > Keymax){
-			werrstr("overlong key");
-			return -1;
-		}
+		if(msg[i].nk + 2 > Keymax)
+			return Efs;
 		sz += msgsz(&msg[i]);
 	}
 
 Again:
-	if((b = getroot(t, &height)) == nil){
-		werrstr("get root: %r");
-		return -1;
-	}
+	if((b = getroot(t, &height)) == nil)
+		return Efs;
 
 	/*
 	 * The tree can grow in height by 1 when we
@@ -1132,7 +1128,7 @@
 	redo = 0;
 	npath = 0;
 	if((path = calloc((height + 2), sizeof(Path))) == nil)
-		return -1;
+		return Emem;
 	path[npath].b = nil;
 	path[npath].idx = -1;
 	path[npath].midx = -1;
@@ -1195,7 +1191,7 @@
 Error:
 	freepath(path, npath);
 	free(path);
-	return -1;
+	return;
 }
 
 Blk*
@@ -1416,9 +1412,12 @@
 	free(s->path);
 }
 
-int
+
+char*
 snapshot(Mount *mnt)
 {
+	char *e;
+
 	mnt->m.op = Oinsert;
 	PBIT32(mnt->m.v +  0, mnt->root.ht);
 	PBIT64(mnt->m.v +  4, mnt->root.bp.addr);
@@ -1427,10 +1426,10 @@
 	PBIT64(mnt->m.v + 28, mnt->dead.addr);
 	PBIT64(mnt->m.v + 36, mnt->dead.hash);
 	PBIT64(mnt->m.v + 42, mnt->dead.gen);
-	if(btupsert(&fs->snap, &mnt->m, 1) == -1)
-		return -1;
+	if((e = btupsert(&fs->snap, &mnt->m, 1)) != nil)
+		return e;
 	if(sync() == -1)
-		return -1;
+		return Eio;
 	return 0;
 }