ref: 24ad9103062c541fca712b51889cde8297e8a406
parent: 681429107c5f459c7bb9e4cd3f4c5405ac3fb70a
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Dec 10 00:28:01 EST 2021
fs: first cut at qsbr.
--- a/blk.c
+++ b/blk.c
@@ -570,6 +570,7 @@
b->bp.hash = -1;
b->bp.gen = fs->nextgen;
b->data = b->buf + Hdrsz;
+ b->fnext = nil;
b->flag = Bdirty;
b->nval = 0;
@@ -735,18 +736,23 @@
void
putblk(Blk *b)
{
- if(b == nil)
+ if(b == nil || adec(&b->ref) != 0)
return;
- if(adec(&b->ref) == 0){
- fprint(2, "wat: free %B @ %ld\n", b->bp, b->ref);
- assert(0);
- assert((b->flag & Bqueued) || !(b->flag & Bdirty));
- free(b);
- }
+ assert((b->flag & Bqueued) || !(b->flag & Bdirty));
+ free(b);
}
void
freeblk(Blk *b)
+{
+ lock(&fs->freelk);
+ b->fnext = fs->freehd;
+ fs->freehd = b;
+ unlock(&fs->freelk);
+}
+
+void
+reclaimblk(Blk *b)
{
Arena *a;
--- a/cons.c
+++ b/cons.c
@@ -7,7 +7,7 @@
#include "fns.h"
void
-runcons(void *pfd)
+runcons(int, void *pfd)
{
char buf[256], *arg[4];
int fd, n, narg;
--- a/dat.h
+++ b/dat.h
@@ -34,6 +34,7 @@
Nsec = 1000*1000*1000, /* nanoseconds to the second */
Maxname = 256, /* maximum size of a name element */
Maxent = 9+Maxname+1, /* maximum size of ent key, with terminator */
+ Maxproc = 8, /* maximum number of worker procs */
/*
* Kpmax must be no more than 1/4 of pivspc, or
@@ -68,7 +69,9 @@
*/
Kdat, /* qid[8] off[8] => ptr[16]: pointer to data page */
Kent, /* pqid[8] name[n] => dir[n]: serialized Dir */
- Ksnap, /* name[n] => dent[16] ptr[16]: snapshot root */
+ Ksnap, /* id[8] => tree[]: snapshot */
+ Ksnapid, /* qid[8] => tree[]: snapshot for exec, transient */
+
Ksuper, /* qid[8] => pqid[8]: parent dir */
};
@@ -81,7 +84,7 @@
};
//#define Efs "i will not buy this fs, it is scratched"
-#define Efs (abort(), "nope")
+#define Efs (abort(), "broken")
#define Eio "i/o error"
#define Efid "bad fid"
#define Edscan "invalid dir scan offset"
@@ -164,7 +167,7 @@
*
* nval[2]
* valsz[2]
- * _pad[4]sure,
+ * pad[4]sure,
*
* Within these nodes, pointers have the following
* layout:
@@ -268,7 +271,15 @@
Chan *wrchan;
Chan *rdchan;
+ int nproc;
+ Lock activelk;
+ int active[Maxproc];
+ int lastactive[Maxproc];
+ Lock freelk;
+ Blk *freep;
+ Blk *freehd;
+
int fd;
long broken;
@@ -356,6 +367,7 @@
};
struct Mount {
+ long ref;
Msg m;
char kbuf[Keymax];
char vbuf[Rootsz+Ptrsz];
@@ -373,7 +385,6 @@
*/
char snap[64];
Mount *mnt;
-// Tree root;
u32int fid;
vlong qpath;
@@ -438,6 +449,9 @@
Blk *cnext;
Blk *cprev;
Blk *hnext;
+
+ /* Freelist entry */
+ Blk *fnext;
short flag;
--- a/fns.h
+++ b/fns.h
@@ -21,13 +21,15 @@
void putblk(Blk*);
int syncblk(Blk*);
void enqueue(Blk*);
+void quiesce(int);
void freeblk(Blk*);
+void reclaimblk(Blk*);
ushort blkfill(Blk*);
uvlong blkhash(Blk*);
u32int ihash(vlong);
void finalize(Blk*);
char* fillsuper(Blk*);
-int snapshot(Mount*);
+char* snapshot(Mount*);
uvlong siphash(void*, usize);
void reamfs(char*);
int loadarena(Arena*, vlong);
@@ -38,10 +40,10 @@
int compresslog(Arena*);
void setval(Blk*, int, Kvp*);
-int btupsert(Tree*, Msg*, int);
-char *btlookup(Tree*, Key*, Kvp*, char*, int);
-char *btscan(Tree*, Scan*, char*, int);
-char *btnext(Scan*, Kvp*, int*);
+char* btupsert(Tree*, Msg*, int);
+char* btlookup(Tree*, Key*, Kvp*, char*, int);
+char* btscan(Tree*, Scan*, char*, int);
+char* btnext(Scan*, Kvp*, int*);
void btdone(Scan*);
@@ -106,10 +108,10 @@
Chan *mkchan(int);
Fmsg *chrecv(Chan*);
void chsend(Chan*, Fmsg*);
-void runfs(void*);
-void runwrite(void*);
-void runread(void*);
-void runcons(void*);
+void runfs(int, void*);
+void runwrite(int, void*);
+void runread(int, void*);
+void runcons(int, void*);
/* it's in libc... */
extern int cas(long *, long, long);
--- a/fs.c
+++ b/fs.c
@@ -91,6 +91,13 @@
}
static void
+clunkmount(Mount *mnt)
+{
+ if(mnt != nil && adec(&mnt->ref) == 0)
+ free(mnt);
+}
+
+static void
clunkdent(Dent *de)
{
Dent *e, **pe;
@@ -150,10 +157,11 @@
void
putfid(Fid *f)
{
- if(adec(&f->ref) == 0){
- clunkdent(f->dent);
- free(f);
- }
+ if(adec(&f->ref) != 0)
+ return;
+ clunkmount(f->mnt);
+ clunkdent(f->dent);
+ free(f);
}
Fid*
@@ -171,7 +179,8 @@
n->ref = 2; /* one for dup, one for clunk */
n->mode = -1;
n->next = nil;
- n->mnt = f->mnt;
+ if(n->mnt != nil)
+ ainc(&n->mnt->ref);
lock(&fs->fidtablk);
ainc(&n->dent->ref);
@@ -375,7 +384,7 @@
Fid f;
Dir d;
- if((mnt = malloc(sizeof(Mount))) == nil){
+ if((mnt = mallocz(sizeof(Mount), 1)) == nil){
rerror(m, Emem);
return;
}
@@ -605,7 +614,7 @@
void
fscreate(Fmsg *m)
{
- char buf[Kvmax];
+ char *e, buf[Kvmax];
Dent *dent;
Fcall r;
Msg mb;
@@ -650,8 +659,8 @@
putfid(f);
return;
}
- if(btupsert(&f->mnt->root, &mb, 1) == -1){
- rerror(m, "%r");
+ if((e = btupsert(&f->mnt->root, &mb, 1)) != nil){
+ rerror(m, e);
putfid(f);
return;
}
@@ -684,8 +693,8 @@
r.type = Rcreate;
r.qid = d.qid;
r.iounit = f->iounit;
- if(snapshot(f->mnt) == -1){
- rerror(m, Efs);
+ if((e = snapshot(f->mnt)) != nil){
+ rerror(m, e);
putfid(f);
return;
}
@@ -699,6 +708,7 @@
Fcall r;
Msg mb;
Fid *f;
+ char *e;
if((f = getfid(m->fid)) == nil){
rerror(m, "no such fid");
@@ -711,9 +721,9 @@
mb.nk = f->dent->nk;
mb.nv = 0;
//showfs("preremove");
- if(btupsert(&f->mnt->root, &mb, 1) == -1){
+ if((e = btupsert(&f->mnt->root, &mb, 1)) != nil){
runlock(f->dent);
- rerror(m, "remove: %r");
+ rerror(m, e);
putfid(f);
return;
}
@@ -720,8 +730,8 @@
runlock(f->dent);
clunkfid(f);
- if(snapshot(f->mnt) == -1){
- rerror(m, Efs);
+ if((e = snapshot(f->mnt)) != nil){
+ rerror(m, e);
putfid(f);
return;
}
@@ -1019,7 +1029,8 @@
void
fswrite(Fmsg *m)
{
- char sbuf[8], offbuf[4][Ptrsz+Offksz], *p;
+ char sbuf[8], offbuf[4][Ptrsz+Offksz];
+ char *p, *e;
vlong n, o, c;
Msg kv[4];
Fcall r;
@@ -1074,8 +1085,8 @@
PBIT64(kv[i].v, m->offset+m->count);
f->dent->length = m->offset+m->count;
}
- if(btupsert(&f->mnt->root, kv, i+1) == -1){
- fprint(2, "upsert: %r\n");
+ if((e = btupsert(&f->mnt->root, kv, i+1)) != nil){
+ rerror(m, e);
putfid(f);
abort();
return;
@@ -1082,8 +1093,8 @@
}
wunlock(f->dent);
- if(snapshot(f->mnt) == -1){
- rerror(m, Efs);
+ if((e = snapshot(f->mnt)) != nil){
+ rerror(m, e);
putfid(f);
return;
}
@@ -1095,7 +1106,7 @@
}
void
-runfs(void *pfd)
+runfs(int wid, void *pfd)
{
int fd, msgmax, versioned;
char err[128];
@@ -1113,6 +1124,7 @@
fshangup(fd, "truncated message: %r");
return;
}
+ quiesce(wid);
if(convM2S(m->buf, m->sz, m) == 0){
fshangup(fd, "invalid message: %r");
return;
@@ -1151,16 +1163,18 @@
respond(m, &r);
break;
}
+ quiesce(wid);
}
}
void
-runwrite(void *)
+runwrite(int wid, void *)
{
Fmsg *m;
while(1){
m = chrecv(fs->wrchan);
+ quiesce(wid);
switch(m->type){
case Tflush: rerror(m, "unimplemented flush"); break;
case Tcreate: fscreate(m); break;
@@ -1168,20 +1182,68 @@
case Twstat: fswstat(m); break;
case Tremove: fsremove(m); break;
}
+ quiesce(wid);
}
}
void
-runread(void *)
+runread(int wid, void *)
{
Fmsg *m;
while(1){
m = chrecv(fs->rdchan);
+ quiesce(wid);
switch(m->type){
case Twalk: fswalk(m); break;
case Tread: fsread(m); break;
case Tstat: fsstat(m); break;
}
+ quiesce(wid);
}
+}
+
+void
+quiesce(int tid)
+{
+ int i, allquiesced;
+ Blk *p, *n;
+
+ lock(&fs->activelk);
+ allquiesced = 1;
+ fs->active[tid]++;
+ for(i = 0; i < fs->nproc; i++){
+ /*
+ * Odd parity on quiescence implies
+ * that we're between the exit from
+ * and waiting for the next message
+ * that enters us into the critical
+ * section.
+ */
+ if((fs->active[i] & 1) == 0)
+ continue;
+ if(fs->active[i] == fs->lastactive[i])
+ allquiesced = 0;
+ }
+ if(allquiesced)
+ for(i = 0; i < fs->nproc; i++)
+ fs->lastactive[i] = fs->active[i];
+ unlock(&fs->activelk);
+ if(!allquiesced)
+ return;
+
+ lock(&fs->freelk);
+ p = nil;
+ if(fs->freep != nil){
+ p = fs->freep->fnext;
+ fs->freep->fnext = nil;
+ }
+ unlock(&fs->freelk);
+
+ while(p != nil){
+ n = p->fnext;
+ reclaimblk(p);
+ p = n;
+ }
+ fs->freep = fs->freehd;
}
--- a/main.c
+++ b/main.c
@@ -27,7 +27,7 @@
}
void
-launch(void (*f)(void *), void *arg, char *text)
+launch(void (*f)(int, void *), int wid, void *arg, char *text)
{
int pid;
@@ -37,7 +37,7 @@
sysfatal("can't fork: %r");
if (pid == 0) {
procsetname("%s", text);
- (*f)(arg);
+ (*f)(wid, arg);
exits("child returned");
}
}
@@ -121,16 +121,14 @@
srvfd = postfd(srvname, "");
ctlfd = postfd(srvname, ".cmd");
loadfs(argv[0]);
- launch(runcons, (void*)ctlfd, "ctl");
- launch(runwrite, nil, "writeio");
- launch(runread, nil, "readio");
-// launch(runfs, (void*)srvfd, "fs");
- runfs((void*)srvfd);
-// launch(syncproc, nil, "sync");
-// launch(flushproc, &fs->flushev, "flush");
-// for(i = 1; i < argc; i++)
-// if(test(argv[i]) == -1)
-// sysfatal("test %s: %r\n", argv[i]);
+ launch(runcons, fs->nproc++, (void*)ctlfd, "ctl");
+ launch(runwrite, fs->nproc++, nil, "writeio");
+ launch(runread, fs->nproc++, nil, "readio");
+// launch(runfs, fs->nproc++, (void*)srvfd, "fs");
+// launch(taskproc, fs->nproc++, nil, "tasks");
+// launch(syncproc, fs->nproc++, &fs->flushev, "sync");
+ assert(fs->nproc < Maxproc);
+ runfs(fs->nproc++, (void*)srvfd);
exits(nil);
}
}
--- a/tree.c
+++ b/tree.c
@@ -1100,7 +1100,7 @@
return keycmp((Msg*)a, (Msg*)b);
}
-int
+char*
btupsert(Tree *t, Msg *msg, int nmsg)
{
int i, npath, redo, dh, sz, height;
@@ -1111,18 +1111,14 @@
sz = 0;
qsort(msg, nmsg, sizeof(Msg), msgcmp);
for(i = 0; i < nmsg; i++){
- if(msg[i].nk + 2 > Keymax){
- werrstr("overlong key");
- return -1;
- }
+ if(msg[i].nk + 2 > Keymax)
+ return Efs;
sz += msgsz(&msg[i]);
}
Again:
- if((b = getroot(t, &height)) == nil){
- werrstr("get root: %r");
- return -1;
- }
+ if((b = getroot(t, &height)) == nil)
+ return Efs;
/*
* The tree can grow in height by 1 when we
@@ -1132,7 +1128,7 @@
redo = 0;
npath = 0;
if((path = calloc((height + 2), sizeof(Path))) == nil)
- return -1;
+ return Emem;
path[npath].b = nil;
path[npath].idx = -1;
path[npath].midx = -1;
@@ -1195,7 +1191,7 @@
Error:
freepath(path, npath);
free(path);
- return -1;
+ return;
}
Blk*
@@ -1416,9 +1412,12 @@
free(s->path);
}
-int
+
+char*
snapshot(Mount *mnt)
{
+ char *e;
+
mnt->m.op = Oinsert;
PBIT32(mnt->m.v + 0, mnt->root.ht);
PBIT64(mnt->m.v + 4, mnt->root.bp.addr);
@@ -1427,10 +1426,10 @@
PBIT64(mnt->m.v + 28, mnt->dead.addr);
PBIT64(mnt->m.v + 36, mnt->dead.hash);
PBIT64(mnt->m.v + 42, mnt->dead.gen);
- if(btupsert(&fs->snap, &mnt->m, 1) == -1)
- return -1;
+ if((e = btupsert(&fs->snap, &mnt->m, 1)) != nil)
+ return e;
if(sync() == -1)
- return -1;
+ return Eio;
return 0;
}