ref: d79d058a950abf6ab6985cc7f44e851fa88aa4b5
parent: c5b298956bde7c034464c88be0c85d9e12bc5d0a
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Dec 5 11:24:18 EST 2021
snap: keep root in snapshot tree. we don't yet support taking new snapshots, we free the wrong blocks, but we're making progress.
--- a/blk.c
+++ b/blk.c
@@ -295,9 +295,7 @@
case LogFlush:
dprint("log@%d: flush: %llx\n", i, off>>8);
- lock(&fs->root.lk);
- fs->root.bp.gen = off >> 8;
- unlock(&fs->root.lk);
+ fs->nextgen = (off >> 8)+1;
break;
case LogAlloc:
case LogAlloc1:
@@ -563,14 +561,17 @@
* on an allocation.
*/
b->ref = 1;
+ b->cnext = nil;
+ b->cprev = nil;
+ b->hnext = nil;
}
b->type = t;
- b->flag = Bdirty;
b->bp.addr = bp;
b->bp.hash = -1;
b->bp.gen = fs->nextgen;
b->data = b->buf + Hdrsz;
+ b->flag = Bdirty;
b->nval = 0;
b->valsz = 0;
b->nbuf = 0;
@@ -577,9 +578,6 @@
b->bufsz = 0;
b->logsz = 0;
b->lognxt = 0;
- b->cnext = nil;
- b->cprev = nil;
- b->hnext = nil;
dprint("new block %B from %p, flag=%x\n", b->bp, getcallerpc(&t), b->flag);
return cacheblk(b);
@@ -623,10 +621,10 @@
PBIT32(p, Blksz); p += 4;
PBIT32(p, Bufspc); p += 4;
PBIT32(p, Hdrsz); p += 4;
- PBIT32(p, fs->root.ht); p += 4;
- PBIT64(p, fs->root.bp.addr); p += 8;
- PBIT64(p, fs->root.bp.hash); p += 8;
- PBIT64(p, fs->root.bp.gen); p += 8;
+ PBIT32(p, fs->snap.ht); p += 4;
+ PBIT64(p, fs->snap.bp.addr); p += 8;
+ PBIT64(p, fs->snap.bp.hash); p += 8;
+ PBIT64(p, fs->nextgen); p += 8;
PBIT32(p, fs->narena); p += 4;
PBIT64(p, fs->arenasz); p += 8;
PBIT64(p, fs->nextqid); p += 8;
@@ -692,7 +690,7 @@
return nil;
}
if(blkhash(b) != bp.hash){
- werrstr("corrupt block %B: %llx != %llx", bp, blkhash(b), bp.hash);
+ fprint(2, "corrupt block %B: %llx != %llx\n", bp, blkhash(b), bp.hash);
qunlock(&blklock);
abort();
return nil;
@@ -763,27 +761,4 @@
lock(a);
blkdealloc_lk(b->bp.addr);
unlock(a);
-}
-
-int
-sync(void)
-{
- int i, r;
- Blk *b;
-
- r = 0;
- for(i = 0; i < fs->narena; i++){
- b = fs->arenas[i].logtl;
- finalize(b);
- if(syncblk(b) == -1)
- r = -1;
- }
- /* FIXME: hit it with a big hammer -- flush the whole cache */
- for(b = fs->chead; b != nil; b = b->cnext){
- if(!(b->flag & Bdirty))
- continue;
- if(syncblk(b) == -1)
- r = -1;
- }
- return r;
}
--- a/check.c
+++ b/check.c
@@ -184,7 +184,7 @@
ok = 1;
if(badfree())
ok = 0;
- if((b = getroot(&fs->root, &height)) != nil){
+ if((b = getroot(&fs->snap, &height)) != nil){
if(badblk(b, height-1, nil, 0))
ok = 0;
putblk(b);
@@ -206,6 +206,7 @@
fprint(fd, "NIL\n");
return;
}
+ fprint(fd, "%.*s +{%B}\n", 4*indent, spc, b->bp);
if(b->type == Tpivot){
for(i = 0; i < b->nbuf; i++){
getmsg(b, i, &m);
@@ -249,10 +250,10 @@
Blk *b;
int h;
- fprint(fd, "=== %s\n", m);
- fprint(fd, "\tht: %d\n", fs->root.ht);
- fprint(fd, "\trt: %B\n", fs->root.bp);
- b = getroot(&fs->root, &h);
+ fprint(fd, "=== %s %B\n", m, fs->snap.bp);
+ fprint(fd, "\tht: %d\n", fs->snap.ht);
+ fprint(fd, "\trt: %B\n", fs->snap.bp);
+ b = getroot(&fs->snap, &h);
rshowblk(fd, b, 0, 1);
putblk(b);
}
--- a/dat.h
+++ b/dat.h
@@ -16,6 +16,7 @@
typedef struct Bucket Bucket;
typedef struct Chan Chan;
typedef struct Tree Tree;
+typedef struct Mount Mount;
enum {
KiB = 1024ULL,
@@ -50,6 +51,7 @@
Hdrsz = 10,
+ Rootsz = 4+Ptrsz, /* root pointer */
Blkspc = Blksz - Hdrsz,
Bufspc = Blkspc / 2,
Pivspc = Blkspc - Bufspc,
@@ -95,6 +97,8 @@
#define Emem "out of memory"
#define Ename "invalid file name"
#define Enomem "out of memory"
+#define Eattach "attach required"
+#define Enosnap "no snapshot by that name exists"
/*
* All metadata blocks share a common header:
@@ -268,7 +272,7 @@
int fd;
long broken;
- Tree root;
+ Tree snap;
Lock qidlk;
vlong nextqid;
@@ -351,6 +355,14 @@
char buf[Maxent];
};
+struct Mount {
+ Msg m;	/* snapshot-tree message; fsattach points m.k/m.v at kbuf/vbuf */
+ char kbuf[Keymax];	/* key bytes: Ksnap tag followed by the snapshot name */
+ char vbuf[Rootsz+Ptrsz];	/* value bytes: tree height, root pointer, then dead pointer */
+ Tree root;	/* height and root pointer decoded from the snapshot value at attach */
+ Bptr dead;	/* second block pointer stored after the root in the snapshot value */
+};
+
struct Fid {
Lock;
Fid *next;
@@ -359,7 +371,9 @@
* instead of the most recent root, to prevent
* paging in the wrong executable.
*/
- Tree root;
+ char snap[64];
+ Mount *mnt;
+// Tree root;
u32int fid;
vlong qpath;
--- a/dump.c
+++ b/dump.c
@@ -25,8 +25,8 @@
case Kent: /* pqid[8] name[n] => dir[n]: serialized Dir */
n = fmtprint(fmt, "ent dir:%llx, name:\"%.*s\")", GBIT64(k->k+1), k->nk-11, k->k+11);
break;
- case Ksnap: /* name[n] => dent[16] ptr[16]: snapshot root */
- n = fmtprint(fmt, "snap dent:%llx ptr:%llx", GBIT64(k->k+1), GBIT64(k->k+9));
+ case Ksnap: /* name[n] => tree[24]: snapshot root */
+ n = fmtprint(fmt, "snap name:\"%.*s\"", k->nk-1, k->k+1);
break;
case Ksuper: /* qid[8] => pqid[8]: parent dir */
n = fmtprint(fmt, "up parent:%llx ptr:%llx", GBIT64(k->k+1), GBIT64(k->k+9));
@@ -42,13 +42,17 @@
showval(Fmt *fmt, Kvp *v, int op)
{
char *p;
+ Bptr bp;
Dir d;
- int n;
+ int n, ht;
n = 0;
switch(v->k[0]){
case Kdat: /* qid[8] off[8] => ptr[16]: pointer to data page */
- n = fmtprint(fmt, "blk:%llx, hash:%llx", GBIT64(v->v), GBIT64(v->v+8));
+ bp.addr = GBIT64(v->v+0);
+ bp.hash = GBIT64(v->v+8);
+ bp.gen = GBIT64(v->v+16);
+ n = fmtprint(fmt, "ptr:%B", bp);
break;
case Kent: /* pqid[8] name[n] => dir[n]: serialized Dir */
switch(op){
@@ -84,7 +88,11 @@
}
break;
case Ksnap: /* name[n] => dent[16] ptr[16]: snapshot root */
- n = fmtprint(fmt, "blk:%llx, hash:%llx", GBIT64(v->v), GBIT64(v->v+8));
+ ht = GBIT32(v->v);
+ bp.addr = GBIT64(v->v+4);
+ bp.hash = GBIT64(v->v+12);
+ bp.gen = GBIT64(v->v+20);
+ n = fmtprint(fmt, "ht:%d, ptr:%B", ht, bp);
break;
case Ksuper: /* qid[8] => pqid[8]: parent dir */
n = fmtprint(fmt, "parent: %llx", GBIT64(v->v));
--- a/fns.h
+++ b/fns.h
@@ -27,7 +27,7 @@
u32int ihash(vlong);
void finalize(Blk*);
char* fillsuper(Blk*);
-int snapshot(void);
+int snapshot(Mount*);
uvlong siphash(void*, usize);
void reamfs(char*);
int loadarena(Arena*, vlong);
@@ -40,7 +40,6 @@
int btupsert(Tree*, Msg*, int);
char *btlookup(Tree*, Key*, Kvp*, char*, int);
-char *btlookupat(Blk*, int, Key*, Kvp*, char*, int);
char *btscan(Tree*, Scan*, char*, int);
char *btnext(Scan*, Kvp*, int*);
void btdone(Scan*);
--- a/fs.c
+++ b/fs.c
@@ -40,24 +40,19 @@
fslookup(Fid *f, Key *k, Kvp *kv, char *buf, int nbuf, int lk)
{
char *e;
- Blk *b;
- int h;
- assert(f->root.bp.addr == -1);
- if((b = getroot(&fs->root, &h)) == nil)
- return Efs;
-
+ if(f->mnt == nil)
+ return Eattach;
if(lk)
rlock(f->dent);
- e = btlookupat(b, h, k, kv, buf, nbuf);
+ e = btlookup(&f->mnt->root, k, kv, buf, nbuf);
if(lk)
runlock(f->dent);
- putblk(b);
return e;
}
static Dent*
-getdent(vlong root, vlong pqid, Dir *d)
+getdent(vlong pqid, Dir *d)
{
Dent *e;
char *ek, *eb;
@@ -64,10 +59,10 @@
u32int h;
int err;
- h = (ihash(d->qid.path) ^ ihash(root)) % Ndtab;
+ h = ihash(d->qid.path) % Ndtab;
lock(&fs->dtablk);
for(e = fs->dtab[h]; e != nil; e = e->next){
- if(e->qid.path == d->qid.path && e->rootb == root){
+ if(e->qid.path == d->qid.path){
ainc(&e->ref);
unlock(&fs->dtablk);
return e;
@@ -79,7 +74,6 @@
return nil;
e->ref = 1;
e->qid = d->qid;
- e->rootb = root;
e->k = e->buf;
e->nk = 9 + strlen(d->name) + 1;
@@ -177,6 +171,7 @@
n->ref = 2; /* one for dup, one for clunk */
n->mode = -1;
n->next = nil;
+ n->mnt = f->mnt;
lock(&fs->fidtablk);
ainc(&n->dent->ref);
@@ -370,8 +365,9 @@
void
fsattach(Fmsg *m, int iounit)
{
- char *p, *ep, buf[Kvmax], kvbuf[Kvmax];
+ char *p, *ep, dbuf[Kvmax], kvbuf[Kvmax];
int err;
+ Mount *mnt;
Dent *e;
Fcall r;
Kvp kv;
@@ -379,24 +375,58 @@
Fid f;
Dir d;
+ if((mnt = malloc(sizeof(Mount))) == nil){
+ rerror(m, Emem);
+ return;
+ }
+
+ if(1+strlen(m->aname) >= sizeof(mnt->kbuf)){
+ rerror(m, Ename);
+ return;
+ }
+ print("attach %s\n", m->aname);
+ mnt->m.k = mnt->kbuf;
+ mnt->m.k[0] = Ksnap;
+ mnt->m.nk = 1+snprint(mnt->m.k+1, sizeof(mnt->kbuf)-1, "%s", m->aname);
+ mnt->m.v = mnt->vbuf;
+ mnt->m.nv = sizeof(mnt->vbuf);
+ if(btlookup(&fs->snap, &mnt->m, &kv, kvbuf, sizeof(kvbuf)) != nil){
+ rerror(m, Enosnap);
+ return;
+ }
+
+ if(kv.nv != Rootsz+Ptrsz){
+ rerror(m, Efs);
+ return;
+ }
+ p = kv.v;
+ mnt->root.ht = GBIT32(p); p += 4;
+ mnt->root.bp.addr = GBIT64(p); p += 8;
+ mnt->root.bp.hash = GBIT64(p); p += 8;
+ mnt->root.bp.gen = GBIT64(p); p += 8;
+ mnt->dead.addr = GBIT64(p); p += 8;
+ mnt->dead.hash = GBIT64(p); p += 8;
+ mnt->dead.gen = GBIT64(p);
+
err = 0;
- p = buf;
- ep = buf + sizeof(buf);
+ p = dbuf;
+ ep = dbuf + sizeof(dbuf);
p = pack8(&err, p, ep, Kent);
p = pack64(&err, p, ep, -1ULL);
p = packstr(&err, p, ep, "");
- dk.k = buf;
- dk.nk = p - buf;
- if(btlookup(&fs->root, &dk, &kv, kvbuf, sizeof(kvbuf)) != nil){
+ if(err)
+ abort();
+ dk.k = dbuf;
+ dk.nk = p - dbuf;
+ if(btlookup(&mnt->root, &dk, &kv, kvbuf, sizeof(kvbuf)) != nil){
rerror(m, Efs);
return;
}
- r.type = Rattach;
if(kv2dir(&kv, &d) == -1){
rerror(m, Efs);
return;
}
- if((e = getdent(-1, -1, &d)) == nil){
+ if((e = getdent(-1, &d)) == nil){
rerror(m, Efs);
return;
}
@@ -411,10 +441,9 @@
memset(&f, 0, sizeof(Fid));
f.fid = NOFID;
+ f.mnt = mnt;
f.qpath = d.qid.path;
f.mode = -1;
- f.root.bp.addr = -1;
- f.root.bp.hash = -1;
f.iounit = iounit;
f.dent = e;
if(dupfid(m->fid, &f) == nil){
@@ -421,6 +450,8 @@
rerror(m, Enomem);
return;
}
+
+ r.type = Rattach;
r.qid = d.qid;
respond(m, &r);
return;
@@ -493,7 +524,7 @@
putfid(o);
}
if(i > 0){
- dent = getdent(f->root.bp.addr, up, &d);
+ dent = getdent(up, &d);
if(dent == nil){
if(m->fid != m->newfid)
clunkfid(f);
@@ -523,7 +554,7 @@
rerror(m, "no such fid");
return;
}
- if((err = btlookup(&fs->root, f->dent, &kv, kvbuf, sizeof(kvbuf))) != nil){
+ if((err = btlookup(&f->mnt->root, f->dent, &kv, kvbuf, sizeof(kvbuf))) != nil){
rerror(m, err);
putfid(f);
return;
@@ -619,12 +650,12 @@
putfid(f);
return;
}
- if(btupsert(&fs->root, &mb, 1) == -1){
+ if(btupsert(&f->mnt->root, &mb, 1) == -1){
rerror(m, "%r");
putfid(f);
return;
}
- dent = getdent(f->root.bp.addr, f->qpath, &d);
+ dent = getdent(f->qpath, &d);
if(dent == nil){
if(m->fid != m->newfid)
clunkfid(f);
@@ -653,6 +684,11 @@
r.type = Rcreate;
r.qid = d.qid;
r.iounit = f->iounit;
+ if(snapshot(f->mnt) == -1){
+ rerror(m, Efs);
+ putfid(f);
+ return;
+ }
respond(m, &r);
putfid(f);
}
@@ -675,7 +711,7 @@
mb.nk = f->dent->nk;
mb.nv = 0;
//showfs("preremove");
- if(btupsert(&fs->root, &mb, 1) == -1){
+ if(btupsert(&f->mnt->root, &mb, 1) == -1){
runlock(f->dent);
rerror(m, "remove: %r");
putfid(f);
@@ -684,6 +720,11 @@
runlock(f->dent);
clunkfid(f);
+ if(snapshot(f->mnt) == -1){
+ rerror(m, Efs);
+ putfid(f);
+ return;
+ }
r.type = Rremove;
respond(m, &r);
putfid(f);
@@ -761,7 +802,6 @@
{
char pfx[9], *p, *e;
int n, ns, done;
- Tree *t;
Scan *s;
Dir d;
@@ -774,8 +814,7 @@
pfx[0] = Kent;
PBIT64(pfx+1, f->qpath);
- t = (f->root.bp.addr != -1) ? &f->root : &fs->root;
- if((e = btscan(t, s, pfx, sizeof(pfx))) != nil){
+ if((e = btscan(&f->mnt->root, s, pfx, sizeof(pfx))) != nil){
free(r->data);
btdone(s);
return e;
@@ -1035,7 +1074,7 @@
PBIT64(kv[i].v, m->offset+m->count);
f->dent->length = m->offset+m->count;
}
- if(btupsert(&fs->root, kv, i+1) == -1){
+ if(btupsert(&f->mnt->root, kv, i+1) == -1){
fprint(2, "upsert: %r\n");
putfid(f);
abort();
@@ -1043,9 +1082,15 @@
}
wunlock(f->dent);
+ if(snapshot(f->mnt) == -1){
+ rerror(m, Efs);
+ putfid(f);
+ return;
+ }
+
r.type = Rwrite;
r.count = m->count;
- respond(m, &r);
+ respond(m, &r);
putfid(f);
}
--- a/load.c
+++ b/load.c
@@ -61,19 +61,18 @@
blksz = GBIT32(p); p += 4;
bufspc = GBIT32(p); p += 4;
hdrsz = GBIT32(p); p += 4;
- fs->root.ht = GBIT32(p); p += 4;
- fs->root.bp.addr = GBIT64(p); p += 8;
- fs->root.bp.hash = GBIT64(p); p += 8;
- fs->root.bp.gen = GBIT64(p); p += 8;
+ fs->snap.ht = GBIT32(p); p += 4;
+ fs->snap.bp.addr = GBIT64(p); p += 8;
+ fs->snap.bp.hash = GBIT64(p); p += 8;
+ fs->snap.bp.gen = 1;
+ fs->nextgen = GBIT64(p); p += 8;
fs->narena = GBIT32(p); p += 4;
fs->arenasz = GBIT64(p); p += 8;
fs->nextqid = GBIT64(p); p += 8;
fs->super = b;
- fs->nextgen = fs->root.bp.gen+1;
fprint(2, "load: %8s\n", p);
- fprint(2, "\theight:\t%d\n", fs->root.ht);
- fprint(2, "\troot:\t%B\n", fs->root.bp);
+ fprint(2, "\tsnaptree:\t%B\n", fs->snap.bp);
fprint(2, "\tarenas:\t%d\n", fs->narena);
fprint(2, "\tarenasz:\t%lld\n", fs->arenasz);
fprint(2, "\tnextqid:\t%lld\n", fs->nextqid);
--- a/ream.c
+++ b/ream.c
@@ -41,6 +41,27 @@
}
static void
+initsnap(Blk *s, Blk *r)	/* store a "main" snapshot entry pointing at root block r into slot 0 of block s */
+{
+ char kbuf[32], vbuf[Rootsz+Ptrsz];
+ Kvp kv;
+ /* value layout: ht[4], root addr/hash/gen[8 each], dead addr/hash/gen[8 each]; fsattach decodes it at these offsets */
+ kv.k = kbuf;
+ kv.v = vbuf;
+ kv.k[0] = Ksnap;
+ kv.nk = 1 + snprint(kv.k+1, sizeof(kbuf)-1, "main");
+ kv.nv = sizeof(vbuf);
+ PBIT32(kv.v + 0, 1);	/* tree height: a single leaf */
+ PBIT64(kv.v + 4, r->bp.addr);
+ PBIT64(kv.v + 12, r->bp.hash);
+ PBIT64(kv.v + 20, r->bp.gen);
+ PBIT64(kv.v + 28, -1ULL);	/* dead pointer fields are all set to -1 */
+ PBIT64(kv.v + 36, -1ULL);
+ PBIT64(kv.v + 44, -1ULL);	/* 28+8+8; was 42, which overlapped the previous field and left the tail of vbuf unset */
+ setval(s, 0, &kv);
+}
+
+static void
reamarena(Arena *a, vlong start, vlong asz)
{
vlong addr, bo, bh;
@@ -89,7 +110,8 @@
reamfs(char *dev)
{
vlong sz, asz, off;
- Blk *s, *r;
+ Blk *s, *r, *t;
+ Mount *mnt;
Dir *d;
int i;
@@ -101,6 +123,9 @@
sysfatal("ream: disk too small");
if((s = mallocz(sizeof(Blk), 1)) == nil)
sysfatal("ream: %r");
+ if((mnt = mallocz(sizeof(Mount), 1)) == nil)
+ sysfatal("ream: alloc mount: %r");
+ fs->super = s;
refblk(s);
sz = d->length;
@@ -130,33 +155,45 @@
s->bp.addr = sz;
s->data = s->buf + Hdrsz;
s->ref = 2;
- fillsuper(s);
- finalize(s);
- syncblk(s);
for(i = 0; i < fs->narena; i++)
if((loadarena(&fs->arenas[i], i*asz)) == -1)
sysfatal("ream: loadarena: %r");
+ if((t = newblk(Tleaf)) == nil)
+ sysfatal("ream: allocate root: %r");
+ refblk(t);
+ initroot(t);
+ finalize(t);
+ syncblk(t);
+
+ mnt->root.ht = 1;
+ mnt->root.bp = t->bp;
+
/*
* Now that we have a completely empty fs, give it
- * a single root block that the tree will insert
+ * a single snap block that the tree will insert
* into, and take a snapshot as the initial state.
*/
if((r = newblk(Tleaf)) == nil)
- sysfatal("ream: allocate root: %r");
+ sysfatal("ream: allocate snaps: %r");
refblk(r);
- initroot(r);
+ initsnap(r, t);
finalize(r);
syncblk(r);
+ fs->snap.bp = r->bp;
+ fs->snap.ht = 1;
- fs->super = s;
- fs->root.bp = r->bp;
- fs->root.ht = 1;
- snapshot();
+ fillsuper(s);
+ finalize(s);
+ syncblk(s);
+ sync();
+
+ putblk(t);
putblk(s);
putblk(r);
+ free(mnt);
if(sync() == -1)
sysfatal("ream: sync: %r");
}
--- a/tree.c
+++ b/tree.c
@@ -1189,7 +1189,6 @@
showpath(path, npath);
abort();
}
- snapshot();
if(redo)
goto Again;
return 0;
@@ -1418,20 +1417,35 @@
}
int
-snapshot(void)
+snapshot(Mount *mnt)	/* upsert mnt's root and dead pointers into fs->snap, then sync; returns 0, or -1 on failure */
{
- Arena *a;
- Blk *s;
+ mnt->m.op = Oinsert;	/* m.k and m.v are the buffers fsattach set up for this mount */
+ PBIT32(mnt->m.v + 0, mnt->root.ht);	/* same offsets fsattach uses when decoding the value */
+ PBIT64(mnt->m.v + 4, mnt->root.bp.addr);
+ PBIT64(mnt->m.v + 12, mnt->root.bp.hash);
+ PBIT64(mnt->m.v + 20, mnt->root.bp.gen);
+ PBIT64(mnt->m.v + 28, mnt->dead.addr);
+ PBIT64(mnt->m.v + 36, mnt->dead.hash);
+ PBIT64(mnt->m.v + 44, mnt->dead.gen);	/* 28+8+8; was 42, which clobbered the tail of dead.hash */
+ if(btupsert(&fs->snap, &mnt->m, 1) == -1)
+ return -1;
+ if(sync() == -1)
+ return -1;
+ return 0;
+}
+
+int
+sync(void)
+{
int i, r;
+ Arena *a;
+ Blk *b, *s;
+ qlock(&fs->snaplk);
r = 0;
s = fs->super;
-
- qlock(&fs->snaplk);
- lock(&fs->root.lk);
fillsuper(s);
enqueue(s);
- unlock(&fs->root.lk);
for(i = 0; i < fs->narena; i++){
a = &fs->arenas[i];
@@ -1439,8 +1453,15 @@
if(syncblk(a->logtl) == -1)
r = -1;
}
+ for(b = fs->chead; b != nil; b = b->cnext){
+ if(!(b->flag & Bdirty))
+ continue;
+ if(syncblk(b) == -1)
+ r = -1;
+ }
if(r != -1)
r = syncblk(s);
+
qunlock(&fs->snaplk);
return r;
}