ref: 99c900d718ea1343036656b7384af56bcad2f28f
parent: c46f9289b1ad30f5263292b93fb64c7fdefb35af
author: Ori Bernstein <ori@eigenstate.org>
date: Wed Dec 22 19:06:28 EST 2021
snapshots: record deadlists we don't free them yet.
--- a/blk.c
+++ b/blk.c
@@ -176,7 +176,7 @@
char *p;
assert(off % Blksz == 0);
- assert(op == LogAlloc || op == LogFree);
+ assert(op == LogAlloc || op == LogFree || op == LogDead);
lb = ol->tail;
if(lb == nil || lb->logsz > Logspc - 8){
pb = lb;
@@ -211,16 +211,17 @@
}
}
+ if(len == Blksz && op == LogAlloc)
+ op = LogAlloc1;
+ if(len == Blksz && op == LogFree)
+ op = LogFree1;
+ off |= op;
p = lb->data + lb->logsz;
- if(len == Blksz){
- off |= (op & ~Log2w);
- PBIT64(p, off);
- lb->logsz += 8;
- }else{
- off |= op;
- PBIT64(p+0, off);
+ PBIT64(p, off);
+ lb->logsz += 8;
+ if(op >= Log2wide){
PBIT64(p+8, len);
- lb->logsz += 16;
+ lb->logsz += 8;
}
/* this gets overwritten by the next append */
p = lb->data + lb->logsz;
@@ -228,6 +229,30 @@
return lb;
}
+int
+deadlistappend(Tree *t, Bptr bp)
+{
+ Oplog *l;
+ Blk *b;
+ int i;
+
+ dprint("deadlisted %B\n", bp);
+ l = nil;
+ for(i = 0; i < Ndead; i++){
+ if(bp.gen >= t->prev[i]){
+ l = &t->dead[i];
+ break;
+ }
+ }
+ if((b = logappend(l, nil, bp.addr, Blksz, LogDead)) == nil)
+ return -1;
+ if(l->head == -1)
+ l->head = b->bp.addr;
+ if(l->tail != b)
+ l->tail = b;
+ return 0;
+}
+
/*
* Logs an allocation. Must be called
* with arena lock held. Duplicates some/c
@@ -235,7 +260,7 @@
* recursion.
*/
int
-logop(Arena *a, vlong off, int op)
+freelistappend(Arena *a, vlong off, int op)
{
Blk *b;
@@ -243,7 +268,7 @@
return -1;
if(a->log.head == -1)
a->log.head = b->bp.addr;
- if(b != a->log.tail)
+ if(a->log.tail != b)
a->log.tail = b;
return 0;
}
@@ -251,11 +276,11 @@
int
loadlog(Arena *a)
{
- Blk *b;
vlong bp, ent, off, len;
- uvlong bh;
- char *p, *d;
int op, i, n;
+ uvlong bh;
+ char *d;
+ Blk *b;
bp = a->log.head;
@@ -264,10 +289,9 @@
if((b = readblk(bp, 0)) == nil)
return -1;
cacheblk(b);
- p = b->data;
- bh = GBIT64(p + 0);
+ bh = GBIT64(b->data);
/* the hash covers the log and offset */
- if(bh != siphash(p+8, Blkspc-8)){
+ if(bh != siphash(b->data+8, Blkspc-8)){
werrstr("corrupt log");
return -1;
}
@@ -276,7 +300,7 @@
ent = GBIT64(d);
op = ent & 0xff;
off = ent & ~0xff;
- n = (op & Log2w) ? 16 : 8;
+ n = (op >= Log2wide) ? 16 : 8;
switch(op){
case LogEnd:
dprint("log@%d: end\n", i);
@@ -299,7 +323,7 @@
break;
case LogAlloc:
case LogAlloc1:
- len = (op & Log2w) ? GBIT64(d+8) : Blksz;
+ len = (op >= Log2wide) ? GBIT64(d+8) : Blksz;
dprint("log@%d alloc: %llx+%llx\n", i, off, len);
if(grabrange(a->free, off & ~0xff, len) == -1)
return -1;
@@ -306,7 +330,7 @@
break;
case LogFree:
case LogFree1:
- len = (op & Log2w) ? GBIT64(d+8) : Blksz;
+ len = (op >= Log2wide) ? GBIT64(d+8) : Blksz;
dprint("log@%d free: %llx+%llx\n", i, off, len);
if(freerange(a->free, off & ~0xff, len) == -1)
return -1;
@@ -431,7 +455,7 @@
for(i = Loghdsz; i < Logspc; i += n){
p = b->data + i;
v = GBIT64(p);
- n = (v & Log2w) ? 16 : 8;
+ n = ((v&0xff) >= Log2wide) ? 16 : 8;
if((v&0xff) == LogChain){
nb = v & ~0xff;
break;
@@ -501,7 +525,7 @@
a = getarena(b);
if(freerange(a->free, b, Blksz) == -1)
goto out;
- if(logop(a, b, LogFree) == -1)
+ if(freelistappend(a, b, LogFree) == -1)
goto out;
r = 0;
out:
@@ -540,7 +564,7 @@
unlock(a);
goto Again;
}
- if(logop(a, b, LogAlloc) == -1){
+ if(freelistappend(a, b, LogAlloc) == -1){
unlock(a);
return -1;
}
@@ -608,7 +632,7 @@
PBIT32(p, fs->snap.ht); p += 4;
PBIT64(p, fs->snap.bp.addr); p += 8;
PBIT64(p, fs->snap.bp.hash); p += 8;
- PBIT64(p, fs->nextgen); p += 8;
+ PBIT64(p, fs->snap.bp.gen); p += 8;
PBIT32(p, fs->narena); p += 4;
PBIT64(p, fs->arenasz); p += 8;
PBIT64(p, fs->nextqid); p += 8;
@@ -620,7 +644,6 @@
{
vlong h;
-// assert((b->flag & Bfinal) == 0);
lock(b);
b->flag |= Bfinal;
if(b->type != Traw)
@@ -645,6 +668,8 @@
case Tlog:
h = siphash(b->data + 8, Blkspc-8);
PBIT64(b->data, h);
+ b->bp.hash = blkhash(b);
+ break;
case Traw:
b->bp.hash = blkhash(b);
break;
@@ -725,17 +750,16 @@
free(b);
}
-static void
-deadlist(Bptr bp)
-{
- fprint(2, "cross-snap free: %B\n", bp);
-}
-
void
-freebp(Bptr bp)
+freebp(Tree *t, Bptr bp)
{
Bfree *f;
+ dprint("[%s] free blk %B\n", (t == &fs->snap) ? "snap" : "data", bp);
+ if(bp.gen <= t->prev[0]){
+ deadlistappend(t, bp);
+ return;
+ }
if((f = malloc(sizeof(Bfree))) == nil)
return;
f->bp = bp;
@@ -746,17 +770,13 @@
}
void
-freeblk(Blk *b)
+freeblk(Tree *t, Blk *b)
{
lock(b);
assert((b->flag & Bqueued) == 0);
b->freed = getcallerpc(&b);
unlock(b);
- dprint("freeing block %B @ %ld, from 0x%p\n", b->bp, b->ref, getcallerpc(&b));
- if(b->bp.gen == fs->nextgen)
- freebp(b->bp);
- else
- deadlist(b->bp);
+ freebp(t, b->bp);
}
void
@@ -768,4 +788,81 @@
lock(a);
blkdealloc_lk(bp.addr);
unlock(a);
+}
+
+void
+quiesce(int tid)
+{
+ int i, allquiesced;
+ Bfree *p, *n;
+
+ lock(&fs->activelk);
+ allquiesced = 1;
+ fs->active[tid]++;
+ for(i = 0; i < fs->nproc; i++){
+ /*
+ * Odd parity on quiescence implies
+ * that we're between the exit from
+ * and waiting for the next message
+ * that enters us into the critical
+ * section.
+ */
+ if((fs->active[i] & 1) == 0)
+ continue;
+ if(fs->active[i] == fs->lastactive[i])
+ allquiesced = 0;
+ }
+ if(allquiesced)
+ for(i = 0; i < fs->nproc; i++)
+ fs->lastactive[i] = fs->active[i];
+ unlock(&fs->activelk);
+ if(!allquiesced)
+ return;
+
+ lock(&fs->freelk);
+ p = nil;
+ if(fs->freep != nil){
+ p = fs->freep->next;
+ fs->freep->next = nil;
+ }
+ unlock(&fs->freelk);
+
+ while(p != nil){
+ n = p->next;
+ reclaimblk(p->bp);
+ p = n;
+ }
+ fs->freep = fs->freehd;
+}
+
+int
+sync(void)
+{
+ int i, r;
+ Blk *b, *s;
+ Arena *a;
+
+ qlock(&fs->snaplk);
+ r = 0;
+ s = fs->super;
+ fillsuper(s);
+ enqueue(s);
+
+ for(i = 0; i < fs->narena; i++){
+ a = &fs->arenas[i];
+ finalize(a->log.tail);
+ if(syncblk(a->log.tail) == -1)
+ r = -1;
+ }
+ for(b = fs->chead; b != nil; b = b->cnext){
+ if(!(b->flag & Bdirty))
+ continue;
+ if(syncblk(b) == -1)
+ r = -1;
+ }
+ if(r != -1)
+ r = syncblk(s);
+
+ qunlock(&fs->snaplk);
+ return r;
}
--- a/check.c
+++ b/check.c
@@ -122,6 +122,8 @@
case Oinsert: /* new kvp */
case Odelete: /* delete kvp */
case Oclearb: /* delete kvp if exists */
+ case Orefsnap:
+ case Ounrefsnap:
break;
case Owstat: /* kvp dirent */
if((my.v[0] & ~(Owsize|Owmode|Owmtime)) != 0){
--- a/cons.c
+++ b/cons.c
@@ -34,6 +34,7 @@
static void
snapfs(int fd, char **ap, int na)
{
+ vlong gen, old;
char *e;
Tree t;
@@ -41,10 +42,21 @@
fprint(fd, "snap: open %s: %s\n", ap[0], e);
return;
}
- if((e = snapshot(&t, ap[na-1], 0)) != nil){
+ if((e = snapshot(&t, &gen, &old)) != nil){
fprint(fd, "snap: save %s: %s\n", ap[na-1], e);
return;
}
+ if((e = labelsnap(gen, ap[na-1])) != nil){
+ fprint(fd, "snap: save %s: %s\n", ap[na-1], e);
+ return;
+ }
+ if(na <= 1 || strcmp(ap[0], ap[1]) == 0){
+ /* the label moved */
+ if((e = unrefsnap(old)) != nil){
+ fprint(fd, "snap: unref old: %s\n", e);
+ return;
+ }
+ }
fprint(fd, "snap %s: ok\n", ap[na-1]);
}
@@ -87,7 +99,7 @@
/* debugging */
{.name="show", .sub="cache", .minarg=0, .maxarg=0, .fn=showcache},
- {.name="show", .sub="fs", .minarg=0, .maxarg=1, .fn=showfs},
+ {.name="show", .sub="tree", .minarg=0, .maxarg=1, .fn=showtree},
{.name="show", .sub="snap", .minarg=0, .maxarg=1, .fn=showsnap},
{.name="show", .sub="fid", .minarg=0, .maxarg=0, .fn=showfid},
{.name="show", .sub="free", .minarg=0, .maxarg=0, .fn=showfree},
--- a/dat.h
+++ b/dat.h
@@ -46,16 +46,18 @@
*/
Loghdsz = 8, /* log hash */
Keymax = 128, /* key data limit */
- Inlmax = 256, /* inline data limit */
+ Inlmax = 512, /* inline data limit */
Ptrsz = 24, /* off, hash, gen */
Pptrsz = 26, /* off, hash, gen, fill */
Fillsz = 2, /* block fill count */
Offksz = 17, /* type, qid, off */
Snapsz = 9, /* tag, snapid */
- Treesz = 4+Ptrsz+Ptrsz, /* height, root, deadlist */
- Wstatmax = 1+4+8+8+8, /* op, mode, len, mtime, atime */
+ Ndead = 8, /* number of deadlist heads */
+ Deadsz = 8+8+8+8+8, /* prev, head, head hash, tail, tail hash */
+ Treesz = 8+Ptrsz+Ndead*Deadsz, /* ref, height, root, deadlist */
Kvmax = Keymax + Inlmax, /* Key and value */
Kpmax = Keymax + Ptrsz, /* Key and pointer */
+ Wstatmax = 4+8+8+8, /* mode, size, atime, mtime */
Hdrsz = 10,
@@ -76,7 +78,7 @@
*/
Kdat, /* qid[8] off[8] => ptr[16]: pointer to data page */
Kent, /* pqid[8] name[n] => dir[n]: serialized Dir */
- Kdset, /* name[] => snapid[]: dataset (snapshot ref) */
+ Klabel, /* name[] => snapid[]: dataset (snapshot ref) */
Ksnap, /* sid[8] => ref[8], tree[52]: snapshot root */
Ksuper, /* qid[8] => pqid[8]: parent dir */
};
@@ -243,20 +245,30 @@
enum {
GBraw = 1<<0,
GBwrite = 1<<1,
+ GBunchk = 1<<2,
};
enum {
- Onop = 0, /* nothing */
- Oinsert = 1, /* new kvp */
- Odelete = 2, /* delete kvp */
- Oclearb = 3, /* free block ptr if exists */
- Owstat = 4, /* kvp dirent */
+ Onop, /* nothing */
+ Oinsert, /* new kvp */
+ Odelete, /* delete kvp */
+ Oclearb, /* free block ptr if exists */
+ Owstat, /* kvp dirent */
+ Orefsnap, /* increment snap refcount */
+ Ounrefsnap, /* decrement snap refcount */
+ Nmsgtype, /* maximum message type */
+};
+/*
+ * Wstat ops come with associated data, in the order
+ * of the bit flags.
+ */
+enum{
/* wstat flags */
- Owsize = 1<<4,
- Owmode = 1<<5,
- Owmtime = 1<<6,
- Owatime = 1<<7,
+ Owsize = 1<<0, /* [8]fsize: update file size */
+ Owmode = 1<<1, /* [4]mode: update file mode */
+ Owmtime = 1<<2, /* [8]mtime: update mtime, in nsec */
+ Owatime = 1<<3, /* [8]atime: update atime, in nsec */
};
/*
@@ -265,18 +277,24 @@
enum {
/* 1-wide entries */
LogAlloc1, /* alloc a block */
- LogFree1, /* alloc a block */
- LogDead1, /* free a block */
+ LogFree1, /* free a block */
LogFlush, /* flush log, bump gen */
LogChain, /* point to next log block */
LogEnd, /* last entry in log */
/* 2-wide entries */
- Log2w = 1<<5,
- LogAlloc = LogAlloc1|Log2w, /* alloc a range */
- LogFree = LogFree1|Log2w, /* free a range */
+#define Log2wide LogAlloc
+ LogAlloc, /* alloc a range */
+ LogFree, /* free a range */
+ LogDead , /* deadlist a block */
};
+struct Oplog {
+ vlong head; /* log head */
+ vlong hash; /* log head hash */
+ Blk *tail; /* tail block open for writing */
+};
+
struct Arange {
Avl;
vlong off;
@@ -304,9 +322,11 @@
struct Tree {
Lock lk;
- Bptr bp;
- Bptr dp;
+ int ref;
int ht;
+ Bptr bp;
+ vlong prev[Ndead];
+ Oplog dead[Ndead];
};
struct Bfree {
@@ -366,12 +386,6 @@
int cmax;
};
-struct Oplog {
- vlong head; /* log head */
- vlong hash; /* log head hash */
- Blk *tail; /* tail block open for writing */
-};
-
struct Arena {
Lock;
Avltree *free;
@@ -427,6 +441,7 @@
struct Mount {
long ref;
+ vlong gen;
char *name;
Tree root;
};
--- a/dump.c
+++ b/dump.c
@@ -27,8 +27,8 @@
case Kent: /* pqid[8] name[n] => dir[n]: serialized Dir */
n = fmtprint(fmt, "ent dir:%llx, name:\"%.*s\")", GBIT64(k->k+1), k->nk-11, k->k+11);
break;
- case Kdset: /* name[n] => tree[24]: snapshot ref */
- n = fmtprint(fmt, "dset name:\"%.*s\"", k->nk-1, k->k+1);
+ case Klabel: /* name[n] => tree[24]: snapshot ref */
+ n = fmtprint(fmt, "label name:\"%.*s\"", k->nk-1, k->k+1);
break;
case Ksnap: /* name[n] => tree[24]: snapshot root */
n = fmtprint(fmt, "snap id:\"%llx\"", GBIT64(k->k+1));
@@ -46,15 +46,15 @@
static int
showval(Fmt *fmt, Kvp *v, int op, int flg)
{
+ int n, ws;
char *p;
- Bptr bp;
+ Tree t;
Dir d;
- int n, wop, ht;
n = 0;
if(flg){
assert(v->nv == Ptrsz+2);
- n = fmtprint(fmt, "(%B,%d)", unpackbp(v->v), GBIT16(v->v+Ptrsz));
+ n = fmtprint(fmt, "(%B,%d)", unpackbp(v->v, v->nv), GBIT16(v->v+Ptrsz));
return n;
}
switch(v->k[0]){
@@ -66,7 +66,7 @@
break;
case Onop:
case Oinsert:
- n = fmtprint(fmt, "ptr:%B", unpackbp(v->v));
+ n = fmtprint(fmt, "ptr:%B", unpackbp(v->v, v->nv));
break;
}
case Kent: /* pqid[8] name[n] => dir[n]: serialized Dir */
@@ -85,19 +85,23 @@
break;
case Owstat:
p = v->v;
- wop = *p++;
- if(wop & Owmtime){
- n += fmtprint(fmt, "mtime:%llx ", GBIT64(p));
- p += 8;
- }
- if(wop & Owsize){
+ ws = *p++;
+ if(ws & Owsize){
n += fmtprint(fmt, "size:%llx ", GBIT64(p));
p += 8;
}
- if(wop & Owmode){
+ if(ws & Owmode){
n += fmtprint(fmt, "mode:%o ", GBIT32(p));
p += 4;
}
+ if(ws & Owmtime){
+ n += fmtprint(fmt, "mtime:%llx ", GBIT64(p));
+ p += 8;
+ }
+ if(ws & Owatime){
+ n += fmtprint(fmt, "mtime:%llx ", GBIT64(p));
+ p += 8;
+ }
if(p != v->v + v->nv)
abort();
break;
@@ -104,13 +108,21 @@
}
break;
case Ksnap: /* name[n] => dent[16] ptr[16]: snapshot root */
- ht = GBIT32(v->v);
- bp.addr = GBIT64(v->v+4);
- bp.hash = GBIT64(v->v+12);
- bp.gen = GBIT64(v->v+20);
- n = fmtprint(fmt, "ht:%d, ptr:%B", ht, bp);
+ switch(op){
+ case Orefsnap:
+ n = fmtprint(fmt, "ref");
+ break;
+ case Ounrefsnap:
+ n = fmtprint(fmt, "unref");
+ break;
+ default:
+ if(unpacktree(&t, v->v, v->nv) == nil)
+ return fmtprint(fmt, "corrupt tree");
+ n = fmtprint(fmt, "ref: %d, ht: %d, bp: %B, prev=%lld", t.ref, t.ht, t.bp, t.prev[0]);
+ break;
+ }
break;
- case Kdset:
+ case Klabel:
n = fmtprint(fmt, "snap id:\"%llx\"", GBIT64(v->v+1));
break;
case Ksuper: /* qid[8] => pqid[8]: parent dir */
@@ -136,11 +148,13 @@
int
Mconv(Fmt *fmt)
{
- char *opname[] = {
+ char *opname[Nmsgtype] = {
[Oinsert] "Oinsert",
[Odelete] "Odelete",
[Oclearb] "Oclearb",
[Owstat] "Owstat",
+ [Orefsnap] "Orefsnap",
+ [Ounrefsnap] "Ounrefsnap",
};
Msg *m;
int f, n;
@@ -237,7 +251,7 @@
getval(b, i, &kv);
if(b->type == Tpivot){
fprint(fd, "%.*s[%03d]|%#P\n", 4*indent, spc, i, &kv);
- bp = unpackbp(kv.v);
+ bp = unpackbp(kv.v, kv.nv);
if((c = getblk(bp, 0)) == nil)
sysfatal("failed load: %r");
if(recurse)
@@ -257,40 +271,85 @@
}
void
-showtree(int fd, Tree *t, char *m)
+showtree(int fd, char **ap, int na)
{
+ char *e, *name;
+ Tree t;
Blk *b;
int h;
- fprint(fd, "=== [%s] %B\n", m, fs->snap.bp);
- fprint(fd, "\tht: %d\n", fs->snap.ht);
- fprint(fd, "\trt: %B\n", fs->snap.bp);
- b = getroot(t, &h);
- rshowblk(fd, b, 0, 1);
- putblk(b);
-}
-
-void
-showfs(int fd, char **ap, int na)
-{
- char *e, *name;
- Tree t;
-
name = "main";
memset(&t, 0, sizeof(t));
if(na == 1)
name = ap[0];
- if((e = opensnap(&t, name)) != nil){
+ if(strcmp(name, "dump") == 0)
+ t = fs->snap;
+ else if((e = opensnap(&t, name)) != nil){
fprint(fd, "open %s: %s\n", name, e);
return;
}
- showtree(fd, &t, name);
+ b = getroot(&t, &h);
+ fprint(fd, "=== [%s] %B @%d\n", name, t.bp, t.ht);
+ rshowblk(fd, b, 0, 1);
+ putblk(b);
}
void
-showsnap(int fd, char **, int)
+showsnap(int fd, char **ap, int na)
{
- showtree(fd, &fs->snap, "snaps");
+ char *e, pfx[Snapsz];
+ int i, sz, done;
+ vlong id;
+ Scan *s;
+ Tree t;
+
+ if((s = mallocz(sizeof(Scan), 1)) == nil){
+ fprint(fd, "no memory\n");
+ return;
+ }
+ pfx[0] = Ksnap;
+ sz = 1;
+ if(na != 0){
+ sz = Snapsz;
+ id = atoll(ap[0]);
+ PBIT64(pfx+1, id);
+ }
+ if((e = btscan(&fs->snap, s, pfx, sz)) != nil){
+ fprint(fd, "scan: %s\n", e);
+ btdone(s);
+ return;
+ }
+ while(1){
+ if((e = btnext(s, &s->kv, &done)) != nil){
+ fprint(fd, "scan: %s\n", e);
+ break;
+ }
+ if(done)
+ break;
+ fprint(fd, "snap: %P\n", &s->kv);
+ if(unpacktree(&t, s->kv.v, s->kv.nv) == nil){
+ fprint(fd, "unpack: garbled tree\n");
+ break;
+ }
+ fprint(fd, "\tref:\t%d\n", t.ref);
+ fprint(fd, "\tht:\t%d\n", t.ht);
+ fprint(fd, "\taddr:\t%llx\n", t.bp.addr);
+ fprint(fd, "\thash:\t%llx\n", t.bp.hash);
+ fprint(fd, "\tgen:\t%llx\n", t.bp.gen);
+ for(i = 0; i < Ndead; i++){
+ fprint(fd, "\tdeadlist %d\n", i);
+ fprint(fd, "\t\tprev:\t%llx\n", t.prev[i]);
+ fprint(fd, "\t\tfheadp:\t%llx\n", t.dead[i].head);
+ fprint(fd, "\t\tfheadh:\t%llx\n", t.dead[i].hash);
+ if(t.dead[i].tail != nil){
+ fprint(fd, "\t\tftailp:%llx\n", t.dead[i].tail->bp.addr);
+ fprint(fd, "\t\tftailh:%llx\n", t.dead[i].tail->bp.hash);
+ }else{
+ fprint(fd, "\t\tftailp:\t-1\n");
+ fprint(fd, "\t\tftailh:\t-1\n");
+ }
+ }
+ }
}
void
@@ -340,6 +399,7 @@
p[i].pullsz, p[i].npull,
p[i].lo, p[i].hi);
}
+#undef A
}
void
--- a/fns.h
+++ b/fns.h
@@ -7,7 +7,7 @@
#pragma varargck type "X" char*
#pragma varargck type "Q" Qid
-extern Gefs *fs;
+extern Gefs* fs;
extern int debug;
Blk* newblk(int type);
@@ -22,8 +22,8 @@
int syncblk(Blk*);
void enqueue(Blk*);
void quiesce(int);
-void freeblk(Blk*);
-void freebp(Bptr);
+void freeblk(Tree*, Blk*);
+void freebp(Tree*, Bptr);
void reclaimblk(Bptr);
ushort blkfill(Blk*);
uvlong blkhash(Blk*);
@@ -30,7 +30,11 @@
u32int ihash(vlong);
void finalize(Blk*);
char* fillsuper(Blk*);
-char* snapshot(Tree*, char*, int);
+char* snapshot(Tree*, vlong*, vlong*);
+char* labelsnap(vlong, char*);
+char* unlabelsnap(vlong, char*);
+char* refsnap(vlong);
+char* unrefsnap(vlong);
char* opensnap(Tree*, char*);
uvlong siphash(void*, usize);
void reamfs(char*);
@@ -64,9 +68,8 @@
void initshow(void);
void showblk(int, Blk*, char*, int);
void showpath(int, Path*, int);
-void showtree(int, Tree*, char*);
-void showfs(int, char**, int);
+void showtree(int, char**, int);
void showsnap(int, char**, int);
void showfid(int, char**, int);
void showcache(int, char**, int);
@@ -78,25 +81,28 @@
if(debug) fprint(2, __VA_ARGS__); \
}while(0)
-char *pack8(int*, char*, char*, uchar);
-char *pack16(int*, char*, char*, ushort);
-char *pack32(int*, char*, char*, uint);
-char *pack64(int*, char*, char*, uvlong);
-char *packstr(int*, char*, char*, char*);
+char* pack8(int*, char*, char*, uchar);
+char* pack16(int*, char*, char*, ushort);
+char* pack32(int*, char*, char*, uint);
+char* pack64(int*, char*, char*, uvlong);
+char* packstr(int*, char*, char*, char*);
/* void* is a bit hacky, but we want both signed and unsigned to work */
-char *unpack8(int*, char*, char*, void*);
-char *unpack16(int*, char*, char*, void*);
-char *unpack32(int*, char*, char*, void*);
-char *unpack64(int*, char*, char*, void*);
-char *unpackstr(int*, char*, char*, char**);
+char* unpack8(int*, char*, char*, void*);
+char* unpack16(int*, char*, char*, void*);
+char* unpack32(int*, char*, char*, void*);
+char* unpack64(int*, char*, char*, void*);
+char* unpackstr(int*, char*, char*, char**);
int dir2kv(vlong, Dir*, Kvp*, char*, int);
int kv2statbuf(Kvp*, char*, int);
int kv2dir(Kvp*, Dir*);
int kv2qid(Kvp*, Qid*);
-char *packbp(char*, Bptr*);
-Bptr unpackbp(char*);
+char* packbp(char*, int, Bptr*);
+Bptr unpackbp(char*, int);
+char* packtree(char*, int, Tree*);
+Tree* unpacktree(Tree*, char*, int);
+char* packdkey(char*, int, vlong, char*);
/* fmt */
int Bconv(Fmt*);
@@ -111,8 +117,8 @@
void bufinsert(Blk *, Msg *);
void victim(Blk *b, Path *p);
-Chan *mkchan(int);
-Fmsg *chrecv(Chan*);
+Chan* mkchan(int);
+Fmsg* chrecv(Chan*);
void chsend(Chan*, Fmsg*);
void runfs(int, void*);
void runwrite(int, void*);
--- a/fs.c
+++ b/fs.c
@@ -9,6 +9,28 @@
static char* clearb(Fid*, vlong, vlong);
+// FIXME: hack.
+static char*
+updatesnap(Fid *f)
+{
+ vlong gen, old;
+ char *e;
+
+ if((e = snapshot(&f->mnt->root, &gen, &old)) != nil){
+ fprint(2, "snap: save %s: %s\n", f->mnt->name, e);
+ abort();
+ }
+ if((e = labelsnap(gen, f->mnt->name)) != nil){
+ fprint(2, "snap: save %s: %s\n", f->mnt->name, e);
+ abort();
+ }
+ if((e = unrefsnap(old)) != nil){
+ fprint(2, "snap: unref old: %s\n", e);
+ abort();
+ }
+ return nil;
+}
+
static int
okname(char *name)
{
@@ -150,6 +172,10 @@
return e;
}
+/*
+ * Clears all blocks in that intersect with
+ * the range listed.
+ */
static char*
clearb(Fid *f, vlong o, vlong sz)
{
@@ -156,6 +182,7 @@
char *e, buf[Offksz];
Msg m;
+ o &= ~(Blksz - 1);
for(; o < sz; o += Blksz){
m.k = buf;
m.nk = sizeof(buf);
@@ -174,7 +201,7 @@
static int
readb(Fid *f, char *d, vlong o, vlong n, int sz)
{
- char *e, buf[Offksz], kvbuf[Offksz+Ptrsz];
+ char *e, buf[17], kvbuf[17+32];
vlong fb, fo;
Bptr bp;
Blk *b;
@@ -199,7 +226,7 @@
return -1;
}
- bp = unpackbp(kv.v);
+ bp = unpackbp(kv.v, kv.nv);
if((b = getblk(bp, GBraw)) == nil)
return -1;
if(fo+n > Blksz)
@@ -213,12 +240,12 @@
}
static int
-writeb(Fid *f, Msg *m, char *s, vlong o, vlong n, vlong sz)
+writeb(Fid *f, Msg *m, Bptr *ret, char *s, vlong o, vlong n, vlong sz)
{
char buf[Kvmax];
vlong fb, fo;
- Bptr bp;
Blk *b, *t;
+ Bptr bp;
Kvp kv;
fb = o & ~(Blksz-1);
@@ -233,14 +260,13 @@
if(b == nil)
return -1;
if(fb < sz && (fo != 0 || n != Blksz)){
- dprint("\tappending to block %B\n", b->bp);
if(lookup(f, m, &kv, buf, sizeof(buf), 0) != nil)
return -1;
- bp = unpackbp(kv.v);
+ bp = unpackbp(kv.v, kv.nv);
if((t = getblk(bp, GBraw)) == nil)
return -1;
memcpy(b->buf, t->buf, Blksz);
- freeblk(t);
+ freeblk(&f->mnt->root, t);
putblk(t);
}
if(fo+n > Blksz)
@@ -248,9 +274,8 @@
memcpy(b->buf+fo, s, n);
enqueue(b);
- bp.gen = fs->nextgen;
- assert(b->flag & Bfinal);
- packbp(m->v, &b->bp);
+ packbp(m->v, m->nv, &b->bp);
+ *ret = b->bp;
putblk(b);
return n;
}
@@ -258,41 +283,36 @@
static Dent*
getdent(vlong pqid, Dir *d)
{
- Dent *e;
- char *ek, *eb;
+ Dent *de;
+ char *e;
u32int h;
- int err;
h = ihash(d->qid.path) % Ndtab;
lock(&fs->dtablk);
- for(e = fs->dtab[h]; e != nil; e = e->next){
- if(e->qid.path == d->qid.path){
- ainc(&e->ref);
+ for(de = fs->dtab[h]; de != nil; de = de->next){
+ if(de->qid.path == d->qid.path){
+ ainc(&de->ref);
unlock(&fs->dtablk);
- return e;
+ return de;
}
}
- err = 0;
- if((e = mallocz(sizeof(Dent), 1)) == nil)
+ if((de = mallocz(sizeof(Dent), 1)) == nil)
return nil;
- e->ref = 1;
- e->qid = d->qid;
- e->length = d->length;
- e->k = e->buf;
- e->nk = 9 + strlen(d->name) + 1;
+ de->ref = 1;
+ de->qid = d->qid;
+ de->length = d->length;
+ de->k = de->buf;
+ de->nk = 9 + strlen(d->name) + 1;
- ek = e->buf;
- eb = ek + sizeof(e->buf);
- ek = pack8(&err, ek, eb, Kent);
- ek = pack64(&err, ek, eb, pqid);
- ek = packstr(&err, ek, eb, d->name);
- e->nk = ek - e->buf;
- e->next = fs->dtab[h];
- fs->dtab[h] = e;
+ if((e = packdkey(de->buf, sizeof(de->buf), pqid, d->name)) == nil)
+ return nil;
+ de->nk = e - de->buf;
+ de->next = fs->dtab[h];
+ fs->dtab[h] = de;
unlock(&fs->dtablk);
- return e;
+ return de;
}
static void
@@ -483,8 +503,7 @@
static void
fsattach(Fmsg *m, int iounit)
{
- char *e, *p, *ep, dbuf[Kvmax], kvbuf[Kvmax];
- int err;
+ char *e, *p, dbuf[Kvmax], kvbuf[Kvmax];
Mount *mnt;
Dent *de;
Fcall r;
@@ -505,18 +524,14 @@
return;
}
- err = 0;
- p = dbuf;
- ep = dbuf + sizeof(dbuf);
- p = pack8(&err, p, ep, Kent);
- p = pack64(&err, p, ep, -1ULL);
- p = packstr(&err, p, ep, "");
- if(err)
- abort();
+ if((p = packdkey(dbuf, sizeof(dbuf), -1ULL, "")) == nil){
+ rerror(m, Elength);
+ return;
+ }
dk.k = dbuf;
dk.nk = p - dbuf;
- if(btlookup(&mnt->root, &dk, &kv, kvbuf, sizeof(kvbuf)) != nil){
- rerror(m, Efs);
+ if((e = btlookup(&mnt->root, &dk, &kv, kvbuf, sizeof(kvbuf))) != nil){
+ rerror(m, e);
return;
}
if(kv2dir(&kv, &d) == -1){
@@ -557,9 +572,8 @@
void
fswalk(Fmsg *m)
{
- char *p, *e, *estr, kbuf[Maxent], kvbuf[Kvmax];
+ char *p, *e, kbuf[Maxent], kvbuf[Kvmax];
vlong up, prev;
- int i, err;
Fid *o, *f;
Dent *dent;
Fcall r;
@@ -566,6 +580,7 @@
Kvp kv;
Key k;
Dir d;
+ int i;
if((o = getfid(m->fid)) == nil){
rerror(m, Efid);
@@ -575,26 +590,20 @@
rerror(m, Einuse);
return;
}
- err = 0;
- estr = nil;
+ e = nil;
up = o->qpath;
prev = o->qpath;
r.type = Rwalk;
for(i = 0; i < m->nwname; i++){
up = prev;
- p = kbuf;
- e = kbuf + sizeof(kbuf);
- p = pack8(&err, p, e, Kent);
- p = pack64(&err, p, e, up);
- p = packstr(&err, p, e, m->wname[i]);
- if(err){
- rerror(m, "bad walk: %r");
+ if((p = packdkey(kbuf, sizeof(kbuf), up, m->wname[i])) == nil){
+ rerror(m, Elength);
putfid(o);
return;
}
k.k = kbuf;
k.nk = p - kbuf;
- if((estr = lookup(o, &k, &kv, kvbuf, sizeof(kvbuf), 0)) != nil){
+ if((e = lookup(o, &k, &kv, kvbuf, sizeof(kvbuf), 0)) != nil){
break;
}
if(kv2dir(&kv, &d) == -1){
@@ -607,7 +616,7 @@
}
r.nwqid = i;
if(i == 0 && m->nwname != 0){
- rerror(m, estr);
+ rerror(m, e);
putfid(o);
return;
}
@@ -779,7 +788,7 @@
rerror(m, e);
goto Out;
}
- if((e = snapshot(&f->mnt->root, f->mnt->name, 1)) != nil){
+ if((e = updatesnap(f)) != nil){
rerror(m, e);
goto Out;
}
@@ -903,7 +912,7 @@
r.type = Rcreate;
r.qid = d.qid;
r.iounit = f->iounit;
- if((e = snapshot(&f->mnt->root, f->mnt->name, 1)) != nil){
+ if((e = updatesnap(f)) != nil){
rerror(m, e);
putfid(f);
return;
@@ -945,7 +954,7 @@
runlock(f->dent);
clunkfid(f);
- if((e = snapshot(&f->mnt->root, f->mnt->name, 1)) != nil){
+ if((e = updatesnap(f)) != nil){
rerror(m, e);
putfid(f);
return;
@@ -1161,13 +1170,14 @@
void
fswrite(Fmsg *m)
{
- char sbuf[Wstatmax], offbuf[4][Offksz+Ptrsz];
+ char sbuf[Wstatmax], kbuf[4][Offksz], vbuf[4][Ptrsz];
char *p, *e;
vlong n, o, c;
+ int i, j;
+ Bptr bp[4];
Msg kv[4];
Fcall r;
Fid *f;
- int i;
if((f = getfid(m->fid)) == nil){
rerror(m, Efid);
@@ -1186,14 +1196,14 @@
c = m->count;
for(i = 0; i < nelem(kv)-1 && c != 0; i++){
kv[i].op = Oinsert;
- kv[i].k = offbuf[i];
- kv[i].nk = Offksz;
- kv[i].v = offbuf[i]+Offksz;
- kv[i].nv = 16;
- n = writeb(f, &kv[i], p, o, c, f->dent->length);
+ kv[i].k = kbuf[i];
+ kv[i].nk = sizeof(kbuf[i]);
+ kv[i].v = vbuf[i];
+ kv[i].nv = sizeof(vbuf[i]);
+ n = writeb(f, &kv[i], &bp[i], p, o, c, f->dent->length);
if(n == -1){
- // badwrite(f, i);
- // FIXME: free pages
+ for(j = 0; j < i; j++)
+ freebp(&f->mnt->root, bp[i]);
wunlock(f->dent);
rerror(m, "%r");
putfid(f);
@@ -1224,7 +1234,7 @@
}
wunlock(f->dent);
- if((e = snapshot(&f->mnt->root, f->mnt->name, 1)) != nil){
+ if((e = updatesnap(f)) != nil){
rerror(m, e);
putfid(f);
return;
--- a/load.c
+++ b/load.c
@@ -56,19 +56,19 @@
sysfatal("corrupt superblock: bad magic");
p = b->data + 8;
- dirty = GBIT32(p); p += 4; /* dirty */
- blksz = GBIT32(p); p += 4;
- bufspc = GBIT32(p); p += 4;
- hdrsz = GBIT32(p); p += 4;
- fs->snap.ht = GBIT32(p); p += 4;
- fs->snap.bp.addr = GBIT64(p); p += 8;
- fs->snap.bp.hash = GBIT64(p); p += 8;
- fs->snap.bp.gen = 1;
- fs->nextgen = GBIT64(p); p += 8;
- fs->narena = GBIT32(p); p += 4;
- fs->arenasz = GBIT64(p); p += 8;
- fs->nextqid = GBIT64(p); p += 8;
+ dirty = GBIT32(p); p += 4; /* dirty */
+ blksz = GBIT32(p); p += 4;
+ bufspc = GBIT32(p); p += 4;
+ hdrsz = GBIT32(p); p += 4;
+ fs->snap.ht = GBIT32(p); p += 4;
+ fs->snap.bp.addr = GBIT64(p); p += 8;
+ fs->snap.bp.hash = GBIT64(p); p += 8;
+ fs->snap.bp.gen = GBIT64(p); p += 8;
+ fs->narena = GBIT32(p); p += 4;
+ fs->arenasz = GBIT64(p); p += 8;
+ fs->nextqid = GBIT64(p); p += 8;
fs->super = b;
+ fs->nextgen = fs->snap.bp.gen + 1;
fprint(2, "load: %8s\n", p);
fprint(2, "\tsnaptree:\t%B\n", fs->snap.bp);
@@ -75,6 +75,7 @@
fprint(2, "\tarenas:\t%d\n", fs->narena);
fprint(2, "\tarenasz:\t%lld\n", fs->arenasz);
fprint(2, "\tnextqid:\t%lld\n", fs->nextqid);
+ fprint(2, "\tnextgen:\t%lld\n", fs->nextgen);
if((fs->arenas = calloc(fs->narena, sizeof(Arena))) == nil)
sysfatal("malloc: %r");
for(i = 0; i < fs->narena; i++)
--- a/main.c
+++ b/main.c
@@ -98,9 +98,9 @@
* sanity checks -- I've tuned these to stupid
* values in the past.
*/
-// assert(4*Kpmax < Pivspc);
-// assert(2*Msgmax < Bufspc);
-
+ assert(4*Kpmax < Pivspc);
+ assert(2*Msgmax < Bufspc);
+ assert(Treesz < Inlmax);
initfs(cachesz);
initshow();
quotefmtinstall();
--- a/pack.c
+++ b/pack.c
@@ -267,21 +267,106 @@
}
char*
-packbp(char *p, Bptr *bp)
+packbp(char *p, int sz, Bptr *bp)
{
- PBIT64(p + 0, bp->addr);
- PBIT64(p + 8, bp->hash);
- PBIT64(p + 16, bp->gen);
- return p + 24;
+ assert(sz >= Ptrsz);
+ PBIT64(p, bp->addr); p += 8;
+ PBIT64(p, bp->hash); p += 8;
+ PBIT64(p, bp->gen); p += 8;
+ return p;
}
Bptr
-unpackbp(char *p)
+unpackbp(char *p, int sz)
{
Bptr bp;
- bp.addr = GBIT64(p + 0);
- bp.hash = GBIT64(p + 8);
- bp.gen = GBIT64(p + 16);
+ assert(sz >= Ptrsz);
+ bp.addr = GBIT64(p); p += 8;
+ bp.hash = GBIT64(p); p += 8;
+ bp.gen = GBIT64(p);
return bp;
+}
+
+char*
+packdkey(char *p, int sz, vlong up, char *name)
+{
+ char *ep;
+ int err;
+
+ err = 0;
+ ep = p + sz;
+ p = pack8(&err, p, ep, Kent);
+ p = pack64(&err, p, ep, up);
+ p = packstr(&err, p, ep, name);
+ if(err)
+ return 0;
+ return p;
+}
+
+Tree*
+unpacktree(Tree *t, char *p, int sz)
+{
+ int i, j;
+ Bptr bp;
+ Blk *b;
+
+ assert(sz >= Treesz);
+ memset(t, 0, sizeof(Tree));
+ t->ref = GBIT32(p); p += 4;
+ t->ht = GBIT32(p); p += 4;
+ t->bp.addr = GBIT64(p); p += 8;
+ t->bp.hash = GBIT64(p); p += 8;
+ t->bp.gen = GBIT64(p); p += 8;
+ for(i = 0; i < Ndead; i++){
+ t->prev[i] = GBIT64(p); p += 8;
+ t->dead[i].head = GBIT64(p); p += 8;
+ t->dead[i].hash = GBIT64(p); p += 8;
+ bp.addr = GBIT64(p); p += 8;
+ bp.hash = GBIT64(p); p += 8;
+ bp.gen = -1;
+ if(bp.addr == -1)
+ continue;
+ if((b = getblk(bp, 0)) == nil){
+ for(j = 0; j < i; j++)
+ putblk(t->dead[j].tail);
+ return nil;
+ }
+ t->dead[i].tail = b;
+ cacheblk(b);
+ }
+ return t;
+}
+
+char*
+packtree(char *p, int sz, Tree *t)
+{
+ vlong tladdr, tlhash;
+ Blk *tl;
+ int i;
+
+ assert(sz >= Treesz);
+ PBIT32(p, t->ref); p += 4;
+ PBIT32(p, t->ht); p += 4;
+ PBIT64(p, t->bp.addr); p += 8;
+ PBIT64(p, t->bp.hash); p += 8;
+ PBIT64(p, t->bp.gen); p += 8;
+ for(i = 0; i < Ndead; i++){
+ tladdr = -1;
+ tlhash = -1;
+ if(t->dead[i].tail != nil){
+ tl = t->dead[i].tail;
+ lock(tl);
+ assert(tl->flag & Bfinal);
+ unlock(tl);
+ tladdr = tl->bp.addr;
+ tlhash = tl->bp.hash;
+ }
+ PBIT64(p, t->prev[i]); p += 8;
+ PBIT64(p, t->dead[i].head); p += 8;
+ PBIT64(p, t->dead[i].hash); p += 8;
+ PBIT64(p, tladdr); p += 8;
+ PBIT64(p, tlhash); p += 8;
+ }
+ return p;
}
--- a/ream.c
+++ b/ream.c
@@ -43,31 +43,38 @@
static void
initsnap(Blk *s, Blk *r)
{
- char kbuf[Keymax], vbuf[Treesz];
+ char *p, kbuf[Keymax], vbuf[Treesz];
+ Tree t;
Kvp kv;
+ int i;
kv.k = kbuf;
kv.v = vbuf;
- kv.k[0] = Kdset;
+ kv.k[0] = Klabel;
kv.nk = 1 + snprint(kv.k+1, sizeof(kbuf)-1, "main");
kv.v[0] = Ksnap;
PBIT64(kv.v+1, 0);
kv.nv = Snapsz;
setval(s, 0, &kv);
-
+
kv.k[0] = Ksnap;
PBIT64(kv.k+1, 0);
kv.nk = Snapsz;
- kv.nv = sizeof(vbuf);
- PBIT32(kv.v + 0, 1);
- PBIT64(kv.v + 4, r->bp.addr);
- PBIT64(kv.v + 12, r->bp.hash);
- PBIT64(kv.v + 20, r->bp.gen);
- PBIT64(kv.v + 28, -1ULL);
- PBIT64(kv.v + 36, -1ULL);
- PBIT64(kv.v + 42, -1ULL);
+ memset(&t, 0, sizeof(Tree));
+ t.ref = 1;
+ t.ht = 1;
+ t.bp = r->bp;
+ for(i = 0; i < Ndead; i++){
+ t.prev[i] = -1;
+ t.dead[i].head = -1;
+ t.dead[i].hash = -1;
+ t.dead[i].tail = nil;
+ }
+ p = packtree(vbuf, sizeof(vbuf), &t);
+ kv.v = vbuf;
+ kv.nv = p - vbuf;
setval(s, 1, &kv);
}
@@ -120,7 +127,7 @@
reamfs(char *dev)
{
vlong sz, asz, off;
- Blk *s, *r, *t;
+ Blk *sb, *rb, *tb;
Mount *mnt;
Dir *d;
int i;
@@ -131,12 +138,12 @@
sysfatal("ream: %r");
if(d->length < 64*MiB)
sysfatal("ream: disk too small");
- if((s = mallocz(sizeof(Blk), 1)) == nil)
+ if((sb = mallocz(sizeof(Blk), 1)) == nil)
sysfatal("ream: %r");
if((mnt = mallocz(sizeof(Mount), 1)) == nil)
sysfatal("ream: alloc mount: %r");
- fs->super = s;
- refblk(s);
+ fs->super = sb;
+ refblk(sb);
sz = d->length;
sz = sz - (sz % Blksz) - Blksz;
@@ -161,24 +168,24 @@
asz += off;
}
- s->type = Tsuper;
- s->bp.addr = sz;
- s->data = s->buf + Hdrsz;
- s->ref = 2;
+ sb->type = Tsuper;
+ sb->bp.addr = sz;
+ sb->data = sb->buf + Hdrsz;
+ sb->ref = 2;
for(i = 0; i < fs->narena; i++)
if((loadarena(&fs->arenas[i], i*asz)) == -1)
sysfatal("ream: loadarena: %r");
- if((t = newblk(Tleaf)) == nil)
+ if((tb = newblk(Tleaf)) == nil)
sysfatal("ream: allocate root: %r");
- refblk(t);
- initroot(t);
- finalize(t);
- syncblk(t);
+ refblk(tb);
+ initroot(tb);
+ finalize(tb);
+ syncblk(tb);
mnt->root.ht = 1;
- mnt->root.bp = t->bp;
+ mnt->root.bp = tb->bp;
/*
* Now that we have a completely empty fs, give it
@@ -185,24 +192,22 @@
* a single snap block that the tree will insert
* into, and take a snapshot as the initial state.
*/
- if((r = newblk(Tleaf)) == nil)
+ if((rb = newblk(Tleaf)) == nil)
sysfatal("ream: allocate snaps: %r");
- refblk(r);
- initsnap(r, t);
- finalize(r);
- syncblk(r);
- fs->snap.bp = r->bp;
+ refblk(rb);
+ initsnap(rb, tb);
+ finalize(rb);
+ syncblk(rb);
+
+ fs->snap.bp = rb->bp;
fs->snap.ht = 1;
+ fillsuper(sb);
+ finalize(sb);
+ syncblk(sb);
- fillsuper(s);
- finalize(s);
- syncblk(s);
-
- sync();
-
- putblk(t);
- putblk(s);
- putblk(r);
+ putblk(tb);
+ putblk(sb);
+ putblk(rb);
free(mnt);
if(sync() == -1)
sysfatal("ream: sync: %r");
--- /dev/null
+++ b/snap.c
@@ -1,0 +1,252 @@
+#include <u.h>
+#include <libc.h>
+#include <fcall.h>
+#include <avl.h>
+
+#include "dat.h"
+#include "fns.h"
+
+vlong
+inc64(uvlong *v, uvlong dv)
+{
+ vlong ov, nv;
+
+ while(1){
+ ov = *v;
+ nv = ov + dv;
+ if(cas64(v, ov, nv))
+ return nv;
+ }
+}
+
+int
+syncblk(Blk *b)
+{
+ assert(b->flag & Bfinal);
+ lock(b);
+ b->flag &= ~(Bqueued|Bdirty);
+ unlock(b);
+ return pwrite(fs->fd, b->buf, Blksz, b->bp.addr);
+}
+
+void
+enqueue(Blk *b)
+{
+ assert(b->flag&Bdirty);
+ finalize(b);
+ if(syncblk(b) == -1){
+ ainc(&fs->broken);
+ fprint(2, "write: %r");
+ abort();
+ }
+}
+
+char*
+opensnap(Tree *t, char *name)
+{
+ char dbuf[Keymax], buf[Kvmax];
+ char *p, *e;
+ int n;
+ Key k;
+ Kvp kv;
+
+ n = strlen(name);
+ p = dbuf;
+ p[0] = Klabel; p += 1;
+ memcpy(p, name, n); p += n;
+ k.k = dbuf;
+ k.nk = p - dbuf;
+ if((e = btlookup(&fs->snap, &k, &kv, buf, sizeof(buf))) != nil)
+ return e;
+ memmove(dbuf, kv.v, kv.nv);
+ k.k = dbuf;
+ k.nk = kv.nv;
+ if((e = btlookup(&fs->snap, &k, &kv, buf, sizeof(buf))) != nil)
+ return e;
+ if(unpacktree(t, kv.v, kv.nv) == nil)
+ return Efs;
+ return nil;
+}
+
+static char*
+modifysnap(vlong gen, char *name, int del)
+{
+ char dbuf[Keymax], sbuf[Snapsz];
+ char *p, *e;
+ int n, nm;
+ Msg m[2];
+
+ p = sbuf;
+ nm = 0;
+ p[0] = Ksnap; p += 1;
+ PBIT64(p, gen); p += 8;
+ m[nm].op = del ? Ounrefsnap : Orefsnap;
+ m[nm].k = sbuf;
+ m[nm].nk = p - sbuf;
+ m[nm].v = nil;
+ m[nm].nv = 0;
+ nm++;
+ if(name != nil){
+ p = dbuf;
+ n = strlen(name);
+ m[nm].op = del ? Odelete : Oinsert;
+ p[0] = Klabel; p += 1;
+ memcpy(p, name, n); p += n;
+ m[nm].k = dbuf;
+ m[nm].nk = p - dbuf;
+ m[nm].v = m[nm-1].k;
+ m[nm].nv = m[nm-1].nk;
+
+ nm++;
+ }
+ if((e = btupsert(&fs->snap, m, nm)) != nil)
+ return e;
+ return nil;
+}
+
+char*
+labelsnap(vlong gen, char *name)
+{
+ return modifysnap(gen, name, 0);
+}
+
+char*
+unlabelsnap(vlong gen, char *name)
+{
+ return modifysnap(gen, name, 1);
+}
+
+char*
+refsnap(vlong gen)
+{
+ return modifysnap(gen, nil, 0);
+}
+
+char*
+unrefsnap(vlong gen)
+{
+ return modifysnap(gen, nil, 1);
+}
+
+char*
+snapshot(Tree *t, vlong *genp, vlong *oldp)
+{
+ char kbuf[Snapsz], vbuf[Treesz];
+ char *p, *e;
+ uvlong gen;
+ Msg m;
+ int i;
+
+ gen = inc64(&fs->nextgen, 1);
+ p = kbuf;
+ p[0] = Ksnap; p += 1;
+ PBIT64(p, gen); p += 8;
+ m.op = Oinsert;
+ m.k = kbuf;
+ m.nk = p - kbuf;
+
+ for(i = 0; i < Ndead; i++){
+ if(t->dead[i].tail != nil){
+ finalize(t->dead[i].tail);
+ syncblk(t->dead[i].tail);
+ putblk(t->dead[i].tail);
+ }
+ }
+
+ p = packtree(vbuf, sizeof(vbuf), t);
+ m.v = vbuf;
+ m.nv = p - vbuf;
+ if((e = btupsert(&fs->snap, &m, 1)) != nil)
+ return e;
+ if(sync() == -1)
+ return Eio;
+ /* shift deadlist down */
+ if(t->dead[Ndead-1].tail != nil)
+ putblk(t->dead[Ndead-1].tail);
+ for(i = Ndead-1; i >= 0; i--){
+ t->prev[i] = i == 0 ? gen : t->prev[i-1];
+ t->dead[i].head = -1;
+ t->dead[i].hash = -1;
+ t->dead[i].tail = nil;
+ }
+ *genp = gen;
+ *oldp = t->prev[0];
+ return nil;
+}
+
+int
+sync(void)
+{
+ int i, r;
+ Blk *b, *s;
+ Arena *a;
+
+ qlock(&fs->snaplk);
+ r = 0;
+ s = fs->super;
+ fillsuper(s);
+ enqueue(s);
+
+ for(i = 0; i < fs->narena; i++){
+ a = &fs->arenas[i];
+ finalize(a->log.tail);
+ if(syncblk(a->log.tail) == -1)
+ r = -1;
+ }
+ for(b = fs->chead; b != nil; b = b->cnext){
+ if(!(b->flag & Bdirty))
+ continue;
+ if(syncblk(b) == -1)
+ r = -1;
+ }
+ if(r != -1)
+ r = syncblk(s);
+
+ qunlock(&fs->snaplk);
+ return r;
+}
+
+void
+quiesce(int tid)
+{
+ int i, allquiesced;
+ Bfree *p, *n;
+
+ lock(&fs->activelk);
+ allquiesced = 1;
+ fs->active[tid]++;
+ for(i = 0; i < fs->nproc; i++){
+ /*
+ * Odd parity on quiescence implies
+ * that we're between the exit from
+ * and waiting for the next message
+ * that enters us into the critical
+ * section.
+ */
+ if((fs->active[i] & 1) == 0)
+ continue;
+ if(fs->active[i] == fs->lastactive[i])
+ allquiesced = 0;
+ }
+ if(allquiesced)
+ for(i = 0; i < fs->nproc; i++)
+ fs->lastactive[i] = fs->active[i];
+ unlock(&fs->activelk);
+ if(!allquiesced)
+ return;
+
+ lock(&fs->freelk);
+ p = nil;
+ if(fs->freep != nil){
+ p = fs->freep->next;
+ fs->freep->next = nil;
+ }
+ unlock(&fs->freelk);
+
+ while(p != nil){
+ n = p->next;
+ reclaimblk(p->bp);
+ p = n;
+ }
+ fs->freep = fs->freehd;
+}
--- a/sync.c
+++ /dev/null
@@ -1,200 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <fcall.h>
-#include <avl.h>
-
-#include "dat.h"
-#include "fns.h"
-
-vlong
-inc64(uvlong *v, uvlong dv)
-{
- vlong ov, nv;
-
- while(1){
- ov = *v;
- nv = ov + dv;
- if(cas64(v, ov, nv))
- return nv;
- }
-}
-
-int
-syncblk(Blk *b)
-{
- assert(b->flag & Bfinal);
- lock(b);
- b->flag &= ~(Bqueued|Bdirty);
- unlock(b);
- return pwrite(fs->fd, b->buf, Blksz, b->bp.addr);
-}
-
-void
-enqueue(Blk *b)
-{
- assert(b->flag&Bdirty);
- finalize(b);
- if(syncblk(b) == -1){
- ainc(&fs->broken);
- fprint(2, "write: %r");
- abort();
- }
-}
-
-char*
-opensnap(Tree *t, char *name)
-{
- char dbuf[Keymax], buf[Kvmax];
- char *p, *e;
- int n;
- Key k;
- Kvp kv;
-
- n = strlen(name);
- p = dbuf;
- p[0] = Kdset; p += 1;
- memcpy(p, name, n); p += n;
- k.k = dbuf;
- k.nk = p - dbuf;
- if((e = btlookup(&fs->snap, &k, &kv, buf, sizeof(buf))) != nil)
- return e;
- memmove(dbuf, kv.v, kv.nv);
- k.k = dbuf;
- k.nk = kv.nv;
- if((e = btlookup(&fs->snap, &k, &kv, buf, sizeof(buf))) != nil)
- return e;
- p = kv.v;
- memset(t, 0, sizeof(*t));
- t->ht = GBIT32(p); p += 4;
- t->bp.addr = GBIT64(p); p += 8;
- t->bp.hash = GBIT64(p); p += 8;
- t->bp.gen = GBIT64(p); p += 8;
- t->dp.addr = GBIT64(p); p += 8;
- t->dp.hash = GBIT64(p); p += 8;
- t->dp.gen = GBIT64(p);
- return nil;
-}
-
-char*
-snapshot(Tree *r, char *name, int update)
-{
- char dbuf[Keymax], snapbuf[Snapsz], treebuf[Treesz];
- char *p, *e;
- uvlong gen;
- int n;
- Msg m[2];
-
- n = strlen(name);
- if(update)
- gen = inc64(&fs->nextgen, 0);
- else
- gen = inc64(&fs->nextgen, 1);
-
- p = dbuf;
- m[0].op = Oinsert;
- p[0] = Kdset; p += 1;
- memcpy(p, name, n); p += n;
- m[0].k = dbuf;
- m[0].nk = p - dbuf;
-
- p = snapbuf;
- p[0] = Ksnap; p += 1;
- PBIT64(p, gen); p += 8;
- m[0].v = snapbuf;
- m[0].nv = p - snapbuf;
-
- m[1].op = Oinsert;
- m[1].k = snapbuf;
- m[1].nk = p - snapbuf;
- p = treebuf;
- PBIT32(p, r->ht); p += 4;
- PBIT64(p, r->bp.addr); p += 8;
- PBIT64(p, r->bp.hash); p += 8;
- PBIT64(p, r->bp.gen); p += 8;
- PBIT64(p, r->dp.addr); p += 8;
- PBIT64(p, r->dp.hash); p += 8;
- PBIT64(p, r->dp.gen); p += 8;
- m[1].v = treebuf;
- m[1].nv = p - treebuf;
- if((e = btupsert(&fs->snap, m, nelem(m))) != nil)
- return e;
- if(sync() == -1)
- return Eio;
- return 0;
-}
-
-sync(void)
-{
- int i, r;
- Arena *a;
- Blk *b, *s;
-
- qlock(&fs->snaplk);
- r = 0;
- s = fs->super;
- fillsuper(s);
- enqueue(s);
-
- for(i = 0; i < fs->narena; i++){
- a = &fs->arenas[i];
- finalize(a->log.tail);
- if(syncblk(a->log.tail) == -1)
- r = -1;
- }
- for(b = fs->chead; b != nil; b = b->cnext){
- if(!(b->flag & Bdirty))
- continue;
- if(syncblk(b) == -1)
- r = -1;
- }
- if(r != -1)
- r = syncblk(s);
-
- qunlock(&fs->snaplk);
- return r;
-}
-
-void
-quiesce(int tid)
-{
- int i, allquiesced;
- Bfree *p, *n;
-
- lock(&fs->activelk);
- allquiesced = 1;
- fs->active[tid]++;
- for(i = 0; i < fs->nproc; i++){
- /*
- * Odd parity on quiescence implies
- * that we're between the exit from
- * and waiting for the next message
- * that enters us into the critical
- * section.
- */
- if((fs->active[i] & 1) == 0)
- continue;
- if(fs->active[i] == fs->lastactive[i])
- allquiesced = 0;
- }
- if(allquiesced)
- for(i = 0; i < fs->nproc; i++)
- fs->lastactive[i] = fs->active[i];
- unlock(&fs->activelk);
- if(!allquiesced)
- return;
-
- lock(&fs->freelk);
- p = nil;
- if(fs->freep != nil){
- p = fs->freep->next;
- fs->freep->next = nil;
- }
- unlock(&fs->freelk);
-
- while(p != nil){
- n = p->next;
- reclaimblk(p->bp);
- p = n;
- }
- fs->freep = fs->freehd;
-}
--- a/tree.c
+++ b/tree.c
@@ -82,7 +82,7 @@
assert(kv->nv == Ptrsz || kv->nv == Ptrsz+2);
if(fill != nil)
*fill = GBIT16(kv->v + Ptrsz);
- return unpackbp(kv->v);
+ return unpackbp(kv->v, kv->nv);
}
void
@@ -127,7 +127,7 @@
kv.nk = k->nk;
kv.v = buf;
kv.nv = sizeof(buf);
- p = packbp(buf, &bp);
+ p = packbp(buf, sizeof(buf), &bp);
PBIT16(p, fill);
setval(b, i, &kv);
}
@@ -143,7 +143,7 @@
b->nbuf++;
b->bufsz += msgsz(m)-2;
assert(2*b->nbuf + b->bufsz <= Bufspc);
- assert(m->op >= 0 && m->op <= Owstat);
+ assert(m->op >= 0 && m->op < Nmsgtype);
p = b->data + Pivspc;
o = Pivspc - b->bufsz;
@@ -343,20 +343,37 @@
}
int
-apply(Kvp *r, Msg *m, char *buf, int nbuf)
+apply(Kvp *kv, Msg *m, char *buf, int nbuf)
{
+ int refs;
+
switch(m->op){
case Oclearb:
case Odelete:
- assert(keycmp(r, m) == 0);
+ assert(keycmp(kv, m) == 0);
return 0;
case Oinsert:
- cpkvp(r, m, buf, nbuf);
+ cpkvp(kv, m, buf, nbuf);
return 1;
case Owstat:
- assert(keycmp(r, m) == 0);
- statupdate(r, m);
+ assert(keycmp(kv, m) == 0);
+ statupdate(kv, m);
return 1;
+ case Orefsnap:
+ assert(keycmp(kv, m) == 0);
+ refs = GBIT32(kv->v) + 1;
+ PBIT32(kv->v, refs);
+ return 1;
+ case Ounrefsnap:
+ assert(keycmp(kv, m) == 0);
+ refs = GBIT32(kv->v) - 1;
+ if(refs == 0){
+ dprint("removing snap %P\n", kv);
+// freesnap(&t);
+ return 0;
+ }
+ PBIT32(kv->v, refs);
+ return 1;
}
abort();
return 0;
@@ -387,7 +404,7 @@
* When pidx != -1,
*/
int
-updateleaf(Path *up, Path *p)
+updateleaf(Tree *t, Path *up, Path *p)
{
char buf[Msgmax];
int i, j, o, ok, full, spc;
@@ -436,8 +453,8 @@
i++;
while(j < up->hi){
if(m.op == Oclearb){
- bp = unpackbp(v.v);
- freebp(bp);
+ bp = unpackbp(v.v, v.nv);
+ freebp(t, bp);
}
ok = apply(&v, &m, buf, sizeof(buf));
Copy:
@@ -485,7 +502,7 @@
* When pidx != -1,
*/
int
-updatepiv(Path *up, Path *p, Path *pp)
+updatepiv(Tree *, Path *up, Path *p, Path *pp)
{
char buf[Msgmax];
int i, j, o, sz, full, spc;
@@ -558,7 +575,7 @@
* grow the total height of the
*/
int
-splitleaf(Path *up, Path *p, Kvp *mid)
+splitleaf(Tree *t, Path *up, Path *p, Kvp *mid)
{
char buf[Msgmax];
int full, copied, spc, ok, halfsz;
@@ -576,8 +593,8 @@
l = newblk(b->type);
r = newblk(b->type);
if(l == nil || r == nil){
- freeblk(l);
- freeblk(r);
+ freeblk(t, l);
+ freeblk(t, r);
return -1;
}
@@ -658,11 +675,11 @@
* than one.
*/
int
-splitpiv(Path *, Path *p, Path *pp, Kvp *mid)
+splitpiv(Tree *t, Path *, Path *p, Path *pp, Kvp *mid)
{
int i, o, copied, halfsz;
Blk *b, *d, *l, *r;
- Kvp t;
+ Kvp tk;
Msg m;
/*
@@ -674,8 +691,8 @@
l = newblk(b->type);
r = newblk(b->type);
if(l == nil || r == nil){
- freeblk(l);
- freeblk(r);
+ freeblk(t, l);
+ freeblk(t, r);
return -1;
}
o = 0;
@@ -700,9 +717,9 @@
o = copyup(d, o, pp, &copied);
continue;
}
- getval(b, i, &t);
- setval(d, o++, &t);
- copied += valsz(&t);
+ getval(b, i, &tk);
+ setval(d, o++, &tk);
+ copied += valsz(&tk);
}
o = 0;
d = l;
@@ -807,7 +824,7 @@
}
int
-rotate(Path *p, Path *pp, int midx, Blk *a, Blk *b, int halfpiv)
+rotate(Tree *t, Path *p, Path *pp, int midx, Blk *a, Blk *b, int halfpiv)
{
int i, o, cp, sp, idx;
Blk *d, *l, *r;
@@ -816,8 +833,8 @@
l = newblk(a->type);
r = newblk(a->type);
if(l == nil || r == nil){
- freeblk(l);
- freeblk(r);
+ freeblk(t, l);
+ freeblk(t, r);
return -1;
}
o = 0;
@@ -875,7 +892,7 @@
}
int
-rotmerge(Path *p, Path *pp, int idx, Blk *a, Blk *b)
+rotmerge(Tree *t, Path *p, Path *pp, int idx, Blk *a, Blk *b)
{
int na, nb, ma, mb, imbalance;
@@ -897,13 +914,13 @@
if(na + nb < (Pivspc - 4*Msgmax) && ma + mb < Bufspc)
return merge(p, pp, idx, a, b);
else if(imbalance > 4*Msgmax)
- return rotate(p, pp, idx, a, b, (na + nb)/2);
+ return rotate(t, p, pp, idx, a, b, (na + nb)/2);
else
return 0;
}
int
-trybalance(Path *p, Path *pp, int idx)
+trybalance(Tree *t, Path *p, Path *pp, int idx)
{
Blk *l, *m, *r;
Kvp kl, kr;
@@ -925,7 +942,7 @@
if(fill + blkfill(m) < Blkspc){
if((l = getblk(bp, 0)) == nil)
goto Out;
- if(rotmerge(p, pp, idx-1, l, m) == -1)
+ if(rotmerge(t, p, pp, idx-1, l, m) == -1)
goto Out;
goto Done;
}
@@ -936,7 +953,7 @@
if(fill + blkfill(m) < Blkspc){
if((r = getblk(bp, 0)) == nil)
goto Out;
- if(rotmerge(p, pp, idx, m, r) == -1)
+ if(rotmerge(t, p, pp, idx, m, r) == -1)
goto Out;
goto Done;
}
@@ -951,7 +968,7 @@
}
static Blk*
-flush(Path *path, int npath, int *redo)
+flush(Tree *t, Path *path, int npath, int *redo)
{
Path *up, *p, *pp, *rp;
@@ -971,13 +988,13 @@
*redo = 0;
if(p->b->type == Tleaf){
if(!filledleaf(p->b, up->sz)){
- if(updateleaf(p-1, p) == -1)
+ if(updateleaf(t, p-1, p) == -1)
goto Error;
enqueue(p->nl);
rp = p;
}else{
- if(splitleaf(up, p, &mid) == -1)
+ if(splitleaf(t, up, p, &mid) == -1)
goto Error;
enqueue(p->nl);
enqueue(p->nr);
@@ -989,7 +1006,7 @@
}
while(p != path){
if(!filledpiv(p->b, 1)){
- if(trybalance(p, pp, p->idx) == -1)
+ if(trybalance(t, p, pp, p->idx) == -1)
goto Error;
/* If we merged the root node, break out. */
if(up == path && pp != nil && pp->op == POmerge && p->b->nval == 2){
@@ -996,12 +1013,12 @@
rp = pp;
goto Out;
}
- if(updatepiv(up, p, pp) == -1)
+ if(updatepiv(t, up, p, pp) == -1)
goto Error;
enqueue(p->nl);
rp = p;
}else{
- if(splitpiv(up, p, pp, &mid) == -1)
+ if(splitpiv(t, up, p, pp, &mid) == -1)
goto Error;
enqueue(p->nl);
enqueue(p->nr);
@@ -1028,15 +1045,15 @@
}
void
-freepath(Path *path, int npath)
+freepath(Tree *t, Path *path, int npath)
{
Path *p;
for(p = path; p != path + npath; p++){
if(p->b != nil)
- freeblk(p->b);
+ freeblk(t, p->b);
if(p->m != nil)
- freeblk(p->m);
+ freeblk(t, p->m);
putblk(p->b);
putblk(p->nl);
putblk(p->nr);
@@ -1154,7 +1171,7 @@
npath++;
dh = -1;
- rb = flush(path, npath, &redo);
+ rb = flush(t, path, npath, &redo);
if(rb == nil)
goto Error;
@@ -1173,18 +1190,15 @@
t->ht += dh;
t->bp = rb->bp;
unlock(&t->lk);
- freepath(path, npath);
+ freepath(t, path, npath);
free(path);
- if(!checkfs(2)){
- showtree(2, t, "broken");
- showpath(2, path, npath);
+ if(!checkfs(2))
abort();
- }
if(redo)
goto Again;
return 0;
Error:
- freepath(path, npath);
+ freepath(t, path, npath);
free(path);
return Efs;
}