ref: 32325a04016b615ffbdbe07d3f401dc2dd43d957
parent: f0563cbdbce1777d5672fee507acac7ab0d6dbc0
author: Ori Bernstein <ori@eigenstate.org>
date: Thu Sep 16 17:54:18 EDT 2021
tree: refactor to allow multiple parallel trees
--- a/blk.c
+++ b/blk.c
@@ -180,7 +180,7 @@
char *p;
assert(off % Blksz == 0);
- assert(op == LgAlloc || op == LgFree);
+ assert(op == LogAlloc || op == LogFree);
if(lb == nil || lb->logsz > Logspc - 8){
pb = lb;
if((o = blkalloc_lk(a)) == -1)
@@ -193,7 +193,7 @@
lb->off = o;
lb->logsz = Loghdsz;
p = lb->data + lb->logsz;
- PBIT64(p + 0, (uvlong)LgEnd);
+ PBIT64(p + 0, (uvlong)LogEnd);
finalize(lb);
if(syncblk(lb) == -1){
free(lb);
@@ -203,7 +203,7 @@
a->logtl = lb;
if(pb != nil){
p = pb->data + pb->logsz;
- PBIT64(p + 0, lb->off|LgChain);
+ PBIT64(p + 0, lb->off|LogChain);
finalize(pb);
if(syncblk(pb) == -1)
return nil;
@@ -212,7 +212,7 @@
p = lb->data + lb->logsz;
if(len == Blksz){
- off |= (op & ~Lg2w);
+ off |= (op & ~Log2w);
PBIT64(p, off);
lb->logsz += 8;
}else{
@@ -223,7 +223,7 @@
}
/* this gets overwritten by the next append */
p = lb->data + lb->logsz;
- PBIT64(p, (uvlong)LgEnd);
+ PBIT64(p, (uvlong)LogEnd);
return lb;
}
@@ -234,7 +234,7 @@
* recursion.
*/
int
-logalloc(Arena *a, vlong off, int op)
+logop(Arena *a, vlong off, int op)
{
Blk *b;
@@ -275,9 +275,9 @@
ent = GBIT64(d);
op = ent & 0xff;
off = ent & ~0xff;
- n = (op & Lg2w) ? 16 : 8;
+ n = (op & Log2w) ? 16 : 8;
switch(op){
- case LgEnd:
+ case LogEnd:
dprint("log@%d: end\n", i);
/*
* since we want the next insertion to overwrite
@@ -285,7 +285,7 @@
*/
b->logsz = i;
return 0;
- case LgChain:
+ case LogChain:
bp = off & ~0xff;
dprint("log@%d: chain %llx\n", i, bp);
b->logsz = i+n;
@@ -292,30 +292,26 @@
goto Nextblk;
break;
- case LgFlush:
+ case LogFlush:
dprint("log@%d: flush: %llx\n", i, off>>8);
lock(&fs->genlk);
fs->gen = off >> 8;
unlock(&fs->genlk);
break;
- case LgAlloc:
- case LgAlloc1:
- len = (op & Lg2w) ? GBIT64(d+8) : Blksz;
+ case LogAlloc:
+ case LogAlloc1:
+ len = (op & Log2w) ? GBIT64(d+8) : Blksz;
dprint("log@%d alloc: %llx+%llx\n", i, off, len);
if(grabrange(a->free, off & ~0xff, len) == -1)
return -1;
break;
- case LgFree:
- case LgFree1:
- len = (op & Lg2w) ? GBIT64(d+8) : Blksz;
+ case LogFree:
+ case LogFree1:
+ len = (op & Log2w) ? GBIT64(d+8) : Blksz;
dprint("log@%d free: %llx+%llx\n", i, off, len);
if(freerange(a->free, off & ~0xff, len) == -1)
return -1;
break;
- case LgRef:
- case LgUnref:
- fprint(2, "unimplemented ref op at log@%d: log op %d\n", i, op);
- break;
default:
n = 0;
dprint("log@%d: log op %d\n", i, op);
@@ -363,7 +359,7 @@
b->data = b->buf + Hdrsz;
b->logsz = Loghdsz;
- PBIT64(b->data+b->logsz, (uvlong)LgEnd);
+ PBIT64(b->data+b->logsz, (uvlong)LogEnd);
finalize(b);
if(syncblk(b) == -1){
free(b);
@@ -411,10 +407,10 @@
hd = b;
b->logsz = Loghdsz;
for(i = 0; i < n; i++)
- if((b = logappend(a, b, log[i].off, log[i].len, LgFree)) == nil)
+ if((b = logappend(a, b, log[i].off, log[i].len, LogFree)) == nil)
return -1;
p = b->data + b->logsz;
- PBIT64(p, LgChain|graft);
+ PBIT64(p, LogChain|graft);
free(log);
finalize(b);
if(syncblk(b) == -1)
@@ -426,6 +422,7 @@
ab = a->b;
PBIT64(ab->data + 0, a->log);
PBIT64(ab->data + 8, a->logh);
+ finalize(ab);
if(syncblk(ab) == -1)
return -1;
checkfs();
@@ -438,11 +435,11 @@
for(i = Loghdsz; i < Logspc; i += n){
p = b->data + i;
v = GBIT64(p);
- n = (v & Lg2w) ? 16 : 8;
- if((v&0xff) == LgChain){
+ n = (v & Log2w) ? 16 : 8;
+ if((v&0xff) == LogChain){
nb = v & ~0xff;
break;
- }else if((v&0xff) == LgEnd){
+ }else if((v&0xff) == LogEnd){
nb = -1;
break;
}
@@ -507,7 +504,7 @@
cachedel(b);
if(freerange(a->free, b, Blksz) == -1)
goto out;
- if(logalloc(a, b, LgFree) == -1)
+ if(logop(a, b, LogFree) == -1)
goto out;
r = 0;
out:
@@ -523,7 +520,7 @@
int tries;
tries = 0;
-again:
+Again:
a = pickarena(hint);
if(a == nil || tries == fs->narena){
werrstr("no empty arenas");
@@ -545,9 +542,9 @@
tries++;
if((b = blkalloc_lk(a)) == -1){
unlock(a);
- goto again;
+ goto Again;
}
- if(logalloc(a, b, LgAlloc) == -1){
+ if(logop(a, b, LogAlloc) == -1){
unlock(a);
return -1;
}
@@ -694,7 +691,8 @@
int
syncblk(Blk *b)
{
- dprint("\tsyncblk: %llx+%llx\n", b->off, Blksz);
+ assert(b->flag & Bfinal);
+ print("\tsyncblk: %llx+%llx\n", b->off, Blksz);
return pwrite(fs->fd, b->buf, Blksz, b->off);
}
@@ -702,6 +700,8 @@
void
enqueue(Blk *b)
{
+ print("sync %llx\n", b->off);
+ assert(b->flag & Bfinal);
if(syncblk(b) == -1){
ainc(&fs->broken);
fprint(2, "write: %r");
@@ -708,7 +708,7 @@
return;
}
wlock(b);
- b->flag &= ~(Bqueued|Bdirty|Bfinal);
+ b->flag &= ~(Bqueued|Bdirty);
wunlock(b);
}
@@ -725,9 +725,9 @@
PBIT32(p + 12, Blksz);
PBIT32(p + 16, Bufspc);
PBIT32(p + 20, Hdrsz);
- PBIT32(p + 24, fs->height);
- PBIT64(p + 32, fs->rootb);
- PBIT64(p + 40, fs->rooth);
+ PBIT32(p + 24, fs->root.ht);
+ PBIT64(p + 32, fs->root.bp);
+ PBIT64(p + 40, fs->root.bh);
PBIT32(p + 48, fs->narena);
PBIT64(p + 56, fs->arenasz);
PBIT64(p + 64, fs->gen);
@@ -768,12 +768,12 @@
}
Blk*
-getblk(vlong bp, uvlong bh)
+getblk(vlong bp, uvlong bh, int flg)
{
Blk *b;
if((b = lookupblk(bp)) == nil){
- if((b = readblk(bp, 0)) == nil)
+ if((b = readblk(bp, flg)) == nil)
return nil;
if(siphash(b->buf, Blksz) != bh){
werrstr("corrupt block %llx", bp);
@@ -797,32 +797,6 @@
return b;
}
-int
-refblk(Blk *b)
-{
- Arena *a;
- int r;
-
- a = getarena(b->off);
- lock(a);
- r = logalloc(a, b->off, LgRef);
- unlock(a);
- return r;
-}
-
-int
-unrefblk(Blk *b)
-{
- Arena *a;
- int r;
-
- a = getarena(b->off);
- lock(a);
- r = logalloc(a, b->off, LgUnref);
- unlock(a);
- return r;
-}
-
ushort
blkfill(Blk *b)
{
@@ -859,11 +833,7 @@
assert(b->ref == 1 && b->flag & (Bdirty|Bqueued) == Bdirty);
a = getarena(b->off);
lock(a);
- /*
- * TODO: what to do if we fail to log a free here??
- * This is already an error path!
- */
- logalloc(a, b->off, LgUnref);
+ logop(a, b->off, LogFree);
blkdealloc(b->off);
unlock(a);
free(b);
--- a/check.c
+++ b/check.c
@@ -61,7 +61,7 @@
fprint(2, "freed block in use: %llx\n", x.bp);
fail++;
}
- if((c = getblk(x.bp, x.bh)) == nil){
+ if((c = getblk(x.bp, x.bh, 0)) == nil){
fprint(2, "corrupt block: %r\n");
fail++;
continue;
@@ -126,7 +126,7 @@
ok = 1;
if(badfree())
ok = 0;
- if((b = getroot(&height)) != nil){
+ if((b = getroot(&fs->root, &height)) != nil){
if(badblk(b, height-1, nil, 0))
ok = 0;
putblk(b);
@@ -158,7 +158,7 @@
getval(b, i, &kv);
fprint(fd, "%.*s|%P\n", 4*indent, spc, &kv);
if(b->type == Tpivot){
- if((c = getblk(kv.bp, kv.bh)) == nil)
+ if((c = getblk(kv.bp, kv.bh, 0)) == nil)
sysfatal("failed load: %r");
if(recurse)
rshowblk(fd, c, indent + 1, 1);
@@ -191,8 +191,8 @@
int h;
fprint(fd, "=== %s\n", m);
- fprint(fd, "fs->height: %d\n", fs->height);
- rshowblk(fd, getroot(&h), 0, 1);
+ fprint(fd, "\tht: %d\n", fs->root.ht);
+ rshowblk(fd, getroot(&fs->root, &h), 0, 1);
}
void
@@ -207,10 +207,13 @@
{
int i;
+ print("path:\n");
+#define A(b) (b ? b->off : -1)
for(i = 0; i < np; i++){
- print("==> b=%p, n=%p, idx=%d\n", p[i].b, p[i].n, p[i].idx);
- print("\tclear=(%d, %d):%d\n", p[i].lo, p[i].hi, p[i].sz);
- print("\tl=%p, r=%p, n=%p, split=%d\n", p[i].l, p[i].r, p[i].n, p[i].split);
+ print("\t[%d] ==> b(%p)=%llx, n(%p)=%llx, nl(%p)=%llx, nr(%p)=%llx idx=%d\n",
+ i, p[i].b, A(p[i].b), p[i].n, A(p[i].n), p[i].nl, A(p[i].nl), p[i].nr, A(p[i].nr), p[i].idx);
+ print("\t\tclear=(%d, %d):%d\n", p[i].lo, p[i].hi, p[i].sz);
+ print("\t\tl=%p, r=%p, n=%p, split=%d\n", p[i].l, p[i].r, p[i].n, p[i].split);
}
}
--- a/dat.h
+++ b/dat.h
@@ -15,6 +15,7 @@
typedef struct Arange Arange;
typedef struct Bucket Bucket;
typedef struct Chan Chan;
+typedef struct Tree Tree;
enum {
KiB = 1024ULL,
@@ -22,7 +23,7 @@
GiB = 1024ULL*MiB,
TiB = 1024ULL*GiB,
- Lgblk = 9,
+ Lgblk = 13,
Blksz = (1ULL<<Lgblk),
Nrefbuf = 1024, /* number of ref incs before syncing */
@@ -198,18 +199,17 @@
*/
enum {
/* 1-wide entries */
- LgAlloc1, /* alloc a block */
- LgFree1, /* alloc a block */
- LgRef, /* ref a block */
- LgUnref, /* free a block */
- LgFlush, /* flush log, bump gen */
- LgChain, /* point to next log block */
- LgEnd, /* last entry in log */
+ LogAlloc1, /* alloc a block */
+ LogFree1, /* alloc a block */
+ LogDead1, /* free a block */
+ LogFlush, /* flush log, bump gen */
+ LogChain, /* point to next log block */
+ LogEnd, /* last entry in log */
/* 2-wide entries */
- Lg2w = 1<<5,
- LgAlloc = LgAlloc1|Lg2w, /* alloc a range */
- LgFree = LgFree1|Lg2w, /* free a range */
+ Log2w = 1<<5,
+ LogAlloc = LogAlloc1|Log2w, /* alloc a range */
+ LogFree = LogFree1|Log2w, /* free a range */
};
struct Arange {
@@ -231,6 +231,13 @@
uchar buf[];
};
+struct Tree {
+ Lock lk;
+ vlong bp;
+ vlong bh;
+ int ht;
+};
+
/*
* Overall state of the file sytem.
* Shadows the superblock contents.
@@ -250,11 +257,8 @@
int fd;
long broken;
- Lock rootlk;
/* protected by rootlk */
- vlong rootb;
- vlong rooth;
- int height;
+ Tree root;
Lock genlk;
vlong gen;
@@ -346,9 +350,7 @@
* instead of the most recent root, to prevent
* paging in the wrong executable.
*/
- vlong rootb;
- vlong rooth;
- int height;
+ Tree root;
u32int fid;
vlong qpath;
@@ -392,9 +394,7 @@
struct Scan {
vlong offset; /* last read offset */
- vlong rootb;
- vlong rooth;
- int height;
+ Tree root;
int done;
Kvp kv;
--- a/fns.h
+++ b/fns.h
@@ -11,8 +11,8 @@
Blk* newblk(int type);
Blk* shadow(Blk*, Path*, Path*);
-Blk* getroot(int*);
-Blk* getblk(vlong, uvlong);
+Blk* getroot(Tree*, int*);
+Blk* getblk(vlong, uvlong, int);
Blk* pinblk(Blk*);
Blk* readblk(vlong, int);
Arena* getarena(vlong);
@@ -35,10 +35,10 @@
int endfs(void);
int compresslog(Arena *a);
-int btupsert(Msg*, int);
-char *btlookup(Key*, Kvp*, Blk**);
+int btupsert(Tree*, Msg*, int);
+char *btlookup(Tree*, Key*, Kvp*, Blk**);
char *btlookupat(Blk*, Key*, Kvp*, Blk**);
-char *btscan(Scan*, Key*, vlong, vlong, int);
+char *btscan(Tree*, Scan*, Key*);
char *btnext(Scan*, Kvp*, int*);
void btdone(Scan*);
--- a/fs.c
+++ b/fs.c
@@ -42,10 +42,10 @@
char *e;
Blk *b;
- if(f->rootb == -1)
- b = getroot(nil);
+ if(f->root.bp == -1)
+ b = getroot(&fs->root, nil);
else
- b = getblk(f->rootb, f->rooth);
+ b = getblk(f->root.bp, f->root.bh, 0);
if(b == nil)
return Efs;
if(lk)
@@ -256,7 +256,7 @@
int w, n;
r->tag = m->tag;
-// dprint("→ %F\n", r);
+ dprint("→ %F\n", r);
if((n = convS2M(r, buf, sizeof(buf))) == 0)
abort();
qlock(m->wrlk);
@@ -384,7 +384,7 @@
dk.k = buf;
dk.nk = p - buf;
showfs("attach");
- if(btlookup(&dk, &kv, &b) != nil){
+ if(btlookup(&fs->root, &dk, &kv, &b) != nil){
rerror(m, Efs);
return;
}
@@ -413,8 +413,8 @@
f.fid = NOFID;
f.qpath = d.qid.path;
f.mode = -1;
- f.rooth = -1;
- f.rootb = -1;
+ f.root.bh = -1;
+ f.root.bp = -1;
f.iounit = iounit;
f.dent = e;
showfids();
@@ -497,7 +497,7 @@
}
if(i > 0){
d.name = m->wname[i-1];
- dent = getdent(f->rootb, up, &d);
+ dent = getdent(f->root.bp, up, &d);
if(dent == nil){
if(m->fid != m->newfid)
clunkfid(f);
@@ -525,7 +525,7 @@
rerror(m, "no such fid");
return;
}
- if(btlookup(f->dent, &kv, &b) != nil){
+ if(btlookup(&fs->root, f->dent, &kv, &b) != nil){
rerror(m, Eexist);
return;
}
@@ -609,12 +609,12 @@
return;
}
//showfs("precreate");
- if(btupsert(&mb, 1) == -1){
+ if(btupsert(&fs->root, &mb, 1) == -1){
rerror(m, "%r");
return;
}
//showfs("postcreate");
- dent = getdent(f->rootb, f->qpath, &d);
+ dent = getdent(f->root.bp, f->qpath, &d);
if(dent == nil){
if(m->fid != m->newfid)
clunkfid(f);
@@ -641,6 +641,39 @@
}
+void
+fsremove(Fmsg *m)
+{
+ Fcall r;
+ Msg mb;
+ Fid *f;
+
+ if(okname(m->name) == -1){
+ rerror(m, Ename);
+ return;
+ }
+ if((f = getfid(m->fid)) == nil){
+ rerror(m, "no such fid");
+ return;
+ }
+
+ rlock(f->dent);
+ mb.op = Odelete;
+ mb.k = f->dent->k;
+ mb.nk = f->dent->nk;
+ mb.nv = 0;
+ if(btupsert(&fs->root, &mb, 1) == -1){
+ runlock(f->dent);
+ rerror(m, "remove: %r");
+ return;
+ }
+ runlock(f->dent);
+ clunkfid(f);
+
+ r.type = Rremove;
+ respond(m, &r);
+}
+
int
fsaccess(Dir*, int)
{
@@ -691,12 +724,10 @@
}
f->mode = m->mode;
if((f->mode & 0x7) == OEXEC){
- lock(&fs->rootlk);
- f->rootb = fs->rootb;
- f->rooth = fs->rooth;
- f->height = fs->height;
-// refblk(fs->rootb);
- unlock(&fs->rootlk);
+ lock(&fs->root.lk);
+ f->root = fs->root;
+// refblk(fs->root.bp);
+ unlock(&fs->root.lk);
}
unlock(f);
respond(m, &r);
@@ -707,6 +738,7 @@
{
char pfx[9], *p, *e;
int n, ns, done;
+ Tree *t;
Scan *s;
Kvp kv;
@@ -722,7 +754,8 @@
print("scan starting\n");
if((s = mallocz(sizeof(Scan), 1)) == nil)
return "out of memory";
- if((e = btscan(s, &kv, f->rootb, f->rooth, f->height)) != nil){
+ t = (f->root.bp != -1) ? &f->root : &fs->root;
+ if((e = btscan(t, s, &kv)) != nil){
free(r->data);
btdone(s);
return e;
@@ -790,7 +823,7 @@
bh = GBIT64(kv.v+8);
putblk(b);
- if((b = getblk(bp, bh)) == nil)
+ if((b = getblk(bp, bh, GBraw)) == nil)
return -1;
fprint(2, "\treading from %llx (%llx) %s %s\n", bp, b->off, b->buf, b->data);
if(bo+n > Blksz)
@@ -906,7 +939,7 @@
bh = GBIT64(kv.v+8);
putblk(t);
- if((t = getblk(bp, bh)) == nil)
+ if((t = getblk(bp, bh, GBraw)) == nil)
return -1;
memcpy(b->buf, t->buf, Blksz);
putblk(t);
@@ -978,7 +1011,7 @@
PBIT64(kv[i].v, m->offset+m->count);
f->dent->length = m->offset+m->count;
}
- btupsert(kv, i+1);
+ btupsert(&fs->root, kv, i+1);
wunlock(f->dent);
r.type = Rwrite;
@@ -1015,7 +1048,7 @@
}
*/
m->wrlk = wrlk;
-// dprint("← %F\n", &m->Fcall);
+ dprint("← %F\n", &m->Fcall);
switch(m->type){
/* sync setup */
case Tversion: fsversion(m, &msgmax); break;
@@ -1058,7 +1091,7 @@
case Tcreate: fscreate(m); break;
case Twrite: fswrite(m); break;
case Twstat: fswstat(m); break;
- case Tremove: rerror(m, "unimplemented remove"); break;
+ case Tremove: fsremove(m); break;
}
}
}
--- a/load.c
+++ b/load.c
@@ -62,9 +62,9 @@
sysfatal("fs uses different buffer size");
if(GBIT32(p + 20) != Hdrsz)
sysfatal("fs uses different buffer size");
- fs->height = GBIT32(p + 24);
- fs->rootb = GBIT64(p + 32);
- fs->rooth = GBIT64(p + 40);
+ fs->root.ht = GBIT32(p + 24);
+ fs->root.bp = GBIT64(p + 32);
+ fs->root.bh = GBIT64(p + 40);
fs->narena = GBIT32(p + 48);
fs->arenasz = GBIT64(p + 56);
fs->arenasz = GBIT64(p + 56);
@@ -72,9 +72,9 @@
fs->nextqid = GBIT64(p + 72);
fs->super = b;
fprint(2, "load: %8s\n", p);
- fprint(2, "\theight:\t%d\n", fs->height);
- fprint(2, "\trootb:\t%llx\n", fs->rootb);
- fprint(2, "\trooth:\t%llx\n", fs->rooth);
+ fprint(2, "\theight:\t%d\n", fs->root.ht);
+ fprint(2, "\trootb:\t%llx\n", fs->root.bp);
+ fprint(2, "\trooth:\t%llx\n", fs->root.bh);
fprint(2, "\tarenas:\t%d\n", fs->narena);
fprint(2, "\tarenasz:\t%lld\n", fs->arenasz);
fprint(2, "\trootgen:\t%lld\n", fs->gen);
--- a/ream.c
+++ b/ream.c
@@ -60,11 +60,11 @@
b->flag |= Bdirty;
p = b->data+Loghdsz;
- PBIT64(p+ 0, off|LgFree); /* off */
+ PBIT64(p+ 0, off|LogFree); /* off */
PBIT64(p+ 8, asz); /* len */
- PBIT64(p+16, b->off|LgAlloc); /* off */
+ PBIT64(p+16, b->off|LogAlloc); /* off */
PBIT64(p+24, Blksz); /* len */
- PBIT64(p+32, (uvlong)LgEnd); /* done */
+ PBIT64(p+32, (uvlong)LogEnd); /* done */
finalize(b);
if(syncblk(b) == -1)
sysfatal("ream: init log");
@@ -147,9 +147,9 @@
finalize(r);
fs->super = s;
- fs->rootb = r->off;
- fs->rooth = blkhash(r);
- fs->height = 1;
+ fs->root.bp = r->off;
+ fs->root.bh = blkhash(r);
+ fs->root.ht = 1;
snapshot();
putblk(s);
--- a/tree.c
+++ b/tree.c
@@ -418,6 +418,7 @@
hi = (p != nil) ? p->hi : -1;
pidx = (p != nil) ? p->idx : -1;
midx = (p != nil && p->merge) ? p->midx : -1;
+if(p->merge) showblk(b, "preupdate", 0);
if((n = newblk(b->type)) == nil)
return -1;
for(i = 0; i < b->nval; i++){
@@ -426,10 +427,19 @@
continue;
}else if(i == midx){
getval(p->nl, 0, &m);
+ m.type = Vref;
+ m.bp = p->nl->off;
+ m.bh = blkhash(p->nl);
+ m.fill = blkfill(p->nl);
setval(n, j++, &m, 0);
if(p->nr){
getval(p->nr, 0, &m);
+ m.type = Vref;
+ m.bp = p->nr->off;
+ m.bh = blkhash(p->nr);
+ m.fill = blkfill(p->nr);
setval(n, j++, &m, 0);
+ i++;
}
continue;
}
@@ -630,6 +640,8 @@
setmsg(d, o++, &m);
}
}
+ finalize(d);
+ enqueue(d);
p->merge = 1;
p->midx = idx;
p->nl = d;
@@ -690,6 +702,7 @@
Blk *d, *l, *r;
Msg m;
+ print("a->type: %d\n", a->type);
l = newblk(a->type);
r = newblk(a->type);
if(l == nil || r == nil){
@@ -743,6 +756,10 @@
setmsg(d, o++, &m);
}
}
+ finalize(l);
+ finalize(r);
+ enqueue(l);
+ enqueue(r);
p->merge = 1;
p->midx = midx;
p->nl = l;
@@ -794,7 +811,7 @@
if((m = pp->n) == nil)
return 0;
}else{
- if((m = getblk(km.bp, km.bh)) == nil)
+ if((m = getblk(km.bp, km.bh, 0)) == nil)
return -1;
}
/* Try merging left */
@@ -803,7 +820,7 @@
getval(p->b, idx-1, &kl);
if(kl.fill + km.fill >= Blkspc)
goto next;
- if((l = getblk(kl.bp, kl.bh)) == nil)
+ if((l = getblk(kl.bp, kl.bh, 0)) == nil)
goto out;
if(rotmerge(p, idx-1, l, m) == -1)
goto out;
@@ -814,7 +831,7 @@
getval(p->b, idx+1, &kr);
if(kr.fill + km.fill >= Blkspc)
goto done;
- if((r = getblk(kr.bp, kr.bh)) == nil)
+ if((r = getblk(kr.bp, kr.bh, 0)) == nil)
goto out;
if(rotmerge(p, idx, m, r) == -1)
goto out;
@@ -883,10 +900,8 @@
}
for(; p > path; p--){
if(!filledpiv(p->b, 1)){
-#ifdef NOPE
if(trymerge(p, pp, p->idx) == -1)
goto error;
-#endif
/* If we merged the root node, break out. */
if(p[-1].b == nil && p[0].merge && p[0].nr == nil && p[0].b->nval == 2){
/* FIXME: shouldn't p[1].n already be right? */
@@ -917,7 +932,7 @@
bufinsert(b, &m);
}
finalize(p->l);
- Blk *x = getblk(p->l->off, -1);
+ Blk *x = getblk(p->l->off, -1, 0);
assert(p->l == x);
finalize(p->r);
}
@@ -993,7 +1008,7 @@
}
int
-btupsert(Msg *msg, int nmsg)
+btupsert(Tree *t, Msg *msg, int nmsg)
{
int i, npath, redo, dh, nm, height;
vlong rh;
@@ -1011,26 +1026,12 @@
}
again:
- if((b = getroot(&height)) == nil){
+ if((b = getroot(t, &height)) == nil){
werrstr("get root: %r");
return -1;
}
/*
- * Fast path: the root of the tree has room,
- * and nobody else has observed the node, so
- * we can modify it with gleeful abandon.
- */
- if(b->type == Tpivot && canwlock(b)) {
- if((b->flag & Bdirty) && !filledbuf(b, nm)){
- for(i = 0; i < nmsg; i++)
- bufinsert(b, &msg[i]);
- wunlock(b);
- return 0;
- }
- }
-
- /*
* The tree can grow in height by 1 when we
* split, so we allocate room for one extra
* node in the path.
@@ -1050,7 +1051,7 @@
break;
victim(b, &path[npath]);
getval(b, path[npath].idx, &sep);
- b = getblk(sep.bp, sep.bh);
+ b = getblk(sep.bp, sep.bh, 0);
if(b == nil)
goto error;
npath++;
@@ -1091,11 +1092,11 @@
assert(rb->off != 0);
rh = blkhash(rb);
- lock(&fs->rootlk);
- fs->height += dh;
- fs->rootb = rb->off;
- fs->rooth = rh;
- unlock(&fs->rootlk);
+ lock(&t->lk);
+ t->ht += dh;
+ t->bp = rb->off;
+ t->bh = rh;
+ unlock(&t->lk);
freepath(path, npath);
free(path);
@@ -1115,17 +1116,17 @@
}
Blk*
-getroot(int *h)
+getroot(Tree *t, int *h)
{
vlong bp, bh;
- lock(&fs->rootlk);
- bp = fs->rootb;
- bh = fs->rooth;
+ lock(&t->lk);
+ bp = t->bp;
+ bh = t->bh;
if(h != nil)
- *h = fs->height;
- unlock(&fs->rootlk);
- return getblk(bp, bh);
+ *h = t->ht;
+ unlock(&t->lk);
+ return getblk(bp, bh, 0);
}
static char*
@@ -1179,7 +1180,7 @@
if(idx == -1)
return Eexist;
putblk(b);
- if((b = getblk(r->bp, r->bh)) == nil)
+ if((b = getblk(r->bp, r->bh, 0)) == nil)
return Efs;
}
assert(b->type == Tleaf);
@@ -1193,13 +1194,13 @@
}
char*
-btlookup(Key *k, Kvp *r, Blk **bp)
+btlookup(Tree *t, Key *k, Kvp *r, Blk **bp)
{
char *ret;
Blk *b;
*bp = nil;
- if((b = getroot(nil)) == nil)
+ if((b = getroot(t, nil)) == nil)
return Efs;
ret = btlookupat(b, k, r, bp);
putblk(b);
@@ -1208,7 +1209,7 @@
}
char*
-btscan(Scan *s, Key *pfx, vlong rootb, vlong rooth, int height)
+btscan(Tree *t, Scan *s, Key *pfx)
{
int i, same;
Scanp *p;
@@ -1218,7 +1219,6 @@
s->done = 0;
s->offset = 0;
- s->height = height;
cpkey(&s->pfx, pfx, s->pfxbuf, sizeof(s->pfxbuf));
s->kv.v = s->kvbuf+pfx->nk;
@@ -1225,28 +1225,20 @@
s->kv.nv = 0;
cpkey(&s->kv, pfx, s->kvbuf, sizeof(s->kvbuf));
- if(rootb != -1){
- s->rootb = rootb;
- s->rooth = rooth;
- s->height = height;
- }else{
- lock(&fs->rootlk);
- s->rootb = fs->rootb;
- s->rooth = fs->rooth;
- s->height = fs->height;
-dprint("height %d\n", s->height);
- unlock(&fs->rootlk);
- }
- if((s->path = calloc(s->height, sizeof(Scanp))) == nil){
+ lock(&t->lk);
+ s->root = *t;
+//dprint("height %d\n", s->root.ht);
+ unlock(&t->lk);
+ if((s->path = calloc(s->root.ht, sizeof(Scanp))) == nil){
free(s);
return nil;
}
p = s->path;
- if((b = getblk(s->rootb, s->rooth)) == nil)
+ if((b = getblk(s->root.bp, s->root.bh, 0)) == nil)
return "error reading block";
p[0].b = b;
- for(i = 0; i < s->height; i++){
+ for(i = 0; i < s->root.ht; i++){
p[i].vi = blksearch(b, &s->kv, &v, &same);
if(p[i].vi == -1 || (!same && b->type == Tleaf))
getval(b, ++p[i].vi, &v);
@@ -1254,18 +1246,18 @@
p[i].bi = bufsearch(b, &s->kv, &m, &same);
if(p[i].bi == -1 || !same)
p[i].bi++;
- if((b = getblk(v.bp, v.bh)) == nil)
+ if((b = getblk(v.bp, v.bh, 0)) == nil)
return "error readivg block";
p[i+1].b = b;
}else{
- assert(i == s->height-1);
+ assert(i == s->root.ht-1);
}
}
-dprint("inited\n");
-for(i = 0; i < s->height; i++){
-dprint("\t%p", p[i].b);
-dprint(" (%d %d)\n", p[i].vi, p[i].bi);
-}
+//dprint("inited\n");
+//for(i = 0; i < s->root.ht; i++){
+//dprint("\t%p", p[i].b);
+//dprint(" (%d %d)\n", p[i].vi, p[i].bi);
+//}
return nil;
}
@@ -1278,15 +1270,15 @@
Kvp kv;
p = s->path;
- h = s->height;
+ h = s->root.ht;
*done = 0;
start = h;
for(i = h-1; i > 0; i--){
-dprint("advancing (i=%d)\n", i);
-for(j = 0; j < h; j++){
-dprint("\t%p", p[j].b);
-dprint(" (%d %d)\n", p[j].vi, p[j].bi);
-}
+//dprint("advancing (i=%d)\n", i);
+//for(j = 0; j < h; j++){
+//dprint("\t%p", p[j].b);
+//dprint(" (%d %d)\n", p[j].vi, p[j].bi);
+//}
if(p[i].vi < p[i].b->nval || p[i].bi < p[i].b->nbuf)
break;
if(i == 0){
@@ -1300,7 +1292,7 @@
}
for(i = start; i < h; i++){
getval(p[i-1].b, p[i-1].vi, &kv);
- if((p[i].b = getblk(kv.bp, kv.bh)) == nil)
+ if((p[i].b = getblk(kv.bp, kv.bh, 0)) == nil)
return "error reading block";
}
getval(p[h-1].b, p[h-1].vi, &m);
@@ -1335,7 +1327,7 @@
{
int i;
- for(i = 0; i < s->height; i++)
+ for(i = 0; i < s->root.ht; i++)
if(s->path[i].b != nil)
putblk(s->path[i].b);
free(s->path);
@@ -1352,10 +1344,10 @@
s = fs->super;
qlock(&fs->snaplk);
- lock(&fs->rootlk);
+ lock(&fs->root.lk);
fillsuper(s);
finalize(s);
- unlock(&fs->rootlk);
+ unlock(&fs->root.lk);
for(i = 0; i < fs->narena; i++){
a = &fs->arenas[i];
@@ -1367,49 +1359,4 @@
r = syncblk(s);
qunlock(&fs->snaplk);
return r;
-}
-
-int
-getrefpg(vlong off, Blk **p)
-{
- char kbuf[9], *e;
- vlong bp, bh;
- Blk *b;
- Kvp kv;
-
- *p = nil;
- kbuf[0] = Kref;
- PBIT64(kbuf+1, off & ~(Blksz-1));
- kv.k = kbuf;
- kv.nk = sizeof(kbuf);
- e = btlookup(&kv, &kv, &b);
- if(e == Eexist)
- return 0;
- if(e != nil || kv.nv != 16)
- return -1;
- bp = GBIT64(kv.v+0);
- bh = GBIT64(kv.v+8);
- putblk(b);
- if((*p = getblk(bp, bh)) == nil)
- return -1;
- return 0;
-}
-
-int
-setrefpg(vlong pg, vlong bp, vlong bh)
-{
- char kbuf[9], vbuf[16];
- Msg m;
-
- kbuf[0] = Kref;
- PBIT64(kbuf+1, pg & ~(Blksz-1));
- PBIT64(vbuf+0, bp);
- PBIT64(vbuf+8, bh);
-
- m.op = Oinsert;
- m.k = kbuf;
- m.nk = sizeof(kbuf);
- m.v = vbuf;
- m.nv = sizeof(vbuf);
- return btupsert(&m, 1);
}