shithub: gefs

Download patch

ref: b73fa273322688000ae7c71d7582b019c0265671
parent: af48e0e4130a9c7a0340fa61814d4f6d98359959
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Nov 21 12:10:44 EST 2021

splitleaf: switch blocks in the same loop

this cleans up the merge code later.

--- a/blk.c
+++ b/blk.c
@@ -2,7 +2,6 @@
 #include <libc.h>
 #include <fcall.h>
 #include <avl.h>
-#include <pool.h>
 
 #include "dat.h"
 #include "fns.h"
@@ -584,7 +583,7 @@
 	b->cprev = nil;
 	b->hnext = nil;
 
-	print("new block %B from %p, flag=%x\n", b->bp, getcallerpc(&t), b->flag);
+	dprint("new block %B from %p, flag=%x\n", b->bp, getcallerpc(&t), b->flag);
 	return cacheblk(b);
 }
 
@@ -745,7 +744,7 @@
 	wlock(b);
 	b->flag |= Bzombie;
 	wunlock(b);
-	fprint(2, "freeing block %B @ %ld, from 0x%p\n", b->bp, b->ref, getcallerpc(&b));
+	dprint("freeing block %B @ %ld, from 0x%p\n", b->bp, b->ref, getcallerpc(&b));
 
 	assert((b->flag & Bqueued) == 0);
 	a = getarena(b->bp.addr);
--- a/cache.c
+++ b/cache.c
@@ -74,7 +74,7 @@
 }
 
 static void
-cachedel_(vlong del)
+cachedel(vlong del)
 {
 	Bucket *bkt;
 	Blk *b, **p;
--- a/check.c
+++ b/check.c
@@ -28,6 +28,7 @@
 badblk(Blk *b, int h, Kvp *lo, Kvp *hi)
 {
 	Kvp x, y;
+	Msg mx, my;
 	int i, r;
 	Blk *c;
 	int fail;
@@ -53,8 +54,11 @@
 		fail++;
 	}
 	getval(b, 0, &x);
-	if(lo && keycmp(lo, &x) != 0)
+	if(lo && keycmp(lo, &x) > 0){
 		fprint(2, "out of range keys %P != %P\n", lo, &x);
+		showblk(b, "wut", 1);
+		fail++;
+	}
 	for(i = 1; i < b->nval; i++){
 		getval(b, i, &y);
 		if(hi && keycmp(&y, hi) >= 0){
@@ -94,6 +98,54 @@
 		}
 		x = y;
 	}
+	if(b->type == Tpivot){
+		getval(b, b->nval-1, &y);
+		if((c = getblk(x.bp, 0)) == nil){
+			fprint(2, "corrupt block: %r\n");
+			fail++;
+		}
+		if(c != nil && badblk(c, h - 1, &y, nil))
+			fail++;
+	}
+	if(b->type == Tpivot){
+		if(b->nbuf > 0){
+			getmsg(b, 0, &mx);
+			if(hi && keycmp(&mx, hi) >= 0){
+				fprint(2, "out of range messages %P != %M\n", hi, &mx);
+				fail++;
+			}
+		}
+		for(i = 1; i < b->nbuf; i++){
+			getmsg(b, i, &my);
+			switch(my.op){
+			case Oinsert:	/* new kvp */
+			case Odelete:	/* delete kvp */
+			case Oqdelete:	/* delete kvp if exists */
+				break;
+			case Owstat:		/* kvp dirent */
+				if((my.statop & ~(Owsize|Owname|Owmode|Owmtime)) != 0){
+					fprint(2, "invalid stat op %d\n", my.statop);
+					fail++;
+				}
+				break;
+			default:
+				fprint(2, "invalid message op %d\n", my.op);
+				fail++;
+				break;
+			}
+			if(hi && keycmp(&y, hi) > 0){
+				fprint(2, "out of range keys %P >= %P\n", &y, hi);
+				fail++;
+			}
+			if(keycmp(&mx, &my) == 1){
+				fprint(2, "misordered keys %P, %P\n", &x, &y);
+				fail++;
+				break;
+			}
+			mx = my;
+		}
+
+	}
 	return fail;
 }
 
@@ -239,15 +291,18 @@
 #define A(b) (b ? b->bp.addr : -1)
 	for(i = 0; i < np; i++){
 		print("\t[%d] ==>\n"
-			"\t\t%s: b(%p)=%llx\n"
+			"\t\t%s: b(%p)=%llx [%s]\n"
 			"\t\tnl(%p)=%llx, nr(%p)=%llx\n"
-			"\t\tidx=%d, midx=%d\n",
+			"\t\tidx=%d, midx=%d\n"
+			"\t\tpullsz=%d, npull=%d, \n"
+			"\t\tclear=(%d. %d)\n",
 			i, op[p[i].op],
-			p[i].b, A(p[i].b),
+			p[i].b, A(p[i].b), (p[i].b == nil) ? "nil" : (p[i].b->type == Tleaf ? "leaf" : "pivot"),
 			p[i].nl, A(p[i].nl),
 			p[i].nr, A(p[i].nr),
-			p[i].idx, p[i].midx);
-		print("\t\tclear=(%d, %d):%d\n", p[i].lo, p[i].hi, p[i].sz);
+			p[i].idx, p[i].midx,
+			p[i].pullsz, p[i].npull,
+			p[i].lo, p[i].hi);
 	}
 }
 
@@ -257,8 +312,6 @@
 	Arange *r;
 	int i;
 
-	if(!debug)
-		return;
 	print("=== %s\n", m);
 	for(i = 0; i < fs->narena; i++){
 		print("arena %d:\n", i);
--- a/cons.c
+++ b/cons.c
@@ -40,7 +40,7 @@
 			}
 		}else if(strcmp(arg[0], "check") == 0)
 			checkfs();
-		else if(strcmp(arg[0], "dbg") && narg == 2)
+		else if(strcmp(arg[0], "dbg") == 0 && narg == 2)
 			debug = atoi(arg[1]);
 		else
 			fprint(fd, "unknown command %s\n", arg[0]);
--- a/dat.h
+++ b/dat.h
@@ -191,7 +191,7 @@
 	Onop,
 	Oinsert,	/* new kvp */
 	Odelete,	/* delete kvp */
-	Odeletex,	/* delete kvp if exists */
+	Oqdelete,	/* delete kvp if exists */
 	Owstat,		/* kvp dirent */
 	/* wstat flags */
 	Owsize	= 1<<4,
@@ -335,6 +335,7 @@
 
 struct Msg {
 	char	op;
+	char	statop;
 	Kvp;
 };
 
@@ -378,7 +379,8 @@
 
 struct Path {
 	/* Flowing down for flush */
-	Blk	*b;	/* insertion */
+	Msg	*ins;	/* inserted values, bounded by lo..hi */
+	Blk	*b;	/* to shadow */
 	int	idx;	/* insert at */
 	int	lo;	/* key range */
 	int	hi;	/* key range */
@@ -390,6 +392,8 @@
 	Blk	*nl;	/* new left */
 	Blk	*nr;	/* new right, if we split or rotated */
 	int	midx;	/* modification index */
+	int	npull;	/* number of messages successfully pulled */
+	int	pullsz;	/* size of pulled messages */
 };
 
 struct Scanp {
@@ -401,7 +405,6 @@
 struct Scan {
 	vlong	offset;	/* last read offset */
 	Tree	root;
-	Dir	dir;
 
 	int	done;
 	int	overflow;
--- a/dump.c
+++ b/dump.c
@@ -51,7 +51,7 @@
 		n = fmtprint(fmt, "blk:%llx, hash:%llx", GBIT64(v->v), GBIT64(v->v+8));
 		break;
 	case Kent:	/* pqid[8] name[n] => dir[n]:	serialized Dir */
-		switch(op&0xf){
+		switch(op){
 		case Onop:
 		case Oinsert:
 			if(kv2dir(v, &d) == -1)
@@ -63,6 +63,7 @@
 			break;
 		case Odelete:
 			n = fmtprint(fmt, "delete");
+			break;
 		case Owstat:
 			p = v->v;
 			if(op & Owmtime){
@@ -79,6 +80,7 @@
 			}
 			if(p != v->v + v->nv)
 				abort();
+			break;
 		}
 		break;
 	case Ksnap:	/* name[n] => dent[16] ptr[16]:	snapshot root */
@@ -110,6 +112,7 @@
 	char *opname[] = {
 	[Oinsert]	"Oinsert",
 	[Odelete]	"Odelete",
+	[Oqdelete]	"Oqdelete",
 	[Owstat]	"Owstat",
 	};
 	Msg *m;
@@ -116,10 +119,12 @@
 	int n;
 
 	m = va_arg(fmt->args, Msg*);
-	n = fmtprint(fmt, "Msg(%s, ", opname[m->op&0xf]);
+	if(m == nil)
+		return fmtprint(fmt, "Msg{nil}");
+	n = fmtprint(fmt, "Msg(%s, ", opname[m->op]);
 	n += showkey(fmt, m);
 	n += fmtprint(fmt, ") => (");
-	n += showval(fmt, m, m->op);
+	n += showval(fmt, m, m->statop);
 	n += fmtprint(fmt, ")");
 	return n;
 }
@@ -131,6 +136,8 @@
 	int n;
 
 	kv = va_arg(fmt->args, Kvp*);
+	if(kv == nil)
+		return fmtprint(fmt, "Kvp{nil}");
 	n = fmtprint(fmt, "Kvp(");
 	n += showkey(fmt, kv);
 	n += fmtprint(fmt, ") => (");
@@ -149,6 +156,8 @@
 	int n;
 
 	k = va_arg(fmt->args, Key*);
+	if(k == nil)
+		return fmtprint(fmt, "Key{nil}");
 	n = fmtprint(fmt, "Key(");
 	n += showkey(fmt, k);
 	n += fmtprint(fmt, ")");
@@ -165,4 +174,13 @@
 		return fmtprint(fmt, "<Arange:nil>");
 	else
 		return fmtprint(fmt, "Arange(%lld+%lld)", r->off, r->len);
+}
+
+int
+Qconv(Fmt *fmt)
+{
+	Qid q;
+
+	q = va_arg(fmt->args, Qid);
+	return fmtprint(fmt, "(%llx %ld %d)", q.path, q.vers, q.type);
 }
--- a/fns.h
+++ b/fns.h
@@ -5,6 +5,7 @@
 #pragma varargck type "B"	Bptr
 #pragma varargck type "R"	Arange*
 #pragma varargck type "X"	char*
+#pragma varargck type "Q"	Qid
 
 extern Gefs	*fs;
 extern int	debug;
@@ -32,17 +33,19 @@
 int	loadarena(Arena*, vlong);
 void	loadfs(char*);
 int	sync(void);
-int	loadlog(Arena *a);
+int	loadlog(Arena*);
 int	endfs(void);
-int	compresslog(Arena *a);
+int	compresslog(Arena*);
+void	setval(Blk*, int, Kvp*);
 
 int	btupsert(Tree*, Msg*, int);
-char	*btlookup(Tree*, Key*, Kvp*, Blk**);
-char	*btlookupat(Blk*, Key*, Kvp*, Blk**);
+char	*btlookup(Tree*, Key*, Kvp*, char*, int);
+char	*btlookupat(Blk*, Key*, Kvp*, char*, int);
 char	*btscan(Tree*, Scan*, char*, int);
 char	*btnext(Scan*, Kvp*, int*);
 void	btdone(Scan*);
 
+
 void	setflag(Blk *b, int);
 int	chkflag(Blk *b, int);
 
@@ -94,12 +97,12 @@
 int	Pconv(Fmt*);
 int	Rconv(Fmt*);
 int	Kconv(Fmt*);
+int	Qconv(Fmt*);
 
 /* scratch */
 void	setmsg(Blk *, int, Msg *);
 void	bufinsert(Blk *, Msg *);
 void	victim(Blk *b, Path *p);
-void	blkinsert(Blk *b, Kvp *kv);
 
 Chan	*mkchan(int);
 Fmsg	*chrecv(Chan*);
--- a/fs.c
+++ b/fs.c
@@ -3,7 +3,6 @@
 #include <fcall.h>
 #include <avl.h>
 #include <bio.h>
-#include <pool.h>
 
 #include "dat.h"
 #include "fns.h"
@@ -38,7 +37,7 @@
 }
 
 static char*
-fslookup(Fid *f, Key *k, Kvp *kv, Blk **bp, int lk)
+fslookup(Fid *f, Key *k, Kvp *kv, char *buf, int nbuf, int lk)
 {
 	char *e;
 	Blk *b;
@@ -52,7 +51,7 @@
 
 	if(lk)
 		rlock(f->dent);
-	e = btlookupat(b, k, kv, bp);
+	e = btlookupat(b, k, kv, buf, nbuf);
 	if(lk)
 		runlock(f->dent);
 	putblk(b);
@@ -71,7 +70,6 @@
 	lock(&fs->dtablk);
 	for(e = fs->dtab[h]; e != nil; e = e->next){
 		if(e->qid.path == d->qid.path && e->rootb == root){
-			dprint("found %p [%K]\n", e, &e->Key);
 			ainc(&e->ref);
 			unlock(&fs->dtablk);
 			return e;
@@ -95,7 +93,6 @@
 	e->nk = ek - e->buf;
 	e->next = fs->dtab[h];
 	fs->dtab[h] = e;
-	dprint("created %p [%K]\n", e, &e->Key);
 
 	unlock(&fs->dtablk);
 	return e;
@@ -135,7 +132,8 @@
 	for(i = 0; i < Nfidtab; i++)
 		for(f = fs->fidtab[i]; f != nil; f = f->next){
 			rlock(f->dent);
-			fprint(fd, "\tfid[%d]: %d [refs=%ld, k=%K]\n", i, f->fid, f->dent->ref, &f->dent->Key);
+			fprint(fd, "\tfid[%d]: %d [refs=%ld, k=%K, qid=%Q]\n",
+				i, f->fid, f->dent->ref, &f->dent->Key, f->dent->qid);
 			runlock(f->dent);
 		}
 	unlock(&fs->fidtablk);
@@ -366,13 +364,12 @@
 void
 fsattach(Fmsg *m, int iounit)
 {
-	char *p, *ep, buf[128];
+	char *p, *ep, buf[Kvmax], kvbuf[Kvmax];
 	int err;
 	Dent *e;
 	Fcall r;
 	Kvp kv;
 	Key dk;
-	Blk *b;
 	Fid f;
 	Dir d;
 
@@ -384,7 +381,7 @@
 	p = packstr(&err, p, ep, "");
 	dk.k = buf;
 	dk.nk = p - buf;
-	if(btlookup(&fs->root, &dk, &kv, &b) != nil){
+	if(btlookup(&fs->root, &dk, &kv, kvbuf, sizeof(kvbuf)) != nil){
 		rerror(m, Efs);
 		return;
 	}
@@ -391,15 +388,12 @@
 	r.type = Rattach;
 	if(kv2dir(&kv, &d) == -1){
 		rerror(m, Efs);
-		putblk(b);
 		return;
 	}
 	if((e = getdent(-1, -1, &d)) == nil){
 		rerror(m, Efs);
-		putblk(b);
 		return;
 	}
-	putblk(b);
 
 	/*
 	 * A bit of a hack; we're duping a fid
@@ -429,13 +423,12 @@
 void
 fswalk(Fmsg *m)
 {
-	char *p, *e, *estr, kbuf[Maxent];
-	int i, nwalk, err;
+	char *p, *e, *estr, kbuf[Maxent], kvbuf[Kvmax];
 	vlong up, prev;
+	int i, err;
 	Fid *o, *f;
 	Dent *dent;
 	Fcall r;
-	Blk *b;
 	Kvp kv;
 	Key k;
 	Dir d;
@@ -450,11 +443,9 @@
 	}
 	err = 0;
 	estr = nil;
-	nwalk = 0;
 	up = o->qpath;
 	prev = o->qpath;
 	r.type = Rwalk;
-	r.nwqid = 0;
 	for(i = 0; i < m->nwname; i++){
 		up = prev;
 		p = kbuf;
@@ -468,22 +459,17 @@
 		}
 		k.k = kbuf;
 		k.nk = p - kbuf;
-//showfs("walking");
-//dprint("looking up %K\n", &k);
-		if((estr = fslookup(o, &k, &kv, &b, 0)) != nil){
+		if((estr = fslookup(o, &k, &kv, kvbuf, sizeof(kvbuf), 0)) != nil){
 			break;
 		}
 		if(kv2dir(&kv, &d) == -1){
 			rerror(m, Efs);
-			putblk(b);
 			return;
 		}
-		nwalk = i;
 		prev = d.qid.path;
-		putblk(b);
-		r.wqid[r.nwqid] = d.qid;
-		r.nwqid++;
+		r.wqid[i] = d.qid;
 	}
+	r.nwqid = i;
 	if(i == 0 && m->nwname != 0){
 		rerror(m, estr);
 		return;
@@ -496,8 +482,6 @@
 		}
 	}
 	if(i > 0){
-		d.name = m->wname[nwalk];
-		d.qid = m->wqid[nwalk];
 		dent = getdent(f->root.bp.addr, up, &d);
 		if(dent == nil){
 			if(m->fid != m->newfid)
@@ -516,11 +500,10 @@
 void
 fsstat(Fmsg *m)
 {
-	char *err, buf[STATMAX];
+	char *err, buf[STATMAX], kvbuf[Kvmax];
 	Fcall r;
 	Fid *f;
 	Kvp kv;
-	Blk *b;
 	int n;
 
 	if((f = getfid(m->fid)) == nil){
@@ -527,8 +510,7 @@
 		rerror(m, "no such fid");
 		return;
 	}
-	print("stat %K\n", &f->dent->Key);
-	if((err = btlookup(&fs->root, f->dent, &kv, &b)) != nil){
+	if((err = btlookup(&fs->root, f->dent, &kv, kvbuf, sizeof(kvbuf))) != nil){
 		rerror(m, err);
 		return;
 	}
@@ -540,7 +522,6 @@
 	r.stat = (uchar*)buf;
 	r.nstat = n;
 	respond(m, &r);
-	putblk(b);
 }
 
 void
@@ -615,6 +596,7 @@
 	d.gid = "glenda";
 	d.muid = "glenda";
 	mb.op = Oinsert;
+	mb.statop = 0;
 	if(dir2kv(f->qpath, &d, &mb, buf, sizeof(buf)) == -1){
 		rerror(m, "%r");
 		return;
@@ -693,11 +675,10 @@
 void
 fsopen(Fmsg *m)
 {
+	char *e, buf[Kvmax];
 	Fcall r;
-	char *e;
 	Dir d;
 	Fid *f;
-	Blk *b;
 	Kvp kv;
 
 	if((f = getfid(m->fid)) == nil){
@@ -704,17 +685,16 @@
 		rerror(m, Efid);
 		return;
 	}
-	if((e = fslookup(f, f->dent, &kv, &b, 0)) != nil){
+	if((e = fslookup(f, f->dent, &kv, buf, sizeof(buf), 0)) != nil){
 		rerror(m, e);
 		return;
 	}
 	if(kv2dir(&kv, &d) == -1){
 		rerror(m, Efs);
-		putblk(b);
+		return;
 	}
 	if(fsaccess(&d, m->mode) == -1){
 		rerror(m, Eperm);
-		putblk(b);
 		return;
 	}
 	wlock(f->dent);
@@ -723,7 +703,6 @@
 	r.type = Ropen;
 	r.qid = d.qid;
 	r.iounit = f->iounit;
-	putblk(b);
 
 	lock(f);
 	if(f->mode != -1){
@@ -755,12 +734,12 @@
 	int n, ns, done;
 	Tree *t;
 	Scan *s;
+	Dir d;
 
 	s = f->scan;
 	if(s != nil && s->offset != 0 && s->offset != m->offset)
 		return Edscan;
 	if(s == nil || m->offset == 0){
-		print("scan starting\n");
 		if((s = mallocz(sizeof(Scan), 1)) == nil)
 			return Enomem;
 
@@ -787,7 +766,8 @@
 	p = r->data;
 	n = m->count;
 	if(s->overflow){
-		if((ns = convD2M(&s->dir, (uchar*)p, n)) <= BIT16SZ)
+		kv2dir(&s->kv, &d);
+		if((ns = convD2M(&d, (uchar*)p, n)) <= BIT16SZ)
 			return Edscan;
 		s->overflow = 0;
 		p += ns;
@@ -798,7 +778,8 @@
 			return e;
 		if(done)
 			break;
-		if((ns = convD2M(&s->dir, (uchar*)p, n)) <= BIT16SZ){
+		kv2dir(&s->kv, &d);
+		if((ns = convD2M(&d, (uchar*)p, n)) <= BIT16SZ){
 			s->overflow = 1;
 			break;
 		}
@@ -812,7 +793,7 @@
 int
 readb(Fid *f, char *d, vlong o, vlong n, int sz)
 {
-	char *e, buf[17];
+	char *e, buf[17], kvbuf[17+32];
 	vlong fb, fo;
 	Bptr bp;
 	Blk *b;
@@ -831,22 +812,19 @@
 	PBIT64(k.k+1, f->qpath);
 	PBIT64(k.k+9, fb);
 
-	e = fslookup(f, &k, &kv, &b, 0);
+	e = fslookup(f, &k, &kv, kvbuf, sizeof(kvbuf), 0);
 	if(e != nil && e != Eexist){
 		fprint(2, "!!! error: %s", e);
 		werrstr(e);
 		return -1;
 	}
-	fprint(2, "\treadb: key=%K, val=%P\n", &k, &kv);
-	bp = unpackbp(kv.v);
-	putblk(b);
 
+	bp = unpackbp(kv.v);
 	if((b = getblk(bp, GBraw)) == nil)
 		return -1;
 	if(fo+n > Blksz)
 		n = Blksz-fo;
 	if(b != nil){
-		fprint(2, "\tcopying %lld to resp %p\n", n, d);
 		memcpy(d, b->buf+fo, n);
 		putblk(b);
 	}else
@@ -907,7 +885,6 @@
 		rerror(m, Efid);
 		return;
 	}
-	fprint(2, "\n");
 	r.type = Rread;
 	r.count = 0;
 	if((r.data = malloc(m->count)) == nil){
@@ -914,7 +891,6 @@
 		rerror(m, Emem);
 		return;
 	}
-	fprint(2, "\nread{{{{\n");
 	if(f->dent->qid.type & QTDIR)
 		e = fsreaddir(m, f, &r);
 	else
@@ -926,12 +902,12 @@
 		respond(m, &r);
 		free(r.data);
 	}
-	fprint(2, "\n}}}read\n");
 }
 
 int
 writeb(Fid *f, Msg *m, char *s, vlong o, vlong n, vlong sz)
 {
+	char buf[Kvmax];
 	vlong fb, fo;
 	Bptr bp;
 	Blk *b, *t;
@@ -949,16 +925,13 @@
 	if(b == nil)
 		return -1;
 	if(fb < sz && (fo != 0 || n != Blksz)){
-		fprint(2, "\tappending to block %B\n", b->bp);
-		if(fslookup(f, m, &kv, &t, 0) != nil){
-			putblk(b);
+		dprint("\tappending to block %B\n", b->bp);
+		if(fslookup(f, m, &kv, buf, sizeof(buf), 0) != nil){
 			return -1;
 		}
 		bp = unpackbp(kv.v);
-		putblk(t);
 
 		if((t = getblk(bp, GBraw)) == nil){
-			putblk(b);
 			return -1;
 		}
 		memcpy(b->buf, t->buf, Blksz);
@@ -974,8 +947,6 @@
 	assert(b->flag & Bfinal);
 	packbp(m->v, &b->bp);
 	putblk(b);
-	checkfs();
-	poolcheck(mainmem);
 	return n;
 }
 
@@ -1023,12 +994,13 @@
 	}
 
 	kv[i].op = Owstat;
+	kv[i].statop = 0;
 	kv[i].k = f->dent->k;
 	kv[i].nk = f->dent->nk;
 	kv[i].v = sbuf;
 	kv[i].nv = 0;
 	if(m->offset+m->count > f->dent->length){
-		kv[i].op |= Owsize;
+		kv[i].statop = Owsize;
 		kv[i].nv += 8;
 		PBIT64(kv[i].v, m->offset+m->count);
 		f->dent->length = m->offset+m->count;
--- a/main.c
+++ b/main.c
@@ -111,6 +111,7 @@
 	fmtinstall('K', Kconv);
 	fmtinstall('R', Rconv);
 	fmtinstall('F', fcallfmt);
+	fmtinstall('Q', Qconv);
 	if(ream){
 		reamfs(argv[0]);
 		exits(nil);
--- a/pack.c
+++ b/pack.c
@@ -242,7 +242,6 @@
 
 	if(kv2dir(kv, &d) == -1)
 		return -1;
-print("\tpackname: %s\n", d.name);
 	dprint("have %d bytes to pack into\n", nbuf);
 	if((n = convD2M(&d, (uchar*)buf, nbuf)) <= BIT16SZ){
 		fprint(2, "here...failed convert??, needed %d\n", GBIT16(buf));
--- a/ream.c
+++ b/ream.c
@@ -15,6 +15,7 @@
 	Kvp kv;
 	Dir d;
 
+	/* nb: values must be inserted in key order */
 	memset(&d, 0, sizeof(Dir));
 	d.qid = (Qid){fs->nextqid++, 0, QTDIR};
 	d.mode = 0755;
@@ -27,7 +28,7 @@
 	d.muid = "glenda";
 	if(dir2kv(-1, &d, &kv, buf, sizeof(buf)) == -1)
 		sysfatal("ream: pack root: %r");
-	blkinsert(r, &kv);
+	setval(r, 0, &kv);
 
 	kv.k = buf;
 	kv.nk = 9;
@@ -34,9 +35,9 @@
 	kv.v = buf+9;
 	kv.nv = 8;
 	buf[0] = Ksuper;
-	PBIT64(buf+1, 0);
-	PBIT64(buf+9, 0);
-	blkinsert(r, &kv);
+	PBIT64(buf+1, 0ULL);
+	PBIT64(buf+9, 0ULL);
+	setval(r, 1, &kv);
 }
 
 static void
--- a/tree.c
+++ b/tree.c
@@ -37,6 +37,7 @@
 cpmsg(Msg *dst, Msg *src, char *buf, int nbuf)
 {
 	dst->op = src->op;
+	dst->statop = src->statop;
 	cpkvp(dst, src, buf, nbuf);
 }
 
@@ -59,7 +60,8 @@
 int
 msgsz(Msg *m)
 {
-	return 1 + 2 + m->nk + 2 + m->nv;
+	/* disp + op + klen + key + vlen + v */
+	return 2+1+2+m->nk +2+ m->nv;
 }
 
 int
@@ -66,9 +68,9 @@
 valsz(Kvp *kv)
 {
 	if(kv->type == Vref)
-		return 2+kv->nk + Ptrsz + Fillsz;
+		return 2 + 2+kv->nk + Ptrsz + Fillsz;
 	else
-		return 2+kv->nk + 2+kv->nv;
+		return 2 + 2+kv->nk + 2+kv->nv;
 }
 
 void
@@ -93,42 +95,21 @@
 	}
 }
 
-static void
-setval(Blk *b, int i, Kvp *kv, int replace)
+void
+setval(Blk *b, int i, Kvp *kv)
 {
-	int o, nk, nv, ksz, vsz, spc;
+	int o, nk, nv, spc;
 	char *p;
 
-	assert(i >= 0 && i <= b->nval);
+	assert(i == b->nval);
 	spc = (b->type == Tleaf) ? Leafspc : Pivspc;
 	p = b->data + 2*i;
 	nk = 2 + kv->nk;
 	nv = (kv->type == Vref) ? Ptrsz+Fillsz : 2 + kv->nv;
-	if (i < 0)
-		i = 0;
-	if(!replace || b->nval == i){
-		memmove(p + 2, p, 2*(b->nval - i));
-		b->nval++;
-		b->valsz += nk + nv;
-		o = spc - b->valsz;
-	}else{
-		/*
-		 * If we can't put it where it was before,
-		 * we need to allocate new space for the
-		 * key-value data. It'll get compacted when
-		 * we split or merge.
-		 */
-		o = GBIT16(b->data + 2*i);
-		ksz = 2 + GBIT16(b->data + o);
-		if(b->type == Tleaf)
-			vsz = 2 + GBIT16(b->data + o + ksz);
-		else
-			vsz = 16;
-		if(ksz + vsz < nk + nv){
-			b->valsz += nk + nv;
-			o = spc - b->valsz;
-		}
-	}
+	memmove(p + 2, p, 2*(b->nval - i));
+	b->nval++;
+	b->valsz += nk + nv;
+	o = spc - b->valsz;
 
 	if(2*b->nval + b->valsz > spc){
 		dprint("2*%d + %d > %d [ksz: %d, vsz: %d]\n",
@@ -154,18 +135,6 @@
 	}
 }
 
-static int
-delval(Blk *b, int i)
-{
-	char *p;
-
-	assert(i >= 0 && i <= b->nval);
-	b->nval--;
-	p = b->data + 2*i;
-	memmove(p, p + 2, 2*(b->nval - i));
-	return 0;
-}
-
 void
 setmsg(Blk *b, int i, Msg *m)
 {
@@ -175,8 +144,9 @@
 	assert(b->type == Tpivot);
 	assert(i >= 0 && i <= b->nbuf);
 	b->nbuf++;
-	b->bufsz += msgsz(m);
+	b->bufsz += msgsz(m)-2;
 	assert(2*b->nbuf + b->bufsz <= Bufspc);
+	assert(m->op >= 0 && m->op <= Owstat);
 
 	p = b->data + Pivspc;
 	o = Pivspc - b->bufsz;
@@ -185,6 +155,8 @@
 
 	p = b->data + Bufspc + o;
 	*p = m->op;
+	if(m->op == Owstat)
+		*p |= m->statop;
 	PBIT16(p + 1, m->nk);
 	memcpy(p + 3, m->k, m->nk);
 	PBIT16(p + 3 + m->nk, m->nv);
@@ -200,10 +172,10 @@
 	assert(b->type == Tpivot);
 	assert(i >= 0 && i < b->nbuf);
 	o = GBIT16(b->data + Pivspc + 2*i);
-
 	p = b->data + Pivspc + o;
 	m->type = Vinl;
-	m->op = *p;
+	m->op = (*p & 0x0f);
+	m->statop = (*p & 0xf0);
 	m->nk = GBIT16(p + 1);
 	m->k = p + 3;
 	m->nv = GBIT16(p + 3 + m->nk);
@@ -210,51 +182,6 @@
 	m->v = p + 5 + m->nk;
 }
 
-void
-blkinsert(Blk *b, Kvp *kv)
-{
-	int lo, hi, mid, r;
-	Kvp cmp;
-
-	r = -1;
-	lo = 0;
-	hi = b->nval;
-	while(lo < hi){
-		mid = (hi + lo) / 2;
-		getval(b, mid, &cmp);
-		r = keycmp(kv, &cmp);
-		if(r <= 0)
-			hi = mid;
-		else
-			lo = mid + 1;
-	}
-	if(lo < b->nval){
-		getval(b, lo, &cmp);
-		r = keycmp(kv, &cmp);
-	}
-	setval(b, lo, kv, (r == 0));
-}
-
-void
-bufinsert(Blk *b, Msg *m)
-{
-	int lo, hi, mid, r;
-	Msg cmp;
-
-	lo = 0;
-	hi = b->nbuf;
-	while(lo < hi){
-		mid = (hi + lo) / 2;
-		getmsg(b, mid, &cmp);
-		r = keycmp(m, &cmp);
-		if(r < 0)
-			hi = mid;
-		else
-			lo = mid + 1;
-	}
-	setmsg(b, lo, m);
-}
-
 static int
 bufsearch(Blk *b, Key *k, Msg *m, int *same)
 {
@@ -283,28 +210,6 @@
 }
 
 int
-blkdelete(Blk *b, Kvp *kv)
-{
-	int lo, hi, mid, r;
-	Kvp cmp;
-
-	lo = 0;
-	hi = b->nval - 1;
-	while(lo <= hi){
-		mid = (hi + lo) / 2;
-		getval(b, mid, &cmp);
-		r = keycmp(kv, &cmp);
-		if(r == 0)
-			delval(b, mid);
-		if(r <= 0)
-			hi = mid - 1;
-		else
-			lo = mid + 1;
-	}
-	return -1;
-}
-
-int
 blksearch(Blk *b, Key *k, Kvp *rp, int *same)
 {
 	int lo, hi, mid, r;
@@ -333,6 +238,13 @@
 }
 
 int
+buffill(Blk *b)
+{
+	assert(b->type == Tpivot);
+	return 2*b->nbuf + b->bufsz;
+}
+
+int
 filledbuf(Blk *b, int nmsg, int needed)
 {
 	assert(b->type == Tpivot);
@@ -339,7 +251,6 @@
 	return 2*(b->nbuf+nmsg) + b->bufsz + needed > Bufspc;
 }
 
-
 int
 filledleaf(Blk *b, int needed)
 {
@@ -363,6 +274,7 @@
 copyup(Blk *n, int i, Path *pp, int *nbytes)
 {
 	Kvp kv;
+	Msg m;
 
 	/*
 	 * It's possible for the previous node to have
@@ -372,23 +284,31 @@
 	 */
 	if(pp->nl->nval > 0){
 		getval(pp->nl, 0, &kv);
+		if(pp->nl->nbuf > 0){
+			getmsg(pp->nl, 0, &m);
+			if(keycmp(&kv, &m) > 0)
+				kv.Key = m.Key;
+		}
 		kv.type = Vref;
 		kv.bp = pp->nl->bp;
 		kv.fill = blkfill(pp->nl);
-		setval(n, i++, &kv, 1);
+		setval(n, i++, &kv);
 		if(nbytes != nil)
-			*nbytes += 2 + valsz(&kv);
+			*nbytes += valsz(&kv);
 	}
-	if(pp->nr != nil){
-		if(pp->nr->nval > 0){
-			getval(pp->nr, 0, &kv);
-			kv.type = Vref;
-			kv.bp = pp->nr->bp;
-			kv.fill = blkfill(pp->nr);
-			setval(n, i++, &kv, 0);
-			if(nbytes != nil)
-				*nbytes += 2 + valsz(&kv);
+	if(pp->nr != nil && pp->nr->nval > 0){
+		getval(pp->nr, 0, &kv);
+		if(pp->nr->nbuf > 0){
+			getmsg(pp->nr, 0, &m);
+			if(keycmp(&kv, &m) > 0)
+				kv.Key = m.Key;
 		}
+		kv.type = Vref;
+		kv.bp = pp->nr->bp;
+		kv.fill = blkfill(pp->nr);
+		setval(n, i++, &kv);
+		if(nbytes != nil)
+			*nbytes += valsz(&kv);
 	}
 	return i;
 }
@@ -403,22 +323,22 @@
 	/* bump version */
 	v = GBIT32(kv->v+8);
 	PBIT32(kv->v+8, v+1);
-	if(m->op & Owmtime){
+	if(m->statop & Owmtime){
 		v = GBIT64(p);
 		p += 8;
 		PBIT32(kv->v+25, v);
 	}
-	if(m->op & Owsize){
+	if(m->statop & Owsize){
 		v = GBIT64(p);
 		p += 8;
 		PBIT64(kv->v+33, v);
 	}
-	if(m->op & Owmode){
+	if(m->statop & Owmode){
 		v = GBIT32(p);
 		p += 4;
 		PBIT32(kv->v+33, v);
 	}
-	if(m->op & Owname){
+	if(m->statop & Owname){
 		fprint(2, "renames not yet supported\n");
 		abort();
 	}
@@ -426,6 +346,39 @@
 		fprint(2, "malformed wstat message");
 }
 
+int
+apply(Kvp *r, Msg *m, char *buf, int nbuf)
+{
+	switch(m->op){
+	case Odelete:
+		return 0;
+	case Oinsert:
+		cpkvp(r, m, buf, nbuf);
+		return 1;
+	case Owstat:
+		statupdate(r, m);
+		return 1;
+	}
+	abort();
+	return 0;
+}
+
+int
+pullmsg(Path *p, int i, Kvp *v, Msg *m, int *full, int spc)
+{
+	if(i < 0 || i >= p->hi || *full)
+		return -1;
+
+	if(p->ins != nil)
+		*m = p->ins[i];
+	else
+		getmsg(p->b, i, m);
+	if(msgsz(m) <= spc)
+		return (v == nil) ? 0 : keycmp(v, m);
+	*full = 1;
+	return -1;
+}
+
 /*
  * Creates a new block with the contents of the old
  * block. When copying the contents, it repacks them
@@ -438,10 +391,10 @@
 updateleaf(Path *up, Path *p)
 {
 	char buf[Msgmax];
-	int i, j, o, ok, slack;
+	int i, j, o, ok, full, spc;
 	Blk *b, *n;
-	Kvp v;
 	Msg m;
+	Kvp v;
 
 	i = 0;
 	o = 0;
@@ -448,7 +401,7 @@
 	j = up->lo;
 	b = p->b;
 	/*
-	 * slack is the amount of room we have
+	 * spc is the amount of room we have
 	 * to copy data down from the parent; it's
 	 * necessarily a bit conservative, because
 	 * deletion messages don't take space -- but
@@ -455,18 +408,16 @@
 	 * we don't know how what the types of all
 	 * messages are.
 	 */
-	slack = Blkspc - blkfill(b);
+	full = 0;
+	spc = Blkspc - blkfill(b);
 	if((n = newblk(b->type)) == nil)
 		return -1;
-	while(i < b->nval && j < up->hi){
+	while(i < b->nval){
+		ok = 1;
 		getval(p->b, i, &v);
-		getmsg(up->b, j, &m);
-		if(msgsz(&m) >= slack)
-			break;
-
-		switch(keycmp(&v, &m)){
+		switch(pullmsg(up, j, &v, &m, &full, spc)){
 		case -1:
-			setval(n, o++, &v, 0);
+			setval(n, o++, &v);
 			i++;
 			break;
 		case 1:
@@ -474,48 +425,48 @@
 			 * new values must always start as
 			 * an insertion, mutations come after.
 			 */
-			assert(m.op == Oinsert);
-			setval(n, o++, &m, 0);
-			slack -= valsz(&m);
-			j++;
-			break;
+			if(m.op != Oinsert){
+				print("%d(/%d), %d: %M not insert\n", i, b->nval, j, &m);
+				abort();
+			}
+			spc -= valsz(&m);
+			goto Copy;
 		case 0:
-			ok = 1;
+			i++;
 			while(j < up->hi){
-				switch(m.op){
-				case Oinsert:
-					cpkvp(&v, &m, buf, sizeof(buf));
-					ok = 1;
-					break;
-				case Odelete:
-					ok = 0;
-					break;
-				case Owstat:
-					statupdate(&v, &m);
-					break;
-				}
+		Copy:
+				ok = apply(&v, &m, buf, sizeof(buf));
 				j++;
-				if(j < up->hi){
-					getmsg(up->b, j, &m);
-					if(keycmp(&v, &m) != 0)
-						break;
-					print("\tnext: %P, %M\n", &v, &m);
-				}
+				p->pullsz += msgsz(&m);
+				if(j >= up->hi || pullmsg(up, j, &v, &m, &full, spc) != 0)
+					break;
 			}
 			if(ok)
-				setval(n, o++, &v, 0);
-			i++;
+				setval(n, o++, &v);
 			break;
 		}
 	}
-	while(i < b->nval){
-		getval(p->b, i++, &v);
-		setval(n, o++, &v, 0);
-	}
 	while(j < up->hi) {
-		getmsg(up->b, j++, &m);
-		setval(n, o++, &m, 0);
+		/* can't fail */
+		pullmsg(up, j++, nil, &m, &full, spc);
+		ok = 1;
+		cpkvp(&v, &m, buf, sizeof(buf));
+		p->pullsz += msgsz(&m);
+		if(m.op != Oinsert){
+			print("%d(/%d), %d: %M not insert\n", i, b->nval, j, &m);
+			showblk(up->b, "parent", 0);
+			showblk(p->b, "current", 0);
+			abort();
+		}
+		while(pullmsg(up, j, &v, &m, &full, spc) == 0){
+			ok = apply(&v, &m, buf, sizeof(buf));
+			p->pullsz += msgsz(&m);
+			j++;
+		}
+		if(ok)
+			setval(n, o++, &v);
 	}
+	p->npull = (j - up->lo);
 	p->nl = n;
 	return 0;
 }
@@ -529,50 +480,69 @@
  * When pidx != -1, 
  */
 int
-updatepiv(Path *p, Path *pp)
+updatepiv(Path *up, Path *p, Path *pp)
 {
-	int i, j;
+	char buf[Msgmax];
+	int i, j, o, sz, full, spc;
 	Blk *b, *n;
-	Msg m;
+	Msg m, u;
 
-	j = 0;
+	o = 0;
 	b = p->b;
 	if((n = newblk(b->type)) == nil)
 		return -1;
 	for(i = 0; i < b->nval; i++){
-		if(i == p->midx){
-			if(pp->nl->nval > 0){
-				getval(pp->nl, 0, &m);
-				m.type = Vref;
-				m.bp = pp->nl->bp;
-				m.fill = blkfill(pp->nl);
-				setval(n, j++, &m, 0);
-			}
-			if(pp->nr != nil && pp->nr->nval > 0){
-				getval(pp->nr, 0, &m);
-				m.type = Vref;
-				m.bp = pp->nr->bp;
-				m.fill = blkfill(pp->nr);
-				setval(n, j++, &m, 0);
-			}
+		if(pp != nil && i == p->midx){
+			o = copyup(n, o, pp, nil);
 			if(pp->op == POrot || pp->op == POmerge)
 				i++;
 		}else{
 			getval(b, i, &m);
-			setval(n, j++, &m, 0);
+			setval(n, o++, &m);
 		}
 	}
+	o = 0;
 	i = 0;
-	j = 0;
+	j = up->lo;
+	sz = 0;
+	full = 0;
+	spc = Bufspc - buffill(b);
+	if(pp != nil)
+		spc += pp->pullsz;
 	while(i < b->nbuf){
 		if(i == p->lo)
-			i = p->hi;
+			i += pp->npull;
 		if(i == b->nbuf)
 			break;
-		getmsg(b, i++, &m);
-		setmsg(n, j++, &m);
+		getmsg(b, i, &m);
+		switch(pullmsg(up, j, &m, &u, &full, spc - sz)){
+		case -1:
+		case 0:
+			setmsg(n, o++, &m);
+			i++;
+			break;
+		case 1:
+			cpkvp(&m, &u, buf, sizeof(buf));
+			while(pullmsg(up, j, &m, &u, &full, spc) == 0){
+				setmsg(n, o++, &u);
+				sz = msgsz(&u);
+				p->pullsz += sz;
+				spc -= sz;
+				j++;
+			}
+		}
 	}
-	n->nbuf = j;
+	while(j < up->hi){
+		pullmsg(up, j, nil, &u, &full, spc);
+		if(full)
+			break;
+		setmsg(n, o++, &u);
+		sz = msgsz(&u);
+		p->pullsz += sz;
+		spc -= sz;
+		j++;
+	}
+	p->npull = (j - up->lo);
 	p->nl = n;
 	return 0;
 }
@@ -583,11 +553,14 @@
  * grow the total height of the 
  */
 int
-splitleaf(Path *p, Kvp *mid)
+splitleaf(Path *up, Path *p, Kvp *mid)
 {
-	int i, j, copied, halfsz;
-	Blk *b, *l, *r;
-	Kvp t;
+	char buf[Msgmax];
+	int full, copied, spc, ok, halfsz;
+	int i, j, o, c;
+	Blk *b, *d, *l, *r;
+	Msg m;
+	Kvp v;
 
 	/*
 	 * If the block one entry up the
@@ -603,11 +576,18 @@
 		return -1;
 	}
 
-	j = 0;
+	d = l;
+	o = 0;
+	i = 0;
+	j = up->lo;
+	full = 0;
 	copied = 0;
-	halfsz = (2*b->nval + b->valsz)/2;
+	halfsz = (2*b->nval + b->valsz + up->sz) / 2;
+	if(halfsz > Blkspc/2)
+		halfsz = Blkspc/2;
+	spc = Blkspc - (halfsz + Msgmax);
 	assert(b->nval >= 4);
-	for(i = 0; i < b->nval; i++){
+	while(i < b->nval){
 		/*
 		 * We're trying to balance size,
 		 * but we need at least 2 nodes
@@ -614,21 +594,50 @@
 		 * in each half of the split if
 		 * we want a valid tree.
 		 */
-		if(i == b->nval-2)
+		if(d == l)
+		if((i == b->nval-2) || (i >= 2 && copied >= halfsz)){
+			o = 0;
+			d = r;
+			full = 0;
+			spc = Blkspc - (halfsz + Msgmax);
+			getval(b, i, mid);
+		}
+		ok = 1;
+		getval(b, i, &v);
+ 		c = pullmsg(up, j, &v, &m, &full, spc);
+		switch(c){
+		case -1:
+			setval(d, o++, &v);
+			copied += valsz(&v);
+			i++;
 			break;
-		if(i >= 2 && copied >= halfsz)
+		case 1:
+			/*
+			 * new values must always start as
+			 * an insertion, mutations come after.
+			 */
+			if(m.op != Oinsert){
+				print("%d(/%d), %d: %M not insert\n", i, b->nval, j, &m);
+				abort();
+			}
+			spc -= valsz(&m);
+			goto Copy;
+		case 0:
+			i++;
+			while(j < up->hi){
+		Copy:
+				ok = apply(&v, &m, buf, sizeof(buf));
+				p->pullsz += msgsz(&m);
+				j++;
+				if(j == up->hi || pullmsg(up, j, &v, &m, &full, spc) != 0)
+					break;
+			}
+			if(ok)
+				setval(d, o++, &v);
 			break;
-
-		getval(b, i, &t);
-		setval(l, j++, &t, 0);
-		copied += 2 + valsz(&t);
+		}
 	}
-	j = 0;
-	getval(b, i, mid);
-	for(; i < b->nval; i++){
-		getval(b, i, &t);
-		setval(r, j++, &t, 0);
-	}
+	p->npull = (j - up->lo);
 	p->op = POsplit;
 	p->nl = l;
 	p->nr = r;
@@ -639,13 +648,14 @@
 /*
  * Splits a node, returning the block that msg
  * would be inserted into. Split must never
- * grow the total height of the 
+ * grow the total height of the tree by more
+ * than one.
  */
 int
-splitpiv(Path *p, Path *pp, Kvp *mid)
+splitpiv(Path *, Path *p, Path *pp, Kvp *mid)
 {
-	int i, j, copied, halfsz;
-	Blk *b, *l, *r;
+	int i, o, copied, halfsz;
+	Blk *b, *d, *l, *r;
 	Kvp t;
 	Msg m;
 
@@ -662,7 +672,8 @@
 		freeblk(r);
 		return -1;
 	}
-	j = 0;
+	o = 0;
+	d = l;
 	copied = 0;
 	halfsz = (2*b->nval + b->valsz)/2;
 	assert(b->nval >= 4);
@@ -673,48 +684,34 @@
 		 * in each half of the split if
 		 * we want a valid tree.
 		 */
-		if(i == b->nval-2)
-			break;
-		if(i >= 2 && copied >= halfsz)
-			break;
-
-		if(i == p->idx){
-			j = copyup(l, j, pp, &copied);
-			continue;
+		if(d == l)
+		if((i == b->nval-2) || (i >= 2 && copied >= halfsz)){
+			o = 0;
+			d = r;
+			getval(b, i, mid);
 		}
-		getval(b, i, &t);
-		setval(l, j++, &t, 0);
-		copied += 2 + valsz(&t);
-	}
-	j = 0;
-	getval(b, i, mid);
-	for(; i < b->nval; i++){
 		if(i == p->idx){
-			j = copyup(r, j, pp, nil);
+			o = copyup(d, o, pp, &copied);
 			continue;
 		}
 		getval(b, i, &t);
-		setval(r, j++, &t, 0);
+		setval(d, o++, &t);
+		copied += valsz(&t);
 	}
-	i = 0;
-	for(j = 0; i < b->nbuf; i++, j++){
+	o = 0;
+	d = l;
+	for(i = 0; i < b->nbuf; i++){
 		if(i == p->lo)
-			i = p->hi;
+			i += pp->npull;
 		if(i == b->nbuf)
 			break;
 		getmsg(b, i, &m);
-		if(keycmp(&m, mid) >= 0)
-			break;
-		setmsg(l, j, &m);
+		if(d == l && keycmp(&m, mid) >= 0){
+			o = 0;
+			d = r;
+		}
+		setmsg(d, o++, &m);
 	}
-	for(j = 0; i < b->nbuf; i++, j++){
-		if(i == p->lo)
-			i = p->hi;
-		if(i == b->nbuf)
-			break;
-		getmsg(b, i, &m);
-		setmsg(r, j, &m);
-	}
 	p->op = POsplit;
 	p->nl = l;
 	p->nr = r;
@@ -723,34 +720,6 @@
 }
 
 int
-apply(Blk *b, Msg *m)
-{
-	int idx, same;
-	Kvp kv;
-
-	assert(b->type == Tleaf);
-	switch(m->op&0xf){
-	case Oinsert:
-		blkinsert(b, m);
-		break;
-	case Odelete:
-		blkdelete(b, m);
-		break;
-	case Owstat:
-		idx = blksearch(b, m, &kv, &same);
-		if(idx == -1 || !same){
-			fprint(2, "missing keyval %P [idx=%d, same=%d]\n", &kv, idx, same);
-			abort();
-		}
-		statupdate(&kv, m);
-		break;
-	default:
-		abort();
-	}
-	return 0;
-}
-
-int
 merge(Path *p, Path *pp, int idx, Blk *a, Blk *b)
 {
 	int i, o;
@@ -757,19 +726,16 @@
 	Msg m;
 	Blk *d;
 
-print("!!!! merge\n");
-showblk(a, "premerge left", 0);
-showblk(b, "premerge right", 0);
 	if((d = newblk(a->type)) == nil)
 		return -1;
 	o = 0;
 	for(i = 0; i < a->nval; i++){
 		getval(a, i, &m);
-		setval(d, o++, &m, 0);
+		setval(d, o++, &m);
 	}
 	for(i = 0; i < b->nval; i++){
 		getval(b, i, &m);
-		setval(d, o++, &m, 0);
+		setval(d, o++, &m);
 	}
 	if(a->type == Tpivot){
 		o = 0;
@@ -783,10 +749,9 @@
 		}
 	}
 	enqueue(d);
-showblk(d, "postmerge", 0);
 	p->midx = idx;
-	pp->op = POmerge;
 	pp->nl = d;
+	pp->op = POmerge;
 	pp->nr = nil;
 	return 0;
 }
@@ -803,15 +768,13 @@
 	Msg n;
 
 	used = 2*d->nbuf + d->bufsz;
-	for(i = 0; i < b->nbuf; i++){
+	for(i = *idx; i < b->nbuf; i++){
 		getmsg(b, i, &n);
-		print("check %P before %P: %d\n", &n.Kvp, &m->Kvp, keycmp(&n, m));
 		if(keycmp(m, &n) <= 0){
 			*idx = i + o;
 			return 0;
 		}
-		print("would copy %P before %P: %d\n", &n.Kvp, &m->Kvp, keycmp(&n, m));
-		used += 2 + msgsz(&n);
+		used += msgsz(&n);
 		if(used > Bufspc)
 			return 1;
 	}
@@ -844,9 +807,6 @@
 	Blk *d, *l, *r;
 	Msg m;
 
-showfs(2, "!!!! rotate\n");
-showblk(a, "prerotate left", 0);
-showblk(b, "prerotate right", 0);
 	l = newblk(a->type);
 	r = newblk(a->type);
 	if(l == nil || r == nil){
@@ -861,13 +821,12 @@
 	idx = 0;
 	for(i = 0; i < a->nval; i++){
 		getval(a, i, &m);
-		
 		if(d == l && (cp >= halfpiv || spillsbuf(d, a, b, &m, &idx))){
 			sp = idx;
 			d = r;
 			o = 0;
 		}
-		setval(d, o++, &m, 0);
+		setval(d, o++, &m);
 		cp += valsz(&m);
 	}
 	for(i = 0; i < b->nval; i++){
@@ -877,7 +836,7 @@
 			d = r;
 			o = 0;
 		}
-		setval(d, o++, &m, 0);
+		setval(d, o++, &m);
 		cp += valsz(&m);
 	}
 	if(a->type == Tpivot){
@@ -902,8 +861,6 @@
 	}
 	enqueue(l);
 	enqueue(r);
-showblk(l, "postrotate left", 0);
-showblk(r, "postrotate right", 0);
 	p->midx = midx;
 	pp->op = POrot;
 	pp->nl = l;
@@ -931,16 +888,12 @@
 	if(imbalance < 0)
 		imbalance *= -1;
 	/* works for leaf, because 0 always < Bufspc */
-	if(na + nb < Pivspc && ma + mb < Bufspc){
-print("!!!!!!! merging needed\n");
+	if(na + nb < (Pivspc - 4*Msgmax) && ma + mb < Bufspc)
 		return merge(p, pp, idx, a, b);
-	}else if(imbalance > 4*Msgmax){
-print("!!!!!!! rotating needed\n");
+	else if(imbalance > 4*Msgmax)
 		return rotate(p, pp, idx, a, b, (na + nb)/2);
-	}else{
-print("!!!!!!! no balancing needed\n");
+	else
 		return 0;
-	}
 }
 
 int
@@ -955,6 +908,8 @@
 	ret = -1;
 	if(p->idx == -1 || pp == nil || pp->nl == nil)
 		return 0;
+	if(pp->op != POmod || pp->op != POmerge)
+		return 0;
 
 	m = refblk(pp->nl);
 	if(idx-1 >= 0){
@@ -961,10 +916,10 @@
 		getval(p->b, idx-1, &kl);
 		if(kl.fill + blkfill(m) < Blkspc){
 			if((l = getblk(kl.bp, 0)) == nil)
-				goto out;
+				goto Out;
 			if(rotmerge(p, pp, idx-1, l, m) == -1)
-				goto out;
-			goto done;
+				goto Out;
+			goto Done;
 		}
 	}
 	if(idx+1 < p->b->nval){
@@ -971,15 +926,15 @@
 		getval(p->b, idx+1, &kr);
 		if(kr.fill + blkfill(m) < Blkspc){
 			if((r = getblk(kr.bp, 0)) == nil)
-				goto out;
+				goto Out;
 			if(rotmerge(p, pp, idx, m, r) == -1)
-				goto out;
-			goto done;
+				goto Out;
+			goto Done;
 		}
 	}
-done:
+Done:
 	ret = 0;
-out:
+Out:
 	putblk(m);
 	putblk(l);
 	putblk(r);
@@ -986,31 +941,12 @@
 	return ret;
 }
 
-int
-insertmsg(Blk *rb, Msg *msg, int nmsg, int sz)
-{
-	int i;
-
-	if(rb->type == Tleaf && !filledleaf(rb, sz))
-		for(i = 0; i < nmsg; i++)
-			apply(rb, &msg[i]);
-	else if(rb->type == Tpivot && !filledbuf(rb, nmsg, sz))
-		for(i = 0; i < nmsg; i++)
-			bufinsert(rb, &msg[i]);
-	else
-		return 1;
-	return 0;
-}
-
 static Blk*
-flush(Path *path, int npath, Msg *msg, int nmsg, int *redo)
+flush(Path *path, int npath, int *redo)
 {
 
-	Path *p, *pp;
-	Blk *b, *r;
+	Path *up, *p, *pp, *rp;
 	Kvp mid;
-	Msg m;
-	int i;
 
 	/*
 	 * The path must contain at minimum two elements:
@@ -1019,94 +955,66 @@
 	 * we put the new root into if the root gets split.
 	 */
 	assert(npath >= 2);
-	r = nil;
+	rp = nil;
 	pp = nil;
 	p = &path[npath - 1];
-	*redo = 1;
+	up = &path[npath - 2];
+	*redo = 0;
 	if(p->b->type == Tleaf){
-		if(!filledleaf(p->b, p[-1].sz)){
+		if(!filledleaf(p->b, up->sz)){
 			if(updateleaf(p-1, p) == -1)
-				goto error;
-			if(p == &path[1]){
-				r = p->nl;
-				*redo = insertmsg(r, msg, nmsg, path[0].sz);
-			}
+				goto Error;
 			enqueue(p->nl);
+			rp = p;
 		}else{
-			if(splitleaf(p, &mid) == -1)
-				goto error;
-			b = p->nl;
-			for(i = p[-1].lo; i < p[-1].hi; i++){
-				getmsg(p[-1].b, i, &m);
-				if(keycmp(&m, &mid) >= 0)
-					b = p->nr;
-				if(filledleaf(b, msgsz(&m)))
-					continue;
-				if(apply(b, &m) == -1)
-					goto error;
-			}
+
+			if(splitleaf(up, p, &mid) == -1)
+				goto Error;
 			enqueue(p->nl);
 			enqueue(p->nr);
 		}
-		assert(p->nl || (p->nl && p->nr));
 		p->midx = -1;
 		pp = p;
+		up--;
 		p--;
 	}
-	for(; p > path; p--){
+	while(p != path){
 		if(!filledpiv(p->b, 1)){
 			if(trybalance(p, pp, p->idx) == -1)
-				goto error;
+				goto Error;
 			/* If we merged the root node, break out. */
-			if(p == &path[1] && p->nl !=  nil && p->nr == nil && p->nl->nval == 2){
-				*redo = insertmsg(p[1].nl, msg, nmsg, path[0].sz);
-				freeblk(p[0].nl);
-				putblk(p[0].nl);
-				p[0].nl = nil;
-				r = p[1].nl;
-				break;
+			if(up == path && pp != nil && pp->op == POmerge && p->b->nval == 2){
+				rp = pp;
+				goto Out;
 			}
-			if(updatepiv(p, pp) == -1)
-				goto error;
-			for(i = p[-1].lo; i < p[-1].hi; i++){
-				getmsg(p[-1].b, i, &m);
-				if(filledbuf(p->nl, 1, msgsz(&m)))
-					break;
-				bufinsert(p->nl, &m);
-			}
-			if(p == &path[1] && !filledbuf(p->nl, nmsg, path[0].sz)){
-				r = p->nl;
-				*redo = insertmsg(r, msg, nmsg, path[0].sz);
-			}
+			if(updatepiv(up, p, pp) == -1)
+				goto Error;
 			enqueue(p->nl);
+			rp = p;
 		}else{
-			dprint("-- split\n");
-			if(splitpiv(p, pp, &mid) == -1)
-				goto error;
-			b = p->nl;
-			for(i = p[-1].lo; i < p[-1].hi; i++){
-				getmsg(p[-1].b, i, &m);
-				if(keycmp(&m, &mid) >= 0)
-					b = p->nr;
-				if(filledbuf(b, 1, msgsz(&m)))
-					continue;
-				bufinsert(b, &m);
-			}
+			if(splitpiv(up, p, pp, &mid) == -1)
+				goto Error;
 			enqueue(p->nl);
 			enqueue(p->nr);
 		}
 		pp = p;
+		up--;
+		p--;
 	}
-	if(path[1].nl != nil && path[1].nr != nil){
-		if((r = newblk(Tpivot)) == nil)
-			goto error;
-		path[0].nl = r;
-		*redo = insertmsg(r, msg, nmsg, path[0].sz);
-		copyup(path[0].nl, 0, &path[1], nil);
-		enqueue(r);
+	if(pp->nl != nil && pp->nr != nil){
+		rp = &path[0];
+		rp->nl = newblk(Tpivot);
+		if(rp->nl == nil)
+			goto Error;
+		rp->npull = pp->npull;
+		rp->pullsz = pp->pullsz;
+		copyup(rp->nl, 0, pp, nil);
+		enqueue(rp->nl);
 	}
-	return r;
-error:
+Out:
+	*redo = (rp->npull != (path[0].hi - path[0].lo));
+	return rp->nl;
+Error:
 	return nil;
 }
 
@@ -1155,7 +1063,7 @@
 			if(i < b->nval && keycmp(&m, &kv) >= 0)
 				break;
 			/* 2 bytes for offset, plus message size in buffer */
-			cursz += 2 + msgsz(&m);
+			cursz += msgsz(&m);
 		}
 		if(cursz > maxsz){
 			maxsz = cursz;
@@ -1165,11 +1073,19 @@
 			p->sz = maxsz;
 			p->idx = i - 1;
 			p->midx = i - 1;
+			p->npull = 0;
+			p->pullsz = 0;
 		}
 	}
 }
 
 int
+msgcmp(void *a, void *b)
+{
+	return keycmp((Msg*)a, (Msg*)b);
+}
+
+int
 btupsert(Tree *t, Msg *msg, int nmsg)
 {
 	int i, npath, redo, dh, sz, height;
@@ -1178,6 +1094,7 @@
 	Kvp sep;
 
 	sz = 0;
+	qsort(msg, nmsg, sizeof(Msg), msgcmp);
 	for(i = 0; i < nmsg; i++){
 		if(msg[i].nk + 2 > Keymax){
 			werrstr("overlong key");
@@ -1186,7 +1103,7 @@
 		sz += msgsz(&msg[i]);
 	}
 
-again:
+Again:
 	if((b = getroot(t, &height)) == nil){
 		werrstr("get root: %r");
 		return -1;
@@ -1207,6 +1124,9 @@
 	npath++;
 
 	path[0].sz = sz;
+	path[0].ins = msg;
+	path[0].lo = 0;
+	path[0].hi = nmsg;
 	while(b->type == Tpivot){
 		if(!filledbuf(b, nmsg, path[npath - 1].sz))
 			break;
@@ -1214,22 +1134,22 @@
 		getval(b, path[npath].idx, &sep);
 		b = getblk(sep.bp, 0);
 		if(b == nil)
-			goto error;
+			goto Error;
 		npath++;
 	}
 	path[npath].b = b;
 	path[npath].idx = -1;
 	path[npath].midx = -1;
-	path[npath].lo = 0;
-	path[npath].hi = 0;
+	path[npath].lo = -1;
+	path[npath].hi = -1;
+	path[npath].npull = 0;
+	path[npath].pullsz = 0;
 	npath++;
 
 	dh = -1;
-	rb = flush(path, npath, msg, nmsg, &redo);
-	if(redo)
-		goto again;
+	rb = flush(path, npath, &redo);
 	if(rb == nil)
-		goto error;
+		goto Error;
 
 	if(path[0].nl != nil)
 		dh = 1;
@@ -1247,7 +1167,6 @@
 	t->bp = rb->bp;
 	fs->nextgen++;
 	unlock(&t->lk);
-
 	freepath(path, npath);
 	free(path);
 	if(!checkfs()){
@@ -1255,11 +1174,11 @@
 		showpath(path, npath);
 		abort();
 	}
-	if(redo)
-		goto again;
 	snapshot();
+	if(redo)
+		goto Again;
 	return 0;
-error:
+Error:
 	freepath(path, npath);
 	free(path);
 	return -1;
@@ -1280,7 +1199,7 @@
 }
 
 static char*
-collect(Blk *b, Key *k, Kvp *r, int *done)
+collect(Blk *b, Key *k, Kvp *r, char *buf, int nbuf, int *done)
 {
 	int i, idx, same;
 	char *err;
@@ -1295,9 +1214,9 @@
 		getmsg(b, i, &m);
 		if(keycmp(&m, k) != 0)
 			break;
-		switch(m.op&0xf){
+		switch(m.op){
 		case Oinsert:
-			*r = m.Kvp;
+			cpkvp(r, &m, buf, nbuf);
 			*done = 1;
 			err = nil;
 			break;
@@ -1306,6 +1225,7 @@
 			err = Eexist;
 			break;
 		case Owstat:
+//			statupdate(r, &m);
 			break;
 		default:
 			return Efs;
@@ -1315,22 +1235,19 @@
 }
 
 char*
-btlookupat(Blk *b0, Key *k, Kvp *r, Blk **bp)
+btlookupat(Blk *b0, Key *k, Kvp *r, char *buf, int nbuf)
 {
 	int idx, same, done, r0;
-	char *ret;
+	char *ret, *err;
 	Blk *b, *c;
 
-	*bp = nil;
 	r0 = b0->ref;
 	b = refblk(b0);
 	assert(k != r);
 	while(b->type == Tpivot){
-		ret = collect(b, k, r, &done);
-		if(done){
-			*bp = b;
+		ret = collect(b, k, r, buf, nbuf, &done);
+		if(done)
 			return ret;
-		}
 		idx = blksearch(b, k, r, nil);
 		if(idx == -1){
 			assert(b0->ref == r0 + (b == b0) ? 1 : 0);
@@ -1343,27 +1260,25 @@
 		b = c;
 	}
 	assert(b->type == Tleaf);
+	err = Eexist;
 	blksearch(b, k, r, &same);
-	if(!same){
-		assert(b0->ref == r0 + (b == b0) ? 1 : 0);
-		putblk(b);
-		return Eexist;
+	if(same){
+		cpkvp(r, r, buf, nbuf);
+		err = nil;
 	}
-	assert(b0->ref == r0 + (b == b0) ? 1 : 0);
-	*bp = b;
-	return nil;
+	putblk(b);
+	return err;
 }
 
 char*
-btlookup(Tree *t, Key *k, Kvp *r, Blk **bp)
+btlookup(Tree *t, Key *k, Kvp *r, char *buf, int nbuf)
 {
 	char *ret;
 	Blk *b;
 
-	*bp = nil;
 	if((b = getroot(t, nil)) == nil)
 		return Efs;
-	ret = btlookupat(b, k, r, bp);
+	ret = btlookupat(b, k, r, buf, nbuf);
 	putblk(b);
 
 	return ret;
@@ -1418,101 +1333,62 @@
 	return nil;
 }
 
-int
-accum(Scan *s, Msg *m)
-{
-	vlong v;
-	char *p;
-	Dir *d;
-
-	d = &s->dir;
-	switch(m->op&0xf){
-	case Onop:
-	case Oinsert:
-		s->present = 1;
-		kv2dir(m, d);
-		break;
-	case Odelete:
-		s->present = 0;
-		break;
-	case Owstat:
-		p = m->v;
-		d->qid.vers++;
-		if(m->op & Owmtime){
-			v = GBIT64(p);
-			p += 8;
-			d->mtime = v;
-		}
-		if(m->op & Owsize){
-			v = GBIT64(p);
-			p += 8;
-			d->length = v;
-		}
-		if(m->op & Owmode){
-			v = GBIT32(p);
-			p += 4;
-			d->mode = v;
-		}
-		if(m->op & Owname){
-			fprint(2, "renames not yet supported\n");
-			abort();
-		}
-		if(p != m->v + m->nv){
-			fprint(2, "malformed wstat message");
-			abort();
-		}
-		break;
-	default:
-		abort();
-	}
-	return 0;
-
-}
-
 char *
 btnext(Scan *s, Kvp *r, int *done)
 {
 	Scanp *p;
-	int i, j, h, start;
-	Msg m, n, t;
+	int i, j, h, ok, start, srcbuf;
+	Msg m, n;
 	Kvp kv;
 
-Again:
 	/* load up the correct blocks for the scan */
+Again:
 	p = s->path;
 	h = s->root.ht;
 	*done = 0;
 	start = h;
-
-	for(i = h-1; i > 0; i--){
-		if(p[i].vi < p[i].b->nval || p[i].bi < p[i].b->nbuf)
+	srcbuf = -1;
+	for(i = h-1; i >= 0; i--){
+		if(p[i].b != nil
+		&&(p[i].vi < p[i].b->nval || p[i].bi < p[i].b->nbuf))
 			break;
 		if(i == 0){
 			*done = 1;
 			return nil;
 		}
-		start = i;
-		p[i-1].vi++;
-		putblk(p[i].b);
+		if(p[i].b != nil)
+			putblk(p[i].b);
+		p[i].b = nil;
 		p[i].vi = 0;
 		p[i].bi = 0;
-		p[i].b = nil;
+		p[i-1].vi++;
+		start = i;
 	}
-	for(i = start; i < h; i++){
-		getval(p[i-1].b, p[i-1].vi, &kv);
-		if((p[i].b = getblk(kv.bp, 0)) == nil)
-			return "error reading block";
+
+	if(p[start-1].vi < p[start-1].b->nval){
+		for(i = start; i < h; i++){
+			getval(p[i-1].b, p[i-1].vi, &kv);
+			if((p[i].b = getblk(kv.bp, 0)) == nil)
+				return "error reading block";
+		}
+	
+		/* find the minimum key along the path up */
+		m.op = Onop;
+		getval(p[h-1].b, p[h-1].vi, &m);
+	}else{
+		getmsg(p[start-1].b, p[start-1].bi, &m);
+		assert(m.op == Oinsert);
+		srcbuf = start-1;
 	}
 
-	/* find the minimum key along the path up */
-	m.op = Onop;
-	getval(p[h-1].b, p[h-1].vi, &m);
 	for(i = h-2; i >= 0; i--){
 		if(p[i].bi == p[i].b->nbuf)
 			continue;
 		getmsg(p[i].b, p[i].bi, &n);
-		if(keycmp(&n, &m) < 0)
+		if(keycmp(&n, &m) < 0){
+			srcbuf = i;
 			m = n;
+		}
 	}
 	if(m.nk < s->pfx.nk || memcmp(m.k, s->pfx.k, s->pfx.nk) != 0){
 		*done = 1;
@@ -1520,25 +1396,23 @@
 	}
 
 	/* scan all messages applying to the message */
-	getval(p[h-1].b, p[h-1].vi, &t);
-	if(keycmp(&m, &t) == 0){
-		t.op = Onop;
-		accum(s, &t);
+	ok = 1;
+	cpkvp(r, &m, s->kvbuf, sizeof(s->kvbuf));
+	if(srcbuf == -1)
 		p[h-1].vi++;
-	}
+	else
+		p[srcbuf].bi++;
 	for(i = h-2; i >= 0; i--){
 		for(j = p[i].bi; j < p[i].b->nbuf; j++){
-			getmsg(p[i].b, j, &t);
-			if(keycmp(&m, &t) != 0)
+			getmsg(p[i].b, j, &m);
+			if(keycmp(r, &m) != 0)
 				break;
-			accum(s, &t);
+			ok = apply(r, &m, s->kvbuf, sizeof(s->kvbuf));
 			p[i].bi++;
-			m = t;
 		}
 	}
-	if(m.op == Odelete)
+	if(!ok)
 		goto Again;
-	cpkvp(r, &m, s->kvbuf, sizeof(s->kvbuf));
 	return nil;
 }