shithub: gefs

Download patch

ref: f0563cbdbce1777d5672fee507acac7ab0d6dbc0
parent: 6b02773727cc3b9c434ef1ce5bbd94dd6b706aef
author: Ori Bernstein <ori@eigenstate.org>
date: Tue Sep 14 19:10:31 EDT 2021

blk: fix freelist implementaton

--- a/blk.c
+++ b/blk.c
@@ -2,11 +2,20 @@
 #include <libc.h>
 #include <fcall.h>
 #include <avl.h>
+#include <pool.h>
 
 #include "dat.h"
 #include "fns.h"
 
+typedef struct Range Range;
+struct Range {
+	vlong off;
+	vlong len;
+};
+
 static vlong	blkalloc_lk(Arena*);
+static int	blkdealloc(vlong);
+static void	cachedel(vlong);
 static Blk	*cacheblk(Blk*);
 static Blk	*lookupblk(vlong);
 
@@ -78,7 +87,7 @@
 	return &fs->arenas[n];
 }
 
-static Arena*
+Arena*
 getarena(vlong b)
 {
 	int i;
@@ -95,49 +104,33 @@
 static int
 freerange(Avltree *t, vlong off, vlong len)
 {
-	Arange *r, *s, q;
+	Arange *r, *s;
 
 	assert(len % Blksz == 0);
+	if((r = calloc(1, sizeof(Arange))) == nil)
+		return -1;
+	r->off = off;
+	r->len = len;
+	avlinsert(t, r);
 
-	q.off = off;
-	q.len = len;
-	r = (Arange*)avllookup(t, &q.Avl, -1);
-	if(r == nil){
-		if((r = calloc(1, sizeof(Arange))) == nil)
-			return -1;
-		r->off = off;
-		r->len = len;
-		avlinsert(t, r);
-	}else if(off+len == r->off){
-		r->len += len;
-		r->off -= len;
-	}else if(off == r->off + r->len){
-		r->len += len;
-	}else{
-		r = (Arange*)avlnext(r);
-		if(r != nil && off+len == r->off){
-			r->off -= len;
-			r->len += len;
-		} else {
-			if((r = calloc(1, sizeof(Arange))) == nil)
-				return -1;
-			r->off = off;
-			r->len = len;
-			avlinsert(t, r);
-		}
-	}
+Again:
 
-	/* merge with previous */
 	s = (Arange*)avlprev(r);
-	if(s != nil && s->off + s->len == r->off){
-		s->off += r->len;
+	if(s != nil && s->off+s->len == r->off){
 		avldelete(t, r);
+		s->len = s->len + r->len;
+		free(r);
+		r = s;
+		goto Again;
 	}
-	/* merge with next */
 	s = (Arange*)avlnext(r);
-	if(s != nil && r->off + r->len == s->off){
-		r->off += r->len;
-		avldelete(t, s);
+	if(s != nil && r->off+r->len == s->off){
+		avldelete(t, r);
+		s->off = r->off;
+		s->len = s->len + r->len;
+		free(r);
+		r = s;
+		goto Again;
 	}
 	return 0;
 }
@@ -146,30 +139,32 @@
 grabrange(Avltree *t, vlong off, vlong len)
 {
 	Arange *r, *s, q;
+	vlong l;
 
 	assert(len % Blksz == 0);
 	q.off = off;
 	q.len = len;
 	r = (Arange*)avllookup(t, &q.Avl, -1);
-	if(r == nil || off + len > r->off + r->len){
-		fprint(2, "no blocks: %llx+%llx in:\n", off, len);
-		for(r = (Arange*)avlmin(t); r != nil; r = (Arange*)avlnext(r))
-			fprint(2, "\t%llx+%llx\n", r->off, r->len);
+	if(r == nil || off + len > r->off + r->len)
 		abort();
-		return -1;
-	}
-	if(off == r->off)
+
+	print("\tmerge (%llx,%llx) (%llx,%llx)\n", off, len, r->off, r->len);
+	if(off == r->off){
 		r->off += len;
-	else if(off == r->off + r->len)
 		r->len -= len;
-	else if(off + len > r->off && off > r->off + r->len){
+	}else if(off + len == r->off + r->len){
+		r->len -= len;
+	}else if(off > r->off && off+len < r->off + r->len){
 		if((s = malloc(sizeof(Arange))) == nil)
 			return -1;
-		s->off = off;
-		s->len = r->len - (off - r->off) - len;;
+		l = r->len;
+		s->off = off + len;
 		r->len = off - r->off;
+		s->len = l - r->len - len;
 		avlinsert(t, s);
-	}
+	}else
+		abort();
+
 	if(r->len == 0){
 		avldelete(t, r);
 		free(r);
@@ -177,29 +172,16 @@
 	return 0;
 }
 
-int
-synclog(Blk *b, vlong link, vlong sz)
-{
-
-	if(sz == -1)
-		sz = b->logsz;
-	PBIT64(b->data+8, link);
-	PBIT64(b->data+16, sz);
-	finalize(b);
-fprint(2, "synclog: @%llx\n", b->off);
-showfree("synclog");
-	return pwrite(fs->fd, b->buf, Blksz, b->off);
-}
-
 Blk*
-logappend(Arena *a, Blk *lb, vlong off, vlong len, int op, vlong graft)
+logappend(Arena *a, Blk *lb, vlong off, vlong len, int op)
 {
 	Blk *pb;
-	vlong o, n;
+	vlong o;
 	char *p;
 
 	assert(off % Blksz == 0);
-	if(lb == nil || lb->logsz+16+24 > Blkspc){
+	assert(op == LgAlloc || op == LgFree);
+	if(lb == nil || lb->logsz > Logspc - 8){
 		pb = lb;
 		if((o = blkalloc_lk(a)) == -1)
 			return nil;
@@ -209,26 +191,39 @@
 		lb->flag |= Bdirty;
 		lb->type = Tlog;
 		lb->off = o;
-		lb->logsz = 0;
-		if(synclog(lb, graft, 0) == -1)
+		lb->logsz = Loghdsz;
+		p = lb->data + lb->logsz;
+		PBIT64(p + 0, (uvlong)LgEnd);
+		finalize(lb);
+		if(syncblk(lb) == -1){
+			free(lb);
 			return nil;
+		}
 
 		a->logtl = lb;
-		if(pb != nil)
-			if(synclog(pb, lb->off, -1) == -1)
+		if(pb != nil){
+			p = pb->data + pb->logsz;
+			PBIT64(p + 0, lb->off|LgChain);
+			finalize(pb);
+			if(syncblk(pb) == -1)
 				return nil;
+		}
 	}
-	n = 8;
-	p = lb->data + 24 + lb->logsz;
-	if(len == Blksz && op == LgAlloc)
-		op = LgAlloc1;
-	off |= op;
-	PBIT64(p, off);
-	if(op == LgAlloc || op == LgFree){
+
+	p = lb->data + lb->logsz;
+	if(len == Blksz){
+		off |= (op & ~Lg2w);
+		PBIT64(p, off);
+		lb->logsz += 8;
+	}else{
+		off |= op;
+		PBIT64(p+0, off);
 		PBIT64(p+8, len);
-		n += 8;
+		lb->logsz += 16;
 	}
-	lb->logsz += n;
+	/* this gets overwritten by the next append */
+	p = lb->data + lb->logsz;
+	PBIT64(p, (uvlong)LgEnd);
 	return lb;
 }
 
@@ -243,12 +238,12 @@
 {
 	Blk *b;
 
-	if((b = logappend(a, a->logtl, off, Blksz, op, -1)) == nil)
+	if((b = logappend(a, a->logtl, off, Blksz, op)) == nil)
 		return -1;
-	if(a->logtl == nil){
+	if(a->log == -1)
 		a->log = b->off;
+	if(b != a->logtl)
 		a->logtl = b;
-	}
 	return 0;
 }
 
@@ -261,67 +256,75 @@
 	char *p, *d;
 	int op, i, n;
 
+
 	bp = a->log;
-	while(bp != -1){
-		dprint("log block: %llx\n", bp);
-		if((b = readblk(bp, 0)) == nil)
-			return -1;
-		p = b->data;
-		bh = GBIT64(p + 0);
-		bp = GBIT64(p + 8);
-		b->logsz = GBIT64(p + 16);
-		/* the hash covers the log and offset */
-		if(bh != siphash(p+8, Blkspc-8)){
-			werrstr("corrupt log");
-			return -1;
+
+Nextblk:
+	dprint("block: %llx\n", bp);
+	if((b = readblk(bp, 0)) == nil)
+		return -1;
+	p = b->data;
+	bh = GBIT64(p + 0);
+	/* the hash covers the log and offset */
+	if(bh != siphash(p+8, Blkspc-8)){
+		werrstr("corrupt log");
+		return -1;
+	}
+	for(i = Loghdsz; i < Logspc; i += n){
+		d = b->data + i;
+		ent = GBIT64(d);
+		op = ent & 0xff;
+		off = ent & ~0xff;
+		n = (op & Lg2w) ? 16 : 8;
+		switch(op){
+		case LgEnd:
+			dprint("log@%d: end\n", i);
+			/*
+			 * since we want the next insertion to overwrite
+			 * this, don't include the size in this entry.
+			 */
+			b->logsz = i;
+			return 0;
+		case LgChain:
+			bp = off & ~0xff;
+			dprint("log@%d: chain %llx\n", i, bp);
+			b->logsz = i+n;
+			goto Nextblk;
+			break;
+
+		case LgFlush:
+			dprint("log@%d: flush: %llx\n", i, off>>8);
+			lock(&fs->genlk);
+			fs->gen = off >> 8;
+			unlock(&fs->genlk);
+			break;
+		case LgAlloc:
+		case LgAlloc1:
+			len = (op & Lg2w) ? GBIT64(d+8) : Blksz;
+			dprint("log@%d alloc: %llx+%llx\n", i, off, len);
+			if(grabrange(a->free, off & ~0xff, len) == -1)
+				return -1;
+			break;
+		case LgFree:
+		case LgFree1:
+			len = (op & Lg2w) ? GBIT64(d+8) : Blksz;
+			dprint("log@%d free: %llx+%llx\n", i, off, len);
+			if(freerange(a->free, off & ~0xff, len) == -1)
+				return -1;
+			break;
+		case LgRef:
+		case LgUnref:
+			fprint(2, "unimplemented ref op at log@%d: log op %d\n", i, op);
+			break;
+		default:
+			n = 0;
+			dprint("log@%d: log op %d\n", i, op);
+			abort();
+			break;
 		}
-		for(i = 0; i < b->logsz; i += n){
-			d = p + i + 24;
-			ent = GBIT64(d);
-			op = ent & 0xff;
-			off = ent & ~0xff;
-			switch(op){
-			case LgFlush:
-				dprint("log@%d: flush: %llx\n", i, off>>8);
-				n = 8;
-				lock(&fs->genlk);
-				fs->gen = off >> 8;
-				unlock(&fs->genlk);
-				break;
-			case LgAlloc1:
-				n = 8;
-				dprint("log@%d alloc1: %llx\n", i, off);
-				if(grabrange(a->free, off & ~0xff, Blksz) == -1)
-					return -1;
-				break;
-			case LgAlloc:
-				n = 16;
-				len = GBIT64(d+8);
-				dprint("log@%d alloc: %llx+%llx\n", i, off, len);
-				if(grabrange(a->free, off & ~0xff, len) == -1)
-					return -1;
-				break;
-			case LgFree:
-				n = 16;
-				len = GBIT64(d+8);
-				dprint("log@%d free: %llx+%llx\n", i, off, len);
-				if(freerange(a->free, off & ~0xff, len) == -1)
-					return -1;
-				break;
-			case LgRef1:
-			case LgUnref1:
-				n = 8;
-				fprint(2, "unimplemented ref op at log@%d: log op %d\n", i, op);
-				break;
-			default:
-				n = 0;
-				dprint("log@%d: log op %d\n", i, op);
-				abort();
-				break;
-			}
-		}
+showfree("after");
 	}
-	return 0;
+	return -1;
 }
 
 int
@@ -328,24 +331,26 @@
 compresslog(Arena *a)
 {
 	Arange *r;
-	vlong *log, *p, bp, hd, hh, graft;
+	Range *log, *nlog;
+	vlong v, bp, nb, graft, oldhd;
 	int i, n, sz;
-	Blk *pb, *ab, *b;
+	Blk *hd, *ab, *b;
+	char *p;
 
 showfree("precompress");
 fprint(2, "compress start\n");
+
 	/*
-	 * Sync the current log to disk. While
+	 * Sync the current log to disk, and
+	 * set up a new block log tail.  While
 	 * compressing the log, nothing else is
 	 * using this arena, so any allocs come
-	 * from the log compression.
+	 * from the log compression, and go into
+	 * this new log segment.
 	 *
 	 * A bit of copy paste from newblk,
-	 * because otherwise we have bootstrap
-	 * issues.
-	 *
-	 * Set up the head of the new log as
-	 * an empty block.
+	 * because otherwise we have a deadlock
+	 * allocating the block.
 	 */
 	if((bp = blkalloc_lk(a)) == -1)
 		return -1;
@@ -356,21 +361,25 @@
 	b->off = bp;
 	b->ref = 1;
 	b->data = b->buf + Hdrsz;
-checkfs();
-	if(synclog(b, -1, 0) == -1)
+	b->logsz = Loghdsz;
+
+	PBIT64(b->data+b->logsz, (uvlong)LgEnd);
+	finalize(b);
+	if(syncblk(b) == -1){
+		free(b);
 		return -1;
-checkfs();
+	}
 
-	/*
-	 * When reaming or loading, we may not
-	 * have set up the log.
-	 */
-checkfs();
-	if(a->logtl != nil)
-		synclog(a->logtl, b->off, -1);
-checkfs();
 	graft = b->off;
+	if(a->logtl != nil){
+		finalize(a->logtl);
+		if(syncblk(a->logtl) == -1){
+			free(b);
+			return -1;
+		}
+	}
 	a->logtl = b;
+print("\tnew log block: %llx\n", b->off);
 
 	/*
 	 * Prepare what we're writing back.
@@ -380,21 +389,19 @@
 	 */
 	n = 0;
 	sz = 512;
-checkfs();
-	if((log = malloc(2*sz*sizeof(vlong))) == nil)
+	if((log = malloc(sz*sizeof(Range))) == nil)
 		return -1;
-checkfs();
 	for(r = (Arange*)avlmin(a->free); r != nil; r = (Arange*)avlnext(r)){
 		if(n == sz){
 			sz *= 2;
-			if((p = realloc(log, 2*sz*sizeof(vlong))) == nil){
+			if((nlog = realloc(log, sz*sizeof(Range))) == nil){
 				free(log);
 				return -1;
 			}
-			log = p;
+			log = nlog;
 		}
-		log[2*n+0] = r->off;
-		log[2*n+1] = r->len;
+		log[n].off = r->off;
+		log[n].len = r->len;
 		n++;
 	}
 	if((b = newblk(Tlog)) == nil){
@@ -401,45 +408,54 @@
 		free(log);
 		return -1;
 	}
-checkfs();
-	pb = b;
-	hd = b->off;
-	hh = -1;
-	PBIT64(b->data +  8, graft);	/* link */
-	PBIT64(b->data + 16, 0ULL);	/* count */
-checkfs();
-
-	for(i = 0; i < n; i++){
-		if((b = logappend(a, b, log[2*i], log[2*i+1], LgFree, graft)) == nil)
+	hd = b;
+	b->logsz = Loghdsz;
+	for(i = 0; i < n; i++)
+		if((b = logappend(a, b, log[i].off, log[i].len, LgFree)) == nil)
 			return -1;
-checkfs();
-		if(b != pb){
-checkfs();
-			synclog(pb, b->off, -1);
-checkfs();
-			if(pb->off == hd)
-				hh = blkhash(pb);
-			b = pb;
-		}
-	}
+	p = b->data + b->logsz;
+	PBIT64(p, LgChain|graft);
 	free(log);
-checkfs();
-	if(synclog(b, graft, -1) == -1)
+	finalize(b);
+	if(syncblk(b) == -1)
 		return -1;
-checkfs();
-	if(pb->off == hd)
-		hh = blkhash(pb);
-checkfs();
 
-	a->log = hd;
-	a->logh = hh;
+	oldhd = a->log;
+	a->log = hd->off;
+	a->logh = blkhash(hd);
 	ab = a->b;
-	PBIT64(ab->data + 0, hd);
-	PBIT64(ab->data + 8, hh);
+	PBIT64(ab->data + 0, a->log);
+	PBIT64(ab->data + 8, a->logh);
+	if(syncblk(ab) == -1)
+		return -1;
 checkfs();
-	pwrite(fs->fd, ab->buf, Blksz, ab->off);
-checkfs();
-fprint(2, "compress done\n");
+showfree("postcompress");
+	if(oldhd != -1){
+		for(bp = oldhd; bp != -1; bp = nb){
+			nb = -1;
+			if((b = readblk(bp, 0)) == nil)
+				return -1;
+			for(i = Loghdsz; i < Logspc; i += n){
+				p = b->data + i;
+				v = GBIT64(p);
+				n = (v & Lg2w) ? 16 : 8;
+				if((v&0xff) == LgChain){
+					nb = v & ~0xff;
+					break;
+				}else if((v&0xff) == LgEnd){
+					nb = -1;
+					break;
+				}
+			}
+			fprint(2, "\tpostscan: freeing %llx\n", bp);
+			if(blkdealloc(bp) == -1)
+				return -1;
+		}
+	}
+	finalize(a->logtl);
+	if(syncblk(a->logtl) == -1)
+		return -1;
+showfree("postreclaim");
 	return 0;
 }
 /*
@@ -475,11 +491,12 @@
 		avldelete(t, r);
 		free(r);
 	}
+fprint(2, "\talloc %llx\n", b);
 	return b;
 }
 
-int
-blkrelease(vlong b)
+static int
+blkdealloc(vlong b)
 {
 	Arena *a;
 	int r;
@@ -487,6 +504,7 @@
 	r = -1;
 	a = getarena(b);
 	lock(a);
+	cachedel(b);
 	if(freerange(a->free, b, Blksz) == -1)
 		goto out;
 	if(logalloc(a, b, LgFree) == -1)
@@ -638,7 +656,7 @@
 }
 
 static void
-cachedel(Blk *del)
+cachedel(vlong del)
 {
 	Bucket *bkt;
 	Blk *b, **p;
@@ -645,19 +663,21 @@
 	u32int h;
 
 	/* FIXME: better hash. */
-	h = ihash(del->off);
+	h = ihash(del);
 
 	bkt = &fs->cache[h % fs->cmax];
 	lock(bkt);
 	p = &bkt->b;
 	for(b = bkt->b; b != nil; b = b->hnext){
-		if(b == del){
-			*p = del->hnext;
+		if(b->off == del){
+			*p = b->hnext;
 			break;
 		}
 		p = &b->hnext;
 	}
 	unlock(bkt);
+	if(b == nil)
+		return;
 
 	lock(&fs->lrulk);
 	if(b->cnext != nil)
@@ -671,10 +691,18 @@
 	unlock(&fs->lrulk);
 }
 
+int
+syncblk(Blk *b)
+{
+	dprint("\tsyncblk: %llx+%llx\n", b->off, Blksz);
+	return pwrite(fs->fd, b->buf, Blksz, b->off);
+}
+
+
 void
 enqueue(Blk *b)
 {
-	if(pwrite(fs->fd, b->buf, Blksz, b->off) == -1){
+	if(syncblk(b) == -1){
 		ainc(&fs->broken);
 		fprint(2, "write: %r");
 		return;
@@ -777,7 +805,7 @@
 
 	a = getarena(b->off);
 	lock(a);
-	r = logalloc(a, b->off, LgRef1);
+	r = logalloc(a, b->off, LgRef);
 	unlock(a);
 	return r;
 }
@@ -790,7 +818,7 @@
 
 	a = getarena(b->off);
 	lock(a);
-	r = logalloc(a, b->off, LgUnref1);
+	r = logalloc(a, b->off, LgUnref);
 	unlock(a);
 	return r;
 }
@@ -818,7 +846,7 @@
 	if((b->flag & (Bdirty|Bqueued)) == Bdirty)
 		enqueue(b);
 	if(adec(&b->ref) == 0){
-		cachedel(b);
+		cachedel(b->off);
 		free(b);
 	}
 }
@@ -835,7 +863,8 @@
 	 * TODO: what to do if we fail to log a free here??
 	 * This is already an error path!
 	 */
-	logalloc(a, b->off, LgRef1);
+	logalloc(a, b->off, LgUnref);
+	blkdealloc(b->off);
 	unlock(a);
 	free(b);
 }
@@ -848,14 +877,18 @@
 
 	dprint("syncing\n");
 	r = 0;
-	for(i = 0; i < fs->narena; i++)
-		if(synclog(fs->arenas[i].logtl, -1, -1) == -1)
+	for(i = 0; i < fs->narena; i++){
+		b = fs->arenas[i].logtl;
+		finalize(b);
+		if(syncblk(b) == -1)
 			r = -1;
+	}
+	/* FIXME: hit it with a big hammer -- flush the whole cache */
 	for(b = fs->chead; b != nil; b = b->cnext){
 //		dprint("sync %p\n", b);
 		if(!(b->flag & Bdirty))
 			continue;
-		if(pwrite(fs->fd, b->buf, Blksz, b->off) == -1)
+		if(syncblk(b) == -1)
 			r = -1;
 	}
 	return r;
--- a/check.c
+++ b/check.c
@@ -9,6 +9,22 @@
 char	spc[128];
 
 static int
+isfree(vlong bp)
+{
+	Arange *r, q;
+	Arena *a;
+
+	q.off = bp;
+	q.len = Blksz;
+
+	a = getarena(bp);
+	r = (Arange*)avllookup(a->free, &q, -1);
+	if(r == nil)
+		return 0;
+	return bp < (r->off + r->len);
+}
+
+static int
 badblk(Blk *b, int h, Kvp *lo, Kvp *hi)
 {
 	Kvp x, y;
@@ -41,6 +57,10 @@
 			fail++;
 		}
 		if(b->type == Tpivot){
+			if(isfree(x.bp)){
+				fprint(2, "freed block in use: %llx\n", x.bp);
+				fail++;
+			}
 			if((c = getblk(x.bp, x.bh)) == nil){
 				fprint(2, "corrupt block: %r\n");
 				fail++;
@@ -71,6 +91,31 @@
 	return fail;
 }
 
+static int
+badfree(void)
+{
+	Arange *r, *n;
+	int i, fail;
+
+	fail = 0;
+	for(i = 0; i < fs->narena; i++){
+		r = (Arange*)avlmin(fs->arenas[i].free);
+		for(n = (Arange*)avlnext(r); n != nil; n = (Arange*)avlnext(n)){
+			if(r->off >= n->off){
+				fprint(2, "misordered length %llx >= %llx\n", r->off, n->off);
+				fail++;
+			}
+			if(r->off+r->len >= n->off){
+				fprint(2, "overlaping range %llx+%llx >= %llx\n", r->off, r->len, n->off);
+				abort();
+				fail++;
+			}
+			r = n;
+		}
+	}
+	return fail;
+}
+
 /* TODO: this will grow into fsck. */
 int
 checkfs(void)
@@ -79,6 +124,8 @@
 	Blk *b;
 
 	ok = 1;
+	if(badfree())
+		ok = 0;
 	if((b = getroot(&height)) != nil){
 		if(badblk(b, height-1, nil, 0))
 			ok = 0;
@@ -177,7 +224,7 @@
 		return;
 	print("=== %s\n", m);
 	for(i = 0; i < fs->narena; i++){
-		print("allocs[%d]:\n", i);
+		print("arena %d:\n", i);
 		for(r = (Arange*)avlmin(fs->arenas[i].free); r != nil; r = (Arange*)avlnext(r))
 			print("\t%llx+%llx\n", r->off, r->len);
 	}
--- a/dat.h
+++ b/dat.h
@@ -38,16 +38,19 @@
 	 * there is no way to get a valid split of a
 	 * maximally filled tree.
 	 */
+	Loghdsz	= 8,			/* log hash */
 	Keymax	= 32,			/* key data limit */
 	Inlmax	= 128,			/* inline data limit */
 	Ptrsz	= 18,			/* off, hash, fill */
 	Kvmax	= Keymax + Inlmax,	/* Key and value */
 	Kpmax	= Keymax + Ptrsz,	/* Key and pointer */
+	
 
 	Hdrsz	= 10,
 	Blkspc	= Blksz - Hdrsz,
 	Bufspc  = Blkspc / 2,
 	Pivspc	= Blkspc - Bufspc,
+	Logspc	= Blkspc - Loghdsz,
 	Leafspc = Blkspc,
 	Msgmax  = 1 + (Kvmax > Kpmax ? Kvmax : Kpmax)
 };
@@ -124,9 +127,7 @@
  * The log blocks have this layout, and are one of
  * two types of blocks that get overwritten in place:
  *
- *	hash[8]		The hash of the rest of the block
- *	link[8]		The next block, or -1 if nil.
- *	entsz[8]	number of bytes of log entries.
+ *	hash[8]		The hash of the previous log block
  *
  *	The remainder of the block is filled with log
  *	entries. Each log entry has at least 8 bytes
@@ -154,7 +155,7 @@
  *
  *	nval[2]
  *	valsz[2]
- *	_pad[4]
+ *	_pad[4]sure, 
  *
  * Within these nodes, pointers have the following
  * layout:
@@ -196,12 +197,19 @@
  * Operations for the allocation log.
  */
 enum {
-	LgFree,		/* free a range: 16 byte */
-	LgAlloc,	/* alloc a range: 16 byte */
+	/* 1-wide entries */
 	LgAlloc1,	/* alloc a block */
-	LgRef1,		/* ref a block */
-	LgUnref1,	/* free a block */
+	LgFree1,	/* alloc a block */
+	LgRef,		/* ref a block */
+	LgUnref,	/* free a block */
 	LgFlush,	/* flush log, bump gen */
+	LgChain,	/* point to next log block */
+	LgEnd,		/* last entry in log */	
+
+	/* 2-wide entries */
+	Lg2w	= 1<<5,
+	LgAlloc = LgAlloc1|Lg2w,	/* alloc a range */
+	LgFree	= LgFree1|Lg2w,		/* free a range */
 };
 
 struct Arange {
--- a/fns.h
+++ b/fns.h
@@ -15,7 +15,9 @@
 Blk*	getblk(vlong, uvlong);
 Blk*	pinblk(Blk*);
 Blk*	readblk(vlong, int);
+Arena*	getarena(vlong);
 void	putblk(Blk*);
+int	syncblk(Blk*);
 void	enqueue(Blk*);
 void	freeblk(Blk*);
 ushort	blkfill(Blk*);
@@ -30,7 +32,6 @@
 void	loadfs(char*);
 int	sync(void);
 int	loadlog(Arena *a);
-int	synclog(Blk*, vlong, vlong);
 int	endfs(void);
 int	compresslog(Arena *a);
 
--- a/fs.c
+++ b/fs.c
@@ -256,7 +256,7 @@
 	int w, n;
 
 	r->tag = m->tag;
-	dprint("→ %F\n", r);
+//	dprint("→ %F\n", r);
 	if((n = convS2M(r, buf, sizeof(buf))) == 0)
 		abort();
 	qlock(m->wrlk);
@@ -471,7 +471,7 @@
 		k.k = kbuf;
 		k.nk = p - kbuf;
 //showfs("walking");
-dprint("looking up %K\n", &k);
+//dprint("looking up %K\n", &k);
 		if((estr = fslookup(o, &k, &kv, &b, 0)) != nil)
 			break;
 		if(kv2dir(&kv, &d) == -1){
@@ -1015,7 +1015,7 @@
 		}
 */
 		m->wrlk = wrlk;
-		dprint("← %F\n", &m->Fcall);
+//		dprint("← %F\n", &m->Fcall);
 		switch(m->type){
 		/* sync setup */
 		case Tversion:	fsversion(m, &msgmax);	break;
--- a/ream.c
+++ b/ream.c
@@ -59,13 +59,15 @@
 	b->data = b->buf + Hdrsz;
 	b->flag |= Bdirty;
 
-	p = b->data;
-	PBIT64(p+24, off|LgFree);		/* off */
-	PBIT64(p+32, asz);			/* len */
-	PBIT64(p+40, b->off|LgAlloc);		/* off */
-	PBIT64(p+48, Blksz);			/* len */
+	p = b->data+Loghdsz;
+	PBIT64(p+ 0, off|LgFree);		/* off */
+	PBIT64(p+ 8, asz);			/* len */
+	PBIT64(p+16, b->off|LgAlloc);		/* off */
+	PBIT64(p+24, Blksz);			/* len */
+	PBIT64(p+32, (uvlong)LgEnd);		/* done */
 	finalize(b);
-	synclog(b, -1, 32);
+	if(syncblk(b) == -1)
+		sysfatal("ream: init log");
 
 	bh = blkhash(b);
 	bo = b->off;
@@ -78,7 +80,7 @@
 	PBIT64(p+0, bo);
 	PBIT64(p+8, bh);
 	finalize(b);
-	if(pwrite(fs->fd, b->buf, Blksz, b->off) == -1)
+	if(syncblk(b) == -1)
 		sysfatal("ream: write arena: %r");
 }
 
--- a/tree.c
+++ b/tree.c
@@ -1136,17 +1136,12 @@
 	Msg m;
 
 	*done = 0;
-dprint("collecting %K\n", k);
-//showblk(b, "collecting from", 0);
 	idx = bufsearch(b, k, &m, &same);
-dprint("idx=%d, same=%d\n", idx, same);
 	if(!same)
 		return nil;
 	err = Eexist;
 	for(i = idx; i < b->nbuf; i++){
 		getmsg(b, i, &m);
-dprint("msg=%M\n", &m);
-dprint("cmp=%d\n", keycmp(&m, k));
 		if(keycmp(&m, k) != 0)
 			break;
 		switch(m.op){
@@ -1349,18 +1344,27 @@
 int
 snapshot(void)
 {
+	Arena *a;
 	Blk *s;
-	int r;
+	int i, r;
 
+	r = 0;
 	s = fs->super;
+
 	qlock(&fs->snaplk);
 	lock(&fs->rootlk);
 	fillsuper(s);
 	finalize(s);
 	unlock(&fs->rootlk);
-print("snap superblock @%llx\n", s->off);
-showfree("snap");
-	r = pwrite(fs->fd, s->buf, Blksz, s->off);
+
+	for(i = 0; i < fs->narena; i++){
+		a = &fs->arenas[i];
+		finalize(a->logtl);
+		if(syncblk(a->logtl) == -1)
+			r = -1;
+	}
+	if(r != -1)
+		r = syncblk(s);
 	qunlock(&fs->snaplk);
 	return r;
 }