shithub: gefs

Download patch

ref: 3878b725987e7429d21fb916672523ac81edb0ca
parent: 3482fd60b47db33b1873fe1912799360ff38c493
author: Ori Bernstein <ori@eigenstate.org>
date: Sat Sep 30 18:43:00 EDT 2023

gefs: remove deferred frees, use proper snapshots

--- a/blk.c
+++ b/blk.c
@@ -15,7 +15,7 @@
 
 static vlong	blkalloc_lk(Arena*, int);
 static vlong	blkalloc(int, uint);
-static int	blkdealloc_lk(vlong);
+static int	blkdealloc_lk(Arena*, vlong);
 static Blk*	initblk(Blk*, vlong, vlong, int);
 
 int
@@ -286,7 +286,7 @@
  * recursion.
  */
 static int
-logappend(Arena *a, vlong off, vlong len, int op, int defer)
+logappend(Arena *a, vlong off, vlong len, int op)
 {
 	vlong o, ao;
 	Blk *nl, *lb;
@@ -293,7 +293,7 @@
 	char *p, *name;
 
 	nl = nil;
-	lb = defer ? a->defertl : a->logtl;
+	lb = a->logtl;
 	assert((off & 0xff) == 0);
 	assert(op == LogAlloc || op == LogFree || op == LogSync);
 	if(op != LogSync){
@@ -326,10 +326,7 @@
 		if(chainlog(lb, nl->bp.addr) == -1)
 			return -1;
 		lb = nl;
-		if(defer)
-			a->defertl = lb;
-		else
-			a->logtl = lb;
+		a->logtl = lb;
 		a->nlog++;
 	}
 
@@ -355,7 +352,7 @@
 	 * current allocation. so that we don't
 	 * reorder allocs and frees.
 	 */
-	if(nl != nil && !defer){
+	if(nl != nil){
 		p = lb->data + lb->logsz;
 		ao = nl->bp.addr|LogAlloc1;
 		PACK64(p, ao);
@@ -599,7 +596,7 @@
 			}
 			lock(a);
 			cachedel(b->bp.addr);
-			if(blkdealloc_lk(ba) == -1){
+			if(blkdealloc_lk(a, ba) == -1){
 				unlock(a);
 				return -1;
 			}
@@ -619,7 +616,7 @@
 int
 syncbarrier(Arena *a, vlong gen)
 {
-	if(logappend(a, gen<<8, 0, LogSync, 0) == -1)
+	if(logappend(a, gen<<8, 0, LogSync) == -1)
 		return -1;
 	if(a->loghd.addr == -1)
 		a->loghd = a->logtl->bp;
@@ -667,14 +664,12 @@
 }
 
 static int
-blkdealloc_lk(vlong b)
+blkdealloc_lk(Arena *a, vlong b)
 {
-	Arena *a;
 	int r;
 
 	r = -1;
-	a = getarena(b);
-	if(logappend(a, b, Blksz, LogFree, 0) == -1)
+	if(logappend(a, b, Blksz, LogFree) == -1)
 		return -1;
 	if(a->loghd.addr == -1)
 		a->loghd = a->logtl->bp;
@@ -687,19 +682,14 @@
 }
 
 int
-blkrelease(vlong b)
+blkdealloc(vlong b)
 {
 	Arena *a;
 	int r;
 
 	a = getarena(b);
-	lock(a);
-	r = -1;
-	a = getarena(b);
-	if(logappend(a, b, Blksz, LogFree, 1) == -1)
-		return -1;
-	if(a->deferhd.addr == -1)
-		a->deferhd = a->defertl->bp;
+ 	lock(a);
+	r = blkdealloc_lk(a, b);
 	unlock(a);
 	return r;
 }
@@ -736,7 +726,7 @@
 		unlock(a);
 		goto Again;
 	}
-	if(logappend(a, b, Blksz, LogAlloc, 0) == -1){
+	if(logappend(a, b, Blksz, LogAlloc) == -1){
 		unlock(a);
 		return -1;
 	}
@@ -1010,6 +1000,7 @@
 {
 	ulong e, ge;
 	Bfree *p, *n;
+	Arena *a;
 	int i;
 
 	ge = agetl(&fs->epoch);
@@ -1024,9 +1015,12 @@
 	while(p != nil){
 		n = p->next;
 		cachedel(p->bp.addr);
-		blkrelease(p->bp.addr);
 		if(p->b != nil)
 			dropblk(p->b);
+		a = getarena(p->bp.addr);
+		lock(a);
+		blkdealloc_lk(a, p->bp.addr);
+		unlock(a);
 		free(p);
 		p = n;
 	}
@@ -1179,14 +1173,6 @@
 	for(i = 0; i < fs->narena; i++){
 		a = &fs->arenas[i];
 		lock(a);
-		if(a->deferhd.addr != -1){
-			finalize(a->defertl);
-			chainlog(a->logtl, a->deferhd.addr);
-			loadlog(a, a->deferhd);
-			a->logtl = a->defertl;
-			a->deferhd = (Bptr){-1, -1, -1};
-			a->defertl = nil;
-		}
 		syncbarrier(a, gen);
 		finalize(a->logtl);
 		if(syncblk(a->logtl) == -1)
--- a/dat.h
+++ b/dat.h
@@ -454,6 +454,8 @@
 	/* superblocks */
 	Blk	*sb0;	/* primary */
 	Blk	*sb1;	/* backup */
+	long	*statemap;	/* for debugging */
+
 	/* arena allocation */
 	Arena	*arenas;
 	long	roundrobin;
@@ -536,8 +538,6 @@
 	vlong	nlog;		/* logged since last copression */
 	Bptr	loghd;		/* allocation log */
 	Blk	*logtl;		/* end of the log, open for writing */
-	Bptr	deferhd;	/* allocation log */
-	Blk	*defertl;	/* end of the log, open for writing */
 	Syncq	*sync;
 };
 
--- a/fns.h
+++ b/fns.h
@@ -57,7 +57,7 @@
 void	freebp(Tree*, Bptr);
 int	dlappend(Dlist *dl, Bptr);
 int	killblk(Tree*, Bptr);
-int	blkrelease(vlong);
+int	blkdealloc(vlong);
 ushort	blkfill(Blk*);
 uvlong	blkhash(Blk*);
 uvlong	bufhash(void*, usize);
@@ -66,8 +66,9 @@
 
 char*	updatesnap(Tree**, Tree*, char*);
 char*	labelsnap(Tree*, char*);
-char*	delsnap(Tree*, char*);
+char*	delsnap(Tree*, vlong, char*);
 Tree*	opensnap(char*);
+vlong	successor(vlong);
 
 void	closesnap(Tree*);
 void	reamfs(char*);
--- a/fs.c
+++ b/fs.c
@@ -40,6 +40,7 @@
 snapfs(int fd, char *old, char *new)
 {
 	Mount *mnt;
+	vlong succ;
 	Tree *t, *s;
 	char *e;
 
@@ -62,7 +63,8 @@
 		return;
 	}
 	if(strlen(new) == 0){
-		if((e = delsnap(t, old)) != nil){
+		succ = successor(t->gen);
+		if((e = delsnap(t, succ, old)) != nil){
 			fprint(fd, "snap: error deleting '%s': %s\n", new, e);
 			unlock(&fs->mountlk);
 			return;
--- a/load.c
+++ b/load.c
@@ -60,6 +60,7 @@
 	char *e;
 	Tree *t;
 	int i, k;
+	Dir *d;
 
 	if((mnt = mallocz(sizeof(*mnt), 1)) == nil)
 		sysfatal("malloc: %r");
@@ -87,6 +88,10 @@
 		sysfatal("superblock: %r");
 	if((fs->arenas = calloc(fs->narena, sizeof(Arena))) == nil)
 		sysfatal("malloc: %r");
+	if((d = dirfstat(fs->fd)) == nil)
+		sysfatal("wut");
+	if((fs->statemap = malloc((d->length/Blksz+1)*sizeof(long))) == nil)
+		sysfatal("wut");
 	for(i = 0; i < fs->narena; i++){
 		a = &fs->arenas[i];
 		memset(a, 0, sizeof(Arena));
--- a/pack.c
+++ b/pack.c
@@ -551,10 +551,6 @@
 	a->used = UNPACK64(p);		p += 8;
 	a->logtl = nil;
 
-	a->deferhd.addr = -1;
-	a->deferhd.hash = -1;
-	a->deferhd.gen = -1;
-	a->defertl = nil;
 	assert(p <= e);
 	return p;
 }
--- a/ream.c
+++ b/ream.c
@@ -177,6 +177,8 @@
 	b->type = Tlog;
 	b->bp.addr = addr;
 	b->data = b->buf + Loghdsz;
+	if(b->bp.addr == 512*MiB)
+		b->bp.addr += Blksz;
 	setflag(b, Bdirty);
 
 	p = b->data + Loghashsz;
--- a/snap.c
+++ b/snap.c
@@ -154,7 +154,7 @@
 			return Efs;
 		if(docontents){
 			for(p = b->data; p != b->data+b->deadsz; p += 8){
-				if(blkrelease(UNPACK64(p)) == -1){
+				if(blkdealloc(UNPACK64(p)) == -1){
 					dropblk(b);
 					return Efs;
 				}
@@ -224,7 +224,7 @@
 	return err;
 }
 
-static vlong
+vlong
 successor(vlong gen)
 {
 	char *e, pfx[9];
@@ -254,21 +254,15 @@
 }
 
 static char*
-reclaimblocks(vlong gen, vlong prev)
+reclaimblocks(vlong gen, vlong succ, vlong prev)
 {
 	char *e, pfx[9];
-	vlong succ;
 	Dlist dl;
 	Scan s;
 	Msg m;
 
-	succ = successor(gen);
-
 	pfx[0] = Kdlist;
-	if(succ == -1)
-		PACK64(pfx+1, gen);
-	else
-		PACK64(pfx+1, succ);
+	PACK64(pfx+1, gen);
 	btnewscan(&s, pfx, sizeof(pfx));
 	if((e = btenter(&fs->snap, &s)) != nil)
 		return e;
@@ -309,26 +303,30 @@
  * it will be merged with that successor.
  */
 char*
-delsnap(Tree *t, char *name)
+delsnap(Tree *t, vlong succ, char *name)
 {
 	char buf[2][Kvmax], *e;
 	Msg m[2];
 	int nm;
 
-	if(strcmp(name, "dump") == 0 || strcmp(name, "empty") == 0 || strcmp(name, "adm") == 0)
-		return Ename;
-
 	nm = 0;
-	m[nm].op = Odelete;
-	m[nm].k = buf[nm];
-	if((e = packlabel(buf[nm], sizeof(buf[nm]), name)) == nil)
-		return Ename;
-	m[nm].nk = e - m[nm].k;
-	m[nm].v = nil;
-	m[nm].nv = 0;
-	nm++;
+	if(name != nil){
+		if(strcmp(name, "dump") == 0
+		|| strcmp(name, "empty") == 0
+		|| strcmp(name, "adm") == 0)
+			return Ename;
+
+		m[nm].op = Odelete;
+		m[nm].k = buf[nm];
+		if((e = packlabel(buf[nm], sizeof(buf[nm]), name)) == nil)
+			return Ename;
+		m[nm].nk = e - m[nm].k;
+		m[nm].v = nil;
+		m[nm].nv = 0;
+		t->nlbl--;
+		nm++;
+	}
  
-	t->nlbl--;
 	if(t->nlbl == 0 && t->nsucc <= 1){
 		m[nm].op = Odelete;
 		m[nm].k = buf[nm];
@@ -347,7 +345,7 @@
 		return e;
 	if((e = btupsert(&fs->snap, m, nm)) != nil)
 		return e;
-	if((e = reclaimblocks(t->gen, t->prev)) != nil)
+	if((e = reclaimblocks(t->gen, succ, t->prev)) != nil)
 		return e;
 	return nil;
 }
@@ -363,7 +361,9 @@
 	char buf[2][Kvmax];
 	Msg m[2];
 
-	if(strcmp(name, "dump") == 0 || strcmp(name, "empty") == 0 || strcmp(name, "adm") == 0)
+	if(strcmp(name, "dump") == 0
+	|| strcmp(name, "empty") == 0
+	|| strcmp(name, "adm") == 0)
 		return Ename;
 	t->nlbl++;
 	m[0].op = Oinsert;
@@ -390,15 +390,6 @@
 
 	if(!o->dirty)
 		return nil;
-	/* this snap can be modified in place, so do that */
-	if(o->nlbl == 1 && o->nsucc == 0){
-		m[0].op = Oinsert;
-		tree2kv(o, &m[0], buf[0], sizeof(buf[0]));
-		if((e = btupsert(&fs->snap, &m[0], 1)) != nil)
-			return e;
-		o->dirty = 0;
-		return nil;
-	}
 
 	/* update the old kvp */
 	o->nsucc++;
@@ -416,7 +407,10 @@
 	t->nsucc = 0;
 	t->ht = o->ht;
 	t->bp = o->bp;
-	t->prev = o->gen;
+	if(o->nlbl == 0 && o->nsucc == 1)
+		t->prev = o->prev;
+	else
+		t->prev = o->gen;
 	t->gen = aincv(&fs->nextgen, 1);
 
 	m[1].op = Oinsert;
@@ -432,6 +426,10 @@
 
 	/* only update the dirty status after we sync */
 	o->dirty = 0;
+
+	/* this was the last ref to the snap */
+	if(o->nlbl == 0 && o->nsucc == 1)
+		delsnap(o, t->gen, nil);
 	closesnap(o);
 	*r = t;
 	return nil;