shithub: gefs

Download patch

ref: 2f3bfc642a102df8f0c3f765be6ebf638a2f52a5
parent: ff80bf501c401356fef1b3090301c85e13c380c3
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Nov 17 18:18:30 EST 2023

fs: sweep snapshot heads that are going away

--- a/blk.c
+++ b/blk.c
@@ -614,7 +614,7 @@
 
 	ob = cacheget(bp);
 	if(ob != nil){
-		fprint(2, "dup block: %#p %B (alloced %#llx freed %#llx lasthold: %#llx, lastdrop: %#llx)\n",
+		fprint(2, "dup block: %#p %B (alloced %#zx freed %#zx lasthold: %#zx, lastdrop: %#zx)\n",
 			ob, ob->bp, ob->alloced, ob->freed, ob->lasthold, ob->lastdrop);
 		abort();
 	}
@@ -880,23 +880,17 @@
 }
 
 void
-epochclean(void)
+epochwait(void)
 {
-	ulong c, e, ge, delay;
-	Bfree *p, *n;
-	Arena *a;
-	Qent qe;
-	int i;
+	int i, delay;
+	ulong e, ge;
 
 	delay = 0;
 Again:
-	c = agetl(&fs->nlimbo);
 	ge = agetl(&fs->epoch);
 	for(i = 0; i < fs->nworker; i++){
 		e = agetl(&fs->lepoch[i]);
 		if((e & Eactive) && e != (ge | Eactive)){
-			if(c < fs->cmax/4)
-				return;
 			if(delay < 100)
 				delay++;
 			else
@@ -905,6 +899,28 @@
 			goto Again;
 		}
 	}
+}
+
+void
+epochclean(void)
+{
+	ulong c, e, ge;
+	Bfree *p, *n;
+	Arena *a;
+	Qent qe;
+	int i;
+
+	c = agetl(&fs->nlimbo);
+	ge = agetl(&fs->epoch);
+	for(i = 0; i < fs->nworker; i++){
+		e = agetl(&fs->lepoch[i]);
+		if((e & Eactive) && e != (ge | Eactive)){
+			if(c < fs->cmax/4)
+				return;
+			epochwait();
+		}
+	}
+	epochwait();
 	p = asetp(&fs->limbo[(ge+1)%3], nil);
 	asetl(&fs->epoch, (ge+1)%3);
 
--- a/fns.h
+++ b/fns.h
@@ -51,6 +51,7 @@
 void	enqueue(Blk*);
 void	epochstart(int);
 void	epochend(int);
+void	epochwait(void);
 void	epochclean(void);
 void	limbo(Bfree*);
 void	freeblk(Tree*, Blk*, Bptr);
--- a/fs.c
+++ b/fs.c
@@ -34,7 +34,7 @@
 }
 
 static void
-snapfs(Amsg *a)
+snapfs(Amsg *a, Bptr *bp, vlong *pred)
 {
 	Tree *t, *s;
 	Mount *mnt;
@@ -42,6 +42,8 @@
 
 	lock(&fs->mountlk);
 	t = nil;
+	*pred = -1;
+	*bp = (Bptr){-1, -1, -1};
 	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
 		if(strcmp(a->old, mnt->name) == 0){
 			updatesnap(&mnt->root, mnt->root, mnt->name);
@@ -61,6 +63,12 @@
 			unlock(&fs->mountlk);
 			return;
 		}
+		if(t->nlbl == 1 && t->nref <= 1 && t->succ == -1){
+			lock(&t->lk);
+			*pred = t->pred;
+			*bp = t->bp;
+			unlock(&t->lk);
+		}
 		if((e = delsnap(t, t->succ, a->old)) != nil){
 			fprint(a->fd, "snap: error deleting '%s': %s\n", a->new, e);
 			unlock(&fs->mountlk);
@@ -768,10 +776,6 @@
 		rerror(m, Eauth);
 		return;
 	}
-	if(strcmp(m->uname, "none") == 0){
-		rerror(m, Enone);
-		return;
-	}
 	if((de = mallocz(sizeof(Dent), 1)) == nil){
 		rerror(m, Enomem);
 		return;
@@ -947,7 +951,7 @@
 			goto Out;
 		}
 		putfid(af);
-	}else if(!fs->noauth && strcmp(m->uname, "none") != 0){
+	}else if(!fs->noauth){
 		rerror(m, Ebadu);
 		goto Out;
 	}
@@ -2237,15 +2241,73 @@
 	}
 }
 
+/*
+ * Free pb and the blocks below it with gen newer than pred.
+ * We clean epochs frequently, but run outside of an epoch:
+ * the caller has already waited for an epoch to tick over,
+ * so nobody can be accessing the tree other than us, and we
+ * just need to keep the limbo list short.
+ */
 void
+sweeptree(Bptr pb, vlong pred)
+{
+	Bptr bp;
+	Blk *b;
+	Kvp kv;
+	Msg m;
+	int i;
+
+	if((b = getblk(pb, 0)) == nil){
+		fprint(2, "sweep %B: %r\n", pb);	/* report the block we failed to load; bp is not set yet */
+		return;
+	}
+	switch(b->type){
+	case Tleaf:
+		for(i = 0; i < b->nval; i++){
+			getval(b, i, &kv);
+			if(kv.k[0] == Kdat){
+				bp = unpackbp(kv.v, kv.nv);
+				if(bp.gen > pred)
+					freeblk(nil, nil, bp);
+			}
+			epochclean();
+		}
+		break;
+	case Tpivot:
+		for(i = 0; i < b->nbuf; i++){
+			getmsg(b, i, &m);
+			if(m.op == Oinsert && m.k[0] == Kdat){
+				bp = unpackbp(m.v, m.nv);
+				if(bp.gen > pred)
+					freeblk(nil, nil, bp);
+				epochclean();
+			}
+		}
+		for(i = 0; i < b->nval; i++){
+			getval(b, i, &kv);
+			bp = unpackbp(kv.v, kv.nv);
+			sweeptree(bp, pred);	/* the gen check for the child happens inside the recursive call */
+			epochclean();
+		}
+		break;
+	default:
+		fprint(2, "broken tree %B\n", pb);
+		abort();
+	}
+	if(pb.gen > pred)
+		freeblk(nil, nil, pb);
+	dropblk(b);
+}
+
+void
 runsweep(int id, void*)
 {
 	char *e, buf[Offksz];
-	Mount *mnt;
 	Bptr bp, nb, *oldhd;
+	vlong off, pred;
+	Mount *mnt;
 	Arena *a;
 	Amsg *am;
-	vlong off;
 	Blk *b;
 	Msg m;
 	int i;
@@ -2309,10 +2371,14 @@
 		case AOsnap:
 			qlock(&fs->mutlk);
 			epochstart(id);
-			snapfs(am);
+			snapfs(am, &bp, &pred);
 			sync();
 			epochend(id);
 			qunlock(&fs->mutlk);
+			if(pred != -1){
+				epochwait();
+				sweeptree(bp, pred);
+			}
 			break;
 
 		case AOclear: