shithub: gefs

Download patch

ref: 2d851cca16a65146bc6d38c6d54418d20cb38d04
parent: 702a8a1b749e84d1a417b437007f327a97a782e9
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Sep 11 02:45:32 EDT 2022

blk: preallocate blocks to ensure bounded memory

When the preallocated pool is empty, allocation blocks until a block is written back and freed.

--- a/blk.c
+++ b/blk.c
@@ -24,6 +24,7 @@
 static vlong	blkalloc(int);
 static int	blkdealloc_lk(vlong);
 static Blk*	blkbuf(void);
+static void	blkfree(Blk*);
 static Blk*	initblk(Blk*, vlong, int);
 static int	logop(Arena *, vlong, vlong, int);
 
@@ -83,7 +84,7 @@
 	while(rem != 0){
 		n = pread(fs->fd, b->buf, rem, off);
 		if(n <= 0){
-			free(b);
+			blkfree(b);
 			return nil;
 		}
 		off += n;
@@ -447,7 +448,7 @@
 	if(a->tail != nil){
 		finalize(a->tail);
 		if(syncblk(a->tail) == -1){
-			free(b);
+			blkfree(b);
 			return -1;
 		}
 	}
@@ -640,10 +641,23 @@
 static Blk*
 blkbuf(void)
 {
+	uvlong *p;
 	Blk *b;
 
-	if((b = malloc(sizeof(Blk))) == nil)
-		return nil;
+	qlock(&fs->freelk);
+	while(fs->free == nil)
+		rsleep(&fs->freerz);
+	b = fs->free;
+	fs->free = b->fnext;
+
+	/* check for corruption */
+	p = (uvlong*)b - 1;
+	assert(*p == HdMagic);
+
+	p = (uvlong*)(b + 1);
+	assert(*p == TlMagic);
+	qunlock(&fs->freelk);
+
 	/*
 	 * If the block is cached,
 	 * then the cache holds a ref
@@ -660,6 +674,16 @@
 	return b;
 }
 
+static void
+blkfree(Blk *b)	/* return b to the fs->free list and wake a waiter in blkbuf() */
+{
+	b->bp.addr = -1;	/* presumably marks the buffer as holding no on-disk block; confirm against users of bp.addr */
+	qlock(&fs->freelk);
+	b->fnext = fs->free;	/* push b onto the head of the free list */
+	fs->free = b;
+	rwakeup(&fs->freerz);	/* blkbuf() rsleeps on freerz while fs->free is nil */
+	qunlock(&fs->freelk);
+}
 
 static Blk*
 initblk(Blk *b, vlong bp, int t)
@@ -710,8 +734,8 @@
 	if((b = blkbuf()) == nil)
 		return nil;
 	initblk(b, bp, t);
-	setmalloctag(b, getcallerpc(&t));
 	cacheblk(b);
+	b->alloced = getcallerpc(&t);
 	assert(b->ref == 2);
 	return b;
 }
@@ -733,7 +757,7 @@
 	r->logsz = b->logsz;
 	r->lognxt = b->lognxt;
 	memcpy(r->buf, b->buf, sizeof(r->buf));
-	setmalloctag(b, getcallerpc(&b));
+	b->alloced = getcallerpc(&b);
 	return r;
 }
 
@@ -792,7 +816,7 @@
 		qunlock(&fs->blklk[i]);
 		return nil;
 	}else
-		setmalloctag(b, getcallerpc(&bp));
+		b->alloced = getcallerpc(&bp);
 	h = blkhash(b);
 	if((flg&GBnochk) == 0 && h != bp.hash){
 		fprint(2, "corrupt block %B: %.16llux != %.16llux\n", bp, h, bp.hash);
@@ -838,7 +862,7 @@
 		return;
 	assert(!checkflag(b, Bcached));
 	assert(checkflag(b, Bfreed) || !checkflag(b, Bdirty));
-	free(b);
+	blkfree(b);
 }
 
 void
@@ -854,10 +878,10 @@
 	if((f = malloc(sizeof(Bfree))) == nil)
 		return;
 	f->bp = bp;
-	lock(&fs->freelk);
-	f->next = fs->freehd;
-	fs->freehd = f;
-	unlock(&fs->freelk);
+	lock(&fs->dealloclk);
+	f->next = fs->deallochd;
+	fs->deallochd = f;
+	unlock(&fs->dealloclk);
 }
 
 void
@@ -908,13 +932,13 @@
 		for(i = 0; i < fs->nquiesce; i++)
 			fs->lastactive[i] = fs->active[i];
 
-		lock(&fs->freelk);
-		if(fs->freep != nil){
-			p = fs->freep->next;
-			fs->freep->next = nil;
+		lock(&fs->dealloclk);
+		if(fs->deallocp != nil){
+			p = fs->deallocp->next;
+			fs->deallocp->next = nil;
 		}
-		fs->freep = fs->freehd;
-		unlock(&fs->freelk);
+		fs->deallocp = fs->deallochd;
+		unlock(&fs->dealloclk);
 	}
 	unlock(&fs->activelk);
 
--- a/dat.h
+++ b/dat.h
@@ -239,6 +239,11 @@
 	Nmsgtype,	/* maximum message type */
 };
 
+enum {
+	HdMagic = 0x68646d6167696373,	/* ASCII "hdmagics"; initfs stores it in the 8 bytes before each pooled Blk */
+	TlMagic = 0x979b929e98969c8c,	/* bitwise complement of HdMagic; stored in the 8 bytes after each pooled Blk */
+};
+
 /*
  * Wstat ops come with associated data, in the order
  * of the bit flags.
@@ -428,10 +433,15 @@
 	int	active[32];
 	int	lastactive[32];
 	Chan	*chsync[32];
-	Lock	freelk;
-	Bfree	*freep;
-	Bfree	*freehd;
 
+	QLock	freelk;
+	Rendez	freerz;
+	Blk	*free;
+
+	Lock	dealloclk;
+	Bfree	*deallocp;
+	Bfree	*deallochd;
+
 	int	fd;
 	long	broken;
 	long	rdonly;
@@ -594,6 +604,7 @@
 	vlong	logsz;	/* for allocation log */
 	vlong	lognxt;	/* for allocation log */
 
+	uintptr	alloced;
 	uintptr	freed;	/* debug */
 
 	Bptr	bp;
--- a/main.c
+++ b/main.c
@@ -33,15 +33,40 @@
 static void
 initfs(vlong cachesz)
 {
+	char *p, *buf, *ebuf;
+	usize sz;
+	uvlong *ck;
+	Blk *b;
+
 	if((fs = mallocz(sizeof(Gefs), 1)) == nil)
 		sysfatal("malloc: %r");
 
+	fs->freerz.l = &fs->freelk;	/* freerz is slept on and woken with freelk held */
+	fs->syncrz.l = &fs->synclk;
 	fs->noauth = noauth;
 	fs->cmax = cachesz/Blksz;
-	if(fs->cmax >= (2ULL*GiB)/sizeof(Bucket))
+	if(fs->cmax > (1<<30))	/* refuse more than 2^30 cache entries */
 		sysfatal("cache too big");
 	if((fs->cache = mallocz(fs->cmax*sizeof(Bucket), 1)) == nil)
 		sysfatal("malloc: %r");
+
+	/* preallocate cmax slots, each laid out as: 8-byte HdMagic, Blk, 8-byte TlMagic */
+	sz = 8+sizeof(Blk)+8;
+	buf = sbrk(fs->cmax * sz);	/* one slot per cache entry, all allocated up front */
+	if(buf == (void*)-1)
+		sysfatal("sbrk: %r");
+	ebuf = buf + fs->cmax*sz;
+	for(p = buf; p != ebuf; p += sz){
+		ck = (uvlong*)p;
+		*ck = HdMagic;	/* guard word immediately before the Blk */
+
+		b = (Blk*)(p+8);	/* the Blk starts after the 8-byte head guard */
+		b->fnext = fs->free;	/* push the new Blk onto fs->free */
+		fs->free = b;
+
+		ck = (uvlong*)(b+1);
+		*ck = TlMagic;	/* guard word immediately after the Blk; blkbuf() asserts both guards */
+	}
 }
 
 static void
@@ -158,6 +183,7 @@
 	assert(4*Kpmax < Pivspc);
 	assert(2*Msgmax < Bufspc);
 	assert(Treesz < Inlmax);
+
 	initfs(cachesz);
 	initshow();
 	fmtinstall('H', encodefmt);
@@ -182,7 +208,6 @@
 
 	loadfs(dev);
 
-	fs->syncrz.l = &fs->synclk;
 	fs->rdchan = mkchan(32);
 	fs->wrchan = mkchan(32);
 	fs->nsyncers = nproc;