shithub: gefs

Download patch

ref: 72009cbf559c825f25594a6bf72d69e67b14cebb
parent: 20955da8f1e6cf60c5755c850e4e453c29a43b23
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Jan 23 00:02:27 EST 2022

blk: remove superblock

The superblock isn't needed if we put its contents
into the arena headers. This gives us redundancy,
and removes a spare IO operation.

--- a/blk.c
+++ b/blk.c
@@ -112,7 +112,6 @@
 		break;
 	case Tarena:
 	case Traw:
-	case Tsuper:
 	case Tlog:
 	case Tdead:
 		break;
@@ -191,13 +190,7 @@
 static int
 syncarena(Arena *a)
 {
-	char *p;
-
-	p = a->b->data;
-	PBIT64(p, a->head.addr);	p += 8;	/* freelist addr */
-	PBIT64(p, a->head.hash);	p += 8;	/* freelist hash */
-	PBIT64(p, a->size);		p += 8;	/* arena size */
-	PBIT64(p, a->used);			/* arena used */
+	packarena(a->b->data, Blkspc, a, fs);	/* fs is a Gefs*; its unnamed Fshdr member supplies the header fields */
 	finalize(a->b);
 	return syncblk(a->b);
 }
@@ -408,7 +401,6 @@
 	Bptr bp;
 	char *p;
 
-//	Oplog ol;
 	/*
 	 * Sync the current log to disk, and
 	 * set up a new block log tail.  While
@@ -688,27 +680,6 @@
 	return b;
 }
 
-char*
-fillsuper(Blk *b)
-{
-	char *p;
-
-	assert(b->type == Tsuper);
-	p = b->data;
-	memcpy(p, "gefs0001", 8); p += 8;
-	PBIT32(p, Blksz); p += 4;
-	PBIT32(p, Bufspc); p += 4;
-	PBIT32(p, Hdrsz); p += 4;
-	PBIT32(p, fs->snap.ht); p += 4;
-	PBIT64(p, fs->snap.bp.addr); p += 8;
-	PBIT64(p, fs->snap.bp.hash); p += 8;
-	PBIT64(p, fs->snap.bp.gen); p += 8;
-	PBIT32(p, fs->narena); p += 4;
-	PBIT64(p, fs->arenasz); p += 8;
-	PBIT64(p, fs->nextqid); p += 8;
-	return p;
-}
-
 void
 finalize(Blk *b)
 {
@@ -743,7 +714,6 @@
 	case Traw:
 		b->bp.hash = blkhash(b);
 		break;
-	case Tsuper:
 	case Tarena:
 		break;
 	}
@@ -928,11 +898,6 @@
 //		if(syncblk(b) == -1)
 //			r = -1;
 //	}
-	fillsuper(fs->super);
-	finalize(fs->super);
-	enqueue(fs->super);
-	if(r != -1)
-		r = syncblk(fs->super);
 	qunlock(&fs->snaplk);
 	return r;
 }
--- a/dat.h
+++ b/dat.h
@@ -14,6 +14,7 @@
 typedef struct Scan	Scan;
 typedef struct Dent	Dent;
 typedef struct Scanp	Scanp;
+typedef struct Fshdr	Fshdr;
 typedef struct Arena	Arena;
 typedef struct Arange	Arange;
 typedef struct Bucket	Bucket;
@@ -243,7 +244,6 @@
 	Traw,
 	Tpivot,
 	Tleaf,
-	Tsuper,
 	Tarena,
 	Tlog,
 	Tdead,
@@ -393,21 +393,31 @@
 	vlong	cachelook;
 };
 
+struct Fshdr {	/* file system header; Gefs embeds it as its first, unnamed member */
+	int	blksz;	/* must equal Blksz; checked by mergeinfo */
+	int	bufspc;	/* must equal Bufspc; checked by mergeinfo */
+	int	hdrsz;	/* must equal Hdrsz; checked by mergeinfo */
+	Tree	snap;	/* root snapshot tree; packarena stores only ht and bp */
+	int	narena;	/* number of arenas; mergeinfo requires every header copy to agree */
+	vlong	arenasz;	/* loadfs reads arena i's header at offset i*arenasz */
+	vlong	nextqid;	/* stored in the arena header by packarena */
+	vlong	nextgen;	/* not written by packarena, which sets it to snap.bp.gen + 1 */
+};
+
 /*
  * Overall state of the file sytem.
  * Shadows the superblock contents.
  */
 struct Gefs {
-	/* immutable data */
-	int	blksz;	/* immutable */
-	int	bufsz;	/* immutable */
-	int	pivsz;	/* immutable */
-	int	hdrsz;	/* immutable */
+	Fshdr;
+	/* arena allocation */
+	Arena	*arenas;
+	long	roundrobin;
+	int	gotinfo;
 
 	QLock	snaplk;	/* snapshot lock */
 	Mount	*mounts;
 	Tree	*osnap;
-	Blk	*super;
 
 	Chan	*wrchan;
 	Chan	*rdchan;
@@ -424,18 +434,6 @@
 	long	broken;
 	long	rdonly;
 	int	noauth;
-
-	/* root snapshot tree */
-	Tree	snap;
-
-	vlong	nextqid;
-	vlong	nextgen;
-
-	/* arena allocation */
-	Arena	*arenas;
-	int	narena;
-	long	roundrobin;
-	vlong	arenasz;
 
 	/* user list */
 	RWLock	userlk;
--- a/fns.h
+++ b/fns.h
@@ -9,6 +9,7 @@
 
 extern Gefs*	fs;
 extern int	debug;
+extern char*	forceuser;
 
 Blk*	newblk(int type);
 Blk*	getroot(Tree*, int*);
@@ -30,7 +31,6 @@
 uvlong	blkhash(Blk*);
 u32int	ihash(vlong);
 void	finalize(Blk*);
-char*	fillsuper(Blk*);
 Tree*	newsnap(Tree*);
 char*	freesnap(Tree*, Tree*);
 char*	labelsnap(char*, vlong);
@@ -42,7 +42,7 @@
 void	closesnap(Tree*);
 uvlong	siphash(void*, usize);
 void	reamfs(char*);
-int	loadarena(Arena*, vlong);
+int	loadarena(Arena*, Fshdr *fi, vlong);
 void	loadfs(char*);
 int	sync(void);
 int	loadlog(Arena*);
@@ -120,6 +120,8 @@
 char*	packsnap(char*, int, vlong);
 char*	packlabel(char*, int, char*);
 char*	packsuper(char*, int, vlong);
+char*	packarena(char*, int, Arena*, Fshdr*);
+char*	unpackarena(Arena*, Fshdr*, char*, int);
 
 /* fmt */
 int	Bconv(Fmt*);
--- a/fs.c
+++ b/fs.c
@@ -670,7 +670,7 @@
 static void
 fsattach(Fmsg *m, int iounit)
 {
-	char *e, *p, dbuf[Kvmax], kvbuf[Kvmax];
+	char *e, *p, *n, dbuf[Kvmax], kvbuf[Kvmax];
 	Mount *mnt;
 	Dent *de;
 	User *u;
@@ -689,7 +689,8 @@
 		return;
 	}
 	rlock(&fs->userlk);
-	if((u = name2user(m->uname)) == nil){
+	n = (forceuser == nil) ? m->uname : forceuser;
+	if((u = name2user(n)) == nil){
 		rerror(m, Enouser);
 		runlock(&fs->userlk);
 		return;
--- a/load.c
+++ b/load.c
@@ -2,6 +2,7 @@
 #include <libc.h>
 #include <fcall.h>
 #include <avl.h>
+#include <pool.h>
 
 #include "dat.h"
 #include "fns.h"
@@ -12,32 +13,33 @@
 	return ((Arange*)a)->off - ((Arange*)b)->off;
 }
 
+void	/* adopt the header copy unpacked from one arena as fs's header; sysfatal on mismatch */
+mergeinfo(Gefs *fs, Fshdr *fi)
+{
+	if(fi->blksz != Blksz || fi->bufspc != Bufspc || fi->hdrsz != Hdrsz)
+		sysfatal("parameter mismatch");
+	if(fs->gotinfo && fs->narena != fi->narena)
+		sysfatal("arena count mismatch");
+	if(fs->gotinfo && fi->snap.bp.gen < fs->snap.bp.gen)	/* snap.gen is never unpacked, so compare the stored bp.gen */
+		fprint(2, "not all arenas synced: rolling back\n");
+	fs->Fshdr = *fi;	/* the most recently merged copy always wins */
+}
+
 int
-loadarena(Arena *a, vlong o)
+loadarena(Arena *a, Fshdr *fi, vlong o)	/* read the arena header block at offset o into *a and *fi; 0 on success, -1 on failure */
 {
 	Blk *b;
-	char *p;
 	Bptr bp;
 
-	if((a->free = avlcreate(rangecmp)) == nil)
-		return -1;
 	bp.addr = o;
 	bp.hash = -1;
 	bp.gen = -1;
 	if((b = getblk(bp, GBnochk)) == nil)
 		return -1;
-	p = b->data;
-	a->b = b;
-	a->head.addr = GBIT64(p);	p += 8;
-	a->head.hash = GBIT64(p);	p += 8;
-	a->head.gen = -1;
-	a->size = GBIT64(p);	p += 8;
-	a->used = GBIT64(p);
-	a->tail = nil;
-	if(loadlog(a) == -1)
+	unpackarena(a, fi, b->data, Blkspc);	/* zeroes *a and *fi first; NOTE(review): a nil return (bad magic) is not checked here */
+	if((a->free = avlcreate(rangecmp)) == nil)	/* must follow unpackarena, which clears *a */
 		return -1;
-	if(compresslog(a) == -1)
-		return -1;
+	a->b = b;	/* keep the header block; syncarena repacks and writes it */
 	return 0;
 }
 
@@ -44,52 +46,47 @@
 void
 loadfs(char *dev)
 {
-	int i, blksz, bufspc, hdrsz;
-	vlong sb;
-	char *p, *e;
-	Bptr bp;
+	Fshdr fi;
+	Arena *a;
+	char *e;
 	Tree *t;
-	Blk *b;
-	Dir *d;
+	int i;
 
 	fs->osnap = nil;
+	fs->gotinfo = 0;
+	fs->narena = 8;
 	if((fs->fd = open(dev, ORDWR)) == -1)
 		sysfatal("open %s: %r", dev);
-	if((d = dirfstat(fs->fd)) == nil)
-		sysfatal("ream: %r");
-	sb = d->length - (d->length % Blksz) - Blksz;
-	free(d);
-
-	bp.addr = sb;
-	bp.hash = -1;
-	bp.gen = -1;
-	if((b = getblk(bp, GBnochk)) == nil)
-		sysfatal("read superblock: %r");
-	if(b->type != Tsuper)
-		sysfatal("corrupt superblock: bad type");
-	if(memcmp(b->data, "gefs0001", 8) != 0)
-		sysfatal("corrupt superblock: bad magic");
-	p = b->data + 8;
-
-	blksz = GBIT32(p);		p += 4;
-	bufspc = GBIT32(p);		p += 4;
-	hdrsz = GBIT32(p);		p += 4;
-	fs->snap.ht = GBIT32(p);	p += 4;
-	fs->snap.bp.addr = GBIT64(p);	p += 8;
-	fs->snap.bp.hash = GBIT64(p);	p += 8;
-	fs->snap.bp.gen = GBIT64(p);	p += 8;
-	fs->narena = GBIT32(p);		p += 4;
-	fs->arenasz = GBIT64(p);	p += 8;
-	fs->nextqid = GBIT64(p);	p += 8;
-	fs->super = b;
-	fs->nextgen = fs->snap.bp.gen + 1;
+	if((fs->arenas = calloc(1, sizeof(Arena))) == nil)
+		sysfatal("malloc: %r");
+	for(i = 0; i < fs->narena; i++){
+		a = &fs->arenas[i];
+		if((loadarena(a, &fi, i*fs->arenasz)) == -1)
+			sysfatal("loadfs: %r");
+		mergeinfo(fs, &fi);
+		if(!fs->gotinfo){
+			if((fs->arenas = realloc(fs->arenas, fs->narena*sizeof(Arena))) == nil)
+				sysfatal("malloc: %r");
+			memset(fs->arenas+1, 0, (fs->narena-1)*sizeof(Arena));
+			fs->gotinfo = 1;
+		}
+	}
+	for(i = 0; i < fs->narena; i++){
+		a = &fs->arenas[i];
+		if(loadlog(a) == -1)
+			sysfatal("load log: %r");
+		if(compresslog(a) == -1)
+			sysfatal("compress log: %r");
+	}
 	for(i = 0; i < Ndead; i++){
 		fs->snap.dead[i].prev = -1;
 		fs->snap.dead[i].head.addr = -1;
 		fs->snap.dead[i].head.hash = -1;
 		fs->snap.dead[i].head.gen = -1;
+		fs->snap.dead[i].ins = nil;
 	}
-	fprint(2, "load: %8s\n", p);
+
+	fprint(2, "load:\n");
 	fprint(2, "\tsnaptree:\t%B\n", fs->snap.bp);
 	fprint(2, "\tnarenas:\t%d\n", fs->narena);
 	fprint(2, "\tarenasz:\t%lld\n", fs->arenasz);
@@ -96,17 +93,6 @@
 	fprint(2, "\tnextqid:\t%lld\n", fs->nextqid);
 	fprint(2, "\tnextgen:\t%lld\n", fs->nextgen);
 	fprint(2, "\tcachesz:\t%lld MiB\n", fs->cmax*Blksz/MiB);
-	if((fs->arenas = calloc(fs->narena, sizeof(Arena))) == nil)
-		sysfatal("malloc: %r");
-	for(i = 0; i < fs->narena; i++)
-		if((loadarena(&fs->arenas[i], i*fs->arenasz)) == -1)
-			sysfatal("loadfs: %r");
-	if(bufspc != Bufspc)
-		sysfatal("fs uses different buffer size");
-	if(hdrsz != Hdrsz)
-		sysfatal("fs uses different buffer size");
-	if(blksz != Blksz)
-		sysfatal("fs uses different block size");
 	if((t = openlabel("main")) == nil)
 		sysfatal("load users: no main label");
 	if((e = loadusers(2, t)) != nil)
--- a/main.c
+++ b/main.c
@@ -13,6 +13,7 @@
 int	debug;
 int	noauth;
 int	nproc;
+char	*forceuser;
 char	*srvname = "gefs";
 
 vlong
@@ -79,7 +80,7 @@
 static void
 usage(void)
 {
-	fprint(2, "usage: %s [-r] dev\n", argv0);
+	fprint(2, "usage: %s [-rA] [-m mem] [-s srv] [-u usr] dev\n", argv0);
 	exits("usage");
 }
 
@@ -90,12 +91,12 @@
 	vlong cachesz;
 	char *s;
 
-	cachesz = 16*MiB;
+	cachesz = 512*MiB;
 	ARGBEGIN{
 	case 'r':
 		ream = 1;
 		break;
-	case 'c':
+	case 'm':
 		cachesz = strtoll(EARGF(usage()), nil, 0)*MiB;
 		break;
 	case 'd':
@@ -107,6 +108,9 @@
 	case 'A':
 		noauth = 1;
 		break;
+	case 'u':
+		forceuser = EARGF(usage());
+		break;
 	default:
 		usage();
 		break;
@@ -152,7 +156,7 @@
 		for(i = 0; i < nproc; i++)
 			launch(runread, fs->nquiesce++, nil, "readio");
 		if(srvfd != -1)
-			launch(runfs, -1, (void*)srvfd, "srvio");
+			launch(runfs, fs->nquiesce++, (void*)srvfd, "srvio");
 		exits(nil);
 	}
 }
--- a/pack.c
+++ b/pack.c
@@ -3,6 +3,7 @@
 #include <fcall.h>
 #include <avl.h>
 #include <bio.h>
+#include <pool.h>
 
 #include "dat.h"
 #include "fns.h"
@@ -434,5 +435,56 @@
 		PBIT64(p, bp.addr);	p += 8;
 		PBIT64(p, bp.hash);	p += 8;
 	}
+	return p;
+}
+
+char*	/* write the fs header fields from fi and a's allocation state at p; returns the end of the packed data */
+packarena(char *p, int sz, Arena *a, Fshdr *fi)
+{
+	assert(sz >= Blkspc);
+	memcpy(p, "gefs0001", 8);	p += 8;	/* magic, checked by unpackarena */
+	PBIT32(p, Blksz);		p += 4;	/* compile-time constants, not fi's copies */
+	PBIT32(p, Bufspc);		p += 4;
+	PBIT32(p, Hdrsz);		p += 4;
+	PBIT32(p, fi->snap.ht);		p += 4;
+	PBIT64(p, fi->snap.bp.addr);	p += 8;
+	PBIT64(p, fi->snap.bp.hash);	p += 8;
+	PBIT64(p, fi->snap.bp.gen);	p += 8;
+	PBIT32(p, fi->narena);		p += 4;
+	PBIT64(p, fi->arenasz);		p += 8;
+	PBIT64(p, fi->nextqid);		p += 8;
+	fi->nextgen = fi->snap.bp.gen + 1;	/* NOTE(review): side effect on fi; nothing is written to p here */
+	PBIT64(p, a->head.addr);	p += 8;	/* freelist addr */
+	PBIT64(p, a->head.hash);	p += 8;	/* freelist hash */
+	PBIT64(p, a->size);		p += 8;	/* arena size */
+	PBIT64(p, a->used);		p += 8;	/* arena used */
+	return p;
+}
+
+char*	/* inverse of packarena: fill *a and *fi from p; nil if the magic is wrong, else the end of the parsed data */
+unpackarena(Arena *a, Fshdr *fi, char *p, int sz)
+{
+	assert(sz >= Blkspc);
+	memset(a, 0, sizeof(*a));	/* fields not stored on disk (including a->tail) start out zero/nil */
+	memset(fi, 0, sizeof(*fi));
+	if(memcmp(p, "gefs0001", 8) != 0)
+		return nil;
+	p += 8;
+	fi->blksz = GBIT32(p);		p += 4;
+	fi->bufspc = GBIT32(p);		p += 4;
+	fi->hdrsz = GBIT32(p);		p += 4;
+	fi->snap.ht = GBIT32(p);	p += 4;
+	fi->snap.bp.addr = GBIT64(p);	p += 8;
+	fi->snap.bp.hash = GBIT64(p);	p += 8;
+	fi->snap.bp.gen = GBIT64(p);	p += 8;
+	fi->narena = GBIT32(p);		p += 4;
+	fi->arenasz = GBIT64(p);	p += 8;
+	fi->nextqid = GBIT64(p);	p += 8;
+	a->head.addr = GBIT64(p);	p += 8;
+	a->head.hash = GBIT64(p);	p += 8;
+	a->head.gen = -1;		p += 0;	/* the head's gen is not stored */
+	a->size = GBIT64(p);		p += 8;
+	a->used = GBIT64(p);		p += 8;
+	fi->nextgen = fi->snap.bp.gen + 1;	/* not stored on disk; without this a freshly loaded header has nextgen 0 */
+	return p;
+}
--- a/ream.c
+++ b/ream.c
@@ -82,16 +82,15 @@
 }
 
 static void
-initarena(Arena *a, vlong start, vlong asz)
+initarena(Arena *a, Fshdr *fi, vlong start, vlong asz)
 {
 	vlong addr, bo, bh;
 	char *p;
 	Blk *b;
 
-	addr = start;
 	if((b = mallocz(sizeof(Blk), 1)) == nil)
 		sysfatal("ream: %r");
-	addr += Blksz;	/* arena header */
+	addr = start+Blksz;	/* arena header */
 
 	a->head.addr = -1;
 	a->head.hash = -1;
@@ -120,11 +119,14 @@
 	memset(b, 0, sizeof(Blk));
 	b->type = Tarena;
 	b->bp.addr = start;
-	p = b->buf + Hdrsz;
-	PBIT64(p, bo);		p += 8;	/* freelist addr */
-	PBIT64(p, bh);		p += 8;	/* freelist hash */
-	PBIT64(p, asz);		p += 8;	/* arena size */
-	PBIT64(p, Blksz);	/* arena used */
+	b->data = b->buf + Hdrsz;
+	a->head.addr = bo;
+	a->head.hash = bh;
+	a->head.gen = -1;
+	a->size = asz;
+	a->used = Blksz;
+	a->tail = nil;
+	packarena(b->data, Blkspc, a, fi);
 	finalize(b);
 	if(syncblk(b) == -1)
 		sysfatal("ream: write arena: %r");
@@ -134,8 +136,9 @@
 reamfs(char *dev)
 {
 	vlong sz, asz, off;
-	Blk *sb, *rb, *tb;
+	Blk *rb, *tb;
 	Mount *mnt;
+	Arena *a;
 	Dir *d;
 	int i;
 
@@ -145,14 +148,10 @@
 		sysfatal("ream: %r");
 	if(d->length < 64*MiB)
 		sysfatal("ream: disk too small");
-	if((sb = mallocz(sizeof(Blk), 1)) == nil)
-		sysfatal("ream: %r");
 	if((mnt = mallocz(sizeof(Mount), 1)) == nil)
 		sysfatal("ream: alloc mount: %r");
 	if((mnt->root = mallocz(sizeof(Tree), 1)) == nil)
 		sysfatal("ream: alloc tree: %r");
-	fs->super = sb;
-	refblk(sb);
 
 	sz = d->length;
 	sz = sz - (sz % Blksz) - Blksz;
@@ -171,23 +170,27 @@
 		sysfatal("disk too small");
 	fs->arenasz = asz;
 	off = 0;
-	fprint(2, "reaming %d arenas:\n", fs->narena);
-
 	for(i = 0; i < fs->narena; i++){
 		print("\tarena %d: %lld blocks at %llx\n", i, asz/Blksz, off);
-		initarena(&fs->arenas[i], off, asz);
+		initarena(&fs->arenas[i], fs, off, asz);
 		off += asz;
 	}
+	for(i = 0; i < Ndead; i++){
+		fs->snap.dead[i].prev = -1;
+		fs->snap.dead[i].head.addr = -1;
+		fs->snap.dead[i].head.hash = -1;
+		fs->snap.dead[i].head.gen = -1;
+	}
 	
-	sb->type = Tsuper;
-	sb->bp.addr = sz;
-	sb->data = sb->buf + Hdrsz;
-	sb->ref = 2;
-
-	for(i = 0; i < fs->narena; i++)
-		if((loadarena(&fs->arenas[i], i*asz)) == -1)
+	for(i = 0; i < fs->narena; i++){
+		a = &fs->arenas[i];
+		if((loadarena(a, fs, i*asz)) == -1)
 			sysfatal("ream: loadarena: %r");
-
+		if(loadlog(a) == -1)
+			sysfatal("load log: %r");
+		if(compresslog(a) == -1)
+			sysfatal("compress log: %r");
+	}
 	if((tb = newblk(Tleaf)) == nil)
 		sysfatal("ream: allocate root: %r");
 	refblk(tb);
@@ -212,12 +215,8 @@
 
 	fs->snap.bp = rb->bp;
 	fs->snap.ht = 1;
-	fillsuper(sb);
-	finalize(sb);
-	syncblk(sb);
 
 	putblk(tb);
-	putblk(sb);
 	putblk(rb);
 	free(mnt);
 	if(sync() == -1)
--- a/snap.c
+++ b/snap.c
@@ -392,7 +392,7 @@
 	if(modifysnap(Oinsert, t) != nil)
 		return nil;
 
-	if((r = malloc(sizeof(Tree))) == nil)
+	if((r = calloc(sizeof(Tree), 1)) == nil)
 		return nil;
 	gen = inc64(&fs->nextgen, 1);
 	memset(&r->lk, 0, sizeof(r->lk));
--- a/tree.c
+++ b/tree.c
@@ -113,7 +113,7 @@
 	o = spc - b->valsz;
 
 	if(2*b->nval + b->valsz > spc){
-		dprint("2*%d + %d > %d [ksz: %d, vsz: %d]\n",
+		fprint(2, "2*%d + %d > %d [ksz: %d, vsz: %d]\n",
 			2*b->nval, b->valsz, spc, kv->nk, kv->nv);
 		showblk(2, b, "setval overflow", 1);
 		abort();