ref: 72009cbf559c825f25594a6bf72d69e67b14cebb
parent: 20955da8f1e6cf60c5755c850e4e453c29a43b23
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Jan 23 00:02:27 EST 2022
blk: remove superblock. The superblock isn't needed if we put the block information into the arenas. This gives us redundancy and removes a spare I/O operation.
--- a/blk.c
+++ b/blk.c
@@ -112,7 +112,6 @@
break;
case Tarena:
case Traw:
- case Tsuper:
case Tlog:
case Tdead:
break;
@@ -191,13 +190,7 @@
static int
syncarena(Arena *a)
{
- char *p;
-
- p = a->b->data;
- PBIT64(p, a->head.addr); p += 8; /* freelist addr */
- PBIT64(p, a->head.hash); p += 8; /* freelist hash */
- PBIT64(p, a->size); p += 8; /* arena size */
- PBIT64(p, a->used); /* arena used */
+ packarena(a->b->data, Blkspc, a, fs);
finalize(a->b);
return syncblk(a->b);
}
@@ -408,7 +401,6 @@
Bptr bp;
char *p;
-// Oplog ol;
/*
* Sync the current log to disk, and
* set up a new block log tail. While
@@ -688,27 +680,6 @@
return b;
}
-char*
-fillsuper(Blk *b)
-{
- char *p;
-
- assert(b->type == Tsuper);
- p = b->data;
- memcpy(p, "gefs0001", 8); p += 8;
- PBIT32(p, Blksz); p += 4;
- PBIT32(p, Bufspc); p += 4;
- PBIT32(p, Hdrsz); p += 4;
- PBIT32(p, fs->snap.ht); p += 4;
- PBIT64(p, fs->snap.bp.addr); p += 8;
- PBIT64(p, fs->snap.bp.hash); p += 8;
- PBIT64(p, fs->snap.bp.gen); p += 8;
- PBIT32(p, fs->narena); p += 4;
- PBIT64(p, fs->arenasz); p += 8;
- PBIT64(p, fs->nextqid); p += 8;
- return p;
-}
-
void
finalize(Blk *b)
{
@@ -743,7 +714,6 @@
case Traw:
b->bp.hash = blkhash(b);
break;
- case Tsuper:
case Tarena:
break;
}
@@ -928,11 +898,6 @@
// if(syncblk(b) == -1)
// r = -1;
// }
- fillsuper(fs->super);
- finalize(fs->super);
- enqueue(fs->super);
- if(r != -1)
- r = syncblk(fs->super);
qunlock(&fs->snaplk);
return r;
}
--- a/dat.h
+++ b/dat.h
@@ -14,6 +14,7 @@
typedef struct Scan Scan;
typedef struct Dent Dent;
typedef struct Scanp Scanp;
+typedef struct Fshdr Fshdr;
typedef struct Arena Arena;
typedef struct Arange Arange;
typedef struct Bucket Bucket;
@@ -243,7 +244,6 @@
Traw,
Tpivot,
Tleaf,
- Tsuper,
Tarena,
Tlog,
Tdead,
@@ -393,21 +393,31 @@
vlong cachelook;
};
+struct Fshdr {
+ int blksz;
+ int bufspc;
+ int hdrsz;
+ Tree snap;
+ int narena;
+ vlong arenasz;
+ vlong nextqid;
+ vlong nextgen;
+};
+
/*
* Overall state of the file sytem.
* Shadows the superblock contents.
*/
struct Gefs {
- /* immutable data */
- int blksz; /* immutable */
- int bufsz; /* immutable */
- int pivsz; /* immutable */
- int hdrsz; /* immutable */
+ Fshdr;
+ /* arena allocation */
+ Arena *arenas;
+ long roundrobin;
+ int gotinfo;
QLock snaplk; /* snapshot lock */
Mount *mounts;
Tree *osnap;
- Blk *super;
Chan *wrchan;
Chan *rdchan;
@@ -424,18 +434,6 @@
long broken;
long rdonly;
int noauth;
-
- /* root snapshot tree */
- Tree snap;
-
- vlong nextqid;
- vlong nextgen;
-
- /* arena allocation */
- Arena *arenas;
- int narena;
- long roundrobin;
- vlong arenasz;
/* user list */
RWLock userlk;
--- a/fns.h
+++ b/fns.h
@@ -9,6 +9,7 @@
extern Gefs* fs;
extern int debug;
+extern char* forceuser;
Blk* newblk(int type);
Blk* getroot(Tree*, int*);
@@ -30,7 +31,6 @@
uvlong blkhash(Blk*);
u32int ihash(vlong);
void finalize(Blk*);
-char* fillsuper(Blk*);
Tree* newsnap(Tree*);
char* freesnap(Tree*, Tree*);
char* labelsnap(char*, vlong);
@@ -42,7 +42,7 @@
void closesnap(Tree*);
uvlong siphash(void*, usize);
void reamfs(char*);
-int loadarena(Arena*, vlong);
+int loadarena(Arena*, Fshdr *fi, vlong);
void loadfs(char*);
int sync(void);
int loadlog(Arena*);
@@ -120,6 +120,8 @@
char* packsnap(char*, int, vlong);
char* packlabel(char*, int, char*);
char* packsuper(char*, int, vlong);
+char* packarena(char*, int, Arena*, Fshdr*);
+char* unpackarena(Arena*, Fshdr*, char*, int);
/* fmt */
int Bconv(Fmt*);
--- a/fs.c
+++ b/fs.c
@@ -670,7 +670,7 @@
static void
fsattach(Fmsg *m, int iounit)
{
- char *e, *p, dbuf[Kvmax], kvbuf[Kvmax];
+ char *e, *p, *n, dbuf[Kvmax], kvbuf[Kvmax];
Mount *mnt;
Dent *de;
User *u;
@@ -689,7 +689,8 @@
return;
}
rlock(&fs->userlk);
- if((u = name2user(m->uname)) == nil){
+ n = (forceuser == nil) ? m->uname : forceuser;
+ if((u = name2user(n)) == nil){
rerror(m, Enouser);
runlock(&fs->userlk);
return;
--- a/load.c
+++ b/load.c
@@ -2,6 +2,7 @@
#include <libc.h>
#include <fcall.h>
#include <avl.h>
+#include <pool.h>
#include "dat.h"
#include "fns.h"
@@ -12,32 +13,33 @@
return ((Arange*)a)->off - ((Arange*)b)->off;
}
+void
+mergeinfo(Gefs *fs, Fshdr *fi)
+{
+ if(fi->blksz != Blksz || fi->bufspc != Bufspc || fi->hdrsz != Hdrsz)
+ sysfatal("parameter mismatch");
+ if(fs->gotinfo && fs->narena != fi->narena)
+ sysfatal("arena count mismatch");
+ if(fs->gotinfo && fi->snap.gen < fs->snap.gen)
+ fprint(2, "not all arenas synced: rolling back\n");
+ fs->Fshdr = *fi;
+}
+
int
-loadarena(Arena *a, vlong o)
+loadarena(Arena *a, Fshdr *fi, vlong o)
{
Blk *b;
- char *p;
Bptr bp;
- if((a->free = avlcreate(rangecmp)) == nil)
- return -1;
bp.addr = o;
bp.hash = -1;
bp.gen = -1;
if((b = getblk(bp, GBnochk)) == nil)
return -1;
- p = b->data;
- a->b = b;
- a->head.addr = GBIT64(p); p += 8;
- a->head.hash = GBIT64(p); p += 8;
- a->head.gen = -1;
- a->size = GBIT64(p); p += 8;
- a->used = GBIT64(p);
- a->tail = nil;
- if(loadlog(a) == -1)
+ unpackarena(a, fi, b->data, Blkspc);
+ if((a->free = avlcreate(rangecmp)) == nil)
return -1;
- if(compresslog(a) == -1)
- return -1;
+ a->b = b;
return 0;
}
@@ -44,52 +46,47 @@
void
loadfs(char *dev)
{
- int i, blksz, bufspc, hdrsz;
- vlong sb;
- char *p, *e;
- Bptr bp;
+ Fshdr fi;
+ Arena *a;
+ char *e;
Tree *t;
- Blk *b;
- Dir *d;
+ int i;
fs->osnap = nil;
+ fs->gotinfo = 0;
+ fs->narena = 8;
if((fs->fd = open(dev, ORDWR)) == -1)
sysfatal("open %s: %r", dev);
- if((d = dirfstat(fs->fd)) == nil)
- sysfatal("ream: %r");
- sb = d->length - (d->length % Blksz) - Blksz;
- free(d);
-
- bp.addr = sb;
- bp.hash = -1;
- bp.gen = -1;
- if((b = getblk(bp, GBnochk)) == nil)
- sysfatal("read superblock: %r");
- if(b->type != Tsuper)
- sysfatal("corrupt superblock: bad type");
- if(memcmp(b->data, "gefs0001", 8) != 0)
- sysfatal("corrupt superblock: bad magic");
- p = b->data + 8;
-
- blksz = GBIT32(p); p += 4;
- bufspc = GBIT32(p); p += 4;
- hdrsz = GBIT32(p); p += 4;
- fs->snap.ht = GBIT32(p); p += 4;
- fs->snap.bp.addr = GBIT64(p); p += 8;
- fs->snap.bp.hash = GBIT64(p); p += 8;
- fs->snap.bp.gen = GBIT64(p); p += 8;
- fs->narena = GBIT32(p); p += 4;
- fs->arenasz = GBIT64(p); p += 8;
- fs->nextqid = GBIT64(p); p += 8;
- fs->super = b;
- fs->nextgen = fs->snap.bp.gen + 1;
+ if((fs->arenas = calloc(1, sizeof(Arena))) == nil)
+ sysfatal("malloc: %r");
+ for(i = 0; i < fs->narena; i++){
+ a = &fs->arenas[i];
+ if((loadarena(a, &fi, i*fs->arenasz)) == -1)
+ sysfatal("loadfs: %r");
+ mergeinfo(fs, &fi);
+ if(!fs->gotinfo){
+ if((fs->arenas = realloc(fs->arenas, fs->narena*sizeof(Arena))) == nil)
+ sysfatal("malloc: %r");
+ memset(fs->arenas+1, 0, (fs->narena-1)*sizeof(Arena));
+ fs->gotinfo = 1;
+ }
+ }
+ for(i = 0; i < fs->narena; i++){
+ a = &fs->arenas[i];
+ if(loadlog(a) == -1)
+ sysfatal("load log: %r");
+ if(compresslog(a) == -1)
+ sysfatal("compress log: %r");
+ }
for(i = 0; i < Ndead; i++){
fs->snap.dead[i].prev = -1;
fs->snap.dead[i].head.addr = -1;
fs->snap.dead[i].head.hash = -1;
fs->snap.dead[i].head.gen = -1;
+ fs->snap.dead[i].ins = nil;
}
- fprint(2, "load: %8s\n", p);
+
+ fprint(2, "load:\n");
fprint(2, "\tsnaptree:\t%B\n", fs->snap.bp);
fprint(2, "\tnarenas:\t%d\n", fs->narena);
fprint(2, "\tarenasz:\t%lld\n", fs->arenasz);
@@ -96,17 +93,6 @@
fprint(2, "\tnextqid:\t%lld\n", fs->nextqid);
fprint(2, "\tnextgen:\t%lld\n", fs->nextgen);
fprint(2, "\tcachesz:\t%lld MiB\n", fs->cmax*Blksz/MiB);
- if((fs->arenas = calloc(fs->narena, sizeof(Arena))) == nil)
- sysfatal("malloc: %r");
- for(i = 0; i < fs->narena; i++)
- if((loadarena(&fs->arenas[i], i*fs->arenasz)) == -1)
- sysfatal("loadfs: %r");
- if(bufspc != Bufspc)
- sysfatal("fs uses different buffer size");
- if(hdrsz != Hdrsz)
- sysfatal("fs uses different buffer size");
- if(blksz != Blksz)
- sysfatal("fs uses different block size");
if((t = openlabel("main")) == nil)
sysfatal("load users: no main label");
if((e = loadusers(2, t)) != nil)
--- a/main.c
+++ b/main.c
@@ -13,6 +13,7 @@
int debug;
int noauth;
int nproc;
+char *forceuser;
char *srvname = "gefs";
vlong
@@ -79,7 +80,7 @@
static void
usage(void)
{
- fprint(2, "usage: %s [-r] dev\n", argv0);
+ fprint(2, "usage: %s [-rA] [-m mem] [-s srv] [-u usr] dev\n", argv0);
exits("usage");
}
@@ -90,12 +91,12 @@
vlong cachesz;
char *s;
- cachesz = 16*MiB;
+ cachesz = 512*MiB;
ARGBEGIN{
case 'r':
ream = 1;
break;
- case 'c':
+ case 'm':
cachesz = strtoll(EARGF(usage()), nil, 0)*MiB;
break;
case 'd':
@@ -107,6 +108,9 @@
case 'A':
noauth = 1;
break;
+ case 'u':
+ forceuser = EARGF(usage());
+ break;
default:
usage();
break;
@@ -152,7 +156,7 @@
for(i = 0; i < nproc; i++)
launch(runread, fs->nquiesce++, nil, "readio");
if(srvfd != -1)
- launch(runfs, -1, (void*)srvfd, "srvio");
+ launch(runfs, fs->nquiesce++, (void*)srvfd, "srvio");
exits(nil);
}
}
--- a/pack.c
+++ b/pack.c
@@ -3,6 +3,7 @@
#include <fcall.h>
#include <avl.h>
#include <bio.h>
+#include <pool.h>
#include "dat.h"
#include "fns.h"
@@ -434,5 +435,56 @@
PBIT64(p, bp.addr); p += 8;
PBIT64(p, bp.hash); p += 8;
}
+ return p;
+}
+
+char*
+packarena(char *p, int sz, Arena *a, Fshdr *fi)
+{
+ assert(sz >= Blkspc);
+ memcpy(p, "gefs0001", 8); p += 8;
+ PBIT32(p, Blksz); p += 4;
+ PBIT32(p, Bufspc); p += 4;
+ PBIT32(p, Hdrsz); p += 4;
+ PBIT32(p, fi->snap.ht); p += 4;
+ PBIT64(p, fi->snap.bp.addr); p += 8;
+ PBIT64(p, fi->snap.bp.hash); p += 8;
+ PBIT64(p, fi->snap.bp.gen); p += 8;
+ PBIT32(p, fi->narena); p += 4;
+ PBIT64(p, fi->arenasz); p += 8;
+ PBIT64(p, fi->nextqid); p += 8;
+ fi->nextgen = fi->snap.bp.gen + 1;
+ PBIT64(p, a->head.addr); p += 8; /* freelist addr */
+ PBIT64(p, a->head.hash); p += 8; /* freelist hash */
+ PBIT64(p, a->size); p += 8; /* arena size */
+ PBIT64(p, a->used); p += 8; /* arena used */
+ return p;
+}
+
+char*
+unpackarena(Arena *a, Fshdr *fi, char *p, int sz)
+{
+ assert(sz >= Blkspc);
+ memset(a, 0, sizeof(*a));
+ memset(fi, 0, sizeof(*fi));
+ if(memcmp(p, "gefs0001", 8) != 0)
+ return nil;
+ p += 8;
+ fi->blksz = GBIT32(p); p += 4;
+ fi->bufspc = GBIT32(p); p += 4;
+ fi->hdrsz = GBIT32(p); p += 4;
+ fi->snap.ht = GBIT32(p); p += 4;
+ fi->snap.bp.addr = GBIT64(p); p += 8;
+ fi->snap.bp.hash = GBIT64(p); p += 8;
+ fi->snap.bp.gen = GBIT64(p); p += 8;
+ fi->narena = GBIT32(p); p += 4;
+ fi->arenasz = GBIT64(p); p += 8;
+ fi->nextqid = GBIT64(p); p += 8;
+ a->head.addr = GBIT64(p); p += 8;
+ a->head.hash = GBIT64(p); p += 8;
+ a->head.gen = -1; p += 0;
+ a->size = GBIT64(p); p += 8;
+ a->used = GBIT64(p); p += 8;
+ a->tail = nil;
return p;
}
--- a/ream.c
+++ b/ream.c
@@ -82,16 +82,15 @@
}
static void
-initarena(Arena *a, vlong start, vlong asz)
+initarena(Arena *a, Fshdr *fi, vlong start, vlong asz)
{
vlong addr, bo, bh;
char *p;
Blk *b;
- addr = start;
if((b = mallocz(sizeof(Blk), 1)) == nil)
sysfatal("ream: %r");
- addr += Blksz; /* arena header */
+ addr = start+Blksz; /* arena header */
a->head.addr = -1;
a->head.hash = -1;
@@ -120,11 +119,14 @@
memset(b, 0, sizeof(Blk));
b->type = Tarena;
b->bp.addr = start;
- p = b->buf + Hdrsz;
- PBIT64(p, bo); p += 8; /* freelist addr */
- PBIT64(p, bh); p += 8; /* freelist hash */
- PBIT64(p, asz); p += 8; /* arena size */
- PBIT64(p, Blksz); /* arena used */
+ b->data = b->buf + Hdrsz;
+ a->head.addr = bo;
+ a->head.hash = bh;
+ a->head.gen = -1;
+ a->size = asz;
+ a->used = Blksz;
+ a->tail = nil;
+ packarena(b->data, Blkspc, a, fi);
finalize(b);
if(syncblk(b) == -1)
sysfatal("ream: write arena: %r");
@@ -134,8 +136,9 @@
reamfs(char *dev)
{
vlong sz, asz, off;
- Blk *sb, *rb, *tb;
+ Blk *rb, *tb;
Mount *mnt;
+ Arena *a;
Dir *d;
int i;
@@ -145,14 +148,10 @@
sysfatal("ream: %r");
if(d->length < 64*MiB)
sysfatal("ream: disk too small");
- if((sb = mallocz(sizeof(Blk), 1)) == nil)
- sysfatal("ream: %r");
if((mnt = mallocz(sizeof(Mount), 1)) == nil)
sysfatal("ream: alloc mount: %r");
if((mnt->root = mallocz(sizeof(Tree), 1)) == nil)
sysfatal("ream: alloc tree: %r");
- fs->super = sb;
- refblk(sb);
sz = d->length;
sz = sz - (sz % Blksz) - Blksz;
@@ -171,23 +170,27 @@
sysfatal("disk too small");
fs->arenasz = asz;
off = 0;
- fprint(2, "reaming %d arenas:\n", fs->narena);
-
for(i = 0; i < fs->narena; i++){
print("\tarena %d: %lld blocks at %llx\n", i, asz/Blksz, off);
- initarena(&fs->arenas[i], off, asz);
+ initarena(&fs->arenas[i], fs, off, asz);
off += asz;
}
+ for(i = 0; i < Ndead; i++){
+ fs->snap.dead[i].prev = -1;
+ fs->snap.dead[i].head.addr = -1;
+ fs->snap.dead[i].head.hash = -1;
+ fs->snap.dead[i].head.gen = -1;
+ }
- sb->type = Tsuper;
- sb->bp.addr = sz;
- sb->data = sb->buf + Hdrsz;
- sb->ref = 2;
-
- for(i = 0; i < fs->narena; i++)
- if((loadarena(&fs->arenas[i], i*asz)) == -1)
+ for(i = 0; i < fs->narena; i++){
+ a = &fs->arenas[i];
+ if((loadarena(a, fs, i*asz)) == -1)
sysfatal("ream: loadarena: %r");
-
+ if(loadlog(a) == -1)
+ sysfatal("load log: %r");
+ if(compresslog(a) == -1)
+ sysfatal("compress log: %r");
+ }
if((tb = newblk(Tleaf)) == nil)
sysfatal("ream: allocate root: %r");
refblk(tb);
@@ -212,12 +215,8 @@
fs->snap.bp = rb->bp;
fs->snap.ht = 1;
- fillsuper(sb);
- finalize(sb);
- syncblk(sb);
putblk(tb);
- putblk(sb);
putblk(rb);
free(mnt);
if(sync() == -1)
--- a/snap.c
+++ b/snap.c
@@ -392,7 +392,7 @@
if(modifysnap(Oinsert, t) != nil)
return nil;
- if((r = malloc(sizeof(Tree))) == nil)
+ if((r = calloc(sizeof(Tree), 1)) == nil)
return nil;
gen = inc64(&fs->nextgen, 1);
memset(&r->lk, 0, sizeof(r->lk));
--- a/tree.c
+++ b/tree.c
@@ -113,7 +113,7 @@
o = spc - b->valsz;
if(2*b->nval + b->valsz > spc){
- dprint("2*%d + %d > %d [ksz: %d, vsz: %d]\n",
+ fprint(2, "2*%d + %d > %d [ksz: %d, vsz: %d]\n",
2*b->nval, b->valsz, spc, kv->nk, kv->nv);
showblk(2, b, "setval overflow", 1);
abort();