ref: 8417419572f7f1a14fe052fa46edfd4d2bc15a04
parent: 6c35d0b041df30eacf7601d50d187adb90542b4d
author: Ori Bernstein <ori@eigenstate.org>
date: Sat Jan 1 21:15:35 EST 2022
fs: move tree mutations from console to admin messages
--- a/blk.c
+++ b/blk.c
@@ -738,7 +738,6 @@
p = b->data;
setflag(b, Bdirty);
memcpy(p, "gefs0001", 8); p += 8;
- PBIT32(p, 0); p += 4; /* dirty */
PBIT32(p, Blksz); p += 4;
PBIT32(p, Bufspc); p += 4;
PBIT32(p, Hdrsz); p += 4;
--- a/cons.c
+++ b/cons.c
@@ -24,40 +24,65 @@
}
static void
+sendsync(int fd, int halt)
+{
+ Fmsg *m;
+ Amsg *a;
+ /* build an AOsync admin message and queue it on fs->wrchan; fd gets the status text */
+ m = mallocz(sizeof(Fmsg), 1);
+ a = mallocz(sizeof(Amsg), 1);
+ if(m == nil || a == nil){
+ fprint(fd, "alloc sync msg: %r\n");
+ free(m); /* either pointer may be nil here */
+ free(a);
+ return;
+ }
+ a->op = AOsync;
+ a->halt = halt; /* nonzero: the receiver also marks the fs read-only */
+ a->fd = fd;
+ m->a = a; /* both allocations now belong to the receiver, which frees them */
+ chsend(fs->wrchan, m);
+}
+
+static void
syncfs(int fd, char **, int)
{
- fprint(fd, "sync\n");
+ sendsync(fd, 0); /* halt=0: flush only, the fs stays writable */
}
static void
-snapfs(int fd, char **ap, int na)
+haltfs(int fd, char **, int)
{
- Tree *t, *n;
- char *e;
+ sendsync(fd, 1); /* halt=1: sync, and have the write proc set fs->rdonly */
+}
- if((t = openlabel(ap[0])) == nil){
- fprint(fd, "snap: open %s: does not exist\n", ap[0]);
- return;
+static void
+snapfs(int fd, char **ap, int)
+{
+ Fmsg *m;
+ Amsg *a;
+ /* console "snap old new": queue an AOsnap admin message on fs->wrchan */
+ m = mallocz(sizeof(Fmsg), 1);
+ a = mallocz(sizeof(Amsg), 1);
+ if(m == nil || a == nil){
+ fprint(fd, "alloc snap msg: %r\n");
+ goto Error;
}
- if((n = newsnap(t)) == nil){
- fprint(fd, "snap: save %s: failed\n", ap[na-1]);
- return;
+ if(strcmp(ap[0], ap[1]) == 0){
+ fprint(fd, "not a new snap: %s\n", ap[1]);
+ goto Error;
}
- if((e = labelsnap(ap[na-1], n->gen)) != nil){
- fprint(fd, "snap: save %s: %s\n", ap[na-1], e);
- return;
- }
- if(na <= 1 || strcmp(ap[0], ap[1]) == 0){
- /* the label moved */
- if((e = unrefsnap(t->gen, n->gen)) != nil){
- fprint(fd, "snap: unref old: %s\n", e);
- return;
- }
- }
- closesnap(n);
- closesnap(t);
- sync();
- fprint(fd, "snap %s: ok\n", ap[na-1]);
+ strecpy(a->old, a->old+sizeof(a->old), ap[0]); /* strecpy truncates to fit the fixed-size field */
+ strecpy(a->new, a->new+sizeof(a->new), ap[1]);
+ a->op = AOsnap;
+ a->fd = fd;
+ m->a = a; /* both allocations now belong to the receiver, which frees them */
+ chsend(fs->wrchan, m);
+ return;
+Error:
+ free(m); /* either pointer may be nil here */
+ free(a);
+ return;
}
static void
@@ -124,8 +149,8 @@
" show this help"
"sync\n"
" flush all p[ending writes to disk\n"
- "snap name [new]\n"
- " create or update a new snapshot\n"
+ "snap old new\n"
+ " create or update a new snapshot based off old\n"
"check\n"
" run a consistency check on the file system\n"
"users\n"
@@ -150,7 +175,8 @@
Cmd cmdtab[] = {
{.name="sync", .sub=nil, .minarg=0, .maxarg=0, .fn=syncfs},
- {.name="snap", .sub=nil, .minarg=1, .maxarg=2, .fn=snapfs},
+ {.name="halt", .sub=nil, .minarg=0, .maxarg=0, .fn=haltfs},
+ {.name="snap", .sub=nil, .minarg=2, .maxarg=2, .fn=snapfs},
{.name="check", .sub=nil, .minarg=1, .maxarg=1, .fn=fsckfs},
{.name="help", .sub=nil, .minarg=0, .maxarg=0, .fn=help},
{.name="users", .sub=nil, .minarg=0, .maxarg=1, .fn=refreshusers},
--- a/dat.h
+++ b/dat.h
@@ -1,4 +1,5 @@
typedef struct Blk Blk;
+typedef struct Amsg Amsg;
typedef struct Gefs Gefs;
typedef struct Fmsg Fmsg;
typedef struct Fid Fid;
@@ -76,13 +77,15 @@
/*
* dent: pqid[8] qid[8] -- a directory entry key.
* ptr: off[8] hash[8] -- a key for an Dir block.
- * dir: fixed statbuf header, user ids
+ * dir: serialized Xdir
*/
Kdat, /* qid[8] off[8] => ptr[16]: pointer to data page */
Kent, /* pqid[8] name[n] => dir[n]: serialized Dir */
- Klabel, /* name[] => snapid[]: dataset (snapshot ref) */
+ Klabel, /* name[] => snapid[]: snapshot label */
+ Ktref, /* tag[8] => snapid[]: scratch snapshot label */
Ksnap, /* sid[8] => ref[8], tree[52]: snapshot root */
Ksuper, /* qid[8] => pqid[8]: parent dir */
+ Kdirty, /* [0] => [0]: mark dirty unmount */
};
enum {
@@ -94,7 +97,7 @@
//#define Efs "i will not buy this fs, it is scratched"
#define Eimpl "not implemented"
-#define Efs (abort(), "nope")
+#define Efs (abort(), "fs broke")
#define Eio "i/o error"
#define Efid "unknown fid"
#define Etype "invalid fid type"
@@ -118,6 +121,7 @@
#define Enouser "user does not exist"
#define Efsize "file too big"
#define Ebadu "attach -- unknown user or failed authentication"
+#define Erdonly "file system read only"
#define Ewstatb "wstat -- unknown bits in qid.type/mode"
#define Ewstatd "wstat -- attempt to change directory"
@@ -132,34 +136,32 @@
#define Enempty "remove -- directory not empty"
-//#define Echar "bad character in directory name",
-//#define Eopen "read/write -- on non open fid",
-//#define Ecount "read/write -- count too big",
-//#define Ealloc "phase error -- directory entry not allocated",
-//#define Eqid "phase error -- qid does not match",
-//#define Eaccess "access permission denied",
-//#define Eentry "directory entry not found",
-//#define Emode "open/create -- unknown mode",
-//#define Edir1 "walk -- in a non-directory",
-//#define Edir2 "create -- in a non-directory",
-//#define Ephase "phase error -- cannot happen",
-//#define Eexist "create/wstat -- file exists",
-//#define Edot "create/wstat -- . and .. illegal names",
-//#define Ewalk "walk -- too many (system wide)",
-//#define Eronly "file system read only",
-//#define Efull "file system full",
-//#define Eoffset "read/write -- offset negative",
-//#define Elocked "open/create -- file is locked",
-//#define Ebroken "read/write -- lock is broken",
-//#define Eauth "attach -- authentication failed",
-//#define Eauth2 "read/write -- authentication unimplemented",
-//#define Etoolong "name too long",
-//#define Efidinuse "fid in use",
-//#define Econvert "protocol botch",
-//#define Eversion "version conversion",
-//#define Eauthnone "auth -- user 'none' requires no authentication",
+//#define Echar "bad character in directory name"
+//#define Eopen "read/write -- on non open fid"
+//#define Ecount "read/write -- count too big"
+//#define Ealloc "phase error -- directory entry not allocated"
+//#define Eqid "phase error -- qid does not match"
+//#define Eaccess "access permission denied"
+//#define Eentry "directory entry not found"
+//#define Emode "open/create -- unknown mode"
+//#define Edir1 "walk -- in a non-directory"
+//#define Edir2 "create -- in a non-directory"
+//#define Ephase "phase error -- cannot happen"
+//#define Eexist "create/wstat -- file exists"
+//#define Edot "create/wstat -- . and .. illegal names"
+//#define Ewalk "walk -- too many (system wide)"
+//#define Eoffset "read/write -- offset negative"
+//#define Elocked "open/create -- file is locked"
+//#define Ebroken "read/write -- lock is broken"
+//#define Eauth "attach -- authentication failed"
+//#define Eauth2 "read/write -- authentication unimplemented"
+//#define Etoolong "name too long"
+//#define Efidinuse "fid in use"
+//#define Econvert "protocol botch"
+//#define Eversion "version conversion"
+//#define Eauthnone "auth -- user 'none' requires no authentication"
//#define Eauthdisabled "auth -- authentication disabled", /* development */
-//#define Eauthfile "auth -- out of auth files",
+//#define Eauthfile "auth -- out of auth files"
/*
* All metadata blocks share a common header:
@@ -171,8 +173,6 @@
*
* The superblock has this layout:
* version[8] always "gefs0001"
- * flags[4] status flags:
- * dirty=1<<0,
* blksz[4] block size in bytes
* bufsz[4] portion of leaf nodes
* allocated to buffers,
@@ -297,6 +297,12 @@
LogDead , /* deadlist a block */
};
+enum {
+ AOnone, /* not an admin message: runwrite uses this when Fmsg.a is nil */
+ AOsnap, /* label a snapshot: uses Amsg.old and Amsg.new */
+ AOsync, /* sync to disk; a nonzero Amsg.halt also sets the fs read-only */
+};
+
struct Bptr {
vlong addr;
vlong hash;
@@ -319,11 +325,25 @@
Blk *b;
};
+struct Amsg {
+ int op; /* one of the AO* constants */
+ int fd; /* fd that status and error text is printed to */
+ union {
+ struct { /* AOsnap */
+ char old[128]; /* existing label to snapshot from */
+ char new[128]; /* label to create or move */
+ };
+ struct { /* AOsync */
+ int halt; /* nonzero: go read-only before syncing */
+ };
+ };
+};
+
struct Fmsg {
Fcall;
int fd; /* the fd to repsond on */
- QLock *wrlk; /* write lock on fd */
int sz; /* the size of the message buf */
+ Amsg *a; /* admin message, freed by freemsg; nil for ordinary 9p messages */
uchar buf[];
};
@@ -358,6 +378,7 @@
* Shadows the superblock contents.
*/
struct Gefs {
+ /* immutable data */
int blksz; /* immutable */
int bufsz; /* immutable */
int pivsz; /* immutable */
@@ -380,28 +401,35 @@
int fd;
long broken;
+ long rdonly;
+ /* root snapshot tree */
Tree snap;
uvlong nextqid;
- uvlong nextgen; /* unlocked: only touched by mutator thread */
+ uvlong nextgen;
+ /* arena allocation */
Arena *arenas;
int narena;
long roundrobin;
vlong arenasz;
+ /* user list */
RWLock userlk;
User *users;
int nusers;
+ /* fid hash table */
Lock fidtablk;
Fid *fidtab[Nfidtab];
+
+ /* dent hash table */
Lock dtablk;
Dent *dtab[Ndtab];
- Lock lrulk;
/* protected by lrulk */
+ Lock lrulk;
Bucket *cache;
Blk *chead;
Blk *ctail;
--- a/fs.c
+++ b/fs.c
@@ -39,6 +39,70 @@
return nil;
}
+/* Label the snapshot currently named `old` as `new`; report the outcome on fd. */
+static void
+snapfs(int fd, char *old, char *new)
+{
+	Tree *t, *u;
+	char *e;
+	int ok;
+
+	ok = 0;
+	u = openlabel(new);	/* tree `new` already labels, if any; unref'd once the label moves */
+	if((t = openlabel(old)) == nil)
+		fprint(fd, "snap: open %s: does not exist\n", old);
+	else if((e = labelsnap(new, t->gen)) != nil)
+		fprint(fd, "snap: label %s: %s\n", new, e);
+	else if(u != nil && (e = unrefsnap(u->gen, -1)) != nil)
+		fprint(fd, "snap: unref %s: %s\n", new, e);
+	else
+		ok = 1;
+	if(u != nil)		/* release both trees on every path, not only on success */
+		closesnap(u);
+	if(t != nil)
+		closesnap(t);
+	if(ok){
+		sync();
+		fprint(fd, "snap taken: %s\n", new);
+	}
+}
+
+static Tree*
+scratchsnap(Fid *f)
+{
+ Tree *t, *n;
+ char *e;
+ /* swap the mount's root for a new snapshot of it, under fs->snaplk; any failure aborts */
+ t = f->mnt->root;
+ qlock(&fs->snaplk);
+ if((n = newsnap(t)) == nil){
+ fprint(2, "snap: save %s: %s\n", f->mnt->name, "create snap");
+ abort();
+ }
+ if((e = labelsnap(f->mnt->name, t->gen)) != nil){ /* labels the old tree's gen, not n's */
+ fprint(2, "snap: save %s: %s\n", f->mnt->name, e);
+ abort();
+ }
+ if(t->prev[0] != -1){ /* -1 presumably means "no predecessor" -- TODO confirm */
+ if((e = unrefsnap(t->prev[0], t->gen)) != nil){
+ fprint(2, "snap: unref old: %s\n", e);
+ abort();
+ }
+ }
+ f->mnt->root = n;
+ closesnap(t);
+ qunlock(&fs->snaplk);
+ sync(); /* called after snaplk is released */
+ return nil; /* NOTE(review): always nil despite the Tree* return type -- confirm callers ignore it */
+}
+
+void
+freemsg(Fmsg *m)
+{
+ free(m->a); /* attached admin message, if any; free(nil) is a no-op */
+ free(m);
+}
+
static int
okname(char *name)
{
@@ -130,11 +194,9 @@
dprint("→ %F\n", r);
if((n = convS2M(r, buf, sizeof(buf))) == 0)
abort();
- qlock(m->wrlk);
w = write(m->fd, buf, n);
- qunlock(m->wrlk);
if(w != n)
- fshangup(m->fd, "failed write");
+ fshangup(m->fd, Eio);
free(m);
}
@@ -466,6 +528,7 @@
}
m->fd = fd;
m->sz = sz;
+ m->a = nil;
PBIT32(m->buf, sz);
return m;
}
@@ -656,7 +719,6 @@
return;
}
if(o->mode != -1){
-print("use walk\n");
rerror(m, Einuse);
return;
}
@@ -1000,7 +1062,6 @@
if(f->mode != -1){
unlock(f);
clunkdent(de);
-print("use create\n");
rerror(m, Einuse);
putfid(f);
return;
@@ -1155,7 +1216,6 @@
lock(f);
if(f->mode != -1){
rerror(m, Einuse);
-print("in use open\n");
unlock(f);
putfid(f);
return;
@@ -1327,7 +1387,6 @@
return;
}
if(!(f->mode & DMWRITE)){
-print("f->mode: %x\n", f->mode);
rerror(m, Einuse);
putfid(f);
return;
@@ -1401,7 +1460,6 @@
{
int fd, msgmax, versioned;
char err[128];
- QLock *wrlk;
Fcall r;
Fmsg *m;
@@ -1408,8 +1466,6 @@
fd = (uintptr)pfd;
msgmax = Max9p;
versioned = 0;
- if((wrlk = mallocz(sizeof(QLock), 1)) == nil)
- fshangup(fd, "alloc wrlk: %r");
while(1){
if((m = readmsg(fd, msgmax)) == nil){
fshangup(fd, "truncated message: %r");
@@ -1424,7 +1480,6 @@
fshangup(fd, "version required");
return;
}
- m->wrlk = wrlk;
versioned = 1;
dprint("← %F\n", &m->Fcall);
switch(m->type){
@@ -1436,7 +1491,6 @@
case Tattach: fsattach(m, msgmax); break;
/* mutators */
- case Tflush: chsend(fs->wrchan, m); break;
case Tcreate: chsend(fs->wrchan, m); break;
case Twrite: chsend(fs->wrchan, m); break;
case Twstat: chsend(fs->wrchan, m); break;
@@ -1443,6 +1497,7 @@
case Tremove: chsend(fs->wrchan, m); break;
/* reads */
+ case Tflush: chsend(fs->rdchan, m); break;
case Twalk: chsend(fs->rdchan, m); break;
case Tread: chsend(fs->rdchan, m); break;
case Tstat: chsend(fs->rdchan, m); break;
@@ -1462,16 +1517,40 @@
runwrite(int wid, void *)
{
Fmsg *m;
+ int ao;
while(1){
m = chrecv(fs->wrchan);
quiesce(wid);
- switch(m->type){
- case Tflush: rerror(m, Eimpl); break;
- case Tcreate: fscreate(m); break;
- case Twrite: fswrite(m); break;
- case Twstat: fswstat(m); break;
- case Tremove: fsremove(m); break;
+		ao = (m->a == nil) ? AOnone : m->a->op;	/* no admin payload: plain 9p mutation */
+		switch(ao){
+		case AOnone:
+			if(fs->rdonly){
+				rerror(m, Erdonly);
+				break;	/* not return: the write proc must keep running and reach quiesce below */
+			}
+			if(fs->broken){
+				rerror(m, Efs);
+				break;
+			}
+			switch(m->type){
+			case Tcreate:	fscreate(m);	break;
+			case Twrite:	fswrite(m);	break;
+			case Twstat:	fswstat(m);	break;
+			case Tremove:	fsremove(m);	break;
+			}
+			break;
+		case AOsync:
+			fprint(m->a->fd, "syncing [readonly: %d]\n", m->a->halt);
+			if(m->a->halt)
+				ainc(&fs->rdonly);	/* set before sync so later mutations are refused */
+			sync();
+			freemsg(m);
+			break;
+		case AOsnap:
+			snapfs(m->a->fd, m->a->old, m->a->new);
+			freemsg(m);
+			break;
+		}
quiesce(wid);
}
@@ -1486,6 +1565,7 @@
m = chrecv(fs->rdchan);
quiesce(wid);
switch(m->type){
+ case Tflush: rerror(m, Eimpl); break;
case Twalk: fswalk(m); break;
case Tread: fsread(m); break;
case Tstat: fsstat(m); break;
--- a/load.c
+++ b/load.c
@@ -35,8 +35,7 @@
void
loadfs(char *dev)
{
- int blksz, bufspc, hdrsz;
- int i, dirty;
+ int i, blksz, bufspc, hdrsz;
vlong sb;
char *p, *e;
Tree *t;
@@ -59,7 +58,6 @@
sysfatal("corrupt superblock: bad magic");
p = b->data + 8;
- dirty = GBIT32(p); p += 4; /* dirty */
blksz = GBIT32(p); p += 4;
bufspc = GBIT32(p); p += 4;
hdrsz = GBIT32(p); p += 4;
@@ -95,10 +93,6 @@
sysfatal("fs uses different buffer size");
if(blksz != Blksz)
sysfatal("fs uses different block size");
- if(dirty){
- fprint(2, "file system was not unmounted cleanly");
- /* TODO: start gc pass */
- }
if((t = openlabel("main")) == nil)
sysfatal("load users: no main label");
if((e = loadusers(2, t)) != nil)