ref: b927f42f5f6ebbe9af2a7650a9bd504f2f0cd808
parent: 8e49cdfd1994af3ca0f112220b3614d685da4759
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Dec 25 11:26:34 EST 2023
fs: don't drop mutlk between syncing the snaps and serializing blocks
--- a/blk.c
+++ b/blk.c
@@ -161,7 +161,6 @@
a = &fs->arenas[mid];
alo = a->h0->bp.addr;
ahi = alo + a->size + 2*Blksz;
-//print("getarena %d [%d] %d (%#llx %#llx) %llx\n", lo, mid, hi, alo, ahi, b);
if(b < alo)
hi = mid-1;
else if(b > ahi)
@@ -349,7 +348,7 @@
switch(op){
case LogSync:
gen = ent >> 8;
- dprint("\tlog@%d: sync %llx\n", i, gen);
+ dprint("\tlog@%x: sync %lld\n", i, gen);
if(gen >= fs->qgen){
if(a->logtl == nil){
b->logsz = i;
@@ -364,7 +363,7 @@
case LogAlloc:
case LogAlloc1:
len = (op >= Log2wide) ? UNPACK64(d+8) : Blksz;
- dprint("\tlog@%d alloc: %llx+%llx\n", i, off, len);
+ dprint("\tlog@%x alloc: %llx+%llx\n", i, off, len);
grabrange(a->free, off & ~0xff, len);
a->used += len;
break;
@@ -371,13 +370,13 @@
case LogFree:
case LogFree1:
len = (op >= Log2wide) ? UNPACK64(d+8) : Blksz;
- dprint("\tlog@%d free: %llx+%llx\n", i, off, len);
+ dprint("\tlog@%x free: %llx+%llx\n", i, off, len);
freerange(a->free, off & ~0xff, len);
a->used -= len;
break;
default:
n = 0;
- dprint("\tlog@%d: log op %d\n", i, op);
+ dprint("\tlog@%x: log op %d\n", i, op);
abort();
break;
}
@@ -924,10 +923,11 @@
Arena *a;
Qent qe;
- b->enqueued = getcallerpc(&b);
- a = getarena(b->bp.addr);
assert(checkflag(b, Bdirty));
assert(b->bp.addr >= 0);
+
+ b->enqueued = getcallerpc(&b);
+ a = getarena(b->bp.addr);
holdblk(b);
finalize(b);
setflag(b, Bqueued);
@@ -954,14 +954,10 @@
{
if(a->qgen != b->qgen)
return (a->qgen < b->qgen) ? -1 : 1;
+ if(a->op != b->op)
+ return (a->op < b->op) ? -1 : 1;
if(a->bp.addr != b->bp.addr)
return (a->bp.addr < b->bp.addr) ? -1 : 1;
- if(a->op != b->op){
- if(a->op == Qfence)
- return -1;
- if(a->op == Qfree)
- return 1;
- }
return 0;
}
@@ -1029,7 +1025,6 @@
e.b->queued = 0;
}
return e;
-
}
void
@@ -1068,119 +1063,4 @@
}
assert(estacksz() == 0);
}
-}
-
-void
-wrbarrier(void)
-{
- Qent qe;
- int i;
-
- aincv(&fs->qgen, 1);
- fs->syncing = fs->nsyncers;
- for(i = 0; i < fs->nsyncers; i++){
- qe.op = Qfence;
- qe.bp.addr = 0;
- qe.bp.hash = -1;
- qe.bp.gen = -1;
- qe.b = nil;
- qput(&fs->syncq[i], qe);
- }
- while(fs->syncing != 0)
- rsleep(&fs->syncrz);
-}
-
-void
-sync(void)
-{
- Arena *a;
- int i;
-
-
- if(fs->rdonly)
- return;
- qlock(&fs->synclk);
- if(!fs->snap.dirty){
- qunlock(&fs->synclk);
- return;
- }
- if(waserror()){
- fprint(2, "failed to sync: %s\n", errmsg());
- qunlock(&fs->synclk);
- nexterror();
- }
-
- /*
- * pass 0: Pack the blocks we want to sync
- * while holding the write lock, and then
- * wait until all the blocks they point at
- * have hit disk; once they're on disk, we
- * can take a consistent snapshot.
- */
- qlock(&fs->mutlk);
- flushdlcache(1);
- for(i = 0; i < fs->narena; i++){
- a = &fs->arenas[i];
- qlock(a);
- setflag(a->logtl, Bdirty);
- enqueue(a->logtl);
- logbarrier(a, fs->qgen);
-
- packarena(a->h0->data, Blksz, a);
- packarena(a->h1->data, Blksz, a);
- finalize(a->h0);
- finalize(a->h1);
- setflag(a->h0, Bdirty);
- setflag(a->h1, Bdirty);
- fs->arenabp[i] = a->h0->bp;
- qunlock(a);
- }
-
- packsb(fs->sb0->buf, Blksz, fs);
- packsb(fs->sb1->buf, Blksz, fs);
- finalize(fs->sb0);
- finalize(fs->sb1);
- fs->snap.dirty = 0;
- qunlock(&fs->mutlk);
- wrbarrier();
- /*
- * pass 1: sync block headers; if we crash here,
- * the block footers are consistent, and we can
- * use them.
- */ wrbarrier();
- for(i = 0; i < fs->narena; i++)
- enqueue(fs->arenas[i].h0);
- wrbarrier();
-
- /*
- * pass 2: sync superblock; we have a consistent
- * set of block headers, so if we crash, we can
- * use the loaded block headers; the footers will
- * get synced after so that we can use them next
- * time around.
- */
- syncblk(fs->sb0);
- syncblk(fs->sb1);
-
- /*
- * pass 3: sync block footers; if we crash here,
- * the block headers are consistent, and we can
- * use them.
- */
- for(i = 0; i < fs->narena; i++)
- enqueue(fs->arenas[i].h1);
-
- /*
- * Pass 4: clean up the old snap tree's deadlist
- */
- freedl(&fs->snapdl, 1);
- fs->snapdl.hd.addr = -1;
- fs->snapdl.hd.hash = -1;
- fs->snapdl.hd.gen = -1;
- fs->snapdl.tl.addr = -1;
- fs->snapdl.tl.hash = -1;
- fs->snapdl.tl.gen = -1;
- fs->snapdl.ins = nil;
- qunlock(&fs->synclk);
- poperror();
}
--- a/dat.h
+++ b/dat.h
@@ -454,6 +454,7 @@
};
enum {
+ /* in priority order */
Qnone,
Qfence,
Qwrite,
@@ -461,7 +462,7 @@
};
struct Qent {
- long qgen;
+ vlong qgen;
Bptr bp;
Blk *b;
int op;
--- a/fns.h
+++ b/fns.h
@@ -61,6 +61,7 @@
void epochclean(void);
void limbo(Bfree*);
void freeblk(Tree*, Blk*, Bptr);
+int logbarrier(Arena *, vlong);
void dlappend(Dlist *dl, Bptr);
void killblk(Tree*, Bptr);
void blkdealloc(vlong);
@@ -84,12 +85,11 @@
void growfs(char*);
void loadarena(Arena*, Bptr);
void loadfs(char*);
-void sync(void);
void loadlog(Arena*, Bptr);
int scandead(Dlist*, int, void(*)(Bptr, void*), void*);
int endfs(void);
int compresslog(Arena*);
-void flushdlcache(int);
+void dlsync(void);
void setval(Blk*, Kvp*);
Conn* newconn(int, int);
--- a/fs.c
+++ b/fs.c
@@ -32,6 +32,137 @@
}
static void
+wrbarrier(void)	/* queue a Qfence on every sync queue, then sleep until fs->syncing drops to 0 */
+{
+ Qent qe;
+ int i;
+
+ aincv(&fs->qgen, 1);	/* bump the queue generation ahead of the fences */
+ fs->syncing = fs->nsyncers;	/* one fence outstanding per sync queue; presumably counted down by the syncers -- confirm */
+ for(i = 0; i < fs->nsyncers; i++){
+ qe.op = Qfence;
+ qe.bp.addr = 0;
+ qe.bp.hash = -1;
+ qe.bp.gen = -1;
+ qe.b = nil;	/* a fence entry carries no block */
+ qput(&fs->syncq[i], qe);	/* qe.qgen is not set here; presumably qput stamps it -- confirm */
+ }
+ aincv(&fs->qgen, 1);	/* bump again once the fences are queued; NOTE(review): presumably so later entries sort after them -- confirm */
+ while(fs->syncing != 0)
+ rsleep(&fs->syncrz);	/* NOTE(review): rsleep needs the Rendez's lock held; presumably synclk, taken by sync() -- confirm */
+}
+
+static void
+sync(void)	/* write the fs out in ordered passes: pending blocks, arena headers, superblocks, arena footers */
+{
+ Mount *mnt;
+ Arena *a;
+ int i;
+
+
+ if(fs->rdonly)
+ return;
+ qlock(&fs->synclk);
+ if(!fs->snap.dirty){	/* snap tree not marked dirty: nothing to do */
+ qunlock(&fs->synclk);
+ return;
+ }
+ if(waserror()){	/* NOTE(review): only synclk is released on error; mutlk, mountlk or an arena lock may still be held if pass 0 raises -- confirm */
+ fprint(2, "failed to sync: %s\n", errmsg());
+ qunlock(&fs->synclk);
+ nexterror();
+ }
+
+ /*
+ * pass 0: while holding the write lock,
+ * update every mounted snapshot and pack
+ * the arena headers and superblocks. Then
+ * wait until all the blocks they point at
+ * have hit disk; once they're on disk, we
+ * can take a consistent snapshot.
+ */
+ qlock(&fs->mutlk);
+ lock(&fs->mountlk);
+ for(mnt = fs->mounts; mnt != nil; mnt = mnt->next)
+ updatesnap(&mnt->root, mnt->root, mnt->name);
+ unlock(&fs->mountlk);
+ dlsync();
+ for(i = 0; i < fs->narena; i++){
+ a = &fs->arenas[i];
+ qlock(a);
+ /*
+ * because the log uses preallocated
+ * blocks, we need to write the log
+ * block out synchronously, or it may
+ * get reused.
+ */
+ logbarrier(a, fs->qgen);
+ finalize(a->logtl);
+ syncblk(a->logtl);
+
+ packarena(a->h0->data, Blksz, a);	/* h0 and h1 are packed from the same arena state */
+ packarena(a->h1->data, Blksz, a);
+ finalize(a->h0);
+ finalize(a->h1);
+ setflag(a->h0, Bdirty);
+ setflag(a->h1, Bdirty);
+ fs->arenabp[i] = a->h0->bp;
+ qunlock(a);
+ }
+
+ packsb(fs->sb0->buf, Blksz, fs);
+ packsb(fs->sb1->buf, Blksz, fs);
+ finalize(fs->sb0);
+ finalize(fs->sb1);
+ fs->snap.dirty = 0;
+ qunlock(&fs->mutlk);	/* mutlk is held continuously from the snapshot update through packing */
+ wrbarrier();	/* NOTE(review): the pass 1 comment below closes with a second wrbarrier() on its last line; back-to-back barriers look redundant -- confirm */
+
+ /*
+ * pass 1: sync block headers; if we crash here,
+ * the block footers are consistent, and we can
+ * use them.
+ */ wrbarrier();
+ for(i = 0; i < fs->narena; i++)
+ enqueue(fs->arenas[i].h0);
+ wrbarrier();
+
+ /*
+ * pass 2: sync superblock; we have a consistent
+ * set of block headers, so if we crash, we can
+ * use the loaded block headers; the footers will
+ * get synced after so that we can use them next
+ * time around.
+ */
+ syncblk(fs->sb0);
+ syncblk(fs->sb1);
+
+ /*
+ * pass 3: sync block footers; if we crash here,
+ * the block headers are consistent, and we can
+ * use them.
+ */
+ for(i = 0; i < fs->narena; i++)
+ enqueue(fs->arenas[i].h1);
+ wrbarrier();
+
+ /*
+ * pass 4: clean up the old snap tree's deadlist
+ */
+ freedl(&fs->snapdl, 1);
+ fs->snapdl.hd.addr = -1;	/* reset head and tail pointers to -1 */
+ fs->snapdl.hd.hash = -1;
+ fs->snapdl.hd.gen = -1;
+ fs->snapdl.tl.addr = -1;
+ fs->snapdl.tl.hash = -1;
+ fs->snapdl.tl.gen = -1;
+ fs->snapdl.ins = nil;
+ wrbarrier();
+ qunlock(&fs->synclk);
+ poperror();
+}
+
+static void
snapfs(Amsg *a, Tree **tp)
{
Tree *t, *s;
@@ -2190,7 +2321,6 @@
char buf[Offksz];
Bptr bp, nb, *oldhd;
vlong off;
- Mount *mnt;
Tree *t;
Arena *a;
Amsg *am;
@@ -2231,17 +2361,8 @@
epochend(id);
epochclean();
}
- qlock(&fs->mutlk);
if(am->halt)
ainc(&fs->rdonly);
- epochstart(id);
- lock(&fs->mountlk);
- for(mnt = fs->mounts; mnt != nil; mnt = mnt->next)
- updatesnap(&mnt->root, mnt->root, mnt->name);
- unlock(&fs->mountlk);
- qunlock(&fs->mutlk);
- epochend(id);
- epochclean();
sync();
for(i = 0; i < fs->narena; i++){
--- a/snap.c
+++ b/snap.c
@@ -8,7 +8,7 @@
#include "atomic.h"
static void
-dlsync(Dlist *dl)
+dlflush(Dlist *dl)
{
char kvbuf[512];
Msg m;
@@ -128,7 +128,7 @@
dlcachedel(dl, 0);
while(fs->dltail != nil && fs->dlcount >= fs->dlcmax){
dt = fs->dltail;
- dlsync(dt);
+ dlflush(dt);
dlcachedel(dt, 1);
dropblk(dt->ins);
free(dt);
@@ -306,7 +306,7 @@
nm++;
}
assert(nm <= nelem(m));
- flushdlcache(1);
+ dlsync();
btupsert(&fs->snap, m, nm);
reclaimblocks(t->gen, succ, t->pred);
if(deltree){
@@ -514,23 +514,13 @@
}
void
-flushdlcache(int clear)
+dlsync(void)
{
Dlist *dl, *n;
for(dl = fs->dlhead; dl != nil; dl = n){
n = dl->cnext;
- dlsync(dl);
- if(clear){
- if(dl->ins != nil)
- dropblk(dl->ins);
- free(dl);
- }
- }
- if(clear){
- fs->dlhead = nil;
- fs->dltail = nil;
- memset(fs->dlcache, 0, fs->dlcmax*sizeof(Dlist*));
+ dlflush(dl);	/* flush only: the list at fs->dlhead is left intact and no Dlist is freed here */
}
}