ref: ed97b6fdc1a4acd4216b748cf82fe3ba35cb1bb1
parent: 2d851cca16a65146bc6d38c6d54418d20cb38d04
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Oct 16 12:56:04 EDT 2022
everything: rework buffer lifetimes completely
--- a/blk.c
+++ b/blk.c
@@ -23,8 +23,6 @@
static vlong blkalloc_lk(Arena*);
static vlong blkalloc(int);
static int blkdealloc_lk(vlong);
-static Blk* blkbuf(void);
-static void blkfree(Blk*);
static Blk* initblk(Blk*, vlong, int);
static int logop(Arena *, vlong, vlong, int);
@@ -66,7 +64,7 @@
syncblk(Blk *b)
{
assert(checkflag(b, Bfinal));
- clrflag(b, Bqueued|Bdirty);
+ clrflag(b, Bdirty);
return pwrite(fs->fd, b->buf, Blksz, b->bp.addr);
}
@@ -77,20 +75,20 @@
vlong off, rem, n;
assert(bp != -1);
- if((b = blkbuf()) == nil)
+ if((b = cachepluck()) == nil)
return nil;
+ b->alloced = getcallerpc(&bp);
off = bp;
rem = Blksz;
while(rem != 0){
n = pread(fs->fd, b->buf, rem, off);
if(n <= 0){
- blkfree(b);
+ dropblk(b); /* b is a cache buffer from cachepluck(), not malloc memory */
return nil;
}
off += n;
rem -= n;
}
- b->ref = 1;
b->cnext = nil;
b->cprev = nil;
b->hnext = nil;
@@ -136,6 +134,7 @@
b->valsz = GBIT16(b->buf+4);
break;
}
+ assert(b->magic == Magic);
return b;
}
@@ -144,7 +143,7 @@
{
int n;
- n = hint+ainc(&fs->roundrobin)/(64*1024);
+ n = hint+ainc(&fs->roundrobin)/(1024*1024);
return &fs->arenas[n%fs->narena];
}
@@ -271,16 +270,16 @@
pb = lb;
if((o = blkalloc_lk(a)) == -1)
return -1;
- if((lb = blkbuf()) == nil)
+ if((lb = cachepluck()) == nil)
return -1;
initblk(lb, o, Tlog);
- cacheblk(lb);
+ cacheins(lb);
lb->logsz = Loghashsz;
p = lb->data + lb->logsz;
PBIT64(p, (uvlong)LogEnd);
finalize(lb);
if(syncblk(lb) == -1){
- putblk(lb);
+ dropblk(lb);
return -1;
}
@@ -289,11 +288,10 @@
PBIT64(p, lb->bp.addr|LogChain);
finalize(pb);
if(syncblk(pb) == -1){
- putblk(pb);
+ dropblk(pb);
return -1;
}
- lrubump(pb);
- putblk(pb);
+ dropblk(pb);
}
*tl = lb;
}
@@ -371,13 +369,13 @@
switch(op){
case LogEnd:
dprint("log@%d: end\n", i);
- putblk(b);
+ dropblk(b);
return 0;
case LogChain:
bp.addr = off & ~0xff;
bp.hash = -1;
bp.gen = -1;
- putblk(b);
+ dropblk(b);
dprint("log@%d: chain %B\n", i, bp);
goto Nextblk;
break;
@@ -431,7 +429,7 @@
*/
if((ba = blkalloc_lk(a)) == -1)
return -1;
- if((b = blkbuf()) == nil)
+ if((b = cachepluck()) == nil)
return -1;
initblk(b, ba, Tlog);
b->logsz = Loghashsz;
@@ -440,7 +438,7 @@
PBIT64(p, (uvlong)LogEnd);
finalize(b);
if(syncblk(b) == -1){
- putblk(b);
+ dropblk(b);
return -1;
}
@@ -448,7 +446,7 @@
if(a->tail != nil){
finalize(a->tail);
if(syncblk(a->tail) == -1){
- blkfree(b);
+ dropblk(b); /* b came from cachepluck(); return it to the LRU, never free() it */
return -1;
}
}
@@ -528,13 +526,13 @@
break;
}
}
- putblk(b);
lock(a);
- cachedel(bp.addr);
+ cachedel(b->bp.addr);
+ dropblk(b); /* drop before the dealloc so the error return cannot leak the hold */
if(blkdealloc_lk(ba) == -1){
unlock(a);
return -1;
}
unlock(a);
}
}
@@ -639,61 +637,23 @@
}
static Blk*
-blkbuf(void)
-{
- uvlong *p;
- Blk *b;
-
- qlock(&fs->freelk);
- while(fs->free == nil)
- rsleep(&fs->freerz);
- b = fs->free;
- fs->free = b->fnext;
-
- /* check for corruption */
- p = (uvlong*)b - 1;
- assert(*p == HdMagic);
-
- p = (uvlong*)(b + 1);
- assert(*p == TlMagic);
- qunlock(&fs->freelk);
-
- /*
- * If the block is cached,
- * then the cache holds a ref
- * to the block, so we only
- * want to reset the refs
- * on an allocation.
- */
- b->ref = 1;
- b->cnext = nil;
- b->cprev = nil;
- b->hnext = nil;
- b->flag = 0;
-
- return b;
-}
-
-static void
-blkfree(Blk *b)
-{
- b->bp.addr = -1;
- qlock(&fs->freelk);
- b->fnext = fs->free;
- fs->free = b;
- rwakeup(&fs->freerz);
- qunlock(&fs->freelk);
-}
-
-static Blk*
initblk(Blk *b, vlong bp, int t)
{
- assert(lookupblk(bp) == nil);
+ Blk *ob;
+
+ ob = cacheget(bp);
+ if(ob != nil){
+ fprint(2, "dup block: %#p %B (alloced %#llx freed %#llx lasthold: %#llx, lastdrop: %#llx)\n",
+ ob, ob->bp, ob->alloced, ob->freed, ob->lasthold, ob->lastdrop);
+ abort();
+ }
b->type = t;
b->bp.addr = bp;
b->bp.hash = -1;
b->bp.gen = fs->nextgen;
+ lock(&fs->freelk);
b->qgen = fs->qgen;
+ unlock(&fs->freelk);
switch(t){
case Traw:
case Tarena:
@@ -719,8 +679,9 @@
b->bufsz = 0;
b->logsz = 0;
b->lognxt = 0;
+ b->alloced = getcallerpc(&b);
- return cacheblk(b);
+ return b;
}
Blk*
@@ -731,12 +692,10 @@
if((bp = blkalloc(t)) == -1)
return nil;
- if((b = blkbuf()) == nil)
+ if((b = cachepluck()) == nil)
return nil;
initblk(b, bp, t);
- cacheblk(b);
b->alloced = getcallerpc(&t);
- assert(b->ref == 2);
return b;
}
@@ -756,8 +715,8 @@
r->bufsz = b->bufsz;
r->logsz = b->logsz;
r->lognxt = b->lognxt;
+ r->alloced = getcallerpc(&b);
memcpy(r->buf, b->buf, sizeof(r->buf));
- b->alloced = getcallerpc(&b);
return r;
}
@@ -766,7 +725,6 @@
{
uvlong h;
- setflag(b, Bfinal);
if(b->type != Traw)
PBIT16(b->buf, b->type);
switch(b->type){
@@ -795,6 +753,9 @@
case Tarena:
break;
}
+
+ setflag(b, Bfinal);
+ cacheins(b);
}
Blk*
@@ -806,9 +767,7 @@
i = ihash(bp.addr) % nelem(fs->blklk);
qlock(&fs->blklk[i]);
- if((b = lookupblk(bp.addr)) != nil){
- cacheblk(b);
- lrubump(b);
+ if((b = cacheget(bp.addr)) != nil){
qunlock(&fs->blklk[i]);
return b;
}
@@ -815,11 +774,11 @@
if((b = readblk(bp.addr, flg)) == nil){
qunlock(&fs->blklk[i]);
return nil;
- }else
- b->alloced = getcallerpc(&bp);
+ }
+ b->alloced = getcallerpc(&bp);
h = blkhash(b);
if((flg&GBnochk) == 0 && h != bp.hash){
- fprint(2, "corrupt block %B: %.16llux != %.16llux\n", bp, h, bp.hash);
+ fprint(2, "corrupt block %p %B: %.16llux != %.16llux\n", b, bp, h, bp.hash);
qunlock(&fs->blklk[i]);
abort();
return nil;
@@ -826,8 +785,7 @@
}
b->bp.hash = h;
b->bp.gen = bp.gen;
- cacheblk(b);
- lrubump(b);
+ cacheins(b);
qunlock(&fs->blklk[i]);
return b;
@@ -834,12 +792,33 @@
}
Blk*
-refblk(Blk *b)
+holdblk(Blk *b)
{
ainc(&b->ref);
+ b->lasthold = getcallerpc(&b);
return b;
}
+void
+dropblk(Blk *b)
+{
+ assert(b == nil || b->ref > 0);
+ if(b == nil || adec(&b->ref) != 0)
+ return;
+ b->lastdrop = getcallerpc(&b);
+// assert(b->cprev == nil && b->cnext == nil);
+ /*
+ * While a freed block can get resurrected
+ * before quiescence, it's unlikely -- so
+ * it goes into the bottom of the LRU to
+ * get selected early for reuse.
+ */
+ if(checkflag(b, Bfreed))
+ lrubot(b);
+ else
+ lrutop(b);
+}
+
ushort
blkfill(Blk *b)
{
@@ -856,21 +835,10 @@
}
void
-putblk(Blk *b)
-{
- if(b == nil || adec(&b->ref) != 0)
- return;
- assert(!checkflag(b, Bcached));
- assert(checkflag(b, Bfreed) || !checkflag(b, Bdirty));
- blkfree(b);
-}
-
-void
freebp(Tree *t, Bptr bp)
{
Bfree *f;
- dprint("[%s] free blk %B\n", (t == &fs->snap) ? "snap" : "data", bp);
if(t != nil && t != &fs->snap && bp.gen <= t->gen){
killblk(t, bp);
return;
@@ -878,41 +846,41 @@
if((f = malloc(sizeof(Bfree))) == nil)
return;
f->bp = bp;
- lock(&fs->dealloclk);
- f->next = fs->deallochd;
- fs->deallochd = f;
- unlock(&fs->dealloclk);
+ lock(&fs->freelk);
+ f->next = fs->freehd;
+ fs->freehd = f;
+ unlock(&fs->freelk);
}
void
freeblk(Tree *t, Blk *b)
{
- b->freed = getcallerpc(&b);
+ b->freed = getcallerpc(&t);
setflag(b, Bfreed);
freebp(t, b->bp);
}
void
-reclaimblk(Bptr bp)
+epochstart(int tid)
{
- Arena *a;
+ ainc((long*)&fs->active[tid]);
+}
- a = getarena(bp.addr);
- lock(a);
- cachedel(bp.addr);
- blkdealloc_lk(bp.addr);
- unlock(a);
+void
+epochend(int tid)
+{
+ ainc((long*)&fs->active[tid]);
}
void
-quiesce(int tid)
+epochclean(void)
{
int i, allquiesced;
Bfree *p, *n;
+ Arena *a;
lock(&fs->activelk);
allquiesced = 1;
- fs->active[tid]++;
for(i = 0; i < fs->nquiesce; i++){
/*
* Odd parity on quiescence implies
@@ -921,30 +889,37 @@
* that enters us into the critical
* section.
*/
- if((fs->active[i] & 1) == 0)
+ if((fs->active[i] & 1) != 0)
continue;
if(fs->active[i] == fs->lastactive[i])
allquiesced = 0;
}
+
p = nil;
if(allquiesced){
- inc64(&fs->qgen, 1);
for(i = 0; i < fs->nquiesce; i++)
fs->lastactive[i] = fs->active[i];
- lock(&fs->dealloclk);
- if(fs->deallocp != nil){
- p = fs->deallocp->next;
- fs->deallocp->next = nil;
+ lock(&fs->freelk);
+ fs->qgen++;
+ if(fs->freep != nil){
+ p = fs->freep->next;
+ fs->freep->next = nil;
}
- fs->deallocp = fs->deallochd;
- unlock(&fs->dealloclk);
+ fs->freep = fs->freehd;
+ unlock(&fs->freelk);
}
unlock(&fs->activelk);
while(p != nil){
n = p->next;
- reclaimblk(p->bp);
+ a = getarena(p->bp.addr);
+
+ lock(a);
+ cachedel(p->bp.addr);
+ blkdealloc_lk(p->bp.addr);
+ unlock(a);
+
free(p);
p = n;
}
@@ -967,7 +942,7 @@
a = getarena(b->bp.addr);
assert(checkflag(b, Bdirty));
- refblk(b);
+ holdblk(b);
finalize(b);
chsend(a->sync, b);
}
@@ -1030,7 +1005,7 @@
c = p;
q.nheap = 0;
- q.heapsz = fs->cmax;
+ q.heapsz = 2*fs->cmax/fs->narena;
if((q.heap = malloc(q.heapsz*sizeof(Blk*))) == nil)
sysfatal("alloc queue: %r");
while(1){
@@ -1056,9 +1031,8 @@
fprint(2, "write: %r");
abort();
}
- lrubump(b);
}
- putblk(b);
+ dropblk(b);
}
}
--- a/cache.c
+++ b/cache.c
@@ -7,117 +7,120 @@
#include "fns.h"
static void
-cachedel_lk(vlong del)
+lrudel(Blk *b)
{
- Bucket *bkt;
- Blk *b, **p;
- u32int h;
-
- h = ihash(del);
- bkt = &fs->cache[h % fs->cmax];
- lock(bkt);
- p = &bkt->b;
- for(b = bkt->b; b != nil; b = b->hnext){
- if(b->bp.addr == del)
- break;
- p = &b->hnext;
- }
- unlock(bkt);
- if(b == nil)
- return;
- assert(checkflag(b, Bcached));
-
- *p = b->hnext;
+ if(b == fs->chead)
+ fs->chead = b->cnext;
+ if(b == fs->ctail)
+ fs->ctail = b->cprev;
if(b->cnext != nil)
b->cnext->cprev = b->cprev;
if(b->cprev != nil)
b->cprev->cnext = b->cnext;
- if(fs->ctail == b)
- fs->ctail = b->cprev;
- if(fs->chead == b)
- fs->chead = b->cnext;
b->cnext = nil;
- b->cprev = nil;
- fs->ccount--;
+ b->cprev = nil;
+}
- clrflag(b, Bcached);
- putblk(b);
+void
+lrutop(Blk *b)
+{
+ qlock(&fs->lrulk);
+ /*
+ * Someone got in first and did a
+ * cache lookup; we no longer want
+ * to put this into the LRU, because
+ * it's now in use.
+ */
+ assert(b->magic == Magic);
+ if(b->ref != 0){
+ qunlock(&fs->lrulk);
+ return;
+ }
+ lrudel(b);
+ if(fs->chead != nil)
+ fs->chead->cprev = b;
+ if(fs->ctail == nil)
+ fs->ctail = b;
+ b->cnext = fs->chead;
+ fs->chead = b;
+ rwakeup(&fs->lrurz);
+ qunlock(&fs->lrulk);
}
void
-cachedel(vlong del)
+lrubot(Blk *b)
{
- lock(&fs->lrulk);
- cachedel_lk(del);
- unlock(&fs->lrulk);
+ qlock(&fs->lrulk);
+ /*
+ * Someone got in first and did a
+ * cache lookup; we no longer want
+ * to put this into the LRU, because
+ * it's now in use.
+ */
+ assert(b->magic == Magic);
+ if(b->ref != 0){
+ qunlock(&fs->lrulk);
+ return;
+ }
+ lrudel(b);
+ if(fs->ctail != nil)
+ fs->ctail->cnext = b;
+ if(fs->chead == nil)
+ fs->chead = b;
+ b->cprev = fs->ctail;
+ fs->ctail = b;
+ rwakeup(&fs->lrurz);
+ qunlock(&fs->lrulk);
}
-Blk*
-cacheblk(Blk *b)
+void
+cacheins(Blk *b)
{
Bucket *bkt;
u32int h;
- Blk *e;
+ assert(b->magic == Magic);
h = ihash(b->bp.addr);
bkt = &fs->cache[h % fs->cmax];
lock(bkt);
- for(e = bkt->b; e != nil; e = e->hnext){
- if(b == e)
- goto Found;
- assert(b->bp.addr != e->bp.addr);
+ if(checkflag(b, Bcached)){
+ unlock(bkt);
+ return;
}
+ setflag(b, Bcached);
b->hnext = bkt->b;
bkt->b = b;
- if(!checkflag(b, Bcached)){
- setflag(b, Bcached);
- refblk(b);
- fs->ccount++;
- }
-Found:
unlock(bkt);
- return b;
}
-Blk*
-lrubump(Blk *b)
+void
+cachedel(vlong addr)
{
- Blk *c;
+ Bucket *bkt;
+ Blk *b, **p;
+ u32int h;
- lock(&fs->lrulk);
- if(checkflag(b, Bcached) == 0){
- assert(b->cnext == nil);
- assert(b->cprev == nil);
- goto Done;
- }
- if(b == fs->chead)
- fs->chead = b->cnext;
+ if(addr == -1)
+ return;
- if(b == fs->ctail)
- fs->ctail = b->cprev;
- if(b->cnext != nil)
- b->cnext->cprev = b->cprev;
- if(b->cprev != nil)
- b->cprev->cnext = b->cnext;
- if(fs->ctail == nil)
- fs->ctail = b;
- if(fs->chead != nil)
- fs->chead->cprev = b;
- b->cnext = fs->chead;
- b->cprev = nil;
- fs->chead = b;
- for(c = fs->ctail; c != b && fs->ccount >= fs->cmax; c = fs->ctail){
- assert(c != fs->chead);
- cachedel_lk(c->bp.addr);
+ h = ihash(addr);
+ bkt = &fs->cache[h % fs->cmax];
+ lock(bkt);
+ p = &bkt->b;
+ for(b = bkt->b; b != nil; b = b->hnext){
+ if(b->bp.addr == addr){
+ *p = b->hnext;
+ clrflag(b, Bcached);
+ b->hnext = nil;
+ break;
+ }
+ p = &b->hnext;
}
-
-Done:
- unlock(&fs->lrulk);
- return b;
+ unlock(bkt);
}
Blk*
-lookupblk(vlong off)
+cacheget(vlong off)
{
Bucket *bkt;
u32int h;
@@ -127,14 +130,49 @@
inc64(&fs->stats.cachelook, 1);
bkt = &fs->cache[h % fs->cmax];
+
+ qlock(&fs->lrulk);
lock(bkt);
- for(b = bkt->b; b != nil; b = b->hnext)
+ for(b = bkt->b; b != nil; b = b->hnext){
if(b->bp.addr == off){
inc64(&fs->stats.cachehit, 1);
- refblk(b);
+ holdblk(b);
+ lrudel(b);
+ b->lasthold = getcallerpc(&off);
break;
}
+ }
unlock(bkt);
+ qunlock(&fs->lrulk);
+
return b;
}
+/*
+ * Pulls the block from the bottom of the LRU for reuse.
+ */
+Blk*
+cachepluck(void)
+{
+ Blk *b;
+
+ qlock(&fs->lrulk);
+ while(fs->ctail == nil)
+ rsleep(&fs->lrurz);
+
+ b = fs->ctail;
+ assert(b->magic == Magic);
+ assert(b->ref == 0);
+ cachedel(b->bp.addr);
+ lrudel(b);
+ b->flag = 0;
+ b->bp.addr = -1;
+ b->bp.hash = -1;
+ b->lasthold = 0;
+ b->lastdrop = 0;
+ b->freed = 0;
+ b->hnext = nil;
+ qunlock(&fs->lrulk);
+
+ return holdblk(b);
+}
--- a/check.c
+++ b/check.c
@@ -81,7 +81,7 @@
}
if(badblk(fd, c, h - 1, &x, &y))
fail++;
- putblk(c);
+ dropblk(c);
}
r = keycmp(&x, &y);
switch(r){
@@ -187,7 +187,7 @@
if((b = getroot(&fs->snap, &height)) != nil){
if(badblk(fd, b, height-1, nil, 0))
ok = 0;
- putblk(b);
+ dropblk(b);
}
return ok;
}
--- a/cons.c
+++ b/cons.c
@@ -226,14 +226,23 @@
}
static void
-showblkdump(int fd, char **ap, int)
+showblkdump(int fd, char **ap, int na)
{
Bptr bp;
+ Blk *b;
- bp.addr = strtoll(ap[0], nil, 16);
- bp.hash = -1;
- bp.gen = -1;
- showbp(fd, bp, 0);
+ if(na == 0){
+ for(b = fs->blks; b != fs->blks+fs->cmax; b++){
+ fprint(fd, "%#p %B:\t%#lx %#llx %#llx\n", b, b->bp, b->flag, b->alloced, b->freed);
+ b->magic = Magic;
+ lrutop(b);
+ }
+ }else{
+ bp.addr = strtoll(ap[0], nil, 16);
+ bp.hash = -1;
+ bp.gen = -1;
+ showbp(fd, bp, 0);
+ }
}
static void
@@ -289,7 +298,8 @@
{.name="show", .sub="snap", .minarg=0, .maxarg=1, .fn=showsnap},
{.name="show", .sub="tree", .minarg=0, .maxarg=1, .fn=showtree},
{.name="show", .sub="users", .minarg=0, .maxarg=0, .fn=showusers},
- {.name="show", .sub="blk", .minarg=1, .maxarg=1, .fn=showblkdump},
+ {.name="show", .sub="blk", .minarg=0, .maxarg=1, .fn=showblkdump},
+ {.name="show", .sub="blks", .minarg=1, .maxarg=1, .fn=showblkdump},
{.name="debug", .sub=nil, .minarg=0, .maxarg=1, .fn=setdbg},
{.name=nil, .sub=nil},
@@ -306,7 +316,7 @@
while(1){
if((n = read(fd, buf, sizeof(buf)-1)) == -1)
break;
- quiesce(tid);
+ epochstart(tid);
buf[n] = 0;
nf = tokenize(buf, f, nelem(f));
if(nf == 0 || strlen(f[0]) == 0)
@@ -334,6 +344,6 @@
fprint(fd, " %s", f[i]);
fprint(fd, "'\n");
}
- quiesce(tid);
+ epochend(tid);
}
}
--- a/dat.h
+++ b/dat.h
@@ -92,10 +92,9 @@
enum {
Bdirty = 1 << 0,
- Bqueued = 1 << 1,
- Bfinal = 1 << 2,
- Bfreed = 1 << 3,
- Bcached = 1 << 4,
+ Bfinal = 1 << 1,
+ Bfreed = 1 << 2,
+ Bcached = 1 << 3,
};
/* internal errors */
@@ -240,8 +239,7 @@
};
enum {
- HdMagic = 0x68646d6167696373,
- TlMagic = 0x979b929e98969c8c,
+ Magic = 0x979b929e98969c8c,
};
/*
@@ -375,8 +373,9 @@
};
struct Bfree {
- Bptr bp;
Bfree *next;
+ Blk *b;
+ Bptr bp;
};
struct User {
@@ -430,18 +429,14 @@
int nquiesce;
vlong qgen;
Lock activelk;
- int active[32];
+ ulong active[32];
int lastactive[32];
Chan *chsync[32];
- QLock freelk;
- Rendez freerz;
- Blk *free;
+ Lock freelk;
+ Bfree *freep;
+ Bfree *freehd;
- Lock dealloclk;
- Bfree *deallocp;
- Bfree *deallochd;
-
int fd;
long broken;
long rdonly;
@@ -460,12 +455,14 @@
QLock blklk[32];
/* protected by lrulk */
- Lock lrulk;
+ QLock lrulk;
+ Rendez lrurz;
Bucket *cache;
+ Blk *blks; /* all blocks for debugging */
Blk *chead;
Blk *ctail;
- int ccount;
- int cmax;
+ usize ccount;
+ usize cmax;
Stats stats;
};
@@ -604,13 +601,20 @@
vlong logsz; /* for allocation log */
vlong lognxt; /* for allocation log */
+ /* debug */
uintptr alloced;
- uintptr freed; /* debug */
+ uintptr lasthold;
+ uintptr lasthold0;
+ uintptr lasthold1;
+ uintptr lastdrop;
+ uintptr uncached;
+ uintptr freed;
Bptr bp;
long ref;
char *data;
char buf[Blksz];
+ vlong magic;
};
struct Chan {
--- a/dump.c
+++ b/dump.c
@@ -266,7 +266,7 @@
sysfatal("failed load: %r");
if(recurse)
rshowblk(fd, c, indent + 1, 1);
- putblk(c);
+ dropblk(c);
}else{
fprint(fd, "%.*s[%03d]|%P\n", 4*indent, spc, i, &kv);
}
@@ -322,7 +322,7 @@
b = getroot(t, &h);
fprint(fd, "=== [%s] %B @%d\n", name, t->bp, t->ht);
rshowblk(fd, b, 0, 1);
- putblk(b);
+ dropblk(b);
if(t != &fs->snap)
closesnap(t);
}
@@ -334,7 +334,7 @@
b = getblk(bp, GBnochk);
rshowblk(fd, b, 0, recurse);
- putblk(b);
+ dropblk(b);
}
static void
--- a/fns.h
+++ b/fns.h
@@ -15,16 +15,23 @@
Blk* dupblk(Blk*);
Blk* getroot(Tree*, int*);
Blk* getblk(Bptr, int);
-Blk* refblk(Blk*);
-Blk* cacheblk(Blk*);
-Blk* lrubump(Blk*);
+Blk* holdblk(Blk*);
+void dropblk(Blk*);
+
+void lrutop(Blk*);
+void lrubot(Blk*);
+void cacheins(Blk*);
void cachedel(vlong);
-Blk* lookupblk(vlong);
+Blk* cacheget(vlong);
+Blk* cachepluck(void);
+
Arena* getarena(vlong);
-void putblk(Blk*);
int syncblk(Blk*);
void enqueue(Blk*);
-void quiesce(int);
+void epochstart(int);
+void epochend(int);
+void epochclean(void);
+void freesync(void);
void freeblk(Tree*, Blk*);
void freebp(Tree*, Bptr);
int killblk(Tree*, Bptr);
--- a/fs.c
+++ b/fs.c
@@ -276,7 +276,7 @@
if((b = getblk(bp, GBraw)) == nil)
return -1;
memcpy(d, b->buf+fo, n);
- putblk(b);
+ dropblk(b);
return n;
}
@@ -309,7 +309,7 @@
return -1;
memcpy(b->buf, t->buf, Blksz);
freeblk(f->mnt->root, t);
- putblk(t);
+ dropblk(t);
}else if(e != Eexist){
werrstr("%s", e);
return -1;
@@ -328,7 +328,7 @@
packbp(m->v, m->nv, &b->bp);
*ret = b->bp;
- putblk(b);
+ dropblk(b);
return n;
}
@@ -1886,7 +1886,7 @@
while(1){
m = chrecv(fs->wrchan, 1);
- quiesce(wid);
+ epochstart(wid);
ao = (m->a == nil) ? AOnone : m->a->op;
switch(ao){
case AOnone:
@@ -1923,7 +1923,7 @@
freemsg(m);
break;
}
- quiesce(wid);
+ epochend(wid);
}
}
@@ -1934,7 +1934,7 @@
while(1){
m = chrecv(fs->rdchan, 1);
- quiesce(wid);
+ epochstart(wid);
switch(m->type){
case Tflush: rerror(m, Eimpl); break;
case Tattach: fsattach(m); break;
@@ -1943,7 +1943,7 @@
case Tstat: fsstat(m); break;
case Topen: fsopen(m); break;
}
- quiesce(wid);
+ epochend(wid);
}
}
--- a/load.c
+++ b/load.c
@@ -50,11 +50,11 @@
Arena *a;
char *e;
Tree *t;
- int i;
+ int i, k;
fs->osnap = nil;
fs->gotinfo = 0;
- fs->narena = 8;
+ fs->narena = 1;
if((fs->fd = open(dev, ORDWR)) == -1)
sysfatal("open %s: %r", dev);
if((fs->arenas = calloc(1, sizeof(Arena))) == nil)
@@ -67,7 +67,8 @@
if(!fs->gotinfo){
if((fs->arenas = realloc(fs->arenas, fs->narena*sizeof(Arena))) == nil)
sysfatal("malloc: %r");
- memset(fs->arenas+1, 0, (fs->narena-1)*sizeof(Arena));
+ for(k = 1; k < fs->narena; k++)
+ memset(&fs->arenas[k], 0, sizeof(Arena));
fs->gotinfo = 1;
}
}
@@ -92,6 +93,7 @@
fprint(2, "\tarenasz:\t%lld\n", fs->arenasz);
fprint(2, "\tnextqid:\t%lld\n", fs->nextqid);
fprint(2, "\tnextgen:\t%lld\n", fs->nextgen);
+ fprint(2, "\tblocksize:\t%lld\n", Blksz);
fprint(2, "\tcachesz:\t%lld MiB\n", fs->cmax*Blksz/MiB);
if((t = openlabel("main")) == nil)
sysfatal("load users: no main label");
--- a/main.c
+++ b/main.c
@@ -15,7 +15,7 @@
char *forceuser;
char *srvname = "gefs";
char *dev;
-int cachesz = 512*MiB;
+vlong cachesz = 512*MiB;
vlong
inc64(vlong *v, vlong dv)
@@ -33,15 +33,12 @@
static void
initfs(vlong cachesz)
{
- char *p, *buf, *ebuf;
- usize sz;
- uvlong *ck;
- Blk *b;
+ Blk *b, *buf;
if((fs = mallocz(sizeof(Gefs), 1)) == nil)
sysfatal("malloc: %r");
- fs->freerz.l = &fs->freelk;
+ fs->lrurz.l = &fs->lrulk;
fs->syncrz.l = &fs->synclk;
fs->noauth = noauth;
fs->cmax = cachesz/Blksz;
@@ -51,22 +48,16 @@
sysfatal("malloc: %r");
/* leave room for corruption check magic */
- sz = 8+sizeof(Blk)+8;
- buf = sbrk(fs->cmax * sz);
+ buf = sbrk(fs->cmax * sizeof(Blk));
if(buf == (void*)-1)
sysfatal("sbrk: %r");
- ebuf = buf + fs->cmax*sz;
- for(p = buf; p != ebuf; p += sz){
- ck = (uvlong*)p;
- *ck = HdMagic;
-
- b = (Blk*)(p+8);
- b->fnext = fs->free;
- fs->free = b;
-
- ck = (uvlong*)(b+1);
- *ck = TlMagic;
+ for(b = buf; b != buf+fs->cmax; b++){
+ b->bp.addr = -1;
+ b->bp.hash = -1;
+ b->magic = Magic;
+ lrutop(b);
}
+ fs->blks = buf;
}
static void
@@ -197,10 +188,15 @@
if((s = getenv("NPROC")) != nil)
nproc = atoi(s);
- if(nproc == 0)
+
+ /*
+ * too few procs, we can't parallelize io,
+ * too many, we suffer lock contention
+ */
+ if(nproc < 2)
nproc = 2;
- if(nproc > nelem(fs->active))
- nproc = nelem(fs->active);
+ if(nproc > 6)
+ nproc = 6;
if(ream){
reamfs(dev);
exits(nil);
@@ -210,19 +206,19 @@
fs->rdchan = mkchan(32);
fs->wrchan = mkchan(32);
- fs->nsyncers = nproc;
+ fs->nsyncers = 2;
if(fs->nsyncers > fs->narena)
fs->nsyncers = fs->narena;
for(i = 0; i < fs->nsyncers; i++)
- fs->chsync[i] = mkchan(128);
+ fs->chsync[i] = mkchan(1024);
for(i = 0; i < fs->narena; i++)
- fs->arenas[i].sync = fs->chsync[i%nproc];
+ fs->arenas[i].sync = fs->chsync[i%fs->nsyncers];
srvfd = postfd(srvname, "");
ctlfd = postfd(srvname, ".cmd");
launch(runtasks, -1, nil, "tasks");
launch(runcons, fs->nquiesce++, (void*)ctlfd, "ctl");
launch(runwrite, fs->nquiesce++, nil, "mutate");
- for(i = 0; i < nproc; i++)
+ for(i = 0; i < 2; i++)
launch(runread, fs->nquiesce++, nil, "readio");
for(i = 0; i < fs->nsyncers; i++)
launch(runsync, -1, fs->chsync[i], "syncio");
--- a/ream.c
+++ b/ream.c
@@ -85,8 +85,7 @@
char *p;
Blk *b;
- if((b = mallocz(sizeof(Blk), 1)) == nil)
- sysfatal("ream: %r");
+ b = cachepluck();
addr = start+Blksz; /* arena header */
a->head.addr = -1;
@@ -93,7 +92,7 @@
a->head.hash = -1;
a->head.gen = -1;
- memset(b, 0, sizeof(Blk));
+ memset(b->buf, 0, sizeof(b->buf));
b->type = Tlog;
b->bp.addr = addr;
b->logsz = 32;
@@ -109,11 +108,13 @@
finalize(b);
if(syncblk(b) == -1)
sysfatal("ream: init log");
bh = b->bp.hash;
bo = b->bp.addr;
- memset(b, 0, sizeof(Blk));
+ dropblk(b); /* release only after bp.hash and bp.addr have been read */
+ b = cachepluck();
+ memset(b->buf, 0, sizeof(b->buf));
b->type = Tarena;
b->bp.addr = start;
b->data = b->buf;
@@ -127,6 +128,7 @@
finalize(b);
if(syncblk(b) == -1)
sysfatal("ream: write arena: %r");
+ dropblk(b);
}
void
@@ -189,7 +191,7 @@
}
if((tb = newblk(Tleaf)) == nil)
sysfatal("ream: allocate root: %r");
- refblk(tb);
+ holdblk(tb);
initroot(tb);
finalize(tb);
syncblk(tb);
@@ -204,7 +206,7 @@
*/
if((rb = newblk(Tleaf)) == nil)
sysfatal("ream: allocate snaps: %r");
- refblk(rb);
+ holdblk(rb);
initsnap(rb, tb);
finalize(rb);
syncblk(rb);
@@ -212,8 +214,8 @@
fs->snap.bp = rb->bp;
fs->snap.ht = 1;
- putblk(tb);
- putblk(rb);
+ dropblk(tb);
+ dropblk(rb);
for(i = 0; i < fs->narena; i++){
a = &fs->arenas[i];
finalize(a->tail);
--- a/snap.c
+++ b/snap.c
@@ -98,7 +98,7 @@
}
lb->logsz = Loghashsz;
dl->ins = lb;
- putblk(pb);
+ dropblk(pb);
}
p = lb->data + lb->logsz;
PBIT64(p, v1); p += 8;
--- a/tree.c
+++ b/tree.c
@@ -963,7 +963,7 @@
if(pp->op != POmod || pp->op != POmerge)
return 0;
- m = refblk(pp->nl);
+ m = holdblk(pp->nl);
spc = (m->type == Tleaf) ? Leafspc : Pivspc;
if(idx-1 >= 0){
getval(p->b, idx-1, &kl);
@@ -990,9 +990,9 @@
Done:
ret = 0;
Out:
- putblk(m);
- putblk(l);
- putblk(r);
+ dropblk(m);
+ dropblk(l);
+ dropblk(r);
return ret;
}
@@ -1081,9 +1081,9 @@
freeblk(t, p->b);
if(p->m != nil)
freeblk(t, p->m);
- putblk(p->b);
- putblk(p->nl);
- putblk(p->nr);
+ dropblk(p->b);
+ dropblk(p->nl);
+ dropblk(p->nr);
}
free(path);
}
@@ -1178,6 +1178,7 @@
PBIT16(p, o);
}
enqueue(r);
+
lock(&t->lk);
t->bp = r->bp;
t->dirty = 1;
@@ -1184,8 +1185,8 @@
unlock(&t->lk);
freeblk(t, b);
- putblk(b);
- putblk(r);
+ dropblk(b);
+ dropblk(r);
return nil;
}
@@ -1265,6 +1266,8 @@
assert(rb->bp.addr != 0);
+ assert(rb->bp.addr != 0);
+
lock(&t->lk);
t->ht += dh;
t->bp = rb->bp;
@@ -1305,11 +1308,13 @@
if((b = getroot(t, &h)) == nil)
return Efs;
- if((p = calloc(h, sizeof(Blk*))) == nil)
+ if((p = calloc(h, sizeof(Blk*))) == nil){
+ dropblk(b);
return Enomem;
+ }
err = Eexist;
ok = 0;
- p[0] = refblk(b);
+ p[0] = holdblk(b);
for(i = 1; i < h; i++){
if(blksearch(p[i-1], k, r, &same) == -1)
break;
@@ -1351,8 +1356,8 @@
Out:
for(i = 0; i < h; i++)
if(p[i] != nil)
- putblk(p[i]);
- putblk(b);
+ dropblk(p[i]);
+ dropblk(b);
free(p);
return err;
}
@@ -1432,7 +1437,7 @@
return nil;
}
if(p[i].b != nil)
- putblk(p[i].b);
+ dropblk(p[i].b);
p[i].b = nil;
p[i].vi = 0;
p[i].bi = 0;
@@ -1498,6 +1503,6 @@
int i;
for(i = 0; i < s->root.ht; i++)
- putblk(s->path[i].b);
+ dropblk(s->path[i].b);
free(s->path);
}