ref: 2a941967c8f06ee6f6eb63bf5399c0d2ba4b89df
parent: f3b4a20db85543154fdde5b8a6b88bb10bbfb5a5
author: Ori Bernstein <ori@eigenstate.org>
date: Wed Dec 4 11:12:28 EST 2024
sync
--- a/atomic-386.s
+++ b/atomic-386.s
@@ -1,12 +1,3 @@
-#define CMPXCHG /* (CX) */\
- BYTE $0x0F; BYTE $0xB1; BYTE $0x11
-#define CMPXCHG64 /* (DI) */\
- BYTE $0x0F; BYTE $0xC7; BYTE $0x0F
-#define XADDL /* BX, (AX) */ \
- BYTE $0x0F; BYTE $0xC1; BYTE $0x03
-#define XADDLSP /* AX, (SP) */ \
- BYTE $0x0F; BYTE $0xC1; BYTE $0x04; BYTE $0x24
-
/* get variants */
TEXT ageti+0(SB),1,$0
TEXT agetl+0(SB),1,$0
@@ -38,7 +29,7 @@
MOVL 0(DI), AX
MOVL 4(DI), DX
loop:
- LOCK; CMPXCHG64
+ LOCK; CMPXCHG8B (DI)
JNE loop
MOVL p+0(FP),DI
MOVL AX, 0(DI)
@@ -52,7 +43,7 @@
MOVL p+0(FP), BX
MOVL v+4(FP), CX
MOVL CX, AX
- LOCK; XADDL
+ LOCK; XADDL AX, (BX)
ADDL CX, AX
RET
@@ -65,7 +56,7 @@
MOVL DX, CX
ADDL v+8(FP), BX
ADCL v+12(FP), CX
- LOCK; CMPXCHG64
+ LOCK; CMPXCHG8B (DI)
JNE retry
MOVL r+0(FP), DI
MOVL BX, 0x0(DI)
@@ -79,7 +70,7 @@
MOVL p+0(FP), CX
MOVL ov+4(FP), AX
MOVL nv+8(FP), DX
- LOCK; CMPXCHG
+ LOCK; CMPXCHGL DX, (CX)
JNE fail32
MOVL $1,AX
RET
@@ -93,7 +84,7 @@
MOVL ov+8(FP), DX
MOVL nv+12(FP), BX
MOVL nv+16(FP), CX
- LOCK; CMPXCHG64
+ LOCK; CMPXCHG8B (DI)
JNE fail64
MOVL $1,AX
RET
@@ -105,5 +96,5 @@
TEXT coherence+0(SB),1,$0
/* this is essentially mfence but that requires sse2 */
XORL AX, AX
- LOCK; XADDLSP
+ LOCK; XADDL AX, (SP)
RET
--- a/blk.c
+++ b/blk.c
@@ -7,28 +7,29 @@
#include "fns.h"
#include "atomic.h"
-static vlong blkalloc_lk(Arena*);
-static vlong blkalloc(int, uint);
+static vlong blkalloc_lk(Arena*, int);
+static vlong blkalloc(int, uint, int);
static void blkdealloc_lk(Arena*, vlong);
static Blk* initblk(Blk*, vlong, vlong, int);
+static void readblk(Blk*, Bptr, int);
int
-checkflag(Blk *b, int f)
+checkflag(Blk *b, int set, int clr) /* nonzero iff every bit of set is on and every bit only in clr is off in b->flag */
{
long v;
v = agetl(&b->flag);
- return (v & f) == f;
+ return (v & (set|clr)) == set; /* checkflag(b, 0, X) therefore asks "is no bit of X set?" */
}
void
-setflag(Blk *b, int f)
+setflag(Blk *b, int set, int clr) /* atomically clear the clr bits and set the set bits of b->flag */
{
long ov, nv;
while(1){
ov = agetl(&b->flag);
- nv = ov | f;
+ nv = (ov & ~clr) | set; /* set is applied last, so a bit named in both ends up set */
if(acasl(&b->flag, ov, nv))
break;
}
@@ -35,39 +36,23 @@
}
void
-clrflag(Blk *b, int f)
-{
- long ov, nv;
-
- while(1){
- ov = agetl(&b->flag);
- nv = ov & ~f;
- if(acasl(&b->flag, ov, nv))
- break;
- }
-}
-
-void
syncblk(Blk *b)
{
- assert(checkflag(b, Bfinal));
+ assert(checkflag(b, Bfinal, 0)); /* only finalized blocks may be written */
assert(b->bp.addr >= 0);
- clrflag(b, Bdirty);
+ tracex("syncblk", b->bp, b->type, -1);
if(pwrite(fs->fd, b->buf, Blksz, b->bp.addr) == -1)
broke("%B %s: %r", b->bp, Eio);
+ setflag(b, 0, Bdirty); /* Bdirty is now cleared after the pwrite call rather than before it */
}
-static Blk*
-readblk(vlong bp, int flg)
+static void
+readblk(Blk *b, Bptr bp, int flg)
{
- vlong off, rem, n;
+ vlong off, xh, ck, rem, n;
char *p;
- Blk *b;
- assert(bp != -1);
- b = cachepluck();
- b->alloced = getcallerpc(&bp);
- off = bp;
+ off = bp.addr;
rem = Blksz;
while(rem != 0){
n = pread(fs->fd, b->buf, rem, off);
@@ -79,12 +64,10 @@
b->cnext = nil;
b->cprev = nil;
b->hnext = nil;
- b->flag = 0;
- b->bp.addr = bp;
+ b->bp.addr = bp.addr;
b->bp.hash = -1;
b->bp.gen = -1;
- b->fnext = nil;
b->nval = 0;
b->valsz = 0;
@@ -128,21 +111,33 @@
b->data = p;
break;
}
+	if(b->type == Tlog || b->type == Tdlist){
+		xh = b->logh;	/* log and deadlist blocks carry their own hash of the logsz payload */
+		ck = bufhash(b->data, b->logsz);
+	}else{
+		xh = bp.hash;	/* everything else is checked against the hash in the block pointer */
+		ck = blkhash(b);
+	}
+	if(!(flg&GBnochk) && ck != xh){	/* was (!flg&GBnochk): '!' bound to flg alone and disabled the check */
+		if(!(flg&GBsoftchk))
+			broke("%s: %ullx %llux != %llux", Ecorrupt, bp.addr, xh, ck);
+		fprint(2, "%s: %ullx %llux != %llux", Ecorrupt, bp.addr, xh, ck);
+		error(Ecorrupt);
+	}
assert(b->magic == Magic);
- return b;
}
static Arena*
pickarena(uint ty, uint hint, int tries)
{
- uint n;
+ uint n, r;
- n = hint + tries + ainc(&fs->roundrobin)/1024;
+ r = ainc(&fs->roundrobin)/2048; /* quotient changes once per 2048 increments of the shared counter */
if(ty == Tdat)
- n++;
- if(hint % fs->narena == 0)
- n++;
- return &fs->arenas[n%fs->narena];
+ n = hint % (fs->narena - 1) + r + 1; /* r+1 .. r+narena-1: with tries == 0, Tdat never maps to arena r%narena; NOTE(review): divisor is 0 if narena == 1 - confirm narena >= 2 */
+ else
+ n = r; /* every other block type starts at arena r%narena */
+ return &fs->arenas[(n + tries) % fs->narena]; /* each retry moves one arena further on */
}
Arena*
@@ -154,8 +149,10 @@
lo = 0;
hi = fs->narena;
- if(b == 0)
+ if(b == fs->sb0->bp.addr)
return &fs->arenas[0];
+ if(b == fs->sb1->bp.addr)
+ return &fs->arenas[hi-1];
while(1){
mid = (hi + lo)/2;
a = &fs->arenas[mid];
@@ -243,13 +240,17 @@
{
Blk *lb;
- lb = a->logbuf[a->lbidx++ % nelem(a->logbuf)];
- if(lb->bp.addr != -1)
- cachedel(lb->bp.addr);
+assert(!canqlock(a));
+ lb = a->logbuf[0];
+ if(lb == a->logtl)
+ lb = a->logbuf[1];
+ assert(lb->ref == 1);
+ lb->flag = Bstatic;
initblk(lb, o, -1, Tlog);
- finalize(lb);
- syncblk(lb);
- traceb("logblk" , lb->bp);
+ tracex("newlogb" , lb->bp, -1, getcallerpc(&a));
+ lb->lasthold0 = lb->lasthold;
+ lb = holdblk(lb);
+ lb->lasthold = getcallerpc(&a);
return lb;
}
@@ -263,27 +264,26 @@
logappend(Arena *a, vlong off, vlong len, int op)
{
vlong o, start, end;
- Blk *nl, *lb;
- char *p, *name;
+ Blk *lb;
+ char *p;
- lb = a->logtl;
assert((off & 0xff) == 0);
assert(op == LogAlloc || op == LogFree || op == LogSync);
if(op != LogSync){
start = a->h0->bp.addr;
end = start + a->size + 2*Blksz;
- assert(lb == nil || lb->type == Tlog);
assert(off >= start);
- assert(off <= end);
+ assert(off < end);
}
- switch(op){
- case LogAlloc: name = "alloc"; break;
- case LogFree: name = "free"; break;
- case LogSync: name = "sync"; break;
- default: name = "???"; break;
- }
- assert(lb == nil || lb->logsz >= 0);
- dprint("logop %llx+%llx@%x: %s\n", off, len, lb?lb->logsz:-1, name);
+ lb = a->logtl;
+ assert(lb->ref > 0);
+ assert(lb->type == Tlog);
+ assert(lb->logsz >= 0);
+ dprint("logop %d: %llx+%llx@%x\n", op, off, len, lb->logsz);
+
+ if(checkflag(lb, 0, Bdirty))
+ setflag(lb, Bdirty, Bfinal);
+
/*
* move to the next block when we have
* too little room in the log:
@@ -292,23 +292,18 @@
* 16 bytes of new log entry allocation
* and chaining.
*/
- if(lb == nil || lb->logsz >= Logspc - Logslop){
- o = blkalloc_lk(a);
+ if(lb->logsz >= Logspc - Logslop){
+ o = blkalloc_lk(a, 0);
if(o == -1)
error(Efull);
- nl = mklogblk(a, o);
p = lb->data + lb->logsz;
PACK64(p, o|LogAlloc1);
lb->logsz += 8;
- lb->logp = nl->bp;
- finalize(lb);
- syncblk(lb);
- a->logtl = nl;
- a->nlog++;
- lb = nl;
+ lb->logp = (Bptr){o, -1, -1};
+tracex("logchain1", lb->bp, o, a - fs->arenas);
+ lb = mklogblk(a, o);
+tracex("logchain2", lb->bp, getcallerpc(&a), -1);
}
-
- setflag(lb, Bdirty);
if(len == Blksz){
if(op == LogAlloc)
op = LogAlloc1;
@@ -323,6 +318,18 @@
PACK64(p+8, len);
lb->logsz += 8;
}
+ if(lb != a->logtl) {
+traceb("logstep1", a->logtl->logp);
+traceb("logstep2", a->logtl->bp);
+ finalize(lb);
+ syncblk(lb);
+
+ finalize(a->logtl);
+ syncblk(a->logtl);
+ dropblk(a->logtl);
+ a->logtl = lb;
+ a->nlog++;
+ }
}
void
@@ -336,10 +343,13 @@
dprint("loadlog %B\n", bp);
traceb("loadlog", bp);
+ b = a->logbuf[0];
while(1){
- b = getblk(bp, 0);
+ assert(checkflag(b, Bstatic, Bcached));
+ holdblk(b);
+ readblk(b, bp, 0);
dprint("\tload %B chain %B\n", bp, b->logp);
- /* the hash covers the log and offset */
+ a->nlog++;
for(i = 0; i < b->logsz; i += n){
d = b->data + i;
ent = UNPACK64(d);
@@ -353,7 +363,9 @@
if(gen >= fs->qgen){
if(a->logtl == nil){
b->logsz = i;
- a->logtl = holdblk(b);
+ a->logtl = b;
+ cachedel(b->bp.addr);
+ setflag(b, Bdirty, 0);
return;
}
dropblk(b);
@@ -391,21 +403,25 @@
}
void
-compresslog(Arena *a)
+flushlog(Arena *a) /* finalize and write out a->logtl, the arena's open log tail */
{
+ if(checkflag(a->logtl, 0, Bdirty|Bstatic)) /* neither Bdirty nor Bstatic set: nothing to write; NOTE(review): the code this replaces tested a->logtl != nil first - confirm logtl is always set here */
+ return;
+ finalize(a->logtl);
+ syncblk(a->logtl);
+}
- int i, nr, nblks;
+void
+compresslog(Arena *a)
+{
+ int i, nr, nblks, nlog;
vlong sz, *blks;
- Blk *b, *nb;
+ Blk *b;
Arange *r;
- Bptr hd;
char *p;
- tracem("compresslog");
- if(a->logtl != nil){
- finalize(a->logtl);
- syncblk(a->logtl);
- }
+ flushlog(a);
+tracex("compress", a->loghd, getcallerpc(&a), -1);
/*
* Prepare what we're writing back.
* Arenas must be sized so that we can
@@ -414,7 +430,7 @@
*/
sz = 0;
nr = 0;
- a->nlog = 0;
+ nlog = 0;
for(r = (Arange*)avlmin(a->free); r != nil; r = (Arange*)avlnext(r)){
sz += 16;
nr++;
@@ -437,31 +453,22 @@
nexterror();
}
for(i = 0; i < nblks; i++){
- blks[i] = blkalloc_lk(a);
+ blks[i] = blkalloc_lk(a, 1);
if(blks[i] == -1)
error(Efull);
}
+
/* fill up the log with the ranges from the tree */
i = 0;
- hd = (Bptr){blks[0], -1, -1};
- b = a->logbuf[a->lbidx++ % nelem(a->logbuf)];
- a->logbuf[a->lbidx % nelem(a->logbuf)]->bp = Zb;
- if(b->bp.addr != -1)
- cachedel(b->bp.addr);
- initblk(b, blks[i++], -1, Tlog);
- finalize(b);
+ b = mklogblk(a, blks[i++]);
for(r = (Arange*)avlmin(a->free); r != nil; r = (Arange*)avlnext(r)){
if(b->logsz >= Logspc - Logslop){
- a->nlog++;
- nb = a->logbuf[a->lbidx++ % nelem(a->logbuf)];
- if(nb->bp.addr != -1)
- cachedel(nb->bp.addr);
- initblk(nb, blks[i++], -1, Tlog);
- b->logp = nb->bp;
- setflag(b, Bdirty);
+ b->logp = (Bptr){blks[i], -1, -1};
finalize(b);
syncblk(b);
- b = nb;
+ dropblk(b);
+ nlog++;
+ b = mklogblk(a, blks[i++]);
}
p = b->data + b->logsz;
PACK64(p+0, r->off|LogFree);
@@ -468,20 +475,25 @@
PACK64(p+8, r->len);
b->logsz += 16;
}
- finalize(b);
- syncblk(b);
/*
* now we have a valid freelist, and we can start
* appending stuff to it. Clean up the eagerly
* allocated extra blocks.
+ *
+ * Note that we need to drop the reference to the
+ * old logtl before we free the old blocks, because
+ * deallocating a block may require another block.
*/
- a->loghd = hd;
- a->logtl = b;
- for(; i < nblks; i++){
- cachedel(b->bp.addr);
+ dropblk(a->logtl);
+ a->loghd = (Bptr){blks[0], -1, -1};
+ a->logtl = b; /* written back by sync() later */
+ a->nlog = nlog;
+ a->lastlogsz = nlog;
+
+ /* May add blocks to new log */
+ for(; i < nblks; i++)
blkdealloc_lk(a, blks[i]);
- }
poperror();
free(blks);
}
@@ -490,8 +502,6 @@
logbarrier(Arena *a, vlong gen)
{
logappend(a, gen<<8, 0, LogSync);
- if(a->loghd.addr == -1)
- a->loghd = a->logtl->bp;
return 0;
}
@@ -502,14 +512,15 @@
* the alloc log.
*/
static vlong
-blkalloc_lk(Arena *a)
+blkalloc_lk(Arena *a, int seq)
{
- Avltree *t;
Arange *r;
vlong b;
- t = a->free;
- r = (Arange*)t->root;
+ if(seq)
+ r = (Arange*)avlmin(a->free);
+ else
+ r = (Arange*)avlmax(a->free);
if(!usereserve && a->size - a->used <= a->reserve)
return -1;
if(r == nil)
@@ -522,11 +533,16 @@
* the sort order because the tree
* covers disjoint ranges
*/
- b = r->off;
- r->len -= Blksz;
- r->off += Blksz;
+ if(seq){
+ b = r->off;
+ r->len -= Blksz;
+ r->off += Blksz;
+ }else{
+ r->len -= Blksz;
+ b = r->off + r->len;
+ }
if(r->len == 0){
- avldelete(t, r);
+ avldelete(a->free, r);
free(r);
}
a->used += Blksz;
@@ -536,26 +552,14 @@
static void
blkdealloc_lk(Arena *a, vlong b)
{
+ cachedel(b);
logappend(a, b, Blksz, LogFree);
- if(a->loghd.addr == -1)
- a->loghd = a->logtl->bp;
freerange(a->free, b, Blksz);
a->used -= Blksz;
}
-void
-blkdealloc(vlong b)
-{
- Arena *a;
-
- a = getarena(b);
- qlock(a);
- blkdealloc_lk(a, b);
- qunlock(a);
-}
-
static vlong
-blkalloc(int ty, uint hint)
+blkalloc(int ty, uint hint, int seq)
{
Arena *a;
vlong b;
@@ -582,7 +586,7 @@
qunlock(a);
nexterror();
}
- b = blkalloc_lk(a);
+ b = blkalloc_lk(a, seq);
if(b == -1){
qunlock(a);
poperror();
@@ -589,8 +593,6 @@
goto Again;
}
logappend(a, b, Blksz, LogAlloc);
- if(a->loghd.addr == -1)
- a->loghd = a->logtl->bp;
qunlock(a);
poperror();
return b;
@@ -628,9 +630,7 @@
b->data = b->buf + Leafhdsz;
break;
}
- b->fnext = nil;
-
- setflag(b, Bdirty);
+ setflag(b, Bdirty, 0);
b->nval = 0;
b->valsz = 0;
b->nbuf = 0;
@@ -642,16 +642,31 @@
}
Blk*
-newblk(Tree *t, int ty, vlong hint)
+newdblk(Tree *t, vlong hint, int seq) /* allocate and initialize a Tdat block; hint and seq are passed through to blkalloc */
{
vlong bp;
Blk *b;
- bp = blkalloc(ty, hint);
+ bp = blkalloc(Tdat, hint, seq);
b = cachepluck();
+ initblk(b, bp, t->memgen, Tdat); /* the new block is stamped with the tree's memgen */
+ b->alloced = getcallerpc(&t); /* caller pc kept for debugging */
+ tracex("newdblk" , b->bp, Tdat, getcallerpc(&t));
+ return b;
+
+}
+
+Blk*
+newblk(Tree *t, int ty) /* allocate and initialize a block of type ty for tree t */
+{
+ vlong bp;
+ Blk *b;
+
+ bp = blkalloc(ty, 0, 0); /* hint 0, seq 0 */
+ b = cachepluck();
initblk(b, bp, t->memgen, ty);
b->alloced = getcallerpc(&t);
- tracex("newblk" , b->bp, ty, -1);
+ tracex("newblk" , b->bp, ty, getcallerpc(&t)); /* trace now records the caller pc instead of -1 */
return b;
}
@@ -660,11 +675,10 @@
{
Blk *r;
- if((r = newblk(t, b->type, 0)) == nil)
+ if((r = newblk(t, b->type)) == nil)
return nil;
tracex("dup" , b->bp, b->type, t->gen);
- setflag(r, Bdirty);
r->bp.hash = -1;
r->nval = b->nval;
r->valsz = b->valsz;
@@ -710,20 +724,16 @@
}
b->bp.hash = blkhash(b);
- setflag(b, Bfinal);
- cacheins(b);
- b->cached = getcallerpc(&b);
+ setflag(b, Bdirty|Bfinal, 0);
}
Blk*
getblk(Bptr bp, int flg)
{
- uvlong xh, ck;
Blk *b;
int i;
i = ihash(bp.addr) % nelem(fs->blklk);
- tracex("get" , bp, getcallerpc(&bp), -1);
qlock(&fs->blklk[i]);
if(waserror()){
qunlock(&fs->blklk[i]);
@@ -730,31 +740,16 @@
nexterror();
}
if((b = cacheget(bp.addr)) != nil){
+ assert(checkflag(b, 0, Bfreed));
b->lasthold = getcallerpc(&bp);
qunlock(&fs->blklk[i]);
poperror();
return b;
}
- b = readblk(bp.addr, flg);
+ b = cachepluck();
b->alloced = getcallerpc(&bp);
- b->bp.hash = blkhash(b);
- if((flg&GBnochk) == 0){
- if(b->type == Tlog || b->type == Tdlist){
- xh = b->logh;
- ck = bufhash(b->data, b->logsz);
- }else{
- xh = bp.hash;
- ck = b->bp.hash;
- }
- if(ck != xh){
- if(flg & GBsoftchk){
- fprint(2, "%s: %ullx %llux != %llux", Ecorrupt, bp.addr, xh, ck);
- error(Ecorrupt);
- }else{
- broke("%s: %ullx %llux != %llux", Ecorrupt, bp.addr, xh, ck);
- }
- }
- }
+ b->alloced = getcallerpc(&bp);
+ readblk(b, bp, flg);
b->bp.gen = bp.gen;
b->lasthold = getcallerpc(&bp);
cacheins(b);
@@ -776,15 +771,16 @@
void
dropblk(Blk *b)
{
- assert(b == nil || b->ref > 0);
- if(b == nil || adec(&b->ref) != 0)
+ if(b == nil)
return;
b->lastdrop = getcallerpc(&b);
+ if(adec(&b->ref) != 0)
+ return;
/*
* freed blocks go to the LRU bottom
* for early reuse.
*/
- if(checkflag(b, Bfreed))
+ if(checkflag(b, Bfreed, 0))
lrubot(b);
else
lrutop(b);
@@ -805,17 +801,18 @@
}
void
-limbo(Bfree *f)
+limbo(int op, Limbo *l)
{
- Bfree *p;
+ Limbo *p;
ulong ge;
+ l->op = op;
while(1){
ge = agetl(&fs->epoch);
p = agetp(&fs->limbo[ge]);
- f->next = p;
- if(acasp(&fs->limbo[ge], p, f)){
- aincl(&fs->nlimbo, 1);
+ l->next = p;
+ if(acasp(&fs->limbo[ge], p, l)){
+ ainc(&fs->nlimbo);
break;
}
}
@@ -822,27 +819,42 @@
}
void
-freeblk(Tree *t, Blk *b, Bptr bp)
+freeblk(Tree *t, Blk *b) /* release an in-memory block: killblk when the tree test below holds, otherwise defer via limbo */
{
+ if(t == &fs->snap || (t != nil && b->bp.gen < t->memgen)){ /* snap tree, or block generation older than t->memgen */
+ tracex("killb", b->bp, t->memgen, getcallerpc(&t));
+ killblk(t, b->bp);
+ return;
+ }
+ b->freed = getcallerpc(&t);
+ tracex("freeb", b->bp, getcallerpc(&t), -1);
+ setflag(b, Blimbo, 0);
+ holdblk(b); /* extra reference for the limbo list; the DFblk case of epochclean drops it */
+ assert(b->ref > 1); /* the caller must still hold its own reference */
+ limbo(DFblk, b); /* the Blk itself is queued as the Limbo entry */
+}
+
+void
+freebp(Tree *t, Bptr bp) /* like freeblk, for a block known only by pointer */
+{
Bfree *f;
if(t == &fs->snap || (t != nil && bp.gen < t->memgen)){
- tracex("killb", bp, getcallerpc(&t), -1);
+ tracex("killbp", bp, t->memgen, getcallerpc(&t));
killblk(t, bp);
return;
}
+ tracex("freebp", bp, getcallerpc(&t), -1);
- tracex("freeb", bp, getcallerpc(&t), -1);
- f = emalloc(sizeof(Bfree), 0);
- f->op = DFblk;
+ qlock(&fs->bfreelk);
+ while(fs->bfree == nil) /* sleep until a Bfree is put back on fs->bfree (the DFbp case of epochclean does this and calls rwakeup) */
+ rsleep(&fs->bfreerz);
+ f = fs->bfree; /* pop a preallocated record instead of calling emalloc */
+ fs->bfree = (Bfree*)f->next;
+ qunlock(&fs->bfreelk);
+
f->bp = bp;
- f->b = nil;
- if(b != nil){
- setflag(b, Blimbo);
- b->freed = getcallerpc(&t);
- f->b = holdblk(b);
- }
- limbo(f);
+ limbo(DFbp, f);
}
void
@@ -875,7 +887,7 @@
for(i = 0; i < fs->nworker; i++){
e = agetl(&fs->lepoch[i]);
if((e & Eactive) && e != (ge | Eactive)){
- if(delay < 100)
+ if(delay < 1000)
delay++;
else
fprint(2, "stalled epoch %lx [worker %d]\n", e, i);
@@ -889,7 +901,9 @@
epochclean(void)
{
ulong c, e, ge;
- Bfree *p, *n;
+ Limbo *p, *n;
+ Blk *b;
+ Bfree *f;
Arena *a;
Qent qe;
int i;
@@ -912,28 +926,42 @@
n = p->next;
switch(p->op){
case DFtree:
- free(p->t);
+ free(p);
break;
case DFmnt:
- free(p->m);
+ free(p);
break;
+ case DFbp:
+ f = (Bfree*)p;
+ a = getarena(f->bp.addr);
+ if((b = cacheget(f->bp.addr)) != nil){
+ setflag(b, Bfreed, Bdirty|Blimbo);
+ dropblk(b);
+ }
+ qe.op = Qfree;
+ qe.bp = f->bp;
+ qe.b = nil;
+ qput(a->sync, qe);
+ qlock(&fs->bfreelk);
+ f->next = fs->bfree;
+ fs->bfree = f;
+ rwakeup(&fs->bfreerz);
+ qunlock(&fs->bfreelk);
+ break;
case DFblk:
- a = getarena(p->bp.addr);
+ b = (Blk*)p;
qe.op = Qfree;
- qe.bp = p->bp;
+ qe.bp = b->bp;
qe.b = nil;
+ setflag(b, Bfreed, Bdirty|Blimbo);
+ a = getarena(b->bp.addr);
+ dropblk(b);
qput(a->sync, qe);
- if(p->b != nil){
- clrflag(p->b, Blimbo);
- setflag(p->b, Bfreed);
- dropblk(p->b);
- }
break;
default:
abort();
}
- aincl(&fs->nlimbo, -1);
- free(p);
+ adec(&fs->nlimbo);
}
}
@@ -943,16 +971,18 @@
Arena *a;
Qent qe;
- assert(checkflag(b, Bdirty));
+ assert(checkflag(b, Bdirty, Bqueued|Bstatic));
assert(b->bp.addr >= 0);
+ finalize(b);
+ if(checkflag(b, 0, Bcached)){
+ cacheins(b);
+ b->cached = getcallerpc(&b);
+ }
+ holdblk(b);
b->enqueued = getcallerpc(&b);
- a = getarena(b->bp.addr);
- holdblk(b);
- finalize(b);
traceb("queueb", b->bp);
- setflag(b, Bqueued);
- b->queued = getcallerpc(&b);
+ a = getarena(b->bp.addr);
qe.op = Qwrite;
qe.bp = b->bp;
qe.b = b;
@@ -967,10 +997,9 @@
q->nheap = 0;
q->heapsz = fs->cmax;
q->heap = emalloc(q->heapsz*sizeof(Qent), 1);
-
}
-int
+static int
qcmp(Qent *a, Qent *b)
{
if(a->qgen != b->qgen)
@@ -988,11 +1017,13 @@
int i;
if(qe.op == Qfree || qe.op == Qwrite)
- assert(qe.bp.addr != 0 && (qe.bp.addr & (Blksz-1)) == 0);
+ assert((qe.bp.addr & (Blksz-1)) == 0);
else if(qe.op == Qfence)
assert(fs->syncing > 0);
else
abort();
+ if(qe.b != nil)
+ assert(qe.b->ref > 0);
qlock(&q->lk);
qe.qgen = agetv(&fs->qgen);
while(q->nheap == q->heapsz)
@@ -1042,7 +1073,7 @@
rwakeup(&q->fullrz);
qunlock(&q->lk);
if(e.b != nil){
- clrflag(e.b, Bqueued);
+ setflag(e.b, 0, Bqueued);
e.b->queued = 0;
}
return e;
@@ -1057,7 +1088,7 @@
q = p;
if(waserror()){
- aincl(&fs->rdonly, 1);
+ ainc(&fs->rdonly);
fprint(2, "error syncing: %s\n", errmsg());
return;
}
@@ -1066,12 +1097,15 @@
switch(qe.op){
case Qfree:
tracex("qfreeb", qe.bp, qe.qgen, -1);
+ /*
+ * we shouldn't have a block in a free op,
+ * the frees go into the queue just to ensure
+ * write/reuse ordering.
+ */
+ assert(qe.b == nil);
a = getarena(qe.bp.addr);
qlock(a);
- cachedel(qe.bp.addr);
blkdealloc_lk(a, qe.bp.addr);
- if(qe.b != nil)
- dropblk(qe.b);
qunlock(a);
break;
case Qfence:
@@ -1083,7 +1117,7 @@
break;
case Qwrite:
tracex("qsyncb", qe.bp, qe.qgen, -1);
- if(checkflag(qe.b, Bfreed) == 0)
+ if(checkflag(qe.b, Bfreed, Bstatic) == 0)
syncblk(qe.b);
dropblk(qe.b);
break;
--- a/cache.c
+++ b/cache.c
@@ -32,6 +32,7 @@
* its now in use.
*/
assert(b->magic == Magic);
+ assert(checkflag(b, 0, Bstatic));
if(b->ref != 0){
qunlock(&fs->lrulk);
return;
@@ -58,6 +59,7 @@
* its now in use.
*/
assert(b->magic == Magic);
+ assert(checkflag(b, 0, Bstatic));
if(b->ref != 0){
qunlock(&fs->lrulk);
return;
@@ -83,25 +85,18 @@
h = ihash(b->bp.addr);
bkt = &fs->bcache[h % fs->cmax];
qlock(&fs->lrulk);
- traceb("cache", b->bp);
- lock(bkt);
- if(checkflag(b, Bcached)){
- unlock(bkt);
- qunlock(&fs->lrulk);
- return;
- }
+ assert(checkflag(b, 0, Bstatic|Bcached));
+ setflag(b, Bcached, 0);
assert(b->hnext == nil);
for(Blk *bb = bkt->b; bb != nil; bb = bb->hnext)
- assert(b != bb);
- setflag(b, Bcached);
+ assert(b != bb && b->bp.addr != bb->bp.addr);
b->cached = getcallerpc(&b);
b->hnext = bkt->b;
bkt->b = b;
- unlock(bkt);
qunlock(&fs->lrulk);
}
-void
+static void
cachedel_lk(vlong addr)
{
Bucket *bkt;
@@ -111,28 +106,26 @@
if(addr == -1)
return;
- tracex("uncache", Zb, addr, getcallerpc(&addr));
h = ihash(addr);
bkt = &fs->bcache[h % fs->cmax];
- lock(bkt);
p = &bkt->b;
for(b = bkt->b; b != nil; b = b->hnext){
if(b->bp.addr == addr){
+ /* FIXME: Until we clean up snap.c, we can have dirty blocks in cache */
+ assert(checkflag(b, Bcached, Bstatic)); //Bdirty));
*p = b->hnext;
- clrflag(b, Bcached);
b->uncached = getcallerpc(&addr);
b->hnext = nil;
+ setflag(b, 0, Bcached);
break;
}
p = &b->hnext;
}
- unlock(bkt);
}
void
cachedel(vlong addr)
{
qlock(&fs->lrulk);
- tracex("uncachelk", Zb, addr, getcallerpc(&addr));
cachedel_lk(addr);
qunlock(&fs->lrulk);
}
@@ -147,7 +140,6 @@
h = ihash(addr);
bkt = &fs->bcache[h % fs->cmax];
qlock(&fs->lrulk);
- lock(bkt);
for(b = bkt->b; b != nil; b = b->hnext){
if(b->bp.addr == addr){
holdblk(b);
@@ -156,7 +148,6 @@
break;
}
}
- unlock(bkt);
qunlock(&fs->lrulk);
return b;
@@ -177,12 +168,12 @@
b = fs->ctail;
assert(b->magic == Magic);
assert(b->ref == 0);
- if(checkflag(b, Bcached))
+ if(checkflag(b, Bcached, 0))
cachedel_lk(b->bp.addr);
- if(checkflag(b, Bcached))
+ if(checkflag(b, Bcached, 0))
fprint(2, "%B cached %#p freed %#p\n", b->bp, b->cached, b->freed);
+ assert(checkflag(b, 0, Bcached));
lrudel(b);
- assert(!checkflag(b, Bcached));
b->flag = 0;
b->lasthold = 0;
b->lastdrop = 0;
--- a/check.c
+++ b/check.c
@@ -158,6 +158,7 @@
fprint(fd, "error loading %B\n", bp);
return 0;
}
+traceb("chklg", bp);
b = getblk(bp, 0);
nb = b->logp;
dropblk(b);
@@ -254,7 +255,7 @@
Blk *b;
ok = 1;
- aincl(&fs->rdonly, 1);
+ ainc(&fs->rdonly);
epochwait();
if(waserror()){
fprint(fd, "error checking %s\n", errmsg());
@@ -299,7 +300,7 @@
poperror();
}
btexit(&s);
- aincl(&fs->rdonly, -1);
+ adec(&fs->rdonly);
poperror();
return ok;
}
--- a/cons.c
+++ b/cons.c
@@ -8,6 +8,7 @@
#include "fns.h"
typedef struct Cmd Cmd;
+typedef struct Sizes Sizes;
struct Cmd {
char *name;
@@ -14,9 +15,33 @@
char *sub;
int minarg;
int maxarg;
+ int epoch;
void (*fn)(int, char**, int);
};
+struct Sizes {
+ vlong datasz;
+ vlong metasz;
+ vlong delqsz;
+ vlong clobsz;
+};
+
+/* Scale sz by 1024 until it drops below 500 (or units run out); *unit is set to the matching suffix. */
+static double
+hscaled(vlong sz, char **unit)
+{
+	static char *units[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", nil};
+	double scaled;
+	int idx;
+
+	idx = 0;
+	for(scaled = sz; idx < nelem(units)-1 && scaled >= 500; scaled /= 1024)
+		idx++;
+	*unit = units[idx];
+	return scaled;
+}
+
+
static void
setdbg(int fd, char **ap, int na)
{
@@ -209,14 +234,297 @@
}
static void
-showdf(int fd, char**, int)
+countlog(int fd, Dlist *dl)	/* walk dl's log chain and print a count for it to fd */
{
- char *units[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", nil};
+	Bptr bp, nb;
+	Blk *b;
+	int n;
+
+	n = 0;
+	for(bp = dl->hd; bp.addr != -1; bp = nb){
+		if(waserror()){
+			fprint(fd, "error loading %B\n", bp);
+			return;
+		}
+		b = getblk(bp, 0);
+		nb = b->logp;
+		n += b->logsz/8;	/* one per 8-byte entry; read before the reference is dropped */
+		dropblk(b);
+		poperror();
+		n++;	/* plus the log block itself */
+	}
+	fprint(fd, "\tDl(%lld, %lld): %d blocks\n", dl->gen, dl->bgen, n);
+}
+
+
+static void
+prleak(int fd, uvlong *marks) /* compare the mark bitmap with each arena's free tree and report mismatches to fd */
+{
+ vlong a0, a1, ba, bi, leaksz;
+ Arena *a;
+ Arange *r;
+
+ if(marks == nil) /* no bitmap: nothing to report */
+ return;
+ leaksz = 0;
+ for(a = &fs->arenas[0]; a < &fs->arenas[fs->narena]; a++){
+ r = (Arange*)avlmin(a->free); /* free ranges are visited in avlmin/avlnext order */
+ a0 = a->h0->bp.addr + 2*Blksz; /* scan window starts two blocks past the h0 address */
+ a1 = a->h0->bp.addr + a->size - 2*Blksz;
+ for(ba = a0; ba < a1; ba += Blksz){
+ if(r != nil && ba == r->off){ /* ba is the start of the next free range */
+ for(; ba < r->off+r->len; ba += Blksz){
+ bi = ba/Blksz;
+ if(marks[bi/64] & 1ULL<<(bi%64)) /* on the free list yet marked as referenced */
+ fprint(fd, "uaf %#llx\n", ba);
+ }
+ r = (Arange*)avlnext(r);
+ }
+ if(ba >= a1) /* the free range may have run to the end of the window */
+ break;
+ bi = ba/Blksz;
+ if((marks[bi/64] & 1ULL<<(bi%64)) == 0){ /* not free and not marked */
+ leaksz += Blksz;
+ fprint(fd, "leak %#llx\n", ba);
+ }
+ }
+ }
+ fprint(fd, "total bytes leaked: %lld (%f MiB)\n", leaksz, (double)leaksz/MiB);
+}
+
+static void
+marktree(Tree *t, Blk *b, Sizes *ts, uvlong *marks)	/* recursively mark b's subtree and add its sizes to ts; marks may be nil */
+{
+	int i, fill;
+	vlong bn;
+	Bptr bp;
+	Blk *c;
+	Msg m;
+
+	bn = b->bp.addr/Blksz;
+	if(marks != nil)
+		marks[bn/64] |= 1ULL<<(bn%64);
+	ts->metasz += Blksz;	/* every tree block visited counts as metadata */
+	switch(b->type){
+	case Tleaf:
+		for(i = 0; i < b->nval; i++){
+			getval(b, i, &m);
+			if(m.k[0] != Kdat)
+				continue;
+			bp = unpackbp(m.v, m.nv);
+			bn = bp.addr/Blksz;
+			if(marks != nil)
+				marks[bn/64] |= 1ULL<<(bn%64);
+			if(bp.gen > t->pred)	/* only blocks newer than t->pred are charged to this tree */
+				ts->datasz += Blksz;
+		}
+		break;
+	case Tpivot:
+		for(i = 0; i < b->nval; i++){
+			getval(b, i, &m);
+			bp = getptr(&m, &fill);
+			if(bp.gen <= t->pred)
+				continue;
+			c = getblk(bp, 0);
+			marktree(t, c, ts, marks);
+			dropblk(c);	/* release the child reference taken by getblk */
+		}
+		for(i = 0; i < b->nbuf; i++){
+			getmsg(b, i, &m);
+			if(m.k[0] != Kdat)
+				continue;
+			switch(m.op){
+			case Odelete:	ts->delqsz += Blksz;	break;
+			case Oclobber:	ts->clobsz += Blksz;	break;
+			case Oclearb:	ts->clobsz += Blksz;	break;
+			case Oinsert:
+				bp = unpackbp(m.v, m.nv);
+				bn = bp.addr/Blksz;
+				if(marks != nil)
+					marks[bn/64] |= 1ULL<<(bn%64);
+				if(bp.gen > t->pred)
+					ts->datasz += Blksz;
+				break;
+			}
+		}
+		break;
+	}
+}
+
+/* Set the mark bit of every block in the log chain starting at hd; marks must not be nil. */
+static int
+marklog(int arena, Bptr hd, uvlong *marks)
+{
+	Bptr bp, nb;
+	vlong bn;
+	Blk *b;
+
+	for(bp = hd; bp.addr != -1; bp = nb){
+tracex("marklog", bp, arena, -1);
+		b = getblk(bp, 0);
+		bn = b->bp.addr/Blksz;
+		marks[bn/64] |= 1ULL<<(bn%64);
+		nb = b->logp;	/* fetch the link before dropping the reference */
+		dropblk(b);
+	}
+	return 1;
+}
+
+/* Mark each block of the chain starting at hd and every 8-byte address stored in it; marks must not be nil. */
+static int
+markdlist(Bptr hd, uvlong *marks)
+{
+	Bptr bp, nb;
+	vlong bn;
+	char *p;
+	Blk *b;
+
+	for(bp = hd; bp.addr != -1; bp = nb){
+		b = getblk(bp, 0);
+		bn = b->bp.addr/Blksz;
+		marks[bn/64] |= 1ULL<<(bn%64);
+		for(p = b->data; p < b->data+b->logsz; p += 8){	/* '<' so an odd logsz cannot walk past the end */
+			bn = UNPACK64(p);
+			bn /= Blksz;
+			marks[bn/64] |= 1ULL<<(bn%64);
+		}
+		nb = b->logp;
+		dropblk(b);
+	}
+	return 1;
+}
+
+static int
+markdlists(uvlong *marks) /* run markdlist over fs->snapdl and every Kdlist entry of the snap tree */
+{
+ char pfx[1];
+ Dlist dl;
+ Scan s;
+
+ markdlist(fs->snapdl.hd, marks);
+ pfx[0] = Kdlist; /* one-byte prefix: scan only Kdlist keys */
+ btnewscan(&s, pfx, 1);
+ btenter(&fs->snap, &s);
+ while(1){
+ if(!btnext(&s, &s.kv))
+ break;
+ kv2dlist(&s.kv, &dl);
+ markdlist(dl.hd, marks);
+ }
+ btexit(&s);
+ return 0; /* NOTE(review): marklog and markdlist return 1; the visible caller ignores the value */
+}
+
+static void
+showsnapsz(int fd)	/* print per-snapshot usage to fd, then run the leak scan if the bitmap was allocated */
+{
+	char pfx[1], name[Keymax+1], *u;
+	int i, h, ndone;
+	uvlong *marks;
+	vlong ba, bn, used, total, *done;
+	double sz;
+	Limbo *l;
+	Sizes ts;
+	Tree *t;
+	Scan s;
+	Blk *b;
+
+	done = nil;
+	ndone = 0;
+	total = 0;
+	ba = fs->sb1->bp.addr/Blksz;	/* highest block number the bitmap must cover */
+	marks = mallocz(sizeof(vlong)*(ba/64 + 1), 1);
+	if(marks == nil)
+		fprint(2, "not enough memory for leak detection\n");
+
+	/* RACY, may crash */
+	for(i = 0; marks != nil && i < 3; i++){	/* skipped without a bitmap: the loop writes through marks */
+		for(l = fs->limbo[i]; l != nil; l = l->next){
+			if(l->op == DFbp){
+				bn = ((Bfree*)l)->bp.addr/Blksz;
+				marks[bn/64] |= 1ULL<<(bn%64);
+			}else if(l->op == DFblk){
+				bn = ((Blk*)l)->bp.addr/Blksz;
+				marks[bn/64] |= 1ULL<<(bn%64);
+			}
+		}
+	}
+
+	b = getroot(&fs->snap, &h);
+	memset(&ts, 0, sizeof(Sizes));
+	marktree(&fs->snap, b, &ts, marks);
+	dropblk(b);
+
+	pfx[0] = Klabel;
+	btnewscan(&s, pfx, 1);
+	btenter(&fs->snap, &s);
+	while(1){
+		if(!btnext(&s, &s.kv))
+			break;
+		if(waserror()){
+			fprint(fd, "moving on: %s\n", errmsg());
+			continue;
+		}
+		memcpy(name, s.kv.k+1, s.kv.nk-1);
+		name[s.kv.nk-1] = 0;
+		if((t = opensnap(name, nil)) == nil){
+			fprint(2, "invalid snap label %s\n", name);
+			poperror();	/* balance the waserror above before leaving the loop */
+			break;
+		}
+		fprint(fd, "snap %s [gen %lld..%lld]:\n", name, t->pred+1, t->gen);
+		for(i = 0; i < ndone; i++){
+			if(done[i] == t->gen){	/* several labels can name the same generation */
+				fprint(fd, "\tdup\n");
+				goto Next;
+			}
+		}
+		done = realloc(done, (ndone+1)*sizeof(vlong));
+		done[ndone++] = t->gen;
+
+		b = getroot(t, &h);
+		memset(&ts, 0, sizeof(Sizes));
+		marktree(t, b, &ts, marks);
+
+		used = ts.datasz + ts.metasz;
+		sz = hscaled(used, &u);
+		fprint(fd, "\tused %lld (%.2f %s)\n", used, sz, u);
+		sz = hscaled(ts.datasz, &u);
+		fprint(fd, "\tdata %lld (%.2f %s)\n", ts.datasz, sz, u);
+		sz = hscaled(ts.metasz, &u);
+		fprint(fd, "\tmeta %lld (%.2f %s)\n", ts.metasz, sz, u);
+		sz = hscaled(ts.delqsz, &u);
+		fprint(fd, "\tdelq %lld (%.2f %s)\n", ts.delqsz, sz, u);
+		sz = hscaled(ts.clobsz, &u);
+		fprint(fd, "\tclob %lld (%.2f %s)\n", ts.clobsz, sz, u);
+		dropblk(b);
+		total += used;
+Next:
+		closesnap(t);
+		poperror();
+	}
+	btexit(&s);
+	if(marks != nil){
+		for(i = 0; i < fs->narena; i++)
+			marklog(i, fs->arenas[i].loghd, marks);
+		markdlists(marks);
+	}
+	sz = hscaled(total, &u);
+	fprint(fd, "total used: %lld (%.2f %s)\n", total, sz, u);
+	prleak(fd, marks);
+	free(marks);
+	free(done);	/* the generation list was previously leaked */
+}
+
+static void
+showdf(int fd, char **ap, int na)
+{
vlong size, used, free;
double hsize, hused, hfree;
+ char *us, *uu, *uf;
double pct;
Arena *a;
- int i, us, uu, uf;
+ int i;
size = 0;
used = 0;
@@ -229,20 +537,16 @@
fprint(fd, "arena %d: %llx/%llx (%.2f%%)\n", i, a->used, a->size, 100*(double)a->used/(double)a->size);
}
free = size - used;
- hsize = size;
- hused = used;
- hfree = free;
- for(us = 0; us < nelem(units)-1 && hsize >= 500 ; us++)
- hsize /= 1024;
- for(uu = 0; uu < nelem(units)-1 && hused >= 500 ; uu++)
- hused /= 1024;
- for(uf = 0; uf < nelem(units)-1 && hfree >= 500 ; uf++)
- hfree /= 1024;
+ hsize = hscaled(size, &us);
+ hused = hscaled(used, &uu);
+ hfree = hscaled(free, &uf);
pct = 100.0*(double)used/(double)size;
fprint(fd, "fill:\t%.2f%%\n", pct);
- fprint(fd, "used:\t%lld (%.2f %s)\n", used, hused, units[uu]);
- fprint(fd, "size:\t%lld (%.2f %s)\n", size, hsize, units[us]);
- fprint(fd, "free:\t%lld (%.2f %s)\n", free, hfree, units[uf]);
+ fprint(fd, "used:\t%lld (%.2f %s)\n", used, hused, uu);
+ fprint(fd, "size:\t%lld (%.2f %s)\n", size, hsize, us);
+ fprint(fd, "free:\t%lld (%.2f %s)\n", free, hfree, uf);
+ if(na == 1 && strcmp(ap[0], "verbose") == 0)
+ showsnapsz(fd);
}
void
@@ -253,13 +557,14 @@
Conn *c;
for(c = fs->conns; c != nil; c = c->next){
- fprint(fd, "fids:\n");
+ fprint(fd, "-- conn %p: fids --\n", c);
for(i = 0; i < Nfidtab; i++){
lock(&c->fidtablk[i]);
for(f = c->fidtab[i]; f != nil; f = f->next){
rlock(f->dent);
- fprint(fd, "\tfid[%d] from %#zx: %d [refs=%ld, k=%K, qid=%Q]\n",
- i, getmalloctag(f), f->fid, f->dent->ref, &f->dent->Key, f->dent->qid);
+ fprint(fd, "\tfid[%d] from %#zx: %d [refs=%ld, k=%K, qid=%Q m=%d, dmode:%d duid: %d, dgid: %d]\n",
+ i, getmalloctag(f), f->fid, f->dent->ref, &f->dent->Key, f->dent->qid,
+ f->mode, f->dmode, f->duid, f->dgid);
runlock(f->dent);
}
unlock(&c->fidtablk[i]);
@@ -338,6 +643,24 @@
}
static void
+showfree(int fd, char **, int) /* console command: dump every arena's free ranges to fd */
+{
+ Arange *r;
+ Arena *a;
+ int i;
+
+ for(i = 0; i < fs->narena; i++){
+ a = &fs->arenas[i];
+ qlock(a); /* the arena lock is held while its free tree is walked */
+ fprint(fd, "arena %d %llx+%llx{\n", i, a->h0->bp.addr, a->size);
+ for(r = (Arange*)avlmin(a->free); r != nil; r = (Arange*)avlnext(r))
+ fprint(fd, "\t%llx..%llx (%llx)\n", r->off, r->off+r->len, r->len); /* start..end (length), all in hex */
+ fprint(fd, "}\n");
+ qunlock(a);
+ }
+}
+
+static void
unreserve(int fd, char **ap, int)
{
if(strcmp(ap[0], "on") == 0)
@@ -350,6 +673,20 @@
}
static void
+showbptr(int fd, char **ap, int na) /* console command: call showbp for each address argument */
+{
+ Bptr bp;
+ int i;
+
+ for(i = 0; i < na; i++){
+ bp.addr = strtoll(ap[i], nil, 0); /* base 0: the argument's prefix selects the radix; parse errors are not detected */
+ bp.hash = -1; /* hash and gen are not known from an address alone */
+ bp.gen = -1;
+ showbp(fd, bp, 0);
+ }
+}
+
+static void
help(int fd, char**, int)
{
char *msg =
@@ -372,8 +709,8 @@
Cmd cmdtab[] = {
/* admin */
- {.name="check", .sub=nil, .minarg=0, .maxarg=0, .fn=fsckfs},
- {.name="df", .sub=nil, .minarg=0, .maxarg=0, .fn=showdf},
+ {.name="check", .sub=nil, .minarg=0, .maxarg=0, .fn=fsckfs, .epoch=1},
+ {.name="df", .sub=nil, .minarg=0, .maxarg=1, .fn=showdf, .epoch=1},
{.name="halt", .sub=nil, .minarg=0, .maxarg=0, .fn=haltfs},
{.name="help", .sub=nil, .minarg=0, .maxarg=0, .fn=help},
{.name="permit", .sub=nil, .minarg=1, .maxarg=1, .fn=permflip},
@@ -383,10 +720,12 @@
{.name="users", .sub=nil, .minarg=0, .maxarg=1, .fn=refreshusers},
/* debugging */
+ {.name="show", .sub="bp", .minarg=1, .maxarg=1, .fn=showbptr},
{.name="show", .sub="fid", .minarg=0, .maxarg=0, .fn=showfid},
- {.name="show", .sub="tree", .minarg=0, .maxarg=1, .fn=showtree},
+ {.name="show", .sub="tree", .minarg=0, .maxarg=1, .fn=showtree, .epoch=1},
{.name="show", .sub="users", .minarg=0, .maxarg=0, .fn=showusers},
- {.name="show", .sub="bstate", .minarg=0, .maxarg=0, .fn=showbstate},
+ {.name="show", .sub="bstate", .minarg=0, .maxarg=0, .fn=showbstate, .epoch=1},
+ {.name="show", .sub="free", .minarg=0, .maxarg=0, .fn=showfree},
{.name="debug", .sub=nil, .minarg=0, .maxarg=1, .fn=setdbg},
{.name="save", .sub="trace", .minarg=0, .maxarg=1, .fn=savetrace},
{.name=nil, .sub=nil},
@@ -404,11 +743,10 @@
fprint(fd, "gefs# ");
if((n = read(fd, buf, sizeof(buf)-1)) == -1)
break;
- epochstart(tid);
buf[n] = 0;
nf = tokenize(buf, f, nelem(f));
if(nf == 0 || strlen(f[0]) == 0)
- goto Next;
+ continue;
for(c = cmdtab; c->name != nil; c++){
ap = f;
na = nf;
@@ -424,7 +762,15 @@
}
if(na < c->minarg || na > c->maxarg)
continue;
- c->fn(fd, ap, na);
+ if(c->epoch)
+ epochstart(tid);
+ if(!waserror()){
+ c->fn(fd, ap, na);
+ poperror();
+ }else
+ fprint(fd, "%s: %s\n", f[0], errmsg());
+ if(c->epoch)
+ epochend(tid);
break;
}
if(c->name == nil){
@@ -433,7 +779,5 @@
fprint(fd, " %s", f[i]);
fprint(fd, "'\n");
}
-Next:
- epochend(tid);
}
}
--- a/dat.h
+++ b/dat.h
@@ -10,6 +10,7 @@
typedef struct Kvp Kvp;
typedef struct Xdir Xdir;
typedef struct Bptr Bptr;
+typedef struct Limbo Limbo;
typedef struct Bfree Bfree;
typedef struct Scan Scan;
typedef struct Dent Dent;
@@ -114,6 +115,7 @@
Bcached = 1 << 3,
Bqueued = 1 << 4,
Blimbo = 1 << 5,
+ Bstatic = 1 << 6,
};
enum {
@@ -325,6 +327,18 @@
AOrclose,
};
+enum {
+ DFblk,
+ DFbp,
+ DFmnt,
+ DFtree,
+};
+
+struct Limbo {
+ Limbo *next;
+ int op;
+};
+
struct Bptr {
vlong addr;
uvlong hash;
@@ -377,7 +391,6 @@
};
struct Bucket {
- Lock;
Blk *b;
};
@@ -413,6 +426,8 @@
};
struct Tree {
+ Limbo;
+
/* in-memory */
Lock lk;
long memref; /* number of in-memory references to this */
@@ -431,19 +446,9 @@
vlong base; /* base snapshot */
};
-enum {
- DFblk,
- DFmnt,
- DFtree,
-};
-
struct Bfree {
- Bfree *next;
- int op;
- Mount *m;
- Tree *t;
- Blk *b;
- Bptr bp;
+ Limbo;
+ Bptr bp;
};
struct User {
@@ -517,6 +522,7 @@
QLock synclk;
Rendez syncrz;
+ QLock mountlk;
Mount *mounts;
Mount *snapmnt;
Lock connlk;
@@ -530,12 +536,11 @@
long nworker;
long epoch;
long lepoch[32];
- Bfree *limbo[3];
+ Limbo *limbo[3];
long nlimbo;
Syncq syncq[32];
-
int fd;
long rdonly;
int noauth;
@@ -545,10 +550,6 @@
User *users;
int nusers;
- /* open directory entries */
- Lock dtablk;
- Dent *dtab[Ndtab];
-
/* slow block io */
QLock blklk[32];
@@ -568,6 +569,11 @@
usize ccount;
usize cmax;
+ /* preallocated deferred frees */
+ QLock bfreelk;
+ Rendez bfreerz;
+ Bfree *bfree;
+
RWLock flushq[Nflushtab];
int flushop[Nflushtab];
@@ -581,7 +587,6 @@
Avltree *free;
Blk **queue;
int nqueue;
- int lbidx;
Blk *logbuf[2]; /* preallocated log pages */
Blk *h0; /* arena header */
Blk *h1; /* arena footer */
@@ -591,7 +596,8 @@
vlong used;
vlong reserve;
/* allocation log */
- vlong nlog; /* logged since last copression */
+ vlong lastlogsz; /* size after last compression */
+ vlong nlog; /* number of blocks in log */
Bptr loghd; /* allocation log */
Blk *logtl; /* end of the log, open for writing */
Syncq *sync;
@@ -623,10 +629,14 @@
char gone;
char trunc;
- char buf[Maxent];
+ union {
+ char buf[Maxent];
+ void *auth;
+ };
};
struct Mount {
+ Limbo;
Lock;
Mount *next;
long ref;
@@ -636,6 +646,10 @@
int flag;
+ /* open directory entries */
+ Lock dtablk;
+ Dent *dtab[Ndtab];
+
/* snapshot history */
char minutely[60][128];
char hourly[24][128];
@@ -643,12 +657,19 @@
struct Conn {
Conn *next;
+
QLock wrlk;
+
int rfd;
int wfd;
+ int cfd;
int iounit;
int versioned;
+ int authok;
+ int hangup;
+ long ref;
+
/* fid hash table */
Lock fidtablk[Nfidtab];
Fid *fidtab[Nfidtab];
@@ -655,7 +676,7 @@
};
struct Fid {
- Lock;
+ RWLock;
Fid *next;
/*
* if opened with OEXEC, we want to use a snapshot,
@@ -664,8 +685,9 @@
*/
Mount *mnt;
Scan *scan; /* in progres scan */
- Dent *dent; /* (pqid, name) ref, modified on rename */
- void *auth;
+ Dent *dent; /* (pqid, name) ref, modified on rename */
+ Dent *dir;
+ Amsg *rclose;
u32int fid;
vlong qpath;
@@ -680,7 +702,7 @@
int dmode;
char permit;
- char rclose;
+ char fromdump;
};
enum {
@@ -711,16 +733,12 @@
};
struct Blk {
+ Limbo;
/* cache entry */
Blk *cnext;
Blk *cprev;
Blk *hnext;
- /* Freelist entry */
- Blk *fnext;
-
- long flag;
-
/* serialized to disk in header */
short type; /* @0, for all */
union {
@@ -740,6 +758,7 @@
/* debug */
uintptr queued;
uintptr lasthold;
+ uintptr lasthold0;
uintptr lastdrop;
uintptr enqueued;
uintptr cached;
@@ -749,6 +768,7 @@
Bptr bp;
long ref;
+ long flag;
char *data;
char buf[Blksz];
vlong magic;
--- a/dump.c
+++ b/dump.c
@@ -89,9 +89,10 @@
case Onop:
case Oinsert:
kv2dir(v, &d);
- n = fmtprint(fmt, "[qid=(%llux,%lud,%d), %luo, t=%lld,%lld, l=%lld]",
- d.qid.path, d.qid.vers, d.qid.type,
- d.mode, d.atime, d.mtime, d.length);
+ n = fmtprint(fmt, "[qid=(%llux,%lud,%d), p=%luo, f=%llux, t=%lld,%lld, l=%lld, o=%d, g=%d m=%d]",
+ d.qid.path, d.qid.vers, d.qid.type, d.mode,
+ d.flag, d.atime, d.mtime, d.length,
+ d.uid, d.gid, d.muid);
break;
case Odelete:
n = fmtprint(fmt, "delete");
@@ -306,9 +307,11 @@
goto Show;
case Tlog:
fprint(fd, "log -- ");
+ fprint(fd, "logsz: %d, logh: %lld, logp: %B\n", b->logsz, b->logh, b->logp);
goto Show;
case Tdlist:
fprint(fd, "dlist -- ");
+ fprint(fd, "logsz: %d, logh: %lld, logp: %B\n", b->logsz, b->logh, b->logp);
goto Show;
case Tdat:
fprint(fd, "dat -- ");
--- /dev/null
+++ b/env.rc
@@ -1,0 +1,108 @@
+fn r{mk all && 6.out -dA -m 64 -r $user -f $testdev}
+fn t{mk all && 6.out -A -m 512 -f $testdev}
+fn d{mk all && 6.out -dA -m 512 -f $testdev}
+fn k{kill 6.out | rc}
+fn leak { g `{echo $1 | sed s/0x//g} /tmp/trace | grep -v syncblk | tail -n 3}
+fn m{mount -c /srv/gefs /n/gefs $*}
+fn s{mount -c /srv/gefs /n/gefs dump}
+fn tf{
+ arg=100
+ if(! ~ $#* 0)
+ arg=$*
+ touch /n/gefs/`{seq $arg}
+}
+fn cf{
+ dd -if /dev/zero -of /n/gefs/test -bs 1kk -count 100
+}
+fn rf{
+ arg=100
+ if(! ~ $#* 0)
+ arg=$*
+ for(f in `{seq $arg}) {
+ echo $f
+ rm /n/gefs/$f
+ }
+}
+
+fn iob{
+ 6c iobench.c && 6l -o 6.iobench iobench.6 && 6.iobench -o rand test.fs
+}
+
+fn a{
+ mk fs.acid && acid -l fs.acid $*
+}
+
+fn rps {
+ mac=(-ms)
+ if(~ gefs comp utf 9 contents) mac=(-ms -mnihongo)
+ { echo .FP lucidasans; cat gefs.ms } | pic | tbl | eqn |
+ troff $mac | lp -dstdout > gefs.ps
+ cleanps gefs.ps
+ page -p150 gefs.ps
+}
+fn tt {
+ kill 6.out|rc
+ @ {
+ cd /usr/ori/src/gefs/ && mk all
+ } && @ {
+ cd /usr/ori/src/gefs/test && ./mkgefs.rc /dev/sdO0/data && 6c fsbench.c && 6l fsbench.6 && 6.out /n/gefs
+ }
+}
+
+fn tg {@{
+ GOROOT=/n/gefs/go
+ GOROOT_BOOTSTRAP=/n/gefs/go-plan9-amd64-bootstrap
+ rfork ne
+ m
+ cd /n/gefs
+ mkdir go
+ mkdir tmp
+ bunzip2 -c /tmp/go1.17.13-plan9-amd64-bootstrap.tbz | tar x
+ dircp go-plan9-amd64-bootstrap go
+ bind -c tmp /tmp
+ cd go/src
+ alarm 1200 ./all.rc
+}}
+
+
+fn t9 {@{
+ rfork ne
+ m
+ cd /n/gefs
+ . /sys/lib/rootstub
+ if(! test -e plan9front)
+ git/clone /dist/plan9front
+ bind -c $objtype/lib /$objtype/lib
+ bind -c plan9front/sys/include /sys/include
+ bind -c tmp /tmp
+ cd plan9front/sys/src
+ mk clean >> /tmp/log
+ mk all >> /tmp/log
+}}
+
+fn tsl {@{
+ rfork ne
+ m
+ cd /n/gefs
+ if(! test -e gefs)
+ git/clone $home/src/gefs
+ cd gefs
+ for(i in `{seq 1000}){
+ echo @@ $i
+ mk clean > /dev/null
+ mk all > /dev/null
+ sleep 15
+ }
+}}
+
+fn tb {@{
+ rfork ne
+ m
+ cd /n/gefs
+ for(i in `{seq 1000}){
+ echo @@ $i
+ rm -f x
+ dd -if /dev/zero -of x -bs 15k -count 1
+ sleep 1
+ }
+}}
--- a/fns.h
+++ b/fns.h
@@ -35,8 +35,9 @@
void* emalloc(usize, int);
-Blk* newblk(Tree *, int, vlong);
-Blk* dupblk(Tree *, Blk*);
+Blk* newdblk(Tree*, vlong, int);
+Blk* newblk(Tree*, int);
+Blk* dupblk(Tree*, Blk*);
Blk* getroot(Tree*, int*);
Blk* getblk(Bptr, int);
Blk* holdblk(Blk*);
@@ -59,12 +60,12 @@
void epochend(int);
void epochwait(void);
void epochclean(void);
-void limbo(Bfree*);
-void freeblk(Tree*, Blk*, Bptr);
+void limbo(int op, Limbo*);
+void freeblk(Tree*, Blk*);
+void freebp(Tree*, Bptr);
int logbarrier(Arena *, vlong);
void dlappend(Dlist *dl, Bptr);
void killblk(Tree*, Bptr);
-void blkdealloc(vlong);
ushort blkfill(Blk*);
uvlong blkhash(Blk*);
uvlong bufhash(void*, usize);
@@ -86,6 +87,7 @@
void loadarena(Arena*, Bptr);
void loadfs(char*);
void loadlog(Arena*, Bptr);
+void flushlog(Arena*);
int scandead(Dlist*, int, void(*)(Bptr, void*), void*);
int endfs(void);
void compresslog(Arena*);
@@ -92,7 +94,8 @@
void dlsync(void);
void setval(Blk*, Kvp*);
-Conn* newconn(int, int);
+Conn* newconn(int, int, int);
+void putconn(Conn*);
int walk1(Tree*, vlong, char*, Qid*, vlong*);
void loadusers(int, Tree*);
@@ -106,9 +109,8 @@
int btnext(Scan*, Kvp*);
void btexit(Scan*);
-int checkflag(Blk *b, int);
-void setflag(Blk *b, int);
-void clrflag(Blk *b, int);
+int checkflag(Blk *b, int, int);
+void setflag(Blk *b, int, int);
char* estrdup(char*);
--- a/fs.c
+++ b/fs.c
@@ -12,6 +12,8 @@
static void rerror(Fmsg*, char*, ...);
static void clunkfid(Conn*, Fid*, Amsg**);
+static void authfree(AuthRpc*);
+
int
walk1(Tree *t, vlong up, char *name, Qid *qid, vlong *len)
{
@@ -36,13 +38,33 @@
}
static void
+touch(Dent *de, Msg *msg)
+{
+ wlock(de);
+ de->qid.vers++;
+ msg->op = Owstat;
+ msg->k = de->k;
+ msg->nk = de->nk;
+ msg->v = "\0";
+ msg->nv = 1;
+ wunlock(de);
+}
+
+static void
wrbarrier(void)
{
+ tracev("barrier", fs->qgen);
+ aincv(&fs->qgen, 1);
+}
+
+static void
+wrwait(void)
+{
Qent qe;
int i;
-
+
+ tracev("wrwait", fs->qgen);
aincv(&fs->qgen, 1);
- tracev("barrier", fs->qgen);
fs->syncing = fs->nsyncers;
for(i = 0; i < fs->nsyncers; i++){
qe.op = Qfence;
@@ -66,7 +88,6 @@
Dlist dl;
int i;
-
qlock(&fs->synclk);
if(waserror()){
fprint(2, "failed to sync: %s\n", errmsg());
@@ -111,16 +132,13 @@
* block out synchronously, or it may
* get reused.
*/
- logbarrier(a, fs->qgen);
- finalize(a->logtl);
- syncblk(a->logtl);
+ logbarrier(a, agetv(&fs->qgen));
+ flushlog(a);
packarena(a->h0->data, Blksz, a);
packarena(a->h1->data, Blksz, a);
finalize(a->h0);
finalize(a->h1);
- setflag(a->h0, Bdirty);
- setflag(a->h1, Bdirty);
fs->arenabp[i] = a->h0->bp;
qunlock(a);
}
@@ -150,10 +168,10 @@
* get synced after so that we can use them next
* time around.
*/
- qlock(&fs->mutlk);
tracem("supers");
- syncblk(fs->sb0);
- syncblk(fs->sb1);
+ enqueue(fs->sb0);
+ enqueue(fs->sb1);
+ wrbarrier();
/*
* pass 3: sync block footers; if we crash here,
@@ -165,11 +183,13 @@
enqueue(fs->arenas[i].h1);
/*
- * Pass 4: clean up the old snap tree's deadlist
+ * Pass 4: clean up the old snap tree's deadlist.
+ * we need to wait for all the new data to hit disk
+ * before we can free anything, otherwise it gets
+ * clobbered.
*/
tracem("snapdl");
- wrbarrier();
- qunlock(&fs->mutlk);
+ wrwait();
freedl(&dl, 1);
qunlock(&fs->synclk);
tracem("synced");
@@ -210,7 +230,7 @@
return;
}
if(t->nlbl == 1 && t->nref <= 1 && t->succ == -1){
- aincl(&t->memref, 1);
+ ainc(&t->memref);
*tp = t;
}
delsnap(t, t->succ, a->old);
@@ -244,7 +264,7 @@
d->qid.path = Qdump;
d->qid.vers = fs->nextgen;
d->qid.type = QTDIR;
- d->mode = 0555;
+ d->mode = DMDIR|0555;
d->atime = 0;
d->mtime = 0;
d->length = 0;
@@ -326,38 +346,23 @@
{
char buf[ERRMAX];
va_list ap;
- Amsg *a;
- Fid *f;
- int i;
+ c->hangup = 1;
+
va_start(ap, fmt);
vsnprint(buf, sizeof(buf), fmt, ap);
va_end(ap);
+
fprint(2, "hangup: %s\n", buf);
- close(c->rfd);
- close(c->wfd);
- for(i = 0; i < Nfidtab; i++){
- lock(&c->fidtablk[i]);
- for(f = c->fidtab[i]; f != nil; f = f->next){
- lock(f);
- if(waserror()){
- unlock(f);
- continue;
- }
- a = nil;
- clunkfid(c, f, &a);
- unlock(f);
- if(a != nil)
- chsend(fs->admchan, a);
- nexterror();
- }
- unlock(&c->fidtablk[i]);
- }
+
+ if(c->cfd >= 0)
+ hangup(c->cfd);
}
static void
respond(Fmsg *m, Fcall *r)
{
+ Conn *c;
RWLock *lk;
uchar buf[Max9p+IOHDRSZ];
int w, n;
@@ -367,11 +372,12 @@
assert(m->type+1 == r->type || r->type == Rerror);
if((n = convS2M(r, buf, sizeof(buf))) == 0)
abort();
- qlock(&m->conn->wrlk);
- w = write(m->conn->wfd, buf, n);
- qunlock(&m->conn->wrlk);
+ c = m->conn;
+ qlock(&c->wrlk);
+ w = c->hangup? n: write(c->wfd, buf, n);
+ qunlock(&c->wrlk);
if(w != n)
- fshangup(m->conn, Eio);
+ fshangup(c, Eio);
if(m->type == Tflush){
lk = &fs->flushq[ihash(m->oldtag) % Nflushtab];
wunlock(lk);
@@ -380,6 +386,7 @@
runlock(lk);
}
free(m);
+ putconn(c);
}
static void
@@ -432,7 +439,7 @@
static int
readb(Tree *t, Fid *f, char *d, vlong o, vlong n, vlong sz)
{
- char buf[17], kvbuf[17+32];
+ char buf[Offksz], kvbuf[Offksz+32];
vlong fb, fo;
Bptr bp;
Blk *b;
@@ -471,6 +478,7 @@
char buf[Kvmax];
vlong fb, fo;
Blk *b, *t;
+ int seq;
Tree *r;
Bptr bp;
Kvp kv;
@@ -482,9 +490,14 @@
PACK64(m->k+1, f->qpath);
PACK64(m->k+9, fb);
- b = newblk(f->mnt->root, Tdat, f->qpath);
+ if(fo+n >= Blksz)
+ seq = 1;
+ else
+ seq = 0;
+ b = newdblk(f->mnt->root, f->qpath, seq);
t = nil;
r = f->mnt->root;
+tracex("writeb", b->bp, f->qpath, o);
if(btlookup(r, m, &kv, buf, sizeof(buf))){
bp = unpackbp(kv.v, kv.nv);
if(fb < sz && (fo != 0 || n != Blksz)){
@@ -511,7 +524,7 @@
}
static Dent*
-getdent(vlong pqid, Xdir *d)
+getdent(Mount *mnt, vlong pqid, Xdir *d)
{
Dent *de;
char *e;
@@ -518,8 +531,8 @@
u32int h;
h = ihash(d->qid.path) % Ndtab;
- lock(&fs->dtablk);
- for(de = fs->dtab[h]; de != nil; de = de->next){
+ lock(&mnt->dtablk);
+ for(de = mnt->dtab[h]; de != nil; de = de->next){
if(de->qid.path == d->qid.path){
ainc(&de->ref);
goto Out;
@@ -542,11 +555,11 @@
de->k = de->buf;
de->nk = e - de->buf;
de->name = de->buf + 11;
- de->next = fs->dtab[h];
- fs->dtab[h] = de;
+ de->next = mnt->dtab[h];
+ mnt->dtab[h] = de;
Out:
- unlock(&fs->dtablk);
+ unlock(&mnt->dtablk);
return de;
}
@@ -607,6 +620,7 @@
return fs->snapmnt;
}
+ qlock(&fs->mountlk);
for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
if(strcmp(name, mnt->name) == 0){
ainc(&mnt->ref);
@@ -617,6 +631,7 @@
if((mnt = mallocz(sizeof(*mnt), 1)) == nil)
error(Enomem);
if(waserror()){
+ qunlock(&fs->mountlk);
free(mnt);
nexterror();
}
@@ -632,6 +647,7 @@
poperror();
Out:
+ qunlock(&fs->mountlk);
return mnt;
}
@@ -639,26 +655,24 @@
clunkmount(Mount *mnt)
{
Mount *me, **p;
- Bfree *f;
if(mnt == nil)
return;
if(adec(&mnt->ref) == 0){
+ qlock(&fs->mountlk);
for(p = &fs->mounts; (me = *p) != nil; p = &me->next){
if(me == mnt)
break;
}
assert(me != nil);
- f = emalloc(sizeof(Bfree), 0);
- f->op = DFmnt;
- f->m = mnt;
*p = me->next;
- limbo(f);
+ limbo(DFmnt, me);
+ qunlock(&fs->mountlk);
}
}
static void
-clunkdent(Dent *de)
+clunkdent(Mount *mnt, Dent *de)
{
Dent *e, **pe;
u32int h;
@@ -665,16 +679,19 @@
if(de == nil)
return;
- if(de->qid.type & QTAUTH && adec(&de->ref) == 0){
- free(de);
+ if(de->qid.type & QTAUTH){
+ if(adec(&de->ref) == 0){
+ authfree(de->auth);
+ free(de);
+ }
return;
}
- lock(&fs->dtablk);
+ lock(&mnt->dtablk);
if(adec(&de->ref) != 0)
goto Out;
h = ihash(de->qid.path) % Ndtab;
- pe = &fs->dtab[h];
- for(e = fs->dtab[h]; e != nil; e = e->next){
+ pe = &mnt->dtab[h];
+ for(e = mnt->dtab[h]; e != nil; e = e->next){
if(e == de)
break;
pe = &e->next;
@@ -683,7 +700,7 @@
*pe = e->next;
free(de);
Out:
- unlock(&fs->dtablk);
+ unlock(&mnt->dtablk);
}
static Fid*
@@ -708,8 +725,9 @@
{
if(adec(&f->ref) != 0)
return;
+ clunkdent(f->mnt, f->dent);
+ clunkdent(f->mnt, f->dir);
clunkmount(f->mnt);
- clunkdent(f->dent);
free(f);
}
@@ -724,6 +742,7 @@
return nil;
*n = *f;
+ memset(&n->RWLock, 0, sizeof(RWLock));
n->fid = new;
n->ref = 2; /* one for dup, one for clunk */
n->mode = -1;
@@ -747,10 +766,16 @@
if(n->mnt != nil)
ainc(&n->mnt->ref);
ainc(&n->dent->ref);
+ ainc(&n->dir->ref);
setmalloctag(n, getcallerpc(&c));
return n;
}
+/*
+ * clunkfid() removes a fid from the
+ * connection fid tab and drops reference.
+ * Fid must be locked.
+ */
static void
clunkfid(Conn *c, Fid *fid, Amsg **ao)
{
@@ -757,6 +782,8 @@
Fid *f, **pf;
u32int h;
+ assert(!canwlock(fid));
+
h = ihash(fid->fid) % Nfidtab;
lock(&c->fidtablk[h]);
pf = &c->fidtab[h];
@@ -768,21 +795,27 @@
}
pf = &f->next;
}
+ unlock(&c->fidtablk[h]);
+
assert(f != nil);
if(f->scan != nil){
free(f->scan);
f->scan = nil;
}
- if(f->rclose){
+
+ if((*ao = f->rclose) != nil){
+ f->rclose = nil;
+
qlock(&f->dent->trunclk);
f->dent->trunc = 1;
qunlock(&f->dent->trunclk);
+
wlock(f->dent);
f->dent->gone = 1;
wunlock(f->dent);
- *ao = emalloc(sizeof(Amsg), 1);
- aincl(&f->dent->ref, 1);
- aincl(&f->mnt->ref, 1);
+
+ ainc(&f->dent->ref);
+ ainc(&f->mnt->ref);
(*ao)->op = AOrclose;
(*ao)->mnt = f->mnt;
(*ao)->qpath = f->qpath;
@@ -790,9 +823,23 @@
(*ao)->end = f->dent->length;
(*ao)->dent = f->dent;
}
- unlock(&c->fidtablk[h]);
}
+static void
+freeamsg(Amsg *a)
+{
+ if(a == nil)
+ return;
+ switch(a->op){
+ case AOrclose:
+ case AOclear:
+ clunkdent(a->mnt, a->dent);
+ clunkmount(a->mnt);
+ break;
+ }
+ free(a);
+}
+
static int
readmsg(Conn *c, Fmsg **pm)
{
@@ -821,6 +868,7 @@
free(m);
return -1;
}
+ ainc(&c->ref);
m->conn = c;
m->sz = sz;
PBIT32(m->buf, sz);
@@ -853,7 +901,7 @@
respond(m, &r);
}
-void
+static void
authfree(AuthRpc *auth)
{
AuthRpc *rpc;
@@ -894,7 +942,7 @@
AuthRpc *rpc;
User *u;
- if((rpc = f->auth) == nil)
+ if((f->dir->qid.type & QTAUTH) == 0 || (rpc = f->dir->auth) == nil)
error(Etype);
switch(auth_rpc(rpc, "read", nil, 0)){
@@ -930,7 +978,7 @@
{
AuthRpc *rpc;
- if((rpc = f->auth) == nil)
+ if((f->dir->qid.type & QTAUTH) == 0 || (rpc = f->dir->auth) == nil)
error(Etype);
if(auth_rpc(rpc, "write", data, count) != ARok)
error(Ebotch);
@@ -944,7 +992,7 @@
{
Dent *de;
Fcall r;
- Fid f;
+ Fid f, *nf;
if(fs->noauth){
rerror(m, Eauth);
@@ -959,6 +1007,11 @@
return;
}
memset(de, 0, sizeof(Dent));
+ de->auth = authnew();
+ if(de->auth == nil){
+ rerror(m, errmsg());
+ return;
+ }
de->ref = 0;
de->qid.type = QTAUTH;
de->qid.path = aincv(&fs->nextqid, 1);
@@ -975,13 +1028,15 @@
f.mode = -1;
f.iounit = m->conn->iounit;
f.dent = de;
+ f.dir = de;
f.uid = -1;
f.duid = -1;
f.dgid = -1;
f.dmode = 0600;
- f.auth = authnew();
- if(dupfid(m->conn, m->afid, &f) == nil){
+ nf = dupfid(m->conn, m->afid, &f);
+ if(nf == nil){
rerror(m, Efid);
+ authfree(de->auth);
free(de);
return;
}
@@ -988,6 +1043,7 @@
r.type = Rauth;
r.aqid = de->qid;
respond(m, &r);
+ putfid(nf);
}
static int
@@ -1065,7 +1121,7 @@
if((m & (fmode>>3)) == m)
return 0;
}
- if(m & fmode) {
+ if((m & fmode) == m) {
if((fmode & DMDIR) && (m == DMEXEC))
return 0;
if(!ingroup(f->uid, nogroupid))
@@ -1087,7 +1143,7 @@
Xdir d;
Kvp kv;
Key dk;
- Fid f, *af;
+ Fid f, *af, *nf;
int uid;
de = nil;
@@ -1128,10 +1184,15 @@
putfid(af);
if(af->uid != uid)
error(Ebadu);
- }else if(!fs->noauth && strcmp(m->uname, "none") != 0)
- error(Ebadu);
+ m->conn->authok = 1; /* none attach allowed now */
+ }else if(!fs->noauth){
+ if(uid != noneid || !m->conn->authok)
+ error(Ebadu);
+ }
if(strcmp(m->aname, "dump") == 0){
+ if(uid == noneid)
+ error(Eperm);
memset(&d, 0, sizeof(d));
filldumpdir(&d);
}else{
@@ -1144,7 +1205,7 @@
error(Enosnap);
kv2dir(&kv, &d);
}
- de = getdent(-1, &d);
+ de = getdent(mnt, -1, &d);
memset(&f, 0, sizeof(Fid));
f.fid = NOFID;
f.mnt = mnt;
@@ -1153,6 +1214,7 @@
f.mode = -1;
f.iounit = m->conn->iounit;
f.dent = de;
+ f.dir = de;
f.uid = uid;
f.duid = d.uid;
f.dgid = d.gid;
@@ -1162,44 +1224,58 @@
error(Eperm);
f.permit = 1;
}
- if(dupfid(m->conn, m->fid, &f) == nil)
+ if(strcmp(aname, "dump") == 0)
+ f.fromdump = 1;
+ nf = dupfid(m->conn, m->fid, &f);
+ if(nf == nil)
error(Efid);
-
r.type = Rattach;
r.qid = d.qid;
respond(m, &r);
+ putfid(nf);
poperror();
-Err: clunkdent(de);
+Err: clunkdent(mnt, de);
clunkmount(mnt);
}
static int
-findparent(Tree *t, Fid *f, vlong *qpath, char **name, char *buf, int nbuf)
+findparent(Tree *t, vlong up, vlong *qpath, char **name, char *buf, int nbuf)
{
char *p, kbuf[Keymax];
Kvp kv;
Key k;
- p = packsuper(kbuf, sizeof(kbuf), f->pqpath);
+ p = packsuper(kbuf, sizeof(kbuf), up);
k.k = kbuf;
k.nk = p - kbuf;
if(!btlookup(t, &k, &kv, buf, nbuf))
- return 0;
+ error(Esrch);
*name = unpackdkey(kv.v, kv.nv, qpath);
return 1;
}
static void
+dkey(Key *k, vlong up, char *name, char *buf, int nbuf)
+{
+ char *p;
+
+ p = packdkey(buf, nbuf, up, name);
+ k->k = buf;
+ k->nk = p - buf;
+}
+
+static void
fswalk(Fmsg *m)
{
- char *p, *name, kbuf[Maxent], kvbuf[Kvmax];
- int duid, dgid, dmode;
- vlong up, prev;
+ char *name, kbuf[Maxent], kvbuf[Kvmax];
+ int duid, dgid, dmode, duped;
+ vlong up, upup, prev;
+ Dent *dent, *dir;
Fid *o, *f;
- Dent *dent;
Mount *mnt;
+ Amsg *ao;
Tree *t;
Fcall r;
Xdir d;
@@ -1211,8 +1287,10 @@
rerror(m, Enofid);
return;
}
+ rlock(o);
if(waserror()){
rerror(m, errmsg());
+ runlock(o);
putfid(o);
return;
}
@@ -1220,7 +1298,7 @@
error(Einuse);
t = o->mnt->root;
mnt = o->mnt;
- up = o->qpath;
+ up = o->pqpath;
prev = o->qpath;
rlock(o->dent);
d = *o->dent;
@@ -1234,42 +1312,31 @@
if(strlen(name) > Maxname)
error(Elength);
if(fsaccess(o, d.mode, d.uid, d.gid, DMEXEC) != 0)
- error(Eperm);
- if(d.qid.path == Qdump){
- if((mnt = getmount(m->wname[i])) == nil)
- error(Esrch);
- if(waserror()){
- clunkmount(mnt);
- nexterror();
+ break;
+ if(strcmp(name, "..") == 0){
+ if(up == -1 && o->fromdump){
+ mnt = fs->snapmnt;
+ filldumpdir(&d);
+ prev = -1ULL;
+ up = -1ULL;
+ r.wqid[i] = d.qid;
+ continue;
}
+ findparent(t, up, &prev, &name, kbuf, sizeof(kbuf));
+ }else if(d.qid.path == Qdump){
+ mnt = getmount(m->wname[i]);
+ name = "";
+ prev = -1ULL;
t = mnt->root;
- p = packdkey(kbuf, sizeof(kbuf), -1ULL, "");
- poperror();
- }else{
- if(strcmp(m->wname[i], "..") == 0){
- if(o->pqpath == Qdump){
- mnt = fs->snapmnt;
- filldumpdir(&d);
- duid = d.uid;
- dgid = d.gid;
- dmode = d.mode;
- goto Found;
- }
- if(!findparent(t, o, &prev, &name, kbuf, sizeof(kbuf)))
- error(Esrch);
- }
- p = packdkey(kbuf, sizeof(kbuf), prev, name);
}
+ up = prev;
duid = d.uid;
dgid = d.gid;
dmode = d.mode;
- k.k = kbuf;
- k.nk = p - kbuf;
+ dkey(&k, prev, name, kbuf, sizeof(kbuf));
if(!btlookup(t, &k, &kv, kvbuf, sizeof(kvbuf)))
break;
kv2dir(&kv, &d);
-Found:
- up = prev;
prev = d.qid.path;
r.wqid[i] = d.qid;
}
@@ -1277,41 +1344,59 @@
if(i == 0 && m->nwname != 0)
error(Esrch);
f = o;
+ duped = 0;
if(m->fid != m->newfid && i == m->nwname){
if((f = dupfid(m->conn, m->newfid, o)) == nil)
error(Efid);
- putfid(o);
+ duped = 1;
}
+ runlock(o);
+
if(i > 0 && i == m->nwname){
- lock(f);
+ wlock(f);
+ ao = nil;
if(waserror()){
- if(f != o)
- clunkfid(m->conn, f, nil);
- unlock(f);
+ if(duped)
+ clunkfid(m->conn, f, &ao);
+ assert(ao == nil);
+ wunlock(f);
nexterror();
}
- if(up == Qdump)
- dent = getdent(-1ULL, &d);
- else
- dent = getdent(up, &d);
+ if(up == -1ULL){
+ /* the root contains itself, I guess */
+ dent = getdent(mnt, up, &d);
+ dir = getdent(mnt, up, &d);
+ }else{
+ dent = getdent(mnt, up, &d);
+ findparent(t, up, &upup, &name, kbuf, sizeof(kbuf));
+ dkey(&k, upup, name, kbuf, sizeof(kbuf));
+ if(!btlookup(t, &k, &kv, kvbuf, sizeof(kvbuf)))
+ broke("missing parent");
+ kv2dir(&kv, &d);
+ dir = getdent(mnt, upup, &d);
+ }
+ clunkdent(f->mnt, f->dent);
+ clunkdent(f->mnt, f->dir);
if(mnt != f->mnt){
clunkmount(f->mnt);
ainc(&mnt->ref);
f->mnt = mnt;
}
- clunkdent(f->dent);
f->qpath = r.wqid[i-1].path;
f->pqpath = up;
f->dent = dent;
+ f->dir = dir;
f->duid = duid;
f->dgid = dgid;
f->dmode = dmode;
poperror();
- unlock(f);
+ wunlock(f);
}
+ if(duped)
+ putfid(o);
+ putfid(f);
respond(m, &r);
poperror();
- putfid(f);
}
static void
@@ -1353,7 +1438,7 @@
Qid old;
Fcall r;
Dent *de;
- Msg mb[3];
+ Msg mb[4];
Xdir n;
Dir d;
Tree *t;
@@ -1372,7 +1457,7 @@
wlock(de);
if(waserror()){
rerror(m, errmsg());
- free(*ao);
+ freeamsg(*ao);
*ao = nil;
goto Err;
}
@@ -1418,8 +1503,8 @@
qlock(&de->trunclk);
de->trunc = 1;
qunlock(&de->trunclk);
- aincl(&de->ref, 1);
- aincl(&f->mnt->ref, 1);
+ ainc(&de->ref);
+ ainc(&f->mnt->ref);
(*ao)->op = AOclear;
(*ao)->mnt = f->mnt;
(*ao)->qpath = f->qpath;
@@ -1536,6 +1621,7 @@
mb[nm].nv = mb[nm-1].nk;
nm++;
}
+ touch(f->dir, &mb[nm++]);
}else{
opbuf[0] = op;
mb[nm].op = Owstat;
@@ -1570,9 +1656,9 @@
rerror(m, Enofid);
return;
}
- lock(f);
+ wlock(f);
clunkfid(m->conn, f, ao);
- unlock(f);
+ wunlock(f);
r.type = Rclunk;
respond(m, &r);
putfid(f);
@@ -1582,14 +1668,14 @@
fscreate(Fmsg *m)
{
char *p, *e, buf[Kvmax], upkbuf[Keymax], upvbuf[Inlmax];
+ int nm, duid, dgid, dmode;
Dent *de;
vlong oldlen;
Qid old;
Fcall r;
- Msg mb[2];
+ Msg mb[3];
Fid *f;
Xdir d;
- int nm;
if((e = okname(m->name)) != nil){
rerror(m, e);
@@ -1603,7 +1689,7 @@
rerror(m, Enofid);
return;
}
- lock(f);
+ wlock(f);
if(waserror()){
rerror(m, errmsg());
@@ -1626,8 +1712,9 @@
runlock(de);
goto Out;
}
-
- d.gid = de->gid;
+ duid = de->uid;
+ dgid = de->gid;
+ dmode = de->mode;
runlock(de);
nm = 0;
@@ -1652,6 +1739,7 @@
d.mtime = d.atime;
d.length = 0;
d.uid = f->uid;
+ d.gid = dgid;
d.muid = f->uid;
mb[nm].op = Oinsert;
@@ -1670,16 +1758,21 @@
mb[nm].nv = p - upvbuf;
nm++;
}
+ touch(f->dent, &mb[nm++]);
+ assert(nm <= nelem(mb));
upsert(f->mnt, mb, nm);
- de = getdent(f->qpath, &d);
- clunkdent(f->dent);
+ de = getdent(f->mnt, f->qpath, &d);
+ clunkdent(f->mnt, f->dent);
f->mode = mode2bits(m->mode);
f->pqpath = f->qpath;
f->qpath = d.qid.path;
f->dent = de;
+ f->duid = duid;
+ f->dgid = dgid;
+ f->dmode = dmode;
if(m->mode & ORCLOSE)
- f->rclose = 1;
+ f->rclose = emalloc(sizeof(Amsg), 1);
r.type = Rcreate;
r.qid = d.qid;
@@ -1686,7 +1779,7 @@
r.iounit = f->iounit;
respond(m, &r);
Out: poperror();
-Err: unlock(f);
+Err: wunlock(f);
putfid(f);
return;
}
@@ -1718,7 +1811,8 @@
{
char *e, buf[Kvmax];
Fcall r;
- Msg mb[2];
+ int nm;
+ Msg mb[3];
Tree *t;
Kvp kv;
Fid *f;
@@ -1728,17 +1822,18 @@
return;
}
t = f->mnt->root;
- clunkfid(m->conn, f, nil);
-
+ nm = 0;
+ wlock(f);
+ clunkfid(m->conn, f, ao);
truncwait(f->dent, id);
wlock(f->dent);
- *ao = nil;
if(waserror()){
rerror(m, errmsg());
- free(*ao);
+ freeamsg(*ao);
*ao = nil;
goto Err;
}
+tracex("removef", Zb, f->qpath, -1);
if(f->dent->gone)
error(Ephase);
/*
@@ -1756,21 +1851,26 @@
error(e);
if(fsaccess(f, f->dmode, f->duid, f->dgid, DMWRITE) == -1)
error(Eperm);
- mb[0].op = Odelete;
- mb[0].k = f->dent->k;
- mb[0].nk = f->dent->nk;
- mb[0].nv = 0;
+ freeamsg(*ao);
+ *ao = nil;
+
+ mb[nm].op = Odelete;
+ mb[nm].k = f->dent->k;
+ mb[nm].nk = f->dent->nk;
+ mb[nm].v = "\0";
+ mb[nm].nv = 1;
+ nm++;
if(f->dent->qid.type & QTDIR){
packsuper(buf, sizeof(buf), f->qpath);
- mb[1].op = Oclobber;
- mb[1].k = buf;
- mb[1].nk = Upksz;
- mb[1].nv = 0;
- upsert(f->mnt, mb, 2);
+ mb[nm].op = Oclobber;
+ mb[nm].k = buf;
+ mb[nm].nk = Upksz;
+ mb[nm].nv = 0;
+ nm++;
}else{
*ao = emalloc(sizeof(Amsg), 1);
- aincl(&f->mnt->ref, 1);
+ ainc(&f->mnt->ref);
(*ao)->op = AOclear;
(*ao)->mnt = f->mnt;
(*ao)->qpath = f->qpath;
@@ -1777,8 +1877,10 @@
(*ao)->off = 0;
(*ao)->end = f->dent->length;
(*ao)->dent = nil;
- upsert(f->mnt, mb, 1);
}
+ touch(f->dir, &mb[nm++]);
+ assert(nm <= nelem(mb));
+ upsert(f->mnt, mb, nm);
f->dent->gone = 1;
r.type = Rremove;
respond(m, &r);
@@ -1785,6 +1887,7 @@
poperror();
Err:
wunlock(f->dent);
+ wunlock(f);
putfid(f);
return;
}
@@ -1843,9 +1946,9 @@
r.qid = d.qid;
r.iounit = f->iounit;
- lock(f);
+ wlock(f);
if(f->mode != -1){
- unlock(f);
+ wunlock(f);
error(Einuse);
}
if((m->mode & OTRUNC) && !(f->dent->mode & DMAPPEND)){
@@ -1853,7 +1956,7 @@
if(waserror()){
wunlock(f->dent);
- free(*ao);
+ freeamsg(*ao);
*ao = nil;
nexterror();
}
@@ -1861,8 +1964,8 @@
qlock(&f->dent->trunclk);
f->dent->trunc = 1;
qunlock(&f->dent->trunclk);
- aincl(&f->dent->ref, 1);
- aincl(&f->mnt->ref, 1);
+ ainc(&f->dent->ref);
+ ainc(&f->mnt->ref);
(*ao)->op = AOclear;
(*ao)->mnt = f->mnt;
(*ao)->qpath = f->qpath;
@@ -1890,8 +1993,8 @@
}
f->mode = mode2bits(m->mode);
if(m->mode & ORCLOSE)
- f->rclose = 1;
- unlock(f);
+ f->rclose = emalloc(sizeof(Amsg), 1);
+ wunlock(f);
poperror();
respond(m, &r);
putfid(f);
@@ -1905,6 +2008,12 @@
Scan *s;
Xdir d;
+ /* mutates scan */
+ wlock(f);
+ if(waserror()){
+ wunlock(f);
+ nexterror();
+ }
s = f->scan;
if(s != nil && s->offset != 0 && s->offset != m->offset)
error(Edscan);
@@ -1912,12 +2021,10 @@
s = emalloc(sizeof(Scan), 1);
pfx[0] = Klabel;
btnewscan(s, pfx, 1);
- lock(f);
if(f->scan != nil){
free(f->scan);
}
f->scan = s;
- unlock(f);
}
if(s->donescan){
r->count = 0;
@@ -1925,7 +2032,7 @@
}
p = r->data;
n = m->count;
- d = f->dent->Xdir;
+ filldumpdir(&d);
if(s->overflow){
memcpy(d.name, s->kv.k+1, s->kv.nk-1);
d.name[s->kv.nk-1] = 0;
@@ -1953,6 +2060,8 @@
n -= ns;
}
btexit(s);
+ poperror();
+ wunlock(f);
r->count = p - r->data;
return;
}
@@ -1965,6 +2074,12 @@
Tree *t;
Scan *s;
+ /* mutates scan */
+ wlock(f);
+ if(waserror()){
+ wunlock(f);
+ nexterror();
+ }
s = f->scan;
t = agetp(&f->mnt->root);
if(s != nil && s->offset != 0 && s->offset != m->offset)
@@ -1973,22 +2088,21 @@
s = emalloc(sizeof(Scan), 1);
packdkey(pfx, sizeof(pfx), f->qpath, nil);
btnewscan(s, pfx, sizeof(pfx));
- lock(f);
if(f->scan != nil)
free(f->scan);
f->scan = s;
- unlock(f);
}
if(s->donescan){
r->count = 0;
- return;
+ goto Out;
}
p = r->data;
n = m->count;
if(s->overflow){
+ /* someone picked an iounit too small for a dir */
if((ns = kv2statbuf(&s->kv, p, n)) == -1){
r->count = 0;
- return;
+ error(Ebotch);
}
s->overflow = 0;
p += ns;
@@ -2005,8 +2119,11 @@
p += ns;
n -= ns;
}
- btexit(s);
r->count = p - r->data;
+ btexit(s);
+Out:
+ poperror();
+ wunlock(f);
}
static void
@@ -2017,10 +2134,12 @@
Dent *e;
Tree *t;
+ rlock(f);
e = f->dent;
rlock(e);
if(m->offset > e->length){
runlock(e);
+ runlock(f);
return;
}
p = r->data;
@@ -2039,6 +2158,7 @@
c -= n;
}
runlock(e);
+ runlock(f);
}
static void
@@ -2092,19 +2212,18 @@
rerror(m, Enofid);
return;
}
- if(!(f->mode & DMWRITE)){
- rerror(m, Einuse);
- putfid(f);
- return;
- }
+ wlock(f);
truncwait(f->dent, id);
wlock(f->dent);
if(waserror()){
rerror(m, errmsg());
wunlock(f->dent);
+ wunlock(f);
putfid(f);
return;
}
+ if(!(f->mode & DMWRITE))
+ error(Einuse);
if(f->dent->gone)
error(Ephase);
if(f->dent->qid.type & QTAUTH){
@@ -2119,7 +2238,8 @@
if(f->dent->mode & DMAPPEND)
o = f->dent->length;
t = agetp(&f->mnt->root);
- for(i = 0; i < nelem(kv)-1 && c != 0; i++){
+ for(i = 0; c != 0; i++){
+ assert(i < nelem(kv));
assert(i == 0 || o%Blksz == 0);
kv[i].op = Oinsert;
kv[i].k = kbuf[i];
@@ -2129,7 +2249,7 @@
if(waserror()){
if(!fs->rdonly)
for(j = 0; j < i; j++)
- freeblk(t, nil, bp[j]);
+ freebp(t, bp[j]);
nexterror();
}
n = writeb(f, &kv[i], &bp[i], p, o, c, f->dent->length);
@@ -2149,7 +2269,7 @@
sbuf[0] |= Owsize;
PACK64(p, o);
p += 8;
- f->dent->length = m->offset+m->count;
+ f->dent->length = o;
}
sbuf[0] |= Owmtime;
f->dent->mtime = nsec();
@@ -2169,6 +2289,7 @@
poperror();
respond(m, &r);
wunlock(f->dent);
+ wunlock(f);
putfid(f);
}
@@ -2182,23 +2303,79 @@
}
Conn *
-newconn(int rfd, int wfd)
+newconn(int rfd, int wfd, int cfd)
{
Conn *c;
if((c = mallocz(sizeof(*c), 1)) == nil)
return nil;
+
c->rfd = rfd;
c->wfd = wfd;
+ c->cfd = cfd;
+
c->iounit = Max9p;
- c->next = fs->conns;
+
+ c->ref = 1;
+
lock(&fs->connlk);
+ c->next = fs->conns;
fs->conns = c;
unlock(&fs->connlk);
+
return c;
}
void
+putconn(Conn *c)
+{
+ Conn **pp;
+ Amsg *a;
+ Fid *f;
+ int i;
+
+ if(adec(&c->ref) != 0)
+ return;
+
+ lock(&fs->connlk);
+ for(pp = &fs->conns; *pp != nil; pp = &((*pp)->next)){
+ if(*pp == c){
+ *pp = c->next;
+ break;
+ }
+ }
+ unlock(&fs->connlk);
+
+ close(c->rfd);
+ if(c->rfd != c->wfd)
+ close(c->wfd);
+ if(c->cfd >= 0)
+ close(c->cfd);
+
+ for(i = 0; i < Nfidtab; i++){
+ for(;;){
+ lock(&c->fidtablk[i]);
+ f = c->fidtab[i];
+ if(f == nil){
+ unlock(&c->fidtablk[i]);
+ break;
+ }
+ ainc(&f->ref);
+ unlock(&c->fidtablk[i]);
+
+ wlock(f);
+ clunkfid(c, f, &a);
+ wunlock(f);
+ putfid(f);
+
+ if(a != nil)
+ chsend(fs->admchan, a);
+ }
+ }
+ free(c);
+}
+
+void
runfs(int, void *pc)
{
char err[128];
@@ -2210,20 +2387,20 @@
u32int h;
c = pc;
- while(1){
+ while(!c->hangup){
if(readmsg(c, &m) < 0){
fshangup(c, "read message: %r");
- return;
+ break;
}
if(m == nil)
break;
if(convM2S(m->buf, m->sz, m) == 0){
fshangup(c, "invalid message: %r");
- return;
+ break;
}
if(m->type != Tversion && !c->versioned){
fshangup(c, "version required");
- return;
+ break;
}
dprint("← %F\n", &m->Fcall);
@@ -2276,6 +2453,7 @@
if(a != nil)
chsend(fs->admchan, a);
}
+ putconn(c);
}
void
@@ -2298,8 +2476,11 @@
rerror(m, Enofid);
continue;
}
- clunkfid(m->conn, f, nil);
+ wlock(f);
+ clunkfid(m->conn, f, &a);
+ wunlock(f);
putfid(f);
+ freeamsg(a);
}
rerror(m, Erdonly);
continue;
@@ -2318,8 +2499,8 @@
}
assert(estacksz() == 0);
epochend(id);
- epochclean();
qunlock(&fs->mutlk);
+ epochclean();
if(a != nil)
chsend(fs->admchan, a);
@@ -2361,12 +2542,12 @@
bp = unpackbp(kv.v, kv.nv);
freetree(bp, pred);
qlock(&fs->mutlk);
- epochclean();
qunlock(&fs->mutlk);
+ epochclean();
}
}
if(rb.gen > pred)
- freeblk(nil, nil, rb);
+ freebp(nil, rb);
dropblk(b);
}
@@ -2395,10 +2576,10 @@
break;
bp = unpackbp(s.kv.v, s.kv.nv);
if(bp.gen > t->pred)
- freeblk(nil, nil, bp);
+ freebp(nil, bp);
qlock(&fs->mutlk);
- epochclean();
qunlock(&fs->mutlk);
+ epochclean();
}
btexit(&s);
freetree(t->bp, t->pred);
@@ -2408,28 +2589,24 @@
runsweep(int id, void*)
{
char buf[Kvmax];
+ Msg mb[Kvmax/Offksz];
Bptr bp, nb, *oldhd;
+ int i, nm;
vlong off;
Tree *t;
Arena *a;
Amsg *am;
Blk *b;
- Msg m, mb[2];
- int i, nm;
if((oldhd = calloc(fs->narena, sizeof(Bptr))) == nil)
sysfatal("malloc log heads");
while(1){
am = chrecv(fs->admchan);
- if(agetl(&fs->rdonly)){
- fprint(2, "spurious adm message\n");
- break;
- }
switch(am->op){
case AOsync:
tracem("syncreq");
if(!fs->snap.dirty && !am->halt)
- continue;
+ goto Next;
if(agetl(&fs->rdonly))
goto Justhalt;
if(waserror()){
@@ -2440,31 +2617,31 @@
if(am->halt)
ainc(&fs->rdonly);
- qlock(&fs->mutlk);
for(i = 0; i < fs->narena; i++){
a = &fs->arenas[i];
+ oldhd[i].addr = -1;
+ oldhd[i].hash = -1;
+ oldhd[i].gen = -1;
qlock(a);
- if(a->nlog < a->reserve/(10*Blksz)){
- oldhd[i].addr = -1;
- oldhd[i].hash = -1;
- oldhd[i].gen = -1;
- qunlock(a);
- continue;
- }
- if(waserror()){
- qunlock(&fs->mutlk);
- qunlock(a);
- nexterror();
- }
- oldhd[i] = a->loghd;
- epochstart(id);
- compresslog(a);
+ /*
+ * arbitrary heuristic -- try compressing
+ * when the log doubles in size.
+ */
+// if(a->nlog >= 2*a->lastlogsz){
+// oldhd[i] = a->loghd;
+// epochstart(id);
+// if(waserror()){
+// epochend(id);
+// qunlock(a);
+// nexterror();
+// }
+// compresslog(a);
+// epochend(id);
+// poperror();
+// }
qunlock(a);
- epochend(id);
epochclean();
- poperror();
}
- qunlock(&fs->mutlk);
sync();
for(i = 0; i < fs->narena; i++){
@@ -2473,11 +2650,11 @@
epochstart(id);
b = getblk(bp, 0);
nb = b->logp;
- freeblk(nil, b, b->bp);
+ freeblk(nil, b);
dropblk(b);
epochend(id);
- epochclean();
qunlock(&fs->mutlk);
+ epochclean();
}
}
@@ -2494,8 +2671,8 @@
case AOsnap:
tracem("snapreq");
if(agetl(&fs->rdonly)){
- fprint(2, "read only fs");
- continue;
+ fprint(2, "snap on read only fs\n");
+ goto Next; /* not continue: the code at Next frees am */
}
if(waserror()){
fprint(2, "taking snap: %s\n", errmsg());
@@ -2525,6 +2702,10 @@
break;
case AOrclose:
+ if(agetl(&fs->rdonly)){ /* drop the request when rdonly is set */
+ fprint(2, "rclose on read only fs\n");
+ goto Next;
+ }
nm = 0;
mb[nm].op = Odelete;
mb[nm].k = am->dent->k;
@@ -2531,6 +2712,7 @@
mb[nm].nk = am->dent->nk;
mb[nm].nv = 0;
nm++;
+tracex("rclose", Zb, am->qpath, -1);
if(am->dent->qid.type & QTDIR){
packsuper(buf, sizeof(buf), am->qpath);
mb[nm].op = Oclobber;
@@ -2539,9 +2721,15 @@
mb[nm].nv = 0;
nm++;
}
+ qlock(&fs->mutlk);
upsert(am->mnt, mb, nm);
+ qunlock(&fs->mutlk);
/* fallthrough */
case AOclear:
+ if(agetl(&fs->rdonly)){ /* drop the request when rdonly is set */
+ fprint(2, "clear on read only fs\n");
+ goto Next;
+ }
tracem("bgclear");
if(waserror()){
fprint(2, "clear file %llx: %s\n", am->qpath, errmsg());
@@ -2551,39 +2739,43 @@
if(am->dent != nil)
qlock(&am->dent->trunclk);
fs->snap.dirty = 1;
+ nm = 0;
for(off = am->off; off < am->end; off += Blksz){
- qlock(&fs->mutlk);
- if(waserror()){
+tracex("clearb", Zb, am->qpath, off);
+ mb[nm].op = Oclearb;
+ mb[nm].k = buf + Offksz * nm;
+ mb[nm].nk = Offksz;
+ mb[nm].k[0] = Kdat;
+ PACK64(mb[nm].k+1, am->qpath);
+ PACK64(mb[nm].k+9, off);
+ mb[nm].v = nil;
+ mb[nm].nv = 0;
+ if(++nm >= nelem(mb) || off + Blksz >= am->end){
+ qlock(&fs->mutlk);
+ if(waserror()){
+ qunlock(&fs->mutlk);
+ nexterror();
+ }
+ epochstart(id);
+ upsert(am->mnt, mb, nm);
+ epochend(id);
qunlock(&fs->mutlk);
- nexterror();
+ epochclean();
+ poperror();
+ nm = 0;
}
- epochstart(id);
- m.k = buf;
- m.nk = sizeof(buf);
- m.op = Oclearb;
- m.k[0] = Kdat;
- PACK64(m.k+1, am->qpath);
- PACK64(m.k+9, off);
- m.v = nil;
- m.nv = 0;
- upsert(am->mnt, &m, 1);
- epochend(id);
- epochclean();
- qunlock(&fs->mutlk);
- poperror();
}
if(am->dent != nil){
am->dent->trunc = 0;
rwakeup(&am->dent->truncrz);
qunlock(&am->dent->trunclk);
- clunkdent(am->dent);
}
- clunkmount(am->mnt);
poperror();
break;
}
+Next:
assert(estacksz() == 0);
- free(am);
+ freeamsg(am);
}
}
@@ -2631,6 +2823,7 @@
a->fd = -1;
chsend(fs->admchan, a);
+if(0){
tmnow(&now, nil);
for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
if(!(mnt->flag & Ltsnap))
@@ -2638,7 +2831,7 @@
if(now.yday != then.yday){
snprint(buf, sizeof(buf),
"%s@day.%τ", mnt->name, tmfmt(&now, "YYYY.MM.DD[_]hh:mm:ss"));
- snapmsg("main", buf, Lauto);
+ snapmsg(mnt->name, buf, Lauto);
}
if(now.hour != then.hour){
if(mnt->hourly[h][0] != 0)
@@ -2645,7 +2838,7 @@
snapmsg(mnt->hourly[h], nil, 0);
snprint(mnt->hourly[h], sizeof(mnt->hourly[h]),
"%s@hour.%τ", mnt->name, tmfmt(&now, "YYYY.MM.DD[_]hh:mm:ss"));
- snapmsg("main", mnt->hourly[h], Lauto);
+ snapmsg(mnt->name, mnt->hourly[h], Lauto);
}
if(now.min != then.min){
if(mnt->minutely[m][0] != 0)
@@ -2652,7 +2845,7 @@
snapmsg(mnt->minutely[m], nil, 0);
snprint(mnt->minutely[m], sizeof(mnt->minutely[m]),
"%s@minute.%τ", mnt->name, tmfmt(&now, "YYYY.MM.DD[_]hh:mm:ss"));
- snapmsg("main", mnt->minutely[m], Lauto);
+ snapmsg(mnt->name, mnt->minutely[m], Lauto);
}
}
if(now.hour != then.hour)
@@ -2659,6 +2852,7 @@
h = (h+1)%24;
if(now.min != then.min)
m = (m+1)%60;
+}
then = now;
poperror();
}
--- a/load.c
+++ b/load.c
@@ -54,6 +54,12 @@
unpackarena(a, b->data, Arenasz);
if((a->free = avlcreate(rangecmp)) == nil)
error(Enomem);
+ a->logbuf[0] = cachepluck();
+ a->logbuf[1] = cachepluck();
+ a->logbuf[0]->bp = (Bptr){-1, -1, -1};
+ a->logbuf[1]->bp = (Bptr){-1, -1, -1};
+ setflag(a->logbuf[0], Bstatic, 0);
+ setflag(a->logbuf[1], Bstatic, 0);
a->h0 = h0;
a->h1 = h1;
a->used = a->size;
@@ -117,10 +123,6 @@
}
for(i = 0; i < fs->narena; i++){
a = &fs->arenas[i];
- a->logbuf[0] = cachepluck();
- a->logbuf[1] = cachepluck();
- a->logbuf[0]->bp = (Bptr){-1, -1, -1};
- a->logbuf[1]->bp = (Bptr){-1, -1, -1};
loadlog(a, a->loghd);
}
--- a/main.c
+++ b/main.c
@@ -21,7 +21,7 @@
int checkonly;
char *reamuser;
char *dev;
-vlong tracesz = 16*MiB;
+vlong tracesz = 1024*MiB;
vlong cachesz = 512*MiB;
char *srvname = "gefs";
int noneid = 0;
@@ -28,6 +28,7 @@
int nogroupid = 9999;
int admid = -1;
Blk *blkbuf;
+Bfree *bfbuf;
Errctx **errctx;
void
@@ -46,23 +47,6 @@
t->v1 = v1;
}
-static void
-nokill(void)
-{
- char buf[128];
- int fd;
-
- snprint(buf, sizeof(buf), "/proc/%d/ctl", getpid());
- if((fd = open(buf, OWRITE)) == -1){
- fprint(2, "nokill: open %s: %r", buf);
- return;
- }
- if(fprint(fd, "noswap\n") == -1){
- fprint(2, "nokill: write %s: %r", buf);
- return;
- }
-}
-
static uvlong
memsize(void)
{
@@ -118,7 +102,7 @@
{
va_list ap;
- aincl(&fs->rdonly, 1);
+ ainc(&fs->rdonly);
va_start(ap, fmt);
errorv(fmt, ap, 1);
}
@@ -156,6 +140,7 @@
static void
initfs(vlong cachesz)
{
+ Bfree *f, *g;
Blk *b;
if((fs = mallocz(sizeof(Gefs), 1)) == nil)
@@ -167,6 +152,7 @@
}
fs->lrurz.l = &fs->lrulk;
fs->syncrz.l = &fs->synclk;
+ fs->bfreerz.l = &fs->bfreelk;
fs->noauth = noauth;
fs->cmax = cachesz/Blksz;
if(fs->cmax > (1<<30))
@@ -181,12 +167,23 @@
if((fs->dlcache = mallocz(fs->dlcmax*sizeof(Dlist*), 1)) == nil)
sysfatal("malloc: %r");
+ bfbuf = sbrk(fs->cmax * sizeof(Bfree));
+ if(bfbuf == (void*)-1)
+ sysfatal("sbrk: %r");
+
+ g = nil;
+ for(f = bfbuf; f != bfbuf+fs->cmax; f++){
+ f->bp = Zb;
+ f->next = g;
+ g = f;
+ }
+ fs->bfree = g;
+
blkbuf = sbrk(fs->cmax * sizeof(Blk));
if(blkbuf == (void*)-1)
sysfatal("sbrk: %r");
for(b = blkbuf; b != blkbuf+fs->cmax; b++){
- b->bp.addr = -1;
- b->bp.hash = -1;
+ b->bp = Zb;
b->magic = Magic;
lrutop(b);
}
@@ -202,7 +199,6 @@
if (pid < 0)
sysfatal("can't fork: %r");
if (pid == 0) {
- nokill();
id = aincl(&fs->nworker, 1);
if((*errctx = mallocz(sizeof(Errctx), 1)) == nil)
sysfatal("malloc: %r");
@@ -243,21 +239,22 @@
sysfatal("announce %s: %r", ann);
while(1){
if((lctl = listen(adir, ldir)) < 0){
- fprint(2, "listen %s: %r", adir);
+ fprint(2, "listen %s: %r\n", adir);
break;
}
fd = accept(lctl, ldir);
- close(lctl);
if(fd < 0){
- fprint(2, "accept %s: %r", ldir);
+ fprint(2, "accept %s: %r\n", ldir);
+ close(lctl);
continue;
}
- if(!(c = newconn(fd, fd))){
+ c = newconn(fd, fd, lctl);
+ if(c == nil){
+ fprint(2, "newconn: %r\n");
+ close(lctl);
close(fd);
- fprint(2, "%r");
continue;
}
-
launch(runfs, c, "netio");
}
close(actl);
@@ -393,11 +390,14 @@
}
rfork(RFNOTEG);
- nokill();
loadfs(dev);
fs->wrchan = mkchan(32);
fs->admchan = mkchan(32);
- fs->nsyncers = nproc/2;
+ /*
+ * for spinning disks, parallel sync tanks performance
+ * for ssds, it doesn't help much.
+ */
+ fs->nsyncers = 1;
fs->nreaders = nproc/2;
if(fs->nsyncers > fs->narena)
fs->nsyncers = fs->narena;
@@ -422,12 +422,12 @@
for(i = 0; i < nann; i++)
launch(runannounce, ann[i], "announce");
if(srvfd != -1){
- if((c = newconn(srvfd, srvfd)) == nil)
+ if((c = newconn(srvfd, srvfd, -1)) == nil)
sysfatal("%r");
launch(runfs, c, "srvio");
}
if(stdio){
- if((c = newconn(0, 1)) == nil)
+ if((c = newconn(0, 1, -1)) == nil)
sysfatal("%r");
launch(runfs, c, "stdio");
}
--- a/pack.c
+++ b/pack.c
@@ -402,6 +402,7 @@
assert(sz >= Arenasz);
e = p + Arenasz;
+tracex("loghd", a->loghd, a - fs->arenas, -1);
PACK64(p, a->loghd.addr); p += 8; /* freelist addr */
PACK64(p, a->loghd.hash); p += 8; /* freelist hash */
PACK64(p, a->size); p += 8; /* arena size */
--- a/ream.c
+++ b/ream.c
@@ -54,7 +54,7 @@
dir2kv(-1, &d, &kv, vbuf, sizeof(vbuf));
setval(r, &kv);
- p = packsuper(kbuf, sizeof(kbuf), 0);
+ p = packsuper(kbuf, sizeof(kbuf), Qadmroot);
kv.k = kbuf;
kv.nk = p - kbuf;
p = packdkey(vbuf, sizeof(vbuf), -1, "");
@@ -75,7 +75,7 @@
dir2kv(-1, &d, &kv, vbuf, sizeof(vbuf));
setval(r, &kv);
- p = packsuper(kbuf, sizeof(kbuf), 0);
+ p = packsuper(kbuf, sizeof(kbuf), Qmainroot);
kv.k = kbuf;
kv.nk = p - kbuf;
p = packdkey(vbuf, sizeof(vbuf), -1, "");
@@ -168,6 +168,7 @@
char *p;
b = cachepluck();
+
addr = hdaddr+2*Blksz; /* leave room for arena hdr */
a->loghd.addr = -1;
@@ -180,7 +181,7 @@
b->logsz = 0;
b->logp = (Bptr){-1, -1, -1};
b->data = b->buf + Loghdsz;
- setflag(b, Bdirty);
+ setflag(b, Bdirty, 0);
p = b->buf + Loghdsz;
b->logp = (Bptr){-1, -1, -1};
@@ -206,21 +207,18 @@
h0->type = Tarena;
h0->bp.addr = hdaddr;
h0->data = h0->buf+2;
+ packarena(h0->data, Arenasz, a);
finalize(h0);
+ syncblk(h0);
+ a->h0 = h0;
memset(h1->buf, 0, sizeof(h1->buf));
h1->type = Tarena;
h1->bp.addr = hdaddr+Blksz;
h1->data = h1->buf+2;
- finalize(h1);
-
- packarena(h0->data, Arenasz, a);
packarena(h1->data, Arenasz, a);
- finalize(h0);
finalize(h1);
- syncblk(h0);
syncblk(h1);
- a->h0 = h0;
a->h1 = h1;
}
@@ -286,7 +284,7 @@
loadlog(a, a->loghd);
}
- if((mb = newblk(mnt->root, Tleaf, 0)) == nil)
+ if((mb = newblk(mnt->root, Tleaf)) == nil)
sysfatal("ream: allocate root: %r");
holdblk(mb);
initroot(mb);
@@ -296,9 +294,9 @@
mnt->root->ht = 1;
mnt->root->bp = mb->bp;
- if((ab = newblk(adm->root, Tleaf, 0)) == nil)
+ if((ab = newblk(adm->root, Tleaf)) == nil)
sysfatal("ream: allocate root: %r");
- if((ub = newblk(adm->root, Tdat, 0)) == nil)
+ if((ub = newdblk(adm->root, 0, 1)) == nil)
sysfatal("ream: allocate root: %r");
holdblk(ab);
holdblk(ub);
@@ -322,7 +320,7 @@
* a single snap block that the tree will insert
* into, and take a snapshot as the initial state.
*/
- if((tb = newblk(mnt->root, Tleaf, 0)) == nil)
+ if((tb = newblk(mnt->root, Tleaf)) == nil)
sysfatal("ream: allocate snaps: %r");
holdblk(tb);
initsnap(tb, mb, ab);
--- a/snap.c
+++ b/snap.c
@@ -189,10 +189,11 @@
bp = b->logp;
qe.op = Qfree;
qe.bp = b->bp;
- qe.b = b;
+ qe.b = nil;
a = getarena(qe.bp.addr);
qput(a->sync, qe);
traceb("dlfreeb", qe.bp);
+ dropblk(b);
}
}
@@ -377,12 +378,12 @@
i = 0;
n = nil;
- if(waserror()){
- free(n);
- nexterror();
- }
if(flg & Lmut){
n = emalloc(sizeof(Tree), 1);
+ if(waserror()){
+ free(n);
+ nexterror();
+ }
n->memref = 1;
n->dirty = 0;
n->nlbl = 1;
@@ -405,6 +406,7 @@
m[i].op = Oinsert;
tree2kv(n, &m[i], buf[i], sizeof(buf[i]));
i++;
+ poperror();
}else{
t->nlbl++;
m[i].op = Orelink;
@@ -418,7 +420,6 @@
i++;
}
btupsert(&fs->snap, m, i);
- poperror();
free(n);
}
@@ -542,14 +543,9 @@
void
closesnap(Tree *t)
{
- Bfree *f;
-
if(t == nil || adec(&t->memref) != 0)
return;
- f = malloc(sizeof(Bfree));
- f->op = DFtree;
- f->t = t;
- limbo(f);
+ limbo(DFtree, t);
}
void
@@ -585,10 +581,14 @@
* are the responsibility of the other chain; in this chain, we
* leak it and let the last reference in the other chain clean up
*/
- if(t == &fs->snap)
+ if(t == &fs->snap){
+traceb("killsnap", bp);
dl = &fs->snapdl;
- else if(bp.gen > t->base)
+}
+ else if(bp.gen > t->base){
+traceb("killdl", bp);
dl = getdl(t->memgen, bp.gen);
+}
else
return;
if(waserror()){
@@ -596,7 +596,7 @@
nexterror();
}
if(dl->ins == nil || Logspc - dl->ins->logsz < Logslop){
- b = newblk(&fs->snap, Tdlist, 0);
+ b = newblk(&fs->snap, Tdlist);
if(dl->ins != nil){
enqueue(dl->ins);
dropblk(dl->ins);
@@ -610,7 +610,7 @@
}
p = dl->ins->data + dl->ins->logsz;
dl->ins->logsz += 8;
- setflag(dl->ins, Bdirty);
+ setflag(dl->ins, Bdirty, 0);
PACK64(p, bp.addr);
poperror();
putdl(dl);
--- a/test/freplay.c
+++ b/test/freplay.c
@@ -171,8 +171,9 @@
sysfatal("open %s: %r", argv[0]);
if((d = dirfstat(fd)) == nil)
sysfatal("failed to stat file: %r");
- if((membuf = sbrk(d->length)) == nil)
+ if((membuf = sbrk(d->length)) == (void*)-1)
sysfatal("failed to allocate buffer: %r");
+ d->length -= (d->length % IOUNIT);
memset(membuf, 0, d->length);
for(off = 0; off < d->length; off += n)
if((n = read(fd, membuf+off, IOUNIT)) <= 0)
--- a/test/fsbench.c
+++ b/test/fsbench.c
@@ -3,13 +3,13 @@
#include <libsec.h>
#include <thread.h>
-int mainstacksize = 2*1024*1024;
+int mainstacksize = 64*1024*1024;
typedef struct Bench Bench;
enum {
KiB = 1024ULL,
MiB = 1024ULL*KiB,
GiB = 1024ULL*MiB,
- Bufsz = IOUNIT,
+ Bufsz = 128*IOUNIT,
};
enum {
--- a/test/mkfile
+++ b/test/mkfile
@@ -3,8 +3,9 @@
TESTS=\
basic\
build\
+ files\
-all:V: 6.freplay 6.fsbench
+all:V: 6.freplay 6.fsbench 6.files
test:VQ:
@{cd .. && mk 6.out}
--- a/test/run.rc
+++ b/test/run.rc
@@ -1,6 +1,6 @@
#!/bin/rc
-rfork ne
+rfork e
dev=$testdev
if(~ $#testdev 0)
@@ -26,11 +26,11 @@
}
fn ge_ream {
- $O.out -m 512 -r $user -f $1
+ gefs -m 512 -r $user -f $1
}
fn ge_start {
- $O.out -m 512 -A -f $1 -n gefs.test
+ gefs -m 512 -A -f $1 -n gefs.test
while(! test -e /srv/gefs.test)
sleep 0.1
mount -c /srv/gefs.test /n/gefs
@@ -37,7 +37,7 @@
}
fn ge_kill {
- kill $O.out | rc
+ kill gefs | rc
while(test -e /srv/gefs.test)
sleep 0.1
}
@@ -48,7 +48,7 @@
ge_ream $dev
log preparing replay...
rm -f replay.log
- test/6.freplay -l replay.log $dev
+ $O.freplay -l replay.log $dev
ge_start /mnt/replay/data
$*
echo save trace /tmp/trace >> /srv/gefs.test.cmd
@@ -63,9 +63,9 @@
# check blockwise consistency
log starting replay...
- test/6.freplay -c 1 -r replay.log $dev
+ $O.freplay -c 1 -r replay.log $dev
for(i in `{seq 2 $count}){
- $O.out -c -f /mnt/replay/data >[2]/tmp/log || die 'broken'
+ gefs -c -f /mnt/replay/data >[2]/tmp/log || die 'broken'
log stepping $i...
echo step > /mnt/replay/ctl
}
@@ -80,12 +80,12 @@
log reaming...
ge_ream $dev
log preparing build-and-verify...
- test/6.freplay -l replay.log $dev
+ $O.freplay -l replay.log $dev
ge_start $dev
$*
echo save trace /tmp/trace >> /srv/gefs.test.cmd
ge_kill
- $O.out -c -f $dev
+ gefs -c -f $dev
}}
fn buildsys{@{
--- a/tree.c
+++ b/tree.c
@@ -29,7 +29,7 @@
#define efreeblk(t, b) do { \
if(b != nil) \
- freeblk(t, b, b->bp); \
+ freeblk(t, b); \
} while(0)
static void
@@ -326,7 +326,7 @@
* delete messages, so we need to check if
* there's anything in it to copy up.
*/
- if(pp->nl->nval > 0){
+ if(pp->nl != nil){
getval(pp->nl, 0, &kv);
if(pp->nl->nbuf > 0){
getmsg(pp->nl, 0, &m);
@@ -337,7 +337,7 @@
if(nbytes != nil)
*nbytes += valsz(&kv);
}
- if(pp->nr != nil && pp->nr->nval > 0){
+ if(pp->nr != nil){
getval(pp->nr, 0, &kv);
if(pp->nr->nbuf > 0){
getmsg(pp->nr, 0, &m);
@@ -405,11 +405,12 @@
Tree t;
switch(m->op){
- case Oclearb:
case Odelete:
- case Oclobber:
assert(keycmp(kv, m) == 0);
return 0;
+ case Oclearb:
+ case Oclobber:
+ return 0;
case Oinsert:
cpkvp(kv, m, buf, nbuf);
return 1;
@@ -435,6 +436,19 @@
return 0;
}
+static Blk*
+setb(Tree *t, Blk *b)
+{
+	/* non-empty blocks go to enqueue() and are returned; empty ones are freed */
+	if(b->nval != 0){
+		enqueue(b);
+		return b;
+	}
+	freeblk(t, b);
+	return nil;
+}
+
+
static int
pullmsg(Path *p, int i, Kvp *v, Msg *m, int *full, int spc)
{
@@ -482,7 +496,7 @@
*/
full = 0;
spc = Leafspc - blkfill(b);
- n = newblk(t, b->type, 0);
+ n = newblk(t, b->type);
assert(i >= 0 && j >= 0);
while(i < b->nval || j < up->hi){
if(i >= b->nval)
@@ -517,7 +531,7 @@
|| m.op == Oinsert
|| m.op == Odelete){
bp = unpackbp(v.v, v.nv);
- freeblk(t, nil, bp);
+ freebp(t, bp);
}
ok = apply(&v, &m, buf, sizeof(buf));
goto Copyloop;
@@ -541,7 +555,7 @@
|| m.op == Oinsert
|| m.op == Odelete){
bp = unpackbp(v.v, v.nv);
- freeblk(t, nil, bp);
+ freebp(t, bp);
}
p->pullsz += msgsz(&m);
ok = apply(&v, &m, buf, sizeof(buf));
@@ -553,7 +567,8 @@
}
}
p->npull = (j - up->lo);
- p->nl = n;
+ p->op = POmod;
+ p->nl = setb(t, n);
}
/*
@@ -573,7 +588,7 @@
Msg m, u;
b = p->b;
- n = newblk(t, b->type, 0);
+ n = newblk(t, b->type);
for(i = 0; i < b->nval; i++){
if(pp != nil && i == p->midx){
copyup(n, pp, nil);
@@ -625,7 +640,8 @@
j++;
}
p->npull = (j - up->lo);
- p->nl = n;
+ p->op = POmod;
+ p->nl = setb(t, n);
}
/*
@@ -657,8 +673,8 @@
efreeblk(t, r);
nexterror();
}
- l = newblk(t, b->type, 0);
- r = newblk(t, b->type, 0);
+ l = newblk(t, b->type);
+ r = newblk(t, b->type);
d = l;
i = 0;
@@ -701,7 +717,7 @@
|| m.op == Oinsert
|| m.op == Odelete){
bp = unpackbp(v.v, v.nv);
- freeblk(t, nil, bp);
+ freebp(t, bp);
}
ok = apply(&v, &m, buf, sizeof(buf));
goto Copyloop;
@@ -725,7 +741,7 @@
|| m.op == Oinsert
|| m.op == Odelete){
bp = unpackbp(v.v, v.nv);
- freeblk(t, nil, bp);
+ freebp(t, bp);
}
p->pullsz += msgsz(&m);
ok = apply(&v, &m, buf, sizeof(buf));
@@ -738,8 +754,8 @@
}
p->npull = (j - up->lo);
p->op = POsplit;
- p->nl = l;
- p->nr = r;
+ p->nl = setb(t, l);
+ p->nr = setb(t, r);
poperror();
}
@@ -770,8 +786,8 @@
efreeblk(t, r);
nexterror();
}
- l = newblk(t, b->type, 0);
- r = newblk(t, b->type, 0);
+ l = newblk(t, b->type);
+ r = newblk(t, b->type);
d = l;
copied = 0;
halfsz = (2*b->nval + b->valsz)/2;
@@ -808,8 +824,8 @@
setmsg(d, &m);
}
p->op = POsplit;
- p->nl = l;
- p->nr = r;
+ p->nl = setb(t, l);
+ p->nr = setb(t, r);
poperror();
}
@@ -820,7 +836,7 @@
Msg m;
int i;
- d = newblk(t, a->type, 0);
+ d = newblk(t, a->type);
for(i = 0; i < a->nval; i++){
getval(a, i, &m);
setval(d, &m);
@@ -839,11 +855,9 @@
setmsg(d, &m);
}
}
- enqueue(d);
p->midx = idx;
- pp->nl = d;
pp->op = POmerge;
- pp->nr = nil;
+ pp->nl = setb(t, d);
}
/*
@@ -904,8 +918,8 @@
efreeblk(t, r);
nexterror();
}
- l = newblk(t, a->type, 0);
- r = newblk(t, a->type, 0);
+ l = newblk(t, a->type);
+ r = newblk(t, a->type);
d = l;
cp = 0;
sp = -1;
@@ -950,12 +964,10 @@
o++;
}
}
- enqueue(l);
- enqueue(r);
p->midx = midx;
pp->op = POrot;
- pp->nl = l;
- pp->nr = r;
+ pp->nl = setb(t, l);
+ pp->nr = setb(t, r);
poperror();
}
@@ -1054,12 +1066,9 @@
if(p->b->type == Tleaf){
if(!filledleaf(p->b, up->sz)){
updateleaf(t, p-1, p);
- enqueue(p->nl);
rp = p;
}else{
splitleaf(t, up, p, &mid);
- enqueue(p->nl);
- enqueue(p->nr);
}
p->midx = -1;
pp = p;
@@ -1075,12 +1084,9 @@
goto Out;
}
updatepiv(t, up, p, pp);
- enqueue(p->nl);
rp = p;
}else{
splitpiv(t, up, p, pp, &mid);
- enqueue(p->nl);
- enqueue(p->nr);
}
pp = p;
up--;
@@ -1088,7 +1094,7 @@
}
if(pp->nl != nil && pp->nr != nil){
rp = &path[0];
- rp->nl = newblk(t, Tpivot, 0);
+ rp->nl = newblk(t, Tpivot);
rp->npull = pp->npull;
rp->pullsz = pp->pullsz;
copyup(rp->nl, pp, nil);
@@ -1105,9 +1111,9 @@
for(p = path; p != path + npath; p++){
if(p->b != nil)
- freeblk(t, p->b, p->b->bp);
+ freeblk(t, p->b);
if(p->m != nil)
- freeblk(t, p->b, p->m->bp);
+ freeblk(t, p->m); /* release p->m's own block; p->b is handled just above */
dropblk(p->b);
dropblk(p->nl);
dropblk(p->nr);
@@ -1211,7 +1217,7 @@
t->dirty = 1;
unlock(&t->lk);
- freeblk(t, b, b->bp);
+ freeblk(t, b);
dropblk(b);
dropblk(r);
}
@@ -1226,6 +1232,7 @@
Kvp sep;
Bptr bp;
+ assert(!canqlock(&fs->mutlk));
sz = 0;
stablesort(msg, nmsg);
for(i = 0; i < nmsg; i++)
@@ -1358,9 +1365,10 @@
j = bufsearch(p[i], k, &m, &same);
if(j < 0 || !same)
continue;
- if(!(ok || m.op == Oinsert || m.op == Oclearb))
+ if(ok || m.op == Oinsert)
+ ok = apply(r, &m, buf, nbuf);
+ else if(m.op != Oclearb && m.op != Oclobber)
fatal("lookup %K << %M missing insert\n", k, &m);
- ok = apply(r, &m, buf, nbuf);
for(j++; j < p[i]->nbuf; j++){
getmsg(p[i], j, &m);
if(keycmp(k, &m) != 0)