ref: 8edbe1a13c00f0c2f3460e182aa9664292963d32
parent: 32325a04016b615ffbdbe07d3f401dc2dd43d957
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Sep 19 11:10:43 EDT 2021
scan: don't drop files in directory listing
--- a/blk.c
+++ b/blk.c
@@ -692,7 +692,9 @@
syncblk(Blk *b)
{
assert(b->flag & Bfinal);
- print("\tsyncblk: %llx+%llx\n", b->off, Blksz);
+ wlock(b);
+ b->flag &= ~(Bqueued|Bdirty);
+ wunlock(b);
return pwrite(fs->fd, b->buf, Blksz, b->off);
}
@@ -701,16 +703,12 @@
enqueue(Blk *b)
{
print("sync %llx\n", b->off);
- assert(b->flag & Bfinal);
+ assert(b->flag&Bdirty);
+ finalize(b);
if(syncblk(b) == -1){
ainc(&fs->broken);
fprint(2, "write: %r");
- return;
}
- wlock(b);
- b->flag &= ~(Bqueued|Bdirty);
- wunlock(b);
-
}
void
@@ -720,6 +718,9 @@
assert(b->type == Tsuper);
p = b->data;
+ wlock(b);
+ b->flag |= Bdirty;
+ wunlock(b);
memcpy(p + 0, "gefs0001", 8);
PBIT32(p + 8, 0); /* dirty */
PBIT32(p + 12, Blksz);
@@ -741,7 +742,8 @@
// assert((b->flag & Bfinal) == 0);
b->flag |= Bfinal;
- PBIT16(b->buf, b->type);
+ if(b->type != Traw)
+ PBIT16(b->buf, b->type);
switch(b->type){
default:
case Tnone:
@@ -817,8 +819,7 @@
{
if(b == nil)
return;
- if((b->flag & (Bdirty|Bqueued)) == Bdirty)
- enqueue(b);
+ assert((b->flag & Bqueued) || !(b->flag & Bdirty));
if(adec(&b->ref) == 0){
cachedel(b->off);
free(b);
--- a/dat.h
+++ b/dat.h
@@ -184,6 +184,7 @@
};
enum {
+ Onop,
Oinsert,
Odelete,
Owstat,
@@ -397,6 +398,7 @@
Tree root;
int done;
+ int overflow;
Kvp kv;
Key pfx;
char kvbuf[Kvmax];
--- a/fns.h
+++ b/fns.h
@@ -38,7 +38,7 @@
int btupsert(Tree*, Msg*, int);
char *btlookup(Tree*, Key*, Kvp*, Blk**);
char *btlookupat(Blk*, Key*, Kvp*, Blk**);
-char *btscan(Tree*, Scan*, Key*);
+char *btscan(Tree*, Scan*, char*, int);
char *btnext(Scan*, Kvp*, int*);
void btdone(Scan*);
--- a/fs.c
+++ b/fs.c
@@ -662,6 +662,7 @@
mb.k = f->dent->k;
mb.nk = f->dent->nk;
mb.nv = 0;
+//showfs("preremove");
if(btupsert(&fs->root, &mb, 1) == -1){
runlock(f->dent);
rerror(m, "remove: %r");
@@ -740,22 +741,19 @@
int n, ns, done;
Tree *t;
Scan *s;
- Kvp kv;
s = f->scan;
if(s != nil && s->offset != 0 && s->offset != m->offset)
return Edscan;
if(s == nil || m->offset == 0){
- pfx[0] = Kent;
- PBIT64(pfx+1, f->qpath);
- kv.k = pfx;
- kv.nk = sizeof(pfx);
-
print("scan starting\n");
if((s = mallocz(sizeof(Scan), 1)) == nil)
return "out of memory";
+
+ pfx[0] = Kent;
+ PBIT64(pfx+1, f->qpath);
t = (f->root.bp != -1) ? &f->root : &fs->root;
- if((e = btscan(t, s, &kv)) != nil){
+ if((e = btscan(t, s, pfx, sizeof(pfx))) != nil){
free(r->data);
btdone(s);
return e;
@@ -774,12 +772,20 @@
}
p = r->data;
n = m->count;
+ if(s->overflow){
+ if((ns = kv2statbuf(&s->kv, p, n)) == -1)
+ return Edscan;
+ s->overflow = 0;
+ p += ns;
+ n -= ns;
+ }
while(1){
- if((e = btnext(s, &kv, &done)) != nil)
+ if((e = btnext(s, &s->kv, &done)) != nil)
return e;
if(done)
break;
- if((ns = kv2statbuf(&kv, p, n)) == -1){
+ if((ns = kv2statbuf(&s->kv, p, n)) == -1){
+ s->overflow = 1;
fprint(2, "** could not fill buf: %r\n");
break;
}
@@ -825,12 +831,12 @@
if((b = getblk(bp, bh, GBraw)) == nil)
return -1;
- fprint(2, "\treading from %llx (%llx) %s %s\n", bp, b->off, b->buf, b->data);
+ fprint(2, "\treading(%lld+%d) from %llx (%llx) %s %s\n", o, n, bp, b->off, b->buf, b->data);
if(bo+n > Blksz)
n = Blksz-bo;
if(b != nil){
fprint(2, "\tcopying %lld to resp %p\n", n, d);
- memcpy(d, b->buf, n);
+ memcpy(d, b->buf+bo, n);
putblk(b);
}else
memset(d, 0, n);
@@ -864,6 +870,7 @@
//showfs("pre-readb");
while(c != 0){
n = readb(f, p, o, c, e->length);
+print("after readb: p[%d]=%.*s\n", n, n, p);
if(n == -1){
runlock(e);
return Efs;
@@ -924,30 +931,36 @@
fo = o & (Blksz-1);
m->k[0] = Kdat;
-// dprint("offset: %llx\n", fb);
PBIT64(m->k+1, f->qpath);
PBIT64(m->k+9, fb);
+
+print("%lld < %d && (%lld != 0 || %lld != %lld\n", fb, sz, fo, n, Blksz);
b = newblk(Traw);
if(b == nil)
return -1;
- if(o < sz){
+ if(fb < sz && (fo != 0 || n != Blksz)){
fprint(2, "\tappending to block %llx\n", b->off);
- if(fslookup(f, m, &kv, &t, 0) != nil)
- goto new;
+ if(fslookup(f, m, &kv, &t, 0) != nil){
+ putblk(b);
+ return -1;
+ }
bp = GBIT64(kv.v+0);
bh = GBIT64(kv.v+8);
putblk(t);
- if((t = getblk(bp, bh, GBraw)) == nil)
+ if((t = getblk(bp, bh, GBraw)) == nil){
+ putblk(b);
return -1;
+ }
memcpy(b->buf, t->buf, Blksz);
putblk(t);
}
-new:
if(fo+n > Blksz)
n = Blksz-fo;
- memcpy(b->buf, s, n);
+ memcpy(b->buf+fo, s, n);
+print("blk contents{{%.*s}}\n", (int)(fo+n), b->data);
+ enqueue(b);
putblk(b);
fprint(2, "\twrote to new blk %llx at offset %lld\n", b->off, o);
bh = blkhash(b);
@@ -967,7 +980,6 @@
Fid *f;
int i;
-
if((f = getfid(m->fid)) == nil){
rerror(m, Efid);
return;
@@ -988,6 +1000,7 @@
kv[i].v = offbuf[i]+17;
kv[i].nv = 16;
n = writeb(f, &kv[i], p, o, c, f->dent->length);
+ btupsert(&fs->root, &kv[i], 1);
if(n == -1){
// badwrite(f, i);
// FIXME: free pages
@@ -1005,13 +1018,13 @@
kv[i].v = sbuf;
kv[i].nv = 0;
if(m->offset+m->count > f->dent->length){
- fprint(2, "bumping size...\n");
kv[i].op |= Owsize;
kv[i].nv += 8;
PBIT64(kv[i].v, m->offset+m->count);
f->dent->length = m->offset+m->count;
}
- btupsert(&fs->root, kv, i+1);
+ btupsert(&fs->root, &kv[i], 1);
+// btupsert(&fs->root, kv, i+1);
wunlock(f->dent);
r.type = Rwrite;
@@ -1114,23 +1127,24 @@
void
runctl(void *pfd)
{
- char buf[128];
- int fd, n;
+ char buf[256], *arg[4];
+ int fd, n, narg;
fd = (uintptr)pfd;
while(1){
if((n = read(fd, buf, sizeof(buf)-1)) == -1)
break;
- buf[n--] = 0;
- while(buf[n] == ' ' || buf[n] == '\t' || buf[n] == '\n')
- buf[n--] = 0;
- fprint(2, "ctl message: %s\n", buf);
- fprint(fd, "ctl message: %s\n", buf);
- if(strcmp(buf, "show") == 0)
+ buf[n] = 0;
+ narg = tokenize(buf, arg, nelem(arg));
+ if(narg == 0 || strlen(arg[0]) == 0)
+ continue;
+ if(strcmp(arg[0], "show") == 0)
fshowfs(fd, "show");
- else if(strcmp(buf, "check") == 0)
+ else if(strcmp(arg[0], "check") == 0)
checkfs();
+ else if(strcmp(arg[0], "dbg") && narg == 2)
+ debug = atoi(arg[1]);
else
- fprint(fd, "unknown command %s", buf);
+ fprint(fd, "unknown command %s\n", arg[0]);
}
}
--- a/pack.c
+++ b/pack.c
@@ -214,6 +214,7 @@
v = unpackstr(&err, v, ev, &d->gid);
v = unpackstr(&err, v, ev, &d->muid);
if(err){
+ abort();
werrstr("kv too small");
return -1;
}
@@ -238,6 +239,7 @@
if(kv2dir(kv, &d) == -1)
return -1;
+print("\tpackname: %s\n", d.name);
dprint("have %d bytes to pack into\n", nbuf);
if((n = convD2M(&d, (uchar*)buf, nbuf)) <= BIT16SZ){
fprint(2, "here...failed convert??, needed %d\n", GBIT16(buf));
--- a/ream.c
+++ b/ream.c
@@ -130,6 +130,7 @@
s->data = s->buf + Hdrsz;
fillsuper(s);
finalize(s);
+ syncblk(s);
print("superblock @%llx\n", s->off);
for(i = 0; i < fs->narena; i++)
@@ -145,6 +146,7 @@
sysfatal("ream: allocate root: %r");
initroot(r);
finalize(r);
+ syncblk(r);
fs->super = s;
fs->root.bp = r->off;
--- a/tree.c
+++ b/tree.c
@@ -365,34 +365,44 @@
{
Kvp kv;
-
+ /*
+ * It's possible for the previous node to have
+ * been fully cleared out by a large number of
+ * delete messages, so we need to check if
+ * there's anything in it to copy up.
+ */
if(pp->split){
- getval(pp->l, 0, &kv);
- kv.type = Vref;
- kv.bp = pp->l->off;
- kv.bh = blkhash(pp->l);
- kv.fill = blkfill(pp->l);
- setval(n, i++, &kv, 0);
- if(nbytes != nil)
- *nbytes += 2 + valsz(&kv);
-
- getval(pp->r, 0, &kv);
- kv.type = Vref;
- kv.bp = pp->r->off;
- kv.bh = blkhash(pp->r);
- kv.fill = blkfill(pp->r);
- setval(n, i++, &kv, 0);
- if(nbytes != nil)
- *nbytes += 2 + valsz(&kv);
+ if(pp->l->nval > 0){
+ getval(pp->l, 0, &kv);
+ kv.type = Vref;
+ kv.bp = pp->l->off;
+ kv.bh = blkhash(pp->l);
+ kv.fill = blkfill(pp->l);
+ setval(n, i++, &kv, 0);
+ if(nbytes != nil)
+ *nbytes += 2 + valsz(&kv);
+ }
+ if(pp->r->nval > 0){
+ getval(pp->r, 0, &kv);
+ kv.type = Vref;
+ kv.bp = pp->r->off;
+ kv.bh = blkhash(pp->r);
+ kv.fill = blkfill(pp->r);
+ setval(n, i++, &kv, 0);
+ if(nbytes != nil)
+ *nbytes += 2 + valsz(&kv);
+ }
}else{
- getval(pp->n, 0, &kv);
- kv.type = Vref;
- kv.bp = pp->n->off;
- kv.bh = blkhash(pp->n);
- kv.fill = blkfill(pp->n);
- setval(n, i++, &kv, 1);
- if(nbytes != nil)
- *nbytes += 2 + valsz(&kv);
+ if(pp->n->nval > 0){
+ getval(pp->n, 0, &kv);
+ kv.type = Vref;
+ kv.bp = pp->n->off;
+ kv.bh = blkhash(pp->n);
+ kv.fill = blkfill(pp->n);
+ setval(n, i++, &kv, 1);
+ if(nbytes != nil)
+ *nbytes += 2 + valsz(&kv);
+ }
}
return i;
}
@@ -565,7 +575,6 @@
Kvp kv;
assert(b->type == Tleaf);
- assert(b->flag & Bdirty);
switch(m->op&0xf){
case Oinsert:
blkinsert(b, m);
@@ -617,7 +626,7 @@
Msg m;
Blk *d;
- USED(p);
+//showfs("premerge");
if((d = newblk(a->type)) == nil)
return -1;
o = 0;
@@ -640,7 +649,7 @@
setmsg(d, o++, &m);
}
}
- finalize(d);
+//showfs("postmerge");
enqueue(d);
p->merge = 1;
p->midx = idx;
@@ -756,8 +765,6 @@
setmsg(d, o++, &m);
}
}
- finalize(l);
- finalize(r);
enqueue(l);
enqueue(r);
p->merge = 1;
@@ -796,7 +803,7 @@
}
int
-trymerge(Path *p, Path *pp, int idx)
+trybalance(Path *p, Path *pp, int idx)
{
Blk *l,*m, *r;
Kvp km, kl, kr;
@@ -847,12 +854,28 @@
return ret;
}
-static int
-flush(Path *path, int npath)
+int
+insertmsg(Blk *rb, Msg *msg, int nmsg, int sz)
{
+ int i;
- Path *p, *pp;
- Blk *b;
+ if(rb->type == Tleaf && !filledleaf(rb, sz))
+ for(i = 0; i < nmsg; i++)
+ apply(rb, &msg[i]);
+ else if(rb->type == Tpivot && !filledbuf(rb, sz))
+ for(i = 0; i < nmsg; i++)
+ bufinsert(rb, &msg[i]);
+ else
+ return 1;
+ return 0;
+}
+
+static Blk*
+flush(Path *path, int npath, Msg *msg, int nmsg, int *redo)
+{
+
+ Path *p, *pp, *oldroot;
+ Blk *b, *r;
Kvp mid;
Msg m;
int i;
@@ -864,12 +887,15 @@
* we put the new root into if the root gets split.
*/
assert(npath >= 2);
+ r = nil;
pp = nil;
p = &path[npath - 1];
+ oldroot = &path[1];
+ *redo = 1;
if(p->b->type == Tleaf){
if(!filledleaf(p->b, p[-1].sz)){
if(update(p, pp) == -1)
- return -1;
+ goto error;
for(i = p[-1].lo; i < p[-1].hi; i++){
getmsg(p[-1].b, i, &m);
if(filledleaf(p->n, msgsz(&m)))
@@ -876,7 +902,11 @@
break;
apply(p->n, &m);
}
- finalize(p->n);
+ if(p == oldroot){
+ r = p->n;
+ *redo = insertmsg(r, msg, nmsg, path[0].sz);
+ }
+ enqueue(p->n);
}else{
if(split(p->b, p, pp, &mid) == -1)
goto error;
@@ -890,8 +920,8 @@
if(apply(b, &m) == -1)
goto error;
}
- finalize(p->l);
- finalize(p->r);
+ enqueue(p->l);
+ enqueue(p->r);
}
assert(p->n || (p->l && p->r));
p->midx = -1;
@@ -900,11 +930,13 @@
}
for(; p > path; p--){
if(!filledpiv(p->b, 1)){
- if(trymerge(p, pp, p->idx) == -1)
+ if(trybalance(p, pp, p->idx) == -1)
goto error;
/* If we merged the root node, break out. */
- if(p[-1].b == nil && p[0].merge && p[0].nr == nil && p[0].b->nval == 2){
+ if(p == oldroot && p->merge && p->nr == nil && p->b->nval == 2){
/* FIXME: shouldn't p[1].n already be right? */
+ r = p[1].n;
+ *redo = insertmsg(r, msg, nmsg, path[0].sz);
p[1].n = p[0].nl;
p[0].n = nil;
break;
@@ -917,7 +949,11 @@
break;
bufinsert(p->n, &m);
}
- finalize(p->n);
+ if(p == oldroot && !filledbuf(p->n, path[0].sz)){
+ r = p->n;
+ *redo = insertmsg(r, msg, nmsg, path[0].sz);
+ }
+ enqueue(p->n);
}else{
dprint("-- split\n");
if(split(p->b, p, pp, &mid) == -1)
@@ -931,21 +967,22 @@
continue;
bufinsert(b, &m);
}
- finalize(p->l);
- Blk *x = getblk(p->l->off, -1, 0);
- assert(p->l == x);
- finalize(p->r);
+ enqueue(p->l);
+ enqueue(p->r);
}
pp = p;
}
if(path[1].split){
- if((path[0].n = newblk(Tpivot)) == nil)
+ if((r = newblk(Tpivot)) == nil)
goto error;
+ path[0].n = r;
+ *redo = insertmsg(r, msg, nmsg, path[0].sz);
copyup(path[0].n, 0, &path[1], nil);
+ enqueue(r);
}
- return 0;
+ return r;
error:
- return -1;
+ return nil;
}
void
@@ -962,6 +999,10 @@
putblk(p->l);
if(p->r != nil)
putblk(p->r);
+ if(p->nl != nil)
+ putblk(p->nl);
+ if(p->nr != nil)
+ putblk(p->nr);
}
}
@@ -1010,19 +1051,19 @@
int
btupsert(Tree *t, Msg *msg, int nmsg)
{
- int i, npath, redo, dh, nm, height;
+ int i, npath, redo, dh, sz, height;
vlong rh;
Path *path;
Blk *b, *rb;
Kvp sep;
- nm = 0;
+ sz = 0;
for(i = 0; i < nmsg; i++){
if(msg[i].nk + 2 > Keymax){
werrstr("overlong key");
return -1;
}
- nm += msgsz(&msg[i]);
+ sz += msgsz(&msg[i]);
}
again:
@@ -1045,7 +1086,7 @@
path[npath].midx = -1;
npath++;
- path[0].sz = nm;
+ path[0].sz = sz;
while(b->type == Tpivot){
if(!filledbuf(b, path[npath - 1].sz))
break;
@@ -1063,32 +1104,21 @@
npath++;
dh = -1;
- rb = nil;
-// showfs("flushing");
- if(flush(path, npath) == -1)
+ rb = flush(path, npath, msg, nmsg, &redo);
+ if(redo)
+ goto again;
+ if(rb == nil)
goto error;
- if(path[0].n != nil){
+ if(path[0].n != nil)
dh = 1;
- rb = path[0].n;
- }else if(path[1].n != nil){
+ else if(path[1].n != nil)
dh = 0;
- rb = path[1].n;
- }else if(npath >2 && path[2].n != nil){
+ else if(npath >2 && path[2].n != nil)
dh = -1;
- rb = path[2].n;
- }else
+ else
abort();
- if(rb->type == Tleaf && !filledleaf(rb, nm))
- for(i = 0; i < nmsg; i++)
- apply(rb, &msg[i]);
- else if(rb->type == Tpivot && !filledbuf(rb, nm))
- for(i = 0; i < nmsg; i++)
- bufinsert(rb, &msg[i]);
- else
- redo = 1;
- finalize(rb);
assert(rb->off != 0);
rh = blkhash(rb);
@@ -1106,7 +1136,6 @@
}
if(redo)
goto again;
-// showfs("fs");
snapshot();
return 0;
error:
@@ -1209,7 +1238,7 @@
}
char*
-btscan(Tree *t, Scan *s, Key *pfx)
+btscan(Tree *t, Scan *s, char *pfx, int npfx)
{
int i, same;
Scanp *p;
@@ -1219,11 +1248,13 @@
s->done = 0;
s->offset = 0;
- cpkey(&s->pfx, pfx, s->pfxbuf, sizeof(s->pfxbuf));
+ s->pfx.k = s->pfxbuf;
+ s->pfx.nk = npfx;
+ memcpy(s->pfxbuf, pfx, npfx);
- s->kv.v = s->kvbuf+pfx->nk;
+ s->kv.v = s->kvbuf+npfx;
s->kv.nv = 0;
- cpkey(&s->kv, pfx, s->kvbuf, sizeof(s->kvbuf));
+ cpkey(&s->kv, &s->pfx, s->kvbuf, sizeof(s->kvbuf));
lock(&t->lk);
s->root = *t;
@@ -1269,6 +1300,7 @@
Msg m, n, t;
Kvp kv;
+Again:
p = s->path;
h = s->root.ht;
*done = 0;
@@ -1295,6 +1327,7 @@
if((p[i].b = getblk(kv.bp, kv.bh, 0)) == nil)
return "error reading block";
}
+ m.op = Onop;
getval(p[h-1].b, p[h-1].vi, &m);
for(i = h-2; i >= 0; i--){
if(p[i].bi == p[i].b->nbuf)
@@ -1307,7 +1340,6 @@
*done = 1;
return nil;
}
- cpkvp(r, &m, s->kvbuf, sizeof(s->kvbuf));
getval(p[h-1].b, p[h-1].vi, &t);
if(keycmp(&m, &t) == 0)
p[h-1].vi++;
@@ -1317,8 +1349,12 @@
if(keycmp(&m, &t) != 0)
break;
p[i].bi++;
+ m = t;
}
}
+ if(m.op == Odelete)
+ goto Again;
+ cpkvp(r, &m, s->kvbuf, sizeof(s->kvbuf));
return nil;
}
@@ -1346,7 +1382,7 @@
qlock(&fs->snaplk);
lock(&fs->root.lk);
fillsuper(s);
- finalize(s);
+ enqueue(s);
unlock(&fs->root.lk);
for(i = 0; i < fs->narena; i++){