ref: 1a98d4fe239089d86ff0e45939e98a40ccf025ea
dir: /snap.c/
#include <u.h> #include <libc.h> #include <fcall.h> #include <avl.h> #include "dat.h" #include "fns.h" #include "atomic.h" static char* dlsync(Dlist *dl) { char kvbuf[512]; Msg m; if(dl->ins == nil) return nil; if(checkflag(dl->ins, Bdirty)) enqueue(dl->ins); dl->hd = dl->ins->bp; if(dl->hd.addr == dl->tl.addr) dl->tl = dl->hd; m.op = Oinsert; dlist2kv(dl, &m, kvbuf, sizeof(kvbuf)); return btupsert(&fs->snap, &m, 1); } static void dlcachedel(Dlist *dl, int hdel) { uint h; Dlist *d, **p; h = ihash(dl->gen) ^ ihash(dl->bgen); if(hdel){ p = &fs->dlcache[h % fs->dlcmax]; for(d = *p; d != nil; d = d->chain){ if(d->gen == dl->gen && d->bgen == dl->bgen) break; p = &d->chain; } if(d != nil) *p = d->chain; } if(dl == fs->dlhead) fs->dlhead = dl->cnext; if(dl == fs->dltail) fs->dltail = dl->cprev; if(dl->cnext != nil) dl->cnext->cprev = dl->cprev; if(dl->cprev != nil) dl->cprev->cnext = dl->cnext; dl->cnext = nil; dl->cprev = nil; } static Dlist* dlcacheget(vlong gen, vlong bgen) { Dlist *dl; uint h; h = ihash(gen) ^ ihash(bgen); for(dl = fs->dlcache[h % fs->dlcmax]; dl != nil; dl = dl->chain) if(dl->gen == gen && dl->bgen == bgen) break; if(dl != nil) dlcachedel(dl, 0); return dl; } static Dlist* getdl(vlong gen, vlong bgen) { char kbuf[Dlksz], kvbuf[Dlkvpsz]; Dlist *dl, **p; uint h; Msg m; Kvp kv; Key k; if((dl = dlcacheget(gen, bgen)) != nil) return dl; if((dl = mallocz(sizeof(Dlist), 1)) == nil) return nil; kbuf[0] = Kdlist; PACK64(kbuf+1, gen); PACK64(kbuf+9, bgen); k.k = kbuf; k.nk = sizeof(kbuf); /* load up existing dlist */ if(btlookup(&fs->snap, &k, &kv, kvbuf, sizeof(kvbuf)) == nil){ kv2dlist(&kv, dl); if(dl->hd.addr != -1 && (dl->ins = getblk(dl->hd, 0)) == nil){ free(dl); return nil; } goto Found; } /* create a new one if it didn't exist */ dl->gen = gen; dl->bgen = bgen; dl->hd.addr = -1; dl->tl.addr = -1; dl->ins = nil; m.op = Oinsert; dlist2kv(dl, &m, kvbuf, sizeof(kvbuf)); if(btupsert(&fs->snap, &m, 1) != nil){ free(dl); return nil; } Found: h = ihash(gen) ^ ihash(bgen); p = &fs->dlcache[h % fs->dlcmax]; dl->chain = *p; *p = dl; return dl; } void putdl(Dlist *dl) { Dlist *dt; dlcachedel(dl, 0); while(fs->dltail != nil && fs->dlcount >= fs->dlcmax){ dt = fs->dltail; dlsync(dt); dlcachedel(dt, 1); dropblk(dt->ins); free(dt); } dl->cprev = nil; dl->cnext = fs->dlhead; if(fs->dltail == nil) fs->dltail = dl; if(fs->dlhead != nil) fs->dlhead->cprev = dl; fs->dlhead = dl; } char* freedl(Dlist *dl, int docontents) { Bptr bp, fb; Blk *b; char *p; bp = dl->hd; while(bp.addr != -1){ if((b = getblk(bp, 0)) == nil) return Efs; if(docontents){ for(p = b->data; p != b->data+b->logsz; p += 8){ fb.addr = UNPACK64(p); fb.hash = -1; fb.gen = -1; freeblk(nil, nil, fb); } } bp = b->logp; freeblk(&fs->snap, b, b->bp); dropblk(b); } return nil; } static char* mergedl(vlong succ, vlong gen, vlong bgen) { char *err, buf[2][Kvmax]; Dlist *d, *m; Msg msg[2]; Blk *b; // vlong x; err = nil; d = getdl(gen, bgen); m = getdl(succ, bgen); if(d == nil || m == nil){ err = Efs; goto Out; } if(m->ins == nil){ m->hd = d->hd; m->tl = d->tl; m->ins = d->ins; }else{ m->hd = d->hd; if((b = getblk(d->tl, 0)) == nil) goto Out; msg[0].op = Odelete; dlist2kv(d, &msg[0], buf[0], sizeof(buf[0])); msg[1].op = Oinsert; dlist2kv(m, &msg[1], buf[1], sizeof(buf[1])); if((err = btupsert(&fs->snap, msg, 2)) != nil) sysfatal("merge deadlists: %s", err); packbp(b->buf+2, Blksz, &m->hd); enqueue(b); dropblk(b); // TODO: merge // if(m->ins->logsz + d->ins->logsz < Dlspc){ // p = d->ins->data; // q = m->ins->data + m->ins->logsz; // for(i = 0; i < d->ins->logsz; i += 8){ // m->ins->logsz += 8; // x = UNPACK64(p); // PACK64(q, x); // p += 8; // q += 8; // } // } } Out: if(d != nil) putdl(d); if(m != nil) putdl(m); return err; } vlong successor(vlong gen) { char *e, pfx[9]; vlong id, succ; Scan s; pfx[0] = Kslink; succ = -1; PACK64(pfx+1, gen); btnewscan(&s, pfx, sizeof(pfx)); if((e = btenter(&fs->snap, &s)) != nil) goto Err; if((e = btnext(&s, &s.kv)) != nil) goto Err; if(!s.done) kv2link(&s.kv, &id, &succ); if((e = btnext(&s, &s.kv)) != nil) goto Err; if(!s.done) sysfatal("multiple successors in cleanup"); btexit(&s); return succ; Err: fprint(2, "error getting snap successor: %s", e); btexit(&s); return -1; } static char* reclaimblocks(vlong gen, vlong succ, vlong prev) { char *e, pfx[9]; Dlist dl; Scan s; Msg m; pfx[0] = Kdlist; PACK64(pfx+1, gen); btnewscan(&s, pfx, sizeof(pfx)); if((e = btenter(&fs->snap, &s)) != nil) return e; while(1){ if((e = btnext(&s, &s.kv)) != nil) break; if(s.done) break; kv2dlist(&s.kv, &dl); /* * delete the dlist before processing it; it's * better to leak blocks than to double free. */ m.op = Odelete; m.Kvp = s.kv; if((e = btupsert(&fs->snap, &m, 1)) != nil) break; if(succ == -1) e = freedl(&dl, 1); else if(dl.bgen > prev) e = freedl(&dl, 0); else e = mergedl(succ, dl.gen, dl.bgen); if(e != nil) break; } btexit(&s); return e; } /* * Removes a label from a snapshot, allowing * it to be reclaimed if it is not a direct * predecessor of more than one other snapshot. * * If it has one successor and no label, then * it will be merged with that successor. */ char* delsnap(Tree *t, vlong succ, char *name) { char buf[2][Kvmax], *e; Msg m[2]; int nm; nm = 0; if(name != nil){ if(strcmp(name, "dump") == 0 || strcmp(name, "empty") == 0 || strcmp(name, "adm") == 0) return Ename; m[nm].op = Odelete; m[nm].k = buf[nm]; if((e = packlbl(buf[nm], sizeof(buf[nm]), name)) == nil) return Ename; m[nm].nk = e - m[nm].k; m[nm].v = nil; m[nm].nv = 0; t->nlbl--; nm++; } if(t->nlbl == 0 && t->nsucc <= 1){ m[nm].op = Odelete; m[nm].k = buf[nm]; if((e = packsnap(buf[nm], sizeof(buf[nm]), t->gen)) == nil) return Ename; m[nm].nk = e - m[nm].k; m[nm].v = nil; m[nm].nv = 0; nm++; }else{ m[nm].op = Oinsert; tree2kv(t, &m[nm], buf[nm], sizeof(buf[nm])); nm++; } if((e = flushdlcache(1)) != nil) return e; if((e = btupsert(&fs->snap, m, nm)) != nil) return e; if((e = reclaimblocks(t->gen, succ, t->prev)) != nil) return e; return nil; } /* * Attaches a label to a tree, incrementing * its reference count. This labelled snapshot * will show up in the dump. */ char* tagsnap(Tree *t, char *name, int mutable) { char buf[3][Kvmax]; Msg m[3]; Tree *n; int i; if(strcmp(name, "dump") == 0 || strcmp(name, "empty") == 0 || strcmp(name, "adm") == 0) return Ename; i = 0; if(mutable){ if((n = mallocz(sizeof(Tree), 1)) == nil) return Enomem; n->memref = 1; n->dirty = 0; n->nlbl = 1; n->nsucc = 0; n->ht = t->ht; n->bp = t->bp; n->prev = t->prev; n->base = t->gen; n->gen = aincv(&fs->nextgen, 1); n->memgen = aincv(&fs->nextgen, 1); m[i].op = Oretag; retag2kv(t->gen, 0, 1, &m[i], buf[i], sizeof(buf[i])); i++; m[i].op = Oinsert; lbl2kv(name, n->gen, 1, &m[i], buf[i], sizeof(buf[i])); i++; }else{ t->nlbl++; m[i].op = Oretag; retag2kv(t->gen, 1, 0, &m[i], buf[i], sizeof(buf[i])); i++; m[i].op = Oinsert; lbl2kv(name, t->gen, 0, &m[i], buf[i], sizeof(buf[i])); i++; } return btupsert(&fs->snap, m, i); } /* * Updates a snapshot; keeps the generation the same if possible, * otherwise moves to a new generation. A snapshot may only stay * at the same generation as long as it is at the tip of a snapshot * list; once it's observable by a derived snapshot it must be * immutable. */ char* updatesnap(Tree **r, Tree *o, char *lbl) { char buf[4][Kvmax], *e; Msg m[4]; Tree *t; int i; if(!o->dirty) return nil; /* update the old kvp */ o->nlbl--; o->nsucc++; /* create the new one */ if((t = mallocz(sizeof(Tree), 1)) == nil) return Enomem; t->memref = 1; t->dirty = 0; t->nlbl = 1; t->nsucc = 0; t->ht = o->ht; t->bp = o->bp; t->base = o->base; if(o->nlbl == 0 && o->nsucc == 1) t->prev = o->prev; else t->prev = o->gen; t->gen = o->memgen; t->memgen = aincv(&fs->nextgen, 1); i = 0; m[i].op = Oretag; retag2kv(o->gen, -1, 1, &m[i], buf[i], sizeof(buf[i])); i++; m[i].op = Oinsert; tree2kv(t, &m[i], buf[i], sizeof(buf[i])); i++; m[i].op = Oinsert; link2kv(t->prev, t->gen, &m[i], buf[i], sizeof(buf[i])); i++; m[i].op = Oinsert; lbl2kv(lbl, t->gen, Lmut, &m[i], buf[i], sizeof(buf[i])); i++; if((e = btupsert(&fs->snap, m, i)) != nil){ free(t); return e; } /* only update the dirty status after we sync */ o->dirty = 0; /* this was the last ref to the snap */ if(o->nlbl == 0 && o->nsucc == 1) delsnap(o, -1, nil); closesnap(o); asetp(r, t); return nil; } /* * open snapshot by label, returning a tree. */ Tree* opensnap(char *label, int *mut) { char *p, buf[Kvmax]; Tree *t; uint flg; vlong gen; Kvp kv; Key k; /* Klabel{"name"} => Ksnap{id} */ if((p = packlbl(buf, sizeof(buf), label)) == nil) return nil; k.k = buf; k.nk = p - buf; if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil) return nil; assert(kv.nv == 1+8+4); gen = UNPACK64(kv.v + 1); flg = UNPACK32(kv.v + 1+8); if(mut != nil) *mut = !!(flg&Lmut); if((t = mallocz(sizeof(Tree), 1)) == nil) goto Error; if((p = packsnap(buf, sizeof(buf), gen)) == nil) goto Error; k.k = buf; k.nk = p - buf; if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil) abort(); if(unpacktree(t, kv.v, kv.nv) == nil) abort(); t->memref = 1; t->memgen = aincv(&fs->nextgen, 1); return t; Error: free(t); return nil; } /* * close snapshot, flushing and freeing in-memory * representation. */ void closesnap(Tree *t) { Bfree *f; ulong ge; if(t == nil || adec(&t->memref) != 0) return; if((f = malloc(sizeof(Bfree))) == nil) abort(); f->op = DFtree; f->t = t; ge = agetl(&fs->epoch); f->next = fs->limbo[ge]; fs->limbo[ge] = f; } char* flushdlcache(int clear) { Dlist *dl, *n; char *e; for(dl = fs->dlhead; dl != nil; dl = n){ n = dl->cnext; if((e = dlsync(dl)) != nil) return e; if(clear){ if(dl->ins != nil) dropblk(dl->ins); free(dl); } } if(clear){ fs->dlhead = nil; fs->dltail = nil; memset(fs->dlcache, 0, fs->dlcmax*sizeof(Dlist*)); } return nil; } /* * Marks a block as killed by the tree * t, which means that it will be free * for use after t is reclaimed. * * t must be an active snapshot with * no successors. */ int killblk(Tree *t, Bptr bp) { Dlist *dl; Blk *b; char *p; /* leak it and let the full collection clean it up */ if(bp.gen <= t->base) return 0; if(t == &fs->snap) dl = &fs->snapdl; else if((dl = getdl(t->gen, bp.gen)) == nil) return -1; if(dl->ins == nil || Logspc - dl->ins->logsz < Logslop){ if((b = newblk(&fs->snap, Tdlist)) == nil){ putdl(dl); return -1; } if(dl->ins != nil){ enqueue(dl->ins); /* enqueuing updates the hashes */ if(dl->ins->bp.addr == dl->tl.addr) dl->tl = dl->ins->bp; b->logp = dl->ins->bp; }else{ dl->tl = b->bp; b->logp = (Bptr){-1, -1, -1}; } cacheins(b); dl->hd = b->bp; dl->ins = b; } p = dl->ins->data + dl->ins->logsz; dl->ins->flag |= Bdirty; dl->ins->logsz += 8; PACK64(p, bp.addr); if(t != &fs->snap) putdl(dl); return 0; }