ref: 2106bbe66a53e35785afbb26bd7620eea792c0bf
parent: 7ae531c53df7be259c74f896eb704c74c07c1ece
author: Ori Bernstein <ori@eigenstate.org>
date: Sat Apr 8 17:23:45 EDT 2023
log: fix log compression ordering: if we preallocate the blocks so they never appear in the log, we don't get ordering issues.
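
Roughly, the reworked compresslog() now sizes and preallocates the whole
compressed log up front while holding the arena lock, and only then fills
the preallocated blocks. A rough sketch of the idea, using the names from
the blk.c hunk below (declarations, the rng[] snapshot of the free ranges,
and error handling elided); not the literal code:

	lock(a);
	/* size the compressed log from the free tree */
	for(r = (Arange*)avlmin(a->free); r != nil; r = (Arange*)avlnext(r))
		sz += (r->len == Blksz) ? 8 : 16;
	nl = (sz + Logspc-Logslop - 1)/(Logspc - Logslop);
	/* grab every log block now, so none of them can show up as
	 * free space in the ranges we are about to write out */
	for(i = 0; i < nl; i++)
		log[i] = blkalloc_lk(a, 1);
	unlock(a);
	/* fill the preallocated blocks outside the lock; chainlog()
	 * syncs each block before linking it in via LogChain */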
--- a/TODO
+++ b/TODO
@@ -1,7 +1,6 @@
*** pending disk format changes ***
- small file optimizations
- inline data
- - block fragmentation
*** issues, need to fix ***
- reclaiming data from deleted files is very delayed
--- a/blk.c
+++ b/blk.c
@@ -3,17 +3,16 @@
#include <fcall.h>
#include <avl.h>
+typedef struct Range Range;
+struct Range {
+ vlong off;
+ vlong len;
+};
+
#include "dat.h"
#include "fns.h"
#include "atomic.h"
-typedef struct Range Range;
-
-struct Range {
- vlong off;
- vlong len;
-};
-
static vlong blkalloc_lk(Arena*, int);
static vlong blkalloc(int);
static int blkdealloc_lk(vlong);
@@ -234,6 +233,41 @@
return 0;
}
+static Blk*
+chainlog(Blk *pb, vlong o)
+{
+ char *p;
+ Blk *lb;
+
+ if((lb = cachepluck()) == nil)
+ return nil;
+ initblk(lb, o, -1, Tlog);
+
+ lb->logsz = Loghashsz;
+ p = lb->data + lb->logsz;
+ PACK64(p+0, o|LogAlloc1);
+ PACK64(p+8, (uvlong)LogEnd);
+ finalize(lb);
+
+ if(syncblk(lb) == -1){
+ dropblk(lb);
+ return nil;
+ }
+
+ if(pb != nil){
+ assert(pb->type == Tlog);
+ p = pb->data + pb->logsz;
+ PACK64(p, lb->bp.addr|LogChain);
+ finalize(pb);
+ if(syncblk(pb) == -1){
+ dropblk(pb);
+ return nil;
+ }
+ dropblk(pb);
+ }
+ return lb;
+}
+
/*
* Logs an allocation. Must be called
* with arena lock held. Duplicates some
@@ -243,14 +277,15 @@
static int
logappend(Arena *a, vlong off, vlong len, int op, Blk **tl)
{
- Blk *pb, *lb;
vlong o, ao;
+ Blk *lb;
char *p;
- assert(off % Blksz == 0);
- assert(op == LogAlloc || op == LogFree);
o = -1;
lb = *tl;
+ assert(off % Blksz == 0);
+ assert(op == LogAlloc || op == LogFree);
+ assert(lb == nil || lb->type == Tlog);
dprint("logop %llx+%llx@%x: %s\n", off, len, lb->logsz, (op == LogAlloc) ? "Alloc" : "Free");
/*
* move to the next block when we have
@@ -260,35 +295,11 @@
* 16 bytes of new log entry allocation
* and chaining.
*/
- if(lb == nil || lb->logsz >= Logspc - 40){
- pb = lb;
+ if(lb == nil || lb->logsz >= Logspc - Logslop){
if((o = blkalloc_lk(a, 1)) == -1)
return -1;
- if((lb = cachepluck()) == nil)
+ if((lb = chainlog(lb, o)) == nil)
return -1;
- initblk(lb, o, -1, Tlog);
-
- lb->logsz = Loghashsz;
- p = lb->data + lb->logsz;
- PACK64(p+0, o|LogAlloc1);
- PACK64(p+8, (uvlong)LogEnd);
- finalize(lb);
-
- if(syncblk(lb) == -1){
- dropblk(lb);
- return -1;
- }
-
- if(pb != nil){
- p = pb->data + pb->logsz;
- PACK64(p, lb->bp.addr|LogChain);
- finalize(pb);
- if(syncblk(pb) == -1){
- dropblk(pb);
- return -1;
- }
- dropblk(pb);
- }
*tl = lb;
a->nlog++;
}
@@ -406,11 +417,11 @@
int
compresslog(Arena *a)
{
- Arange *r;
- Range *log, *nlog;
- vlong v, ba, na, graft, oldhd;
- int i, n, sz;
+ vlong v, ba, na, nl, sz, graft, oldhd, *log;
+ int i, n, nr, rv;
Blk *b, *hd, *tl;
+ Range *rng;
+ Arange *r;
Bptr bp;
char *p;
@@ -457,46 +468,64 @@
* keep the merged log in memory for
* a rewrite.
*/
- n = 0;
- sz = 512;
- if((log = malloc(sz*sizeof(Range))) == nil)
+ sz = 0;
+ nr = 0;
+ lock(a);
+ for(r = (Arange*)avlmin(a->free); r != nil; r = (Arange*)avlnext(r)){
+ sz += (r->len == Blksz) ? 8 : 16;
+ nr++;
+ }
+ nl = (sz+Logspc-Logslop-1)/(Logspc - Logslop);
+ if((log = malloc(nl*sizeof(vlong))) == nil){
+ unlock(a);
return -1;
- /*
- * we need to allocate this block before
- * we prepare the ranges we write back,
- * so we don't record this block as
- * available when we compress the log.
- */
- if((b = cachepluck()) == nil){
- free(log);
- return -1;
}
- if((ba = blkalloc_lk(a, 1)) == -1){
+ if((rng = malloc(nr*sizeof(Range))) == nil){
+ unlock(a);
free(log);
return -1;
}
- initblk(b, ba, -1, Tlog);
+ for(i = 0; i < nl; i++){
+ if((log[i] = blkalloc_lk(a, 1)) == -1){
+ unlock(a);
+ free(log);
+ free(rng);
+ return -1;
+ }
+ }
+ nr = 0;
for(r = (Arange*)avlmin(a->free); r != nil; r = (Arange*)avlnext(r)){
- if(n == sz){
- sz *= 2;
- if((nlog = realloc(log, sz*sizeof(Range))) == nil){
+ rng[nr].off = r->off;
+ rng[nr].len = r->len;
+ nr++;
+ }
+ unlock(a);
+
+ n = 0;
+ hd = nil;
+ tl = nil;
+ for(i = 0; i < nr; i++){
+ if(tl == nil || tl->logsz >= Logspc - Logslop){
+ if((tl = chainlog(tl, log[n++])) == nil){
+ free(rng);
free(log);
return -1;
}
- log = nlog;
+ if(hd == nil)
+ hd = tl;
+ p = tl->data + tl->logsz;
}
- log[n].off = r->off;
- log[n].len = r->len;
- n++;
+ if(rng[i].len == Blksz){
+ PACK64(p+0, rng[i].off | LogFree1);
+ tl->logsz += 8;
+ p += 8;
+ }else{
+ PACK64(p+0, rng[i].off | LogFree);
+ PACK64(p+8, rng[i].len);
+ tl->logsz += 16;
+ p += 16;
+ }
}
- hd = b;
- tl = b;
- b->logsz = Loghashsz;
- for(i = 0; i < n; i++)
- if(logappend(a, log[i].off, log[i].len, LogFree, &tl) == -1)
- return -1;
-
- p = tl->data + tl->logsz;
PACK64(p, LogChain|graft);
free(log);
finalize(tl);
@@ -503,14 +532,18 @@
if(syncblk(tl) == -1)
return -1;
+ lock(a);
oldhd = a->head.addr;
a->head.addr = hd->bp.addr;
a->head.hash = hd->bp.hash;
a->head.gen = -1;
- if(syncarena(a) == -1)
+ rv = syncarena(a);
+ unlock(a);
+ if(rv == -1)
return -1;
+
if(oldhd != -1){
- for(ba = oldhd; ba != -1; ba = na){
+ for(ba = oldhd; ba != -1 && ba != graft; ba = na){
na = -1;
bp.addr = ba;
bp.hash = -1;
--- a/dat.h
+++ b/dat.h
@@ -70,13 +70,14 @@
Leafhdsz = 6,
Loghdsz = 2,
Loghashsz = 8,
- Dlhdsz = 2+2+Ptrsz, /* type, size, chain */
+ Dlhdsz = 2+2+Ptrsz, /* type, size, chain */
Dlspc = Blksz - Dlhdsz,
- Rootsz = 4+Ptrsz, /* root pointer */
+ Rootsz = 4+Ptrsz, /* root pointer */
Pivsz = Blksz - Pivhdsz,
- Bufspc = (Blksz - Pivhdsz) / 2,
+ Bufspc = (Blksz - Pivhdsz)/2, /* pivot room */
Pivspc = Blksz - Pivhdsz - Bufspc,
Logspc = Blksz - Loghdsz,
+ Logslop = 16+16+8, /* val, nextb, chain */
Leafspc = Blksz - Leafhdsz,
Msgmax = 1 + (Kvmax > Kpmax ? Kvmax : Kpmax)
};
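
For reference, the new Logslop constant (16+16+8 = 40) appears to budget the
worst case left at the tail of a log block before it must be chained: a full
16-byte off/len entry (val), 16 bytes to record the allocation of the next
log block (nextb), and an 8-byte LogChain pointer (chain); that is the same
40 bytes the old logappend hard-coded as Logspc - 40.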
--- a/fs.c
+++ b/fs.c
@@ -2125,9 +2125,7 @@
void
runtasks(int, void *)
{
-#ifdef NOTYET
int i, c;
-#endif
Fmsg *m;
Amsg *a;
@@ -2147,7 +2145,6 @@
m->a = a;
chsend(fs->wrchan, m);
-#ifdef NOTYET
/*
* compresslog is designed to be concurrent with allocation,
* so it's safe to call from outside the mutator proc; we
@@ -2166,6 +2163,5 @@
fprint(2, "compress log: %r");
}
}
-#endif
}
}