shithub: neoventi

Download patch

ref: 9f6b8e1dbc8af694eab3abaac1a529be89b8b0ff
parent: 194a9f6821c6870056be370ea88e62c0e76c8ca3
author: Noam Preil <noam@pixelhero.dev>
date: Sun Sep 7 18:19:19 EDT 2025

fixes

--- /dev/null
+++ b/arena.c
@@ -1,0 +1,142 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "neoventi.h"
+
+// Rebuild the directory entry ci from the clump header stored at addr in
+// the arena's data log.
+// Reads 30 bytes at addr and requires them to start with the arena's clump
+// magic; then sets ci[0] to 1 and copies the two 16-bit fields found at
+// header offsets 5 and 7 into ci+1 and ci+3.
+// Returns 1 on success, 0 with errstr set on a failed read or a bad magic.
+int
+arenarepair(VtArena *arena, char *ci, u64int addr)
+{
+	uchar buf[30];
+	if(!vtarenaread(arena, addr, buf, 30)){
+		werrstr("arenarepair: %r");
+		return 0;
+	}
+	if(U32GET(buf) != arena->clumpmagic){
+		werrstr("arenarepair: corrupt");
+		return 0;
+	}
+	ci[0] = 1;
+	U16PUT(ci + 1, U16GET(buf + 5));
+	U16PUT(ci + 3, U16GET(buf + 7));
+	return 1;
+}
+
+// Check the arena's clump directory against its data log.
+// Nothing is done when the arena and index clump counts already agree.
+// Otherwise one directory entry is visited per clump counted in arenastats,
+// entries that do not start with the clump magic are rebuilt via
+// arenarepair, and the summed clump sizes must equal arenastats.used.
+// Returns 1 on success, 0 with errstr set on failure.
+// NOTE(review): repaired entries are only changed in the cached block here;
+// nothing in this function writes them to disk - confirm that is intended.
+int
+vtarenasync(VtArena *arena)
+{
+	usize n, m;
+	char *buf, *ci;
+	u32int block, perblock, off;
+	u64int addr;
+	int ok;
+	// First, check if the directory and the data log are in sync
+	n = arena->arenastats.clumps;
+	fprint(2, "clumps: %llud, %llud\n", (uvlong)n, (uvlong)arena->indexstats.clumps);
+	if(n == arena->indexstats.clumps)
+		return 1;
+	buf = nil;
+	perblock = arena->blocksize / ClumpInfoSize;
+	off = 0;
+	addr = 0;
+	ok = 1;
+	for(m = 0; m < n; m += 1){
+		if(m % perblock == 0){
+			// Load next block; directory blocks count down from the
+			// last block of the arena.
+			if(buf != nil)
+				cacheunlock(arena->index, block);
+			block = (arena->size / arena->blocksize) - 1 - (m / perblock);
+			buf = vtarenareadblock(arena, block);
+			if(buf == nil){
+				werrstr("vtarenasync: unable to read CIG: %r");
+				return 0;
+			}
+			off = 0;
+		}
+		fprint(2, "validating clump %llud, offset %d, block %d\n", (uvlong)m, off, block);
+		// Take the entry at the current offset before advancing, so the
+		// first slot of each block is used and the last one stays in bounds.
+		ci = &buf[off];
+		off += ClumpInfoSize;
+		// NOTE(review): arenarepair does not store a magic in ci, so a
+		// repaired entry presumably fails this test again - confirm layout.
+		if(U32GET(ci) != arena->clumpmagic){
+			if(!arenarepair(arena, ci, addr)){
+				werrstr("clump %llud needs repair, at address %llud: %r", (uvlong)m, addr);
+				ok = 0;
+				break;
+			}
+		}
+		// Advance by the entry's 16-bit size field plus 38 bytes of
+		// per-clump overhead (the write path adds 38+len to used).
+		addr += U16GET(ci + 1) + 38;
+	}
+	// Release the directory block still held from the loop; buf is non-nil
+	// exactly when one was loaded successfully.
+	if(buf != nil)
+		cacheunlock(arena->index, block);
+	if(!ok)
+		return 0;
+	if(arena->arenastats.used != addr){
+		werrstr("corrupt: found addr %llud, expected %llud", addr, (uvlong)arena->arenastats.used);
+		return 0;
+	}
+	return 1;
+}
+
+// Placeholder: not implemented yet. Ignores its arguments and returns 0.
+int
+vtarenawritedirectory(VtArena *arena, char score[20], u16int uncsize)
+{
+	return 0;
+}
+
+// Write the arena's in-memory statistics to the block at
+// arena->base + arena->size (presumably the arena trailer - confirm).
+// The block comes from the cache, or is read from disk on a miss; the index
+// clump count and the arena clump count, used bytes and sealed flag are
+// patched in after the name field, and the whole block is written back.
+// Returns 1 on success, 0 with errstr set if the write is short or fails.
+// A failed read is fatal.
+int
+vtarenawb(VtArena *arena)
+{
+	char *buf, *p;
+	u32int blockindex, t;
+	int ret;
+	blockindex = arena->size / arena->blocksize;
+	// FIXME rebuild on vtarenareadblock
+	if(!cachelookup(&buf, arena->index, blockindex)){
+		if(pread(arena->fd, buf, arena->blocksize, arena->base + arena->size) != arena->blocksize)
+			sysfatal("failed to pread");
+	}
+	p = buf + 8 + NameSize;
+	// Only support split stats
+	U32PUT(p, arena->indexstats.clumps);
+	// If it wasn't already using split arenastats and index stats, make it so.
+	// If it wasn't, that space should be clear.
+	p[37] = 1;
+	U32PUT(p + 38, arena->arenastats.clumps);
+	U64PUT(p + 46, arena->arenastats.used, t);
+	p[62] = arena->arenastats.sealed;
+	ret = pwrite(arena->fd, buf, arena->blocksize, arena->base + arena->size);
+	cacheunlock(arena->index, blockindex);
+	if(ret != arena->blocksize){
+		werrstr("writeback failed: %r");
+		return 0;
+	}
+	return 1;
+}
\ No newline at end of file
--- a/cache.c
+++ b/cache.c
@@ -2,8 +2,7 @@
 #include <libc.h>
 
 // This is designed to run with a maximum load factor < 1,
-// and does not support resizing; there is NO EVICTION MECHANISM.
-// Insertion when at 100% capacity is definitionally user error.
+// and does not support resizing
 
 #pragma pack on
 typedef struct {
@@ -119,13 +118,7 @@
 getbucket(u32int key)
 {
 	// Fibonacci hashing!
-	bucket_t *bucket = &buckets[key * 11400714819323198485ULL >> CACHESHIFT];
-	bucket_t *last = &buckets[BUCKETS-1];
-	if(bucket > last){
-		fprint(2, "internal error: computed bucket hash is out of range\n");
-		abort();
-	}
-	return bucket;
+	return &buckets[key * 11400714819323198485ULL >> CACHESHIFT];
 }
 
 static void
--- a/checkarena.c
+++ b/checkarena.c
@@ -23,16 +23,16 @@
 		if(i % 20 == 19 || i+1 == numarenas)
 			fprint(2, "\n");
 		for(int j = 0 ; 1 ; j += 1){
-			if(vtreadarena(&arenas[i], addr, magicbuf, 4) != 4){
+			if(vtarenaread(&arenas[i], addr, magicbuf, 4) != 4){
 				fprint(2, "corrupt arena");
 				bad = 1;
 				break;
 			}
 			magic = U32GET(magicbuf);
-			if(magic == 0) // TODO: verify stopping point
+			if(magic == 0 || addr == 0) // TODO: verify stopping point
 				break;
 			if(magic == arenas[i].clumpmagic)
-				fprint(2, "valid!...");
+				fprint(2, ".");
 			else{
 				bad = 1;
 				fprint(2, "arena contains invalid clumps! magic: %08x, clumpmagic: %08x", magic, arenas[i].clumpmagic);
@@ -128,9 +128,9 @@
 	parseargs(argc, argv);
 	fprint(2, "loading config... ");
 	loadconfig();
+	cacheinit();
 	fprint(2, "loading arena partition metadata... ");
 	initarenas();
-	cacheinit();
 	fprint(2, "...scanning.\n");
 	if(!checkarenas())
 		sysfatal("arenas do be corrupt, yo!");
--- a/disk.c
+++ b/disk.c
@@ -179,7 +179,7 @@
 // If the data is already in cache, it will not be read again.
 // Caller is responsible for calling cachedone(arena->fd, blockindex);
 char*
-vtreadarenablock(VtArena *arena, u32int blockindex)
+vtarenareadblock(VtArena *arena, u32int blockindex)
 {
 	char *buf;
 	if(arena->blocksize != 8192)
@@ -186,6 +186,7 @@
 		sysfatal("invalid blocksize %d\n", arena->blocksize);
 	if(!cachelookup(&buf, arena->index, blockindex)){
 		if(pread(arena->fd, buf, arena->blocksize, arena->base+(blockindex*arena->blocksize)) != arena->blocksize){
+			werrstr("Failed to read: %r");
 			return nil;
 		}
 	}
@@ -193,7 +194,7 @@
 }
 
 u16int
-vtreadarena(VtArena *arena, u64int addr, uchar *dbuf, u16int reqsize)
+vtarenaread(VtArena *arena, u64int addr, uchar *dbuf, u16int reqsize)
 {
 	u16int off, n, m, size;
 	u32int blockindex;
@@ -204,7 +205,7 @@
 	n = 0;
 	while(n < size){
 		blockindex = addr/arena->blocksize;
-		buf = vtreadarenablock(arena, blockindex);
+		buf = vtarenareadblock(arena, blockindex);
 		if(buf == nil)
 			// TODO: I/O error should not crash the disk layer.
 			// Might be good to be able to recover cached data in this case?
@@ -226,7 +227,7 @@
 {
 	u16int size = addr.blocks<<ABlockLog;
 	uchar buf[0x10000];
-	if(!vtreadarena(addr.s_arena, addr.offset, buf, size)){
+	if(!vtarenaread(addr.s_arena, addr.offset, buf, size)){
 		werrstr("arena read failed: %r");
 		return 0;
 	}
@@ -244,7 +245,7 @@
 {
 	index.arena->arenastats.sealed = 1;
 	if(index.arena->index+1 == numarenas)
-		sysfatal("TODO last arena full!");
+		sysfatal("TODO last arena, %d, full, at size %d!", index.arena->index, index.arena->arenastats.used);
 	index.arena = &arenas[index.arena->index+1];
 	if(index.arena->block != 0 || index.arena->offset != 0 || index.arena->blockremain != index.arena->blocksize || index.arena->buf != nil)
 		sysfatal("TODO handle writing to venti which previously experienced nonlinear writes from other software?");
@@ -297,6 +298,7 @@
 static void
 getblock(void)
 {
+	// TODO rebuild on vtarenareadblock
 	if(!cachelookup(&index.arena->buf, index.arena->index, index.arena->block)){
 		// Don't read when there's no data _to_ read; saves on unnecessary cache fills.
 		if(index.arena->offset == 0)
@@ -325,7 +327,6 @@
 {
 	index.arena->offset += n;
 	index.arena->blockremain -= n;
-	index.arena->arenastats.used += n;
 }
 
 int
@@ -366,6 +367,7 @@
 		}
 	}
 	index.arena->arenastats.uncsize += len;
+	index.arena->arenastats.used += 38+len;
 	index.arena->arenastats.clumps += 1;
 	blockflush();
 	return vtarenawb(index.arena);
@@ -446,6 +448,7 @@
 	}
 	arena->block = arena->arenastats.used / arena->blocksize;
 	arena->offset = arena->arenastats.used & (arena->blocksize - 1);
+	fprint(2, "Resuming: used %lld, block %lld, blocksize %lld, offset %d\n", arena->arenastats.used, arena->block, arena->blocksize, arena->offset);
 	// Probably not necessary, but factors out arena->offset to arenastats.used and arena->blocksize
 	// so that this operation depends only on already-computed values, avoiding a false dependency on
 	// the arena->offset calculation; the true value is just "blocksize - offset".
@@ -464,8 +467,6 @@
 		sysfatal("arena name mismatch: %s vs %s", arena->name, buf + 8);
 	if(index.arena == nil)
 		index.arena = arena;
-	if(arena->indexstats.clumps != arena->arenastats.clumps && !vtarenareaddirectory(arena))
-		sysfatal("unable to load arena directory");
 }
 
 static void
@@ -546,6 +547,16 @@
 	tabsize = arenabase - tabbase;
 	arenapartcheck(magic, version, blocksize, arenabase, tabbase);
 	readarenatable(fd, tabbase, tabsize, blocksize);
+}
+
+void
+arenassync(void)
+{
+	// Sync every arena in table order; the first failure is fatal.
+	VtArena *a;
+	for(a = arenas; a < arenas + numarenas; a += 1)
+		if(!vtarenasync(a))
+			sysfatal("syncarenas: %r");
 }
 
 void
--- a/neoventi.c
+++ b/neoventi.c
@@ -94,17 +94,12 @@
 init(void)
 {
 	loadconfig();
+	cacheinit();
 	initarenas();
+	arenassync();
 	initindex();
-	cacheinit();
 }
 
-static void
-validate(void)
-{
-	fprint(2, "TODO: validate initial state");
-}
-
 void
 threadmain(int argc, char **argv)
 {
@@ -111,7 +106,6 @@
 	parseargs(argc, argv);
 	print("Initializing neoventi build 5... ");
 	init();
-	validate();
 	print("initialized, launching server...");
 	print("overridding tcp address for simultaneous testing! tcp!*!14011...\n");
 	tcpaddr = "tcp!*!14011";
--- a/neoventi.h
+++ b/neoventi.h
@@ -137,7 +137,6 @@
 void serve(char *addr);
 /* Looks up the address of a score on disk using the index */
 int vtreadlookup(u8int *score, VtAddress *addr);
-u16int vtreadarena(VtArena *arena, u64int addr, uchar *dbuf, u16int reqsize);
 int readclump(uchar *dst, VtAddress addr);
 int vtwriteclump(char *dst, u16int len, uchar *score);
 int Brdu32(Biobufhdr *bio, u32int *u32);
@@ -154,7 +153,9 @@
 extern char *arenapath;
 extern char *isectpath;
 
-/* Directory interface */
-int vtarenareaddirectory(VtArena *);
+void arenassync(void);
+int vtarenasync(VtArena *);
 int vtarenawritedirectory(VtArena *, char score[20], u16int uncsize);
 int vtarenawb(VtArena *);
+char* vtarenareadblock(VtArena *arena, u32int blockindex);
+u16int vtarenaread(VtArena *arena, u64int addr, uchar *dbuf, u16int reqsize);
--- a/notebook
+++ b/notebook
@@ -3232,4 +3232,60 @@
 % venti/read f1d2d2f924e986ac86fdf7b36c94bcdf32beec15
 foo
 
-Woo!
\ No newline at end of file
+Woo!
+
+And, now writing offsets and such correctly. Need to actually implement arena directory loading, now..
+
+...okay, there's a fifth thing that needs updating: the header?
+
+That is, we've got:
+
+- data log
+- arena header
+- arena trailer
+- arena directory
+- index
+
+Is the header necessary? We actually are doing the trailer, not the header, that's where we get the info. Ah, never mind, header doesn't include the actual info, we're good.
+
+What we need now is basically syncarena() - though, actually, venti roots it from syncindex, not syncarena. That actually calls syncarena on _every_ arena, then - if necessary - indexes any unindexed clumps and writes back the arena. Honestly, I understand the process well enough that I don't really care how they're doing it any more... except that I haven't actually looked at the trailer logic, and there are a few interesting questions. Probably worth looking just to be safe.
+
+Yeah, okay, they're doing it in increments of the block, but I think it makes more sense to load the blocks when on block boundaries and simply proceed until hitting the clump counter.
+
+Looks like _blocks_ grow backwards, but within blocks, we write beginning-to-end, which makes sense.
+
+...annnnnnd broke the gefs I was using to host the code while working on it. Shows me for not self-hosting - surprised it lasted so long, given that gefs was not stable when I started using it on the pinebook, but whatevs, got everything safely moved.
+
+
+
+
+Current test:
+- Reformat
+- Write
+- Kill, relaunch
+- Write new block, check address
+
+First one is written to offset 38 of block 0, which makes sense; 38 bytes for the header.
+Second... not so much. 
+WPTR block 2097152 offset 38, n 0 nn 4, block must be at least 4
+
+Offset 38 of a very large block number, why?
+
+During load, arena->block = arena->arenastats.used / arena->blocksize;
+
+Is used weird?
+
+Resuming: used 34359738368, block 4194304, blocksize 8192, offset 0
+
+Yes, yes it is. Ohhhh, putting it as u32, but it's actually u64. Oops.
+Resuming: used 4, block 0, blocksize 8192, offset 4
+
+That's better.
+
+WPTR block 0 offset 42, n 0 nn 4, block must be at least 4; Now resuming the correct block, coolio.
+
+That offset is... suspicious, though. That's missing the header? Yep. Cool, now resuming is reliably working.
+
+./6.neoventi: syncarenas: clump 1 needs repair, at address 4: arenarepair: corrupt
+
+...other than that part. The repair code might be wrong - yeah, it was: it wasn't adding the clump size to the address during the computation. That's fixed.
\ No newline at end of file
--