shithub: neoventi

Download patch

ref: 89bdceb722cd18eeddddcf884e845f561459b49d
parent: 4b60d3dd32efd00a1d07cb08662926ba9836719f
author: Noam Preil <noam@pixelhero.dev>
date: Sat Jan 4 21:05:52 EST 2025

fix check tool!!

--- a/badcheck.c
+++ /dev/null
@@ -1,50 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include "neoventi.h"
-
-extern VtArena *arenas;
-extern u32int numarenas;
-
-int
-checkarenas(void)
-{
-	int bad = 0;
-	u8int magicbuf[4];
-	u32int magic;
-	u64int addr;
-	for(int i = numarenas-1; i >= 0; i -= 1){
-		addr = 0;
-		fprint(2, ".");
-		if(i % 20 == 19 || i+1 == numarenas)
-			fprint(2, "\n");
-		for(int j = 0 ; 1 ; j += 1){
-			if(vtreadarena(&arenas[i], addr, magicbuf, 4) != 4){
-				sysfatal("corrupt arena");
-			}
-			magic = U32GET(magicbuf);
-			if(magic == 0) // TODO: verify stopping point
-				break;
-			if(magic == arenas[i].clumpmagic)
-				fprint(2, "valid!...");
-			else{
-				fprint(2, "arena contains invalid clumps!");
-				break;
-			}
-		}
-	};
-	return bad;
-}
-
-void
-threadmain(int argc, char **argv)
-{
-	if(argc != 1)
-		sysfatal("i'm a dummy, sorry.");
-	fprint(2, "loading arena partition metadata... ");
-	initarenas();
-	fprint(2, "...scanning.\n");
-	if(!checkarenas())
-		sysfatal("arenas do be corrupt, yo!");
-	fprint(2, "looks like you're good - for now...\n");
-}
--- /dev/null
+++ b/checkarena.c
@@ -1,0 +1,138 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "neoventi.h"
+
+extern VtArena *arenas; // arena table, declared here and defined outside this file
+char *arenapath; // filled in by configparse() from the arenas directive
+char *isectpath; // filled in by configparse() from the isect directive
+extern u32int numarenas; // entry count for arenas; bounds the scan in checkarenas()
+int mainstacksize = 128*1024; // presumably the libthread main-thread stack size setting -- TODO confirm
+char *configpath = "/dev/kaladin/arenas"; // default config location; the -c flag in parseargs() overrides it
+
+int
+checkarenas(void) // probe each arena for a clump magic; returns 1 unless some arena was flagged bad
+{
+	int bad = 0; // set to 1 on a short read or on a nonzero magic that differs from the arena clumpmagic
+	u8int magicbuf[4];
+	u32int magic;
+	u64int addr;
+	for(int i = numarenas-1; i >= 0; i -= 1){ // arenas are visited from last to first
+		addr = arenas[i].indexstats.used; // probe offset is this arena's indexstats.used
+		fprint(2, "."); // one progress dot per arena
+		if(i % 20 == 19 || i+1 == numarenas)
+			fprint(2, "\n");
+		for(int j = 0 ; 1 ; j += 1){ // NOTE(review): addr is never advanced and j is unused, so a matching magic re-reads the same offset forever -- confirm
+			if(vtreadarena(&arenas[i], addr, magicbuf, 4) != 4){ // anything but a full 4-byte read marks the arena bad
+				fprint(2, "corrupt arena");
+				bad = 1;
+				break;
+			}
+			magic = U32GET(magicbuf);
+			if(magic == 0) // TODO: verify stopping point
+				break;
+			if(magic == arenas[i].clumpmagic)
+				fprint(2, "valid!...");
+			else{
+				bad = 1;
+				fprint(2, "arena contains invalid clumps! magic: %08x, clumpmagic: %08x", magic, arenas[i].clumpmagic);
+				break;
+			}
+		}
+	};
+	return !bad; // nonzero means no arena was flagged; threadmain() treats 0 as fatal
+}
+
+void
+parseargs(int argc, char **argv) // handle command-line flags; -c with a path is the only one accepted
+{
+	ARGBEGIN {
+	case 'c':
+		configpath = ARGF(); // the flag argument replaces the default config path
+		if(configpath == nil)
+			sysfatal("must specify a config path");
+		break;
+	default:
+		sysfatal("unsupported flag '%c'", ARGC()); // every other flag is fatal
+		break;
+	} ARGEND;
+}
+
+static void
+configvalidate(char *buf) // exits via sysfatal unless buf starts with the expected header line
+{
+	if(memcmp(buf, "venti config\n", 13) != 0) // compares exactly 13 bytes, trailing newline included
+		sysfatal("invalid config");
+}
+
+static void
+configparse(char *buf) // parse the directives that follow the 13-byte header; records arenapath and isectpath
+{
+	char *lines[16]; // getfields below is given room for 16 entries
+	char *fields[2]; // every non-empty line must tokenize into exactly two fields
+	int nlines = getfields(buf+13, lines, 16, 0, "\n"); // buf+13 skips the header that configvalidate() checked
+	for(int i = 0; i < nlines; i += 1){
+		if(strlen(lines[i]) == 0) // empty lines are skipped
+			continue;
+		if(tokenize(lines[i], fields, 2) != 2)
+			sysfatal("invalid config, bad tokenize on line %d", i);
+		if(strcmp(fields[0], "arenas") == 0)
+			arenapath = strdup(fields[1]); // copied, so the value does not point into buf
+		else if(strcmp(fields[0], "isect") == 0)
+			isectpath = strdup(fields[1]);
+		else if(strcmp(fields[0], "bcmem") == 0 || strcmp(fields[0], "mem") == 0 || strcmp(fields[0], "icmem") == 0)
+			// ignore cache sizing
+			{}
+		else if(strcmp(fields[0], "addr") == 0)
+			// no tcp server here
+			{}
+		else if(strcmp(fields[0], "httpaddr") == 0)
+			// no http server, fuck that
+			{}
+		else if(strcmp(fields[0], "index") == 0)
+			// apparently no effect????
+			{}
+		else
+			fprint(2, "ignoring config directive '%s'\n", fields[0]); // unknown directives only produce a warning
+	}
+}
+
+static void
+loadconfig(void) // read the config named by configpath, check its header, then parse its directives
+{
+	// Config is either a flat file, or is the last 8K of a 256K block
+	int fd = open(configpath, OREAD);
+	Dir *dir = dirfstat(fd);
+	// NOTE venti technically allows for 8K bytes of config. Fuck that. 8191 bytes is enough for anybody.
+	char buf[8192];
+	memset(buf, 0, sizeof buf); // a short read must leave NULs, not stack garbage, for configparse() to scan
+	if(fd < 0 || dir == nil)
+		sysfatal("unable to open config '%s'", configpath);
+	if(dir->length > 256*1024){
+		// Config partition
+		vlong w = pread(fd, buf, 8192, 248*1024);
+		if(w < 0)
+			sysfatal("unable to read configpart '%s'", configpath);
+		else if(w == 0)
+			sysfatal("configpart empty!");
+	} else if(pread(fd, buf, 8192, 0) <= 0)
+		sysfatal("unable to read config file '%s'", configpath);
+	// Just in case.
+	buf[8191] = 0; // caps the text at 8191 bytes even when all 8192 were read
+	configvalidate(buf);
+	configparse(buf);
+}
+void
+threadmain(int argc, char **argv) // top-level driver: parse flags, load config and arena metadata, scan arenas
+{
+	parseargs(argc, argv);
+	fprint(2, "loading config... ");
+	loadconfig();
+	fprint(2, "loading arena partition metadata... ");
+	initarenas();
+	cacheinit();
+	fprint(2, "...scanning.\n");
+	if(!checkarenas()) // checkarenas() returns 0 when any arena was flagged bad
+		sysfatal("arenas do be corrupt, yo!");
+	fprint(2, "looks like you're good - for now...\n");
+}
--- a/disk.c
+++ b/disk.c
@@ -7,6 +7,7 @@
 u32int numarenas = 0;
 
 struct {
+	RWLock;
 	u32int blocksize;
 	u32int buckets;
 	VtISect *sects;
@@ -14,6 +15,12 @@
 	u32int div;
 	u32int namap;
 	MapEntry *amap;
+	struct {
+		// The active arena, which is currently being appended to.
+		VtArena *arena;
+		// Next offset within the current arena.
+		u32int offset;
+	} active;
 } index;
 
 int
@@ -54,7 +61,6 @@
 		if(addr >= index.amap[a].start && addr < index.amap[a].stop)
 			return a;
 	sysfatal("internal corruption: arena not found for arenaindex");
-	return 0;
 }
 
 int
@@ -94,7 +100,7 @@
 static u64int
 arenadirsize(VtArena *arena)
 {
-	return ((arena->memstats.clumps / (arena->blocksize / 25)) + 1) * arena->blocksize;
+	return ((arena->arenastats.clumps / (arena->blocksize / 25)) + 1) * arena->blocksize;
 }
 
 static u64int
@@ -172,15 +178,28 @@
 	}
 	size = U16GET(buf+7);
 	if(buf[29] == 2){
-		if(unwhack(dst, size, buf+38, U16GET(buf+5)) != size){
+		if(unwhack(dst, size, buf+38, U16GET(buf+5)) != size)
 			sysfatal("decompression failed: %r. block index %llx", addr.offset/addr.s_arena->blocksize);
-			return 0;
-		}
 	} else if(buf[29] == 1)
 		memcpy(dst, buf+38, size);
 	return 1;
 }
 
+int
+vtwriteclump(char *buf, u16int len) // unimplemented stub: always sets an error string and returns 0
+{
+	USED(len, buf); // both parameters are unused for now
+	// - Lock index and arena
+	// - Write data to arena
+	// - Write metadata to arena
+	// - Add entry to index
+	wlock(&index); // nothing is done yet while the index write lock is held
+	
+	wunlock(&index);
+	werrstr("TODO: write clump");
+	return 0; // the caller in server.c reports 0 as a failed insertion
+}
+
 static int
 parsemap(Biobufhdr *b, MapEntry **map, u32int *nmap)
 {
@@ -209,14 +228,29 @@
 static void
 loadarena(VtArena *arena)
 {
-	u32int version;
+	u32int version, magic;
 	char *buf = malloc(arena->blocksize);
 	u8int *p = (void*)buf;
 	if(pread(arena->fd, buf, arena->blocksize, arena->base + arena->size) != arena->blocksize)
 		sysfatal("failed to pread");
+	magic = U32GET(p); // NOTE(review): buf from malloc above is never freed in this function -- confirm intended
 	version = U32GET(p + 4);
+	arena->indexstats.clumps = U32GET(p+8+NameSize); // fields below follow the name, which starts at offset 8
+	arena->indexstats.cclumps = U32GET(p+8+NameSize+4);
+	arena->ctime = U32GET(p+8+NameSize+8);
+	arena->wtime = U32GET(p+8+NameSize+12);
+	arena->clumpmagic = U32GET(p+8+NameSize+16);
+	arena->indexstats.used = U64GET(p+8+NameSize+20); // the only 64-bit field read here
+	// We _can_ read the values in even if the arena is invalid, and the code looks
+	// cleaner with all the parsing and validation grouped, so meh, going to keep it
+	// like this.
+	if(magic != ArenaMagic)
+		sysfatal("corrupt arena: magic is incorrect!");
+	if(version != 5) // any version other than 5 is rejected
+		sysfatal("unsupported arena version %d\n", version);
 	if(strncmp(arena->name, buf + 8, strlen(arena->name)) != 0)
-		sysfatal("arena name mismatch: %s vs %s, ver %d", arena->name, buf + 8, version);
+		sysfatal("arena name mismatch: %s vs %s", arena->name, buf + 8);
+	
 }
 
 static void
--- a/mkfile
+++ b/mkfile
@@ -1,6 +1,6 @@
 </$objtype/mkfile
 
-TARG=neoventi
+TARG=neoventi checkarena
 BIN=/$objtype/bin
 OFILES=unwhack.$O server.$O util.$O disk.$O cache.$O
 CLEANFILES=paper.ps paper.pdf
--- a/neoventi.c
+++ b/neoventi.c
@@ -50,15 +50,15 @@
 			isectpath = strdup(fields[1]);
 		else if(strcmp(fields[0], "bcmem") == 0 || strcmp(fields[0], "mem") == 0 || strcmp(fields[0], "icmem") == 0)
 			// ignore cache sizing
-			;
+			{}
 		else if(strcmp(fields[0], "addr") == 0)
 			tcpaddr = strdup(fields[1]);
 		else if(strcmp(fields[0], "httpaddr") == 0)
 			// no http server, fuck that
-			;
+			{}
 		else if(strcmp(fields[0], "index") == 0)
 			// apparently no effect????
-			;
+			{}
 		else
 			fprint(2, "ignoring config directive '%s'\n", fields[0]);
 	}
--- a/neoventi.h
+++ b/neoventi.h
@@ -34,6 +34,7 @@
 	 * need for special casing and magic around it. Just read the damn block. */
 	HeadSize = 512,
 	ArenaPartMagic = 0xa9e4a5e7U,
+	ArenaMagic = 0xf2a14eadU,
 	ISectMagic = 0xd15c5ec7U,
 	IBucketSize		= 6,
 	IEntrySize		= 38,
@@ -64,7 +65,12 @@
 	int fd;
 	struct {
 		u32int clumps;
-	} memstats;
+	} arenastats;
+	struct {
+		// Total clump count, and compressed clump count, respectively
+		u32int clumps, cclumps;
+		u64int used;
+	} indexstats;
 } VtArena;
 
 typedef struct {
@@ -126,6 +132,7 @@
 int vtreadlookup(u8int *score, VtAddress *addr);
 u16int vtreadarena(VtArena *arena, u64int addr, uchar *dbuf, u16int reqsize);
 int readclump(uchar *dst, VtAddress addr);
+int vtwriteclump(char *dst, u16int len);
 int Brdu32(Biobufhdr *bio, u32int *u32);
 int stru32int(char *s, u32int *r);
 int stru64int(char *s, u64int *r);
--- a/notebook
+++ b/notebook
@@ -2609,3 +2609,35 @@
 also, TODO: remove libbio usage, it's probably overkill for what we actually need.
 
 Shorter term TODO: see about bumping buffer size to account for Bungetsz to make reads aligned so devfs patch can be reverted. (Performance improvement, since aligned reads are faster.)
+
+Okay, so what we need now:
+
+	- Reserve space in the arena
+	- Write the block to the arena
+	- Append to the log
+	- Handle arena maintenance
+		- e.g. if finishing this arena:
+			- jump the address to the next one
+			- in the background, start the process of "sealing" the current arena.
+
+let's check what venti's doing to ensure there's nothing being missed in the design here.
+
+- Locks index for writing
+- Writes clump to the arena, referred to by the index
+- If successful, allocate space in the index, insert into the index, unlock, return
+
+ohhhh!
+
+Was working on reading more arena info so that I can better track which one is live, and I realized I'm not reading a bunch of arena fields - including clumpmagic. Which is probably why the clump verifier doesn't work! It's validating against arena->clumpmagic, which is uninitialized!!
+
+So, let's start by loading in all fields, and testing that. If it works, then great, if not, update the todo list and get back to the write side...
+
+If I need a conversion tool for v4 arenas - possible - then should I define a v6 format, improving on venti, and go straight to that?
+
+Got it working, and used it to confirm that the loadarena() code is now grabbing everything from the right place :)
+
+Rename it from badcheck.c, commit this, add the sealed detection, and then determine live arena...
+
+% mv badcheck.c checkarena.c
+% git/add badcheck.c check.c
+% git/commit -m 'fix check tool!!' .
--- a/server.c
+++ b/server.c
@@ -108,7 +108,8 @@
 		if(memcmp(buf2, buf+8, len) != 0)
 			vterr(conn, buf, "hash collision detected");
 	} else {
-		vterr(conn, buf, "TODO: insert data");
+		if(!vtwriteclump(buf+8, len))
+			vterr(conn, buf, "data insertion failed");
 	}
 	memcpy(buf+4, score, 20);
 	vtsend(conn, buf, 22, VtRwrite, 0);
--