ref: 89bdceb722cd18eeddddcf884e845f561459b49d
parent: 4b60d3dd32efd00a1d07cb08662926ba9836719f
author: Noam Preil <noam@pixelhero.dev>
date: Sat Jan 4 21:05:52 EST 2025
fix check tool!!
--- a/badcheck.c
+++ /dev/null
@@ -1,50 +1,0 @@
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include "neoventi.h"
-
-extern VtArena *arenas;
-extern u32int numarenas;
-
-int
-checkarenas(void)
-{- int bad = 0;
- u8int magicbuf[4];
- u32int magic;
- u64int addr;
- for(int i = numarenas-1; i >= 0; i -= 1){- addr = 0;
- fprint(2, ".");
- if(i % 20 == 19 || i+1 == numarenas)
- fprint(2, "\n");
- for(int j = 0 ; 1 ; j += 1){- if(vtreadarena(&arenas[i], addr, magicbuf, 4) != 4){- sysfatal("corrupt arena");- }
- magic = U32GET(magicbuf);
- if(magic == 0) // TODO: verify stopping point
- break;
- if(magic == arenas[i].clumpmagic)
- fprint(2, "valid!...");
- else{- fprint(2, "arena contains invalid clumps!");
- break;
- }
- }
- };
- return bad;
-}
-
-void
-threadmain(int argc, char **argv)
-{- if(argc != 1)
- sysfatal("i'm a dummy, sorry.");- fprint(2, "loading arena partition metadata... ");
- initarenas();
- fprint(2, "...scanning.\n");
- if(!checkarenas())
- sysfatal("arenas do be corrupt, yo!");- fprint(2, "looks like you're good - for now...\n");
-}
--- /dev/null
+++ b/checkarena.c
@@ -1,0 +1,138 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "neoventi.h"
+
+extern VtArena *arenas;
+char *arenapath;
+char *isectpath;
+extern u32int numarenas;
+int mainstacksize = 128*1024;
+char *configpath = "/dev/kaladin/arenas";
+
+int
+checkarenas(void)
+{+ int bad = 0;
+ u8int magicbuf[4];
+ u32int magic;
+ u64int addr;
+ for(int i = numarenas-1; i >= 0; i -= 1){+ addr = arenas[i].indexstats.used;
+ fprint(2, ".");
+ if(i % 20 == 19 || i+1 == numarenas)
+ fprint(2, "\n");
+ for(int j = 0 ; 1 ; j += 1){+ if(vtreadarena(&arenas[i], addr, magicbuf, 4) != 4){+ fprint(2, "corrupt arena");
+ bad = 1;
+ break;
+ }
+ magic = U32GET(magicbuf);
+ if(magic == 0) // TODO: verify stopping point
+ break;
+ if(magic == arenas[i].clumpmagic)
+ fprint(2, "valid!...");
+ else{+ bad = 1;
+ fprint(2, "arena contains invalid clumps! magic: %08x, clumpmagic: %08x", magic, arenas[i].clumpmagic);
+ break;
+ }
+ }
+ };
+ return !bad;
+}
+
+void
+parseargs(int argc, char **argv)
+{+ ARGBEGIN {+ case 'c':
+ configpath = ARGF();
+ if(configpath == nil)
+ sysfatal("must specify a config path");+ break;
+ default:
+ sysfatal("unsupported flag '%c'", ARGC());+ break;
+ } ARGEND;
+}
+
+static void
+configvalidate(char *buf)
+{+ if(memcmp(buf, "venti config\n", 13) != 0)
+ sysfatal("invalid config");+}
+
+static void
+configparse(char *buf)
+{+ char *lines[16];
+ char *fields[2];
+ int nlines = getfields(buf+13, lines, 16, 0, "\n");
+ for(int i = 0; i < nlines; i += 1){+ if(strlen(lines[i]) == 0)
+ continue;
+ if(tokenize(lines[i], fields, 2) != 2)
+ sysfatal("invalid config, bad tokenize on line %d", i);+ if(strcmp(fields[0], "arenas") == 0)
+ arenapath = strdup(fields[1]);
+ else if(strcmp(fields[0], "isect") == 0)
+ isectpath = strdup(fields[1]);
+ else if(strcmp(fields[0], "bcmem") == 0 || strcmp(fields[0], "mem") == 0 || strcmp(fields[0], "icmem") == 0)
+ // ignore cache sizing
+ {}+ else if(strcmp(fields[0], "addr") == 0)
+ // no tcp server here
+ {}+ else if(strcmp(fields[0], "httpaddr") == 0)
+ // no http server, fuck that
+ {}+ else if(strcmp(fields[0], "index") == 0)
+ // apparently no effect????
+ {}+ else
+ fprint(2, "ignoring config directive '%s'\n", fields[0]);
+ }
+}
+
+static void
+loadconfig(void)
+{+ // Config is either a flat file, or is the last 8K of a 256K block
+ int fd = open(configpath, OREAD);
+ Dir *dir = dirfstat(fd);
+ // NOTE venti technically allows for 8K bytes of config. Fuck that.
+ // 8191 bytes is enough for anybody.
+ char buf[8192];
+ if(fd < 0 || dir == nil)
+ sysfatal("unable to open config '%s'", configpath);+ if(dir->length > 256*1024){+ // Config partition
+ vlong w = pread(fd, buf, 8192, 248*1024);
+ if(w < 0)
+ sysfatal("unable to read configpart '%s'", configpath);+ else if(w == 0)
+ sysfatal("configpart empty!");+ } else if(pread(fd, buf, 8192, 0) <= 0)
+ sysfatal("unable to read config file '%s'", configpath);+ // Just in case.
+ buf[8191] = 0;
+ configvalidate(buf);
+ configparse(buf);
+}
+void
+threadmain(int argc, char **argv)
+{+ parseargs(argc, argv);
+ fprint(2, "loading config... ");
+ loadconfig();
+ fprint(2, "loading arena partition metadata... ");
+ initarenas();
+ cacheinit();
+ fprint(2, "...scanning.\n");
+ if(!checkarenas())
+ sysfatal("arenas do be corrupt, yo!");+ fprint(2, "looks like you're good - for now...\n");
+}
--- a/disk.c
+++ b/disk.c
@@ -7,6 +7,7 @@
u32int numarenas = 0;
struct {+ RWLock;
u32int blocksize;
u32int buckets;
VtISect *sects;
@@ -14,6 +15,12 @@
u32int div;
u32int namap;
MapEntry *amap;
+ struct {+ // The active arena, which is currently being appended to.
+ VtArena *arena;
+ // Next offset within the current arena.
+ u32int offset;
+ } active;
} index;
int
@@ -54,7 +61,6 @@
if(addr >= index.amap[a].start && addr < index.amap[a].stop)
return a;
sysfatal("internal corruption: arena not found for arenaindex");- return 0;
}
int
@@ -94,7 +100,7 @@
static u64int
arenadirsize(VtArena *arena)
{- return ((arena->memstats.clumps / (arena->blocksize / 25)) + 1) * arena->blocksize;
+ return ((arena->arenastats.clumps / (arena->blocksize / 25)) + 1) * arena->blocksize;
}
static u64int
@@ -172,15 +178,28 @@
}
size = U16GET(buf+7);
if(buf[29] == 2){- if(unwhack(dst, size, buf+38, U16GET(buf+5)) != size){+ if(unwhack(dst, size, buf+38, U16GET(buf+5)) != size)
sysfatal("decompression failed: %r. block index %llx", addr.offset/addr.s_arena->blocksize);- return 0;
- }
} else if(buf[29] == 1)
memcpy(dst, buf+38, size);
return 1;
}
+int
+vtwriteclump(char *buf, u16int len)
+{+ USED(len, buf);
+ // - Lock index and arena
+ // - Write data to arena
+ // - Write metadata to arena
+ // - Add entry to index
+ wlock(&index);
+
+ wunlock(&index);
+ werrstr("TODO: write clump");+ return 0;
+}
+
static int
parsemap(Biobufhdr *b, MapEntry **map, u32int *nmap)
{@@ -209,14 +228,29 @@
static void
loadarena(VtArena *arena)
{- u32int version;
+ u32int version, magic;
char *buf = malloc(arena->blocksize);
u8int *p = (void*)buf;
if(pread(arena->fd, buf, arena->blocksize, arena->base + arena->size) != arena->blocksize)
sysfatal("failed to pread");+ magic = U32GET(p);
version = U32GET(p + 4);
+ arena->indexstats.clumps = U32GET(p+8+NameSize);
+ arena->indexstats.cclumps = U32GET(p+8+NameSize+4);
+ arena->ctime = U32GET(p+8+NameSize+8);
+ arena->wtime = U32GET(p+8+NameSize+12);
+ arena->clumpmagic = U32GET(p+8+NameSize+16);
+ arena->indexstats.used = U64GET(p+8+NameSize+20);
+ // We _can_ read the values in even if the arena is invalid, and the code looks
+ // cleaner with all the parsing and validation grouped, so meh, going to keep it
+ // like this.
+ if(magic != ArenaMagic)
+ sysfatal("corrupt arena: magic is incorrect!");+ if(version != 5)
+ sysfatal("unsupported arena version %d\n", version);if(strncmp(arena->name, buf + 8, strlen(arena->name)) != 0)
- sysfatal("arena name mismatch: %s vs %s, ver %d", arena->name, buf + 8, version);+ sysfatal("arena name mismatch: %s vs %s", arena->name, buf + 8);+
}
static void
--- a/mkfile
+++ b/mkfile
@@ -1,6 +1,6 @@
</$objtype/mkfile
-TARG=neoventi
+TARG=neoventi checkarena
BIN=/$objtype/bin
OFILES=unwhack.$O server.$O util.$O disk.$O cache.$O
CLEANFILES=paper.ps paper.pdf
--- a/neoventi.c
+++ b/neoventi.c
@@ -50,15 +50,15 @@
isectpath = strdup(fields[1]);
else if(strcmp(fields[0], "bcmem") == 0 || strcmp(fields[0], "mem") == 0 || strcmp(fields[0], "icmem") == 0)
// ignore cache sizing
- ;
+ {}else if(strcmp(fields[0], "addr") == 0)
tcpaddr = strdup(fields[1]);
else if(strcmp(fields[0], "httpaddr") == 0)
// no http server, fuck that
- ;
+ {}else if(strcmp(fields[0], "index") == 0)
// apparently no effect????
- ;
+ {}else
fprint(2, "ignoring config directive '%s'\n", fields[0]);
}
--- a/neoventi.h
+++ b/neoventi.h
@@ -34,6 +34,7 @@
* need for special casing and magic around it. Just read the damn block. */
HeadSize = 512,
ArenaPartMagic = 0xa9e4a5e7U,
+ ArenaMagic = 0xf2a14eadU,
ISectMagic = 0xd15c5ec7U,
IBucketSize = 6,
IEntrySize = 38,
@@ -64,7 +65,12 @@
int fd;
struct {u32int clumps;
- } memstats;
+ } arenastats;
+ struct {+ // Total clump count, and compressed clump count, respectively
+ u32int clumps, cclumps;
+ u64int used;
+ } indexstats;
} VtArena;
typedef struct {@@ -126,6 +132,7 @@
int vtreadlookup(u8int *score, VtAddress *addr);
u16int vtreadarena(VtArena *arena, u64int addr, uchar *dbuf, u16int reqsize);
int readclump(uchar *dst, VtAddress addr);
+int vtwriteclump(char *dst, u16int len);
int Brdu32(Biobufhdr *bio, u32int *u32);
int stru32int(char *s, u32int *r);
int stru64int(char *s, u64int *r);
--- a/notebook
+++ b/notebook
@@ -2609,3 +2609,35 @@
also, TODO: remove libbio usage, it's probably overkill for what we actually need.
Shorter term TODO: see about bumping buffer size to account for Bungetsz to make reads aligned so devfs patch can be reverted. (Performance improvement, since aligned reads are faster.)
+
+Okay, so what we need now:
+
+ - Reserve space in the arena
+ - Write the block to the arena
+ - Append to the log
+ - Handle arena maintenance
+ - e.g. if finishing this arena:
+ - jump the address to the next one
+ - in the background, start the process of "sealing" the current arena.
+
+let's check what venti's doing to ensure there's nothing being missed in the design here.
+
+- Locks index for writing
+- Writes clump to the arena, referred to by the index
+- If successful, allocate space in the index, insert into the index, unlock, return
+
+ohhhh!
+
+Was working on reading more arena info so that I can better track which one is live, and I realized I'm not reading a bunch of arena fields - including clumpmagic. Which is probably why the clump verifier doesn't work! It's validating against arena->clumpmagic, which is uninitialized!!
+
+So, let's start by loading in all fields, and testing that. If it works, then great, if not, update the todo list and get back to the write side...
+
+If I need a conversion tool for v4 arenas - possible - then should I define a v6 format, improving on venti, and go straight to that?
+
+Got it working, and used it to confirm that the loadarena() code is now grabbing everything from the right place :)
+
+Rename it from badcheck.c, commit this, add the sealed detection, and then determine live arena...
+
+% mv badcheck.c checkarena.c
+% git/add badcheck.c check.c
+% git/commit -m 'fix check tool!!' .
--- a/server.c
+++ b/server.c
@@ -108,7 +108,8 @@
if(memcmp(buf2, buf+8, len) != 0)
vterr(conn, buf, "hash collision detected");
} else {- vterr(conn, buf, "TODO: insert data");
+ if(!vtwriteclump(buf+8, len))
+ vterr(conn, buf, "data insertion failed");
}
memcpy(buf+4, score, 20);
vtsend(conn, buf, 22, VtRwrite, 0);
--
⑨