ref: e061cf22cdf6563a02cd0941da722f8d006f31be
parent: d396821b5f425aec2ef4800e19633a3e0d93b78d
author: Noam Preil <noam@pixelhero.dev>
date: Sat Dec 28 01:20:54 EST 2024
server: detect if written data already is present on the server
--- a/notebook
+++ b/notebook
@@ -2280,3 +2280,171 @@
Also, noted that venti/write at the least is doing two write()s to venti instead of just the one. Unsure how I feel about this. It should be possible to do that more optimally without any tradeoffs i think. TODO.
+% echo -n hello | venti/write -h tcp!127.1!14011
+venti/write: vtwrite: neoventi: TODO: write(data len 5 / 'hello')
+
+Perfect :)
+
+Step one: compute the score... do I have logic for this yet? I don't think so. It's just sha256 tho iirc? SHA1, actually, 20 bytes / 160 bit.
+
+venti/venti code:
+
+void
+scoremem(u8int *score, u8int *buf, int n)
+{
+ DigestState s;
+
+ memset(&s, 0, sizeof s);
+ sha1(buf, n, score, &s);
+}
+
+This is just sechash(2) stuff, except done wrong lmfao.
+
+ The first call to the
+ routine should have nil as the state parameter. This call
+ returns a state which can be used to chain subsequent calls.
+
+This doesn't mean "zero init a ds", it means "pass nil."
+
+the correct invocation here is just sha1(buf, len, score, nil);
+
+xD okay.
+
+Here's what we can do: we're live-testing against the venti I'm typing this into. We can:
+
+- Compute the score for 'hello'
+- Verify that venti/read gives no data for it from neoventi
+- Verify the same for oldventi
+- use venti/write against the oldventi instance, note down the score
+- use venti/read against neoventi, and verify that looking it up succeeds on the write path too
+
+...okay, actually, looking through the sha1 source code, venti might have a point. DigestStates explicitly track whether they were mallocated; passing in nil results in it mallocing and then demallocing. This seems like a really silly workaround though. If the allocation isn't desired, let's patch libsec:
+
+diff df8a21821d8b1728953afd49929ba5b69c925ca1 uncommitted
+--- a/sys/src/libsec/port/sha1.c
++++ b/sys/src/libsec/port/sha1.c
+@@ -18,12 +18,19 @@
+ int i;
+ uchar *e;
+
++ SHA1state ns;
++
+ if(s == nil){
+- s = malloc(sizeof(*s));
+- if(s == nil)
+- return nil;
+- memset(s, 0, sizeof(*s));
+- s->malloced = 1;
++ if(digest != nil){
++ s = &ns;
++ memset(s, 0, sizeof(*s));
++ } else {
++ s = malloc(sizeof(*s));
++ if(s == nil)
++ return nil;
++ memset(s, 0, sizeof(*s));
++ s->malloced = 1;
++ }
+ }
+
+ if(s->seeded == 0){
+
+This adds sizeof(DigestState) - ~350 bytes - of stack usage to each call, even when not needed, but that shouldn't practically matter, and it removes one alloc+free pair for the specific case of one-shot hashes.
+
+Arguably, this is optimizing libsec for venti's needs, but I don't think that's really a problem here, since it does not cause any problems for any other usage. I'll need to poke cinap and/or moody, though; iirc moody's been through libsec?
+
+regardless, the score should be correct...
+static void
+vtwrite(VtConn conn, char *buf, u16int len)
+{
+ char *data;
+ u8int score[20];
+ data = &buf[8];
+ data[len] = 0;
+ sha1((uchar*)data, len, score, nil);
+ char scorebuf[41];
+ for(int i = 0; i < 20; i += 1){
+ sprint(&scorebuf[i*2], "%02x", score[i]);
+ }
+ scorebuf[40] = 0;
+ vterr(conn, buf, "TODO: write(data len %d / '%s', score '%s')", len, data, scorebuf);
+}
+venti/write: vtwrite: neoventi: TODO: write(data len 5 / 'hello', score 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d')
+
+% echo -n hello | sha1sum
+aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d
+
+Yep!
+
+Now for the fun part: abstract the read logic out of vtread, and then call it from vtwrite, and check if the data already exists...
+
+actually, no, the logic is so simple that there's no need.
+
+ VtAddress addr;
+ int exists;
+ if(vtreadlookup(score, &addr))
+ // exists
+
+Then the handling for validating the data, and for actually writing.
+
+Okay! This is working just fine, but we need to actually make sure it doesn't fail to detect data which _does_ exist. Grab fossil/last?
+
+annnnd fossil/last is giving the wrong result. lovely.
+
+
+
+static void
+vtwrite(VtConn conn, char *buf, u16int len)
+{
+ u8int score[20];
+ VtAddress addr;
+ sha1((uchar*)buf+8, len, score, nil);
+ if(vtreadlookup(score, &addr)){
+ vterr(conn, buf, "TODO: handle vtwrite for existing data");
+ } else {
+ vterr(conn, buf, "TODO: insert data");
+ }
+}
+
+At least the code is stupidly simple!
+
+archive vac:dc294a9b60bc62b368036181784c7a42688c05c8
+
+Okay, let's use this one then.
+
+% venti/read -h tcp!127.1!14011 dc294a9b60bc62b368036181784c7a42688c05c8
+
+That should actually read the block, so
+
+% venti/read -h tcp!127.1!14011 dc294a9b60bc62b368036181784c7a42688c05c8 | venti/write -h tcp!127.1!14011
+
+should trigger the other path
+
+./7.neoventi: index is corrupt: invalid bucket magic: sect 839a5928, buck cbc5e0b7
+
+uhhhhh
+
+okay, not great. Is this what happens when venti modifies the index underneath neoventi? >_<
+
+same happens after relaunching neoventi. Trigger a venti/venti index flush?
+
+% hget http://127.1!13012/flushicache
+
+Of note is that the NVMe activity light is on maybe like 10% of the time during this process. This is just bad.
+
+Okay, but that was the issue :)
+
+Read length from packet: 26
+Read length from packet: 306
+Read length from packet: 306
+abandoning client: TODO: handle vtwrite for existing data
+
+
+but also venti/write is even worse than I thought, is it writing 306 byte chunks???
+
+ah wait! that was two requests lol
+
+...oh. no, no it wasn't, only the 26-byte one is from the read? eh, not looking into this now.
+
+Anyways, confirmed that this is correct :) Commit and next step :D
+
--- a/server.c
+++ b/server.c
@@ -2,6 +2,7 @@
#include <libc.h>
#include <bio.h>
#include <thread.h>
+#include <libsec.h>
#include "neoventi.h"
static void vtsend(VtConn conn, char *buf, u16int size, u8int tag, int drop);
@@ -95,9 +96,14 @@
static void
vtwrite(VtConn conn, char *buf, u16int len)
{
- char *data;
- data = &buf[8];
- vterr(conn, buf, "TODO: write(data len %d)", len);
+ u8int score[20];
+ VtAddress addr;
+ sha1((uchar*)buf+8, len, score, nil);
+ if(vtreadlookup(score, &addr)){
+ vterr(conn, buf, "TODO: handle vtwrite for existing data");
+ } else {
+ vterr(conn, buf, "TODO: insert data");
+ }
}
static int