shithub: neoventi

Download patch

ref: 45d94ff4f17612bbe5fd67288fafaeb1e88e7259
parent: b008c03c4d842750bb6635096cfcd7cea8355e79
author: Noam Preil <noam@pixelhero.dev>
date: Sat Mar 23 09:59:24 EDT 2024

cleanup

--- a/cache.c
+++ b/cache.c
@@ -17,10 +17,13 @@
 static u32int freelen;
 static Lock freelock;
 
-#define NENTRIES 8
-#define ENTRIES 0x40000
-// Oversizes, but ensures power-of-two for fibonacci hashing
-#define BUCKETS (ENTRIES/8)
+#define BUCKET_SIZE 8
+// Hardcode 2GiB of cache for now
+#define CACHE_SIZE 2ULL*1024*1024*1024
+// 
+#define NENTRIES CACHE_SIZE/0x2000ULL
+// Triple needed size so max possible load factor is ⅓
+#define NBUCKETS (3ULL*NENTRIES/8)
 #define BITS 64-15
 
 void
@@ -29,11 +32,11 @@
 	usize displacement;
 	assert(sizeof(Lock) == 8);
 	assert(sizeof(bucket_t) == 64);
-	data = sbrk((usize)0x2000 * ENTRIES);
+	data = sbrk((usize)0x2000 * NENTRIES);
 	if(data == (void*)-1)
-		sysfatal("failed to allocate cache space of %lux", 8192*ENTRIES/2);
-	buckets = sbrk(64 + 64*BUCKETS);
-	freelist = sbrk(4 * ENTRIES);
+		sysfatal("failed to allocate cache space of %lux", 8192*NENTRIES);
+	buckets = sbrk(64 + 64*NBUCKETS);
+	freelist = sbrk(4 * NENTRIES);
 	displacement = ((usize)buckets & 0b111111);
 	if(displacement != 0){
 		displacement = 64 - displacement;
@@ -41,8 +44,8 @@
 	}
 	// alignment
 	assert(((usize)buckets&0b111111) == 0);
-	bucketend = buckets + BUCKETS;
-	freelen = ENTRIES;
+	bucketend = buckets + NBUCKETS;
+	freelen = NENTRIES;
 	for(u32int i = 0; i < freelen; i += 1){
 		freelist[i] = i;
 	}
@@ -54,13 +57,12 @@
 {
 	u32int result;
 	lock(&freelock);
-	// For now, block until something is free.
-	// This is terrible. FIXME
-	while(freelen == 0){
-		unlock(&freelock);
-		sleep(50);
-		lock(&freelock);
-	}
+	// Should be impossible to run out - we set up
+	// enough buffers for each cache entry to get
+	// one.
+	// Before grabbing a new one, we'll always evict
+	// an old one.
+	assert(freelen != 0);
 	freelen -= 1;
 	result = freelist[freelen];
 	unlock(&freelock);
@@ -93,7 +95,7 @@
 static int
 trybucket(bucket_t *bucket, int *i, u32int key)
 {
-	for(*i = 0; *i < NENTRIES; *i += 1){
+	for(*i = 0; *i < BUCKET_SIZE; *i += 1){
 		if(bucket->index[*i] == key)
 			return 1;
 		if(bucket->index[*i] == 0)
@@ -114,7 +116,7 @@
 	u32int key = (arenaindex << 16) | blockindex;
 	bucket_t *bucket = getbucket(key);
 	usize tried = 0;
-	while(tried < BUCKETS){
+	while(tried < NBUCKETS){
 		switch(trybucket(bucket, &i, key)){
 		case -1:
 			// miss. linear probe.
--- a/notebook
+++ b/notebook
@@ -1082,4 +1082,39 @@
 
 ...max index into data should be 0x40000 * 0x2000 is 2GiB. Are array indices of type int? If so, it might be wrapping >_>
 
-Ahhhhh, sbrk is failing to allocate 2GiB?
\ No newline at end of file
+Ahhhhh, sbrk is failing to allocate 2GiB?
+
+Right. the _sbrk_ parameter was an int >_> (usize)0x2000 and now it works.
+
+Also, with the cache fixed so that linear probing is less bad:
+
+90.0  187.229  1368400	read
+ 4.7    9.844   684185	vtreadlookup
+ 1.8    3.760   664759	unwhack
+ 1.6    3.338   743123	vtreadarenablock
+ 0.3    0.717        1	listen
+ 0.3    0.673     1416	loadarena
+ 0.2    0.377   766800	memcpy
+ 0.1    0.280   684185	aindexfromaddr
+ 0.1    0.266  1427308	cachelookup
+ 0.1    0.148   684185	bucketlookup
+ 0.1    0.143   684185	vtread
+ 0.1    0.141   684187	vtrecv
+ 0.1    0.133  1572426	memcmp
+ 0.1    0.127   684185	vtreadarena
+
+Next up is to reduce the number of read()s. Can do this by reading multiple packets per call (read into a 64K buffer, track its length, grab packets from it, rehydrate as needed). Might be able to reduce that 187s to closer to 5s. There were more than 20 runs in that profile, so the result would likely be a total runtime of ~1s.
+
+Nooope. reading from the TCP file will only give us a packet at a time.
+
+This will at best reduce total time by ~45%, because it combines the 2-byte read with the read of the rest of the packet. Still, not bad.
+
+Okay, lol. From 23.68s cold and 4.45s warm to 20.69s cold and 4.35s warm. Time to grab another profile :/
+
+...the profile is treating time read is _blocked_ as time spent reading. Oi. Going to run neoventi and then run this as quickly as possible: mk vac; i=();while(! ~ $#i 50){time walk /n/vac > /dev/null; i=($i ())} ; slay vacfs | rc
+
+Should take around 4-5 minutes, and the profile might be a tad more accurate
+
+92.7 1415.727 11089337	read
+
+We're blocked on the client :P Most of the runtime is waiting for vacfs to get back to us :D
\ No newline at end of file