ref: 45d94ff4f17612bbe5fd67288fafaeb1e88e7259
parent: b008c03c4d842750bb6635096cfcd7cea8355e79
author: Noam Preil <noam@pixelhero.dev>
date: Sat Mar 23 09:59:24 EDT 2024
cleanup
--- a/cache.c
+++ b/cache.c
@@ -17,10 +17,13 @@
static u32int freelen;
static Lock freelock;
-#define NENTRIES 8
-#define ENTRIES 0x40000
-// Oversizes, but ensures power-of-two for fibonacci hashing
-#define BUCKETS (ENTRIES/8)
+#define BUCKET_SIZE 8
+// Hardcode 2GiB of cache for now
+#define CACHE_SIZE 2ULL*1024*1024*1024
+//
+#define NENTRIES CACHE_SIZE/0x2000ULL
+// Triple needed size so max possible load factor is ⅓
+#define NBUCKETS (3ULL*NENTRIES/8)
#define BITS 64-15
void
@@ -29,11 +32,11 @@
usize displacement;
assert(sizeof(Lock) == 8);
assert(sizeof(bucket_t) == 64);
- data = sbrk((usize)0x2000 * ENTRIES);
+ data = sbrk((usize)0x2000 * NENTRIES);
if(data == (void*)-1)
- sysfatal("failed to allocate cache space of %lux", 8192*ENTRIES/2);
- buckets = sbrk(64 + 64*BUCKETS);
- freelist = sbrk(4 * ENTRIES);
+ sysfatal("failed to allocate cache space of %lux", 8192*NENTRIES);
+ buckets = sbrk(64 + 64*NBUCKETS);
+ freelist = sbrk(4 * NENTRIES);
displacement = ((usize)buckets & 0b111111);
if(displacement != 0){
displacement = 64 - displacement;
@@ -41,8 +44,8 @@
}
// alignment
assert(((usize)buckets&0b111111) == 0);
- bucketend = buckets + BUCKETS;
- freelen = ENTRIES;
+ bucketend = buckets + NBUCKETS;
+ freelen = NENTRIES;
for(u32int i = 0; i < freelen; i += 1){
freelist[i] = i;
}
@@ -54,13 +57,12 @@
{
u32int result;
lock(&freelock);
- // For now, block until something is free.
- // This is terrible. FIXME
- while(freelen == 0){
- unlock(&freelock);
- sleep(50);
- lock(&freelock);
- }
+ // Should be impossible to run out - we set up
+ // enough buffers for each cache entry to get
+ // one.
+ // Before grabbing a new one, we'll always evict
+ // an old one.
+ assert(freelen != 0);
freelen -= 1;
result = freelist[freelen];
unlock(&freelock);
@@ -93,7 +95,7 @@
static int
trybucket(bucket_t *bucket, int *i, u32int key)
{
- for(*i = 0; *i < NENTRIES; *i += 1){
+ for(*i = 0; *i < BUCKET_SIZE; *i += 1){
if(bucket->index[*i] == key)
return 1;
if(bucket->index[*i] == 0)
@@ -114,7 +116,7 @@
u32int key = (arenaindex << 16) | blockindex;
bucket_t *bucket = getbucket(key);
usize tried = 0;
- while(tried < BUCKETS){
+ while(tried < NBUCKETS){
switch(trybucket(bucket, &i, key)){
case -1:
// miss. linear probe.
--- a/notebook
+++ b/notebook
@@ -1082,4 +1082,39 @@
...max index into data should be 0x40000 * 0x2000 is 2GiB. Are array indices of type int? If so, it might be wrapping >_>
-Ahhhhh, sbrk is failing to allocate 2GiB?
\ No newline at end of file
+Ahhhhh, sbrk is failing to allocate 2GiB?
+
+Right. the _sbrk_ parameter was an int >_> (usize)0x2000 and now it works.
+
+Also, with the cache fixed so that linear probing is less bad:
+
+90.0 187.229 1368400 read
+ 4.7 9.844 684185 vtreadlookup
+ 1.8 3.760 664759 unwhack
+ 1.6 3.338 743123 vtreadarenablock
+ 0.3 0.717 1 listen
+ 0.3 0.673 1416 loadarena
+ 0.2 0.377 766800 memcpy
+ 0.1 0.280 684185 aindexfromaddr
+ 0.1 0.266 1427308 cachelookup
+ 0.1 0.148 684185 bucketlookup
+ 0.1 0.143 684185 vtread
+ 0.1 0.141 684187 vtrecv
+ 0.1 0.133 1572426 memcmp
+ 0.1 0.127 684185 vtreadarena
+
+Next up is to reduce the number of read()s. Can do this by reading multiple packets per call (read into a 64K buffer, track its length, grab packets from it, rehydrate as needed). Might be able to reduce that 187s to closer to 5s. There were more than 20 runs in that profile, so the result would likely be a total runtime of ~1s.
+
+Nooope. reading from the TCP file will only give us a packet at a time.
+
+This will at best reduce total time by ~45%, because it combines the 2-byte read with the read of the rest of the packet. Still, not bad.
+
+Okay, lol. From 23.68s cold and 4.45s warm to 20.69s cold and 4.35s warm. Time to grab another profile :/
+
+...the profile is treating time read is _blocked_ as time spent reading. Oi. Going to run neoventi and then run this as quickly as possible: mk vac; i=();while(! ~ $#i 50){time walk /n/vac > /dev/null; i=($i ())} ; slay vacfs | rc
+
+Should take around 4-5 minutes, and the profile might be a tad more accurate
+
+92.7 1415.727 11089337 read
+
+We're blocked on the client :P Most of the runtime is waiting for vacfs to get back to us :D
\ No newline at end of file