shithub: neoventi

Download patch

ref: 337f74d8143862b34559cb82f459e12827f30ff9
parent: 45ee8c7f10b56c7164a28bb1dc1fcd10fa0695b0
author: Noam Preil <noam@pixelhero.dev>
date: Tue Sep 24 05:03:38 EDT 2024

cache: random eviction when full

--- a/cache.c
+++ b/cache.c
@@ -20,14 +20,37 @@
 static u32int freelen;
 static Lock freelock;
 
-#define BUCKET_SIZE 8
-// Hardcode 2GiB of cache for now
-#define CACHE_SIZE 2ULL*1024*1024*1024
-// 
-#define NENTRIES CACHE_SIZE/0x2000ULL
-// Triple needed size so max possible load factor is ⅓
-#define NBUCKETS (3ULL*NENTRIES/8)
-#define BITS 64-15
+enum {
+	BUCKETSIZE = 8,
+	CACHESIZE = 1ULL*1024*1024*1024,
+	// Each cache entry is an 8K buffer.
+	ENTRYSIZE = 0x2000,
+	// We target a load factor of 0.25, or a divisor of 4.
+	// This requires allocating 4x more buckets than strictly required, but the
+	// memory overhead is relatively low (a mere 3 bytes per kilobyte of cache),
+	// and this has both performance and design benefits. It makes linear probing
+	// more practical, removing the need to use a more complicated design in order
+	// to retain performance.
+	TARGETDIVISOR = 4,
+	// We use fibonacci hashing to select buckets, which requires that
+	// we have a power-of-two number of buckets.
+	// (Technically, we could have more, but they'd only be used during
+	// linear probing when the desired bucket was full, and that's silly.)
+	// (Also technically, we _could_ use division instead of a right-shift
+	// when selecting the portion of the fibonacci hash to use, so it's
+	// not strictly Fibonacci hashing that requires power-of-two bucket sizes
+	// so much as our high-performance implementation thereof).
+	REQUIREDENTRIES = CACHESIZE / ENTRYSIZE,
+	ENTRIES = REQUIREDENTRIES * TARGETDIVISOR,
+	BUCKETS = ENTRIES / BUCKETSIZE,
+	// CACHEBITS needs to be computed by hand because no logarithms in
+	// macros. 2^(CACHEBITS) = BUCKETS.
+	// BUCKETS >> CACHEBITS = 2.
+	CACHEBITS = 16,
+	// Fibonacci hashing yields a 64-bit number; shifting it right by this
+	// value will yield CACHEBITS significant bits.
+	CACHESHIFT = 64-CACHEBITS,
+};
 
 void
 cacheinit(void)
@@ -35,11 +58,13 @@
 	usize displacement;
 	assert(sizeof(Lock) == 8);
 	assert(sizeof(bucket_t) == 64);
-	data = sbrk((usize)0x2000 * NENTRIES);
+	// CACHEBITS is computed manually; verify it.
+	assert(1 << CACHEBITS == BUCKETS || (BUCKETS == 1 && CACHEBITS == 1));
+	data = sbrk((usize)ENTRYSIZE * ENTRIES);
 	if(data == (void*)-1)
-		sysfatal("failed to allocate cache space of %llux", 8192*NENTRIES);
-	buckets = sbrk(64 + 64*NBUCKETS);
-	freelist = sbrk(4 * NENTRIES);
+		sysfatal("failed to allocate cache space of %llux", (usize)ENTRYSIZE*ENTRIES);
+	buckets = sbrk(64 + 64*BUCKETS);
+	freelist = sbrk(sizeof(u32int) * ENTRIES);
 	displacement = ((usize)buckets & 0b111111);
 	if(displacement != 0){
 		displacement = 64 - displacement;
@@ -47,13 +72,38 @@
 	}
 	// alignment
 	assert(((usize)buckets&0b111111) == 0);
-	bucketend = buckets + NBUCKETS;
-	freelen = NENTRIES;
+	bucketend = buckets + BUCKETS;
+	freelen = ENTRIES / TARGETDIVISOR;
 	for(u32int i = 0; i < freelen; i += 1){
 		freelist[i] = i;
 	}
 }
 
+// For now: random eviction.
+// Returns the buffer to use.
+static u32int
+evict(void)
+{
+	int nbucket, i;
+	bucket_t *bucket;
+	u32int index;
+	while(1){
+		nbucket = rand() % BUCKETS;
+		bucket = &buckets[nbucket];
+		if(!canlock(bucket))
+			continue;
+		i = rand() % BUCKETSIZE;
+		if(bucket->index[i] != 0){
+			// Found an entry; evict it!
+			index = bucket->values[i*3] | (bucket->values[i*3+1]<<8) | (bucket->values[i*3+2]<<16);
+			bucket->index[i] = 0;
+			unlock(bucket);
+			return index;
+		}
+		unlock(bucket);
+	}
+}
+
 // Grabs a free cache entry, evicting if necessary.
 u32int
 grabfree(void)
@@ -60,14 +110,12 @@
 {
 	u32int result;
 	lock(&freelock);
-	// Should be impossible to run out - we set up
-	// enough buffers for each cache entry to get
-	// one.
-	// Before grabbing a new one, we'll always evict
-	// an old one.
-	assert(freelen != 0);
-	freelen -= 1;
-	result = freelist[freelen];
+	if(freelen == 0){
+		result = evict();
+	} else {
+		freelen -= 1;
+		result = freelist[freelen];
+	}
 	unlock(&freelock);
 	return result;
 }
@@ -76,7 +124,13 @@
 getbucket(u32int key)
 {
 	// Fibonacci hashing!
-	return &buckets[key * 11400714819323198485LL >> BITS];
+	bucket_t *bucket = &buckets[key * 11400714819323198485LL >> CACHESHIFT];
+	bucket_t *last = &buckets[BUCKETS-1];
+	if(bucket > last){
+		fprint(2, "internal error: computed bucket hash is out of range\n");
+		abort();
+	}
+	return bucket;
 }
 
 static void
@@ -98,7 +152,7 @@
 static int
 trybucket(bucket_t *bucket, int *i, u32int key)
 {
-	for(*i = 0; *i < BUCKET_SIZE; *i += 1){
+	for(*i = 0; *i < BUCKETSIZE; *i += 1){
 		if(bucket->index[*i] == key)
 			return 1;
 		if(bucket->index[*i] == 0)
@@ -113,8 +167,15 @@
 	u32int key = (arenaindex << 16) | blockindex;
 	bucket_t *bucket = getbucket(key);
 	int index;
-	while(trybucket(bucket, &index, key) != 1)
+	usize tried = 0;
+	while(trybucket(bucket, &index, key) != 1){
 		bucket++;
+		tried++;
+		if(tried == BUCKETS)
+			sysfatal("internal error: went to unlock entry, but it's not in any bucket!");
+		if(bucket == bucketend)
+			bucket = buckets;
+	}
 	unlock(bucket);
 }
 
@@ -130,11 +191,11 @@
 	u32int key = (arenaindex << 16) | blockindex;
 	bucket_t *bucket = getbucket(key);
 	usize tried = 0;
-	while(tried < NBUCKETS){
+	while(tried < BUCKETS){
 		lock(bucket);
 		switch(trybucket(bucket, &i, key)){
 		case -1:
-			// miss. linear probe.
+			// miss, and this bucket is full; linear probe.
 			unlock(bucket);
 			tried+=1;
 			bucket++;
@@ -142,7 +203,7 @@
 				bucket = buckets;
 			continue;
 		case 0:
-			// found an empty slot
+			// miss, but there's an empty slot in this bucket.
 			cacheindex = grabfree();
 			*buf = data+(usize)cacheindex*8192;
 			put(bucket, i, key, cacheindex);