ref: 337f74d8143862b34559cb82f459e12827f30ff9
parent: 45ee8c7f10b56c7164a28bb1dc1fcd10fa0695b0
author: Noam Preil <noam@pixelhero.dev>
date: Tue Sep 24 05:03:38 EDT 2024
cache: random eviction when full
--- a/cache.c
+++ b/cache.c
@@ -20,14 +20,37 @@
static u32int freelen;
static Lock freelock;
-#define BUCKET_SIZE 8
-// Hardcode 2GiB of cache for now
-#define CACHE_SIZE 2ULL*1024*1024*1024
-//
-#define NENTRIES CACHE_SIZE/0x2000ULL
-// Triple needed size so max possible load factor is ⅓
-#define NBUCKETS (3ULL*NENTRIES/8)
-#define BITS 64-15
+enum {
+ BUCKETSIZE = 8,
+ CACHESIZE = 1ULL*1024*1024*1024,
+ // Each cache entry is an 8K buffer.
+ ENTRYSIZE = 0x2000,
+ // We target a load factor of 0.25, or a divisor of 4.
+ // This requires allocating 4x more buckets than strictly required, but the
+ // memory overhead is relatively low (a mere 3 bytes per kilobyte of cache),
+ // and this has both performance and design benefits. It makes linear probing
+ // more practical, removing the need to use a more complicated design in order
+ // to retain performance.
+ TARGETDIVISOR = 4,
+ // We use fibonacci hashing to select buckets, which requires that
+ // we have a power-of-two number of buckets.
+ // (Technically, we could have more, but they'd only be used during
+ // linear probing when the desired bucket was full, and that's silly.)
+ // (Also technically, we _could_ use division instead of a right-shift
+ // when selecting the portion of the fibonacci hash to use, so it's
+ // not strictly Fibonacci hashing that requires power-of-two bucket sizes
+ // so much as our high-performance implementation thereof).
+ REQUIREDENTRIES = CACHESIZE / ENTRYSIZE,
+ ENTRIES = REQUIREDENTRIES * TARGETDIVISOR,
+ BUCKETS = ENTRIES / BUCKETSIZE,
+ // CACHEBITS needs to be computed by hand because no logarithms in
+ // macros. 2^(CACHEBITS) = BUCKETS.
+ // BUCKETS >> CACHEBITS = 2.
+ CACHEBITS = 16,
+ // Fibonacci hashing yields a 64-bit number; shifting it right by this
+ // value will yield CACHEBITS significant bits.
+ CACHESHIFT = 64-CACHEBITS,
+};
void
cacheinit(void)
@@ -35,11 +58,13 @@
usize displacement;
assert(sizeof(Lock) == 8);
assert(sizeof(bucket_t) == 64);
- data = sbrk((usize)0x2000 * NENTRIES);
+ // CACHEBITS is computed manually; verify it.
+ assert(1 << CACHEBITS == BUCKETS || (BUCKETS == 1 && CACHEBITS == 1));
+ data = sbrk((usize)ENTRYSIZE * ENTRIES);
if(data == (void*)-1)
- sysfatal("failed to allocate cache space of %llux", 8192*NENTRIES);
- buckets = sbrk(64 + 64*NBUCKETS);
- freelist = sbrk(4 * NENTRIES);
+ sysfatal("failed to allocate cache space of %llux", (usize)ENTRYSIZE*ENTRIES);
+ buckets = sbrk(64 + 64*BUCKETS);
+ freelist = sbrk(sizeof(u32int) * ENTRIES);
displacement = ((usize)buckets & 0b111111);
if(displacement != 0){
displacement = 64 - displacement;
@@ -47,13 +72,38 @@
}
// alignment
assert(((usize)buckets&0b111111) == 0);
- bucketend = buckets + NBUCKETS;
- freelen = NENTRIES;
+ bucketend = buckets + BUCKETS;
+ freelen = ENTRIES / TARGETDIVISOR;
for(u32int i = 0; i < freelen; i += 1){
freelist[i] = i;
}
}
+// For now: random eviction.
+// Returns the buffer to use.
+static u32int
+evict(void)
+{
+ int nbucket, i;
+ bucket_t *bucket;
+ u32int index;
+ while(1){
+ nbucket = rand() % BUCKETS;
+ bucket = &buckets[nbucket];
+ if(!canlock(bucket))
+ continue;
+ i = rand() % BUCKETSIZE;
+ if(bucket->index[i] != 0){
+ // Found an entry; evict it!
+ index = bucket->values[i*3] | (bucket->values[i*3+1]<<8) | (bucket->values[i*3+2]<<16);
+ bucket->index[i] = 0;
+ unlock(bucket);
+ return index;
+ }
+ unlock(bucket);
+ }
+}
+
// Grabs a free cache entry, evicting if necessary.
u32int
grabfree(void)
@@ -60,14 +110,12 @@
{
u32int result;
lock(&freelock);
- // Should be impossible to run out - we set up
- // enough buffers for each cache entry to get
- // one.
- // Before grabbing a new one, we'll always evict
- // an old one.
- assert(freelen != 0);
- freelen -= 1;
- result = freelist[freelen];
+ if(freelen == 0){
+ result = evict();
+ } else {
+ freelen -= 1;
+ result = freelist[freelen];
+ }
unlock(&freelock);
return result;
}
@@ -76,7 +124,13 @@
getbucket(u32int key)
{
// Fibonacci hashing!
- return &buckets[key * 11400714819323198485LL >> BITS];
+ bucket_t *bucket = &buckets[key * 11400714819323198485LL >> CACHESHIFT];
+ bucket_t *last = &buckets[BUCKETS-1];
+ if(bucket > last){
+ fprint(2, "internal error: computed bucket hash is out of range\n");
+ abort();
+ }
+ return bucket;
}
static void
@@ -98,7 +152,7 @@
static int
trybucket(bucket_t *bucket, int *i, u32int key)
{
- for(*i = 0; *i < BUCKET_SIZE; *i += 1){
+ for(*i = 0; *i < BUCKETSIZE; *i += 1){
if(bucket->index[*i] == key)
return 1;
if(bucket->index[*i] == 0)
@@ -113,8 +167,15 @@
u32int key = (arenaindex << 16) | blockindex;
bucket_t *bucket = getbucket(key);
int index;
- while(trybucket(bucket, &index, key) != 1)
+ usize tried = 0;
+ while(trybucket(bucket, &index, key) != 1){
bucket++;
+ tried++;
+ if(tried == BUCKETS)
+ sysfatal("internal error: went to unlock entry, but it's not in any bucket!");
+ if(bucket == bucketend)
+ bucket = buckets;
+ }
unlock(bucket);
}
@@ -130,11 +191,11 @@
u32int key = (arenaindex << 16) | blockindex;
bucket_t *bucket = getbucket(key);
usize tried = 0;
- while(tried < NBUCKETS){
+ while(tried < BUCKETS){
lock(bucket);
switch(trybucket(bucket, &i, key)){
case -1:
- // miss. linear probe.
+ // miss, and this bucket is full; linear probe.
unlock(bucket);
tried+=1;
bucket++;
@@ -142,7 +203,7 @@
bucket = buckets;
continue;
case 0:
- // found an empty slot
+ // miss, but there's an empty slot in this bucket.
cacheindex = grabfree();
*buf = data+(usize)cacheindex*8192;
put(bucket, i, key, cacheindex);