ref: 0d00f3f90ad95b4f03617ece28db893ef0b99204
dir: /cache.c/
#include <u.h>
#include <libc.h>
// This is designed to run with a maximum load factor < 1,
// and does not support resizing; there is NO EVICTION MECHANISM.
// Insertion when at 100% capacity is definitionally user error.
#pragma pack on
typedef struct {
Lock;
// arena-local block index | arena index << 16
u32int index[8];
u8int values[8*3];
} bucket_t;
#pragma pack off
static char *data;
static bucket_t *buckets, *bucketend;
static u32int *freelist;
static u32int freelen;
static Lock freelock;
enum {
BUCKETSIZE = 8,
CACHESIZE = 1ULL*1024*1024*1024,
// Each cache entry is an 8K buffer.
ENTRYSIZE = 0x2000,
// We target a load factor of 0.25, or a divisor of 4.
// This requires allocating 4x more buckets than strictly required, but the
// memory overhead is relatively low (a mere 3 bytes per kilobyte of cache),
// and this has both performance and design benefits. It makes linear probing
// more practical, removing the need to use a more complicated design in order
// to retain performance.
TARGETDIVISOR = 4,
// We use fibonacci hashing to select buckets, which requires that
// we have a power-of-two number of buckets.
// (Technically, we could have more, but they'd only be used during
// linear probing when the desired bucket was full, and that's silly.)
// (Also technically, we _could_ use division instead of a right-shift
// when selecting the portion of the fibonacci hash to use, so it's
// not strictly Fibonacci hashing that requires power-of-two bucket sizes
// so much as our high-performance implementation thereof).
REQUIREDENTRIES = CACHESIZE / ENTRYSIZE,
ENTRIES = REQUIREDENTRIES * TARGETDIVISOR,
BUCKETS = ENTRIES / BUCKETSIZE,
// CACHEBITS needs to be computed by hand because no logarithms in
// macros. 2^(CACHEBITS) = BUCKETS.
// BUCKETS >> CACHEBITS = 2.
CACHEBITS = 16,
// Fibonacci hashing yields a 64-bit number; shifting it right by this
// value will yield CACHEBITS significant bits.
CACHESHIFT = 64-CACHEBITS,
};
void
cacheinit(void)
{
usize displacement;
assert(sizeof(Lock) == 8);
assert(sizeof(bucket_t) == 64);
// CACHEBITS is computed manually; verify it.
assert(1 << CACHEBITS == BUCKETS || (BUCKETS == 1 && CACHEBITS == 1));
data = sbrk((usize)ENTRYSIZE * ENTRIES);
if(data == (void*)-1)
sysfatal("failed to allocate cache space of %llux", (usize)ENTRYSIZE*ENTRIES);
buckets = sbrk(64 + 64*BUCKETS);
freelist = sbrk(sizeof(u32int) * ENTRIES);
displacement = ((usize)buckets & 0b111111);
if(displacement != 0){
displacement = 64 - displacement;
buckets = (bucket_t*)((usize)buckets + displacement);
}
// alignment
assert(((usize)buckets&0b111111) == 0);
bucketend = buckets + BUCKETS;
freelen = ENTRIES / TARGETDIVISOR;
for(u32int i = 0; i < freelen; i += 1){
freelist[i] = i;
}
}
// For now: random eviction.
// Returns the buffer to use.
static u32int
evict(void)
{
int nbucket, i;
bucket_t *bucket;
u32int index;
while(1){
nbucket = rand() % BUCKETS;
bucket = &buckets[nbucket];
if(!canlock(bucket))
continue;
i = rand() % BUCKETSIZE;
if(bucket->index[i] != 0){
// Found an entry; evict it!
index = bucket->values[i*3] | (bucket->values[i*3+1]<<8) | (bucket->values[i*3+2]<<16);
bucket->index[i] = 0;
unlock(bucket);
return index;
}
unlock(bucket);
}
}
// Grabs a free cache entry, evicting if necessary.
u32int
grabfree(void)
{
u32int result;
lock(&freelock);
if(freelen == 0){
result = evict();
} else {
freelen -= 1;
result = freelist[freelen];
}
unlock(&freelock);
return result;
}
static bucket_t*
getbucket(u32int key)
{
// Fibonacci hashing!
bucket_t *bucket = &buckets[key * 11400714819323198485LL >> CACHESHIFT];
bucket_t *last = &buckets[BUCKETS-1];
if(bucket > last){
fprint(2, "internal error: computed bucket hash is out of range\n");
abort();
}
return bucket;
}
static void
put(bucket_t *bucket, int i, u32int key, u32int cacheindex)
{
bucket->index[i] = key;
bucket->values[i*3] = cacheindex&0xFF;
bucket->values[i*3+1] = (cacheindex>>8)&0xFF;
bucket->values[i*3+2] = cacheindex>>16;
}
static void
get(bucket_t *bucket, int i, char **buf)
{
usize cacheindex = bucket->values[i*3] | (bucket->values[i*3+1]<<8) | (bucket->values[i*3+2]<<16);
*buf = data + cacheindex * 8192;
}
static int
trybucket(bucket_t *bucket, int *i, u32int key)
{
for(*i = 0; *i < BUCKETSIZE; *i += 1){
if(bucket->index[*i] == key)
return 1;
if(bucket->index[*i] == 0)
return 0;
}
return -1;
}
void
cacheunlock(u16int arenaindex, u32int blockindex)
{
u32int key = (arenaindex << 16) | blockindex;
bucket_t *bucket = getbucket(key);
int index;
usize tried = 0;
while(trybucket(bucket, &index, key) != 1){
bucket++;
tried++;
if(tried == BUCKETS)
sysfatal("internal error: went to unlock entry, but it's not in any bucket!");
if(bucket == bucketend)
bucket = buckets;
}
unlock(bucket);
}
// Looks up a cache entry. If the entry is not present in cache,
// this grabs a free buffer, inserts it into cache, and yields
// its index so that the caller may fill it up.
// Returns whether the data was already in cache.
// ALWAYS returns the entry locked. The caller MUST unlock via
// cacheunlock when done with the buffer!
int
cachelookup(char **buf, u16int arenaindex, u32int blockindex)
{
u32int cacheindex;
int i;
u32int key = (arenaindex << 16) | blockindex;
bucket_t *bucket = getbucket(key);
usize tried = 0;
while(tried < BUCKETS){
lock(bucket);
switch(trybucket(bucket, &i, key)){
case -1:
// miss, and this bucket is full; linear probe.
unlock(bucket);
tried+=1;
bucket++;
if(bucket == bucketend)
bucket = buckets;
continue;
case 0:
// miss, but there's an empty slot in this bucket.
cacheindex = grabfree();
*buf = data+(usize)cacheindex*8192;
put(bucket, i, key, cacheindex);
return 0;
case 1:
// already exists
get(bucket, i, buf);
return 1;
}
}
fprint(2, "impossible\n");
// DO NOT PROCEED if the cache might be corrupt!
abort();
return 0;
}