shithub: neoventi

ref: 4b60d3dd32efd00a1d07cb08662926ba9836719f
dir: /cache.c/

View raw version
#include <u.h>
#include <libc.h>

// This is designed to run with a maximum load factor < 1,
// and does not support resizing; there is NO EVICTION MECHANISM.
// Insertion when at 100% capacity is definitionally user error.

#pragma pack on
typedef struct {
	Lock;
	// arena-local block index | arena index << 16
	u32int index[8];
	u8int values[8*3];
} bucket_t;
#pragma pack off

static char *data;
static bucket_t *buckets, *bucketend;
static long nextbuf = 0;
static int full = 0;

enum {
	BUCKETSIZE = 8,
	CACHESIZE = 1ULL*1024*1024*1024,
	// Each cache entry is an 8K buffer.
	ENTRYSIZE = 0x2000,
	// We target a load factor of 0.25, or a divisor of 4.
	// This requires allocating 4x more buckets than strictly required, but the
	// memory overhead is relatively low (a mere 3 bytes per kilobyte of cache),
	// and this has both performance and design benefits. It makes linear probing
	// more practical, removing the need to use a more complicated design in order
	// to retain performance.
	TARGETDIVISOR = 4,
	// We use fibonacci hashing to select buckets, which requires that
	// we have a power-of-two number of buckets.
	// (Technically, we could have more, but they'd only be used during
	// linear probing when the desired bucket was full, and that's silly.)
	// (Also technically, we _could_ use division instead of a right-shift
	// when selecting the portion of the fibonacci hash to use, so it's
	// not strictly Fibonacci hashing that requires power-of-two bucket sizes
	// so much as our high-performance implementation thereof).
	REQENTRIES = CACHESIZE / ENTRYSIZE,
	ENTRIES = REQENTRIES * TARGETDIVISOR,
	BUCKETS = ENTRIES / BUCKETSIZE,
	// CACHEBITS needs to be computed by hand because no logarithms in
	// macros. 2^(CACHEBITS) = BUCKETS.
	// BUCKETS >> CACHEBITS = 2.
	CACHEBITS = 16,
	// Fibonacci hashing yields a 64-bit number; shifting it right by this
	// value will yield CACHEBITS significant bits.
	CACHESHIFT = 64-CACHEBITS,
};

void
cacheinit(void)
{
	usize displacement;
	assert(sizeof(Lock) == 8);
	assert(sizeof(bucket_t) == 64);
	// CACHEBITS is computed manually; verify it.
	assert(1 << CACHEBITS == BUCKETS || (BUCKETS == 1 && CACHEBITS == 1));
	data = sbrk((usize)ENTRYSIZE * ENTRIES);
	if(data == (void*)-1)
		sysfatal("failed to allocate cache space of %llux", (usize)ENTRYSIZE*ENTRIES);
	buckets = sbrk(64 + 64*BUCKETS);
	displacement = ((usize)buckets & 0b111111);
	if(displacement != 0){
		displacement = 64 - displacement;
		buckets = (bucket_t*)((usize)buckets + displacement);
	}
	// alignment
	assert(((usize)buckets&0b111111) == 0);
	bucketend = buckets + BUCKETS;
}

// For now: random eviction.
// Returns the buffer to use.
static u32int
evict(void)
{
	int nbucket, i;
	bucket_t *bucket;
	u32int index;
	while(1){
		nbucket = rand() % BUCKETS;
		bucket = &buckets[nbucket];
		if(!canlock(bucket))
			continue;
		i = rand() % BUCKETSIZE;
		if(bucket->index[i] != 0){
			// Found an entry; evict it!
			index = bucket->values[i*3] | (bucket->values[i*3+1]<<8) | (bucket->values[i*3+2]<<16);
			bucket->index[i] = 0;
			unlock(bucket);
			return index;
		}
		unlock(bucket);
	}
}

// Grabs a free cache entry, evicting if necessary.
u32int
grabfree(void)
{
	u32int result;
	if(full)
		return evict();
	result = ainc(&nextbuf);
	if(result >= REQENTRIES){
		full = 1;
		return evict();
	}
	return result;
}

static bucket_t*
getbucket(u32int key)
{
	// Fibonacci hashing!
	bucket_t *bucket = &buckets[key * 11400714819323198485LL >> CACHESHIFT];
	bucket_t *last = &buckets[BUCKETS-1];
	if(bucket > last){
		fprint(2, "internal error: computed bucket hash is out of range\n");
		abort();
	}
	return bucket;
}

static void
put(bucket_t *bucket, int i, u32int key, u32int cacheindex)
{
	bucket->index[i] = key;
	bucket->values[i*3] = cacheindex&0xFF;
	bucket->values[i*3+1] = (cacheindex>>8)&0xFF;
	bucket->values[i*3+2] = cacheindex>>16;
}

static void
get(bucket_t *bucket, int i, char **buf)
{
	usize cacheindex = bucket->values[i*3] | (bucket->values[i*3+1]<<8) | (bucket->values[i*3+2]<<16);
	*buf = data + cacheindex * 8192;
}

static int
trybucket(bucket_t *bucket, int *i, u32int key)
{
	for(*i = 0; *i < BUCKETSIZE; *i += 1){
		if(bucket->index[*i] == key)
			return 1;
		if(bucket->index[*i] == 0)
			return 0;
	}
	return -1;
}

void
cacheunlock(u16int arenaindex, u32int blockindex)
{
	u32int key = (arenaindex << 16) | blockindex;
	bucket_t *bucket = getbucket(key);
	int index;
	usize tried = 0;
	while(trybucket(bucket, &index, key) != 1){
		bucket++;
		tried++;
		if(tried == BUCKETS)
			sysfatal("internal error: went to unlock entry, but it's not in any bucket!");
		if(bucket == bucketend)
			bucket = buckets;
	}
	unlock(bucket);
}

// Looks up a cache entry. If the entry is not present in cache,
// this grabs a free buffer, inserts it into cache, and yields
// its index so that the caller may fill it up.
// Returns whether the data was already in cache.
// ALWAYS returns the entry locked. The caller MUST unlock via
// cacheunlock when done with the buffer!
int
cachelookup(char **buf, u16int arenaindex, u32int blockindex)
{
	u32int cacheindex;
	int i;
	u32int key = (arenaindex << 16) | blockindex;
	bucket_t *bucket = getbucket(key);
	usize tried = 0;
	while(tried < BUCKETS){
		lock(bucket);
		switch(trybucket(bucket, &i, key)){
		case -1:
			// miss, and this bucket is full; linear probe.
			unlock(bucket);
			tried+=1;
			bucket++;
			if(bucket == bucketend)
				bucket = buckets;
			continue;
		case 0:
			// miss, but there's an empty slot in this bucket.
			cacheindex = grabfree();
			*buf = data+(usize)cacheindex*8192;
			put(bucket, i, key, cacheindex);
			return 0;
		case 1:
			// already exists
			get(bucket, i, buf);
			return 1;
		}
	}
	fprint(2, "impossible\n");
	// DO NOT PROCEED if the cache might be corrupt!
	abort();
	return 0;
}