ref: 4b60d3dd32efd00a1d07cb08662926ba9836719f
dir: /cache.c/
#include <u.h>
#include <libc.h>

// This is designed to run with a maximum load factor < 1 and does not
// support resizing. Once the buffer pool is exhausted, entries are
// reclaimed by random eviction (see evict below), so the table itself
// never approaches 100% capacity.

#pragma pack on
typedef struct {
	Lock;
	// arena-local block index | arena index << 16
	u32int index[8];
	u8int values[8*3];
} bucket_t;
#pragma pack off

static char *data;
static bucket_t *buckets, *bucketend;
static long nextbuf = 0;
static int full = 0;

enum {
	BUCKETSIZE = 8,
	CACHESIZE = 1ULL*1024*1024*1024,
	// Each cache entry is an 8K buffer.
	ENTRYSIZE = 0x2000,
	// We target a load factor of 0.25, or a divisor of 4.
	// This requires allocating 4x more bucket slots than strictly required,
	// but the memory overhead is relatively low (a mere 3 bytes per kilobyte
	// of cache), and it has both performance and design benefits: it keeps
	// linear probing practical, removing the need for a more complicated
	// design in order to retain performance.
	TARGETDIVISOR = 4,
	// We use Fibonacci hashing to select buckets, which requires a
	// power-of-two number of buckets.
	// (Technically, we could have more, but they'd only be used during
	// linear probing when the desired bucket was full, and that's silly.)
	// (Also technically, we _could_ use division instead of a right shift
	// when selecting the portion of the Fibonacci hash to use, so it's not
	// strictly Fibonacci hashing that requires a power-of-two bucket count
	// so much as our high-performance implementation thereof.)
	REQENTRIES = CACHESIZE / ENTRYSIZE,
	ENTRIES = REQENTRIES * TARGETDIVISOR,
	BUCKETS = ENTRIES / BUCKETSIZE,
	// CACHEBITS must be computed by hand, since there are no logarithms
	// in constant expressions. 2^CACHEBITS = BUCKETS:
	// REQENTRIES = 2^30/2^13 = 2^17; ENTRIES = 2^17 * 4 = 2^19;
	// BUCKETS = 2^19/2^3 = 2^16.
	CACHEBITS = 16,
	// Fibonacci hashing yields a 64-bit number; shifting it right by this
	// value keeps the CACHEBITS most significant bits.
	CACHESHIFT = 64-CACHEBITS,
};

void
cacheinit(void)
{
	usize displacement;

	assert(sizeof(Lock) == 8);
	assert(sizeof(bucket_t) == 64);
	// CACHEBITS is computed manually; verify it.
	assert(1 << CACHEBITS == BUCKETS || (BUCKETS == 1 && CACHEBITS == 1));
	// Only REQENTRIES buffers are ever handed out (ENTRIES counts hash
	// slots, not buffers), so that is all the space we need.
	data = sbrk((usize)ENTRYSIZE * REQENTRIES);
	if(data == (void*)-1)
		sysfatal("failed to allocate cache space of %llux", (usize)ENTRYSIZE*REQENTRIES);
	// Over-allocate by one bucket so the array can be aligned up to a
	// 64-byte boundary.
	buckets = sbrk(64 + 64*BUCKETS);
	if(buckets == (void*)-1)
		sysfatal("failed to allocate bucket space");
	displacement = (usize)buckets & 0b111111;
	if(displacement != 0){
		displacement = 64 - displacement;
		buckets = (bucket_t*)((usize)buckets + displacement);
	}
	// alignment
	assert(((usize)buckets & 0b111111) == 0);
	bucketend = buckets + BUCKETS;
}

// For now: random eviction.
// Returns the buffer to use.
static u32int
evict(void)
{
	int nbucket, i;
	bucket_t *bucket;
	u32int index;

	while(1){
		// nrand, not rand: rand yields only 15 bits, which cannot
		// reach every bucket.
		nbucket = nrand(BUCKETS);
		bucket = &buckets[nbucket];
		if(!canlock(bucket))
			continue;
		i = nrand(BUCKETSIZE);
		if(bucket->index[i] != 0){
			// Found an entry; evict it!
			index = bucket->values[i*3]
				| (bucket->values[i*3+1]<<8)
				| (bucket->values[i*3+2]<<16);
			bucket->index[i] = 0;
			unlock(bucket);
			return index;
		}
		unlock(bucket);
	}
}

// Grabs a free cache entry, evicting if necessary.
u32int
grabfree(void)
{
	long result;

	if(full)
		return evict();
	// ainc returns the incremented value; subtract 1 for a zero-based
	// buffer index.
	result = ainc(&nextbuf) - 1;
	if(result >= REQENTRIES){
		full = 1;
		return evict();
	}
	return result;
}

static bucket_t*
getbucket(u32int key)
{
	// Fibonacci hashing!
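	// The multiplier is floor(2^64 / phi), where phi is the golden
	// ratio; it is odd, so the product scrambles the key across all
	// 64 bits, and the shift below keeps the top CACHEBITS of them.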
	bucket_t *bucket = &buckets[key * 11400714819323198485ULL >> CACHESHIFT];
	bucket_t *last = &buckets[BUCKETS-1];

	if(bucket > last){
		fprint(2, "internal error: computed bucket hash is out of range\n");
		abort();
	}
	return bucket;
}

static void
put(bucket_t *bucket, int i, u32int key, u32int cacheindex)
{
	bucket->index[i] = key;
	// Pack the 24-bit buffer index into three bytes, little-endian.
	bucket->values[i*3] = cacheindex & 0xFF;
	bucket->values[i*3+1] = (cacheindex>>8) & 0xFF;
	bucket->values[i*3+2] = cacheindex>>16;
}

static void
get(bucket_t *bucket, int i, char **buf)
{
	usize cacheindex = bucket->values[i*3]
		| (bucket->values[i*3+1]<<8)
		| (bucket->values[i*3+2]<<16);

	*buf = data + cacheindex * ENTRYSIZE;
}

// Scans a bucket for key. Returns 1 if it was found (*i is its slot),
// 0 if it is absent but the bucket has a free slot (*i), or -1 if it
// is absent and the bucket is full.
static int
trybucket(bucket_t *bucket, int *i, u32int key)
{
	for(*i = 0; *i < BUCKETSIZE; *i += 1){
		if(bucket->index[*i] == key)
			return 1;
		if(bucket->index[*i] == 0)
			return 0;
	}
	return -1;
}

void
cacheunlock(u16int arenaindex, u32int blockindex)
{
	u32int key = (arenaindex << 16) | blockindex;
	bucket_t *bucket = getbucket(key);
	int index;
	usize tried = 0;

	// Keep probing past empty slots: eviction can punch holes in a
	// probe chain, so an empty slot does not prove the key is absent.
	while(trybucket(bucket, &index, key) != 1){
		bucket++;
		tried++;
		if(tried == BUCKETS)
			sysfatal("internal error: went to unlock an entry that is not in any bucket");
		if(bucket == bucketend)
			bucket = buckets;
	}
	unlock(bucket);
}

// Looks up a cache entry. If the entry is not present in the cache,
// this grabs a free buffer, inserts it into the cache, and yields its
// index so that the caller may fill it.
// Returns whether the data was already in the cache.
// ALWAYS returns with the entry's bucket locked. The caller MUST
// unlock it via cacheunlock when done with the buffer!
int
cachelookup(char **buf, u16int arenaindex, u32int blockindex)
{
	u32int cacheindex;
	int i;
	u32int key = (arenaindex << 16) | blockindex;
	bucket_t *bucket = getbucket(key);
	usize tried = 0;

	while(tried < BUCKETS){
		lock(bucket);
		switch(trybucket(bucket, &i, key)){
		case -1:
			// Miss, and this bucket is full; linear-probe onward.
			unlock(bucket);
			tried++;
			bucket++;
			if(bucket == bucketend)
				bucket = buckets;
			continue;
		case 0:
			// Miss, but there's an empty slot in this bucket.
			cacheindex = grabfree();
			*buf = data + (usize)cacheindex*ENTRYSIZE;
			put(bucket, i, key, cacheindex);
			return 0;
		case 1:
			// Already exists.
			get(bucket, i, buf);
			return 1;
		}
	}
	fprint(2, "internal error: probed every bucket without finding the key or a free slot\n");
	// DO NOT PROCEED if the cache might be corrupt!
	abort();
	return 0;
}
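
// Usage sketch (hypothetical caller: loadfromdisk and useblock are
// illustrative stand-ins, not functions in this file). On a miss,
// cachelookup hands back a freshly grabbed, locked buffer for the
// caller to fill; on a hit it returns the cached data. Either way the
// bucket stays locked, so the buffer is safe to use until cacheunlock,
// after which eviction may recycle it at any time.
//
//	void
//	readblock(u16int arena, u32int block)
//	{
//		char *buf;
//
//		if(cachelookup(&buf, arena, block) == 0)
//			loadfromdisk(arena, block, buf);	// miss: fill the 8K buffer
//		useblock(buf);				// bucket still locked; buf is stable
//		cacheunlock(arena, block);		// buf may be recycled after this
//	}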