shithub: purgatorio

ref: c116550e6a41572796e4db65e4f6acbcb3d9d6f8
dir: /appl/lib/bloomfilter.b/

View raw version
implement Bloomfilter;

include "sys.m";
	sys: Sys;
include "sets.m";
	sets: Sets;
	Set: import sets;
include "keyring.m";
	keyring: Keyring;
include "bloomfilter.m";

init()
{
	sys = load Sys Sys->PATH;
	sets = load Sets Sets->PATH;
	sets->init();
	keyring = load Keyring Keyring->PATH;
}

Bits: adt {
	d: array of byte;
	n: int;			# bits used
	get:	fn(bits: self ref Bits, n: int): int;
};

filter(d: array of byte, logm, k: int): Set
{
	if(logm < 3 || logm > 30)
		raise "invalid bloom filter size";
	nb := 1 << logm;
	f := array[nb / 8 + 1] of {* => byte 0};	# one extra zero to make sure set's not inverted.
	bits := hashbits(d, logm * k);
	while(k--){
		v := bits.get(logm);
		f[v >> 3] |= byte 1 << (v & 7);
	}
	return sets->bytes2set(f);
}

hashbits(data: array of byte, n: int): ref Bits
{
	d := array[((n + 7) / 8)] of byte;
	digest := array[Keyring->SHA1dlen] of byte;
	state := keyring->sha1(data, len data, nil, nil);
	extra := array[2] of byte;
	e := 0;
	for(i := 0; i < len d; i += Keyring->SHA1dlen){
		extra[0] = byte e;
		extra[1] = byte (e>>8);
		e++;
		state = keyring->sha1(extra, len extra, digest, state);
		if(i + Keyring->SHA1dlen > len d)
			digest = digest[0:len d - i];
		d[i:] = digest;
	}
	return ref Bits(d, 0);
}

# XXX could be more efficient.
Bits.get(bits: self ref Bits, n: int): int
{
	d := bits.d;
	v := 0;
	nb := bits.n;
	for(i := 0; i < n; i++){
		j := nb + i;
		if(int d[j >> 3] & (1 << (j & 7)))
			v |= (1 << i);
	}
	bits.n += n;
	return v;
}