ref: 12c9d2fc728b51aa1eb9a70d0d331eb9464912d9
dir: /src/equal.c/
#include "sl.h"
#include "operators.h"
#include "cvalues.h"
#include "equal.h"
#include "hashing.h"
#define BOUNDED_COMPARE_BOUND 128
#define BOUNDED_HASH_BOUND 16384
#if defined(BITS64)
#define MIX(a, b) inthash((sl_v)(a) ^ (sl_v)(b));
#define doublehash(a) inthash(a)
#else
#define MIX(a, b) int64to32hash((u64int)(a)<<32 | (u64int)(b));
#define doublehash(a) int64to32hash(a)
#endif
// comparable tag
#define cmptag(v) (isfixnum(v) ? TAG_NUM : tag(v))
static sl_v
eq_class(sl_htable *table, sl_v key)
{
sl_v c = (sl_v)ptrhash_get(table, (void*)key);
if(c == (sl_v)HT_NOTFOUND)
return sl_nil;
if(c == key)
return c;
return eq_class(table, c);
}
static void
eq_union(sl_htable *table, sl_v a, sl_v b, sl_v c, sl_v cb)
{
sl_v ca = c == sl_nil ? a : c;
if(cb != sl_nil)
ptrhash_put(table, (void*)cb, (void*)ca);
ptrhash_put(table, (void*)a, (void*)ca);
ptrhash_put(table, (void*)b, (void*)ca);
}
static sl_v bounded_compare(sl_v a, sl_v b, int bound, bool eq);
static sl_v cyc_compare(sl_v a, sl_v b, sl_htable *table, bool eq);
static sl_v
bounded_vector_compare(sl_v a, sl_v b, int bound, bool eq)
{
usize la = vector_size(a);
usize lb = vector_size(b);
usize m, i;
if(eq && la != lb)
return fixnum(1);
m = la < lb ? la : lb;
for(i = 0; i < m; i++){
sl_v d = bounded_compare(vector_elt(a, i), vector_elt(b, i), bound-1, eq);
if(d == sl_nil || numval(d) != 0)
return d;
}
if(la < lb)
return fixnum(-1);
if(la > lb)
return fixnum(1);
return fixnum(0);
}
// strange comparisons are resolved arbitrarily but consistently.
// ordering: number < cprim < function < vector < cvalue < symbol < cons
static sl_v
bounded_compare(sl_v a, sl_v b, int bound, bool eq)
{
sl_v d;
csl_v *cv;
compare_top:
if(a == b)
return fixnum(0);
if(bound <= 0)
return sl_nil;
int taga = tag(a);
int tagb = cmptag(b);
int c;
switch(taga){
case TAG_NUM :
case TAG_NUM1:
if(isfixnum(b))
return (sl_fx)a < (sl_fx)b ? fixnum(-1) : fixnum(1);
if(iscprim(b)){
if(cp_class(ptr(b)) == sl_runetype)
return fixnum(1);
return fixnum(numeric_compare(a, b, eq, true, false));
}
if(iscvalue(b)){
cv = ptr(b);
if(valid_numtype(cv_class(cv)->numtype))
return fixnum(numeric_compare(a, b, eq, true, false));
}
return fixnum(-1);
case TAG_SYM:
if(eq || tagb < TAG_SYM)
return fixnum(1);
if(tagb > TAG_SYM)
return fixnum(-1);
return fixnum(strcmp(symbol_name(a), symbol_name(b)));
case TAG_VECTOR:
if(isvector(b))
return bounded_vector_compare(a, b, bound, eq);
break;
case TAG_CPRIM:
if(cp_class(ptr(a)) == sl_runetype){
if(!iscprim(b) || cp_class(ptr(b)) != sl_runetype)
return fixnum(-1);
}else if(iscprim(b) && cp_class(ptr(b)) == sl_runetype)
return fixnum(1);
c = numeric_compare(a, b, eq, true, false);
if(c != 2)
return fixnum(c);
break;
case TAG_CVALUE:
cv = ptr(a);
if(valid_numtype(cv_class(cv)->numtype)){
if((c = numeric_compare(a, b, eq, true, false)) != 2)
return fixnum(c);
}
if(iscvalue(b)){
if(cv_isPOD(ptr(a)) && cv_isPOD(ptr(b)))
return cvalue_compare(a, b);
return fixnum(1);
}
break;
case TAG_FUNCTION:
if(tagb == TAG_FUNCTION){
if(uintval(a) > N_BUILTINS && uintval(b) > N_BUILTINS){
sl_fn *fa = ptr(a);
sl_fn *fb = ptr(b);
d = bounded_compare(fa->bcode, fb->bcode, bound-1, eq);
if(d == sl_nil || numval(d) != 0)
return d;
d = bounded_compare(fa->vals, fb->vals, bound-1, eq);
if(d == sl_nil || numval(d) != 0)
return d;
d = bounded_compare(fa->env, fb->env, bound-1, eq);
if(d == sl_nil || numval(d) != 0)
return d;
return fixnum(0);
}
return uintval(a) < uintval(b) ? fixnum(-1) : fixnum(1);
}
break;
case TAG_CONS:
if(tagb < TAG_CONS)
return fixnum(1);
d = bounded_compare(car_(a), car_(b), bound-1, eq);
if(d == sl_nil || numval(d) != 0)
return d;
a = cdr_(a); b = cdr_(b);
bound--;
goto compare_top;
}
return taga < tagb ? fixnum(-1) : fixnum(1);
}
static sl_v
cyc_vector_compare(sl_v a, sl_v b, sl_htable *table, bool eq)
{
usize la = vector_size(a);
usize lb = vector_size(b);
usize m, i;
sl_v d, xa, xb, ca, cb;
// first try to prove them different with no recursion
if(eq && la != lb)
return fixnum(1);
m = la < lb ? la : lb;
for(i = 0; i < m; i++){
xa = vector_elt(a, i);
xb = vector_elt(b, i);
if(leafp(xa) || leafp(xb)){
d = bounded_compare(xa, xb, 1, eq);
if(d != sl_nil && numval(d) != 0)
return d;
}else if(tag(xa) < tag(xb))
return fixnum(-1);
else if(tag(xa) > tag(xb))
return fixnum(1);
}
ca = eq_class(table, a);
cb = eq_class(table, b);
if(ca != sl_nil && ca == cb)
return fixnum(0);
eq_union(table, a, b, ca, cb);
for(i = 0; i < m; i++){
xa = vector_elt(a, i);
xb = vector_elt(b, i);
if(!leafp(xa) || tag(xa) == TAG_FUNCTION){
d = cyc_compare(xa, xb, table, eq);
if(numval(d) != 0)
return d;
}
}
if(la < lb)
return fixnum(-1);
if(la > lb)
return fixnum(1);
return fixnum(0);
}
static sl_v
cyc_compare(sl_v a, sl_v b, sl_htable *table, bool eq)
{
sl_v d, ca, cb;
cyc_compare_top:
if(a == b)
return fixnum(0);
if(iscons(a)){
if(iscons(b)){
sl_v aa = car_(a);
sl_v da = cdr_(a);
sl_v ab = car_(b);
sl_v db = cdr_(b);
int tagaa = tag(aa);
int tagda = tag(da);
int tagab = tag(ab);
int tagdb = tag(db);
if(leafp(aa) || leafp(ab)){
d = bounded_compare(aa, ab, 1, eq);
if(d != sl_nil && numval(d) != 0)
return d;
}
if(tagaa < tagab)
return fixnum(-1);
if(tagaa > tagab)
return fixnum(1);
if(leafp(da) || leafp(db)){
d = bounded_compare(da, db, 1, eq);
if(d != sl_nil && numval(d) != 0)
return d;
}
if(tagda < tagdb)
return fixnum(-1);
if(tagda > tagdb)
return fixnum(1);
ca = eq_class(table, a);
cb = eq_class(table, b);
if(ca != sl_nil && ca == cb)
return fixnum(0);
eq_union(table, a, b, ca, cb);
d = cyc_compare(aa, ab, table, eq);
if(numval(d) != 0)
return d;
a = da;
b = db;
goto cyc_compare_top;
}else{
return fixnum(1);
}
}
if(isvector(a) && isvector(b))
return cyc_vector_compare(a, b, table, eq);
if(isfunction(a) && isfunction(b)){
sl_fn *fa = ptr(a);
sl_fn *fb = ptr(b);
d = bounded_compare(fa->bcode, fb->bcode, 1, eq);
if(numval(d) != 0)
return d;
ca = eq_class(table, a);
cb = eq_class(table, b);
if(ca != sl_nil && ca == cb)
return fixnum(0);
eq_union(table, a, b, ca, cb);
d = cyc_compare(fa->vals, fb->vals, table, eq);
if(numval(d) != 0)
return d;
a = fa->env;
b = fb->env;
goto cyc_compare_top;
}
return bounded_compare(a, b, 1, eq);
}
static sl_htable equal_eq_hashtable;
void
comparehash_init(void)
{
htable_new(&equal_eq_hashtable, 512);
}
// 'eq' means unordered comparison is sufficient
sl_v
sl_compare(sl_v a, sl_v b, bool eq)
{
sl_v guess = bounded_compare(a, b, BOUNDED_COMPARE_BOUND, eq);
if(guess == sl_nil){
guess = cyc_compare(a, b, &equal_eq_hashtable, eq);
htable_reset(&equal_eq_hashtable, 512);
}
return guess;
}
/*
optimizations:
- use hash updates instead of calling lookup then insert. i.e. get the
bp once and use it twice.
* preallocate hash table and call reset() instead of new/free
* less redundant tag checking, 3-bit tags
*/
// *oob: output argument, means we hit the limit specified by 'bound'
static uintptr
bounded_hash(sl_v a, int bound, bool *oob)
{
union {
double d;
s64int i64;
}u;
sl_numtype nt;
usize i, len;
csl_v *cv;
sl_cprim *cp;
void *data;
uintptr h = 0;
int tg = tag(a);
bool oob2;
*oob = false;
switch(tg){
case TAG_NUM :
case TAG_NUM1:
u.d = (double)numval(a);
return doublehash(u.i64);
case TAG_FUNCTION:
if(uintval(a) > N_BUILTINS)
return bounded_hash(((sl_fn*)ptr(a))->bcode, bound, oob);
return inthash(a);
case TAG_SYM:
return ((sl_sym*)ptr(a))->hash;
case TAG_CPRIM:
cp = ptr(a);
data = cp_data(cp);
if(cp_class(cp) == sl_runetype)
return inthash(*(Rune*)data);
nt = cp_numtype(cp);
u.d = conv_to_double(data, nt);
return doublehash(u.i64);
case TAG_CVALUE:
cv = ptr(a);
data = cv_data(cv);
if(cv->type == sl_mptype){
len = mptobe(*(mpint**)data, nil, 0, (u8int**)&data);
h = memhash(data, len);
MEM_FREE(data);
}else{
h = memhash(data, cv_len(cv));
}
return h;
case TAG_VECTOR:
if(bound <= 0){
*oob = true;
return 1;
}
len = vector_size(a);
for(i = 0; i < len; i++){
h = MIX(h, bounded_hash(vector_elt(a, i), bound/2, &oob2)^1);
if(oob2)
bound /= 2;
*oob = *oob || oob2;
}
return h;
case TAG_CONS:
do{
if(bound <= 0){
*oob = true;
return h;
}
h = MIX(h, bounded_hash(car_(a), bound/2, &oob2));
// bounds balancing: try to share the bounds efficiently
// so we can hash better when a list is cdr-deep (a common case)
if(oob2)
bound /= 2;
else
bound--;
// recursive OOB propagation. otherwise this case is slow:
// (hash '#2=((#0=(#1=(#1#) . #0#)) . #2#))
*oob = *oob || oob2;
a = cdr_(a);
}while(iscons(a));
h = MIX(h, bounded_hash(a, bound-1, &oob2)^2);
*oob = *oob || oob2;
return h;
}
return 0;
}
int
equal_lispvalue(sl_v a, sl_v b)
{
if(eq_comparable(a, b))
return a == b;
return numval(sl_compare(a, b, true)) == 0;
}
uintptr
hash_lispvalue(sl_v a)
{
bool oob = false;
return bounded_hash(a, BOUNDED_HASH_BOUND, &oob);
}
BUILTIN("hash", hash)
{
argcount(nargs, 1);
return fixnum(hash_lispvalue(args[0]));
}