shithub: femtolisp

Download patch

ref: ff650e3049019496422fa3a437531a43567e5307
parent: bfbbf051c90844ab0e3e18c10d646013b17e38d9
author: JeffBezanson <jeff.bezanson@gmail.com>
date: Wed May 20 00:30:00 EDT 2009

fixing hash function to do a better job on long lists.


--- a/femtolisp/equal.c
+++ b/femtolisp/equal.c
@@ -259,9 +259,10 @@
 #define doublehash(a) int64to32hash(a)
 #endif
 
-// *ut means we had to start using the table
-static uptrint_t bounded_hash(value_t a, int bound, int *ut)
+// *oob: output argument, means we hit the limit specified by 'bound'
+static uptrint_t bounded_hash(value_t a, int bound, int *oob)
 {
+    *oob = 0;
     double d;
     numerictype_t nt;
     size_t i, len;
@@ -269,12 +270,7 @@
     cprim_t *cp;
     void *data;
     uptrint_t h = 0;
-    if (*ut) {
-        h = (uptrint_t)ptrhash_get(&equal_eq_hashtable, (void*)a);
-        if (h != (uptrint_t)HT_NOTFOUND)
-            return h;
-    }
-    int tg = tag(a);
+    int oob2, tg = tag(a);
     switch(tg) {
     case TAG_NUM :
     case TAG_NUM1:
@@ -282,7 +278,7 @@
         return doublehash(*(int64_t*)&d);
     case TAG_FUNCTION:
         if (uintval(a) > N_BUILTINS)
-            return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut);
+            return bounded_hash(((function_t*)ptr(a))->bcode, bound, oob);
         return inthash(a);
     case TAG_SYM:
         return ((symbol_t*)ptr(a))->hash;
@@ -296,39 +292,39 @@
         cv = (cvalue_t*)ptr(a);
         data = cv_data(cv);
         return memhash(data, cv_len(cv));
+
     case TAG_VECTOR:
         if (bound <= 0) {
-            h = ++(*ut) + (uptrint_t)HT_NOTFOUND;
-            ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
-            return h;
+            *oob = 1;
+            return 1;
         }
         len = vector_size(a);
         for(i=0; i < len; i++) {
-            h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1);
+            h = MIX(h, bounded_hash(vector_elt(a,i), bound/2, &oob2)+1);
+            if (oob2)
+                bound/=2;
+            *oob = *oob || oob2;
         }
         return h;
+
     case TAG_CONS:
-        if (bound <= 0)
+        if (bound <= 0) {
+            *oob = 1;
             return 1;
-        return MIX(bounded_hash(car_(a), bound/2, ut),
-                   bounded_hash(cdr_(a), bound/2, ut)+2);
-        // this should be able to hash long lists with greater fidelity,
-        // but it does not work yet.
-        /*
-        first = a;
-        bb = BOUNDED_HASH_BOUND;
-        do {
-            h = MIX(h, bounded_hash(car_(a), bound-1, ut));
-            a = cdr_(a);
-            bb--;
-            if (bb <= 0) {
-                *ut = 1;
-                ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h);
-                return h;
-            }
-        } while (iscons(a));
-        return MIX(h, bounded_hash(a, bound-1, ut));
-        */
+        }
+        h = bounded_hash(car_(a), bound/2, oob);
+        // bounds balancing: try to share the bounds efficiently
+        // between the car and cdr so we can hash better when a list is
+        // car-shallow and cdr-deep (a common case) or vice-versa.
+        if (*oob)
+            bound/=2;
+        else
+            bound--;
+        h = MIX(h, bounded_hash(cdr_(a), bound, &oob2)+2);
+        // recursive OOB propagation. otherwise this case is slow:
+        // (hash '#2=('#0=(#1=(#1#) . #0#) . #2#))
+        *oob = *oob || oob2;
+        return h;
     }
     return 0;
 }
@@ -342,10 +338,8 @@
 
 uptrint_t hash_lispvalue(value_t a)
 {
-    int ut=0;
-    uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut);
-    if (ut)
-        htable_reset(&equal_eq_hashtable, 512);
+    int oob=0;
+    uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &oob);
     return n;
 }
 
--- a/femtolisp/unittest.lsp
+++ b/femtolisp/unittest.lsp
@@ -154,5 +154,17 @@
 	      (hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
 	      (hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
 
+(assert (equal? (hash '#0=(1 . #0#))
+		(hash '#1=(1 1 . #1#))))
+
+(assert (not (equal? (hash '#0=(1 1 . #0#))
+		     (hash '#1=(1 #0# . #1#)))))
+
+(assert (not (equal? (hash (iota 10))
+		     (hash (iota 20)))))
+
+(assert (not (equal? (hash (iota 41))
+		     (hash (iota 42)))))
+
 (princ "all tests pass\n")
 #t