shithub: femtolisp

--- a/3rd/lookup3.c

+++ /dev/null

@@ -1,319 +1,0 @@

-/*

--------------------------------------------------------------------------------

-lookup3.c, by Bob Jenkins, May 2006, Public Domain.

-These are functions for producing 32-bit hashes for hash table lookup.

-hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()

-are externally useful functions.  You can use this free for any purpose.

-It's in the public domain.  It has no warranty.

-If you want to find a hash of, say, exactly 7 integers, do

-  a = i1;  b = i2;  c = i3;

-  mix(a,b,c);

-  a += i4; b += i5; c += i6;

-  mix(a,b,c);

-  a += i7;

-  final(a,b,c);

-then use c as the hash value.  If you have a variable length array of

-4-byte integers to hash, use hashword().  If you have a byte array (like

-a character string), use hashlittle().  If you have several byte arrays, or

-a mix of things, see the comments above hashlittle().

-Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,

-then mix those integers.  This is fast (you can do a lot more thorough

-mixing with 12*3 instructions on 3 integers than you can with 3 instructions

-on 1 byte), but shoehorning those bytes into integers efficiently is messy.

--------------------------------------------------------------------------------

-*/

-/*

- * My best guess at if you are big-endian or little-endian.  This may

- * need adjustment.

- */

-#if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && BYTE_ORDER == LITTLE_ENDIAN

-#define HASH_LITTLE_ENDIAN 1

-#define HASH_BIG_ENDIAN 0

-#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN

-#define HASH_LITTLE_ENDIAN 0

-#define HASH_BIG_ENDIAN 1

-#else

-#error endianess unknown

-#endif

-#define hashsize(n) ((uint32_t)1<<(n))

-#define hashmask(n) (hashsize(n)-1)

-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

-/*

--------------------------------------------------------------------------------

-mix -- mix 3 32-bit values reversibly.

-This is reversible, so any information in (a,b,c) before mix() is

-still in (a,b,c) after mix().

-If four pairs of (a,b,c) inputs are run through mix(), or through

-mix() in reverse, there are at least 32 bits of the output that

-are sometimes the same for one pair and different for another pair.

-This was tested for:

-* pairs that differed by one bit, by two bits, in any combination

-  of top bits of (a,b,c), or in any combination of bottom bits of

-  (a,b,c).

-* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed

-  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as

-  is commonly produced by subtraction) look like a single 1-bit

-  difference.

-* the base values were pseudorandom, all zero but one bit set, or

-  all zero plus a counter that starts at zero.

-Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that

-satisfy this are

-    4  6  8 16 19  4

-    9 15  3 18 27 15

-   14  9  3  7 17  3

-Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing

-for "differ" defined as + with a one-bit base and a two-bit delta.  I

-used http://burtleburtle.net/bob/hash/avalanche.html to choose

-the operations, constants, and arrangements of the variables.

-This does not achieve avalanche.  There are input bits of (a,b,c)

-that fail to affect some output bits of (a,b,c), especially of a.  The

-most thoroughly mixed value is c, but it doesn't really even achieve

-avalanche in c.

-This allows some parallelism.  Read-after-writes are good at doubling

-the number of bits affected, so the goal of mixing pulls in the opposite

-direction as the goal of parallelism.  I did what I could.  Rotates

-seem to cost as much as shifts on every machine I could lay my hands

-on, and rotates are much kinder to the top and bottom bits, so I used

-rotates.

--------------------------------------------------------------------------------

-*/

-#define mix(a,b,c) \

-{ \

-  a -= c;  a ^= rot(c, 4);  c += b; \

-  b -= a;  b ^= rot(a, 6);  a += c; \

-  c -= b;  c ^= rot(b, 8);  b += a; \

-  a -= c;  a ^= rot(c,16);  c += b; \

-  b -= a;  b ^= rot(a,19);  a += c; \

-  c -= b;  c ^= rot(b, 4);  b += a; \

-}

-/*

--------------------------------------------------------------------------------

-final -- final mixing of 3 32-bit values (a,b,c) into c

-Pairs of (a,b,c) values differing in only a few bits will usually

-produce values of c that look totally different.  This was tested for

-* pairs that differed by one bit, by two bits, in any combination

-  of top bits of (a,b,c), or in any combination of bottom bits of

-  (a,b,c).

-* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed

-  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as

-  is commonly produced by subtraction) look like a single 1-bit

-  difference.

-* the base values were pseudorandom, all zero but one bit set, or

-  all zero plus a counter that starts at zero.

-These constants passed:

- 14 11 25 16 4 14 24

- 12 14 25 16 4 14 24

-and these came close:

-  4  8 15 26 3 22 24

- 10  8 15 26 3 22 24

- 11  8 15 26 3 22 24

--------------------------------------------------------------------------------

-*/

-#define final(a,b,c) \

-{ \

-  c ^= b; c -= rot(b,14); \

-  a ^= c; a -= rot(c,11); \

-  b ^= a; b -= rot(a,25); \

-  c ^= b; c -= rot(b,16); \

-  a ^= c; a -= rot(c,4);  \

-  b ^= a; b -= rot(a,14); \

-  c ^= b; c -= rot(b,24); \

-}

-/*

- * hashlittle2: return 2 32-bit hash values

- *

- * This is identical to hashlittle(), except it returns two 32-bit hash

- * values instead of just one.  This is good enough for hash table

- * lookup with 2^^64 buckets, or if you want a second hash if you're not

- * happy with the first, or if you want a probably-unique 64-bit ID for

- * the key.  *pc is better mixed than *pb, so use *pc first.  If you want

- * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".

- */

-static void hashlittle2(

-  const void *key,       /* the key to hash */

-  size_t      length,    /* length of the key */

-  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */

-  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */

-{

-  uint32_t a,b,c;                                          /* internal state */

-  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

-  /* Set up the internal state */

-  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;

-  c += *pb;

-  u.ptr = key;

-  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {

-    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

-    const uint8_t  *k8;

-    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */

-    while (length > 12)

-    {

-      a += k[0];

-      b += k[1];

-      c += k[2];

-      mix(a,b,c);

-      length -= 12;

-      k += 3;

-    }

-    /*----------------------------- handle the last (probably partial) block */

-    /*

-     * "k[2]&0xffffff" actually reads beyond the end of the string, but

-     * then masks off the part it's not allowed to read.  Because the

-     * string is aligned, the masked-off tail is in the same word as the

-     * rest of the string.  Every machine with memory protection I've seen

-     * does it on word boundaries, so is OK with this.  But VALGRIND will

-     * still catch it and complain.  The masking trick does make the hash

-     * noticably faster for short strings (like English words).

-     */

-#ifndef VALGRIND

-    (void)k8;

-    switch(length)

-    {

-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;

-    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;

-    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;

-    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;

-    case 8 : b+=k[1]; a+=k[0]; break;

-    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;

-    case 6 : b+=k[1]&0xffff; a+=k[0]; break;

-    case 5 : b+=k[1]&0xff; a+=k[0]; break;

-    case 4 : a+=k[0]; break;

-    case 3 : a+=k[0]&0xffffff; break;

-    case 2 : a+=k[0]&0xffff; break;

-    case 1 : a+=k[0]&0xff; break;

-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */

-    }

-#else /* make valgrind happy */

-    k8 = (const uint8_t *)k;

-    switch(length)

-    {

-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;

-    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */

-    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */

-    case 9 : c+=k8[8];                   /* fall through */

-    case 8 : b+=k[1]; a+=k[0]; break;

-    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */

-    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */

-    case 5 : b+=k8[4];                   /* fall through */

-    case 4 : a+=k[0]; break;

-    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */

-    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */

-    case 1 : a+=k8[0]; break;

-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */

-    }

-#endif /* !valgrind */

-  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {

-    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */

-    const uint8_t  *k8;

-    /*--------------- all but last block: aligned reads and different mixing */

-    while (length > 12)

-    {

-      a += k[0] + (((uint32_t)k[1])<<16);

-      b += k[2] + (((uint32_t)k[3])<<16);

-      c += k[4] + (((uint32_t)k[5])<<16);

-      mix(a,b,c);

-      length -= 12;

-      k += 6;

-    }

-    /*----------------------------- handle the last (probably partial) block */

-    k8 = (const uint8_t *)k;

-    switch(length)

-    {

-    case 12: c+=k[4]+(((uint32_t)k[5])<<16);

-             b+=k[2]+(((uint32_t)k[3])<<16);

-             a+=k[0]+(((uint32_t)k[1])<<16);

-             break;

-    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */

-    case 10: c+=k[4];

-             b+=k[2]+(((uint32_t)k[3])<<16);

-             a+=k[0]+(((uint32_t)k[1])<<16);

-             break;

-    case 9 : c+=k8[8];                      /* fall through */

-    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);

-             a+=k[0]+(((uint32_t)k[1])<<16);

-             break;

-    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */

-    case 6 : b+=k[2];

-             a+=k[0]+(((uint32_t)k[1])<<16);

-             break;

-    case 5 : b+=k8[4];                      /* fall through */

-    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);

-             break;

-    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */

-    case 2 : a+=k[0];

-             break;

-    case 1 : a+=k8[0];

-             break;

-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */

-    }

-  } else {                        /* need to read the key one byte at a time */

-    const uint8_t *k = (const uint8_t *)key;

-    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */

-    while (length > 12)

-    {

-      a += k[0];

-      a += ((uint32_t)k[1])<<8;

-      a += ((uint32_t)k[2])<<16;

-      a += ((uint32_t)k[3])<<24;

-      b += k[4];

-      b += ((uint32_t)k[5])<<8;

-      b += ((uint32_t)k[6])<<16;

-      b += ((uint32_t)k[7])<<24;

-      c += k[8];

-      c += ((uint32_t)k[9])<<8;

-      c += ((uint32_t)k[10])<<16;

-      c += ((uint32_t)k[11])<<24;

-      mix(a,b,c);

-      length -= 12;

-      k += 12;

-    }

-    /*-------------------------------- last block: affect all 32 bits of (c) */

-    switch(length)                   /* all the case statements fall through */

-    {

-    case 12: c+=((uint32_t)k[11])<<24; // fallthrough

-    case 11: c+=((uint32_t)k[10])<<16; // fallthrough

-    case 10: c+=((uint32_t)k[9])<<8; // fallthrough

-    case 9 : c+=k[8]; // fallthrough

-    case 8 : b+=((uint32_t)k[7])<<24; // fallthrough

-    case 7 : b+=((uint32_t)k[6])<<16; // fallthrough

-    case 6 : b+=((uint32_t)k[5])<<8; // fallthrough

-    case 5 : b+=k[4]; // fallthrough

-    case 4 : a+=((uint32_t)k[3])<<24; // fallthrough

-    case 3 : a+=((uint32_t)k[2])<<16; // fallthrough

-    case 2 : a+=((uint32_t)k[1])<<8; // fallthrough

-    case 1 : a+=k[0];

-             break;

-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */

-    }

-  }

-  final(a,b,c);

-  *pc=c; *pb=b;

-}

--- /dev/null

+++ b/3rd/spooky.c

@@ -1,0 +1,526 @@

+//

+// SpookyHash - 128-bit noncryptographic hash function

+//

+// Written in 2012 by Bob Jenkins

+//

+// Converted to C in 2015 by Joergen Ibsen

+//

+// To the extent possible under law, the author(s) have dedicated all

+// copyright and related and neighboring rights to this software to the

+// public domain worldwide. This software is distributed without any

+// warranty. <http://creativecommons.org/publicdomain/zero/1.0/>

+//

+// Original comment from SpookyV2.cpp by Bob Jenkins:

+//

+// Spooky Hash

+// A 128-bit noncryptographic hash, for checksums and table lookup

+// By Bob Jenkins.  Public domain.

+//   Oct 31 2010: published framework, disclaimer ShortHash isn't right

+//   Nov 7 2010: disabled ShortHash

+//   Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again

+//   April 10 2012: buffer overflow on platforms without unaligned reads

+//   July 12 2012: was passing out variables in final to in/out in short

+//   July 30 2012: I reintroduced the buffer overflow

+//   August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash

+#include "platform.h"

+#include "spooky.h"

+#define ALLOW_UNALIGNED_READS 1

+//

+// SC_CONST: a constant which:

+//  - is not zero

+//  - is odd

+//  - is a not-very-regular mix of 1's and 0's

+//  - does not need any other special mathematical properties

+//

+#define SC_CONST 0xDEADBEEFDEADBEEFULL

+#define ROTL64(x, k) (((x) << (k)) | ((x) >> (64 - (k))))

+#ifdef _MSC_VER

+# define restrict __restrict

+# define inline __forceinline

+#endif

+static bool

+spooky_is_aligned(const void *p, size_t size)

+{

+	return (uintptr_t) p % size == 0;

+}

+static bool

+spooky_is_little_endian(void)

+{

+	const union {

+		uint32_t i;

+		uint8_t c[sizeof(uint32_t)];

+	} x = { 1 };

+	return x.c[0];

+}

+//

+// Read uint64_t in little-endian order.

+//

+static inline uint64_t

+spooky_read_le64(const uint64_t *s)

+{

+	if (spooky_is_little_endian()) {

+		uint64_t v;

+		memcpy(&v, s, sizeof(v));

+		return v;

+	}

+	else {

+		const uint8_t *p = (const uint8_t *) s;

+		return (uint64_t) p[0]

+		    | ((uint64_t) p[1] << 8)

+		    | ((uint64_t) p[2] << 16)

+		    | ((uint64_t) p[3] << 24)

+		    | ((uint64_t) p[4] << 32)

+		    | ((uint64_t) p[5] << 40)

+		    | ((uint64_t) p[6] << 48)

+		    | ((uint64_t) p[7] << 56);

+	}

+}

+//

+// This is used if the input is 192 bytes (SC_BUFSIZE) long or longer.

+//

+// The internal state is fully overwritten every 96 bytes.

+// Every input bit appears to cause at least 128 bits of entropy

+// before 96 other bytes are combined, when run forward or backward

+//   For every input bit,

+//   Two inputs differing in just that input bit

+//   Where "differ" means xor or subtraction

+//   And the base value is random

+//   When run forward or backwards one Mix

+// I tried 3 pairs of each; they all differed by at least 212 bits.

+//

+static inline void

+spooky_mix(const uint64_t *restrict data, uint64_t *restrict s)

+{

+	s[0] += spooky_read_le64(&data[0]);          s[2] ^= s[10];

+	s[11] ^= s[0];   s[0] = ROTL64(s[0], 11);    s[11] += s[1];

+	s[1] += spooky_read_le64(&data[1]);          s[3] ^= s[11];

+	s[0] ^= s[1];    s[1] = ROTL64(s[1], 32);    s[0] += s[2];

+	s[2] += spooky_read_le64(&data[2]);          s[4] ^= s[0];

+	s[1] ^= s[2];    s[2] = ROTL64(s[2], 43);    s[1] += s[3];

+	s[3] += spooky_read_le64(&data[3]);          s[5] ^= s[1];

+	s[2] ^= s[3];    s[3] = ROTL64(s[3], 31);    s[2] += s[4];

+	s[4] += spooky_read_le64(&data[4]);          s[6] ^= s[2];

+	s[3] ^= s[4];    s[4] = ROTL64(s[4], 17);    s[3] += s[5];

+	s[5] += spooky_read_le64(&data[5]);          s[7] ^= s[3];

+	s[4] ^= s[5];    s[5] = ROTL64(s[5], 28);    s[4] += s[6];

+	s[6] += spooky_read_le64(&data[6]);          s[8] ^= s[4];

+	s[5] ^= s[6];    s[6] = ROTL64(s[6], 39);    s[5] += s[7];

+	s[7] += spooky_read_le64(&data[7]);          s[9] ^= s[5];

+	s[6] ^= s[7];    s[7] = ROTL64(s[7], 57);    s[6] += s[8];

+	s[8] += spooky_read_le64(&data[8]);          s[10] ^= s[6];

+	s[7] ^= s[8];    s[8] = ROTL64(s[8], 55);    s[7] += s[9];

+	s[9] += spooky_read_le64(&data[9]);          s[11] ^= s[7];

+	s[8] ^= s[9];    s[9] = ROTL64(s[9], 54);    s[8] += s[10];

+	s[10] += spooky_read_le64(&data[10]);        s[0] ^= s[8];

+	s[9] ^= s[10];   s[10] = ROTL64(s[10], 22);  s[9] += s[11];

+	s[11] += spooky_read_le64(&data[11]);        s[1] ^= s[9];

+	s[10] ^= s[11];  s[11] = ROTL64(s[11], 46);  s[10] += s[0];

+}

+//

+// Mix all 12 inputs together so that h0, h1 are a hash of them all.

+//

+// For two inputs differing in just the input bits

+// Where "differ" means xor or subtraction

+// And the base value is random, or a counting value starting at that bit

+// The final result will have each bit of h0, h1 flip

+// For every input bit,

+// with probability 50 +- .3%

+// For every pair of input bits,

+// with probability 50 +- 3%

+//

+// This does not rely on the last spooky_mix() call having already mixed some.

+// Two iterations was almost good enough for a 64-bit result, but a

+// 128-bit result is reported, so spooky_end() does three iterations.

+//

+static inline void

+spooky_end_partial(uint64_t *h)

+{

+	h[11] += h[1];  h[2] ^= h[11];  h[1] = ROTL64(h[1], 44);

+	h[0] += h[2];   h[3] ^= h[0];   h[2] = ROTL64(h[2], 15);

+	h[1] += h[3];   h[4] ^= h[1];   h[3] = ROTL64(h[3], 34);

+	h[2] += h[4];   h[5] ^= h[2];   h[4] = ROTL64(h[4], 21);

+	h[3] += h[5];   h[6] ^= h[3];   h[5] = ROTL64(h[5], 38);

+	h[4] += h[6];   h[7] ^= h[4];   h[6] = ROTL64(h[6], 33);

+	h[5] += h[7];   h[8] ^= h[5];   h[7] = ROTL64(h[7], 10);

+	h[6] += h[8];   h[9] ^= h[6];   h[8] = ROTL64(h[8], 13);

+	h[7] += h[9];   h[10] ^= h[7];  h[9] = ROTL64(h[9], 38);

+	h[8] += h[10];  h[11] ^= h[8];  h[10] = ROTL64(h[10], 53);

+	h[9] += h[11];  h[0] ^= h[9];   h[11] = ROTL64(h[11], 42);

+	h[10] += h[0];  h[1] ^= h[10];  h[0] = ROTL64(h[0], 54);

+}

+static inline void

+spooky_end(const uint64_t *restrict data, uint64_t *restrict h)

+{

+	h[0] += spooky_read_le64(&data[0]);

+	h[1] += spooky_read_le64(&data[1]);

+	h[2] += spooky_read_le64(&data[2]);

+	h[3] += spooky_read_le64(&data[3]);

+	h[4] += spooky_read_le64(&data[4]);

+	h[5] += spooky_read_le64(&data[5]);

+	h[6] += spooky_read_le64(&data[6]);

+	h[7] += spooky_read_le64(&data[7]);

+	h[8] += spooky_read_le64(&data[8]);

+	h[9] += spooky_read_le64(&data[9]);

+	h[10] += spooky_read_le64(&data[10]);

+	h[11] += spooky_read_le64(&data[11]);

+	spooky_end_partial(h);

+	spooky_end_partial(h);

+	spooky_end_partial(h);

+}

+//

+// The goal is for each bit of the input to expand into 128 bits of

+//   apparent entropy before it is fully overwritten.

+// n trials both set and cleared at least m bits of h0 h1 h2 h3

+//   n: 2   m: 29

+//   n: 3   m: 46

+//   n: 4   m: 57

+//   n: 5   m: 107

+//   n: 6   m: 146

+//   n: 7   m: 152

+// when run forwards or backwards

+// for all 1-bit and 2-bit diffs

+// with diffs defined by either xor or subtraction

+// with a base of all zeros plus a counter, or plus another bit, or random

+//

+static inline void

+spooky_short_mix(uint64_t *h)

+{

+	h[2] = ROTL64(h[2], 50);  h[2] += h[3];  h[0] ^= h[2];

+	h[3] = ROTL64(h[3], 52);  h[3] += h[0];  h[1] ^= h[3];

+	h[0] = ROTL64(h[0], 30);  h[0] += h[1];  h[2] ^= h[0];

+	h[1] = ROTL64(h[1], 41);  h[1] += h[2];  h[3] ^= h[1];

+	h[2] = ROTL64(h[2], 54);  h[2] += h[3];  h[0] ^= h[2];

+	h[3] = ROTL64(h[3], 48);  h[3] += h[0];  h[1] ^= h[3];

+	h[0] = ROTL64(h[0], 38);  h[0] += h[1];  h[2] ^= h[0];

+	h[1] = ROTL64(h[1], 37);  h[1] += h[2];  h[3] ^= h[1];

+	h[2] = ROTL64(h[2], 62);  h[2] += h[3];  h[0] ^= h[2];

+	h[3] = ROTL64(h[3], 34);  h[3] += h[0];  h[1] ^= h[3];

+	h[0] = ROTL64(h[0], 5);   h[0] += h[1];  h[2] ^= h[0];

+	h[1] = ROTL64(h[1], 36);  h[1] += h[2];  h[3] ^= h[1];

+}

+//

+// Mix all 4 inputs together so that h0, h1 are a hash of them all.

+//

+// For two inputs differing in just the input bits

+// Where "differ" means xor or subtraction

+// And the base value is random, or a counting value starting at that bit

+// The final result will have each bit of h0, h1 flip

+// For every input bit,

+// with probability 50 +- .3% (it is probably better than that)

+// For every pair of input bits,

+// with probability 50 +- .75% (the worst case is approximately that)

+//

+static inline void

+spooky_short_end(uint64_t *h)

+{

+	h[3] ^= h[2];  h[2] = ROTL64(h[2], 15);  h[3] += h[2];

+	h[0] ^= h[3];  h[3] = ROTL64(h[3], 52);  h[0] += h[3];

+	h[1] ^= h[0];  h[0] = ROTL64(h[0], 26);  h[1] += h[0];

+	h[2] ^= h[1];  h[1] = ROTL64(h[1], 51);  h[2] += h[1];

+	h[3] ^= h[2];  h[2] = ROTL64(h[2], 28);  h[3] += h[2];

+	h[0] ^= h[3];  h[3] = ROTL64(h[3], 9);   h[0] += h[3];

+	h[1] ^= h[0];  h[0] = ROTL64(h[0], 47);  h[1] += h[0];

+	h[2] ^= h[1];  h[1] = ROTL64(h[1], 54);  h[2] += h[1];

+	h[3] ^= h[2];  h[2] = ROTL64(h[2], 32);  h[3] += h[2];

+	h[0] ^= h[3];  h[3] = ROTL64(h[3], 25);  h[0] += h[3];

+	h[1] ^= h[0];  h[0] = ROTL64(h[0], 63);  h[1] += h[0];

+}

+//

+// short hash ... it could be used on any message,

+// but it's used by Spooky just for short messages.

+//

+static void

+spooky_short(const void *restrict message, size_t length,

+             uint64_t *restrict hash1, uint64_t *restrict hash2)

+{

+	uint64_t buf[2 * SC_NUMVARS];

+	union {

+		const uint8_t *p8;

+		uint64_t *p64;

+	} u;

+	u.p8 = (const uint8_t *) message;

+	if (ALLOW_UNALIGNED_READS == 0 && !spooky_is_aligned(u.p8, 8)) {

+		memcpy(buf, message, length);

+		u.p64 = buf;

+	}

+	size_t left = length % 32;

+	uint64_t h[4];

+	h[0] = *hash1;

+	h[1] = *hash2;

+	h[2] = SC_CONST;

+	h[3] = SC_CONST;

+	if (length > 15) {

+		const uint64_t *end = u.p64 + (length / 32) * 4;

+		// handle all complete sets of 32 bytes

+		for (; u.p64 < end; u.p64 += 4) {

+			h[2] += spooky_read_le64(&u.p64[0]);

+			h[3] += spooky_read_le64(&u.p64[1]);

+			spooky_short_mix(h);

+			h[0] += spooky_read_le64(&u.p64[2]);

+			h[1] += spooky_read_le64(&u.p64[3]);

+		}

+		// Handle the case of 16+ remaining bytes.

+		if (left >= 16) {

+			h[2] += spooky_read_le64(&u.p64[0]);

+			h[3] += spooky_read_le64(&u.p64[1]);

+			spooky_short_mix(h);

+			u.p64 += 2;

+			left -= 16;

+		}

+	}

+	// Handle the last 0..15 bytes, and its length

+	h[3] += ((uint64_t) length) << 56;

+	switch (left) {

+	case 15:

+		h[3] += ((uint64_t) u.p8[14]) << 48; // fallthrough

+	case 14:

+		h[3] += ((uint64_t) u.p8[13]) << 40; // fallthrough

+	case 13:

+		h[3] += ((uint64_t) u.p8[12]) << 32; // fallthrough

+	case 12:

+		h[3] += ((uint64_t) u.p8[11]) << 24; // fallthrough

+	case 11:

+		h[3] += ((uint64_t) u.p8[10]) << 16; // fallthrough

+	case 10:

+		h[3] += ((uint64_t) u.p8[9]) << 8; // fallthrough

+	case 9:

+		h[3] += (uint64_t) u.p8[8]; // fallthrough

+	case 8:

+		h[2] += spooky_read_le64(&u.p64[0]);

+		break;

+	case 7:

+		h[2] += ((uint64_t) u.p8[6]) << 48; // fallthrough

+	case 6:

+		h[2] += ((uint64_t) u.p8[5]) << 40; // fallthrough

+	case 5:

+		h[2] += ((uint64_t) u.p8[4]) << 32; // fallthrough

+	case 4:

+		h[2] += ((uint64_t) u.p8[3]) << 24; // fallthrough

+	case 3:

+		h[2] += ((uint64_t) u.p8[2]) << 16; // fallthrough

+	case 2:

+		h[2] += ((uint64_t) u.p8[1]) << 8; // fallthrough

+	case 1:

+		h[2] += (uint64_t) u.p8[0];

+		break;

+	case 0:

+		h[2] += SC_CONST;

+		h[3] += SC_CONST;

+	}

+	spooky_short_end(h);

+	*hash1 = h[0];

+	*hash2 = h[1];

+}

+uint64_t

+spooky_hash64(const void *message, size_t length, uint64_t seed)

+{

+	uint64_t hash1 = seed;

+	spooky_hash128(message, length, &hash1, &seed);

+	return hash1;

+}

+uint32_t

+spooky_hash32(const void *message, size_t length, uint32_t seed)

+{

+	uint64_t hash1 = seed, hash2 = seed;

+	spooky_hash128(message, length, &hash1, &hash2);

+	return (uint32_t) hash1;

+}

+// do the whole hash in one call

+void

+spooky_hash128(const void *restrict message, size_t length,

+               uint64_t *restrict hash1, uint64_t *restrict hash2)

+{

+	if (length < SC_BUFSIZE) {

+		spooky_short(message, length, hash1, hash2);

+		return;

+	}

+	uint64_t h[SC_NUMVARS];

+	uint64_t buf[SC_NUMVARS];

+	uint64_t *end;

+	union {

+		const uint8_t *p8;

+		uint64_t *p64;

+	} u;

+	size_t left;

+	h[0] = h[3] = h[6] = h[9] = *hash1;

+	h[1] = h[4] = h[7] = h[10] = *hash2;

+	h[2] = h[5] = h[8] = h[11] = SC_CONST;

+	u.p8 = (const uint8_t *) message;

+	end = u.p64 + (length / SC_BLOCKSIZE) * SC_NUMVARS;

+	// handle all whole SC_BLOCKSIZE blocks of bytes

+	if (ALLOW_UNALIGNED_READS || spooky_is_aligned(u.p8, 8)) {

+		do {

+			spooky_mix(u.p64, h);

+			u.p64 += SC_NUMVARS;

+		} while (u.p64 < end);

+	}

+	else {

+		do {

+			memcpy(buf, u.p64, SC_BLOCKSIZE);

+			spooky_mix(buf, h);

+			u.p64 += SC_NUMVARS;

+		} while (u.p64 < end);

+	}

+	// handle the last partial block of SC_BLOCKSIZE bytes

+	left = length - ((const uint8_t *) end - (const uint8_t *) message);

+	memcpy(buf, end, left);

+	memset(((uint8_t *) buf) + left, 0, SC_BLOCKSIZE - left);

+	((uint8_t *) buf)[SC_BLOCKSIZE - 1] = (uint8_t) left;

+	// do some final mixing

+	spooky_end(buf, h);

+	*hash1 = h[0];

+	*hash2 = h[1];

+}

+/*

+// init spooky state

+void

+spooky_init(struct spooky_state *state, uint64_t seed1, uint64_t seed2)

+{

+	state->length = 0;

+	state->left = 0;

+	state->state[0] = seed1;

+	state->state[1] = seed2;

+}

+// add a message fragment to the state

+void

+spooky_update(struct spooky_state *restrict state,

+              const void *restrict message, size_t length)

+{

+	uint64_t h[SC_NUMVARS];

+	size_t newLength = length + state->left;

+	uint8_t left;

+	union {

+		const uint8_t *p8;

+		uint64_t *p64;

+	} u;

+	const uint64_t *end;

+	// Is this message fragment too short?  If it is, stuff it away.

+	if (newLength < SC_BUFSIZE) {

+		memcpy(&((uint8_t *) state->data)[state->left], message, length);

+		state->length = length + state->length;

+		state->left = (uint8_t) newLength;

+		return;

+	}

+	// init the variables

+	if (state->length < SC_BUFSIZE) {

+		h[0] = h[3] = h[6] = h[9] = state->state[0];

+		h[1] = h[4] = h[7] = h[10] = state->state[1];

+		h[2] = h[5] = h[8] = h[11] = SC_CONST;

+	}

+	else {

+		memcpy(h, state->state, sizeof(state->state));

+	}

+	state->length = length + state->length;

+	// if we've got anything stuffed away, use it now

+	if (state->left) {

+		uint8_t prefix = SC_BUFSIZE - state->left;

+		memcpy(&(((uint8_t *) state->data)[state->left]), message, prefix);

+		u.p64 = state->data;

+		spooky_mix(u.p64, h);

+		spooky_mix(&u.p64[SC_NUMVARS], h);

+		u.p8 = ((const uint8_t *) message) + prefix;

+		length -= prefix;

+	}

+	else {

+		u.p8 = (const uint8_t *) message;

+	}

+	// handle all whole blocks of SC_BLOCKSIZE bytes

+	end = u.p64 + (length / SC_BLOCKSIZE) * SC_NUMVARS;

+	left = (uint8_t) (length - ((const uint8_t *) end - u.p8));

+	if (ALLOW_UNALIGNED_READS || spooky_is_aligned(u.p8, 8)) {

+		while (u.p64 < end) {

+			spooky_mix(u.p64, h);

+			u.p64 += SC_NUMVARS;

+		}

+	}

+	else {

+		while (u.p64 < end) {

+			memcpy(state->data, u.p8, SC_BLOCKSIZE);

+			spooky_mix(state->data, h);

+			u.p64 += SC_NUMVARS;

+		}

+	}

+	// stuff away the last few bytes

+	state->left = left;

+	memcpy(state->data, end, left);

+	// stuff away the variables

+	memcpy(state->state, h, sizeof(state->state));

+}

+// report the hash for the concatenation of all message fragments so far

+void

+spooky_final(struct spooky_state *restrict state,

+             uint64_t *restrict hash1, uint64_t *restrict hash2)

+{

+	// init the variables

+	if (state->length < SC_BUFSIZE) {

+		*hash1 = state->state[0];

+		*hash2 = state->state[1];

+		spooky_short(state->data, state->length, hash1, hash2);

+		return;

+	}

+	const uint64_t *data = (const uint64_t *) state->data;

+	uint8_t left = state->left;

+	uint64_t h[SC_NUMVARS];

+	memcpy(h, state->state, sizeof(state->state));

+	if (left >= SC_BLOCKSIZE) {

+		// state->data can contain two blocks; handle any whole first block

+		spooky_mix(data, h);

+		data += SC_NUMVARS;

+		left -= SC_BLOCKSIZE;

+	}

+	// mix in the last partial block, and the length mod SC_BLOCKSIZE

+	memset(&((uint8_t *) data)[left], 0, (SC_BLOCKSIZE - left));

+	((uint8_t *) data)[SC_BLOCKSIZE - 1] = left;

+	// do some final mixing

+	spooky_end(data, h);

+	*hash1 = h[0];

+	*hash2 = h[1];

+}

+*/

--- /dev/null

+++ b/3rd/spooky.h

@@ -1,0 +1,65 @@

+/*

+ * SpookyHash - 128-bit noncryptographic hash function

+ *

+ * Written in 2012 by Bob Jenkins

+ *

+ * Converted to C in 2015 by Joergen Ibsen

+ *

+ * To the extent possible under law, the author(s) have dedicated all

+ * copyright and related and neighboring rights to this software to the

+ * public domain worldwide. This software is distributed without any

+ * warranty. <http://creativecommons.org/publicdomain/zero/1.0/>

+ *

+ * Original comment from SpookyV2.h by Bob Jenkins:

+ *

+ * SpookyHash: a 128-bit noncryptographic hash function

+ * By Bob Jenkins, public domain

+ *   Oct 31 2010: alpha, framework + SpookyHash::Mix appears right

+ *   Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right

+ *   Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas

+ *   Feb  2 2012: production, same bits as beta

+ *   Feb  5 2012: adjusted definitions of uint* to be more portable

+ *   Mar 30 2012: 3 bytes/cycle, not 4.  Alpha was 4 but wasn't thorough enough.

+ *   August 5 2012: SpookyV2 (different results)

+ *

+ * Up to 3 bytes/cycle for long messages.  Reasonably fast for short messages.

+ * All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit.

+ *

+ * This was developed for and tested on 64-bit x86-compatible processors.

+ * It assumes the processor is little-endian.  There is a macro

+ * controlling whether unaligned reads are allowed (by default they are).

+ * This should be an equally good hash on big-endian machines, but it will

+ * compute different results on them than on little-endian machines.

+ *

+ * Google's CityHash has similar specs to SpookyHash, and CityHash is faster

+ * on new Intel boxes.  MD4 and MD5 also have similar specs, but they are orders

+ * of magnitude slower.  CRCs are two or more times slower, but unlike

+ * SpookyHash, they have nice math for combining the CRCs of pieces to form

+ * the CRCs of wholes.  There are also cryptographic hashes, but those are even

+ * slower than MD5.

+ */

+#pragma once

+// number of uint64_t's in internal state

+#define SC_NUMVARS 12U

+// size of the internal state, in bytes

+#define SC_BLOCKSIZE (SC_NUMVARS * 8U)

+// size of buffer of unhashed data, in bytes

+#define SC_BUFSIZE (2U * SC_BLOCKSIZE)

+struct spooky_state {

+	uint64_t data[2 * SC_NUMVARS]; // unhashed data, for partial messages

+	uint64_t state[SC_NUMVARS];    // internal state of the hash

+	size_t length;                 // total length of the input so far

+	uint8_t left;                  // length of unhashed data stashed in data

+};

+void spooky_hash128(const void *message, size_t length, uint64_t *hash1, uint64_t *hash2);

+uint64_t spooky_hash64(const void *message, size_t length, uint64_t seed);

+uint32_t spooky_hash32(const void *message, size_t length, uint32_t seed);

+//void spooky_init(struct spooky_state *state, uint64_t seed1, uint64_t seed2);

+//void spooky_update(struct spooky_state *state, const void *message, size_t length);

+//void spooky_final(struct spooky_state *state, uint64_t *hash1, uint64_t *hash2);

--- a/hashing.c

+++ b/hashing.c

@@ -1,5 +1,6 @@

 #include "llt.h"

 #include "hashing.h"

+#include "spooky.h"

 lltuint_t

 nextipow2(lltuint_t i)

@@ -55,22 +56,14 @@

 	return (uint32_t)key;

-#include "lookup3.c"

 uint64_t

-memhash(const char* buf, size_t n)

+memhash(const char *buf, size_t n)

-	uint32_t c = 0xcafe8881, b = 0x4d6a087c;

-	hashlittle2(buf, n, &c, &b);

-	return (uint64_t)c | (((uint64_t)b)<<32);

+	return spooky_hash64(buf, n, 0xcafe8881);

 uint32_t

-memhash32(const char* buf, size_t n)

+memhash32(const char *buf, size_t n)

-	uint32_t c = 0xcafe8881, b = 0x4d6a087c;

-	hashlittle2(buf, n, &c, &b);

-	return c;

+	return spooky_hash32(buf, n, 0xcafe8881);

--- a/meson.build

+++ b/meson.build

@@ -52,6 +52,7 @@

 	'3rd/mp/u16.c',

 	'3rd/mp/u32.c',

 	'3rd/mp/u64.c',

+	'3rd/spooky.c',

 	'3rd/utf/rune.c',

 	'3rd/utf/runeistype.c',

 	'3rd/utf/runetotype.c',

--- a/mkfile

+++ b/mkfile

@@ -12,6 +12,7 @@

 OFILES=\

 	3rd/mt19937-64.$O\

 	3rd/wcwidth.$O\

+	3rd/spooky.$O\

 	bitreverse.$O\

 	bitvector-ops.$O\

 	bitvector.$O\