shithub: neatroff

Download patch

ref: d49cfc1cc200ba9a3f29a4e2909af02b87d1f0dc
parent: 80b2dc5db97b8e1d0628578d6696836693e1d563
author: Ali Gholami Rudi <ali@rudi.ir>
date: Thu Dec 28 06:36:49 EST 2017

dict: consider more characters in the hash function

--- a/dict.c
+++ b/dict.c
@@ -3,7 +3,6 @@
 #include <string.h>
 #include "roff.h"
 
-#define DHASH(d, s)	((d)->level2 && (s)[0] ? (((unsigned char) (s)[1]) << 8) | (unsigned char) (s)[0] : (unsigned char) s[0])
 #define CNTMIN		(1 << 10)
 
 struct dict {
@@ -13,7 +12,7 @@
 	int size;
 	int n;
 	int notfound;		/* the value returned for missing keys */
-	int level2;		/* use two characters for hashing */
+	int hashlen;		/* the number of characters used for hashing */
 	int dupkeys;		/* duplicate keys if set */
 };
 
@@ -29,14 +28,14 @@
  *
  * notfound: the value returned for missing keys.
  * dupkeys: if nonzero, store a copy of keys inserted via dict_put().
- * level2: use two characters for hasing
+ * hashlen: the number of characters used for hashing
  */
-struct dict *dict_make(int notfound, int dupkeys, int level2)
+struct dict *dict_make(int notfound, int dupkeys, int hashlen)
 {
 	struct dict *d = xmalloc(sizeof(*d));
 	memset(d, 0, sizeof(*d));
 	d->n = 1;
-	d->level2 = level2;
+	d->hashlen = hashlen ? hashlen : 32;
 	d->dupkeys = dupkeys;
 	d->notfound = notfound;
 	d->map = iset_make();
@@ -56,6 +55,15 @@
 	free(d);
 }
 
+static int dict_hash(struct dict *d, char *key)
+{
+	unsigned long hash = (unsigned char) *key++;
+	int i = d->hashlen;
+	while (--i > 0 && *key)
+		hash = (hash << 5) + hash + (unsigned char) *key++;
+	return hash & 0xffff;
+}
+
 void dict_put(struct dict *d, char *key, int val)
 {
 	int idx;
@@ -70,13 +78,13 @@
 	idx = d->n++;
 	d->key[idx] = key;
 	d->val[idx] = val;
-	iset_put(d->map, DHASH(d, key), idx);
+	iset_put(d->map, dict_hash(d, key), idx);
 }
 
 /* return the index of key in d */
 int dict_idx(struct dict *d, char *key)
 {
-	int h = DHASH(d, key);
+	int h = dict_hash(d, key);
 	int *b = iset_get(d->map, h);
 	int *r = b + iset_len(d->map, h);
 	while (b && --r >= b)
@@ -104,7 +112,7 @@
 /* match a prefix of key; in the first call, *idx should be -1 */
 int dict_prefix(struct dict *d, char *key, int *pos)
 {
-	int *r = iset_get(d->map, DHASH(d, key));
+	int *r = iset_get(d->map, dict_hash(d, key));
 	while (r && r[++*pos] >= 0) {
 		int idx = r[*pos];
 		int plen = strlen(d->key[idx]);
--- a/hyph.c
+++ b/hyph.c
@@ -331,8 +331,8 @@
 
 void hyph_init(void)
 {
-	hwdict = dict_make(-1, 0, 1);
-	hydict = dict_make(-1, 0, 1);
+	hwdict = dict_make(-1, 0, 2);
+	hydict = dict_make(-1, 0, 2);
 	hcodedict = dict_make(-1, 0, 1);
 }
 
--- a/map.c
+++ b/map.c
@@ -15,7 +15,7 @@
 	if (s[0] == '.' && s[1] && !s[2])	/* ".x" is mapped to 'x' */
 		return (unsigned char) s[1];
 	if (!mapdict)
-		mapdict = dict_make(-1, 1, 1);
+		mapdict = dict_make(-1, 1, 2);
 	i = dict_idx(mapdict, s);
 	if (i < 0) {
 		dict_put(mapdict, s, 0);
--- a/roff.h
+++ b/roff.h
@@ -122,7 +122,7 @@
 int iset_len(struct iset *iset, int key);
 
 /* mapping strings to longs */
-struct dict *dict_make(int notfound, int dupkeys, int level2);
+struct dict *dict_make(int notfound, int dupkeys, int hashlen);
 void dict_free(struct dict *d);
 void dict_put(struct dict *d, char *key, int val);
 int dict_get(struct dict *d, char *key);
--- a/tr.c
+++ b/tr.c
@@ -1253,7 +1253,7 @@
 	int i;
 	for (i = 0; i < LEN(cmds); i++)
 		str_dset(map(cmds[i].id), &cmds[i]);
-	cmap = dict_make(-1, 0, 0);
+	cmap = dict_make(-1, 0, 2);
 }
 
 void tr_done(void)