ref: d06f9ae747166f3a2a5d52ce490f31000c2cd776
parent: 33d6fc3b0d68182ecab7b80e002d1dd2e2d00cfd
author: Ali Gholami Rudi <ali@rudi.ir>
date: Fri May 2 14:35:53 EDT 2014
hyph: per starting character lists for the hyphenation dictionary
--- a/hyph.c
+++ b/hyph.c
@@ -4,15 +4,19 @@
#include <stdio.h>
#include "roff.h"
-#define HYEXLEN (1 << 17) /* hyphenation exception list length */
-#define HYEXWLEN 128 /* hyphenation exception word length */
#define HYPATLEN (1 << 19) /* hyphenation pattern length */
+#define HYHWLEN (HYHWN << 4) /* hyphenation dictionary length */
-/* hyphenation exception list */
-static char hyexcept[HYEXLEN];
-static char hyexcept_hyph[HYEXLEN];
-static int nhyexcept;
-
+/* hyphenation dictionary (.hw) */
+static char hwword[HYHWLEN]; /* buffer for .hw words */
+static char hwhyph[HYHWLEN]; /* buffer for .hw hyphenations */
+static int hwword_len; /* used hwword[] length */
+/* per-starting-character word lists for .hw words */
+static int hwhead[256]; /* the head of hw_*[] lists */
+static int hw_next[HYHWN]; /* the next word with the same initial */
+static int hw_idx[HYHWN]; /* the offset of this word in hwword[] */
+static int hw_len[HYHWN]; /* the length of the word */
+static int hw_n = 1; /* number of words in hw_*[] lists */
/* tex hyphenation algorithm */
static int hyinit; /* hyphenation data initialized */
static char hypats[HYPATLEN];
@@ -20,23 +24,28 @@
static int nhypats;
static char *hyhash[32 * 32];
-static void hyph_initpatterns(void);
-static void hyph_initexceptions(void);
-static void hyfind(char *hyph, char *word, int flg);
+/* functions for the hyphenation dictionary */
-static void hyexcept_add(char *s)
+static void hw_add(char *word)
{
- char *d = hyexcept + nhyexcept;
- *d++ = ' ';
- while (*s) {
- if (*s == '-') {
- hyexcept_hyph[d - hyexcept - 1] = 1;
- s++;
- } else {
- d += utf8read(&s, d);
- }
+ char *s = word;
+ char *d = hwword + hwword_len;
+ int c, i;
+ if (hw_n == LEN(hw_idx) || hwword_len + 128 > sizeof(hwword))
+ return;
+ i = hw_n++;
+ while ((c = *s++)) {
+ if (c == '-')
+ hwhyph[d - hwword] = 1;
+ else
+ *d++ = c;
}
- nhyexcept = d - hyexcept;
+ *d++ = '\0';
+ hw_idx[i] = hwword_len;
+ hwword_len = d - hwword;
+ hw_len[i] = hwword_len - hw_idx[i] - 1;
+ hw_next[i] = hwhead[(unsigned char) word[0]];
+ hwhead[(unsigned char) word[0]] = i;
}
static void strcpy_lower(char *d, char *s)
@@ -50,45 +59,30 @@
*d = '\0';
}
-static char *hyexcept_lookup(char *s)
+static char *hw_lookup(char *s)
{
char word[ILNLEN];
- char *r;
- int len, i;
- word[0] = ' ';
- strcpy_lower(word + 1, s);
- len = strlen(word);
- for (i = len; i >= 4; i--) {
- word[i] = ' ';
- word[i + 1] = '\0';
- if ((r = strstr(hyexcept, word)))
- return hyexcept_hyph + (r - hyexcept);
+ int i;
+ strcpy_lower(word, s);
+ /* finding a .hw word that matches a prefix of word */
+ i = hwhead[(unsigned char) word[0]];
+ while (i > 0) {
+ if (!strncmp(word, hwword + hw_idx[i], hw_len[i]))
+ return hwhyph + hw_idx[i];
+ i = hw_next[i];
}
return NULL;
}
-void hyphenate(char *hyph, char *word, int flg)
-{
- char *r;
- if (!hyinit) {
- hyinit = 1;
- hyph_initpatterns();
- hyph_initexceptions();
- }
- r = hyexcept_lookup(word);
- if (r)
- memcpy(hyph, r, strlen(word) + 1);
- else
- hyfind(hyph, word, flg);
-}
-
void tr_hw(char **args)
{
int i;
for (i = 1; i < NARGS && args[i]; i++)
- hyexcept_add(args[i]);
+ hw_add(args[i]);
}
+/* functions implementing tex hyphenation algorithm */
+
#define HYC_MAP(c) ((c) == '.' ? 0 : (c) - 'a' + 1)
static int hyidx(int a, int b)
@@ -586,8 +580,23 @@
while (*s != ' ')
*d++ = *s++;
*d = '\0';
- hyexcept_add(word);
+ hw_add(word);
while (*s == ' ')
s++;
}
+}
+
+void hyphenate(char *hyph, char *word, int flg)
+{
+ char *r;
+ if (!hyinit) {
+ hyinit = 1;
+ hyph_initpatterns();
+ hyph_initexceptions();
+ }
+ r = hw_lookup(word);
+ if (r)
+ memcpy(hyph, r, strlen(word) + 1);
+ else
+ hyfind(hyph, word, flg);
}
--- a/roff.h
+++ b/roff.h
@@ -47,6 +47,7 @@
#define MAXFRAC 100000 /* maximum value of the fractional part */
#define LIGLEN 4 /* length of ligatures */
#define NCDEFS 128 /* number of character definitions (.char) */
+#define HYHWN 1024 /* hyphenation dictionary size (.hw) */
#define NHYPHS 8 /* maximum hyphenations per word */
/* converting scales */