ref: 3ca7606bfb3ba941944d8103f1012dce78820e4d
parent: c6e989fa2ac98bb2db4c1419428ab8e655fcfd7b
author: Ali Gholami Rudi <ali@rudi.ir>
date: Tue Dec 30 13:56:15 EST 2014
hyph: handle special characters
Before this change, hyphenation was limited to utf-8 characters; now it handles special characters (\x, \(xy, \[xyz]) as well.
Suggested by Carsten Kunze <carsten.kunze@arcor.de>.
--- a/char.c
+++ b/char.c
@@ -185,7 +185,7 @@
}
/* read quoted arguments; this is called only for internal neatroff strings */
-static void quotedread(char **sp, char *d)
+void quotedread(char **sp, char *d)
{
char *s = *sp;
int q = *s++;
--- a/hyph.c
+++ b/hyph.c
@@ -18,6 +18,59 @@
static int hwoff[NHYPHS]; /* the offset of words in hwword[] */
static int hw_n; /* the number of dictionary words */
+/*
+ * read one character (utf-8, \x, \(xy, \[xyz] or \C'xyz') from s and store its
+ * NUL-terminated name in d (GNLEN bytes); return the number of bytes read,
+ * which never extends past the end of s
+ */
+static int hy_cget(char *d, char *s)
+{
+	char *o = s;
+	if (s[0] != '\\')
+		return utf8read(&s, d);
+	if (s[1] == '[') {
+		for (s += 2; *s && *s != ']'; s++)
+			if (s - o - 2 < GNLEN - 1)	/* truncate overlong names */
+				*d++ = *s;
+		*d = '\0';
+		return (*s ? s + 1 : s) - o;	/* also skip the closing ']' */
+	}
+	if (s[1] == '(')	/* copies at most two bytes, stopping at NUL */
+		return 2 + snprintf(d, 3, "%.2s", s + 2);
+	if (s[1] == 'C' && s[2]) {
+		s += 2;		/* quotedread() takes its quote from the first byte */
+		quotedread(&s, d);
+		return s - o;
+	}
+	d[0] = s[0];
+	d[1] = s[1];
+	d[2] = '\0';
+	return s[1] ? 2 : 1;		/* a lone trailing backslash is one byte */
+}
+
+/*
+ * append character s to d in its troff source form (c, \x, \(xy or \[xyz])
+ * without modifying s; return the number of bytes written, NUL excluded
+ */
+static int hy_cput(char *d, char *s)
+{
+	if (!s[0] || !s[1] || utf8one(s) || (s[0] == '\\' && !s[2])) {
+		/* single characters and two-byte \x escapes are copied verbatim */
+		strcpy(d, s);
+	} else if (!s[2]) {
+		/* a two-byte glyph name xy is written as \(xy */
+		d[0] = '\\';
+		d[1] = '(';
+		d[2] = s[0];
+		d[3] = s[1];
+		d[4] = '\0';
+	} else {
+		/* longer names are written as \[name], limited to GNLEN bytes */
+		snprintf(d, GNLEN, "\\[%s]", s);
+	}
+	return strlen(d);
+}
+
/* insert word s into hwword[] and hwhyph[] */
static void hw_add(char *s)
{
@@ -60,9 +113,18 @@
void tr_hw(char **args)
{
+	char c[GNLEN];		/* name of the character being copied */
+	char word[WORDLEN];	/* args[i] rewritten through hy_cget()/hy_cput() */
int i;
- for (i = 1; i < NARGS && args[i]; i++)
- hw_add(args[i]);
+	for (i = 1; i < NARGS && args[i]; i++) {
+		char *s = args[i], *d = word;
+		*d = '\0';	/* an empty argument must not reuse stale contents */
+		while (d - word < WORDLEN - GNLEN && s[0]) {
+			s += hy_cget(c, s);
+			d += hy_cput(d, c);
+		}
+		hw_add(word);
+	}
}
/* the tex hyphenation algorithm */
@@ -98,12 +160,13 @@
char n[WORDLEN] = {0};
char w[WORDLEN] = {0};
int c[WORDLEN]; /* start of the i-th character in w */
- int wmap[WORDLEN] = {0}; /* word[wmap[i]] is w[i] */
+ int wmap[WORDLEN] = {0}; /* w[i] corresponds to word[wmap[i]] */
int nc = 0;
int i, wlen;
hcode_strcpy(w, word, wmap, 1);
wlen = strlen(w);
- for (i = 0; i < wlen - 1; i += utf8len((unsigned char) w[i]))
+ char dum[GNLEN];
+ for (i = 0; i < wlen - 1; i += hy_cget(dum, w + i))
c[nc++] = i;
for (i = 0; i < nc - 1; i++)
hy_find(w + c[i], n + c[i]);
@@ -159,15 +222,15 @@
/* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
static void hcode_strcpy(char *d, char *s, int *map, int dots)
{
- int di = 0, si = 0, len;
+ char c[GNLEN];
+ int di = 0, si = 0;
if (dots)
d[di++] = '.';
while (di < WORDLEN - GNLEN && s[si]) {
- len = utf8len((unsigned char) s[si]);
map[di] = si;
- memcpy(d + di, s + si, len);
- si += len;
- di += hcode_mapchar(d + di);
+ si += hy_cget(c, s + si);
+ hcode_mapchar(c);
+ di += hy_cput(d + di, c);
}
if (dots)
d[di++] = '.';
@@ -191,7 +254,7 @@
{
char c1[GNLEN], c2[GNLEN];
char *s = args[1];
- while (s && utf8read(&s, c1) && utf8read(&s, c2))
+ while (s && charread(&s, c1) >= 0 && charread(&s, c2) >= 0)
hcode_add(c1, c2);
}
--- a/roff.h
+++ b/roff.h
@@ -396,6 +396,7 @@
void charnext_str(char *d, char *c);
void quotednext(char *d, int (*next)(void), void (*back)(int));
void unquotednext(char *d, int cmd, int (*next)(void), void (*back)(int));
+void quotedread(char **sp, char *d);
int escread(char **s, char *d);
/* string streams; nested next()/back() interface for string buffers */
void sstr_push(char *s);
--- a/wb.c
+++ b/wb.c
@@ -176,10 +176,7 @@
return 1;
if (c_hymark(s))
continue;
- if (!utf8one(s))
- strcpy(d, ".");
- else
- strcpy(d, s);
+ charnext_str(d, s);
d = strchr(d, '\0');
}
memset(hyph, 0, (d - word) * sizeof(hyph[0]));