ref: 3ca7606bfb3ba941944d8103f1012dce78820e4d
parent: c6e989fa2ac98bb2db4c1419428ab8e655fcfd7b
	author: Ali Gholami Rudi <ali@rudi.ir>
	date: Tue Dec 30 13:56:15 EST 2014
	
hyph: handle special characters

Before this change, hyphenation was limited to UTF-8 characters; now it handles special characters (\x, \(xy, \[xyz]) as well. Suggested by Carsten Kunze <carsten.kunze@arcor.de>.
--- a/char.c
+++ b/char.c
@@ -185,7 +185,7 @@
}
/* read quoted arguments; this is called only for internal neatroff strings */
-static void quotedread(char **sp, char *d)
+void quotedread(char **sp, char *d)
 {char *s = *sp;
int q = *s++;
--- a/hyph.c
+++ b/hyph.c
@@ -18,6 +18,59 @@
static int hwoff[NHYPHS]; /* the offset of words in hwword[] */
static int hw_n; /* the number of dictionary words */
+/*
+ * read a single character from s into d; return the number of bytes of s
+ * that were consumed.
+ *
+ * Recognised forms:
+ *   plain text   handed to utf8read()
+ *   \[name]      d receives "name"; the closing ']' is consumed as well
+ *   \(xy         d receives "xy"
+ *   \C'name'     handed to quotedread() after skipping the "\C" prefix
+ *   \x           d receives the two bytes "\x" unchanged
+ *
+ * d must have room for at least GNLEN bytes.  The returned length never
+ * steps past the terminating NUL of s, so callers that advance by the
+ * return value and test s[0] stay inside the string.
+ */
+static int hy_cget(char *d, char *s)
+{
+	char *o = s;
+	int n = 0;
+	if (s[0] != '\\')
+		return utf8read(&s, d);
+	if (s[1] == '[') {
+		s += 2;
+		while (*s && *s != ']') {
+			if (n < GNLEN - 1)	/* drop the excess of over-long names */
+				d[n++] = *s;
+			s++;
+		}
+		d[n] = '\0';
+		if (*s == ']')		/* the bracket belongs to this character */
+			s++;
+		return s - o;
+	}
+	if (s[1] == '(') {
+		s += 2;
+		while (n < 2 && *s)	/* stop early on a truncated \(x */
+			d[n++] = *s++;
+		d[n] = '\0';
+		return s - o;
+	}
+	if (s[1] == 'C') {
+		s += 2;
+		if (!*s) {		/* "\C" at the very end of the string */
+			d[0] = '\0';
+			return s - o;
+		}
+		/* quotedread() takes the first byte at *sp as q, presumably the quote delimiter */
+		quotedread(&s, d);
+		return s - o;
+	}
+	d[0] = s[0];
+	d[1] = s[1];
+	d[2] = '\0';
+	return s[1] ? 2 : 1;		/* a lone trailing backslash is one byte */
+}
+
+/*
+ * append character s to d; return the number of bytes written (excluding
+ * the terminating NUL, which is always stored).
+ *
+ * s is a character as produced by hy_cget():
+ *   empty, one byte, or a single utf-8 character  -> copied verbatim
+ *   a two-byte escape "\x"                        -> copied verbatim
+ *   any other two-byte name "xy"                  -> written as \(xy
+ *   longer names                                  -> written as \[name]
+ *
+ * d must have room for at least GNLEN bytes.
+ */
+static int hy_cput(char *d, char *s)
+{
+	if (!s[0] || !s[1] || utf8one(s)) {
+		strcpy(d, s);
+	} else if (s[0] == '\\' && !s[2]) {
+		/* copy the escape from s into d, not the other way round */
+		d[0] = s[0];
+		d[1] = s[1];
+		d[2] = '\0';
+		return 2;
+	} else if (!s[2]) {
+		d[0] = '\\';
+		d[1] = '(';
+		d[2] = s[0];
+		d[3] = s[1];
+		d[4] = '\0';
+		return 4;
+	} else {
+		snprintf(d, GNLEN, "\\[%s]", s);
+	}
+	return strlen(d);
+}
+
/* insert word s into hwword[] and hwhyph[] */
static void hw_add(char *s)
 {@@ -60,9 +113,18 @@
void tr_hw(char **args)
 {
+	char c[GNLEN];
+	char word[WORDLEN];
 	int i;
-	for (i = 1; i < NARGS && args[i]; i++)
-		hw_add(args[i]);
+	/* re-encode each argument character by character before storing it */
+	for (i = 1; i < NARGS && args[i]; i++) {
+		char *s = args[i];
+		char *d = word;
+		word[0] = '\0';	/* stay terminated when args[i] is empty */
+		/* leave GNLEN bytes of headroom for the next hy_cput() */
+		while (d - word < WORDLEN - GNLEN && s[0]) {
+			s += hy_cget(c, s);
+			d += hy_cput(d, c);
+		}
+		hw_add(word);
+	}
 }
/* the tex hyphenation algorithm */
@@ -98,12 +160,13 @@
 	char n[WORDLEN] = {0}; 	char w[WORDLEN] = {0};int c[WORDLEN]; /* start of the i-th character in w */
-	int wmap[WORDLEN] = {0};	/* word[wmap[i]] is w[i] */+	int wmap[WORDLEN] = {0};	/* w[i] corresponds to word[wmap[i]] */int nc = 0;
int i, wlen;
hcode_strcpy(w, word, wmap, 1);
wlen = strlen(w);
- for (i = 0; i < wlen - 1; i += utf8len((unsigned char) w[i]))
+ char dum[GNLEN];
+ for (i = 0; i < wlen - 1; i += hy_cget(dum, w + i))
c[nc++] = i;
for (i = 0; i < nc - 1; i++)
hy_find(w + c[i], n + c[i]);
@@ -159,15 +222,15 @@
/* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
static void hcode_strcpy(char *d, char *s, int *map, int dots)
 {- int di = 0, si = 0, len;
+ char c[GNLEN];
+ int di = 0, si = 0;
if (dots)
d[di++] = '.';
 	while (di < WORDLEN - GNLEN && s[si]) {- len = utf8len((unsigned char) s[si]);
map[di] = si;
- memcpy(d + di, s + si, len);
- si += len;
- di += hcode_mapchar(d + di);
+ si += hy_cget(c, s + si);
+ hcode_mapchar(c);
+ di += hy_cput(d + di, c);
}
if (dots)
d[di++] = '.';
@@ -191,7 +254,7 @@
 {char c1[GNLEN], c2[GNLEN];
char *s = args[1];
- while (s && utf8read(&s, c1) && utf8read(&s, c2))
+ while (s && charread(&s, c1) >= 0 && charread(&s, c2) >= 0)
hcode_add(c1, c2);
}
--- a/roff.h
+++ b/roff.h
@@ -396,6 +396,7 @@
void charnext_str(char *d, char *c);
void quotednext(char *d, int (*next)(void), void (*back)(int));
void unquotednext(char *d, int cmd, int (*next)(void), void (*back)(int));
+void quotedread(char **sp, char *d);
int escread(char **s, char *d);
/* string streams; nested next()/back() interface for string buffers */
void sstr_push(char *s);
--- a/wb.c
+++ b/wb.c
@@ -176,10 +176,7 @@
return 1;
if (c_hymark(s))
continue;
- if (!utf8one(s))
- strcpy(d, ".");
- else
- strcpy(d, s);
+ charnext_str(d, s);
d = strchr(d, '\0');
}
memset(hyph, 0, (d - word) * sizeof(hyph[0]));
--
⑨