shithub: neatroff

Download patch

ref: 3ca7606bfb3ba941944d8103f1012dce78820e4d
parent: c6e989fa2ac98bb2db4c1419428ab8e655fcfd7b
author: Ali Gholami Rudi <ali@rudi.ir>
date: Tue Dec 30 13:56:15 EST 2014

hyph: handle special characters

Before this change, hyphenation was limited to UTF-8 characters;
now it handles special characters (\x, \(xy, \[xyz]) as well.
Suggested by Carsten Kunze <carsten.kunze@arcor.de>.

--- a/char.c
+++ b/char.c
@@ -185,7 +185,7 @@
 }
 
 /* read quoted arguments; this is called only for internal neatroff strings */
-static void quotedread(char **sp, char *d)
+void quotedread(char **sp, char *d)
 {
 	char *s = *sp;
 	int q = *s++;
--- a/hyph.c
+++ b/hyph.c
@@ -18,6 +18,59 @@
 static int hwoff[NHYPHS];	/* the offset of words in hwword[] */
 static int hw_n;		/* the number of dictionary words */
 
+/* read one character (utf-8, \x, \(xy, \[xyz] or \C'xyz') from s into d (callers pass GNLEN bytes); return the length read from s */
+static int hy_cget(char *d, char *s)
+{
+	if (s[0] != '\\')
+		return utf8read(&s, d);
+	if (s[1] == '[') {
+		char *o = s;
+		for (s += 2; *s && *s != ']'; s++)
+			if (s - o - 2 < GNLEN - 1)	/* drop bytes that would overflow d */
+				*d++ = *s;
+		*d = '\0';
+		return (*s == ']' ? s + 1 : s) - o;	/* also consume the closing ']' */
+	}
+	if (s[1] == '(') {
+		int n = s[2] ? (s[3] ? 2 : 1) : 0;	/* name bytes present before the NUL */
+		memcpy(d, s + 2, n);
+		d[n] = '\0';
+		return 2 + n;
+	}
+	if (s[1] == 'C' && s[2]) {
+		char *e = s + 2;		/* quotedread() takes its delimiter from *e */
+		quotedread(&e, d);
+		return e - s;
+	}
+	d[0] = s[0];
+	d[1] = s[1];
+	d[2] = '\0';
+	return s[1] ? 2 : 1;		/* a trailing lone '\\' is one byte long */
+}
+
+/* append character s (in the form hy_cget() stores it) to d; return the number of bytes written */
+static int hy_cput(char *d, char *s)
+{
+	if (!s[0] || !s[1] || utf8one(s)) {	/* empty, one byte, or accepted by utf8one() */
+		strcpy(d, s);
+	} else if (s[0] == '\\' && !s[2]) {	/* two-byte escape \x: copy s to d verbatim */
+		d[0] = s[0];
+		d[1] = s[1];
+		d[2] = '\0';
+		return 2;
+	} else if (!s[2]) {			/* two-byte name xy: write \(xy */
+		d[0] = '\\';
+		d[1] = '(';
+		d[2] = s[0];
+		d[3] = s[1];
+		d[4] = '\0';
+		return 4;
+	} else {				/* longer name: write \[name], at most GNLEN - 1 bytes */
+		snprintf(d, GNLEN, "\\[%s]", s);
+	}
+	return strlen(d);
+}
+
 /* insert word s into hwword[] and hwhyph[] */
 static void hw_add(char *s)
 {
@@ -60,9 +113,18 @@
 
 void tr_hw(char **args)
 {
+	char c[GNLEN];
+	char word[WORDLEN];
 	int i;
-	for (i = 1; i < NARGS && args[i]; i++)
-		hw_add(args[i]);
+	for (i = 1; i < NARGS && args[i]; i++) {
+		char *s = args[i];
+		char *d = word;
+		while (d - word < WORDLEN - GNLEN && s[0]) {
+			s += hy_cget(c, s);
+			d += hy_cput(d, c);
+		}
+		hw_add(word);
+	}
 }
 
 /* the tex hyphenation algorithm */
@@ -98,12 +160,13 @@
 	char n[WORDLEN] = {0};
 	char w[WORDLEN] = {0};
 	int c[WORDLEN];			/* start of the i-th character in w */
-	int wmap[WORDLEN] = {0};	/* word[wmap[i]] is w[i] */
+	int wmap[WORDLEN] = {0};	/* w[i] corresponds to word[wmap[i]] */
 	int nc = 0;
 	int i, wlen;
 	hcode_strcpy(w, word, wmap, 1);
 	wlen = strlen(w);
-	for (i = 0; i < wlen - 1; i += utf8len((unsigned char) w[i]))
+	char dum[GNLEN];
+	for (i = 0; i < wlen - 1; i += hy_cget(dum, w + i))
 		c[nc++] = i;
 	for (i = 0; i < nc - 1; i++)
 		hy_find(w + c[i], n + c[i]);
@@ -159,15 +222,15 @@
 /* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
 static void hcode_strcpy(char *d, char *s, int *map, int dots)
 {
-	int di = 0, si = 0, len;
+	char c[GNLEN];
+	int di = 0, si = 0;
 	if (dots)
 		d[di++] = '.';
 	while (di < WORDLEN - GNLEN && s[si]) {
-		len = utf8len((unsigned char) s[si]);
 		map[di] = si;
-		memcpy(d + di, s + si, len);
-		si += len;
-		di += hcode_mapchar(d + di);
+		si += hy_cget(c, s + si);
+		hcode_mapchar(c);
+		di += hy_cput(d + di, c);
 	}
 	if (dots)
 		d[di++] = '.';
@@ -191,7 +254,7 @@
 {
 	char c1[GNLEN], c2[GNLEN];
 	char *s = args[1];
-	while (s && utf8read(&s, c1) && utf8read(&s, c2))
+	while (s && charread(&s, c1) >= 0 && charread(&s, c2) >= 0)
 		hcode_add(c1, c2);
 }
 
--- a/roff.h
+++ b/roff.h
@@ -396,6 +396,7 @@
 void charnext_str(char *d, char *c);
 void quotednext(char *d, int (*next)(void), void (*back)(int));
 void unquotednext(char *d, int cmd, int (*next)(void), void (*back)(int));
+void quotedread(char **sp, char *d);
 int escread(char **s, char *d);
 /* string streams; nested next()/back() interface for string buffers */
 void sstr_push(char *s);
--- a/wb.c
+++ b/wb.c
@@ -176,10 +176,7 @@
 			return 1;
 		if (c_hymark(s))
 			continue;
-		if (!utf8one(s))
-			strcpy(d, ".");
-		else
-			strcpy(d, s);
+		charnext_str(d, s);
 		d = strchr(d, '\0');
 	}
 	memset(hyph, 0, (d - word) * sizeof(hyph[0]));