shithub: neatmkfn

Download patch

ref: c895fdcb6006d9ff27e1bdc7082897294dffadab
parent: c576e2a6b897f17dfd01b2749bb3c42d5367d89e
author: Ali Gholami Rudi <ali@rudi.ir>
date: Sun Jul 27 19:47:59 EDT 2014

otf: parse ttf files

--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@
 all: mkfn
 %.o: %.c
 	$(CC) -c $(CFLAGS) $<
-mkfn: mkfn.o trfn.o sbuf.o tab.o
+mkfn: mkfn.o trfn.o sbuf.o tab.o otf.o
 	$(CC) -o $@ $^ $(LDFLAGS)
 clean:
 	rm -f *.o mkfn
--- a/mkfn.c
+++ b/mkfn.c
@@ -15,33 +15,6 @@
 
 #define TOKLEN		256
 
-static void otfdump_read(void)
-{
-	char cmd[TOKLEN];
-	char name[TOKLEN];
-	char ch[TOKLEN];
-	char c1[TOKLEN], c2[TOKLEN];
-	char wid[TOKLEN];
-	while (scanf("%s", cmd) == 1) {
-		if (!strcmp("name", cmd)) {
-			scanf("%s", name);
-			trfn_psfont(name);
-		}
-		if (!strcmp("char", cmd)) {
-			scanf("%s width %s", ch, wid);
-			trfn_char(ch, NULL, atoi(wid), 0, 0, 0, 0);
-		}
-		if (!strcmp("kernpair", cmd)) {
-			scanf("%s %s width %s", c1, c2, wid);
-			trfn_kern(c1, c2, atoi(wid));
-		}
-		if (!strcmp("feature", cmd)) {
-			scanf("%s substitution %s %s", name, c1, c2);
-			trfn_sub(c1, c2);
-		}
-	}
-}
-
 static char *afm_charfield(char *s, char *d)
 {
 	while (*s && !isspace(*s) && *s != ';')
@@ -52,7 +25,7 @@
 	return s;
 }
 
-static void afm_read(void)
+static int afm_read(void)
 {
 	char ln[1024];
 	char ch[TOKLEN] = "", pos[TOKLEN] = "";
@@ -107,7 +80,7 @@
 			break;
 		}
 		if (ch[0] && pos[0] && wid[0])
-			trfn_char(ch, pos, atoi(wid),
+			trfn_char(ch, atoi(pos), 0, atoi(wid),
 				atoi(llx), atoi(lly), atoi(urx), atoi(ury));
 	}
 	while (fgets(ln, sizeof(ln), stdin)) {
@@ -124,13 +97,16 @@
 		if (sscanf(ln, "KPX %s %s %s", c1, c2, wid) == 3)
 			trfn_kern(c1, c2, atoi(wid));
 	}
+	return 0;
 }
 
+int otf_read(void);
+
 static char *usage =
 	"Usage: mktrfn [options] <input >output\n"
 	"Options:\n"
 	"  -a      \tread an AFM file (default)\n"
-	"  -o      \tread the output of otfdump\n"
+	"  -o      \tread an OTF file\n"
 	"  -s      \tspecial font\n"
 	"  -p name \toverride font postscript name\n"
 	"  -t name \tset font troff name\n"
@@ -181,7 +157,7 @@
 	if (afm)
 		afm_read();
 	else
-		otfdump_read();
+		otf_read();
 	trfn_print();
 	trfn_done();
 	return 0;
--- /dev/null
+++ b/otf.c
@@ -1,0 +1,286 @@
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "trfn.h"
+
+#define NGLYPHS			(1 << 14)	/* number of entries in the glyph_* arrays */
+#define GNLEN			(64)		/* bytes reserved for each glyph name */
+#define BUFLEN			(1 << 23)	/* size of the buffer holding the input file */
+#define OWID(w)			((w) * 1000 / (upm))	/* scale w from upm units per em to 1000 */
+
+#define U32(buf, off)		(htonl(*(u32 *) ((buf) + (off))))	/* u32 at byte offset off, swapped between network and host order */
+#define U16(buf, off)		(htons(*(u16 *) ((buf) + (off))))	/* same for a u16 */
+#define U8(buf, off)		(*(u8 *) ((buf) + (off)))	/* single byte at offset off */
+#define S16(buf, off)		((s16) htons(*(u16 *) ((buf) + (off))))	/* U16 reinterpreted as signed */
+#define S32(buf, off)		((s32) htonl(*(u32 *) ((buf) + (off))))	/* U32 reinterpreted as signed */
+
+#define OTFLEN		12	/* otf header length */
+#define OTFRECLEN	16	/* otf header record length */
+#define CMAPLEN		4	/* cmap header length */
+#define CMAPRECLEN	8	/* cmap record length */
+#define CMAP4LEN	8	/* format 4 cmap subtable header length */
+
+typedef unsigned int u32;
+typedef unsigned short u16;
+typedef unsigned char u8;
+typedef int s32;
+typedef short s16;
+
+static char glyph_name[NGLYPHS][GNLEN];	/* glyph names, written by otf_post() */
+static int glyph_code[NGLYPHS];		/* character code of each glyph, written by otf_cmap4() */
+static int glyph_bbox[NGLYPHS][4];	/* four values per glyph from glyf; passed on as llx, lly, urx, ury */
+static int glyph_wid[NGLYPHS];		/* glyph widths, written by otf_hmtx() */
+static int glyph_n;			/* number of glyphs, set by otf_post() */
+static int upm;			/* units per em */
+
+static char *macset[];			/* defined at the end of this file */
+
+/* find the otf table with the given 4-byte name; returns NULL if there is none */
+static void *otf_table(void *otf, char *name)
+{
+	void *recs = otf + OTFLEN;	/* otf table records */
+	void *rec;			/* beginning of a table record */
+	int nrecs = U16(otf, 4);	/* record count, read from the otf header */
+	int i;
+	for (i = 0; i < nrecs; i++) {
+		rec = recs + i * OTFRECLEN;
+		if (!strncmp(rec, name, 4))	/* a record begins with the table name */
+			return otf + U32(rec, 8);	/* the stored offset is relative to otf */
+	}
+	return NULL;
+}
+
+/* parse otf cmap format 4 subtable: record the character code of each glyph */
+static void otf_cmap4(void *otf, void *cmap4)
+{
+	int nsegs = U16(cmap4, 6) / 2;		/* the u16 at offset 6, halved */
+	/* four arrays of nsegs u16 entries; a 2-byte field separates ends and begs */
+	void *ends = cmap4 + 14;
+	void *begs = ends + 2 * nsegs + 2;
+	void *deltas = begs + 2 * nsegs;
+	void *offsets = deltas + 2 * nsegs;
+	int beg, end, delta, offset;
+	int gid;				/* glyph index of character code j */
+	int i, j;
+	for (i = 0; i < nsegs; i++) {
+		beg = U16(begs, 2 * i);
+		end = U16(ends, 2 * i);
+		delta = U16(deltas, 2 * i);
+		offset = U16(offsets, 2 * i);
+		for (j = beg; j <= end; j++) {
+			if (offset)	/* glyph index stored offset bytes past offsets[i] */
+				gid = U16(offsets + i * 2,
+						offset + (j - beg) * 2);
+			else		/* glyph index is the code plus delta, mod 65536 */
+				gid = (j + delta) & 0xffff;
+			/* glyph_code[] has NGLYPHS entries; larger indices are dropped */
+			if (gid < NGLYPHS)
+				glyph_code[gid] = j;
+		}
+	}
+}
+
+/* parse otf cmap header and pass each supported subtable to otf_cmap4() */
+static void otf_cmap(void *otf, void *cmap)
+{
+	void *recs = cmap + CMAPLEN;	/* cmap records */
+	void *rec;			/* a cmap record */
+	void *tab;			/* a cmap subtable */
+	int plat, enc;
+	int fmt;
+	int nrecs = U16(cmap, 2);	/* record count, read from the cmap header */
+	int i;
+	for (i = 0; i < nrecs; i++) {
+		rec = recs + i * CMAPRECLEN;
+		plat = U16(rec, 0);
+		enc = U16(rec, 2);
+		tab = cmap + U32(rec, 4);	/* the stored offset is relative to cmap */
+		fmt = U16(tab, 0);
+		if (plat == 3 && enc == 1 && fmt == 4)	/* every other subtable is skipped */
+			otf_cmap4(otf, tab);
+	}
+}
+
+static void otf_post(void *otf, void *post)	/* set glyph_n and fill glyph_name[] */
+{
+	void *post2;			/* version 2.0 header */
+	void *index;			/* glyph name indices */
+	void *names;			/* glyph names */
+	int i, idx, len, cname = 0;	/* cname: offset of the next name in names */
+	if (U32(post, 0) != 0x00020000)	/* only version 2.0 is handled */
+		return;
+	post2 = post + 32;
+	glyph_n = U16(post2, 0);
+	index = post2 + 2;
+	names = index + 2 * glyph_n;	/* names follow all of the indices */
+	if (glyph_n > NGLYPHS)		/* the glyph_* arrays hold NGLYPHS entries */
+		glyph_n = NGLYPHS;
+	for (i = 0; i < glyph_n; i++) {
+		idx = U16(index, 2 * i);
+		if (idx <= 257) {	/* one of the names in macset[] */
+			strcpy(glyph_name[i], macset[idx]);
+			continue;
+		}
+		len = U8(names, cname);	/* length byte, then the name; at most GNLEN - 1 bytes are kept */
+		snprintf(glyph_name[i], GNLEN, "%.*s", len, (char *) names + cname + 1);
+		cname += len + 1;	/* NOTE(review): assumes names are stored in glyph order - confirm */
+	}
+}
+
+static void otf_glyf(void *otf, void *glyf)	/* fill glyph_bbox[] from glyf */
+{
+	void *maxp = otf_table(otf, "maxp");
+	void *head = otf_table(otf, "head");
+	void *loca = otf_table(otf, "loca");
+	void *gdat;			/* data of glyph i */
+	void *gdat_next;		/* data of glyph i + 1 */
+	int n = U16(maxp, 4);		/* number of glyphs to scan */
+	int fmt = U16(head, 50);	/* nonzero: loca holds u32 offsets */
+	int i, j;
+	for (i = 0; i < n && i < NGLYPHS; i++) {	/* glyph_bbox[] has NGLYPHS rows */
+		if (fmt) {
+			gdat = glyf + U32(loca, 4 * i);
+			gdat_next = glyf + U32(loca, 4 * (i + 1));
+		} else {	/* u16 entries are doubled to get the offset */
+			gdat = glyf + U16(loca, 2 * i) * 2;
+			gdat_next = glyf + U16(loca, 2 * (i + 1)) * 2;
+		}
+		if (gdat < gdat_next)	/* glyphs without data keep a zero bbox */
+			for (j = 0; j < 4; j++)
+				glyph_bbox[i][j] = S16(gdat, 2 + 2 * j);
+	}
+}
+
+static void otf_hmtx(void *otf, void *hmtx)	/* fill glyph_wid[] from hmtx */
+{
+	void *hhea = otf_table(otf, "hhea");
+	int n = hhea && hmtx ? U16(hhea, 34) : 0;	/* 4-byte hmtx entries; 0 if a table is missing */
+	int i;
+	for (i = 0; i < n && i < NGLYPHS; i++)	/* glyph_wid[] has NGLYPHS entries */
+		glyph_wid[i] = U16(hmtx, i * 4);
+	/* the remaining glyphs repeat the last width, which exists only if n > 0 */
+	for (i = n; n > 0 && i < glyph_n && i < NGLYPHS; i++)
+		glyph_wid[i] = glyph_wid[n - 1];
+}
+
+static void otf_kern(void *otf, void *kern)	/* pass format 0 kerning pairs to trfn_kern() */
+{
+	int n;		/* number of kern subtables */
+	void *tab;	/* a kern subtable */
+	int off = 4;	/* offset of the next subtable within kern */
+	int npairs, cov;
+	int i, j;
+	int c1, c2, val;
+	n = U16(kern, 2);
+	for (i = 0; i < n; i++) {
+		tab = kern + off;
+		off += U16(tab, 2);	/* the u16 at offset 2 leads to the next subtable */
+		cov = U16(tab, 4);
+		if ((cov >> 8) == 0 && (cov & 1)) {	/* format 0 */
+			npairs = U16(tab, 6);
+			for (j = 0; j < npairs; j++) {	/* 6-byte pairs start at offset 14 */
+				c1 = U16(tab, 14 + 6 * j);
+				c2 = U16(tab, 14 + 6 * j + 2);
+				val = S16(tab, 14 + 6 * j + 4);
+				if (c1 < NGLYPHS && c2 < NGLYPHS)	/* glyph_name[] has NGLYPHS rows */
+					trfn_kern(glyph_name[c1], glyph_name[c2],
+						OWID(val));
+			}
+		}
+	}
+}
+
+int xread(int fd, char *buf, int len)	/* read up to len bytes from fd; returns the number read */
+{
+	int nr = 0;
+	while (nr < len) {
+		int ret = read(fd, buf + nr, len - nr);
+		if (ret == -1 && (errno == EAGAIN || errno == EINTR))	/* retry these two errors */
+			continue;
+		if (ret <= 0)	/* a zero return or any other error ends the loop */
+			break;
+		nr += ret;
+	}
+	return nr;
+}
+
+static char buf[BUFLEN];
+
+int otf_read(void)	/* parse the font on fd 0; returns 1 if nothing was read, else 0 */
+{
+	int i;
+	if (xread(0, buf, sizeof(buf)) <= 0)
+		return 1;
+	upm = U16(otf_table(buf, "head"), 18);
+	otf_cmap(buf, otf_table(buf, "cmap"));
+	otf_post(buf, otf_table(buf, "post"));
+	if (otf_table(buf, "glyf"))
+		otf_glyf(buf, otf_table(buf, "glyf"));
+	otf_hmtx(buf, otf_table(buf, "hmtx"));
+	for (i = 0; i < glyph_n; i++)	/* position -1; code 0xffff is reported as 0 */
+		trfn_char(glyph_name[i], -1,
+			glyph_code[i] != 0xffff ? glyph_code[i] : 0,
+			OWID(glyph_wid[i]),
+			OWID(glyph_bbox[i][0]), OWID(glyph_bbox[i][1]),
+			OWID(glyph_bbox[i][2]), OWID(glyph_bbox[i][3]));
+	if (otf_table(buf, "kern"))	/* otf_table() returns NULL when the font has no kern table */
+		otf_kern(buf, otf_table(buf, "kern"));
+	return 0;
+}
+
+static char *macset[] = {	/* 258 glyph names; otf_post() indexes this with name indices 0..257 */
+	".notdef", ".null", "nonmarkingreturn", "space", "exclam",
+	"quotedbl", "numbersign", "dollar", "percent", "ampersand",
+	"quotesingle", "parenleft", "parenright", "asterisk", "plus",
+	"comma", "hyphen", "period", "slash", "zero",
+	"one", "two", "three", "four", "five",
+	"six", "seven", "eight", "nine", "colon",
+	"semicolon", "less", "equal", "greater", "question",
+	"at", "A", "B", "C", "D",
+	"E", "F", "G", "H", "I",
+	"J", "K", "L", "M", "N",
+	"O", "P", "Q", "R", "S",
+	"T", "U", "V", "W", "X",
+	"Y", "Z", "bracketleft", "backslash", "bracketright",
+	"asciicircum", "underscore", "grave", "a", "b",
+	"c", "d", "e", "f", "g",
+	"h", "i", "j", "k", "l",
+	"m", "n", "o", "p", "q",
+	"r", "s", "t", "u", "v",
+	"w", "x", "y", "z", "braceleft",
+	"bar", "braceright", "asciitilde", "Adieresis", "Aring",
+	"Ccedilla", "Eacute", "Ntilde", "Odieresis", "Udieresis",
+	"aacute", "agrave", "acircumflex", "adieresis", "atilde",
+	"aring", "ccedilla", "eacute", "egrave", "ecircumflex",
+	"edieresis", "iacute", "igrave", "icircumflex", "idieresis",
+	"ntilde", "oacute", "ograve", "ocircumflex", "odieresis",
+	"otilde", "uacute", "ugrave", "ucircumflex", "udieresis",
+	"dagger", "degree", "cent", "sterling", "section",
+	"bullet", "paragraph", "germandbls", "registered", "copyright",
+	"trademark", "acute", "dieresis", "notequal", "AE",
+	"Oslash", "infinity", "plusminus", "lessequal", "greaterequal",
+	"yen", "mu", "partialdiff", "summation", "product",
+	"pi", "integral", "ordfeminine", "ordmasculine", "Omega",
+	"ae", "oslash", "questiondown", "exclamdown", "logicalnot",
+	"radical", "florin", "approxequal", "Delta", "guillemotleft",
+	"guillemotright", "ellipsis", "nonbreakingspace", "Agrave", "Atilde",
+	"Otilde", "OE", "oe", "endash", "emdash",
+	"quotedblleft", "quotedblright", "quoteleft", "quoteright", "divide",
+	"lozenge", "ydieresis", "Ydieresis", "fraction", "currency",
+	"guilsinglleft", "guilsinglright", "fi", "fl", "daggerdbl",
+	"periodcentered", "quotesinglbase", "quotedblbase", "perthousand", "Acircumflex",
+	"Ecircumflex", "Aacute", "Edieresis", "Egrave", "Iacute",
+	"Icircumflex", "Idieresis", "Igrave", "Oacute", "Ocircumflex",
+	"apple", "Ograve", "Uacute", "Ucircumflex", "Ugrave",
+	"dotlessi", "circumflex", "tilde", "macron", "breve",
+	"dotaccent", "ring", "cedilla", "hungarumlaut", "ogonek",
+	"caron", "Lslash", "lslash", "Scaron", "scaron",
+	"Zcaron", "zcaron", "brokenbar", "Eth", "eth",
+	"Yacute", "yacute", "Thorn", "thorn", "minus",
+	"multiply", "onesuperior", "twosuperior", "threesuperior", "onehalf",
+	"onequarter", "threequarters", "franc", "Gbreve", "gbreve",
+	"Idotaccent", "Scedilla", "scedilla", "Cacute", "cacute",
+	"Ccaron", "ccaron", "dcroat",
+};
--- a/trfn.c
+++ b/trfn.c
@@ -13,8 +13,6 @@
 #define NCHAR		8	/* number of characters per glyph */
 #define GNLEN		64	/* glyph name length */
 #define AGLLEN		8192	/* adobe glyphlist length */
-#define NSUBS		2048	/* number of substitutions */
-#define NPSAL		32	/* number of substitutions per glyph */
 
 static struct sbuf sbuf_char;	/* characters */
 static struct sbuf sbuf_kern;	/* kerning pairs */
@@ -26,10 +24,6 @@
 static char trfn_ligs[8192];	/* font ligatures */
 static char trfn_trname[256];	/* font troff name */
 static char trfn_psname[256];	/* font ps name */
-/* glyph substition */
-static char subs_src[NSUBS][GNLEN];
-static char subs_dst[NSUBS][GNLEN];
-static int subs_n;
 /* character type */
 static int trfn_asc;		/* minimum height of glyphs with ascender */
 static int trfn_desc;		/* minimum depth of glyphs with descender */
@@ -205,41 +199,17 @@
 		utf8put(&str, s[i]);
 }
 
-void trfn_sub(char *c1, char *c2)
+/* find the utf-8 name of src with the given unicode codepoint */
+static int trfn_name(char *dst, char *src, int codepoint)
 {
-	if (subs_n < NSUBS && !strchr(c1, '.')) {
-		strcpy(subs_src[subs_n], c1);
-		strcpy(subs_dst[subs_n], c2);
-		subs_n++;
-	}
-}
-
-/* return the list of postscript glyph aliases of character c */
-static void trfn_subs(char *c, char **a)
-{
-	char *dot;
-	int i, subs = 0;
-	/* adding c itself to the list of aliases only if not substituded */
-	for (i = 0; i < subs_n; i++)
-		if (!strcmp(c, subs_src[i]))
-			subs = 1;
-	dot = strrchr(c, '.');
-	if (!subs && (!dot || !strcmp(".isol", dot) || !strcmp(".init", dot) ||
-				!strcmp(".fina", dot) || !strcmp(".medi", dot)))
-			*a++ = c;
-	/* adding aliases added via trfn_subs() */
-	for (i = 0; i < subs_n; i++)
-		if (!strcmp(c, subs_dst[i]))
-			*a++ = subs_src[i];
-	*a++ = NULL;
-}
-
-static int trfn_name(char *dst, char *src)
-{
 	char ch[GNLEN];
 	char *d = dst;
 	char *s;
 	int i;
+	if (codepoint) {
+		utf8put(&dst, codepoint);
+		return 0;
+	}
 	if (!src || src[0] == '.')
 		return 1;
 	while (*src && *src != '.') {
@@ -310,44 +280,39 @@
 	return typ;
 }
 
-void trfn_char(char *psname, char *n, int wid,
+/* n is the position and u is the unicode codepoint */
+void trfn_char(char *psname, int n, int u, int wid,
 		int llx, int lly, int urx, int ury)
 {
 	char uc[GNLEN];			/* mapping unicode character */
-	char *a_ps[NPSAL] = {NULL};	/* postscript glyph substitutions */
 	char **a_tr;			/* troff character names */
 	char pos[GNLEN] = "";		/* postscript character position/name */
-	int i_ps = 0;			/* current name in a_ps */
 	int typ;			/* character type */
 	/* initializing character attributes */
-	if (trfn_name(uc, psname))
+	if (trfn_name(uc, psname, u))
 		strcpy(uc, "---");
-	if (n && atoi(n) >= 0 && atoi(n) < 256)
-		strcpy(pos, n);
-	if (!n && !strchr(psname, '.') && !uc[1] && uc[0] >= 32 && uc[0] <= 125)
-		sprintf(pos, "%d", uc[0]);
+	if (n >= 0 && n < 256)
+		sprintf(pos, "%d", n);
+	if (n < 0 && !uc[1] && uc[0] >= 32 && uc[0] <= 125)
+		if (!strchr(psname, '.'))
+			sprintf(pos, "%d", uc[0]);
 	typ = trfn_type(!strchr(psname, '.') ? uc : "", lly, ury);
 	/* printing troff charset */
-	trfn_subs(psname, a_ps);
-	for (i_ps = 0; !i_ps || a_ps[i_ps]; i_ps++) {
-		if (trfn_name(uc, a_ps[i_ps]))
-			strcpy(uc, "---");
-		if (strchr(uc, ' ')) {		/* space not allowed in char names */
-			if (!trfn_swid && !strcmp(" ", uc))
-				trfn_swid = WX(wid);
-			continue;
-		}
-		if (strcmp("---", uc))
-			trfn_lig(uc);
-		sbuf_printf(&sbuf_char, "char %s\t%d", uc, WX(wid));
-		if (trfn_bbox && (llx || lly || urx || ury))
-			sbuf_printf(&sbuf_char, ",%d,%d,%d,%d",
-				WX(llx), WX(lly), WX(urx), WX(ury));
-		sbuf_printf(&sbuf_char, "\t%d\t%s\t%s\n", typ, psname, pos);
-		a_tr = tab_get(tab_alts, uc);
-		while (a_tr && *a_tr)
-			sbuf_printf(&sbuf_char, "char %s\t\"\n", *a_tr++);
+	if (strchr(uc, ' ')) {	/* space not allowed in char names */
+		if (!trfn_swid && !strcmp(" ", uc))
+			trfn_swid = WX(wid);
+		return;
 	}
+	if (strcmp("---", uc))
+		trfn_lig(uc);
+	sbuf_printf(&sbuf_char, "char %s\t%d", uc, WX(wid));
+	if (trfn_bbox && (llx || lly || urx || ury))
+		sbuf_printf(&sbuf_char, ",%d,%d,%d,%d",
+			WX(llx), WX(lly), WX(urx), WX(ury));
+	sbuf_printf(&sbuf_char, "\t%d\t%s\t%s\n", typ, psname, pos);
+	a_tr = tab_get(tab_alts, uc);
+	while (a_tr && *a_tr)
+		sbuf_printf(&sbuf_char, "char %s\t\"\n", *a_tr++);
 }
 
 void trfn_kern(char *c1, char *c2, int x)
--- a/trfn.h
+++ b/trfn.h
@@ -3,6 +3,5 @@
 void trfn_trfont(char *name);
 void trfn_psfont(char *fontname);
 void trfn_print(void);
-void trfn_char(char *c, char *n, int wid, int llx, int lly, int urx, int ury);
+void trfn_char(char *c, int n, int u, int wid, int llx, int lly, int urx, int ury);
 void trfn_kern(char *c1, char *c2, int x);
-void trfn_sub(char *c1, char *c2);