shithub: fnt

Download patch

ref: 83519f11fd26cd605dba45d9ce1fc0f39d467d40
parent: baa3c241b77085b6f770be2cd13afaafaaa7559a
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Wed Jul 17 22:27:04 EDT 2024

add support for more cmap subtable formats

--- a/otf.c.in
+++ b/otf.c.in
@@ -650,7 +650,8 @@
 	PLAT_CUSTOM, /* deprecated, unsupported */
 };
 
-/* supported subtable formats: 4, 6, 10, 12, 14 */
+/* supported subtable formats: 4, 10, 12 */
+/* FIXME: need to implement (higher to lower prio): 6, 14, 13 */
 enum {
 	ENC_UNICODE_1_0, /* deprecated, unsupported */
 	ENC_UNICODE_1_1, /* deprecated, unsupported */
@@ -658,7 +659,7 @@
 	ENC_UNICODE_2_0_BMP, /* subtable format 4, 6 */
 	ENC_UNICODE_2_0_FULL, /* subtable format 10, 12 */
 	ENC_UNICODE_VAR_SEQ, /* subtable format 14 */
-	ENC_UNICODE_FULL, /* subtable format 13 (many-to-one), unsupported */
+	ENC_UNICODE_FULL, /* subtable format 13 (many-to-one) */
 
 	ENC_WINDOWS_SYMBOL = 0, /* unsupported */
 	ENC_WINDOWS_UNICODE_BMP, /* subtable format 4 */
@@ -671,8 +672,73 @@
 };
 
 static int
-cmapGroup12rune2glyph(void *aux, Rune r)
+cmap4rune2glyph(void *aux, Rune r)
 {
+	SubtableCmap4 *sc = aux;
+	int i, b, e, x, segC;
+
+	/* format 4 lookup: glyph id for rune r, or -1 when no segment or array entry covers it */
+	if(r > 0xffff)
+		return -1;
+	segC = sc->segCountX2/2;
+	for(b = 0, e = segC-1; b <= e; ){ /* binary search over the segments */
+		i = (b + e)/2;
+		if(sc->endCode[i] < r)
+			b = i + 1;
+		else if(sc->startCode[i] > r)
+			e = i - 1;
+		else if(sc->idRangeOffset[i] == 0)
+			return (r + sc->idDelta[i]) & 0xffff; /* glyph ids are modulo 65536 */
+		else{
+			/* the offset is in bytes from &idRangeOffset[i]; glyphIdArray starts segC-i entries after it */
+			x = i - segC + (u16int)sc->idRangeOffset[i]/2 + (int)(r - sc->startCode[i]);
+			if(x < 0 || x >= (sc->length-((8*2)+(sc->segCountX2*4)))/2)
+				break;
+			/* 0 is the missing glyph; any other entry is biased by idDelta */
+			if((x = sc->glyphIdArray[x]) != 0)
+				x = (x + sc->idDelta[i]) & 0xffff;
+			return x;
+		}
+	}
+	return -1;
+}
+
+static Rune
+cmap4glyph2rune(void *aux, int g)
+{
+	USED(aux); USED(g);
+	/* FIXME: reverse lookup is not implemented for format 4; NoRune is returned in the hope that another mapper picks it up */
+	return NoRune;
+}
+
+static int
+cmap10rune2glyph(void *aux, Rune r)
+{
+	SubtableCmap10 *sc = aux;
+	u32int n;
+
+	/* glyphIdArray is read with (length-20)/2 entries, which can be fewer than numChars claims */
+	n = (sc->length-((2*2)+(4*4)))/2;
+	if(r < sc->startCharCode)
+		return -1;
+	r -= sc->startCharCode;
+	return r < sc->numChars && r < n ? sc->glyphIdArray[r] : -1;
+}
+
+static Rune
+cmap10glyph2rune(void *aux, int g)
+{
+	SubtableCmap10 *sc = aux;
+	u32int i, n = (sc->length-((2*2)+(4*4)))/2; /* entries present in glyphIdArray */
+
+	/* entry i is the glyph of rune startCharCode+i: find the first entry equal to g */
+	for(i = 0; i < sc->numChars && i < n && sc->glyphIdArray[i] != g; i++);
+	return i < sc->numChars && i < n ? sc->startCharCode + i : NoRune;
+}
+
+static int
+cmap12rune2glyph(void *aux, Rune r)
+{
 	SubtableCmap12or13 *sc;
 	MapGroup *m;
 	int b, e, x;
@@ -692,7 +758,7 @@
 }
 
 static Rune
-cmapGroup12glyph2rune(void *aux, int g)
+cmap12glyph2rune(void *aux, int g)
 {
 	SubtableCmap12or13 *sc;
 	MapGroup *m;
@@ -714,25 +780,44 @@
 	sc = er->subtable;
 	switch(er->encodingID){
 	case ENC_UNICODE_2_0_BMP:
-		/* FIXME */
-		break;
-
-	case ENC_UNICODE_2_0_FULL: /* this one is good */
-		if(sc->format != 12){
+		if(sc->format != 4){
 			(*unsupported)++;
-			werrstr("unicode 2.0 full: fmt %d", sc->format);
+			werrstr("2.0 bmp: fmt %d", sc->format);
 			goto err;
 		}
-		if(sc->sub12or13.numGroups < 1){
-			werrstr("unicode 2.0 full: no groups");
-			goto err;
-		}
-		c->mappers[c->numMappers].rune2glyph = cmapGroup12rune2glyph;
-		c->mappers[c->numMappers].glyph2rune = cmapGroup12glyph2rune;
-		c->mappers[c->numMappers++].aux = &sc->sub12or13;
+		c->mappers[c->numMappers].rune2glyph = cmap4rune2glyph;
+		c->mappers[c->numMappers].glyph2rune = cmap4glyph2rune;
+		c->mappers[c->numMappers++].aux = &sc->sub4;
 		(*parsed)++;
 		break;
 
+	case ENC_UNICODE_2_0_FULL: /* this one is good */
+		if(sc->format == 10){
+			if(sc->sub10.numChars < 1){
+				werrstr("2.0 full: no chars");
+				goto err;
+			}
+			c->mappers[c->numMappers].rune2glyph = cmap10rune2glyph;
+			c->mappers[c->numMappers].glyph2rune = cmap10glyph2rune;
+			c->mappers[c->numMappers++].aux = &sc->sub10;
+			(*parsed)++;
+			break;
+		}
+		if(sc->format == 12){
+			if(sc->sub12or13.numGroups < 1){
+				werrstr("2.0 full: no groups");
+				goto err;
+			}
+			c->mappers[c->numMappers].rune2glyph = cmap12rune2glyph;
+			c->mappers[c->numMappers].glyph2rune = cmap12glyph2rune;
+			c->mappers[c->numMappers++].aux = &sc->sub12or13;
+			(*parsed)++;
+			break;
+		}
+		(*unsupported)++;
+		werrstr("2.0 full: fmt %d", sc->format);
+		goto err;
+
 	case ENC_UNICODE_VAR_SEQ:
 		/* FIXME */
 		break;
@@ -762,15 +847,36 @@
 static int
 otfcmapWindows(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
 {
-	USED(c); USED(parsed);
+	SubtableCmap *sc;
 
+	sc = er->subtable;
 	switch(er->encodingID){
 	case ENC_WINDOWS_UNICODE_BMP:
-		/* FIXME */
+		if(sc->format != 4){
+			(*unsupported)++;
+			werrstr("unicode bmp: fmt %d", sc->format);
+			goto err;
+		}
+		c->mappers[c->numMappers].rune2glyph = cmap4rune2glyph;
+		c->mappers[c->numMappers].glyph2rune = cmap4glyph2rune;
+		c->mappers[c->numMappers++].aux = &sc->sub4;
+		(*parsed)++;
 		break;
 
 	case ENC_WINDOWS_UNICODE_FULL:
-		/* FIXME */
+		if(sc->format != 12){
+			(*unsupported)++;
+			werrstr("unicode full: fmt %d", sc->format);
+			goto err;
+		}
+		if(sc->sub12or13.numGroups < 1){
+			werrstr("unicode full: no groups");
+			goto err;
+		}
+		c->mappers[c->numMappers].rune2glyph = cmap12rune2glyph;
+		c->mappers[c->numMappers].glyph2rune = cmap12glyph2rune;
+		c->mappers[c->numMappers++].aux = &sc->sub12or13;
+		(*parsed)++;
 		break;
 
 	case ENC_WINDOWS_SYMBOL:
@@ -834,6 +940,8 @@
 	else
 		werrstr("");
 	werrstr("no usable records%r");
+	/* FIXME - eventually this return should be removed */
+	return 0;
 err:
 	werrstr("cmap: %r");
 	return -1;
--- a/otf.rkt
+++ b/otf.rkt
@@ -58,15 +58,14 @@
 (mkcmplx SubtableCmap2
          {uint16 length}
          {uint16 language}
-         {uint16 subHeaderKeys [256]}
+         #;{uint16 subHeaderKeys [256]}
          #;{SubHeader subHeaders [?]}
          #;{uint16 glyphIdArray [?]})
 
-; FIXME
 (mkcmplx SubtableCmap4
          {uint16 length}
          {uint16 language}
-         {uint16 segCountX2}
+         {uint16 segCountX2 (> 0)}
          {uint16 searchRange}
          {uint16 entrySelector}
          {uint16 rangeShift}
@@ -75,7 +74,7 @@
          {uint16 startCode [/ segCountX2 2]}
          {int16 idDelta [/ segCountX2 2]}
          {int16 idRangeOffset [/ segCountX2 2]}
-         #;{uint16 glyphIdArray [?]})
+         {uint16 glyphIdArray [/ (- length (+ (* 8 2) (* segCountX2 4))) 2]})
 
 (mkcmplx SubtableCmap6
          {uint16 length}
@@ -91,7 +90,6 @@
          {uint32 numGroups}
          {MapGroup groups [numGroups]})
 
-; FIXME
 (mkcmplx SubtableCmap10
          {uint16 reserved}
          {uint32 length}
@@ -98,7 +96,7 @@
          {uint32 language}
          {uint32 startCharCode}
          {uint32 numChars}
-         #;{uint16 glyphIdArray [?]})
+         {uint16 glyphIdArray [/ (- length (+ (* 2 2) (* 4 4))) 2]})
 
 (mkcmplx SubtableCmap12or13
          {uint16 reserved}
--- a/plan9/otf.c
+++ b/plan9/otf.c
@@ -1,3 +1,4 @@
+#line 1 "otf.c.in"
 /* this file is generated. do not modify. */
 #include "otfsys.h"
 #include "otf.h"
@@ -33,6 +34,7 @@
 	u16int designAxisSize;
 	u16int designAxisCount;
 	u16int axisValueCount;
+#line 22"otf.c.in"
 };
 
 struct Range {
@@ -664,7 +666,8 @@
 	PLAT_CUSTOM, /* deprecated, unsupported */
 };
 
-/* supported subtable formats: 4, 6, 10, 12, 14 */
+/* supported subtable formats: 4, 10, 12 */
+/* FIXME: need to implement (higher to lower prio): 6, 14, 13 */
 enum {
 	ENC_UNICODE_1_0, /* deprecated, unsupported */
 	ENC_UNICODE_1_1, /* deprecated, unsupported */
@@ -672,7 +675,7 @@
 	ENC_UNICODE_2_0_BMP, /* subtable format 4, 6 */
 	ENC_UNICODE_2_0_FULL, /* subtable format 10, 12 */
 	ENC_UNICODE_VAR_SEQ, /* subtable format 14 */
-	ENC_UNICODE_FULL, /* subtable format 13 (many-to-one), unsupported */
+	ENC_UNICODE_FULL, /* subtable format 13 (many-to-one) */
 
 	ENC_WINDOWS_SYMBOL = 0, /* unsupported */
 	ENC_WINDOWS_UNICODE_BMP, /* subtable format 4 */
@@ -685,8 +688,73 @@
 };
 
 static int
-cmapGroup12rune2glyph(void *aux, Rune r)
+cmap4rune2glyph(void *aux, Rune r)
 {
+	SubtableCmap4 *sc = aux;
+	int i, b, e, x, segC;
+
+	/* format 4 lookup: glyph id for rune r, or -1 when no segment or array entry covers it */
+	if(r > 0xffff)
+		return -1;
+	segC = sc->segCountX2/2;
+	for(b = 0, e = segC-1; b <= e; ){ /* binary search over the segments */
+		i = (b + e)/2;
+		if(sc->endCode[i] < r)
+			b = i + 1;
+		else if(sc->startCode[i] > r)
+			e = i - 1;
+		else if(sc->idRangeOffset[i] == 0)
+			return (r + sc->idDelta[i]) & 0xffff; /* glyph ids are modulo 65536 */
+		else{
+			/* the offset is in bytes from &idRangeOffset[i]; glyphIdArray starts segC-i entries after it */
+			x = i - segC + (u16int)sc->idRangeOffset[i]/2 + (int)(r - sc->startCode[i]);
+			if(x < 0 || x >= (sc->length-((8*2)+(sc->segCountX2*4)))/2)
+				break;
+			/* 0 is the missing glyph; any other entry is biased by idDelta */
+			if((x = sc->glyphIdArray[x]) != 0)
+				x = (x + sc->idDelta[i]) & 0xffff;
+			return x;
+		}
+	}
+	return -1;
+}
+
+static Rune
+cmap4glyph2rune(void *aux, int g)
+{
+	USED(aux); USED(g);
+	/* FIXME: reverse lookup is not implemented for format 4; NoRune is returned in the hope that another mapper picks it up */
+	return NoRune;
+}
+
+static int
+cmap10rune2glyph(void *aux, Rune r)
+{
+	SubtableCmap10 *sc = aux;
+	u32int n;
+
+	/* glyphIdArray is read with (length-20)/2 entries, which can be fewer than numChars claims */
+	n = (sc->length-((2*2)+(4*4)))/2;
+	if(r < sc->startCharCode)
+		return -1;
+	r -= sc->startCharCode;
+	return r < sc->numChars && r < n ? sc->glyphIdArray[r] : -1;
+}
+
+static Rune
+cmap10glyph2rune(void *aux, int g)
+{
+	SubtableCmap10 *sc = aux;
+	u32int i, n = (sc->length-((2*2)+(4*4)))/2; /* entries present in glyphIdArray */
+
+	/* entry i is the glyph of rune startCharCode+i: find the first entry equal to g */
+	for(i = 0; i < sc->numChars && i < n && sc->glyphIdArray[i] != g; i++);
+	return i < sc->numChars && i < n ? sc->startCharCode + i : NoRune;
+}
+
+static int
+cmap12rune2glyph(void *aux, Rune r)
+{
 	SubtableCmap12or13 *sc;
 	MapGroup *m;
 	int b, e, x;
@@ -706,20 +774,16 @@
 }
 
 static Rune
-cmapGroup12glyph2rune(void *aux, int g)
+cmap12glyph2rune(void *aux, int g)
 {
 	SubtableCmap12or13 *sc;
 	MapGroup *m;
-	int i, n, o;
+	int i;
 
 	sc = aux;
-	for(i = 0, m = sc->groups; i < sc->numGroups; i++){
-		if(g >= m->startGlyphID){
-			n = m->endCharCode - m->startCharCode;
-			o = g - m->startGlyphID;
-			if(o < n)
-				return m->startCharCode + o;
-		}
+	for(i = 0, m = sc->groups; i < sc->numGroups; i++, m++){
+		if(g >= m->startGlyphID && g <= m->startGlyphID+(m->endCharCode-m->startCharCode))
+			return m->startCharCode + (g - m->startGlyphID);
 	}
 	return NoRune;
 }
@@ -732,25 +796,44 @@
 	sc = er->subtable;
 	switch(er->encodingID){
 	case ENC_UNICODE_2_0_BMP:
-		/* FIXME */
-		break;
-
-	case ENC_UNICODE_2_0_FULL: /* this one is good */
-		if(sc->format != 12){
+		if(sc->format != 4){
 			(*unsupported)++;
-			werrstr("unicode 2.0 full: fmt %d", sc->format);
+			werrstr("2.0 bmp: fmt %d", sc->format);
 			goto err;
 		}
-		if(sc->sub12or13.numGroups < 1){
-			werrstr("unicode 2.0 full: no groups");
-			goto err;
-		}
-		c->mappers[c->numMappers].rune2glyph = cmapGroup12rune2glyph;
-		c->mappers[c->numMappers].glyph2rune = cmapGroup12glyph2rune;
-		c->mappers[c->numMappers++].aux = &sc->sub12or13;
+		c->mappers[c->numMappers].rune2glyph = cmap4rune2glyph;
+		c->mappers[c->numMappers].glyph2rune = cmap4glyph2rune;
+		c->mappers[c->numMappers++].aux = &sc->sub4;
 		(*parsed)++;
 		break;
 
+	case ENC_UNICODE_2_0_FULL: /* this one is good */
+		if(sc->format == 10){
+			if(sc->sub10.numChars < 1){
+				werrstr("2.0 full: no chars");
+				goto err;
+			}
+			c->mappers[c->numMappers].rune2glyph = cmap10rune2glyph;
+			c->mappers[c->numMappers].glyph2rune = cmap10glyph2rune;
+			c->mappers[c->numMappers++].aux = &sc->sub10;
+			(*parsed)++;
+			break;
+		}
+		if(sc->format == 12){
+			if(sc->sub12or13.numGroups < 1){
+				werrstr("2.0 full: no groups");
+				goto err;
+			}
+			c->mappers[c->numMappers].rune2glyph = cmap12rune2glyph;
+			c->mappers[c->numMappers].glyph2rune = cmap12glyph2rune;
+			c->mappers[c->numMappers++].aux = &sc->sub12or13;
+			(*parsed)++;
+			break;
+		}
+		(*unsupported)++;
+		werrstr("2.0 full: fmt %d", sc->format);
+		goto err;
+
 	case ENC_UNICODE_VAR_SEQ:
 		/* FIXME */
 		break;
@@ -780,15 +863,36 @@
 static int
 otfcmapWindows(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
 {
-	USED(c); USED(parsed);
+	SubtableCmap *sc;
 
+	sc = er->subtable;
 	switch(er->encodingID){
 	case ENC_WINDOWS_UNICODE_BMP:
-		/* FIXME */
+		if(sc->format != 4){
+			(*unsupported)++;
+			werrstr("unicode bmp: fmt %d", sc->format);
+			goto err;
+		}
+		c->mappers[c->numMappers].rune2glyph = cmap4rune2glyph;
+		c->mappers[c->numMappers].glyph2rune = cmap4glyph2rune;
+		c->mappers[c->numMappers++].aux = &sc->sub4;
+		(*parsed)++;
 		break;
 
 	case ENC_WINDOWS_UNICODE_FULL:
-		/* FIXME */
+		if(sc->format != 12){
+			(*unsupported)++;
+			werrstr("unicode full: fmt %d", sc->format);
+			goto err;
+		}
+		if(sc->sub12or13.numGroups < 1){
+			werrstr("unicode full: no groups");
+			goto err;
+		}
+		c->mappers[c->numMappers].rune2glyph = cmap12rune2glyph;
+		c->mappers[c->numMappers].glyph2rune = cmap12glyph2rune;
+		c->mappers[c->numMappers++].aux = &sc->sub12or13;
+		(*parsed)++;
 		break;
 
 	case ENC_WINDOWS_SYMBOL:
@@ -852,6 +956,8 @@
 	else
 		werrstr("");
 	werrstr("no usable records%r");
+	/* FIXME - eventually this return should be removed */
+	return 0;
 err:
 	werrstr("cmap: %r");
 	return -1;
@@ -938,12 +1044,10 @@
 read_SubtableCmap2(Otf *o, SubtableCmap2 *v)
 {
 	u8int *b = nil; USED(b);
-	if((b = otfreadn(o, 516)) == nil)
+	if((b = otfreadn(o, 4)) == nil)
 		goto err;
 	v->length = b[0]<<8 | b[1];
 	v->language = b[2]<<8 | b[3];
-	for(int i = 0; i < 256; i++)
-		v->subHeaderKeys[i] = b[4+i*2]<<8 | b[5+i*2];
 	return 0;
 err:
 	werrstr("%s: %r", "SubtableCmap2");
@@ -955,8 +1059,6 @@
 {
 	f->print(f->aux, "%*s%s: %ud\n", indent, "", "length", v->length);
 	f->print(f->aux, "%*s%s: %ud\n", indent, "", "language", v->language);
-	for(int i = 0; i < 256; i++)
-		f->print(f->aux, "%*s%s[%d]: %ud\n", indent, "", "subHeaderKeys", i, v->subHeaderKeys[i]);
 	USED(o);
 }
 
@@ -969,6 +1071,10 @@
 	v->length = b[0]<<8 | b[1];
 	v->language = b[2]<<8 | b[3];
 	v->segCountX2 = b[4]<<8 | b[5];
+	if(v->segCountX2 <= 0){
+		werrstr("%s: invalid value: %d (%#ux)", "segCountX2", v->segCountX2, v->segCountX2);
+		goto err;
+	}
 	v->searchRange = b[6]<<8 | b[7];
 	v->entrySelector = b[8]<<8 | b[9];
 	v->rangeShift = b[10]<<8 | b[11];
@@ -995,6 +1101,11 @@
 	v->idRangeOffset = malloc((v->segCountX2/2)*sizeof(*v->idRangeOffset));
 	for(int i = 0; i < (v->segCountX2/2); i++)
 		v->idRangeOffset[i] = b[0+i*2]<<8 | b[1+i*2];
+	if((b = otfreadn(o, ((v->length-((8*2)+(v->segCountX2*4)))/2)*2)) == nil)
+		goto err;
+	v->glyphIdArray = malloc(((v->length-((8*2)+(v->segCountX2*4)))/2)*sizeof(*v->glyphIdArray));
+	for(int i = 0; i < ((v->length-((8*2)+(v->segCountX2*4)))/2); i++)
+		v->glyphIdArray[i] = b[0+i*2]<<8 | b[1+i*2];
 	return 0;
 err:
 	werrstr("%s: %r", "SubtableCmap4");
@@ -1018,6 +1129,8 @@
 		f->print(f->aux, "%*s%s[%d]: %d\n", indent, "", "idDelta", i, v->idDelta[i]);
 	for(int i = 0; i < (v->segCountX2/2); i++)
 		f->print(f->aux, "%*s%s[%d]: %d\n", indent, "", "idRangeOffset", i, v->idRangeOffset[i]);
+	for(int i = 0; i < ((v->length-((8*2)+(v->segCountX2*4)))/2); i++)
+		f->print(f->aux, "%*s%s[%d]: %ud\n", indent, "", "glyphIdArray", i, v->glyphIdArray[i]);
 	USED(o);
 }
 
@@ -1101,6 +1214,11 @@
 	v->language = b[6]<<24 | b[7]<<16 | b[8]<<8 | b[9];
 	v->startCharCode = b[10]<<24 | b[11]<<16 | b[12]<<8 | b[13];
 	v->numChars = b[14]<<24 | b[15]<<16 | b[16]<<8 | b[17];
+	if((b = otfreadn(o, ((v->length-((2*2)+(4*4)))/2)*2)) == nil)
+		goto err;
+	v->glyphIdArray = malloc(((v->length-((2*2)+(4*4)))/2)*sizeof(*v->glyphIdArray));
+	for(int i = 0; i < ((v->length-((2*2)+(4*4)))/2); i++)
+		v->glyphIdArray[i] = b[0+i*2]<<8 | b[1+i*2];
 	return 0;
 err:
 	werrstr("%s: %r", "SubtableCmap10");
@@ -1115,6 +1233,8 @@
 	f->print(f->aux, "%*s%s: %ud\n", indent, "", "language", v->language);
 	f->print(f->aux, "%*s%s: %ud\n", indent, "", "startCharCode", v->startCharCode);
 	f->print(f->aux, "%*s%s: %ud\n", indent, "", "numChars", v->numChars);
+	for(int i = 0; i < ((v->length-((2*2)+(4*4)))/2); i++)
+		f->print(f->aux, "%*s%s[%d]: %ud\n", indent, "", "glyphIdArray", i, v->glyphIdArray[i]);
 	USED(o);
 }
 
--- a/plan9/otf.h
+++ b/plan9/otf.h
@@ -1,3 +1,4 @@
+#line 1 "otf.h.in"
 /* this file is generated. do not modify. */
 typedef struct Otf Otf;
 #ifdef __unix__
@@ -261,7 +262,6 @@
 struct SubtableCmap2 {
 	u16int length;
 	u16int language;
-	u16int subHeaderKeys[256];
 };
 
 int read_SubtableCmap2(Otf *o, SubtableCmap2 *v);
@@ -279,6 +279,7 @@
 	u16int *startCode;
 	s16int *idDelta;
 	s16int *idRangeOffset;
+	u16int *glyphIdArray;
 };
 
 int read_SubtableCmap4(Otf *o, SubtableCmap4 *v);
@@ -312,6 +313,7 @@
 	u32int language;
 	u32int startCharCode;
 	u32int numChars;
+	u16int *glyphIdArray;
 };
 
 int read_SubtableCmap10(Otf *o, SubtableCmap10 *v);