ref: 83519f11fd26cd605dba45d9ce1fc0f39d467d40
parent: baa3c241b77085b6f770be2cd13afaafaaa7559a
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Wed Jul 17 22:27:04 EDT 2024
add support for more cmap subtable formats
--- a/otf.c.in
+++ b/otf.c.in
@@ -650,7 +650,8 @@
PLAT_CUSTOM, /* deprecated, unsupported */
};
-/* supported subtable formats: 4, 6, 10, 12, 14 */
+/* supported subtable formats: 4, 10, 12 */
+/* FIXME: need to implement (higher to lower prio): 6, 14, 13 */
enum {
ENC_UNICODE_1_0, /* deprecated, unsupported */
ENC_UNICODE_1_1, /* deprecated, unsupported */
@@ -658,7 +659,7 @@
ENC_UNICODE_2_0_BMP, /* subtable format 4, 6 */
ENC_UNICODE_2_0_FULL, /* subtable format 10, 12 */
ENC_UNICODE_VAR_SEQ, /* subtable format 14 */
- ENC_UNICODE_FULL, /* subtable format 13 (many-to-one), unsupported */
+ ENC_UNICODE_FULL, /* subtable format 13 (many-to-one) */
ENC_WINDOWS_SYMBOL = 0, /* unsupported */
ENC_WINDOWS_UNICODE_BMP, /* subtable format 4 */
@@ -671,8 +672,73 @@
};
static int
-cmapGroup12rune2glyph(void *aux, Rune r)
+cmap4rune2glyph(void *aux, Rune r)
{
+	SubtableCmap4 *sc;
+	int i, b, e, n, x, segC;
+	/* format 4: binary search of the segments; returns the glyph ID for r, or -1 if no segment maps it */
+	if(r > 0xffff)
+		return -1;
+	sc = aux;
+	segC = sc->segCountX2/2;
+	for(b = 0, e = segC-1; b <= e; ){
+		i = (b + e)/2;
+		if(sc->endCode[i] < r)
+			b = i + 1;
+		else if(sc->startCode[i] > r)
+			e = i - 1;
+		else if(sc->idRangeOffset[i] == 0){
+			/* idDelta arithmetic is modulo 65536: wrap overflow as well as underflow */
+			return (r + sc->idDelta[i]) & 0xffff;
+		}else{
+			/* the offset counts from &idRangeOffset[i]; glyphIdArray begins segC-i entries after it */
+			x = i - segC + (u16int)sc->idRangeOffset[i]/2 + (r - sc->startCode[i]);
+			n = (sc->length-((8*2)+(sc->segCountX2*4)))/2;
+			if(x < 0 || x >= n)
+				break;
+			/* 0 means missing glyph; any other entry is biased by idDelta, again modulo 65536 */
+			return sc->glyphIdArray[x] ? (sc->glyphIdArray[x] + sc->idDelta[i]) & 0xffff : 0;
+		}
+	}
+	return -1;
+}
+
+static Rune
+cmap4glyph2rune(void *aux, int g)
+{
+ USED(aux); USED(g);
+ /* FIXME: reverse lookup for format 4 is not implemented; always returns NoRune, hoping another mapper resolves the glyph */
+ return NoRune;
+}
+
+static int
+cmap10rune2glyph(void *aux, Rune r)
+{
+	SubtableCmap10 *sc;
+	/* format 10 (trimmed array): glyph ID for r or -1; the index is also bounded by the (length-20)/2 entries the array holds */
+	sc = aux;
+	if(r >= sc->startCharCode){
+		r -= sc->startCharCode;
+		if(r < sc->numChars && r < (sc->length-((2*2)+(4*4)))/2)
+			return sc->glyphIdArray[r];
+	}
+	return -1;
+}
+
+static Rune
+cmap10glyph2rune(void *aux, int g)
+{
+	SubtableCmap10 *sc;
+	u32int i;	/* glyphIdArray is indexed by rune-startCharCode, so g is searched for rather than used as the index */
+	for(sc = aux, i = 0; i < sc->numChars && i < (sc->length-((2*2)+(4*4)))/2; i++)
+		if(sc->glyphIdArray[i] == g)
+			return sc->startCharCode + i;
+	return NoRune;
+}
+
+static int
+cmap12rune2glyph(void *aux, Rune r)
+{
SubtableCmap12or13 *sc;
MapGroup *m;
int b, e, x;
@@ -692,7 +758,7 @@
}
static Rune
-cmapGroup12glyph2rune(void *aux, int g)
+cmap12glyph2rune(void *aux, int g)
{
SubtableCmap12or13 *sc;
MapGroup *m;
@@ -714,25 +780,44 @@
sc = er->subtable;
switch(er->encodingID){
case ENC_UNICODE_2_0_BMP:
- /* FIXME */
- break;
-
- case ENC_UNICODE_2_0_FULL: /* this one is good */
- if(sc->format != 12){
+ if(sc->format != 4){
(*unsupported)++;
- werrstr("unicode 2.0 full: fmt %d", sc->format);
+ werrstr("2.0 bmp: fmt %d", sc->format);
goto err;
}
- if(sc->sub12or13.numGroups < 1){
- werrstr("unicode 2.0 full: no groups");
- goto err;
- }
- c->mappers[c->numMappers].rune2glyph = cmapGroup12rune2glyph;
- c->mappers[c->numMappers].glyph2rune = cmapGroup12glyph2rune;
- c->mappers[c->numMappers++].aux = &sc->sub12or13;
+ c->mappers[c->numMappers].rune2glyph = cmap4rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap4glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub4;
(*parsed)++;
break;
+ case ENC_UNICODE_2_0_FULL: /* this one is good */
+ if(sc->format == 10){
+ if(sc->sub10.numChars < 1){
+ werrstr("2.0 full: no chars");
+ goto err;
+ }
+ c->mappers[c->numMappers].rune2glyph = cmap10rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap10glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub10;
+ (*parsed)++;
+ break;
+ }
+ if(sc->format == 12){
+ if(sc->sub12or13.numGroups < 1){
+ werrstr("2.0 full: no groups");
+ goto err;
+ }
+ c->mappers[c->numMappers].rune2glyph = cmap12rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap12glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub12or13;
+ (*parsed)++;
+ break;
+ }
+ (*unsupported)++;
+ werrstr("2.0 full: fmt %d", sc->format);
+ goto err;
+
case ENC_UNICODE_VAR_SEQ:
/* FIXME */
break;
@@ -762,15 +847,36 @@
static int
otfcmapWindows(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
{
- USED(c); USED(parsed);
+ SubtableCmap *sc;
+ sc = er->subtable;
switch(er->encodingID){
case ENC_WINDOWS_UNICODE_BMP:
- /* FIXME */
+ if(sc->format != 4){
+ (*unsupported)++;
+ werrstr("unicode bmp: fmt %d", sc->format);
+ goto err;
+ }
+ c->mappers[c->numMappers].rune2glyph = cmap4rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap4glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub4;
+ (*parsed)++;
break;
case ENC_WINDOWS_UNICODE_FULL:
- /* FIXME */
+ if(sc->format != 12){
+ (*unsupported)++;
+ werrstr("unicode full: fmt %d", sc->format);
+ goto err;
+ }
+ if(sc->sub12or13.numGroups < 1){
+ werrstr("unicode full: no groups");
+ goto err;
+ }
+ c->mappers[c->numMappers].rune2glyph = cmap12rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap12glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub12or13;
+ (*parsed)++;
break;
case ENC_WINDOWS_SYMBOL:
@@ -834,6 +940,8 @@
else
werrstr("");
werrstr("no usable records%r");
+ /* FIXME - eventually this return should be removed */
+ return 0;
err:
werrstr("cmap: %r");
return -1;
--- a/otf.rkt
+++ b/otf.rkt
@@ -58,15 +58,14 @@
(mkcmplx SubtableCmap2
{uint16 length}
{uint16 language}
- {uint16 subHeaderKeys [256]}
+ #;{uint16 subHeaderKeys [256]} ; disabled like the other format 2 arrays below; only length and language remain
#;{SubHeader subHeaders [?]}
#;{uint16 glyphIdArray [?]})
-; FIXME
(mkcmplx SubtableCmap4
{uint16 length}
{uint16 language}
- {uint16 segCountX2}
+ {uint16 segCountX2 (> 0)}
{uint16 searchRange}
{uint16 entrySelector}
{uint16 rangeShift}
@@ -75,7 +74,7 @@
{uint16 startCode [/ segCountX2 2]}
{int16 idDelta [/ segCountX2 2]}
{int16 idRangeOffset [/ segCountX2 2]}
- #;{uint16 glyphIdArray [?]})
+ {uint16 glyphIdArray [/ (- length (+ (* 8 2) (* segCountX2 4))) 2]})
(mkcmplx SubtableCmap6
{uint16 length}
@@ -91,7 +90,6 @@
{uint32 numGroups}
{MapGroup groups [numGroups]})
-; FIXME
(mkcmplx SubtableCmap10
{uint16 reserved}
{uint32 length}
@@ -98,7 +96,7 @@
{uint32 language}
{uint32 startCharCode}
{uint32 numChars}
- #;{uint16 glyphIdArray [?]})
+ {uint16 glyphIdArray [/ (- length (+ (* 2 2) (* 4 4))) 2]}) ; count = (length - 20) / 2; NOTE(review): the OpenType spec sizes this array by numChars - confirm
(mkcmplx SubtableCmap12or13
{uint16 reserved}
--- a/plan9/otf.c
+++ b/plan9/otf.c
@@ -1,3 +1,4 @@
+#line 1 "otf.c.in"
/* this file is generated. do not modify. */
#include "otfsys.h"
#include "otf.h"
@@ -33,6 +34,7 @@
u16int designAxisSize;
u16int designAxisCount;
u16int axisValueCount;
+#line 22"otf.c.in"
};
struct Range {
@@ -664,7 +666,8 @@
PLAT_CUSTOM, /* deprecated, unsupported */
};
-/* supported subtable formats: 4, 6, 10, 12, 14 */
+/* supported subtable formats: 4, 10, 12 */
+/* FIXME: need to implement (higher to lower prio): 6, 14, 13 */
enum {
ENC_UNICODE_1_0, /* deprecated, unsupported */
ENC_UNICODE_1_1, /* deprecated, unsupported */
@@ -672,7 +675,7 @@
ENC_UNICODE_2_0_BMP, /* subtable format 4, 6 */
ENC_UNICODE_2_0_FULL, /* subtable format 10, 12 */
ENC_UNICODE_VAR_SEQ, /* subtable format 14 */
- ENC_UNICODE_FULL, /* subtable format 13 (many-to-one), unsupported */
+ ENC_UNICODE_FULL, /* subtable format 13 (many-to-one) */
ENC_WINDOWS_SYMBOL = 0, /* unsupported */
ENC_WINDOWS_UNICODE_BMP, /* subtable format 4 */
@@ -685,8 +688,73 @@
};
static int
-cmapGroup12rune2glyph(void *aux, Rune r)
+cmap4rune2glyph(void *aux, Rune r)
{
+	SubtableCmap4 *sc;
+	int i, b, e, n, x, segC;
+	/* format 4: binary search of the segments; returns the glyph ID for r, or -1 if no segment maps it */
+	if(r > 0xffff)
+		return -1;
+	sc = aux;
+	segC = sc->segCountX2/2;
+	for(b = 0, e = segC-1; b <= e; ){
+		i = (b + e)/2;
+		if(sc->endCode[i] < r)
+			b = i + 1;
+		else if(sc->startCode[i] > r)
+			e = i - 1;
+		else if(sc->idRangeOffset[i] == 0){
+			/* idDelta arithmetic is modulo 65536: wrap overflow as well as underflow */
+			return (r + sc->idDelta[i]) & 0xffff;
+		}else{
+			/* the offset counts from &idRangeOffset[i]; glyphIdArray begins segC-i entries after it */
+			x = i - segC + (u16int)sc->idRangeOffset[i]/2 + (r - sc->startCode[i]);
+			n = (sc->length-((8*2)+(sc->segCountX2*4)))/2;
+			if(x < 0 || x >= n)
+				break;
+			/* 0 means missing glyph; any other entry is biased by idDelta, again modulo 65536 */
+			return sc->glyphIdArray[x] ? (sc->glyphIdArray[x] + sc->idDelta[i]) & 0xffff : 0;
+		}
+	}
+	return -1;
+}
+
+static Rune
+cmap4glyph2rune(void *aux, int g)
+{
+ USED(aux); USED(g);
+ /* FIXME: reverse lookup for format 4 is not implemented; always returns NoRune, hoping another mapper resolves the glyph */
+ return NoRune;
+}
+
+static int
+cmap10rune2glyph(void *aux, Rune r)
+{
+	SubtableCmap10 *sc;
+	/* format 10 (trimmed array): glyph ID for r or -1; the index is also bounded by the (length-20)/2 entries the array holds */
+	sc = aux;
+	if(r >= sc->startCharCode){
+		r -= sc->startCharCode;
+		if(r < sc->numChars && r < (sc->length-((2*2)+(4*4)))/2)
+			return sc->glyphIdArray[r];
+	}
+	return -1;
+}
+
+static Rune
+cmap10glyph2rune(void *aux, int g)
+{
+	SubtableCmap10 *sc;
+	u32int i;	/* glyphIdArray is indexed by rune-startCharCode, so g is searched for rather than used as the index */
+	for(sc = aux, i = 0; i < sc->numChars && i < (sc->length-((2*2)+(4*4)))/2; i++)
+		if(sc->glyphIdArray[i] == g)
+			return sc->startCharCode + i;
+	return NoRune;
+}
+
+static int
+cmap12rune2glyph(void *aux, Rune r)
+{
SubtableCmap12or13 *sc;
MapGroup *m;
int b, e, x;
@@ -706,20 +774,16 @@
}
static Rune
-cmapGroup12glyph2rune(void *aux, int g)
+cmap12glyph2rune(void *aux, int g)
{
SubtableCmap12or13 *sc;
MapGroup *m;
- int i, n, o;
+ int i;
sc = aux;
- for(i = 0, m = sc->groups; i < sc->numGroups; i++){
- if(g >= m->startGlyphID){
- n = m->endCharCode - m->startCharCode;
- o = g - m->startGlyphID;
- if(o < n)
- return m->startCharCode + o;
- }
+ for(i = 0, m = sc->groups; i < sc->numGroups; i++, m++){ /* m advances with i, so every group is examined */
+ if(g >= m->startGlyphID && g <= m->startGlyphID+(m->endCharCode-m->startCharCode)) /* inclusive glyph range of this group */
+ return m->startCharCode + (g - m->startGlyphID);
}
return NoRune;
}
@@ -732,25 +796,44 @@
sc = er->subtable;
switch(er->encodingID){
case ENC_UNICODE_2_0_BMP:
- /* FIXME */
- break;
-
- case ENC_UNICODE_2_0_FULL: /* this one is good */
- if(sc->format != 12){
+ if(sc->format != 4){
(*unsupported)++;
- werrstr("unicode 2.0 full: fmt %d", sc->format);
+ werrstr("2.0 bmp: fmt %d", sc->format);
goto err;
}
- if(sc->sub12or13.numGroups < 1){
- werrstr("unicode 2.0 full: no groups");
- goto err;
- }
- c->mappers[c->numMappers].rune2glyph = cmapGroup12rune2glyph;
- c->mappers[c->numMappers].glyph2rune = cmapGroup12glyph2rune;
- c->mappers[c->numMappers++].aux = &sc->sub12or13;
+ c->mappers[c->numMappers].rune2glyph = cmap4rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap4glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub4;
(*parsed)++;
break;
+ case ENC_UNICODE_2_0_FULL: /* this one is good */
+ if(sc->format == 10){
+ if(sc->sub10.numChars < 1){
+ werrstr("2.0 full: no chars");
+ goto err;
+ }
+ c->mappers[c->numMappers].rune2glyph = cmap10rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap10glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub10;
+ (*parsed)++;
+ break;
+ }
+ if(sc->format == 12){
+ if(sc->sub12or13.numGroups < 1){
+ werrstr("2.0 full: no groups");
+ goto err;
+ }
+ c->mappers[c->numMappers].rune2glyph = cmap12rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap12glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub12or13;
+ (*parsed)++;
+ break;
+ }
+ (*unsupported)++;
+ werrstr("2.0 full: fmt %d", sc->format);
+ goto err;
+
case ENC_UNICODE_VAR_SEQ:
/* FIXME */
break;
@@ -780,15 +863,36 @@
static int
otfcmapWindows(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
{
- USED(c); USED(parsed);
+ SubtableCmap *sc;
+ sc = er->subtable;
switch(er->encodingID){
case ENC_WINDOWS_UNICODE_BMP:
- /* FIXME */
+ if(sc->format != 4){
+ (*unsupported)++;
+ werrstr("unicode bmp: fmt %d", sc->format);
+ goto err;
+ }
+ c->mappers[c->numMappers].rune2glyph = cmap4rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap4glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub4;
+ (*parsed)++;
break;
case ENC_WINDOWS_UNICODE_FULL:
- /* FIXME */
+ if(sc->format != 12){
+ (*unsupported)++;
+ werrstr("unicode full: fmt %d", sc->format);
+ goto err;
+ }
+ if(sc->sub12or13.numGroups < 1){
+ werrstr("unicode full: no groups");
+ goto err;
+ }
+ c->mappers[c->numMappers].rune2glyph = cmap12rune2glyph;
+ c->mappers[c->numMappers].glyph2rune = cmap12glyph2rune;
+ c->mappers[c->numMappers++].aux = &sc->sub12or13;
+ (*parsed)++;
break;
case ENC_WINDOWS_SYMBOL:
@@ -852,6 +956,8 @@
else
werrstr("");
werrstr("no usable records%r");
+ /* FIXME - eventually this return should be removed */
+ return 0;
err:
werrstr("cmap: %r");
return -1;
@@ -938,12 +1044,10 @@
read_SubtableCmap2(Otf *o, SubtableCmap2 *v)
{
u8int *b = nil; USED(b);
- if((b = otfreadn(o, 516)) == nil)
+ if((b = otfreadn(o, 4)) == nil)
goto err;
v->length = b[0]<<8 | b[1];
v->language = b[2]<<8 | b[3];
- for(int i = 0; i < 256; i++)
- v->subHeaderKeys[i] = b[4+i*2]<<8 | b[5+i*2];
return 0;
err:
werrstr("%s: %r", "SubtableCmap2");
@@ -955,8 +1059,6 @@
{
f->print(f->aux, "%*s%s: %ud\n", indent, "", "length", v->length);
f->print(f->aux, "%*s%s: %ud\n", indent, "", "language", v->language);
- for(int i = 0; i < 256; i++)
- f->print(f->aux, "%*s%s[%d]: %ud\n", indent, "", "subHeaderKeys", i, v->subHeaderKeys[i]);
USED(o);
}
@@ -969,6 +1071,10 @@
v->length = b[0]<<8 | b[1];
v->language = b[2]<<8 | b[3];
v->segCountX2 = b[4]<<8 | b[5];
+ if(v->segCountX2 <= 0){
+ werrstr("%s: invalid value: %d (%#ux)", "segCountX2", v->segCountX2, v->segCountX2);
+ goto err;
+ }
v->searchRange = b[6]<<8 | b[7];
v->entrySelector = b[8]<<8 | b[9];
v->rangeShift = b[10]<<8 | b[11];
@@ -995,6 +1101,11 @@
v->idRangeOffset = malloc((v->segCountX2/2)*sizeof(*v->idRangeOffset));
for(int i = 0; i < (v->segCountX2/2); i++)
v->idRangeOffset[i] = b[0+i*2]<<8 | b[1+i*2];
+ if((b = otfreadn(o, ((v->length-((8*2)+(v->segCountX2*4)))/2)*2)) == nil)
+ goto err;
+ v->glyphIdArray = malloc(((v->length-((8*2)+(v->segCountX2*4)))/2)*sizeof(*v->glyphIdArray));
+ for(int i = 0; i < ((v->length-((8*2)+(v->segCountX2*4)))/2); i++)
+ v->glyphIdArray[i] = b[0+i*2]<<8 | b[1+i*2];
return 0;
err:
werrstr("%s: %r", "SubtableCmap4");
@@ -1018,6 +1129,8 @@
f->print(f->aux, "%*s%s[%d]: %d\n", indent, "", "idDelta", i, v->idDelta[i]);
for(int i = 0; i < (v->segCountX2/2); i++)
f->print(f->aux, "%*s%s[%d]: %d\n", indent, "", "idRangeOffset", i, v->idRangeOffset[i]);
+ for(int i = 0; i < ((v->length-((8*2)+(v->segCountX2*4)))/2); i++)
+ f->print(f->aux, "%*s%s[%d]: %ud\n", indent, "", "glyphIdArray", i, v->glyphIdArray[i]);
USED(o);
}
@@ -1101,6 +1214,11 @@
v->language = b[6]<<24 | b[7]<<16 | b[8]<<8 | b[9];
v->startCharCode = b[10]<<24 | b[11]<<16 | b[12]<<8 | b[13];
v->numChars = b[14]<<24 | b[15]<<16 | b[16]<<8 | b[17];
+ if((b = otfreadn(o, ((v->length-((2*2)+(4*4)))/2)*2)) == nil)
+ goto err;
+ v->glyphIdArray = malloc(((v->length-((2*2)+(4*4)))/2)*sizeof(*v->glyphIdArray));
+ for(int i = 0; i < ((v->length-((2*2)+(4*4)))/2); i++)
+ v->glyphIdArray[i] = b[0+i*2]<<8 | b[1+i*2];
return 0;
err:
werrstr("%s: %r", "SubtableCmap10");
@@ -1115,6 +1233,8 @@
f->print(f->aux, "%*s%s: %ud\n", indent, "", "language", v->language);
f->print(f->aux, "%*s%s: %ud\n", indent, "", "startCharCode", v->startCharCode);
f->print(f->aux, "%*s%s: %ud\n", indent, "", "numChars", v->numChars);
+ for(int i = 0; i < ((v->length-((2*2)+(4*4)))/2); i++)
+ f->print(f->aux, "%*s%s[%d]: %ud\n", indent, "", "glyphIdArray", i, v->glyphIdArray[i]);
USED(o);
}
--- a/plan9/otf.h
+++ b/plan9/otf.h
@@ -1,3 +1,4 @@
+#line 1 "otf.h.in"
/* this file is generated. do not modify. */
typedef struct Otf Otf;
#ifdef __unix__
@@ -261,7 +262,6 @@
struct SubtableCmap2 {
u16int length;
u16int language;
- u16int subHeaderKeys[256];
};
int read_SubtableCmap2(Otf *o, SubtableCmap2 *v);
@@ -279,6 +279,7 @@
u16int *startCode;
s16int *idDelta;
s16int *idRangeOffset;
+ u16int *glyphIdArray;
};
int read_SubtableCmap4(Otf *o, SubtableCmap4 *v);
@@ -312,6 +313,7 @@
u32int language;
u32int startCharCode;
u32int numChars;
+ u16int *glyphIdArray;
};
int read_SubtableCmap10(Otf *o, SubtableCmap10 *v);