ref: 6c44490fbae0874d765876607a10334e72c26a7b
parent: 95df0a4352792cb83be7100c19ad0e815a492a52
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Jul 16 21:55:10 EDT 2024
implement initial rune->glyph and glyph->rune mapping, add -r option to test it
--- a/gen.rkt
+++ b/gen.rkt
@@ -594,9 +594,9 @@
(out (at out-dir "otf.h")
(λ ()
- (printf (port->string (open-input-file (at in-dir "otf.h.in")) #:close? #t))
+ (printf "~a" (port->string (open-input-file (at in-dir "otf.h.in")) #:close? #t))
(printf "\n")
- (printf (format gen-h))))
+ (printf "~a" (format gen-h))))
(define (extra-context-fields c)
(if (cmplx? c)
--- a/meson.build
+++ b/meson.build
@@ -43,6 +43,7 @@
src = [
'rast.c',
'unix/otfsys.c',
+ 'unix/rune.c',
'unix/test.c',
]
--- a/otf.c.in
+++ b/otf.c.in
@@ -614,3 +614,227 @@
{
return o->td.head->unitsPerEm;
}
+
+/*
+ * Translate rune r into a glyph index by asking the cmap mappers in
+ * the order they were registered.  The first non-negative answer is
+ * returned; -1 means that no mapper knows the rune.
+ */
+int
+otfrune2glyph(Otf *o, Rune r)
+{
+	RuneMapper *mappers;
+	int k, n, glyph;
+
+	mappers = o->td.cmap->mappers;
+	n = o->td.cmap->numMappers;
+	for(k = 0; k < n; k++){
+		glyph = mappers[k].rune2glyph(mappers[k].aux, r);
+		if(glyph >= 0)
+			return glyph;
+	}
+	return -1;
+}
+
+/*
+ * Translate glyph index g back into a rune by asking the cmap mappers
+ * in the order they were registered.  The first answer other than
+ * NoRune is returned; NoRune means that no mapper knows the glyph.
+ */
+Rune
+otfglyph2rune(Otf *o, int g)
+{
+	RuneMapper *mappers;
+	Rune found;
+	int k, n;
+
+	mappers = o->td.cmap->mappers;
+	n = o->td.cmap->numMappers;
+	for(k = 0; k < n; k++){
+		found = mappers[k].glyph2rune(mappers[k].aux, g);
+		if(found != NoRune)
+			return found;
+	}
+	return NoRune;
+}
+
+/* values of EncodingRecord.platformID that otfcmap() dispatches on */
+enum {
+	PLAT_UNICODE = 0,
+	PLAT_MACINTOSH = 1, /* "currently discouraged", unsupported */
+	PLAT_ISO = 2, /* deprecated, unsupported */
+	PLAT_WINDOWS = 3,
+	PLAT_CUSTOM = 4, /* deprecated, unsupported */
+};
+
+/* supported subtable formats: 4, 6, 10, 12, 14 */
+
+/* EncodingRecord.encodingID values checked by otfcmapUnicode() */
+enum {
+	ENC_UNICODE_1_0 = 0, /* deprecated, unsupported */
+	ENC_UNICODE_1_1 = 1, /* deprecated, unsupported */
+	ENC_UNICODE_ISO = 2, /* deprecated, unsupported */
+	ENC_UNICODE_2_0_BMP = 3, /* subtable format 4, 6 */
+	ENC_UNICODE_2_0_FULL = 4, /* subtable format 10, 12 */
+	ENC_UNICODE_VAR_SEQ = 5, /* subtable format 14 */
+	ENC_UNICODE_FULL = 6, /* subtable format 13 (many-to-one), unsupported */
+};
+
+/* EncodingRecord.encodingID values checked by otfcmapWindows() */
+enum {
+	ENC_WINDOWS_SYMBOL = 0, /* unsupported */
+	ENC_WINDOWS_UNICODE_BMP = 1, /* subtable format 4 */
+	ENC_WINDOWS_SHIFTJIS = 2, /* unsupported */
+	ENC_WINDOWS_PRC = 3, /* unsupported */
+	ENC_WINDOWS_BIG5 = 4, /* unsupported */
+	ENC_WINDOWS_WANSUNG = 5, /* unsupported */
+	ENC_WINDOWS_JOHAB = 6, /* unsupported */
+	ENC_WINDOWS_UNICODE_FULL = 10, /* subtable format 12 */
+};
+
+/*
+ * rune2glyph callback for a SubtableCmap12or13 passed as aux.
+ * Binary search over the groups for the one whose
+ * [startCharCode, endCharCode] range contains r (this assumes the
+ * groups are ordered by character code).  Returns the glyph index,
+ * or -1 when no group covers r.
+ */
+static int
+cmapGroup12rune2glyph(void *aux, Rune r)
+{
+	SubtableCmap12or13 *sub;
+	MapGroup *grp;
+	int lo, hi, mid;
+
+	sub = aux;
+	lo = 0;
+	hi = sub->numGroups-1;
+	while(lo <= hi){
+		mid = (lo + hi)/2;
+		grp = &sub->groups[mid];
+		if(r > grp->endCharCode)
+			lo = mid + 1;
+		else if(r < grp->startCharCode)
+			hi = mid - 1;
+		else
+			return grp->startGlyphID + (r - grp->startCharCode);
+	}
+	return -1;
+}
+
+/*
+ * glyph2rune callback for a SubtableCmap12or13 passed as aux.
+ * Linear scan for the first group whose glyph range
+ * [startGlyphID, startGlyphID + (endCharCode - startCharCode)]
+ * contains g.  Returns the matching rune, or NoRune when no group
+ * covers g.
+ */
+static Rune
+cmapGroup12glyph2rune(void *aux, int g)
+{
+	SubtableCmap12or13 *sub;
+	MapGroup *grp;
+	int k;
+
+	sub = aux;
+	for(k = 0; k < sub->numGroups; k++){
+		grp = &sub->groups[k];
+		if(g < grp->startGlyphID)
+			continue;
+		if(g > grp->startGlyphID+(grp->endCharCode-grp->startCharCode))
+			continue;
+		return grp->startCharCode + (g - grp->startGlyphID);
+	}
+	return NoRune;
+}
+
+/*
+ * Process one encoding record of platform PLAT_UNICODE.
+ *
+ * Only ENC_UNICODE_2_0_FULL with a format 12 subtable is turned into
+ * a mapper: it is appended to c->mappers and *parsed is incremented.
+ * The encodings still marked FIXME are accepted but ignored.
+ * Deprecated or unknown encodings, and a 2.0-full record that is not
+ * format 12, increment *unsupported and fail.
+ *
+ * Returns 0 on success, -1 with the error string set on failure.
+ * The caller has to make sure c->mappers still has a free slot.
+ */
+static int
+otfcmapUnicode(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
+{
+	SubtableCmap *sub;
+	RuneMapper *rm;
+
+	sub = er->subtable;
+	switch(er->encodingID){
+	case ENC_UNICODE_2_0_FULL: /* this one is good */
+		if(sub->format != 12){
+			*unsupported += 1;
+			werrstr("unicode 2.0 full: fmt %d", sub->format);
+			goto err;
+		}
+		if(sub->sub12or13.numGroups < 1){
+			werrstr("unicode 2.0 full: no groups");
+			goto err;
+		}
+		rm = &c->mappers[c->numMappers];
+		rm->rune2glyph = cmapGroup12rune2glyph;
+		rm->glyph2rune = cmapGroup12glyph2rune;
+		rm->aux = &sub->sub12or13;
+		c->numMappers++;
+		*parsed += 1;
+		break;
+
+	case ENC_UNICODE_2_0_BMP:
+	case ENC_UNICODE_VAR_SEQ:
+	case ENC_UNICODE_FULL:
+		/* FIXME */
+		break;
+
+	case ENC_UNICODE_1_0:
+	case ENC_UNICODE_1_1:
+	case ENC_UNICODE_ISO:
+		*unsupported += 1;
+		werrstr("deprecated encoding: %d", er->encodingID);
+		goto err;
+
+	default:
+		*unsupported += 1;
+		werrstr("unknown encoding: %d", er->encodingID);
+		goto err;
+	}
+
+	return 0;
+err:
+	/* prefix whatever message was set above */
+	werrstr("unicode: %r");
+	return -1;
+}
+
+/*
+ * Process one encoding record of platform PLAT_WINDOWS.
+ *
+ * No Windows encoding is turned into a mapper yet: the two Unicode
+ * encodings are accepted but ignored (FIXME), everything else
+ * increments *unsupported and fails.
+ *
+ * Returns 0 on success, -1 with the error string set on failure.
+ */
+static int
+otfcmapWindows(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
+{
+	USED(c);
+	USED(parsed);
+
+	switch(er->encodingID){
+	case ENC_WINDOWS_UNICODE_BMP:
+	case ENC_WINDOWS_UNICODE_FULL:
+		/* FIXME */
+		return 0;
+
+	case ENC_WINDOWS_SYMBOL:
+	case ENC_WINDOWS_SHIFTJIS:
+	case ENC_WINDOWS_PRC:
+	case ENC_WINDOWS_BIG5:
+	case ENC_WINDOWS_WANSUNG:
+	case ENC_WINDOWS_JOHAB:
+		werrstr("unsupported encoding: %d", er->encodingID);
+		break;
+
+	default:
+		werrstr("unknown encoding: %d", er->encodingID);
+		break;
+	}
+
+	/* only the failing encodings get here */
+	(*unsupported)++;
+	werrstr("windows: %r");
+	return -1;
+}
+
+/*
+ * Walk the encoding records of a freshly read cmap table and register
+ * a RuneMapper for each one that is understood.  The walk stops early
+ * once c->mappers is full.
+ *
+ * Returns 0 when at least one record was parsed into a mapper.
+ * Returns -1 with the error string set when a Unicode or Windows
+ * record fails (the walk is aborted at that record), or when no
+ * record at all turned out to be usable.
+ */
+static int
+otfcmap(TableCmap *c)
+{
+	EncodingRecord *er;
+	int k, rc, nparsed, nunsupp;
+
+	nparsed = 0;
+	nunsupp = 0;
+	for(k = 0; k < c->numTables; k++){
+		if(c->numMappers >= nelem(c->mappers)) /* give up */
+			break;
+
+		er = &c->encodingRecords[k];
+		rc = 0;
+		if(er->platformID == PLAT_UNICODE){
+			/* FIXME issue a warning if returned non-zero */
+			rc = otfcmapUnicode(c, er, &nparsed, &nunsupp);
+		}else if(er->platformID == PLAT_WINDOWS){
+			/* FIXME issue a warning if returned non-zero */
+			rc = otfcmapWindows(c, er, &nparsed, &nunsupp);
+		}else{
+			/* PLAT_MACINTOSH, PLAT_ISO, PLAT_CUSTOM or an unknown platform */
+			nunsupp++;
+		}
+		if(rc != 0)
+			goto err;
+	}
+	if(nparsed > 0)
+		return 0;
+
+	/* build "no usable records (N unsupported)" in two steps via %r */
+	if(nunsupp > 0)
+		werrstr(" (%d unsupported)", nunsupp);
+	else
+		werrstr("");
+	werrstr("no usable records%r");
+err:
+	werrstr("cmap: %r");
+	return -1;
+}
--- a/otf.h.in
+++ b/otf.h.in
@@ -10,6 +10,7 @@
typedef uint16_t u16int;
typedef uint32_t u32int;
typedef uint64_t u64int;
+typedef uint32_t Rune;
char *otferrstr(void);
#else
#pragma incomplete Otf
@@ -25,6 +26,16 @@
int (*print)(void *aux, const char *fmt, ...);
};
+typedef struct RuneMapper RuneMapper;
+
+struct RuneMapper { /* one rune<->glyph translation backend */
+ int (*rune2glyph)(void *aux,Rune r); /* glyph index for r, negative if r is not mapped */
+ Rune (*glyph2rune)(void *aux, int g); /* rune for glyph g, NoRune if g is not mapped */
+ void *aux; /* handed back as the first argument of both callbacks */
+};
+
+#define NoRune (~(Rune)0) /* "no rune" marker: a Rune with every bit set */
+
typedef struct ComponentGlyph ComponentGlyph;
enum {
@@ -97,3 +108,5 @@
int otfglyfnum(Otf *o);
int otfdrawglyf(Otf *o, Glyf *g, double ppem, int gap, GlyfImage *im);
int otfupem(Otf *o);
+int otfrune2glyph(Otf *o, Rune r); /* glyph index for rune r, -1 if no cmap mapper has it */
+Rune otfglyph2rune(Otf *o, int g); /* rune for glyph g, NoRune if no cmap mapper has it */
--- a/otf.rkt
+++ b/otf.rkt
@@ -149,7 +149,9 @@
{uint16 version unused (== 0)}
{uint16 numTables}
{EncodingRecord encodingRecords [numTables]}
- #:tag "cmap")
+ #:tag "cmap"
+ #:extra (list (cons 'read (list (~a "if(otfcmap(v) < 0)") (~a " goto err;")))
+ (cons 'field (list (~a "RuneMapper mappers[8];") (~a "int numMappers;")))))
(define headFlags
#hash((0 . HEAD_FL_BASELINE_Y_0)
--- a/plan9/otf.c
+++ b/plan9/otf.c
@@ -629,7 +629,235 @@
return o->td.head->unitsPerEm;
}
+/*
+ * Translate rune r into a glyph index by asking the cmap mappers in
+ * the order they were registered.  The first non-negative answer is
+ * returned; -1 means that no mapper knows the rune.
+ */
+int
+otfrune2glyph(Otf *o, Rune r)
+{
+	RuneMapper *mappers;
+	int k, n, glyph;
+
+	mappers = o->td.cmap->mappers;
+	n = o->td.cmap->numMappers;
+	for(k = 0; k < n; k++){
+		glyph = mappers[k].rune2glyph(mappers[k].aux, r);
+		if(glyph >= 0)
+			return glyph;
+	}
+	return -1;
+}
+
+/*
+ * Translate glyph index g back into a rune by asking the cmap mappers
+ * in the order they were registered.  The first answer other than
+ * NoRune is returned; NoRune means that no mapper knows the glyph.
+ */
+Rune
+otfglyph2rune(Otf *o, int g)
+{
+	RuneMapper *mappers;
+	Rune found;
+	int k, n;
+
+	mappers = o->td.cmap->mappers;
+	n = o->td.cmap->numMappers;
+	for(k = 0; k < n; k++){
+		found = mappers[k].glyph2rune(mappers[k].aux, g);
+		if(found != NoRune)
+			return found;
+	}
+	return NoRune;
+}
+
+/* values of EncodingRecord.platformID that otfcmap() dispatches on */
+enum {
+	PLAT_UNICODE = 0,
+	PLAT_MACINTOSH = 1, /* "currently discouraged", unsupported */
+	PLAT_ISO = 2, /* deprecated, unsupported */
+	PLAT_WINDOWS = 3,
+	PLAT_CUSTOM = 4, /* deprecated, unsupported */
+};
+
+/* supported subtable formats: 4, 6, 10, 12, 14 */
+
+/* EncodingRecord.encodingID values checked by otfcmapUnicode() */
+enum {
+	ENC_UNICODE_1_0 = 0, /* deprecated, unsupported */
+	ENC_UNICODE_1_1 = 1, /* deprecated, unsupported */
+	ENC_UNICODE_ISO = 2, /* deprecated, unsupported */
+	ENC_UNICODE_2_0_BMP = 3, /* subtable format 4, 6 */
+	ENC_UNICODE_2_0_FULL = 4, /* subtable format 10, 12 */
+	ENC_UNICODE_VAR_SEQ = 5, /* subtable format 14 */
+	ENC_UNICODE_FULL = 6, /* subtable format 13 (many-to-one), unsupported */
+};
+
+/* EncodingRecord.encodingID values checked by otfcmapWindows() */
+enum {
+	ENC_WINDOWS_SYMBOL = 0, /* unsupported */
+	ENC_WINDOWS_UNICODE_BMP = 1, /* subtable format 4 */
+	ENC_WINDOWS_SHIFTJIS = 2, /* unsupported */
+	ENC_WINDOWS_PRC = 3, /* unsupported */
+	ENC_WINDOWS_BIG5 = 4, /* unsupported */
+	ENC_WINDOWS_WANSUNG = 5, /* unsupported */
+	ENC_WINDOWS_JOHAB = 6, /* unsupported */
+	ENC_WINDOWS_UNICODE_FULL = 10, /* subtable format 12 */
+};
+
+/*
+ * rune2glyph callback for a SubtableCmap12or13 passed as aux.
+ * Binary search over the groups for the one whose
+ * [startCharCode, endCharCode] range contains r (this assumes the
+ * groups are ordered by character code).  Returns the glyph index,
+ * or -1 when no group covers r.
+ */
+static int
+cmapGroup12rune2glyph(void *aux, Rune r)
+{
+	SubtableCmap12or13 *sub;
+	MapGroup *grp;
+	int lo, hi, mid;
+
+	sub = aux;
+	lo = 0;
+	hi = sub->numGroups-1;
+	while(lo <= hi){
+		mid = (lo + hi)/2;
+		grp = &sub->groups[mid];
+		if(r > grp->endCharCode)
+			lo = mid + 1;
+		else if(r < grp->startCharCode)
+			hi = mid - 1;
+		else
+			return grp->startGlyphID + (r - grp->startCharCode);
+	}
+	return -1;
+}
+
+/*
+ * glyph2rune callback for a SubtableCmap12or13 passed as aux.
+ * Linear scan for the first group whose glyph range
+ * [startGlyphID, startGlyphID + (endCharCode - startCharCode)]
+ * contains g.  Returns the matching rune, or NoRune when no group
+ * covers g.
+ *
+ * endCharCode is inclusive (cmapGroup12rune2glyph accepts
+ * r == endCharCode), so the upper bound must be tested with <=.
+ * The logic is kept identical to the copy in otf.c.in.
+ */
+static Rune
+cmapGroup12glyph2rune(void *aux, int g)
+{
+	SubtableCmap12or13 *sc;
+	MapGroup *m;
+	int i;
+
+	sc = aux;
+	/* m has to advance together with i, otherwise only group 0 is ever tested */
+	for(i = 0, m = sc->groups; i < sc->numGroups; i++, m++){
+		if(g >= m->startGlyphID && g <= m->startGlyphID+(m->endCharCode-m->startCharCode))
+			return m->startCharCode + (g - m->startGlyphID);
+	}
+	return NoRune;
+}
+
+/*
+ * Process one encoding record of platform PLAT_UNICODE.
+ *
+ * Only ENC_UNICODE_2_0_FULL with a format 12 subtable is turned into
+ * a mapper: it is appended to c->mappers and *parsed is incremented.
+ * The encodings still marked FIXME are accepted but ignored.
+ * Deprecated or unknown encodings, and a 2.0-full record that is not
+ * format 12, increment *unsupported and fail.
+ *
+ * Returns 0 on success, -1 with the error string set on failure.
+ * The caller has to make sure c->mappers still has a free slot.
+ */
+static int
+otfcmapUnicode(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
+{
+	SubtableCmap *sub;
+	RuneMapper *rm;
+
+	sub = er->subtable;
+	switch(er->encodingID){
+	case ENC_UNICODE_2_0_FULL: /* this one is good */
+		if(sub->format != 12){
+			*unsupported += 1;
+			werrstr("unicode 2.0 full: fmt %d", sub->format);
+			goto err;
+		}
+		if(sub->sub12or13.numGroups < 1){
+			werrstr("unicode 2.0 full: no groups");
+			goto err;
+		}
+		rm = &c->mappers[c->numMappers];
+		rm->rune2glyph = cmapGroup12rune2glyph;
+		rm->glyph2rune = cmapGroup12glyph2rune;
+		rm->aux = &sub->sub12or13;
+		c->numMappers++;
+		*parsed += 1;
+		break;
+
+	case ENC_UNICODE_2_0_BMP:
+	case ENC_UNICODE_VAR_SEQ:
+	case ENC_UNICODE_FULL:
+		/* FIXME */
+		break;
+
+	case ENC_UNICODE_1_0:
+	case ENC_UNICODE_1_1:
+	case ENC_UNICODE_ISO:
+		*unsupported += 1;
+		werrstr("deprecated encoding: %d", er->encodingID);
+		goto err;
+
+	default:
+		*unsupported += 1;
+		werrstr("unknown encoding: %d", er->encodingID);
+		goto err;
+	}
+
+	return 0;
+err:
+	/* prefix whatever message was set above */
+	werrstr("unicode: %r");
+	return -1;
+}
+
+/*
+ * Process one encoding record of platform PLAT_WINDOWS.
+ *
+ * No Windows encoding is turned into a mapper yet: the two Unicode
+ * encodings are accepted but ignored (FIXME), everything else
+ * increments *unsupported and fails.
+ *
+ * Returns 0 on success, -1 with the error string set on failure.
+ */
+static int
+otfcmapWindows(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
+{
+	USED(c);
+	USED(parsed);
+
+	switch(er->encodingID){
+	case ENC_WINDOWS_UNICODE_BMP:
+	case ENC_WINDOWS_UNICODE_FULL:
+		/* FIXME */
+		return 0;
+
+	case ENC_WINDOWS_SYMBOL:
+	case ENC_WINDOWS_SHIFTJIS:
+	case ENC_WINDOWS_PRC:
+	case ENC_WINDOWS_BIG5:
+	case ENC_WINDOWS_WANSUNG:
+	case ENC_WINDOWS_JOHAB:
+		werrstr("unsupported encoding: %d", er->encodingID);
+		break;
+
+	default:
+		werrstr("unknown encoding: %d", er->encodingID);
+		break;
+	}
+
+	/* only the failing encodings get here */
+	(*unsupported)++;
+	werrstr("windows: %r");
+	return -1;
+}
+
+/*
+ * Walk the encoding records of a freshly read cmap table and register
+ * a RuneMapper for each one that is understood.  The walk stops early
+ * once c->mappers is full.
+ *
+ * Returns 0 when at least one record was parsed into a mapper.
+ * Returns -1 with the error string set when a Unicode or Windows
+ * record fails (the walk is aborted at that record), or when no
+ * record at all turned out to be usable.
+ */
+static int
+otfcmap(TableCmap *c)
+{
+	EncodingRecord *er;
+	int k, rc, nparsed, nunsupp;
+
+	nparsed = 0;
+	nunsupp = 0;
+	for(k = 0; k < c->numTables; k++){
+		if(c->numMappers >= nelem(c->mappers)) /* give up */
+			break;
+
+		er = &c->encodingRecords[k];
+		rc = 0;
+		if(er->platformID == PLAT_UNICODE){
+			/* FIXME issue a warning if returned non-zero */
+			rc = otfcmapUnicode(c, er, &nparsed, &nunsupp);
+		}else if(er->platformID == PLAT_WINDOWS){
+			/* FIXME issue a warning if returned non-zero */
+			rc = otfcmapWindows(c, er, &nparsed, &nunsupp);
+		}else{
+			/* PLAT_MACINTOSH, PLAT_ISO, PLAT_CUSTOM or an unknown platform */
+			nunsupp++;
+		}
+		if(rc != 0)
+			goto err;
+	}
+	if(nparsed > 0)
+		return 0;
+
+	/* build "no usable records (N unsupported)" in two steps via %r */
+	if(nunsupp > 0)
+		werrstr(" (%d unsupported)", nunsupp);
+	else
+		werrstr("");
+	werrstr("no usable records%r");
+err:
+	werrstr("cmap: %r");
+	return -1;
+}
+
+
int
read_SubHeader(Otf *o, SubHeader *v)
{
@@ -1268,6 +1496,8 @@
werrstr("%s: %r", "encodingRecords");
goto err;
}
+ if(otfcmap(v) < 0)
+ goto err;
return 0;
err:
werrstr("%s: %r", "TableCmap");
--- a/plan9/otf.h
+++ b/plan9/otf.h
@@ -10,6 +10,7 @@
typedef uint16_t u16int;
typedef uint32_t u32int;
typedef uint64_t u64int;
+typedef uint32_t Rune;
char *otferrstr(void);
#else
#pragma incomplete Otf
@@ -25,6 +26,16 @@
int (*print)(void *aux, const char *fmt, ...);
};
+typedef struct RuneMapper RuneMapper;
+
+struct RuneMapper { /* one rune<->glyph translation backend */
+ int (*rune2glyph)(void *aux,Rune r); /* glyph index for r, negative if r is not mapped */
+ Rune (*glyph2rune)(void *aux, int g); /* rune for glyph g, NoRune if g is not mapped */
+ void *aux; /* handed back as the first argument of both callbacks */
+};
+
+#define NoRune (~(Rune)0) /* "no rune" marker: a Rune with every bit set */
+
typedef struct ComponentGlyph ComponentGlyph;
enum {
@@ -97,6 +108,8 @@
int otfglyfnum(Otf *o);
int otfdrawglyf(Otf *o, Glyf *g, double ppem, int gap, GlyfImage *im);
int otfupem(Otf *o);
+int otfrune2glyph(Otf *o, Rune r); /* glyph index for rune r, -1 if no cmap mapper has it */
+Rune otfglyph2rune(Otf *o, int g); /* rune for glyph g, NoRune if no cmap mapper has it */
typedef struct SubHeader SubHeader;
typedef struct MapGroup MapGroup;
@@ -396,6 +409,8 @@
// u16int version;
u16int numTables;
EncodingRecord *encodingRecords;
+ RuneMapper mappers[8];
+ int numMappers;
};
int read_TableCmap(Otf *o, TableCmap *v);
--- a/test.h
+++ b/test.h
@@ -1,8 +1,9 @@
static void
printusage(Otfile *f)
{
- f->print(f->aux, "usage: %s [-i GLYPH_ID] [-p PPEM [-g PIXELS] [-m ... -[-H GLYPH_ID]]] font.otf\n", argv0);
- f->print(f->aux, " -i: specifies a single glyph id\n");
+ f->print(f->aux, "usage: %s [-i GLYPH_ID | -r RUNE] [-p PPEM [-g PIXELS] [-m ... -[-H GLYPH_ID]]] font.otf\n", argv0);
+ f->print(f->aux, " -i: operate on a single glyph by its id\n");
+ f->print(f->aux, " -r: operate on a single glyph by its rune\n");
f->print(f->aux, " -p: draw (of size in pixels per em) and write the image to stdout\n");
f->print(f->aux, " -g: gap (in pixels) to add to every border of a glyph\n");
f->print(f->aux, " -m: print out glyph ids or render them all as a map (with -p)\n");
@@ -13,6 +14,7 @@
static int gap, gind = -1, map, highlight = -1;
static double ppem;
+static Rune rune = NoRune;
static int
dumpmap(Otfile *f, GlyfImage *im, int n)
@@ -111,6 +113,10 @@
if((o = otfopen(in)) == nil)
return -1;
n = otfglyfnum(o);
+ if(rune != NoRune && (gind = otfrune2glyph(o, rune)) < 0){
+ werrstr("no such rune->glyph mapping\n");
+ return -1;
+ }
if(gind >= n){
werrstr("out of range (max %d)", n-1);
goto glypherr;
@@ -171,6 +177,11 @@
highlight = strtol(EARGF(usage(&out)), nil, 0); \
break; \
case 'i': \
+		if(rune != NoRune){ /* rune starts out as NoRune; anything else means -r was given */ \
+errboth: \
+			out.print(out.aux, "can't specify both rune and glyph\n"); \
+			usage(&out); \
+		} \
gind = strtol(EARGF(usage(&out)), nil, 0); \
break; \
case 'm': \
@@ -178,6 +189,14 @@
break; \
case 'p': \
ppem = strtod(EARGF(usage(&out)), nil); \
+ break; \
+ case 'r': \
+ if(gind >= 0) \
+ goto errboth; \
+ if(chartorune(&rune, EARGF(usage(&out))) == 1 && rune == Runeerror){ \
+ out.print(out.aux, "invalid rune\n"); \
+ usage(&out); \
+ } \
break; \
default: \
usage(&out); \
--- a/unix/otfsys.h
+++ b/unix/otfsys.h
@@ -10,5 +10,10 @@
#define USED(x) (void)(x)
#define nelem(a) (int)(sizeof(a)/sizeof((a)[0]))
+#define Runeerror ((Rune)0xfffd) /* stored by chartorune when a sequence cannot be decoded */
+
+typedef uint32_t Rune;
+
char *fmttime(long long v);
void werrstr(char *fmt, ...);
+int chartorune(Rune *rune, char *str); /* decodes one UTF-8 sequence from str into *rune; returns its length in bytes (1 on error) */
--- /dev/null
+++ b/unix/rune.c
@@ -1,0 +1,128 @@
+/*
+Copyright © 2021 Plan 9 Foundation
+Copyright © 20XX 9front authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+#include "otfsys.h"
+
+/*
+ * Constants of the UTF-8 decoder.  BitN is the number of payload bits
+ * in the lead byte of an N-byte sequence, Bitx the number in a
+ * continuation byte; everything else is derived from those and
+ * written out as a literal with its derivation alongside.
+ */
+enum
+{
+	UTFmax = 4, /* longest sequence decoded, in bytes */
+	Runemax = 0x10ffff, /* largest rune accepted */
+
+	Bit1 = 7,
+	Bitx = 6,
+	Bit2 = 5,
+	Bit3 = 4,
+	Bit4 = 3,
+	Bit5 = 2,
+
+	/* tag bits: ((1<<(BitN+1))-1) ^ 0xFF */
+	T1 = 0x00, /* 0000 0000 */
+	Tx = 0x80, /* 1000 0000 */
+	T2 = 0xC0, /* 1100 0000 */
+	T3 = 0xE0, /* 1110 0000 */
+	T4 = 0xF0, /* 1111 0000 */
+	T5 = 0xF8, /* 1111 1000 */
+
+	/* largest value of an N-byte sequence: (1<<(BitN+(N-1)*Bitx))-1 */
+	Rune1 = 0x7F, /* 0000 0000 0000 0000 0111 1111 */
+	Rune2 = 0x7FF, /* 0000 0000 0000 0111 1111 1111 */
+	Rune3 = 0xFFFF, /* 0000 0000 1111 1111 1111 1111 */
+	Rune4 = 0x1FFFFF, /* 0001 1111 1111 1111 1111 1111 */
+
+	Maskx = 0x3F, /* (1<<Bitx)-1, 0011 1111 */
+	Testx = 0xC0, /* Maskx ^ 0xFF, 1100 0000 */
+
+	Bad = Runeerror,
+};
+
+/*
+ * Decode the UTF-8 sequence at the start of str into *rune and return
+ * the number of bytes it occupies (1 to 4).  A malformed or overlong
+ * sequence, or one that decodes to more than Runemax, stores Bad
+ * (Runeerror) in *rune and returns 1.
+ *
+ * A byte is only read after every byte before it has been accepted,
+ * and a NUL never passes the continuation test, so the decoder does
+ * not read past the terminator of a NUL-terminated string.
+ */
+int
+chartorune(Rune *rune, char *str)
+{
+	uint8_t *s;
+	int b0, b1, b2, b3;
+	long v;
+
+	s = (uint8_t*)str;
+
+	/* one byte: 00000-0007F => T1 */
+	b0 = s[0];
+	if(b0 < Tx){
+		*rune = b0;
+		return 1;
+	}
+
+	/* two bytes: 0080-07FF => T2 Tx */
+	b1 = s[1] ^ Tx;
+	if((b1 & Testx) != 0)
+		goto bad;
+	if(b0 < T3){
+		if(b0 < T2)
+			goto bad;
+		v = ((b0 << Bitx) | b1) & Rune2;
+		if(v <= Rune1)
+			goto bad;
+		*rune = v;
+		return 2;
+	}
+
+	/* three bytes: 0800-FFFF => T3 Tx Tx */
+	b2 = s[2] ^ Tx;
+	if((b2 & Testx) != 0)
+		goto bad;
+	if(b0 < T4){
+		v = ((((b0 << Bitx) | b1) << Bitx) | b2) & Rune3;
+		if(v <= Rune2)
+			goto bad;
+		*rune = v;
+		return 3;
+	}
+
+	/* four bytes: 10000-10FFFF => T4 Tx Tx Tx */
+	if(UTFmax >= 4){
+		b3 = s[3] ^ Tx;
+		if((b3 & Testx) != 0)
+			goto bad;
+		if(b0 < T5){
+			v = ((((((b0 << Bitx) | b1) << Bitx) | b2) << Bitx) | b3) & Rune4;
+			if(v <= Rune3 || v > Runemax)
+				goto bad;
+			*rune = v;
+			return 4;
+		}
+	}
+
+	/* bad decoding */
+bad:
+	*rune = Bad;
+	return 1;
+}