shithub: fnt

Download patch

ref: 6c44490fbae0874d765876607a10334e72c26a7b
parent: 95df0a4352792cb83be7100c19ad0e815a492a52
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Tue Jul 16 21:55:10 EDT 2024

implement initial rune->glyph and glyph->rune mapping, add -r option to test it

--- a/gen.rkt
+++ b/gen.rkt
@@ -594,9 +594,9 @@
 
 (out (at out-dir "otf.h")
      (λ ()
-       (printf (port->string (open-input-file (at in-dir "otf.h.in")) #:close? #t))
+       (printf "~a" (port->string (open-input-file (at in-dir "otf.h.in")) #:close? #t))
        (printf "\n")
-       (printf (format gen-h))))
+       (printf "~a" (format gen-h))))
 
 (define (extra-context-fields c)
   (if (cmplx? c)
--- a/meson.build
+++ b/meson.build
@@ -43,6 +43,7 @@
 src = [
 	'rast.c',
 	'unix/otfsys.c',
+	'unix/rune.c',
 	'unix/test.c',
 ]
 
--- a/otf.c.in
+++ b/otf.c.in
@@ -614,3 +614,227 @@
 {
 	return o->td.head->unitsPerEm;
 }
+
+int /* glyph id that rune r maps to, or -1 if no registered mapper knows it */
+otfrune2glyph(Otf *o, Rune r)
+{
+	RuneMapper *m;
+	int i, g;
+
+	for(i = 0, m = o->td.cmap->mappers; i < o->td.cmap->numMappers; i++, m++){ /* array order; first hit wins */
+		if((g = m->rune2glyph(m->aux, r)) >= 0) /* a negative result means "not mapped by this mapper" */
+			return g;
+	}
+	return -1;
+}
+
+Rune /* rune that glyph id g maps back to, or NoRune if no registered mapper knows it */
+otfglyph2rune(Otf *o, int g)
+{
+	RuneMapper *m;
+	Rune r;
+	int i;
+
+	for(i = 0, m = o->td.cmap->mappers; i < o->td.cmap->numMappers; i++, m++){ /* array order; first hit wins */
+		if((r = m->glyph2rune(m->aux, g)) != NoRune) /* NoRune means "not mapped by this mapper" */
+			return r;
+	}
+	return NoRune;
+}
+
+enum {
+	PLAT_UNICODE,
+	PLAT_MACINTOSH, /* "currently discouraged", unsupported */
+	PLAT_ISO, /* deprecated, unsupported */
+	PLAT_WINDOWS,
+	PLAT_CUSTOM, /* deprecated, unsupported */
+};
+
+/* subtable formats meant to be supported: 4, 6, 10, 12, 14 (only format 12 has a mapper in otfcmapUnicode so far) */
+enum { /* Unicode-platform and Windows-platform encoding ids share this one enum */
+	ENC_UNICODE_1_0, /* deprecated, unsupported */
+	ENC_UNICODE_1_1, /* deprecated, unsupported */
+	ENC_UNICODE_ISO, /* deprecated, unsupported */
+	ENC_UNICODE_2_0_BMP, /* subtable format 4, 6 */
+	ENC_UNICODE_2_0_FULL, /* subtable format 10, 12 */
+	ENC_UNICODE_VAR_SEQ, /* subtable format 14 */
+	ENC_UNICODE_FULL, /* subtable format 13 (many-to-one), unsupported */
+
+	ENC_WINDOWS_SYMBOL = 0, /* unsupported; numbering restarts at 0 for the Windows platform */
+	ENC_WINDOWS_UNICODE_BMP, /* subtable format 4 */
+	ENC_WINDOWS_SHIFTJIS, /* unsupported */
+	ENC_WINDOWS_PRC, /* unsupported */
+	ENC_WINDOWS_BIG5, /* unsupported */
+	ENC_WINDOWS_WANSUNG, /* unsupported */
+	ENC_WINDOWS_JOHAB, /* unsupported */
+	ENC_WINDOWS_UNICODE_FULL = 10, /* subtable format 12 */
+};
+
+static int /* glyph id for rune r from a grouped subtable (aux), or -1 if r is in no group */
+cmapGroup12rune2glyph(void *aux, Rune r)
+{
+	SubtableCmap12or13 *sc;
+	MapGroup *m;
+	int b, e, x;
+
+	sc = aux;
+	for(b = 0, e = sc->numGroups-1; b <= e; ){ /* binary search; assumes groups are sorted by char code - TODO confirm the reader guarantees it */
+		x = (b + e)/2;
+		m = sc->groups + x;
+		if(m->endCharCode < r) /* r lies after this group (endCharCode itself still belongs to it) */
+			b = x + 1;
+		else if(m->startCharCode > r) /* r lies before this group */
+			e = x - 1;
+		else
+			return m->startGlyphID + (r - m->startCharCode); /* glyph ids run consecutively within a group */
+	}
+	return -1;
+}
+
+static Rune /* rune for glyph id g from a grouped subtable (aux), or NoRune if g is in no group */
+cmapGroup12glyph2rune(void *aux, int g)
+{
+	SubtableCmap12or13 *sc;
+	MapGroup *m;
+	int i;
+
+	sc = aux;
+	for(i = 0, m = sc->groups; i < sc->numGroups; i++, m++){ /* linear scan over every group */
+		if(g >= m->startGlyphID && g <= m->startGlyphID+(m->endCharCode-m->startCharCode)) /* inclusive at both ends */
+			return m->startCharCode + (g - m->startGlyphID);
+	}
+	return NoRune;
+}
+
+static int /* 0 if the record was registered or skipped, -1 with the error string set otherwise */
+otfcmapUnicode(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
+{
+	SubtableCmap *sc;
+
+	sc = er->subtable;
+	switch(er->encodingID){
+	case ENC_UNICODE_2_0_BMP:
+		/* FIXME: not handled yet; the record is skipped without counting as parsed or unsupported */
+		break;
+
+	case ENC_UNICODE_2_0_FULL: /* this one is good */
+		if(sc->format != 12){ /* only format 12 gets a mapper here */
+			(*unsupported)++;
+			werrstr("unicode 2.0 full: fmt %d", sc->format);
+			goto err;
+		}
+		if(sc->sub12or13.numGroups < 1){
+			werrstr("unicode 2.0 full: no groups");
+			goto err;
+		}
+		c->mappers[c->numMappers].rune2glyph = cmapGroup12rune2glyph;
+		c->mappers[c->numMappers].glyph2rune = cmapGroup12glyph2rune;
+		c->mappers[c->numMappers++].aux = &sc->sub12or13; /* no bounds check here: otfcmap checks for a free slot before calling */
+		(*parsed)++;
+		break;
+
+	case ENC_UNICODE_VAR_SEQ:
+		/* FIXME: not handled yet; the record is skipped */
+		break;
+
+	case ENC_UNICODE_FULL:
+		/* FIXME: not handled yet; the record is skipped */
+		break;
+
+	case ENC_UNICODE_1_0:
+	case ENC_UNICODE_1_1:
+	case ENC_UNICODE_ISO:
+		(*unsupported)++;
+		werrstr("deprecated encoding: %d", er->encodingID);
+		goto err;
+	default:
+		(*unsupported)++;
+		werrstr("unknown encoding: %d", er->encodingID);
+		goto err;
+	}
+
+	return 0;
+err:
+	werrstr("unicode: %r"); /* prefix the message set above; %r is the Plan 9 verb for the current error string */
+	return -1;
+}
+
+static int /* 0 if the record was skipped, -1 with the error string set otherwise; registers no mapper yet */
+otfcmapWindows(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
+{
+	USED(c); USED(parsed); /* not used until one of the FIXME cases below is implemented */
+
+	switch(er->encodingID){
+	case ENC_WINDOWS_UNICODE_BMP:
+		/* FIXME: not handled yet; the record is skipped */
+		break;
+
+	case ENC_WINDOWS_UNICODE_FULL:
+		/* FIXME: not handled yet; the record is skipped */
+		break;
+
+	case ENC_WINDOWS_SYMBOL:
+	case ENC_WINDOWS_SHIFTJIS:
+	case ENC_WINDOWS_PRC:
+	case ENC_WINDOWS_BIG5:
+	case ENC_WINDOWS_WANSUNG:
+	case ENC_WINDOWS_JOHAB:
+		(*unsupported)++;
+		werrstr("unsupported encoding: %d", er->encodingID);
+		goto err;
+	default:
+		(*unsupported)++;
+		werrstr("unknown encoding: %d", er->encodingID);
+		goto err;
+	}
+	return 0;
+err:
+	werrstr("windows: %r"); /* prefix the message set above; %r is the Plan 9 verb for the current error string */
+	return -1;
+}
+
+static int /* fills c->mappers from the encoding records; 0 if at least one mapper was registered, -1 otherwise */
+otfcmap(TableCmap *c)
+{
+	int i, parsed, unsupported;
+	EncodingRecord *er;
+
+	parsed = 0;
+	unsupported = 0;
+	for(i = 0, er = c->encodingRecords; i < c->numTables; i++, er++){
+		if(c->numMappers >= nelem(c->mappers)) /* give up: mappers[] is full; assumes numMappers started at 0 - TODO confirm */
+			break;
+
+		switch(er->platformID){
+		case PLAT_UNICODE:
+			/* FIXME issue a warning if returned non-zero (for now any failure aborts the whole table) */
+			if(otfcmapUnicode(c, er, &parsed, &unsupported) != 0)
+				goto err;
+			break;
+
+		case PLAT_WINDOWS:
+			/* FIXME issue a warning if returned non-zero (for now any failure aborts the whole table) */
+			if(otfcmapWindows(c, er, &parsed, &unsupported) != 0)
+				goto err;
+			break;
+
+		case PLAT_MACINTOSH:
+		case PLAT_ISO:
+		case PLAT_CUSTOM:
+		default:
+			unsupported++; /* counted and skipped, not fatal */
+			break;
+		}
+	}
+	if(parsed > 0)
+		return 0;
+
+	if(unsupported > 0)
+		werrstr(" (%d unsupported)", unsupported); /* staged as the error string so the %r below picks it up as a suffix */
+	else
+		werrstr(""); /* clear any stale error string so the %r below expands to nothing */
+	werrstr("no usable records%r");
+err:
+	werrstr("cmap: %r"); /* prefix whichever message is current */
+	return -1;
+}
--- a/otf.h.in
+++ b/otf.h.in
@@ -10,6 +10,7 @@
 typedef uint16_t u16int;
 typedef uint32_t u32int;
 typedef uint64_t u64int;
+typedef uint32_t Rune;
 char *otferrstr(void);
 #else
 #pragma incomplete Otf
@@ -25,6 +26,16 @@
 	int (*print)(void *aux, const char *fmt, ...);
 };
 
+typedef struct RuneMapper RuneMapper;
+
+struct RuneMapper { /* one rune<->glyph lookup backend */
+	int (*rune2glyph)(void *aux,Rune r); /* glyph id for r; otfrune2glyph treats a negative result as "not mapped" */
+	Rune (*glyph2rune)(void *aux, int g); /* rune for g; otfglyph2rune treats NoRune as "not mapped" */
+	void *aux; /* handed back as the first argument of both callbacks */
+};
+
+#define NoRune (~(Rune)0)
+
 typedef struct ComponentGlyph ComponentGlyph;
 
 enum {
@@ -97,3 +108,5 @@
 int otfglyfnum(Otf *o);
 int otfdrawglyf(Otf *o, Glyf *g, double ppem, int gap, GlyfImage *im);
 int otfupem(Otf *o);
+int otfrune2glyph(Otf *o, Rune r);
+Rune otfglyph2rune(Otf *o, int g);
--- a/otf.rkt
+++ b/otf.rkt
@@ -149,7 +149,9 @@
          {uint16 version unused (== 0)}
          {uint16 numTables}
          {EncodingRecord encodingRecords [numTables]}
-         #:tag "cmap")
+         #:tag "cmap"
+         #:extra (list (cons 'read (list (~a "if(otfcmap(v) < 0)") (~a "	goto err;")))
+                       (cons 'field (list (~a "RuneMapper mappers[8];") (~a "int numMappers;")))))
 
 (define headFlags
   #hash((0 . HEAD_FL_BASELINE_Y_0)
--- a/plan9/otf.c
+++ b/plan9/otf.c
@@ -629,7 +629,235 @@
 	return o->td.head->unitsPerEm;
 }
 
+int
+otfrune2glyph(Otf *o, Rune r)
+{
+	RuneMapper *m;
+	int i, g;
 
+	for(i = 0, m = o->td.cmap->mappers; i < o->td.cmap->numMappers; i++, m++){
+		if((g = m->rune2glyph(m->aux, r)) >= 0)
+			return g;
+	}
+	return -1;
+}
+
+Rune
+otfglyph2rune(Otf *o, int g)
+{
+	RuneMapper *m;
+	Rune r;
+	int i;
+
+	for(i = 0, m = o->td.cmap->mappers; i < o->td.cmap->numMappers; i++, m++){
+		if((r = m->glyph2rune(m->aux, g)) != NoRune)
+			return r;
+	}
+	return NoRune;
+}
+
+enum {
+	PLAT_UNICODE,
+	PLAT_MACINTOSH, /* "currently discouraged", unsupported */
+	PLAT_ISO, /* deprecated, unsupported */
+	PLAT_WINDOWS,
+	PLAT_CUSTOM, /* deprecated, unsupported */
+};
+
+/* supported subtable formats: 4, 6, 10, 12, 14 */
+enum {
+	ENC_UNICODE_1_0, /* deprecated, unsupported */
+	ENC_UNICODE_1_1, /* deprecated, unsupported */
+	ENC_UNICODE_ISO, /* deprecated, unsupported */
+	ENC_UNICODE_2_0_BMP, /* subtable format 4, 6 */
+	ENC_UNICODE_2_0_FULL, /* subtable format 10, 12 */
+	ENC_UNICODE_VAR_SEQ, /* subtable format 14 */
+	ENC_UNICODE_FULL, /* subtable format 13 (many-to-one), unsupported */
+
+	ENC_WINDOWS_SYMBOL = 0, /* unsupported */
+	ENC_WINDOWS_UNICODE_BMP, /* subtable format 4 */
+	ENC_WINDOWS_SHIFTJIS, /* unsupported */
+	ENC_WINDOWS_PRC, /* unsupported */
+	ENC_WINDOWS_BIG5, /* unsupported */
+	ENC_WINDOWS_WANSUNG, /* unsupported */
+	ENC_WINDOWS_JOHAB, /* unsupported */
+	ENC_WINDOWS_UNICODE_FULL = 10, /* subtable format 12 */
+};
+
+static int
+cmapGroup12rune2glyph(void *aux, Rune r)
+{
+	SubtableCmap12or13 *sc;
+	MapGroup *m;
+	int b, e, x;
+
+	sc = aux;
+	for(b = 0, e = sc->numGroups-1; b <= e; ){
+		x = (b + e)/2;
+		m = sc->groups + x;
+		if(m->endCharCode < r)
+			b = x + 1;
+		else if(m->startCharCode > r)
+			e = x - 1;
+		else
+			return m->startGlyphID + (r - m->startCharCode);
+	}
+	return -1;
+}
+
+static Rune /* rune for glyph id g from a grouped subtable (aux), or NoRune if g is in no group */
+cmapGroup12glyph2rune(void *aux, int g)
+{
+	SubtableCmap12or13 *sc;
+	MapGroup *m;
+	int i, n, o;
+
+	sc = aux;
+	for(i = 0, m = sc->groups; i < sc->numGroups; i++, m++){ /* m must advance with i, else only the first group is ever tested */
+		if(g >= m->startGlyphID){
+			n = m->endCharCode - m->startCharCode; /* offset of the last rune in this group */
+			o = g - m->startGlyphID; /* offset of g inside this group */
+			if(o >= 0 && o <= n) /* endCharCode is inclusive (see rune2glyph); also reject a negative offset */
+				return m->startCharCode + o;
+		}
+	}
+	return NoRune;
+}
+
+static int
+otfcmapUnicode(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
+{
+	SubtableCmap *sc;
+
+	sc = er->subtable;
+	switch(er->encodingID){
+	case ENC_UNICODE_2_0_BMP:
+		/* FIXME */
+		break;
+
+	case ENC_UNICODE_2_0_FULL: /* this one is good */
+		if(sc->format != 12){
+			(*unsupported)++;
+			werrstr("unicode 2.0 full: fmt %d", sc->format);
+			goto err;
+		}
+		if(sc->sub12or13.numGroups < 1){
+			werrstr("unicode 2.0 full: no groups");
+			goto err;
+		}
+		c->mappers[c->numMappers].rune2glyph = cmapGroup12rune2glyph;
+		c->mappers[c->numMappers].glyph2rune = cmapGroup12glyph2rune;
+		c->mappers[c->numMappers++].aux = &sc->sub12or13;
+		(*parsed)++;
+		break;
+
+	case ENC_UNICODE_VAR_SEQ:
+		/* FIXME */
+		break;
+
+	case ENC_UNICODE_FULL:
+		/* FIXME */
+		break;
+
+	case ENC_UNICODE_1_0:
+	case ENC_UNICODE_1_1:
+	case ENC_UNICODE_ISO:
+		(*unsupported)++;
+		werrstr("deprecated encoding: %d", er->encodingID);
+		goto err;
+	default:
+		(*unsupported)++;
+		werrstr("unknown encoding: %d", er->encodingID);
+		goto err;
+	}
+
+	return 0;
+err:
+	werrstr("unicode: %r");
+	return -1;
+}
+
+static int
+otfcmapWindows(TableCmap *c, EncodingRecord *er, int *parsed, int *unsupported)
+{
+	USED(c); USED(parsed);
+
+	switch(er->encodingID){
+	case ENC_WINDOWS_UNICODE_BMP:
+		/* FIXME */
+		break;
+
+	case ENC_WINDOWS_UNICODE_FULL:
+		/* FIXME */
+		break;
+
+	case ENC_WINDOWS_SYMBOL:
+	case ENC_WINDOWS_SHIFTJIS:
+	case ENC_WINDOWS_PRC:
+	case ENC_WINDOWS_BIG5:
+	case ENC_WINDOWS_WANSUNG:
+	case ENC_WINDOWS_JOHAB:
+		(*unsupported)++;
+		werrstr("unsupported encoding: %d", er->encodingID);
+		goto err;
+	default:
+		(*unsupported)++;
+		werrstr("unknown encoding: %d", er->encodingID);
+		goto err;
+	}
+	return 0;
+err:
+	werrstr("windows: %r");
+	return -1;
+}
+
+static int
+otfcmap(TableCmap *c)
+{
+	int i, parsed, unsupported;
+	EncodingRecord *er;
+
+	parsed = 0;
+	unsupported = 0;
+	for(i = 0, er = c->encodingRecords; i < c->numTables; i++, er++){
+		if(c->numMappers >= nelem(c->mappers)) /* give up */
+			break;
+
+		switch(er->platformID){
+		case PLAT_UNICODE:
+			/* FIXME issue a warning if returned non-zero */
+			if(otfcmapUnicode(c, er, &parsed, &unsupported) != 0)
+				goto err;
+			break;
+
+		case PLAT_WINDOWS:
+			/* FIXME issue a warning if returned non-zero */
+			if(otfcmapWindows(c, er, &parsed, &unsupported) != 0)
+				goto err;
+			break;
+
+		case PLAT_MACINTOSH:
+		case PLAT_ISO:
+		case PLAT_CUSTOM:
+		default:
+			unsupported++;
+			break;
+		}
+	}
+	if(parsed > 0)
+		return 0;
+
+	if(unsupported > 0)
+		werrstr(" (%d unsupported)", unsupported);
+	else
+		werrstr("");
+	werrstr("no usable records%r");
+err:
+	werrstr("cmap: %r");
+	return -1;
+}
+
+
 int
 read_SubHeader(Otf *o, SubHeader *v)
 {
@@ -1268,6 +1496,8 @@
 		werrstr("%s: %r", "encodingRecords");
 		goto err;
 	}
+	if(otfcmap(v) < 0)
+		goto err;
 	return 0;
 err:
 	werrstr("%s: %r", "TableCmap");
--- a/plan9/otf.h
+++ b/plan9/otf.h
@@ -10,6 +10,7 @@
 typedef uint16_t u16int;
 typedef uint32_t u32int;
 typedef uint64_t u64int;
+typedef uint32_t Rune;
 char *otferrstr(void);
 #else
 #pragma incomplete Otf
@@ -25,6 +26,16 @@
 	int (*print)(void *aux, const char *fmt, ...);
 };
 
+typedef struct RuneMapper RuneMapper;
+
+struct RuneMapper {
+	int (*rune2glyph)(void *aux,Rune r);
+	Rune (*glyph2rune)(void *aux, int g);
+	void *aux;
+};
+
+#define NoRune (~(Rune)0)
+
 typedef struct ComponentGlyph ComponentGlyph;
 
 enum {
@@ -97,6 +108,8 @@
 int otfglyfnum(Otf *o);
 int otfdrawglyf(Otf *o, Glyf *g, double ppem, int gap, GlyfImage *im);
 int otfupem(Otf *o);
+int otfrune2glyph(Otf *o, Rune r);
+Rune otfglyph2rune(Otf *o, int g);
 
 typedef struct SubHeader SubHeader;
 typedef struct MapGroup MapGroup;
@@ -396,6 +409,8 @@
 	// u16int version;
 	u16int numTables;
 	EncodingRecord *encodingRecords;
+	RuneMapper mappers[8];
+	int numMappers;
 };
 
 int read_TableCmap(Otf *o, TableCmap *v);
--- a/test.h
+++ b/test.h
@@ -1,8 +1,9 @@
 static void
 printusage(Otfile *f)
 {
-	f->print(f->aux, "usage: %s [-i GLYPH_ID] [-p PPEM  [-g PIXELS] [-m ... -[-H GLYPH_ID]]] font.otf\n", argv0);
-	f->print(f->aux, " -i: specifies a single glyph id\n");
+	f->print(f->aux, "usage: %s [-i GLYPH_ID | -r RUNE] [-p PPEM  [-g PIXELS] [-m ... -[-H GLYPH_ID]]] font.otf\n", argv0);
+	f->print(f->aux, " -i: operate on a single glyph by its id\n");
+	f->print(f->aux, " -r: operate on a single glyph by its rune\n");
 	f->print(f->aux, " -p: draw (of size in pixels per em) and write the image to stdout\n");
 	f->print(f->aux, " -g: gap (in pixels) to add to every border of a glyph\n");
 	f->print(f->aux, " -m: print out glyph ids or render them all as a map (with -p)\n");
@@ -13,6 +14,7 @@
 
 static int gap, gind = -1, map, highlight = -1;
 static double ppem;
+static Rune rune = NoRune;
 
 static int
 dumpmap(Otfile *f, GlyfImage *im, int n)
@@ -111,6 +113,10 @@
 	if((o = otfopen(in)) == nil)
 		return -1;
 	n = otfglyfnum(o);
+	if(rune != NoRune && (gind = otfrune2glyph(o, rune)) < 0){
+		werrstr("no such rune->glyph mapping\n");
+		return -1;
+	}
 	if(gind >= n){
 		werrstr("out of range (max %d)", n-1);
 		goto glypherr;
@@ -171,6 +177,11 @@
 		highlight = strtol(EARGF(usage(&out)), nil, 0); \
 		break; \
 	case 'i': \
+		if(rune != NoRune){ /* rune starts out as NoRune, so that (not Runeerror) means "-r not given yet" */ \
+errboth: /* also the target of the -r case's goto */ \
+			out.print(out.aux, "can't specify both rune and glyph\n"); \
+			usage(&out); \
+		} \
 		gind = strtol(EARGF(usage(&out)), nil, 0); \
 		break; \
 	case 'm': \
@@ -178,6 +189,14 @@
 		break; \
 	case 'p': \
 		ppem = strtod(EARGF(usage(&out)), nil); \
+		break; \
+	case 'r': \
+		if(gind >= 0) \
+			goto errboth; \
+		if(chartorune(&rune, EARGF(usage(&out))) == 1 && rune == Runeerror){ \
+			out.print(out.aux, "invalid rune\n"); \
+			usage(&out); \
+		} \
 		break; \
 	default: \
 		usage(&out); \
--- a/unix/otfsys.h
+++ b/unix/otfsys.h
@@ -10,5 +10,10 @@
 #define USED(x) (void)(x)
 #define nelem(a) (int)(sizeof(a)/sizeof((a)[0]))
 
+#define Runeerror ((Rune)0xfffd)
+
+typedef uint32_t Rune;
+
 char *fmttime(long long v);
 void werrstr(char *fmt, ...);
+int chartorune(Rune *rune, char *str);
--- /dev/null
+++ b/unix/rune.c
@@ -1,0 +1,128 @@
+/*
+Copyright © 2021 Plan 9 Foundation
+Copyright © 20XX 9front authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+#include "otfsys.h"
+
+enum /* UTF-8 decoding constants: payload bit counts, tag bytes and range masks */
+{
+	UTFmax = 4, /* longest sequence chartorune decodes, in bytes */
+	Runemax  = 0x10ffff, /* largest value chartorune accepts from a four-byte sequence */
+
+	Bit1	= 7,
+	Bitx	= 6,
+	Bit2	= 5,
+	Bit3	= 4,
+	Bit4	= 3,
+	Bit5	= 2,
+
+	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
+	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
+	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
+	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
+	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
+	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
+
+	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0000 0000 0111 1111 */
+	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0000 0000 0111 1111 1111 */
+	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 0000 0000 1111 1111 1111 1111 */
+	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */
+
+	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
+	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
+
+	Bad	= Runeerror, /* value stored in *rune when decoding fails */
+};
+
+int /* decodes one UTF-8 sequence at str into *rune; returns bytes consumed (1-4), or 1 with *rune = Runeerror on bad input */
+chartorune(Rune *rune, char *str)
+{
+	int c, c1, c2, c3;
+	long l;
+
+	/*
+	 * one character sequence
+	 *	00000-0007F => T1
+	 */
+	c = *(uint8_t*)str;
+	if(c < Tx) {
+		*rune = c;
+		return 1;
+	}
+
+	/*
+	 * two character sequence
+	 *	0080-07FF => T2 Tx
+	 */
+	c1 = *(uint8_t*)(str+1) ^ Tx; /* a valid continuation byte 10xxxxxx becomes 00xxxxxx */
+	if(c1 & Testx) /* not a continuation byte; a terminating NUL also lands here, so the next byte is never read */
+		goto bad;
+	if(c < T3) {
+		if(c < T2) /* the lead byte was itself a continuation byte */
+			goto bad;
+		l = ((c << Bitx) | c1) & Rune2;
+		if(l <= Rune1) /* overlong: the value fits in one byte */
+			goto bad;
+		*rune = l;
+		return 2;
+	}
+
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 *	(NOTE: D800-DFFF are not rejected here)
+	 */
+	c2 = *(uint8_t*)(str+2) ^ Tx;
+	if(c2 & Testx)
+		goto bad;
+	if(c < T4) {
+		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+		if(l <= Rune2) /* overlong: the value fits in two bytes */
+			goto bad;
+		*rune = l;
+		return 3;
+	}
+
+ 	/*
+	 * four character sequence
+	 *	10000-10FFFF => T4 Tx Tx Tx
+	 */
+	if(UTFmax >= 4) { /* always true with UTFmax = 4 as defined in this file */
+		c3 = *(uint8_t*)(str+3) ^ Tx;
+		if(c3 & Testx)
+			goto bad;
+		if(c < T5) {
+			l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+			if(l <= Rune3) /* overlong: the value fits in three bytes */
+				goto bad;
+			if(l > Runemax) /* above the largest accepted rune */
+				goto bad;
+			*rune = l;
+			return 4;
+		}
+	}
+
+	/*
+	 * bad decoding: report Runeerror and consume a single byte
+	 */
+bad:
+	*rune = Bad;
+	return 1;
+}