ref: b2c6861bee0841dbe2d99cc19c2512630094b5a0
parent: b1c5646248fdf3bc73ea9bcee3d158c27c56e5fe
author: Lennart Augustsson <lennart.augustsson@epicgames.com>
date: Mon Jan 8 07:48:20 EST 2024
Some UTF8 stuff
--- a/src/runtime/bfile.c
+++ b/src/runtime/bfile.c
@@ -9,10 +9,36 @@
/* BFILE will have different implementations, they all have these methods */
typedef struct BFILE {int (*getb)(struct BFILE*);
- void (*ungetb)(int c, struct BFILE*);
+ void (*ungetb)(int, struct BFILE*);  /* push one value back; the lzw and utf8 implementations keep a single pushed-back value */
+ void (*putb)(int, struct BFILE*);  /* write one value; set to 0 by BFILEs that cannot write (e.g. the lzw decompressor) */
void (*closeb)(struct BFILE*);
} BFILE;
+static inline int  /* Convenience wrapper: read the next value from p by calling its getb method. */
+getb(struct BFILE *p)
{+ return p->getb(p);  /* returns whatever the implementation returns; callers in this file treat a negative value as end of input */
+}
+
+static inline void  /* Convenience wrapper: push the value c back onto p by calling its ungetb method. */
+ungetb(int c, struct BFILE *p)
{+ p->ungetb(c, p);  /* note the argument order: the value comes first, the BFILE second */
+}
+
+static inline void  /* Convenience wrapper: write the value c to p by calling its putb method. */
+putb(int c, struct BFILE *p)
{+ p->putb(c, p);  /* NOTE(review): no null check, and some BFILEs set putb to 0 (lzw decompressor, static buffer) -- do not call on those */
+}
+
+static inline void  /* Convenience wrapper: close p by calling its closeb method. */
+closeb(struct BFILE *p)
{+ p->closeb(p);  /* what gets released is up to the implementation; the utf8 one also frees p itself */
+}
+
+
/***************** BFILE from static buffer *******************/
struct BFILE_buffer {BFILE mets;
@@ -45,6 +71,8 @@
(void)bp; /* shut up warning */
}
+/* There is no open(). Only used with statically allocated buffers. */
+
#if WANT_STDIO
/***************** BFILE via FILE *******************/
struct BFILE_file {@@ -67,6 +95,13 @@
}
void
+putb_file(int c, BFILE *bp)  /* putb method for FILE-backed BFILEs: write c to the wrapped FILE. */
{+ struct BFILE_file *p = (struct BFILE_file *)bp;  /* recover the concrete struct from the generic BFILE pointer */
+ (void)fputc(c, p->file);  /* fputc's result is deliberately discarded, so a write error is not reported to the caller */
+}
+
+void
closeb_file(BFILE *bp)
{struct BFILE_file *p = (struct BFILE_file *)bp;
@@ -81,6 +116,7 @@
memerr();
p->mets.getb = getb_file;
p->mets.ungetb = ungetb_file;
+ p->mets.putb = putb_file;
p->mets.closeb = closeb_file;
p->file = f;
return (BFILE*)p;
@@ -151,9 +187,9 @@
int c, n;
/* Do we have an ungetb character? */
- if (p->unget) {+ if (p->unget >= 0) {c = p->unget;
- p->unget = 0;
+ p->unget = -1;
return c;
}
/* Are we in the middle of emitting a string? */
@@ -193,7 +229,7 @@
ungetb_lzw(int c, BFILE *bp)
{struct BFILE_lzw *p = (struct BFILE_lzw*)bp;
- if (p->unget)
+ if (p->unget >= 0)
ERR("ungetb_lzw");p->unget = c;
}
@@ -214,15 +250,18 @@
BFILE *
add_lzw_decompressor(BFILE *file)
{- struct BFILE_lzw *p = calloc(1, sizeof(struct BFILE_lzw));
+ struct BFILE_lzw *p = MALLOC(sizeof(struct BFILE_lzw));
int i;
if (!p)
memerr();
+ memset(p, 0, sizeof(struct BFILE_lzw));
p->mets.getb = getb_lzw;
p->mets.ungetb = ungetb_lzw;
+ p->mets.putb = 0; /* no compressor yet. */
p->mets.closeb = closeb_lzw;
p->bfile = file;
+ p->unget = -1;
/* initialize dictionary with printable ASCII */
for(i = 0; i < ASCIISIZE-1; i++) {@@ -240,3 +279,111 @@
return (BFILE *)p;
}
+/***************** BFILE with UTF8 encode/decode *******************/
+
+struct BFILE_utf8 {+ BFILE mets;  /* State of the UTF-8 transducer. mets is the method table; it is the first member, which is what lets a BFILE* be cast to this struct */
+ BFILE *bfile;  /* the wrapped BFILE that the encoded bytes are read from / written to */
+ int unget;  /* value stored by ungetb_utf8, or -1 when nothing is pushed back */
+};
+
+int  /* getb method of the UTF-8 transducer: read 1-4 bytes from the wrapped BFILE and return the decoded value, or -1. */
+getb_utf8(BFILE *bp)
{+ struct BFILE_utf8 *p = (struct BFILE_utf8*)bp;
+ int c1, c2, c3, c4;  /* c1 is the lead byte, c2..c4 the following bytes of the sequence */
+
+ /* Do we have an ungetb character? */
+ if (p->unget >= 0) {+ c1 = p->unget;
+ p->unget = -1;  /* mark the push-back slot empty again */
+ return c1;  /* the pushed-back value is returned as is, it is not decoded again */
+ }
+ c1 = p->bfile->getb(p->bfile);
+ if (c1 < 0)
+ return -1;  /* negative from the wrapped BFILE (presumably end of input) is reported as -1 */
+ if ((c1 & 0x80) == 0)
+ return c1;  /* 0xxxxxxx: one-byte sequence, the byte is the value */
+ c2 = p->bfile->getb(p->bfile);  /* NOTE(review): read before c1 is validated, and never checked for the 10xxxxxx form; it is only masked with 0x3f */
+ if (c2 < 0)
+ return -1;  /* input ended in the middle of a sequence */
+ if ((c1 & 0xe0) == 0xc0)
+ return ((c1 & 0x1f) << 6) | (c2 & 0x3f);  /* 110xxxxx 10xxxxxx: 5 + 6 payload bits */
+ c3 = p->bfile->getb(p->bfile);
+ if (c3 < 0)
+ return -1;  /* input ended in the middle of a sequence */
+ if ((c1 & 0xf0) == 0xe0)
+ return ((c1 & 0x0f) << 12) | ((c2 & 0x3f) << 6) | (c3 & 0x3f);  /* 1110xxxx + two following bytes: 4 + 6 + 6 payload bits */
+ c4 = p->bfile->getb(p->bfile);
+ if (c4 < 0)
+ return -1;  /* input ended in the middle of a sequence */
+ if ((c1 & 0xf8) == 0xf0)
+ return ((c1 & 0x07) << 18) | ((c2 & 0x3f) << 12) | ((c3 & 0x3f) << 6) | (c4 & 0x3f);  /* 11110xxx + three following bytes: 3 + 6 + 6 + 6 payload bits */
+ ERR("getb_utf8");+}  /* lead byte matched no pattern (10xxxxxx or 11111xxx); three more bytes were already consumed. Assumes ERR does not return -- TODO confirm */
+
+void  /* ungetb method of the UTF-8 transducer: remember c so that the next getb_utf8 returns it. */
+ungetb_utf8(int c, BFILE *bp)
{+ struct BFILE_utf8 *p = (struct BFILE_utf8*)bp;
+ if (p->unget >= 0)  /* only one value can be held; a second push-back without a read in between is an error */
+ ERR("ungetb_utf8");+ p->unget = c;  /* stored as is; NOTE(review): a negative c (e.g. -1) is indistinguishable from an empty slot and is therefore dropped */
+}
+
+void  /* putb method of the UTF-8 transducer: write the value c to the wrapped BFILE as 1-4 bytes. */
+putb_utf8(int c, BFILE *bp)
{+ struct BFILE_utf8 *p = (struct BFILE_utf8 *)bp;
+ if (c < 0)
+ ERR("putb_utf8: < 0");+ if (c < 0x80) {+ p->bfile->putb(c, p->bfile);  /* negative values are rejected first; values below 0x80 are written unchanged as a single byte */
+ return;
+ }
+ if (c < 0x800) {+ p->bfile->putb(((c >> 6 ) ) | 0xc0, p->bfile);  /* two bytes: lead 110xxxxx carries the bits above the low 6 */
+ p->bfile->putb(((c ) & 0x3f) | 0x80, p->bfile);  /* following byte 10xxxxxx carries the low 6 bits */
+ return;
+ }
+ if (c < 0x10000) {+ p->bfile->putb(((c >> 12) ) | 0xe0, p->bfile);  /* three bytes: lead 1110xxxx; NOTE(review): 0xD800-0xDFFF is not rejected here */
+ p->bfile->putb(((c >> 6 ) & 0x3f) | 0x80, p->bfile);  /* middle 6 bits */
+ p->bfile->putb(((c ) & 0x3f) | 0x80, p->bfile);  /* low 6 bits */
+ return;
+ }
+ if (c < 0x110000) {+ p->bfile->putb(((c >> 18) ) | 0xf0, p->bfile);  /* four bytes: lead 11110xxx, for values up to 0x10FFFF */
+ p->bfile->putb(((c >> 12) & 0x3f) | 0x80, p->bfile);  /* bits 17..12 */
+ p->bfile->putb(((c >> 6 ) & 0x3f) | 0x80, p->bfile);  /* bits 11..6 */
+ p->bfile->putb(((c ) & 0x3f) | 0x80, p->bfile);  /* bits 5..0 */
+ return;
+ }
+ ERR("putb_utf8");+}  /* reached only for c >= 0x110000, which none of the branches above encodes */
+
+void  /* closeb method of the UTF-8 transducer: close the wrapped BFILE, then release the transducer itself. */
+closeb_utf8(BFILE *bp)
{+ struct BFILE_utf8 *p = (struct BFILE_utf8*)bp;
+
+ p->bfile->closeb(p->bfile);  /* the wrapped BFILE is closed first, through its own closeb method */
+ FREE(p);  /* releases the struct obtained with MALLOC in add_utf8; bp must not be used afterwards */
+}
+
+BFILE *  /* Wrap file in a newly allocated UTF-8 transducer and return it as a generic BFILE*. */
+add_utf8(BFILE *file)
{+ struct BFILE_utf8 *p = MALLOC(sizeof(struct BFILE_utf8));  /* memory is not zeroed here; every field is assigned explicitly below */
+
+ if (!p)
+ memerr();  /* allocation failure; presumably memerr does not return -- TODO confirm */
+ p->mets.getb = getb_utf8;
+ p->mets.ungetb = ungetb_utf8;
+ p->mets.putb = putb_utf8;  /* putb_utf8 calls file's putb without a null check, so writing needs a wrapped BFILE that has one */
+ p->mets.closeb = closeb_utf8;  /* closing the transducer also closes file */
+ p->bfile = file;
+ p->unget = -1;  /* -1 marks the push-back slot as empty */
+
+ return (BFILE*)p;  /* the cast relies on mets being the first member of struct BFILE_utf8 */
+}
--- a/src/runtime/eval.c
+++ b/src/runtime/eval.c
@@ -1018,11 +1018,11 @@
int
gobble(BFILE *f, int c)
{- int d = f->getb(f);
+ int d = getb(f);  /* gobble consumes the next value only if it equals c (returns 1), otherwise pushes it back (returns 0); the read now goes through the inline getb wrapper */
if (c == d) {return 1;
} else {- f->ungetb(d, f);
+ ungetb(d, f);  /* no match: push the value back so the next read sees it again */
return 0;
}
}
@@ -1033,9 +1033,9 @@
{int c;
- c = f->getb(f);
+ c = getb(f);
if (c == ' ' || c == ')') {- f->ungetb(c, f);
+ ungetb(c, f);
return 0;
} else {return c;
@@ -1047,16 +1047,16 @@
{value_t i = 0;
value_t neg = 1;
- int c = f->getb(f);
+ int c = getb(f);
if (c == '-') {neg = -1;
- c = f->getb(f);
+ c = getb(f);
}
for(;;) {i = i * 10 + c - '0';
- c = f->getb(f);
+ c = getb(f);
if (c < '0' || c > '9') {- f->ungetb(c, f);
+ ungetb(c, f);
break;
}
}
@@ -1129,7 +1129,7 @@
if (!buffer)
memerr();
for(i = 0;;) {- c = f->getb(f);
+ c = getb(f);
if (c == '"')
break;
if (i >= sz) {@@ -1161,7 +1161,7 @@
int c;
char buf[80]; /* store names of primitives. */
- c = f->getb(f);
+ c = getb(f);
if (c < 0) ERR("parse EOF"); switch (c) { case '(' :@@ -1260,7 +1260,7 @@
int c;
while ((c = *p++)) {- if (c != f->getb(f))
+ if (c != getb(f))
ERR("version mismatch");}
gobble(f, '\r'); /* allow extra CR */
@@ -1293,7 +1293,7 @@
BFILE *p = openb_FILE(f);
/* And parse it */
NODEPTR n = parse_top(p);
- p->closeb(p);
+ closeb(p);
return n;
}
@@ -2654,9 +2654,9 @@
if (combexpr) {int c;
- struct BFILE_buffer ibf = { { getb_buf, ungetb_buf, closeb_buf }, combexprlen, 0, combexpr };+ struct BFILE_buffer ibf = { { getb_buf, ungetb_buf, 0, closeb_buf }, combexprlen, 0, combexpr };BFILE *bf = (BFILE*)&ibf;
- c = bf->getb(bf);
+ c = getb(bf);
/* Compressed combinators start with a 'Z', otherwise 'v' (for version) */
if (c == 'Z') {/* add compressor transducer */
@@ -2663,10 +2663,10 @@
bf = add_lzw_decompressor(bf);
} else {/* put it back, we need it */
- bf->ungetb(c, bf);
+ ungetb(c, bf);
}
prog = parse_top(bf);
- bf->closeb(bf);
+ closeb(bf);
} else {#if WANT_STDIO
prog = parse_file(inname, &file_size);
--
⑨