ref: 90d6663b1babebc1d5923ff177f487ec996a6f0e
parent: cac797ad216aeb60f82e7ee137bf0b4fedf0082f
author: Lennart Augustsson <lennart.augustsson@epicgames.com>
date: Wed Sep 27 07:18:06 EDT 2023
Add LZW decompressor.
--- a/src/runtime/eval.c
+++ b/src/runtime/eval.c
@@ -360,9 +360,159 @@
return (BFILE*)p;
}
-/*** Coming soon: BFILE via decompression ***/
+/*** BFILE via simple LZW decompression ***/
+#define DICTSIZE 4096 /* dictionary capacity; code words are 12 bits wide (see getcode_lzw) */
+#define ASCIISIZE 96 /* size of the initial dictionary: the 95 characters ' ' - '~', plus '\n' */
struct BFILE_lzw {+ BFILE mets; /* getb/ungetb/closeb methods; first member, the struct is cast to and from BFILE* */
+ BFILE *bfile; /* underlying BFILE that supplies the compressed bytes */
+ int unget; /* storage for a single ungetb; 0 means empty, so a 0 value cannot be pushed back */
+ char *table[DICTSIZE]; /* dictionary: code word -> malloc'd NUL-terminated string, 0 if the slot is unused */
+ int table_size; /* next dictionary slot to be filled */
+ char *ptr; /* next character of buf to be returned by getb_lzw, or 0 when buf is used up */
+ int old; /* previous code word (an index into table) */
+ int ch; /* first character of the most recently expanded string */
+ char buf[DICTSIZE+1]; /* buffer holding the expansion of the current code word */
+ int rdstate; /* state of 3 bytes to 2 codewords transducer: 0 = start of a group, 1 = rdres holds pending bits */
+ int rdres; /* saved transducer bits: the low 4 bits of the second code word of a group */
+};
+
+/* Get a code word.  It's 12 bits, so 2 codewords are spread over 3 bytes:
+ *   code0 = b0 | (b1 & 0x0f) << 8
+ *   code1 = (b1 >> 4) | b2 << 4
+ * Returns the code word (0..4095), or -1 when the underlying BFILE has no
+ * more data.  Running out of data in the middle of a 3-byte group is also
+ * reported as -1 instead of shifting the negative EOF value into the result.
+ * Assumes the underlying getb yields 0..255 for data and a negative value
+ * at end of input.
+ * XXX This has 4096 hardcoded.
+ */
+int
+getcode_lzw(struct BFILE_lzw *p)
+{
+  int b, r;
+
+  if (p->rdstate == 0) {
+    r = p->bfile->getb(p->bfile);
+    if (r < 0)
+      return -1;
+    b = p->bfile->getb(p->bfile);
+    if (b < 0)
+      return -1;                /* truncated group */
+    r |= b << 8;
+    p->rdres = r >> 12;         /* save 4 bits */
+    p->rdstate = 1;
+    return r & 0xfff;
+  } else {
+    b = p->bfile->getb(p->bfile);
+    if (b < 0)
+      return -1;                /* truncated group */
+    p->rdstate = 0;
+    return p->rdres | (b << 4);
+  }
+}
+
+/* Build a new malloc'd string consisting of s followed by the character c.
+ * memerr() is called if the allocation fails.  The caller owns the result.
+ */
+char *
+str_lzw(const char *s, int c)
+{
+  int len = strlen(s);
+  char *res = malloc(len + 2);  /* room for c and the terminating NUL */
+
+  if (res == NULL)
+    memerr();
+  memcpy(res, s, len);
+  res[len] = c;
+  res[len + 1] = '\0';
+  return res;
+}
+
+/* Read one decompressed character.
+ * Order of sources: a pushed-back value, then the rest of the string that
+ * is currently being emitted from p->buf, then the expansion of the next
+ * code word.  Returns -1 when getcode_lzw has no more code words.
+ * A code word that has no dictionary entry is accepted only when it is the
+ * very next slot to be filled (the one case a valid LZW stream can produce);
+ * anything else is reported via ERR instead of being decoded as garbage.
+ * NOTE: a pushed-back value of 0 cannot be told apart from "nothing pushed back".
+ * NOTE(review): ERR is assumed not to return - confirm.
+ */
+int
+getb_lzw(BFILE *bp)
+{
+  struct BFILE_lzw *p = (struct BFILE_lzw*)bp;
+  char *s;
+  int c, n;
+
+  /* Do we have an ungetb character? */
+  if (p->unget) {
+    c = p->unget;
+    p->unget = 0;
+    return c;
+  }
+  /* Are we in the middle of emitting a string? */
+  if (p->ptr) {
+    c = *p->ptr++;
+    if (c)
+      return c;
+    p->ptr = 0;
+  }
+  n = getcode_lzw(p);
+  if (n < 0)
+    return -1;
+  if (n >= DICTSIZE)
+    ERR("getcode_lzw 1");
+  s = p->table[n];
+  if (!s) {
+    /* Code word not in the dictionary yet: it must be the entry we are
+     * about to create, i.e. the previous string plus its own first character.
+     */
+    char *os = p->table[p->old];
+    int l = strlen(os);
+    if (n != p->table_size)
+      ERR("getcode_lzw 2");
+    strcpy(p->buf, os);
+    p->buf[l++] = p->ch;
+    p->buf[l] = '\0';
+  } else {
+    strcpy(p->buf, s);
+  }
+  p->ptr = p->buf;
+  p->ch = p->buf[0];
+  /* New entry: previous string extended by the first character of this one. */
+  if (p->table_size < DICTSIZE) {
+    p->table[p->table_size++] = str_lzw(p->table[p->old], p->ch);
+  }
+  p->old = n;
+  return *p->ptr++;
+}
+
+/* Push back one value so that the next getb_lzw call returns it.
+ * Only a single value can be pending; pushing back while one is already
+ * stored is reported via ERR.
+ */
+void
+ungetb_lzw(int c, BFILE *bp)
+{
+  struct BFILE_lzw *lzw = (struct BFILE_lzw*)bp;
+
+  if (lzw->unget != 0)
+    ERR("ungetb_lzw");
+  lzw->unget = c;
+}
+
+/* Release the decompressor: free every dictionary string, close the
+ * underlying BFILE, then free the decompressor itself.
+ */
+void
+closeb_lzw(BFILE *bp)
+{
+  struct BFILE_lzw *lzw = (struct BFILE_lzw*)bp;
+  char **slot = lzw->table;
+  char **end = slot + DICTSIZE;
+
+  /* unused slots hold a null pointer, which free() ignores */
+  while (slot != end)
+    free(*slot++);
+  lzw->bfile->closeb(lzw->bfile);
+  free(lzw);
+}
+
+/* Wrap file in an LZW decompressing BFILE and return the wrapper.
+ * The dictionary starts out with the printable ASCII characters and '\n'.
+ * The first code word is read here so that getb_lzw always has a previous
+ * code word to extend.  That first code word is validated: an empty
+ * compressed stream gives a wrapper with nothing buffered, and a code word
+ * outside the initial dictionary is reported via ERR rather than being used
+ * as a table index.
+ * Closing the returned BFILE also closes file (see closeb_lzw).
+ * NOTE(review): ERR is assumed not to return - confirm.
+ */
+BFILE *
+add_lzw_decompressor(BFILE *file)
+{
+  struct BFILE_lzw *p = calloc(1, sizeof *p);
+  int i;
+
+  if (!p)
+    memerr();
+  p->mets.getb = getb_lzw;
+  p->mets.ungetb = ungetb_lzw;
+  p->mets.closeb = closeb_lzw;
+  p->bfile = file;
+
+  /* initialize dictionary with printable ASCII */
+  for(i = 0; i < ASCIISIZE-1; i++) {
+    p->table[i] = str_lzw("", i + ' ');
+  }
+  p->table[i++] = str_lzw("", '\n');
+  p->table_size = i;
+
+  /* set up decompressor state */
+  p->old = getcode_lzw(p);
+  if (p->old < 0) {
+    /* No code words at all.  Leave ptr null so that getb_lzw goes straight
+     * to getcode_lzw; keep old a valid index in any case.
+     */
+    p->old = 0;
+    return (BFILE *)p;
+  }
+  if (p->old >= p->table_size)
+    ERR("add_lzw_decompressor");
+  strcpy(p->buf, p->table[p->old]);
+  p->ch = p->buf[0];
+  p->ptr = p->buf;
+
+  return (BFILE *)p;
+}
+
/*****************************************************************************/
struct handler {@@ -369,7 +519,7 @@
jmp_buf hdl_buf; /* env storage */
struct handler *hdl_old; /* old handler */
stackptr_t hdl_stack; /* old stack pointer */
- NODEPTR hdl_exn; /* used temporarily to pass the exception value */
+ NODEPTR hdl_exn; /* used temporarily to pass the exception value */
} *cur_handler = 0;
/* Set FREE bit to 0 */
@@ -2079,7 +2229,19 @@
memerr();
if (comb_internal) {- prog = parse_top(comb_internal);
+ int c;
+ BFILE *bf = comb_internal;
+ c = bf->getb(bf);
+ /* Compressed combinators start with a 'Z', otherwise 'v' (for version) */
+ if (c == 'Z') {+ /* add decompressor transducer */
+ bf = add_lzw_decompressor(bf);
+ } else {+ /* put it back, we need it */
+ bf->ungetb(c, bf);
+ }
+ prog = parse_top(bf);
+ bf->closeb(bf);
} else {prog = parse_file(fn, &file_size);
}
--
⑨