shithub: MicroHs

Download patch

ref: 90d6663b1babebc1d5923ff177f487ec996a6f0e
parent: cac797ad216aeb60f82e7ee137bf0b4fedf0082f
author: Lennart Augustsson <lennart.augustsson@epicgames.com>
date: Wed Sep 27 07:18:06 EDT 2023

Add LZW decompressor.

--- a/src/runtime/eval.c
+++ b/src/runtime/eval.c
@@ -360,9 +360,159 @@
   return (BFILE*)p;
 }
 
-/*** Coming soon: BFILE via decompression ***/
+/*** BFILE via simple LZW decompression ***/
 
+#define DICTSIZE 4096           /* dictionary slots, one per 12-bit code word */
+#define ASCIISIZE 96            /* initial entries: ' ' - '~', then '\n' */
 
+struct BFILE_lzw {              /* a BFILE that LZW-decompresses another BFILE */
+  BFILE    mets;                /* getb/ungetb/closeb; first, as BFILE* is cast to this struct */
+  BFILE    *bfile;              /* underlying BFILE supplying the compressed bytes */
+  int      unget;               /* storage for a single ungetb, 0 when empty */
+  char     *table[DICTSIZE];    /* dictionary: code word -> string, 0 if unused */
+  int      table_size;          /* next dictionary slot */
+  char     *ptr;                /* next character of buf to emit, 0 when idle */
+  int      old;                 /* previous code word */
+  int      ch;                  /* first character of the latest decoded string */
+  char     buf[DICTSIZE+1];     /* buffer holding output string */
+  int      rdstate;             /* 3 bytes to 2 codewords transducer: 0 or 1, which codeword is next */
+  int      rdres;               /* saved transducer bits (low 4 bits of 2nd codeword) */
+};
+
+/* Get a code word, or -1 at end of input (also if a code word is cut short).
+ * It's 12 bits, so 2 codewords are spread over 3 bytes, low bits first.
+ * XXX This has 4096 hardcoded.
+ */
+int
+getcode_lzw(struct BFILE_lzw *p)
+{
+  int lo, hi;
+
+  if (p->rdstate == 0) {
+    lo = p->bfile->getb(p->bfile);
+    hi = lo < 0 ? -1 : p->bfile->getb(p->bfile);
+    if (hi < 0)                 /* EOF must not be shifted into the code word */
+      return -1;
+    lo |= hi << 8;
+    p->rdres = lo >> 12;        /* save 4 bits */
+    p->rdstate = 1;
+    return lo & 0xfff;
+  }
+  hi = p->bfile->getb(p->bfile);
+  p->rdstate = 0;
+  return hi < 0 ? -1 : (p->rdres | (hi << 4));
+}
+
+char *                          /* malloc()ed copy of s with c appended */
+str_lzw(const char *s, int c)
+{
+  size_t len = strlen(s);
+  char *res = malloc(len + 2);  /* one byte for c, one for the NUL */
+  if (res == NULL)
+    memerr();
+  strcpy(res, s);
+  res[len] = (char)c;
+  res[len + 1] = '\0';
+  return res;
+}
+
+/* BFILE getb method: return the next decompressed byte, or -1 at end of
+ * input.  A code word that no well-formed stream can contain is an ERR. */
+int
+getb_lzw(BFILE *bp)
+{
+  struct BFILE_lzw *p = (struct BFILE_lzw*)bp;
+  int c, n;
+
+  /* Do we have an ungetb character? */
+  if (p->unget) {
+    c = p->unget;
+    p->unget = 0;
+    return c;
+  }
+  /* Are we in the middle of emitting a string? */
+  if (p->ptr) {
+    c = *p->ptr++;
+    if (c)
+      return c;
+    p->ptr = 0;                 /* string used up, decode the next code word */
+  }
+  n = getcode_lzw(p);
+  if (n < 0)
+    return -1;
+  if (n >= DICTSIZE)
+    ERR("getcode_lzw 1");
+  if (p->table[n]) {
+    strcpy(p->buf, p->table[n]);
+  } else {
+    /* A code without an entry is only legal when it names the entry about to
+     * be added; it stands for the previous string plus its first character. */
+    if (n != p->table_size)
+      ERR("getcode_lzw 2");
+    strcpy(p->buf, p->table[p->old]);
+    size_t l = strlen(p->buf);
+    p->buf[l] = p->ch;
+    p->buf[l + 1] = '\0';
+  }
+  p->ptr = p->buf;
+  p->ch = p->buf[0];
+  if (p->table_size < DICTSIZE)
+    p->table[p->table_size++] = str_lzw(p->table[p->old], p->ch);
+  p->old = n;
+  return *p->ptr++;
+}
+
+void                            /* BFILE ungetb method: push back one byte */
+ungetb_lzw(int c, BFILE *bp)
+{
+  struct BFILE_lzw *lz = (struct BFILE_lzw*)bp;
+  if (lz->unget != 0)           /* only a single pushed-back byte is stored */
+    ERR("ungetb_lzw");
+  lz->unget = c;
+}
+
+/* BFILE closeb method: free the dictionary strings, close the underlying
+ * BFILE and release the decompressor itself. */
+void
+closeb_lzw(BFILE *bp)
+{
+  struct BFILE_lzw *lz = (struct BFILE_lzw*)bp;
+
+  for (char **e = lz->table; e < lz->table + DICTSIZE; e++)
+    free(*e);                   /* free(NULL) is a no-op, so no need to test */
+  lz->bfile->closeb(lz->bfile);
+  free(lz);
+}
+
+/* Return a BFILE that LZW-decompresses the bytes of file; its closeb closes file too. */
+BFILE *
+add_lzw_decompressor(BFILE *file)
+{
+  struct BFILE_lzw *p = calloc(1, sizeof *p);
+  int i;
+
+  if (!p)
+    memerr();
+  p->mets.getb = getb_lzw;
+  p->mets.ungetb = ungetb_lzw;
+  p->mets.closeb = closeb_lzw;
+  p->bfile = file;
+  /* initialize dictionary with printable ASCII, followed by newline */
+  for (i = 0; i < ASCIISIZE-1; i++)
+    p->table[i] = str_lzw("", i + ' ');
+  p->table[i++] = str_lzw("", '\n');
+  p->table_size = i;
+
+  /* set up decompressor state; the first code word must name an initial entry */
+  p->old = getcode_lzw(p);
+  if (p->old < 0 || p->old >= p->table_size)
+    ERR("add_lzw_decompressor");  /* EOF or corrupt: no string to start from */
+  strcpy(p->buf, p->table[p->old]);
+  p->ch = p->buf[0];
+  p->ptr = p->buf;
+  return (BFILE *)p;
+}
+
 /*****************************************************************************/
 
 struct handler {
@@ -369,7 +519,7 @@
   jmp_buf         hdl_buf;      /* env storage */
   struct handler *hdl_old;      /* old handler */
   stackptr_t      hdl_stack;    /* old stack pointer */
-  NODEPTR         hdl_exn;     /* used temporarily to pass the exception value */
+  NODEPTR         hdl_exn;      /* used temporarily to pass the exception value */
 } *cur_handler = 0;
 
 /* Set FREE bit to 0 */
@@ -2079,7 +2229,19 @@
     memerr();
 
   if (comb_internal) {
-    prog = parse_top(comb_internal);
+    int c;
+    BFILE *bf = comb_internal;
+    c = bf->getb(bf);
+    /* Compressed combinators start with a 'Z', otherwise 'v' (for version) */
+    if (c == 'Z') {
+      /* add decompressor transducer */
+      bf = add_lzw_decompressor(bf);
+    } else {
+      /* put it back, we need it */
+      bf->ungetb(c, bf);
+    }
+    prog = parse_top(bf);
+    bf->closeb(bf);
   } else {
     prog = parse_file(fn, &file_size);
   }
--