ref: 778dcd43d5d7461f2e5f08ae02a8079a6a059ae1
parent: 7ecb904aae1578af2785fccdb6565a674b5b678e
author: Lennart Augustsson <lennart.augustsson@epicgames.com>
date: Thu Feb 22 08:43:16 EST 2024
Add some new compression code.
--- /dev/null
+++ b/src/runtime/lz77.c
@@ -1,0 +1,138 @@
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+
+#define MAXWIN 8192
+#define MAXLEN (9 + 255)
+#define MINMATCH 3
+#define MINOFFS 1
+
+/*
+ * Encoding inspired by FastLZ
+ * 000nnnnn - literal run follows, with n+1 bytes
+ * 111ooooo pppppppp nnnnnnnn - at offset o*256+p, copy n+9 bytes (maximum offset is 8191)
+ * nnnooooo pppppppp - at offset o*256+p, copy n+2 bytes (n != 0, n != 7)
+ * Match length is >= 3.
+ */
+
+/* dst must be big enough to hold the decompressed result */
+size_t
+lz77d(uint8_t *src, size_t srclen, uint8_t *adst)
+{+ uint8_t *end = src + srclen;
+ uint8_t *dst = adst;
+
+ while (src < end) {+ int op = *src++;
+ int opx = op & 0x1f; /* part of offset, or literal length */
+ op >>= 5;
+ if (op == 0) {+ /* 000nnnnn */
+ do {+ *dst++ = *src++;
+ } while (--opx >= 0);
+ } else {+ size_t len;
+ size_t offs = opx * 256 + *src++ + MINOFFS;
+ if (op == 7) {+ /* 111ooooo pppppppp nnnnnnnn */
+ len = 9 + *src++;
+ } else {+ /* nnnooooo pppppppp */
+ len = 2 + op;
+ }
+ //printf("match cur=%d offs=%d len=%d\n", (int)(dst - adst), (int)offs, (int)len);+ uint8_t *p = dst - offs;
+ for (size_t i = 0; i < len; i++) {+ *dst++ = *p++;
+ }
+ }
+ }
+ return dst - adst;
+}
+
+static size_t
+match(uint8_t *src, uint8_t *win, uint8_t *end)
+{+ size_t n;
+
+ for(n = 0; *src == *win && src < end && n < MAXLEN; src++, win++, n++)
+ ;
+ return n;
+}
+
+size_t
+lz77c(uint8_t *src, size_t srclen, uint8_t *adst)
+{+ uint8_t *cur;
+ uint8_t *end = src + srclen;
+ uint8_t *dst = adst;
+
+ for (cur = src; cur < end; ) {+ int win_end = cur - src;
+ int win_len = MIN(win_end, MAXWIN);
+ //size_t max_len = MIN(MAXLEN, end - cur);
+ /* Inefficient match loop */
+ size_t match_len = MINMATCH-1;
+ size_t match_offs = 0;
+ /* Compression is slow, since we use brute force to find a match. */
+ for (size_t offs = MINOFFS; offs < win_len; offs++) {+ size_t n = match(cur, cur - offs, end);
+ if (n > match_len) {+ match_len = n;
+ match_offs = offs;
+ }
+ }
+ if (match_len >= MINMATCH) {+ //printf("match cur=%d offs=%d len=%d str=%.*s\n", (int)(cur-src), (int)match_offs, (int)match_len, (int)match_len, cur);+ //printf("match cur=%d offs=%d len=%d\n", (int)(cur-src), (int)match_offs, (int)match_len);+ /* found a match */
+ cur += match_len;
+ match_offs -= MINOFFS;
+ match_len -= 2;
+ int hi = match_offs >> 8;
+ int lo = match_offs & 0xff;
+ if (match_len < 7) {+ *dst++ = (match_len << 5) + hi;
+ *dst++ = lo;
+ } else {+ *dst++ = (7 << 5) + hi;
+ *dst++ = lo;
+ *dst++ = match_len - 7;
+ }
+ } else {+ /* generate a literal */
+ /* how long should it be? 3 seems to be a sweet spot */
+ size_t len = MIN(3, end - cur);
+ *dst++ = len - 1;
+ for (size_t i = 0; i < len; i++) {+ *dst++ = *cur++;
+ }
+ }
+ }
+ return dst - adst;
+}
+
+int
+main(int argc, char **argv)
+{+ FILE *fi = fopen(argv[1], "r");
+ FILE *fo = fopen(argv[2], "w");
+ int dec = argc > 3;
+ fseek(fi, 0, SEEK_END);
+ size_t ilen = ftell(fi);
+ size_t olen;
+ fseek(fi, 0, SEEK_SET);
+ uint8_t *ibuf = malloc(ilen);
+ uint8_t *obuf = malloc((dec?10:2)*ilen);
+ fread(ibuf, ilen, 1, fi);
+ if (dec)
+ olen = lz77d(ibuf, ilen, obuf);
+ else
+ olen = lz77c(ibuf, ilen, obuf);
+ fwrite(obuf, olen, 1, fo);
+ exit(0);
+}
--
⑨