ref: 42b4e6f6316676e6821722149208d8f7b8086234
dir: /src/runtime/bfile.c/
/***************** BFILE *******************/
/*
* BFILE is a family of stream objects.
* There are two kinds: endpoints (source/sink) and transducers.
* The streams are typically bytes.
*
* FILE source/sink for stdio FILE handles,
* buf source/sink where the read/write uses a memory buffer.
*
* lz77 transducer for LZ77 compression
* put - compresses a byte
* get - returns a decompressed byte
* rle transducer for Run Length Encoding
* put - compresses an ASCII character
* get - returns a decompressed ASCII
* bwt transducer for Burrows-Wheeler Transform
* put - transforms a byte
* get - untransforms a byte
* utf8 transducer for UTF8 encoding
* put - encodes a Unicode code point (int) to bytes
* get - decodes a Unicode code point
*/
/* Sanity checking */
/*
 * Debugging hook: prints "foo" and a newline.
 * CHECKBFILE calls it just before reporting a failed check.
 */
void
foo(void)
{
  printf("foo\n");
}
/*
 * Sanity check that stream p belongs to the implementation whose getb
 * method is f.  On mismatch, call foo() and then ERR("CHECKBFILE").
 * Both parameters are parenthesized so that arbitrary expressions
 * (e.g. `ptr + 1`) can be passed safely.
 */
#define CHECKBFILE(p, f) do { if ((p)->getb != (f)) { foo(); ERR("CHECKBFILE"); } } while(0)
/*
 * Method table shared by all BFILE implementations.
 * Every concrete stream struct in this file starts with a BFILE member,
 * so a pointer to the concrete struct can be cast to and from BFILE*.
 * Constructors in this file store 0 for methods a stream does not support.
 */
typedef struct BFILE BFILE;
struct BFILE {
  int  (*getb)(BFILE*);          /* read one item; negative means no more input */
  void (*ungetb)(int, BFILE*);   /* push an item back */
  void (*putb)(int, BFILE*);     /* write one item */
  void (*flushb)(BFILE*);        /* flush pending output */
  void (*closeb)(BFILE*);        /* close the stream and release it */
};
/* Read one item from p by dispatching to the stream's own getb method. */
static INLINE int
getb(BFILE *p)
{
  int (*reader)(struct BFILE*) = p->getb;

  return reader(p);
}
/* Push c back onto p by dispatching to the stream's own ungetb method. */
static INLINE void
ungetb(int c, BFILE *p)
{
  void (*pushback)(int, struct BFILE*) = p->ungetb;

  pushback(c, p);
}
/* Write c to p by dispatching to the stream's own putb method. */
static INLINE void
putb(int c, BFILE *p)
{
  void (*writer)(int, struct BFILE*) = p->putb;

  writer(c, p);
}
/* Close p by dispatching to the stream's own closeb method. */
static INLINE void
closeb(BFILE *p)
{
  void (*closer)(struct BFILE*) = p->closeb;

  closer(p);
}
/* Flush p by dispatching to the stream's own flushb method. */
static INLINE void
flushb(BFILE *p)
{
  void (*flusher)(struct BFILE*) = p->flushb;

  flusher(p);
}
/*
 * Write the NUL-terminated string str to p, one char at a time via putb.
 * The terminating NUL is not written.
 */
void
putsb(const char *str, struct BFILE *p)
{
  const char *cursor;

  for (cursor = str; *cursor; cursor++) {
    putb(*cursor, p);
  }
}
/*
 * Read up to size items from p into abuf, storing each as one byte.
 * Stops early when getb returns a negative value.
 * Returns the number of bytes stored.
 */
size_t
readb(void *abuf, size_t size, BFILE *p)
{
  uint8_t *out = abuf;
  size_t count = 0;

  while (count < size) {
    int item = getb(p);
    if (item < 0)
      return count;
    out[count] = item;
    count++;
  }
  return count;
}
/*
 * Write the decimal digits of -n to p, where n is expected to be <= 0.
 * No sign is written.  Because the value is never negated here, the
 * most negative value_t is handled without overflow.
 */
void
putnegb(value_t n, BFILE *p)
{
  /* 3 digits per byte is a safe upper bound on the decimal length. */
  char digits[3 * sizeof(value_t) + 1];
  int count = 0;

  /* Collect digits least significant first ... */
  for (;;) {
    digits[count++] = (char)('0' - n % 10);
    if (n > -10)
      break;
    n /= 10;
  }
  /* ... and emit them most significant first. */
  while (count > 0) {
    putb(digits[--count], p);
  }
}
/*
 * Write n to p in decimal, with a leading '-' for negative values.
 * The digits are always produced by putnegb from a non-positive value.
 */
void
putdecb(value_t n, BFILE *p)
{
  if (n >= 0) {
    putnegb(-n, p);
    return;
  }
  putb('-', p);
  putnegb(n, p);
}
/*
 * Write n to p as four items, least significant first.
 * Each item is n % 256 of the remaining value, so n is presumably
 * expected to be non-negative.
 */
void
putint32(value_t n, BFILE *p)
{
  int i;

  for (i = 0; i < 4; i++) {
    putb(n % 256, p);
    n /= 256;
  }
}
/*
 * Read four items from p, least significant first, and combine them
 * base 256 into one value (the inverse of putint32).
 * End of input is not checked: a negative getb result is folded into
 * the returned value as is.
 */
value_t
getint32(BFILE *p)
{
  value_t part[4];
  value_t acc = 0;
  int i;

  for (i = 0; i < 4; i++) {
    part[i] = getb(p);
  }
  for (i = 3; i >= 0; i--) {
    acc = acc * 256 + part[i];
  }
  return acc;
}
/***************** BFILE from/to memory buffer *******************/
/*
 * Stream state for a BFILE backed by a memory buffer.
 * Used both for reading (openb_rd_buf) and writing (openb_wr_buf).
 */
struct BFILE_buffer {
/* method table; first member so BFILE* and struct BFILE_buffer* can be cast to each other */
BFILE mets;
/* reading: number of bytes available; writing: current allocated capacity */
size_t b_size;
/* index of the next byte to read or write */
size_t b_pos;
/* the bytes; for readers this is the caller's buffer, for writers it is malloc'ed here */
uint8_t *b_buffer;
};
/*
 * Return the next byte of the memory buffer and advance,
 * or -1 when the position has reached the end.
 */
int
getb_buf(BFILE *bp)
{
  struct BFILE_buffer *mem = (struct BFILE_buffer *)bp;
  CHECKBFILE(bp, getb_buf);

  if (mem->b_pos < mem->b_size) {
    uint8_t byte = mem->b_buffer[mem->b_pos];
    mem->b_pos++;
    return byte;
  }
  return -1;
}
/*
 * Append c (stored as one byte) to the memory buffer.
 * When the buffer is full its capacity is doubled with realloc;
 * a failed realloc is reported with ERR("putb_buf").
 */
void
putb_buf(int c, BFILE *bp)
{
  struct BFILE_buffer *mem = (struct BFILE_buffer *)bp;
  CHECKBFILE(bp, getb_buf);

  if (mem->b_pos >= mem->b_size) {
    size_t grown = mem->b_size * 2;
    mem->b_size = grown;
    mem->b_buffer = realloc(mem->b_buffer, grown);
    if (!mem->b_buffer)
      ERR("putb_buf");
  }
  mem->b_buffer[mem->b_pos] = c;
  mem->b_pos++;
}
/*
 * Push c back onto a memory-buffer stream: step the position back by
 * one and store c there as a byte.
 *
 * Pushing back a negative value (the end-of-input result of getb_buf)
 * is a no-op, as with ungetc() and ungetb_utf8.  getb_buf does not
 * advance at end of input, so rewinding here would overwrite the last
 * data byte with 0xFF and deliver it as input.
 *
 * Reports ERR("ungetb") if there is nothing to step back over.
 */
void
ungetb_buf(int c, BFILE *bp)
{
  struct BFILE_buffer *p = (struct BFILE_buffer *)bp;
  CHECKBFILE(bp, getb_buf);
  if (c < 0)
    return;
  if (p->b_pos == 0)
    ERR("ungetb");
  p->b_buffer[--p->b_pos] = (uint8_t)c;
}
/*
 * Close a memory-buffer reader: release the stream object only.
 * The byte buffer it was reading from is not freed here.
 */
void
closeb_rd_buf(BFILE *bp)
{
  struct BFILE_buffer *mem = (struct BFILE_buffer *)bp;
  CHECKBFILE(bp, getb_buf);

  FREE(mem);
}
/*
 * Close a memory-buffer writer: release the stream object only.
 * The output buffer is not freed here; it must be fetched with
 * get_buf() beforehand.
 */
void
closeb_wr_buf(BFILE *bp)
{
  struct BFILE_buffer *mem = (struct BFILE_buffer *)bp;
  CHECKBFILE(bp, getb_buf);

  FREE(mem);
}
/*
 * Flush for a memory-buffer writer.  Only the stream type is checked;
 * there is nothing further to do.
 */
void
flushb_buf(BFILE *bp)
{
  CHECKBFILE(bp, getb_buf);
  /* nothing to flush */
}
struct BFILE*
openb_rd_buf(uint8_t *buf, size_t len)
{
struct BFILE_buffer *p = MALLOC(sizeof(struct BFILE_buffer));
if (!p)
memerr();
p->mets.getb = getb_buf;
p->mets.ungetb = ungetb_buf;
p->mets.putb = 0;
p->mets.flushb = 0;
p->mets.closeb = closeb_rd_buf;
p->b_size = len;
p->b_pos = 0;
p->b_buffer = buf;
return (struct BFILE*)p;
}
struct BFILE*
openb_wr_buf(void)
{
struct BFILE_buffer *p = MALLOC(sizeof(struct BFILE_buffer));
if (!p)
memerr();
p->mets.getb = getb_buf; /* Just to make CHECKFILE happy */
p->mets.ungetb = 0;
p->mets.putb = putb_buf;
p->mets.flushb = flushb_buf;
p->mets.closeb = closeb_wr_buf;
p->b_size = 1000;
p->b_pos = 0;
p->b_buffer = malloc(p->b_size);
if (!p->b_buffer)
ERR("openb_wr_buf");
return (struct BFILE*)p;
}
/*
 * Fetch the buffer filled by a memory-buffer writer.
 *   *bufp receives the start of the buffer,
 *   *lenp receives the number of bytes written so far.
 * Call this as the last operation before closing: further writes
 * may move the buffer.  The caller of openb_wr_buf() and get_buf()
 * owns the memory and must free it.
 */
void
get_buf(struct BFILE *bp, uint8_t **bufp, size_t *lenp)
{
  struct BFILE_buffer *wr = (struct BFILE_buffer *)bp;
  uint8_t *data;
  size_t used;

  CHECKBFILE(bp, getb_buf);
  data = wr->b_buffer;
  used = wr->b_pos;
  *bufp = data;
  *lenp = used;
}
#if WANT_STDIO
/***************** BFILE via FILE *******************/
/* Stream state for a BFILE that wraps a stdio FILE handle. */
struct BFILE_file {
/* method table; first member so BFILE* and struct BFILE_file* can be cast to each other */
BFILE mets;
/* the wrapped stdio stream */
FILE *file;
};
/* Read one byte from the wrapped FILE; returns whatever fgetc returns. */
int
getb_file(BFILE *bp)
{
  struct BFILE_file *fs = (struct BFILE_file *)bp;
  int byte;

  CHECKBFILE(bp, getb_file);
  byte = fgetc(fs->file);
  return byte;
}
/* Push c back onto the wrapped FILE with ungetc; its result is ignored. */
void
ungetb_file(int c, BFILE *bp)
{
  struct BFILE_file *fs = (struct BFILE_file *)bp;
  FILE *stream;

  CHECKBFILE(bp, getb_file);
  stream = fs->file;
  ungetc(c, stream);
}
/* Write c to the wrapped FILE with fputc; write errors are deliberately ignored. */
void
putb_file(int c, BFILE *bp)
{
  struct BFILE_file *fs = (struct BFILE_file *)bp;
  FILE *stream;

  CHECKBFILE(bp, getb_file);
  stream = fs->file;
  (void)fputc(c, stream);
}
/* Flush the wrapped FILE with fflush; its result is ignored. */
void
flushb_file(BFILE *bp)
{
  struct BFILE_file *fs = (struct BFILE_file *)bp;
  FILE *stream;

  CHECKBFILE(bp, getb_file);
  stream = fs->file;
  fflush(stream);
}
/* Close the wrapped FILE with fclose, then release the stream object. */
void
closeb_file(BFILE *bp)
{
  struct BFILE_file *fs = (struct BFILE_file *)bp;
  FILE *stream;

  CHECKBFILE(bp, getb_file);
  stream = fs->file;
  fclose(stream);
  FREE(fs);
}
/*
 * Release the stream object without closing the wrapped FILE
 * (compare closeb_file, which also calls fclose).
 */
void
freeb_file(BFILE *bp)
{
  struct BFILE_file *fs;

  CHECKBFILE(bp, getb_file);
  fs = (struct BFILE_file *)bp;
  FREE(fs);
}
BFILE *
add_FILE(FILE *f)
{
struct BFILE_file *p = MALLOC(sizeof (struct BFILE_file));
if (!p)
memerr();
p->mets.getb = getb_file;
p->mets.ungetb = ungetb_file;
p->mets.putb = putb_file;
p->mets.flushb = flushb_file;
p->mets.closeb = closeb_file;
p->file = f;
return (BFILE*)p;
}
#endif
#if WANT_LZ77
/***************** BFILE via simple LZ77 decompression *******************/
/*
 * Stream state for the LZ77 transducer.
 * One struct serves both directions; `read` tells which one this is.
 */
struct BFILE_lz77 {
/* method table; first member so BFILE* and struct BFILE_lz77* can be cast to each other */
BFILE mets;
BFILE *bfile; /* underlying BFILE */
/* reading: the decompressed data; writing: uncompressed bytes collected so far */
uint8_t *buf;
/* reading: length of the decompressed data; writing: allocated capacity of buf */
size_t len;
/* index of the next byte to read from, or write to, buf */
size_t pos;
/* 1 for a decompressor (read mode), 0 for a compressor (write mode) */
int read;
/* number of times flushb_lz77 has been called */
int numflush;
};
/*
 * Return the next decompressed byte and advance,
 * or -1 when all decompressed data has been consumed.
 */
int
getb_lz77(BFILE *bp)
{
  struct BFILE_lz77 *lz = (struct BFILE_lz77*)bp;
  CHECKBFILE(bp, getb_lz77);

  if (lz->pos < lz->len) {
    uint8_t byte = lz->buf[lz->pos];
    lz->pos++;
    return byte;
  }
  return -1;
}
/*
 * Push back the most recently read byte by stepping the read position
 * back by one.  The value of c is not stored; the byte already in the
 * decompressed buffer is delivered again.
 *
 * Pushing back a negative value (the end-of-input result of getb_lz77)
 * is a no-op, as with ungetc() and ungetb_utf8.  getb_lz77 does not
 * advance at end of input, so rewinding here would deliver the last
 * byte a second time.
 *
 * Reports ERR("ungetb_lz77") if nothing has been read yet, instead of
 * letting the unsigned position wrap around.
 */
void
ungetb_lz77(int c, BFILE *bp)
{
  struct BFILE_lz77 *p = (struct BFILE_lz77*)bp;
  CHECKBFILE(bp, getb_lz77);
  if (c < 0)
    return;
  if (p->pos == 0)
    ERR("ungetb_lz77");
  p->pos--;
}
/*
 * Append b (stored as one byte) to the pending uncompressed data.
 * Nothing is compressed here; that happens in flushb_lz77.
 * When the buffer is full its capacity is doubled with realloc;
 * a failed realloc calls memerr().
 */
void
putb_lz77(int b, BFILE *bp)
{
  struct BFILE_lz77 *lz = (struct BFILE_lz77*)bp;
  CHECKBFILE(bp, getb_lz77);

  if (lz->pos >= lz->len) {
    size_t grown = lz->len * 2;
    lz->len = grown;
    lz->buf = realloc(lz->buf, grown);
    if (!lz->buf)
      memerr();
  }
  lz->buf[lz->pos] = b;
  lz->pos++;
}
/*
 * Compress the pending data and write it to the underlying BFILE as
 * one chunk: the signature "LZ1", the compressed length via putint32,
 * then the compressed bytes.  Afterwards the pending buffer is empty.
 *
 * The first call always writes a chunk, even with no pending data;
 * later calls write nothing when no new data has arrived.
 */
void
flushb_lz77(BFILE *bp)
{
  struct BFILE_lz77 *lz = (struct BFILE_lz77*)bp;
  CHECKBFILE(bp, getb_lz77);

  /* The flush counter is bumped on every call, including skipped ones. */
  int flushed_before = lz->numflush++;
  if (flushed_before && !lz->pos)
    return;

  uint8_t *packed;
  size_t packed_len = lz77c(lz->buf, lz->pos, &packed);
  BFILE *out = lz->bfile;

  putsb("LZ1", out);               /* Version no */
  putint32(packed_len, out);       /* 32 bit length */
  for (const uint8_t *q = packed; q != packed + packed_len; q++) {
    putb(*q, out);                 /* and the data */
  }
  free(packed);
  lz->pos = 0;
}
/*
 * Close an LZ77 stream.
 * In write mode the pending data is first compressed and pushed to the
 * underlying BFILE.  In both modes the data buffer is released, the
 * underlying BFILE is closed, and the stream object is freed.
 *
 * The buffer is freed in read mode too: it holds the decompressed data
 * produced by lz77d and nothing else keeps a pointer to it, so skipping
 * it leaks the whole decompressed input.
 * NOTE(review): assumes the buffer returned by lz77d can be released
 * with FREE; the write path already passes a realloc'ed buffer to FREE.
 */
void
closeb_lz77(BFILE *bp)
{
  struct BFILE_lz77 *p = (struct BFILE_lz77*)bp;
  CHECKBFILE(bp, getb_lz77);
  if (!p->read) {
    /* We are in write mode, so compress and push it down */
    flushb_lz77(bp);
  }
  FREE(p->buf);
  closeb(p->bfile);
  FREE(p);
}
/*
 * Create a reading stream that delivers the decompressed contents of
 * one LZ77 chunk taken from `file`.
 *
 * The whole chunk is consumed and decompressed here: the signature
 * "LZ1", a length read with getint32, then that many compressed bytes.
 * Later getb calls only walk the decompressed buffer.
 *
 * Errors: ERR("Bad LZ77 signature") when the signature does not match,
 * ERR("Truncated LZ77 data") when the input ends before the announced
 * number of bytes has been read (previously the end-of-input marker was
 * stored as data and decompressed), memerr() on allocation failure.
 */
BFILE *
add_lz77_decompressor(BFILE *file)
{
  struct BFILE_lz77 *p = MALLOC(sizeof(struct BFILE_lz77));
  if (!p)
    memerr();
  memset(p, 0, sizeof(struct BFILE_lz77));
  p->mets.getb = getb_lz77;
  p->mets.ungetb = ungetb_lz77;
  p->mets.putb = 0;
  p->mets.flushb = 0;
  p->mets.closeb = closeb_lz77;
  p->read = 1;
  p->bfile = file;
  p->numflush = 0;

  /* First check version */
  if (getb(file) != 'L' || getb(file) != 'Z' || getb(file) != '1')
    ERR("Bad LZ77 signature");

  size_t size = getint32(file);      /* then read size */
  uint8_t *buf = MALLOC(size);       /* temporary buffer for input */
  if (!buf)
    memerr();
  for (size_t i = 0; i < size; i++) {
    int c = getb(file);              /* and read data */
    if (c < 0)
      ERR("Truncated LZ77 data");
    buf[i] = c;
  }
  p->len = lz77d(buf, size, &p->buf); /* decompress */
  FREE(buf);
  p->pos = 0;
  return (BFILE*)p;
}
/*
 * Create a writing stream that collects bytes and, on flush or close,
 * writes them LZ77-compressed to `file`.
 * The collection buffer starts at 25000 bytes.  ungetb is left as 0
 * (unsupported).
 */
BFILE *
add_lz77_compressor(BFILE *file)
{
  struct BFILE_lz77 *lz = MALLOC(sizeof(struct BFILE_lz77));
  BFILE *m;

  if (!lz)
    memerr();
  memset(lz, 0, sizeof(struct BFILE_lz77));

  m = &lz->mets;
  m->getb = getb_lz77;
  m->ungetb = 0;
  m->putb = putb_lz77;
  m->flushb = flushb_lz77;
  m->closeb = closeb_lz77;

  lz->bfile = file;
  lz->read = 0;
  lz->numflush = 0;

  lz->len = 25000;
  lz->buf = MALLOC(lz->len);
  if (!lz->buf)
    memerr();
  lz->pos = 0;

  return (BFILE*)lz;
}
#endif /* WANT_LZ77 */
/***************** BFILE with UTF8 encode/decode *******************/
/* Stream state for the UTF-8 transducer. */
struct BFILE_utf8 {
/* method table; first member so BFILE* and struct BFILE_utf8* can be cast to each other */
BFILE mets;
/* underlying byte stream */
BFILE *bfile;
/* pushed-back code point, or a negative value when there is none */
int unget;
};
/* This is not right with WORD_SIZE==16 */
/*
 * Return the next code point decoded from the underlying byte stream,
 * or -1 if the input ends before a complete sequence has been read.
 * A pushed-back value, if any, is returned first.
 *
 * The sequence length is decided by the lead byte (1 to 4 bytes).
 * Trailing bytes are not validated: only their low 6 bits are used.
 * A lead byte matching none of the four forms ends in ERR("getb_utf8")
 * after three further bytes have been read.
 */
int
getb_utf8(BFILE *bp)
{
  struct BFILE_utf8 *u = (struct BFILE_utf8*)bp;
  CHECKBFILE(bp, getb_utf8);
  int lead, t1, t2, t3;

  /* Do we have an ungetb character? */
  if (u->unget >= 0) {
    int pending = u->unget;
    u->unget = -1;
    return pending;
  }

  lead = getb(u->bfile);
  if (lead < 0)
    return -1;
  if (!(lead & 0x80))
    return lead;                                  /* 0xxxxxxx */

  t1 = getb(u->bfile);
  if (t1 < 0)
    return -1;
  if ((lead & 0xe0) == 0xc0)                      /* 110xxxxx */
    return ((lead & 0x1f) << 6) | (t1 & 0x3f);

  t2 = getb(u->bfile);
  if (t2 < 0)
    return -1;
  if ((lead & 0xf0) == 0xe0)                      /* 1110xxxx */
    return ((lead & 0x0f) << 12) | ((t1 & 0x3f) << 6) | (t2 & 0x3f);

  t3 = getb(u->bfile);
  if (t3 < 0)
    return -1;
  if ((lead & 0xf8) == 0xf0)                      /* 11110xxx */
    return ((lead & 0x07) << 18) | ((t1 & 0x3f) << 12) | ((t2 & 0x3f) << 6) | (t3 & 0x3f);

  ERR("getb_utf8");
}
/*
 * Push back one code point, to be returned by the next getb_utf8.
 * Only one value can be pending: a second push-back without a read
 * in between is reported with ERR("ungetb_utf8").
 */
void
ungetb_utf8(int c, BFILE *bp)
{
  struct BFILE_utf8 *u = (struct BFILE_utf8*)bp;
  CHECKBFILE(bp, getb_utf8);

  if (!(u->unget < 0))
    ERR("ungetb_utf8");
  u->unget = c;
}
/*
 * Encode code point c as UTF-8 and write the bytes (1 to 4 of them)
 * to the underlying stream.
 * A negative c is reported with ERR("putb_utf8: < 0"), a value of
 * 0x110000 or more with ERR("putb_utf8").
 */
void
putb_utf8(int c, BFILE *bp)
{
  struct BFILE_utf8 *u = (struct BFILE_utf8 *)bp;
  BFILE *out;

  CHECKBFILE(bp, getb_utf8);
  if (c < 0)
    ERR("putb_utf8: < 0");

  out = u->bfile;
  if (c < 0x80) {
    /* 1 byte */
    putb(c, out);
  } else if (c < 0x800) {
    /* 2 bytes */
    putb((c >> 6) | 0xc0, out);
    putb((c & 0x3f) | 0x80, out);
  } else if (c < 0x10000) {
    /* 3 bytes */
    putb((c >> 12) | 0xe0, out);
    putb(((c >> 6) & 0x3f) | 0x80, out);
    putb((c & 0x3f) | 0x80, out);
  } else if (c < 0x110000) {
    /* 4 bytes */
    putb((c >> 18) | 0xf0, out);
    putb(((c >> 12) & 0x3f) | 0x80, out);
    putb(((c >> 6) & 0x3f) | 0x80, out);
    putb((c & 0x3f) | 0x80, out);
  } else {
    ERR("putb_utf8");
  }
}
/* Flush by passing the request on to the underlying stream. */
void
flushb_utf8(BFILE *bp)
{
  struct BFILE_utf8 *u = (struct BFILE_utf8*)bp;
  BFILE *below;

  CHECKBFILE(bp, getb_utf8);
  below = u->bfile;
  flushb(below);
}
/* Close the underlying stream, then release this stream object. */
void
closeb_utf8(BFILE *bp)
{
  struct BFILE_utf8 *u = (struct BFILE_utf8*)bp;
  BFILE *below;

  CHECKBFILE(bp, getb_utf8);
  below = u->bfile;
  closeb(below);
  FREE(u);
}
/*
 * Stack a UTF-8 transducer on top of the byte stream `file`.
 * The result reads and writes code points; it starts with no
 * pushed-back value.  Closing it also closes `file`.
 */
BFILE *
add_utf8(BFILE *file)
{
  struct BFILE_utf8 *u = MALLOC(sizeof(struct BFILE_utf8));
  BFILE *m;

  if (!u)
    memerr();

  u->bfile = file;
  u->unget = -1;

  m = &u->mets;
  m->getb = getb_utf8;
  m->ungetb = ungetb_utf8;
  m->putb = putb_utf8;
  m->flushb = flushb_utf8;
  m->closeb = closeb_utf8;

  return (BFILE*)u;
}