ref: b4e6b3b89646f4dd5973b770b36aba511843624e
parent: ecd40a88198f1a03b0e87a4f3066f944f09cf0d8
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Mon Aug 31 07:49:51 EDT 2020
fix flate-encoded streams with PNG prediction; parse compressed xref streams
--- a/buffer.c
+++ b/buffer.c
@@ -47,7 +47,13 @@
int
bufeof(Buffer *b)
{
- return b->off == b->sz;
+ return bufleft(b) == 0; /* nonzero exactly when b->off has reached b->sz */
+}
+/* bufleft returns b->sz - b->off: the byte count from the current offset to the end of b. */
+int
+bufleft(Buffer *b)
+{
+ return b->sz - b->off;
}
uchar *
--- a/f_flate.c
+++ b/f_flate.c
@@ -4,7 +4,70 @@
#include <flate.h>
#include "pdf.h"
+typedef struct FlateParms FlateParms;
+/* Decode parameters of a FlateDecode filter: filled in by openFlate, read by fFlate through aux. */
+struct FlateParms {
+ int predictor; /* "Predictor" from DecodeParms; fFlate undoes PNG prediction when >= 10 */
+ int columns; /* "Columns" from DecodeParms; bytes per row, not counting the per-row tag byte */
+};
+/* Paeth predictor: returns whichever of a (left), b (up), c (upper left) lies closest to a + b - c. */
+static uchar
+paeth(uchar a, uchar b, uchar c)
+{
+ int est, da, db, dc;
+
+ est = a + b - c;
+ da = abs(est - a);
+ db = abs(est - b);
+ dc = abs(est - c);
+ /* ties are resolved in the order a, b, c */
+ if(da > db || da > dc)
+ return db <= dc ? b : c;
+ return a;
+}
+/* Undoes PNG filter pred on one row in place: buf holds len filtered bytes, up the row above. Returns 0, or -1 with errstr set. */
static int
+pngunpredict(int pred, uchar *buf, uchar *up, int len)
+{
+ int i;
+ /* the left neighbour is always buf[i-1], i.e. a pixel is taken to be one byte wide */
+ switch(pred){
+ case 0: /* None */
+ break;
+
+ case 1: /* Sub */
+ for(i = 1; i < len; ++i)
+ buf[i] += buf[i-1];
+ break;
+
+ case 2: /* Up */
+ for(i = 0; i < len; ++i)
+ buf[i] += up[i];
+ break;
+
+ case 3: /* Average */
+ buf[0] += up[0]/2;
+ for(i = 1; i < len; ++i)
+ buf[i] += (buf[i-1]+up[i])/2;
+ break;
+
+ case 4: /* Paeth */
+ buf[0] += paeth(0, up[0], 0); /* first byte has no left or upper-left neighbour */
+ for(i = 1; i < len; ++i) /* start at 1: buf[0] is done, and i-1 must stay inside the row */
+ buf[i] += paeth(buf[i-1], up[i], up[i-1]);
+ break;
+
+ /* FIXME 5 optimum??? */
+
+ default:
+ werrstr("unsupported predictor %d", pred);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
bw(void *aux, void *d, int n)
{
return bufput(aux, d, n);
@@ -21,9 +84,11 @@
int
fFlate(void *aux, Buffer *bi, Buffer *bo)
{
- int r;
+ int r, i, rows, n;
+ FlateParms *fp;
+ uchar *x, *y, *zero;
- USED(aux);
+ fp = aux;
do{
r = inflatezlib(bo, bw, bi, bget);
@@ -34,5 +99,57 @@
return -1;
}
+ /* 7.4.4.4 LZW and Flate predictor functions */
+ if(fp->predictor >= 10 && fp->columns > 0){
+ n = fp->columns + 1;
+ rows = bo->sz/n;
+ x = bo->b;
+ y = bo->b;
+ zero = mallocz(fp->columns, 1);
+ for(i = r = 0; i < rows && r == 0; i++, x += n, y += n)
+ r = pngunpredict(x[0], x+1, i < 1 ? zero : y+1-n, fp->columns);
+ free(zero);
+
+ x = bo->b;
+ y = bo->b+1;
+ for(i = 0; i < rows; i++, x += fp->columns, y += n)
+ memmove(x, y, fp->columns);
+ bo->sz -= rows;
+ }
+
+ return r;
+}
+/* openFlate validates Predictor/Columns from o's DecodeParms and stores them in a new FlateParms at f->aux. Returns 0, or -1 on error. */
+int
+openFlate(Filter *f, Object *o)
+{
+ Object *parms;
+ FlateParms *fp;
+ int predictor, columns;
+
+ parms = dictget(o, "DecodeParms");
+ predictor = dictint(parms, "Predictor");
+ columns = dictint(parms, "Columns");
+ if((predictor >= 2 && predictor < 10) || predictor >= 15){ /* 2..9 and anything from 15 up are rejected */
+ werrstr("unsupported flate predictor %d", predictor);
+ return -1;
+ }
+ if(predictor >= 10 && predictor <= 15 && columns < 1){ /* NOTE(review): 15 never gets here, it is rejected above - confirm intent */
+ werrstr("invalid columns %d for predictor %d", columns, predictor);
+ return -1;
+ }
+
+ if((fp = malloc(sizeof(FlateParms))) == nil) /* released again by closeFlate */
+ return -1;
+ fp->predictor = predictor;
+ fp->columns = columns;
+ f->aux = fp;
+
return 0;
+}
+/* closeFlate frees the FlateParms that openFlate stored in f->aux. */
+void
+closeFlate(Filter *f)
+{
+ free(f->aux);
}
--- a/filter.c
+++ b/filter.c
@@ -5,15 +5,9 @@
/* 7.4 Filters */
-struct Filter {
- char *name;
- int (*readall)(void *aux, Buffer *bi, Buffer *bo);
- int (*open)(Filter *f, Object *o);
- void (*close)(Filter *f);
- void *aux;
-};
-
int fFlate(void *aux, Buffer *bi, Buffer *bo);
+int openFlate(Filter *f, Object *o);
+void closeFlate(Filter *f);
static Filter filters[] = {
{"ASCII85Decode", nil, nil, nil},
@@ -21,7 +15,7 @@
{"CCITTFaxDecode", nil, nil, nil},
{"CryptDecode", nil, nil, nil},
{"DCTDecode", nil, nil, nil},
- {"FlateDecode", fFlate},
+ {"FlateDecode", fFlate, openFlate, closeFlate},
{"JBIG2Decode", nil, nil, nil},
{"JPXDecode", nil, nil, nil},
{"LZWDecode", nil, nil, nil},
--- a/misc.c
+++ b/misc.c
@@ -15,6 +15,12 @@
[Oindir] = "indir",
};
+static char *xtypes[] = { /* printable names of the Xref types, indexed by Xref.type */
+ [Xusual] = "usual",
+ [Xuncompressed] = "uncompressed",
+ [Xcompressed] = "compressed",
+};
+
Object null = {
.type = Onull,
};
@@ -30,6 +36,24 @@
if(o->type < 0 || o->type >= nelem(otypes))
return fmtprint(f, "????");
return fmtprint(f, "%s", otypes[o->type]);
+}
+/* ⊗fmt is the print verb handler for an Xref passed by value; which fields are printed depends on x.type. */
+int
+⊗fmt(Fmt *f)
+{
+ Xref x;
+
+ x = va_arg(f->args, Xref);
+
+ switch(x.type){
+ case Xusual:
+ return fmtprint(f, "<%s id=%d gen=%d off=%d>", xtypes[x.type], x.id, x.gen, x.off);
+ case Xuncompressed:
+ return fmtprint(f, "<%s gen=%d off=%d>", xtypes[x.type], x.gen, x.off);
+ case Xcompressed:
+ return fmtprint(f, "<%s id=%d objnum=%d>", xtypes[x.type], x.id, x.objnum);
+ }
+ return -1; /* x.type is none of the three known Xref types */
}
int
--- a/pdf.c
+++ b/pdf.c
@@ -5,6 +5,7 @@
#include "pdf.h"
int Tfmt(Fmt *f);
+int ⊗fmt(Fmt *f);
/*
* pre-1.5 xref section reader
@@ -41,6 +42,8 @@
}
xref.id = xref0 + i;
xref.off = strtoul(e, nil, 10);
+ /* FIXME xref.gen */
+ xref.type = Xusual;
/* search in already existing xrefs, update if found */
for(j = 0; j < pdf->nxref; j++){
@@ -97,6 +100,104 @@
return -1;
}
+static int
+getint(uchar *b, int sz, int dflt)
+{
+ int v;
+ /* big-endian integer from the first sz bytes of b; dflt stands in for a zero-width field */
+ if(sz == 0)
+ return dflt;
+ v = 0;
+ for(; sz > 0; sz--)
+ v = v<<8 | *b++;
+
+ return v;
+}
+
+/* 7.5.8.3: reads the xref stream object at the current offset of pdf->bio, appends its entries to pdf->xref and sets pdf->root/info. Returns 0, or -1 on error. */
+static int
+xrefstreamread(Pdf *pdf)
+{
+ Object *o;
+ Stream *s;
+ Xref *x;
+ uchar buf[32];
+ int w[8], nw, i, c, n, nxref, newnxref, prev, extra;
+
+ s = nil;
+ if((o = pdfobj(pdf, pdf->bio)) == nil){
+ werrstr("xref stream obj: %r");
+ goto err;
+ }
+ if((prev = dictint(o, "Prev")) > 0){ /* read the stream named by Prev first, so its entries come earlier in pdf->xref */
+ if(Bseek(pdf->bio, prev, 0) != prev){
+ werrstr("xref stream prev seek failed");
+ goto err;
+ }
+ if(xrefstreamread(pdf) != 0){
+ pdfobjfree(o);
+ return -1;
+ }
+ }
+ if((s = streamopen(o)) == nil){
+ werrstr("failed to stream xref: %r");
+ goto err;
+ }
+ if((nw = dictints(o, "W", w, nelem(w))) < 3 || nw >= nelem(w)){
+ werrstr("nW=%d", nw);
+ goto err;
+ }
+
+ for(n = i = 0; i < nw && w[i] >= 0 && w[i] <= sizeof(buf); i++) /* stops early on a negative or oversized width */
+ n += w[i]; /* size of each element. w[i] MAY be 0 */
+ if(i < nw || n < 1 || n > sizeof(buf)){ /* bad width, or an empty entry that would divide by zero below */
+ werrstr("W is beyond imaginable: %d bytes", n);
+ goto err;
+ }
+ if((nxref = streamsize(s)/n) < 1){
+ werrstr("no xref elements in the stream");
+ goto err;
+ }
+ extra = streamsize(s) % (nxref*n);
+ if(extra != 0)
+ fprint(2, "extra %d bytes in xref stream\n", extra);
+
+ newnxref = pdf->nxref + nxref; /* room for every entry, although only types 1 and 2 are stored */
+ if((x = realloc(pdf->xref, newnxref*sizeof(Xref))) == nil)
+ goto err;
+ pdf->xref = x;
+ x += pdf->nxref;
+ while(nxref-- > 0 && Bread(s->bio, buf, n) == n){ /* stop after nxref entries, on short read or error */
+ c = getint(buf, w[0], 1); /* default type is 1 */
+ if(c == 1){ /* not compressed */
+ x->off = getint(buf+w[0], w[1], 0);
+ x->gen = getint(buf+w[0]+w[1], w[2], 0);
+ x->type = Xuncompressed; /* NOTE(review): x->id is not assigned for this entry type */
+ pdf->nxref++;
+ fprint(2, "xref %⊗\n", *x);
+ x++;
+ }else if(c == 2){ /* compressed */
+ x->objnum = getint(buf+w[0], w[1], 0);
+ x->id = getint(buf+w[0]+w[1], w[2], 0);
+ x->type = Xcompressed;
+ pdf->nxref++;
+ fprint(2, "xref %⊗\n", *x);
+ x++;
+ }
+ }
+
+ streamclose(s);
+ pdf->root = pdfref(dictget(o, "Root"));
+ pdf->info = pdfref(dictget(o, "Info"));
+ pdfobjfree(o);
+
+ return 0;
+err:
+ streamclose(s);
+ pdfobjfree(o);
+ return -1;
+}
+
Pdf *
pdfopen(Biobuf *b)
{
@@ -106,10 +207,10 @@
int xref0; /* 7.5.4 xref subsection first object number */
int nxref; /* 7.5.4 xref subsection number of objects */
int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
- int i, n, off, w[3];
- Stream *stream;
+ int i, n, off;
fmtinstall('T', Tfmt);
+ fmtinstall(L'⊗', ⊗fmt);
o = nil;
if((pdf = calloc(1, sizeof(*pdf))) == nil)
@@ -181,22 +282,14 @@
goto err;
}
}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
- Bseek(b, xreftb, 0);
- if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(o)) == nil){
- werrstr("failed to stream xref: %r");
+ if(Bseek(b, xreftb, 0) != xreftb)
goto badxref;
- }
- if(dictints(o, "W", w, nelem(w)) != 3){
- werrstr("W isn't 3 elements");
- goto badxref;
- }
- streamclose(stream);
- pdf->root = dictget(o, "Root");
- pdf->info = dictget(o, "Info");
+ if(xrefstreamread(pdf) != 0)
+ goto err;
}
/* root is required, info is optional */
- if(pdf->root == nil){
+ if(pdf->root == &null){
werrstr("no root");
goto err;
}
--- a/pdf.h
+++ b/pdf.h
@@ -8,6 +8,10 @@
Ostream, /* 7.3.8 */
Onull, /* 7.3.9 */
Oindir, /* 7.3.10 */
+
+ Xusual = 0,
+ Xuncompressed,
+ Xcompressed,
};
typedef struct Buffer Buffer;
@@ -19,7 +23,8 @@
typedef struct Xref Xref;
#pragma incomplete Filter
-#pragma varargck type "T" Object *
+#pragma varargck type "T" Object*
+#pragma varargck type "⊗" Xref
struct Buffer {
uchar *b;
@@ -30,6 +35,14 @@
int eof;
};
+struct Filter {
+ char *name; /* filter name as listed in the filters[] table, e.g. "FlateDecode" */
+ int (*readall)(void *aux, Buffer *bi, Buffer *bo); /* decoder from bi into bo, e.g. fFlate; nil for most table entries */
+ int (*open)(Filter *f, Object *o); /* per-filter setup, e.g. openFlate; nil for most table entries */
+ void (*close)(Filter *f); /* per-filter teardown, e.g. closeFlate; nil for most table entries */
+ void *aux; /* filter-private state (openFlate stores its FlateParms here); presumably handed to readall as aux */
+};
+
struct Object {
int type;
int ref;
@@ -83,8 +96,12 @@
struct Xref {
u32int id;
- u32int off;
+ union{ /* unnamed union: off and objnum share storage, type tells which one is meaningful */
+ u32int off; /* set for Xusual and Xuncompressed entries */
+ u32int objnum; /* set for Xcompressed entries */
+ };
u16int gen;
+ u16int type; /* Xusual, Xuncompressed or Xcompressed */
};
struct Stream {
@@ -140,6 +157,7 @@
int dictints(Object *o, char *name, int *el, int nel);
Stream *streamopen(Object *o);
+int streamsize(Stream *s);
void streamclose(Stream *s);
Filter *filteropen(char *name, Object *o);
@@ -149,6 +167,7 @@
void bufinit(Buffer *b, uchar *d, int sz);
void buffree(Buffer *b);
int bufeof(Buffer *b);
+int bufleft(Buffer *b);
uchar *bufdata(Buffer *b, int *sz);
int bufreadn(Buffer *b, Biobuf *bio, int sz);
int bufput(Buffer *b, uchar *d, int sz);
--- a/stream.c
+++ b/stream.c
@@ -57,6 +57,8 @@
buffree(&x);
goto err;
}
+ if(!bufeof(&b))
+ fprint(2, "buffer has %d bytes left\n", bufleft(&b));
buffree(&b);
b = x;
}
@@ -79,9 +81,18 @@
return nil;
}
+int
+streamsize(Stream *s)
+{
+ return bufleft(&s->buf); /* bytes left in the stream's buffer, as computed by bufleft */
+}
+
void
streamclose(Stream *s)
{
+ if(s == nil)
+ return;
+
buffree(&s->buf);
Bterm(s->bio);
free(s);