shithub: pdffs

Download patch

ref: b4e6b3b89646f4dd5973b770b36aba511843624e
parent: ecd40a88198f1a03b0e87a4f3066f944f09cf0d8
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Mon Aug 31 07:49:51 EDT 2020

fix flate-encoded streams with PNG prediction; parse compressed xref streams

--- a/buffer.c
+++ b/buffer.c
@@ -47,7 +47,13 @@
 int
 bufeof(Buffer *b)
 {
-	return b->off == b->sz;
+	return bufleft(b) == 0; /* at EOF once bufleft() reports nothing remaining */
+}
+
+int
+bufleft(Buffer *b)
+{
+	return b->sz - b->off; /* bytes remaining: size minus current offset */
 }
 
 uchar *
--- a/f_flate.c
+++ b/f_flate.c
@@ -4,7 +4,70 @@
 #include <flate.h>
 #include "pdf.h"
 
+typedef struct FlateParms FlateParms;
+
+struct FlateParms {
+	int predictor; /* /Predictor from DecodeParms; values >= 10 make fFlate undo PNG prediction */
+	int columns; /* /Columns from DecodeParms: data bytes per row, each row also has one leading tag byte */
+};
+
+static uchar 
+paeth(uchar a, uchar b, uchar c)
+{
+	int p, pa, pb, pc;
+	/* Paeth predictor: of a (left), b (above), c (upper left) return the one closest to a+b-c; ties prefer a, then b */
+	p = a + b - c;
+	pa = abs(p - a);
+	pb = abs(p - b);
+	pc = abs(p - c);
+
+	if(pa <= pb && pa <= pc)
+		return a;
+	return pb <= pc ? b : c;
+}
+
 static int
+pngunpredict(int pred, uchar *buf, uchar *up, int len)
+{
+	int i;
+	/* Undo PNG row filter pred on buf[0..len) in place; up is the previous row (zeros for the first). Returns 0, or -1 with errstr set. */
+	switch(pred){
+	case 0: /* None */
+		break;
+
+	case 1: /* Sub: add the byte to the left (NOTE(review): treats a pixel as 1 byte, Colors/BitsPerComponent are not consulted) */
+		for(i = 1; i < len; ++i)
+			buf[i] += buf[i-1];
+		break;
+
+	case 2: /* Up: add the byte above */
+		for(i = 0; i < len; ++i)
+			buf[i] += up[i];
+		break;
+
+	case 3: /* Average of left and above; the first byte has no left neighbour */
+		buf[0] += up[0]/2;
+		for(i = 1; i < len; ++i)
+			buf[i] += (buf[i-1]+up[i])/2;
+		break;
+
+	case 4: /* Paeth */
+		buf[0] += paeth(0, up[0], 0); /* first byte: left and upper-left count as 0 */
+		for(i = 1; i < len; ++i) /* start at 1: byte 0 is already done and has no buf[-1]/up[-1] */
+			buf[i] += paeth(buf[i-1], up[i], up[i-1]);
+		break;
+
+	/* row tags only go 0-4; "optimum" is the PDF /Predictor value 15, meaning the tag may differ per row */
+
+	default:
+		werrstr("unsupported predictor %d", pred);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
 bw(void *aux, void *d, int n)
 {
 	return bufput(aux, d, n);
@@ -21,9 +84,11 @@
 int
 fFlate(void *aux, Buffer *bi, Buffer *bo)
 {
-	int r;
+	int r, i, rows, n;
+	FlateParms *fp;
+	uchar *x, *y, *zero;
 
-	USED(aux);
+	fp = aux;
 
 	do{
 		r = inflatezlib(bo, bw, bi, bget);
@@ -34,5 +99,57 @@
 		return -1;
 	}
 
+	/* 7.4.4.4 LZW and Flate predictor functions */
+	if(fp->predictor >= 10 && fp->columns > 0){
+		n = fp->columns + 1;
+		rows = bo->sz/n;
+		x = bo->b;
+		y = bo->b;
+		zero = mallocz(fp->columns, 1);
+		for(i = r = 0; i < rows && r == 0; i++, x += n, y += n)
+			r = pngunpredict(x[0], x+1, i < 1 ? zero : y+1-n, fp->columns);
+		free(zero);
+
+		x = bo->b;
+		y = bo->b+1;
+		for(i = 0; i < rows; i++, x += fp->columns, y += n)
+			memmove(x, y, fp->columns);
+		bo->sz -= rows;
+	}
+
+	return r;
+}
+
+int
+openFlate(Filter *f, Object *o)
+{
+	Object *parms;
+	FlateParms *fp;
+	int predictor, columns;
+	/* Read /DecodeParms of o, validate Predictor/Columns and keep them in f->aux (freed by closeFlate). Returns 0 or -1. */
+	parms = dictget(o, "DecodeParms");
+	predictor = dictint(parms, "Predictor");
+	columns = dictint(parms, "Columns");
+	if((predictor >= 2 && predictor < 10) || predictor > 15){ /* 2..9 (TIFF etc) unhandled; 10..15 are the PNG family, 15 included */
+		werrstr("unsupported flate predictor %d", predictor);
+		return -1;
+	}
+	if(predictor >= 10 && predictor <= 15 && columns < 1){ /* PNG rows need a positive width */
+		werrstr("invalid columns %d for predictor %d", columns, predictor);
+		return -1;
+	}
+
+	if((fp = malloc(sizeof(FlateParms))) == nil)
+		return -1;
+	fp->predictor = predictor;
+	fp->columns = columns;
+	f->aux = fp;
+
 	return 0;
+}
+
+void
+closeFlate(Filter *f)
+{
+	free(f->aux); /* the FlateParms allocated by openFlate */
 }
--- a/filter.c
+++ b/filter.c
@@ -5,15 +5,9 @@
 
 /* 7.4 Filters */
 
-struct Filter {
-	char *name;
-	int (*readall)(void *aux, Buffer *bi, Buffer *bo);
-	int (*open)(Filter *f, Object *o);
-	void (*close)(Filter *f);
-	void *aux;
-};
-
 int fFlate(void *aux, Buffer *bi, Buffer *bo);
+int openFlate(Filter *f, Object *o);
+void closeFlate(Filter *f);
 
 static Filter filters[] = {
 	{"ASCII85Decode", nil, nil, nil},
@@ -21,7 +15,7 @@
 	{"CCITTFaxDecode", nil, nil, nil},
 	{"CryptDecode", nil, nil, nil},
 	{"DCTDecode", nil, nil, nil},
-	{"FlateDecode", fFlate},
+	{"FlateDecode", fFlate, openFlate, closeFlate},
 	{"JBIG2Decode", nil, nil, nil},
 	{"JPXDecode", nil, nil, nil},
 	{"LZWDecode", nil, nil, nil},
--- a/misc.c
+++ b/misc.c
@@ -15,6 +15,12 @@
 	[Oindir] = "indir",
 };
 
+static char *xtypes[] = { /* printable names for Xref.type, indexed by the X* constants */
+	[Xusual] = "usual",
+	[Xuncompressed] = "uncompressed",
+	[Xcompressed] = "compressed",
+};
+
 Object null = {
 	.type = Onull,
 };
@@ -30,6 +36,24 @@
 	if(o->type < 0 || o->type >= nelem(otypes))
 		return fmtprint(f, "????");
 	return fmtprint(f, "%s", otypes[o->type]);
+}
+
+int
+⊗fmt(Fmt *f)
+{
+	Xref x;
+	/* %⊗ verb: prints an Xref passed by value; which fields are shown depends on x.type */
+	x = va_arg(f->args, Xref);
+
+	switch(x.type){
+	case Xusual:
+		return fmtprint(f, "<%s id=%d gen=%d off=%d>", xtypes[x.type], x.id, x.gen, x.off);
+	case Xuncompressed:
+		return fmtprint(f, "<%s gen=%d off=%d>", xtypes[x.type], x.gen, x.off);
+	case Xcompressed:
+		return fmtprint(f, "<%s id=%d objnum=%d>", xtypes[x.type], x.id, x.objnum);
+	}
+	return -1; /* x.type is none of the known X* values */
 }
 
 int
--- a/pdf.c
+++ b/pdf.c
@@ -5,6 +5,7 @@
 #include "pdf.h"
 
 int Tfmt(Fmt *f);
+int ⊗fmt(Fmt *f);
 
 /*
  * pre-1.5 xref section reader
@@ -41,6 +42,8 @@
 		}
 		xref.id = xref0 + i;
 		xref.off = strtoul(e, nil, 10);
+		/* FIXME xref.gen */
+		xref.type = Xusual;
 
 		/* search in already existing xrefs, update if found */
 		for(j = 0; j < pdf->nxref; j++){
@@ -97,6 +100,104 @@
 	return -1;
 }
 
+static int
+getint(uchar *b, int sz, int dflt)
+{
+	int x, i;
+	/* Big-endian integer from b[0..sz); a zero-width field yields dflt. NOTE(review): sz > sizeof(int) shifts the high bytes out of x. */
+	if(sz == 0)
+		return dflt;
+	x = 0;
+	for(i = 0; i < sz; i++)
+		x = x<<8 | b[i];
+
+	return x;
+}
+
+/* 7.5.8.3: read the xref stream object at the current pdf->bio offset (its /Prev chain first) and append the entries to pdf->xref; 0 on success, -1 on error */
+static int
+xrefstreamread(Pdf *pdf)
+{
+	Object *o;
+	Stream *s;
+	Xref *x;
+	uchar buf[32];
+	int w[8], nw, i, c, n, nxref, newnxref, prev, extra;
+
+	s = nil;
+	if((o = pdfobj(pdf, pdf->bio)) == nil){
+		werrstr("xref stream obj: %r");
+		goto err;
+	}
+	if((prev = dictint(o, "Prev")) > 0){ /* older section: load it first so its entries come before ours */
+		if(Bseek(pdf->bio, prev, 0) != prev){
+			werrstr("xref stream prev seek failed");
+			goto err;
+		}
+		if(xrefstreamread(pdf) != 0){ /* NOTE(review): nothing guards against a /Prev cycle here */
+			pdfobjfree(o);
+			return -1;
+		}
+	}
+	if((s = streamopen(o)) == nil){
+		werrstr("failed to stream xref: %r");
+		goto err;
+	}
+	if((nw = dictints(o, "W", w, nelem(w))) < 3 || nw >= nelem(w)){
+		werrstr("nW=%d", nw);
+		goto err;
+	}
+
+	for(n = i = 0; i < nw; i++)
+		n += (w[i] < 0 || w[i] > (int)sizeof(buf)) ? (int)sizeof(buf)+1 : w[i]; /* size of each element. w[i] MAY be 0; a bad width pushes n out of range */
+	if(n < 1 || n > (int)sizeof(buf)){ /* n == 0 would divide by zero below */
+		werrstr("W is beyond imaginable: %d bytes", n);
+		goto err;
+	}
+	if((nxref = streamsize(s)/n) < 1){
+		werrstr("no xref elements in the stream");
+		goto err;
+	}
+	extra = streamsize(s) % (nxref*n);
+	if(extra != 0)
+		fprint(2, "extra %d bytes in xref stream\n", extra);
+
+	newnxref = pdf->nxref + nxref; /* room for every entry, although entries of other types are skipped below */
+	if((x = realloc(pdf->xref, newnxref*sizeof(Xref))) == nil)
+		goto err;
+	pdf->xref = x;
+	x += pdf->nxref; /* append after the entries already present */
+	while(Bread(s->bio, buf, n) == n){ /* stop on short read or error */
+		c = getint(buf, w[0], 1); /* default type is 1 */
+		if(c == 1){ /* not compressed */
+			x->off = getint(buf+w[0], w[1], 0);
+			x->gen = getint(buf+w[0]+w[1], w[2], 0);
+			x->type = Xuncompressed; /* NOTE(review): x->id is not assigned for this entry type */
+			pdf->nxref++;
+			fprint(2, "xref %⊗\n", *x);
+			x++;
+		}else if(c == 2){ /* compressed */
+			x->objnum = getint(buf+w[0], w[1], 0);
+			x->id = getint(buf+w[0]+w[1], w[2], 0);
+			x->type = Xcompressed;
+			pdf->nxref++;
+			fprint(2, "xref %⊗\n", *x);
+			x++;
+		}
+	}
+
+	streamclose(s);
+	pdf->root = pdfref(dictget(o, "Root")); /* assigned after the /Prev recursion, so this section's value wins */
+	pdf->info = pdfref(dictget(o, "Info"));
+	pdfobjfree(o);
+
+	return 0;
+err:
+	streamclose(s);
+	pdfobjfree(o);
+	return -1;
+}
+
 Pdf *
 pdfopen(Biobuf *b)
 {
@@ -106,10 +207,10 @@
 	int xref0; /* 7.5.4 xref subsection first object number */
 	int nxref; /* 7.5.4 xref subsection number of objects */
 	int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
-	int i, n, off, w[3];
-	Stream *stream;
+	int i, n, off;
 
 	fmtinstall('T', Tfmt);
+	fmtinstall(L'⊗', ⊗fmt);
 
 	o = nil;
 	if((pdf = calloc(1, sizeof(*pdf))) == nil)
@@ -181,22 +282,14 @@
 			goto err;
 		}
 	}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
-		Bseek(b, xreftb, 0);
-		if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(o)) == nil){
-			werrstr("failed to stream xref: %r");
+		if(Bseek(b, xreftb, 0) != xreftb)
 			goto badxref;
-		}
-		if(dictints(o, "W", w, nelem(w)) != 3){
-			werrstr("W isn't 3 elements");
-			goto badxref;
-		}
-		streamclose(stream);
-		pdf->root = dictget(o, "Root");
-		pdf->info = dictget(o, "Info");
+		if(xrefstreamread(pdf) != 0)
+			goto err;
 	}
 
 	/* root is required, info is optional */
-	if(pdf->root == nil){
+	if(pdf->root == &null){
 		werrstr("no root");
 		goto err;
 	}
--- a/pdf.h
+++ b/pdf.h
@@ -8,6 +8,10 @@
 	Ostream, /* 7.3.8 */
 	Onull,   /* 7.3.9 */
 	Oindir,  /* 7.3.10 */
+
+	Xusual = 0,
+	Xuncompressed,
+	Xcompressed,
 };
 
 typedef struct Buffer Buffer;
@@ -19,7 +23,8 @@
 typedef struct Xref Xref;
 #pragma incomplete Filter
 
-#pragma varargck type "T" Object *
+#pragma varargck type "T" Object*
+#pragma varargck type "⊗" Xref
 
 struct Buffer {
 	uchar *b;
@@ -30,6 +35,14 @@
 	int eof;
 };
 
+struct Filter {
+	char *name; /* PDF filter name, e.g. "FlateDecode" */
+	int (*readall)(void *aux, Buffer *bi, Buffer *bo); /* decode bi into bo; nil in the table when not implemented */
+	int (*open)(Filter *f, Object *o); /* optional setup from object o (openFlate fills aux); 0 or -1 */
+	void (*close)(Filter *f); /* optional teardown (closeFlate frees aux) */
+	void *aux; /* filter-private state, passed to readall as aux — presumably by the filter driver; confirm in filter.c */
+};
+
 struct Object {
 	int type;
 	int ref;
@@ -83,8 +96,12 @@
 
 struct Xref {
 	u32int id;
-	u32int off;
+	union{
+		u32int off; /* Xusual/Xuncompressed: offset read from the xref entry */
+		u32int objnum; /* Xcompressed: second field of a type-2 xref stream entry */
+	};
 	u16int gen;
+	u16int type; /* Xusual, Xuncompressed or Xcompressed */
 };
 
 struct Stream {
@@ -140,6 +157,7 @@
 int dictints(Object *o, char *name, int *el, int nel);
 
 Stream *streamopen(Object *o);
+int streamsize(Stream *s);
 void streamclose(Stream *s);
 
 Filter *filteropen(char *name, Object *o);
@@ -149,6 +167,7 @@
 void bufinit(Buffer *b, uchar *d, int sz);
 void buffree(Buffer *b);
 int bufeof(Buffer *b);
+int bufleft(Buffer *b);
 uchar *bufdata(Buffer *b, int *sz);
 int bufreadn(Buffer *b, Biobuf *bio, int sz);
 int bufput(Buffer *b, uchar *d, int sz);
--- a/stream.c
+++ b/stream.c
@@ -57,6 +57,8 @@
 				buffree(&x);
 				goto err;
 			}
+			if(!bufeof(&b))
+				fprint(2, "buffer has %d bytes left\n", bufleft(&b));
 			buffree(&b);
 			b = x;
 		}
@@ -79,9 +81,18 @@
 	return nil;
 }
 
+int
+streamsize(Stream *s)
+{
+	return bufleft(&s->buf); /* bytes remaining in the stream's buffer, per bufleft() */
+}
+
 void
 streamclose(Stream *s)
 {
+	if(s == nil)
+		return;
+
 	buffree(&s->buf);
 	Bterm(s->bio);
 	free(s);