shithub: pdffs

Download patch

ref: 51cd3bfceeb001872d4ff298180875c4229a3d68
parent: a080ae88c6c65503575da220cb131ddada107bf6
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Sat Aug 29 00:44:18 EDT 2020

attach the dict to its stream when a stream follows the dict

--- a/array.c
+++ b/array.c
@@ -6,7 +6,7 @@
 /* 7.3.6 Array Objects */
 
 Object *
-pdfarray(Biobuf *b)
+pdfarray(Pdf *pdf, Biobuf *b)
 {
 	Object *o, *m;
 	Object **a;
@@ -25,7 +25,7 @@
 		}
 
 		Bungetc(b);
-		if((m = pdfobject(b)) == nil){
+		if((m = pdfobject(pdf, b)) == nil){
 			noel = 1;
 			continue;
 		}
--- a/dict.c
+++ b/dict.c
@@ -6,7 +6,7 @@
 /* 7.3.7 Dictionary Objects */
 
 Object *
-pdfdict(Biobuf *b)
+pdfdict(Pdf *pdf, Biobuf *b)
 {
 	Object *o, *k, *v;
 	KeyValue *kv;
@@ -33,11 +33,11 @@
 		}
 
 		Bungetc(b);
-		if((k = pdfobject(b)) == nil){
+		if((k = pdfobject(pdf, b)) == nil){
 			nokey = 1;
 			continue;
 		}
-		if((v = pdfobject(b)) == nil)
+		if((v = pdfobject(pdf, b)) == nil)
 			goto err;
 		if(k->type != Oname){
 			werrstr("expected name as a key");
--- a/eval.c
+++ b/eval.c
@@ -24,7 +24,7 @@
 		werrstr("xref seek failed");
 		return -1;
 	}
-	if((d = pdfobject(pdf->bio)) == nil)
+	if((d = pdfobject(pdf, pdf->bio)) == nil)
 		return -1;
 	*o = *d;
 	memset(d, 0, sizeof(*d));
--- a/main.c
+++ b/main.c
@@ -2,6 +2,7 @@
 #include <libc.h>
 #include <thread.h>
 #include <bio.h>
+#include <flate.h>
 #include "pdf.h"
 
 int mainstacksize = 32768;
@@ -21,6 +22,7 @@
 	Object *v;
 
 	quotefmtinstall();
+	deflateinit();
 
 	ARGBEGIN{
 	default:
--- a/object.c
+++ b/object.c
@@ -6,8 +6,8 @@
 
 Object *pdfstring(Biobuf *b);
 Object *pdfname(Biobuf *b);
-Object *pdfarray(Biobuf *b);
-Object *pdfdict(Biobuf *b);
+Object *pdfarray(Pdf *pdf, Biobuf *b);
+Object *pdfdict(Pdf *pdf, Biobuf *b);
 
 static Object null = {
 	.type = Onull,
@@ -15,11 +15,13 @@
 
 /* General function to parse an object of any type. */
 Object *
-pdfobject(void *b)
+pdfobject(Pdf *pdf, void *b)
 {
-	Object *o, *o2;
+	Object *o, *o2, *m;
 	vlong off;
 	int c, tf;
+	Xref xref;
+	char s[16];
 
 	o = o2 = nil;
 	do; while(isws(c = Bgetc(b)));
@@ -31,7 +33,29 @@
 		c = Bgetc(b);
 		if(c == '<'){
 			Bseek(b, -2, 1);
-			return pdfdict(b);
+			if((o = pdfdict(pdf, b)) != nil){
+				/* check for attached stream */
+				off = Boffset(b);
+				if(Bread(b, s, 7) == 7 && memcmp(s, "stream", 6) == 0 && isws(c = s[6])){
+					/* there IS a stream */
+					if((m = pdfdictget(o, "Length")) == nil || pdfeval(pdf, m) != 0 || m->type != Onum){
+						werrstr("stream has no valid /Length");
+						goto err;
+					}
+					if(c == '\r' && (c = Bgetc(b)) < 0)
+						goto err;
+					if(c != '\n'){
+						werrstr("stream has no newline after dict");
+						goto err;
+					}
+					o->type = Ostream;
+					o->stream.length = m->num;
+					o->stream.offset = Boffset(b);
+					return o;
+				}
+				Bseek(b, off, 0);
+				return o;
+			}
 		}
 		Bungetc(b);
 		/* fall through */
@@ -46,7 +70,7 @@
 
 	case '[':
 		Bungetc(b);
-		return pdfarray(b);
+		return pdfarray(pdf, b);
 
 	case 'n':
 		off = Boffset(b);
@@ -97,7 +121,7 @@
 		Bgetd(b, &o->num); /* get the first number */
 		off = Boffset(b); /* seek here if not an indirect object later */
 
-		if((o2 = pdfobject(b)) != nil && o2->type == Onum){ /* second object is number too */
+		if((o2 = pdfobject(pdf, b)) != nil && o2->type == Onum){ /* second object is number too */
 			do; while(isws(c = Bgetc(b)));
 			if(c < 0)
 				goto err;
@@ -109,9 +133,14 @@
 				return o;
 			}
 			if(c == 'o' && Bgetc(b) == 'b' && Bgetc(b) == 'j'){ /* object */
-				freeobject(o2);
+				xref.id = o->num;
+				xref.gen = o2->num;
 				/* FIXME put into a map */
-				return pdfobject(b);
+				freeobject(o);
+				freeobject(o2);
+				if((o = pdfobject(pdf, b)) != nil)
+					return o;
+				o2 = nil;
 			}
 		}
 
@@ -157,6 +186,13 @@
 
 	case Odict:
 	case Ostream:
+		for(i = 0; i < o->dict.nkv; i++){
+			free(o->dict.kv[i].key);
+			freeobject(o->dict.kv[i].value);
+		}
+		free(o->dict.kv);
+		break;
+
 	case Oindir:
 		break;
 	}
--- a/pdf.c
+++ b/pdf.c
@@ -79,7 +79,7 @@
 	Object *o;
 	KeyValue *kv;
 
-	if((o = pdfobject(pdf->bio)) == nil)
+	if((o = pdfobject(pdf, pdf->bio)) == nil)
 		goto err;
 
 	if(o->type != Odict){
@@ -115,6 +115,7 @@
 {
 	Pdf *pdf;
 	Biobuf *b;
+	Object *o;
 	char tmp[64], *s, *x;
 	int xref0; /* 7.5.4 xref subsection first object number */
 	int nxref; /* 7.5.4 xref subsection number of objects */
@@ -122,6 +123,7 @@
 	int i, n, off;
 
 	b = nil;
+	o = nil;
 	if((pdf = calloc(1, sizeof(*pdf))) == nil || (b = Bfdopen(fd, OREAD)) == nil)
 		goto err;
 	pdf->bio = b;
@@ -192,9 +194,11 @@
 			werrstr("invalid trailer: %r");
 			goto err;
 		}
-	}else{ /* could be 7.5.8 xref stream (since PDF 1.5) */
-		werrstr("FIXME xref streams not implemented");
-		goto err;
+	}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
+		Bseek(b, xreftb, 0);
+		if((o = pdfobject(pdf, b)) == nil || pdfeval(pdf, o) != 0)
+			goto err;
+		
 	}
 
 	return pdf;
@@ -201,6 +205,7 @@
 err:
 	werrstr("pdfopen: %r [at %p]", (void*)Boffset(b));
 	pdfclose(pdf);
+	freeobject(o);
 	return nil;
 }
 
--- a/pdf.h
+++ b/pdf.h
@@ -40,6 +40,13 @@
 			Object **e;
 			int ne;
 		}array;
+
+		struct {
+			KeyValue *kv;
+			int nkv;
+			u32int length; /* packed */
+			u32int offset;
+		}stream;
 	};
 };
 
@@ -66,7 +73,7 @@
 Pdf *pdfopen(int fd);
 void pdfclose(Pdf *pdf);
 
-Object *pdfobject(void *b);
+Object *pdfobject(Pdf *pdf, void *b);
 void freeobject(Object *o);
 
 /*