shithub: pdffs

Download patch

ref: 1d93500ddcda77cd265d492c6c9094c0f5c7488f
parent: a9516693e7142a658f4e3ea190272f4cd73b24be
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Sat Aug 29 18:46:35 EDT 2020

better api (less Pdf *pdf); eval more often; use null

--- a/array.c
+++ b/array.c
@@ -13,6 +13,7 @@
 	int c, noel;
 
 	o = calloc(1, sizeof(*o));
+	o->pdf = pdf;
 	o->type = Oarray;
 	Bgetc(b); /* throw away '[' */
 
--- a/dict.c
+++ b/dict.c
@@ -18,6 +18,7 @@
 	k = v = nil;
 	o = calloc(1, sizeof(*o));
 	o->type = Odict;
+	o->pdf = pdf;
 	for(nokey = 0;;){
 		if((c = Bgetc(b)) < 0)
 			goto err;
@@ -37,12 +38,12 @@
 			nokey = 1;
 			continue;
 		}
-		if((v = pdfobj(pdf, b)) == nil)
-			goto err;
 		if(k->type != Oname){
 			werrstr("expected name as a key");
 			goto err;
 		}
+		if((v = pdfobj(pdf, b)) == nil)
+			goto err;
 
 		if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil)
 			goto err;
@@ -65,13 +66,58 @@
 }
 
 Object *
-pdfdictget(Object *o, char *name)
+dictget(Object *o, char *name) /* look up key `name` in a dict or stream object; yields &null instead of nil */
 {
 	int i;
 
-	if(o == nil || (o->type != Ostream && o->type != Odict) || name == nil)
-		return nil;
+	o = pdfeval(o); /* resolve an indirect reference first; a nil o comes back as &null */
+	if((o->type != Ostream && o->type != Odict) || name == nil)
+		return &null; /* not a dictionary-like object, or no key given */
 	for(i = 0; i < o->dict.nkv && strcmp(name, o->dict.kv[i].key) != 0; i++);
 
-	return i < o->dict.nkv ? o->dict.kv[i].value : nil;
+	return pdfeval(i < o->dict.nkv ? o->dict.kv[i].value : &null); /* the value is resolved as well; a missing key gives &null */
+}
+
+vlong
+dictint(Object *o, char *name) /* numeric value stored under `name`, or 0 when the key is absent or not a number */
+{
+	o = dictget(o, name); /* dictget hands back &null rather than nil, so o can be dereferenced */
+	return o->type == Onum ? o->num : 0;
+}
+
+char *
+dictstring(Object *o, char *name) /* string stored under `name`, or "" when the key is absent or not a string */
+{
+	o = dictget(o, name); /* dictget hands back &null rather than nil, so o can be dereferenced */
+	return o->type == Ostr ? o->str : "";
+}
+
+Object *
+dictdict(Object *o, char *name) /* dictionary stored under `name`, or &null when the key is absent or not a dict */
+{
+	o = dictget(o, name); /* dictget hands back &null rather than nil, so o can be dereferenced */
+	return o->type == Odict ? o : &null; /* only Odict passes here; an Ostream value yields &null */
+}
+
+int
+dictints(Object *o, char *name, int *el, int nel) /* copy up to nel numbers from the array under `name` into el; returns the count copied, or -1 after werrstr */
+{
+	int n;
+	Object *v;
+
+	o = dictget(o, name);
+	if(o->type != Oarray){
+		werrstr("not an array");
+		return -1;
+	}
+
+	for(n = 0; n < o->array.ne && n < nel; n++){ /* array elements beyond nel are ignored */
+		if((v = o->array.e[n])->type != Onum){ /* elements are checked as stored, without pdfeval */
+			werrstr("not an integer array");
+			return -1;
+		}
+		el[n] = v->num; /* num is declared double in struct Object, so this converts to int */
+	}
+
+	return n;
 }
--- a/eval.c
+++ b/eval.c
@@ -3,32 +3,33 @@
 #include <bio.h>
 #include "pdf.h"
 
-int
-pdfeval(Pdf *pdf, Object *o)
+Object *
+pdfeval(Object *o) /* resolve o if it is an indirect reference; every return path yields a non-nil object */
 {
 	Object *d;
 	Xref *x;
 	int i;
 
-	if(o == nil || o->type != Oindir)
-		return 0;
+	if(o == nil)
+		return &null; /* nil is mapped to the shared null object */
+	if(o->type != Oindir)
+		return o; /* already a direct object: hand it back untouched */
 
-	for(i = 0; i < pdf->nxref && pdf->xref[i].id != o->indir.id; i++);
-	if(i >= pdf->nxref){
+	for(i = 0; i < o->pdf->nxref && o->pdf->xref[i].id != o->indir.id; i++); /* linear scan of the xref table of the Pdf stored in o */
+	if(i >= o->pdf->nxref){
 		werrstr("no object id %d in xref", o->indir.id);
-		return -1;
+		return &null; /* failure is reported as &null; the reason was passed to werrstr */
 	}
-	x = &pdf->xref[i];
+	x = &o->pdf->xref[i];
 
-	if(Bseek(pdf->bio, x->off, 0) != x->off){
+	if(Bseek(o->pdf->bio, x->off, 0) != x->off){
 		werrstr("xref seek failed");
-		return -1;
+		return &null;
 	}
-	if((d = pdfobj(pdf, pdf->bio)) == nil)
-		return -1;
-	*o = *d;
-	memset(d, 0, sizeof(*d));
-	pdfobjfree(d);
+	if((d = pdfobj(o->pdf, o->pdf->bio)) == nil) /* parse the object found at the xref offset */
+		return &null;
 
-	return 0;
+	pdfobjfree(o); /* NOTE(review): o is released here, yet dictget leaves the same pointer in kv[i].value and streamopen reads o->pdf after pdfeval(o) - confirm no dangling use */
+
+	return d;
 }
--- a/main.c
+++ b/main.c
@@ -47,9 +47,9 @@
 		sysfatal("%r");
 	if((pdf = pdfopen(fd)) == nil)
 		sysfatal("%r");
-	if((v = pdfdictget(pdf->info, "Creator")) != nil)
+	if((v = dictget(pdf->info, "Creator")) != nil)
 		fprint(2, "creator: %s\n", v->str);
-	if((v = pdfdictget(pdf->info, "Producer")) != nil)
+	if((v = dictget(pdf->info, "Producer")) != nil)
 		fprint(2, "producer: %s\n", v->str);
 	pdfclose(pdf);
 
--- a/object.c
+++ b/object.c
@@ -9,7 +9,7 @@
 Object *pdfarray(Pdf *pdf, Biobuf *b);
 Object *pdfdict(Pdf *pdf, Biobuf *b);
 
-static Object null = {
+Object null = { /* no longer static: declared extern in pdf.h so other files can return &null */
 	.type = Onull,
 };
 
@@ -42,7 +42,7 @@
 Object *
 pdfobj(Pdf *pdf, void *b)
 {
-	Object *o, *o2, *m;
+	Object *o, *o2;
 	vlong off;
 	int c, tf;
 	Xref xref;
@@ -63,10 +63,6 @@
 				off = Boffset(b);
 				if(Bread(b, s, 7) == 7 && memcmp(s, "stream", 6) == 0 && isws(c = s[6])){
 					/* there IS a stream */
-					if((m = pdfdictget(o, "Length")) == nil || pdfeval(pdf, m) != 0 || m->type != Onum){
-						werrstr("stream has no valid /Length");
-						goto err;
-					}
 					if(c == '\r' && (c = Bgetc(b)) < 0)
 						goto err;
 					if(c != '\n'){
@@ -73,9 +69,9 @@
 						werrstr("stream has no newline after dict");
 						goto err;
 					}
-					o->type = Ostream;
-					o->stream.len = m->num;
 					o->stream.off = Boffset(b);
+					o->type = Ostream;
+					o->stream.len = dictint(o, "Length");
 					return o;
 				}
 				Bseek(b, off, 0);
@@ -87,15 +83,21 @@
 
 	case '(':
 		Bungetc(b);
-		return pdfstring(b);
+		if((o = pdfstring(b)) != nil)
+			o->pdf = pdf;
+		return o;
 
 	case '/':
 		Bungetc(b);
-		return pdfname(b);
+		if((o = pdfname(b)) != nil)
+			o->pdf = pdf;
+		return o;
 
 	case '[':
 		Bungetc(b);
-		return pdfarray(pdf, b);
+		if((o = pdfarray(pdf, b)) != nil)
+			o->pdf = pdf;
+		return o;
 
 	case 'n':
 		off = Boffset(b);
@@ -126,15 +128,14 @@
 		goto unexpected;
 bool:
 		Bungetc(b);
-		if((o = malloc(sizeof(*o))) == nil)
+		if((o = calloc(1, sizeof(*o))) == nil)
 			goto err;
 		o->type = Obool;
+		o->pdf = pdf;
 		o->bool = tf;
 		return o;
 
 	default:
-		if((o = malloc(sizeof(*o))) == nil)
-			goto err;
 		if(!isdigit(c)){
 unexpected:
 			Bungetc(b);
@@ -143,6 +144,9 @@
 		}
 		 /* it could be a number or an indirect object */
 		Bungetc(b);
+		if((o = calloc(1, sizeof(*o))) == nil)
+			goto err;
+		o->pdf = pdf;
 		Bgetd(b, &o->num); /* get the first number */
 		off = Boffset(b); /* seek here if not an indirect object later */
 
--- a/pdf.c
+++ b/pdf.c
@@ -85,8 +85,8 @@
 		goto err;
 	}
 
-	pdf->root = pdfdictget(o, "Root");
-	pdf->info = pdfdictget(o, "Info");
+	pdf->root = dictget(o, "Root");
+	pdf->info = dictget(o, "Info");
 	pdfobjfree(o);
 	o = nil;
 
@@ -112,7 +112,7 @@
 	int xref0; /* 7.5.4 xref subsection first object number */
 	int nxref; /* 7.5.4 xref subsection number of objects */
 	int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
-	int i, n, off;
+	int i, n, off, w[3];
 	Stream *stream;
 
 	fmtinstall('T', Tfmt);
@@ -191,14 +191,18 @@
 		}
 	}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
 		Bseek(b, xreftb, 0);
-		if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(pdf, o)) == nil)
+		if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(o)) == nil){
+			werrstr("failed to stream xref: %r");
 			goto badxref;
+		}
+		if(dictints(o, "W", w, nelem(w)) != 3){
+			werrstr("W isn't 3 elements");
+			goto badxref;
+		}
 		streamclose(stream);
-		pdf->root = pdfdictget(o, "Root");
-		pdf->info = pdfdictget(o, "Info");
+		pdf->root = dictget(o, "Root");
+		pdf->info = dictget(o, "Info");
 	}
-	if(pdfeval(pdf, pdf->root) != 0 || pdfeval(pdf, pdf->info) != 0)
-		goto err;
 	fprint(2, "root %T\n", pdf->root);
 	fprint(2, "info %T\n", pdf->info);
 
--- a/pdf.h
+++ b/pdf.h
@@ -30,6 +30,7 @@
 
 struct Object {
 	int type;
+	Pdf *pdf;
 	union {
 		int bool;
 		double num;
@@ -88,6 +89,8 @@
 	void *bio;
 };
 
+extern Object null;
+
 Pdf *pdfopen(int fd);
 void pdfclose(Pdf *pdf);
 
@@ -95,17 +98,22 @@
 void pdfobjfree(Object *o);
 
 /*
- * If the object is indirect, resolve it. Operation is not recursive, ie
- * values of a dictionary won't be resolved automatically.
+ * Return the resolved object, or &null if it cannot be
+ * resolved. The operation is not recursive, i.e. values of
+ * a dictionary won't be resolved automatically.
  */
-int pdfeval(Pdf *pdf, Object *o);
+Object *pdfeval(Object *o);
 
 int isws(int c);
 int isdelim(int c);
 
-Object *pdfdictget(Object *o, char *name);
+Object *dictget(Object *o, char *name);
+vlong dictint(Object *o, char *name);
+char *dictstring(Object *o, char *name);
+Object *dictdict(Object *o, char *name);
+int dictints(Object *o, char *name, int *el, int nel);
 
-Stream *streamopen(Pdf *pdf, Object *o);
+Stream *streamopen(Object *o);
 void streamclose(Stream *s);
 
 Filter *filteropen(char *name, Object *o);
--- a/stream.c
+++ b/stream.c
@@ -14,7 +14,7 @@
 }
 
 Stream *
-streamopen(Pdf *pdf, Object *o)
+streamopen(Object *o)
 {
 	Stream *s;
 	Buffer b, x;
@@ -23,21 +23,19 @@
 	int i, nflts;
 
 	s = nil;
-	if(pdfeval(pdf, o) != 0 || o == nil || o->type != Ostream) /* FIXME open a string object as a stream as well? */
+	if(pdfeval(o)->type != Ostream) /* FIXME open a string object as a stream as well? */
 		return nil;
 
 	bufinit(&b, nil, 0);
-	if(Bseek(pdf->bio, o->stream.off, 0) != o->stream.off)
+	if(Bseek(o->pdf->bio, o->stream.off, 0) != o->stream.off)
 		return nil;
-	if(bufreadn(&b, pdf->bio, o->stream.len) < 0)
+	if(bufreadn(&b, o->pdf->bio, o->stream.len) < 0)
 		goto err;
 	bufdump(&b);
 
 	/* see if there are any filters */
-	if((of = pdfdictget(o, "Filter")) != nil){
-		if(pdfeval(pdf, of) != 0)
-			goto err;
-		if(of->type == Oname){ /* one filter */
+	if((of = dictget(o, "Filter")) != nil){
+		if(pdfeval(of)->type == Oname){ /* one filter */
 			flts = &of;
 			nflts = 1;
 		}else if(of->type == Oarray){ /* array of filters */
@@ -71,7 +69,7 @@
 	}
 	s->bio = (uchar*)(s+1);
 	s->buf = b;
-	Binit(s->bio, Bfildes(pdf->bio), OREAD);
+	Binit(s->bio, Bfildes(o->pdf->bio), OREAD);
 	Biofn(s->bio, bufiof);
 
 	bufdump(&s->buf);