ref: 1d93500ddcda77cd265d492c6c9094c0f5c7488f
parent: a9516693e7142a658f4e3ea190272f4cd73b24be
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Sat Aug 29 18:46:35 EDT 2020
better api (less Pdf *pdf); eval more often; use null
--- a/array.c
+++ b/array.c
@@ -13,6 +13,7 @@
int c, noel;
o = calloc(1, sizeof(*o));
+ o->pdf = pdf;
o->type = Oarray;
Bgetc(b); /* throw away '[' */
--- a/dict.c
+++ b/dict.c
@@ -18,6 +18,7 @@
k = v = nil;
o = calloc(1, sizeof(*o));
o->type = Odict;
+ o->pdf = pdf;
for(nokey = 0;;){
if((c = Bgetc(b)) < 0)
goto err;
@@ -37,12 +38,12 @@
nokey = 1;
continue;
}
- if((v = pdfobj(pdf, b)) == nil)
- goto err;
if(k->type != Oname){
werrstr("expected name as a key");
goto err;
}
+ if((v = pdfobj(pdf, b)) == nil)
+ goto err;
if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil)
goto err;
@@ -65,13 +66,58 @@
}
Object *
-pdfdictget(Object *o, char *name)
+dictget(Object *o, char *name) /* value stored under key `name` in a dict or stream object; never returns nil */
{
int i;
- if(o == nil || (o->type != Ostream && o->type != Odict) || name == nil)
- return nil;
+ o = pdfeval(o); /* resolve the container first; a nil argument comes back as &null */
+ if((o->type != Ostream && o->type != Odict) || name == nil)
+ return &null; /* not a dictionary-bearing object, or no key given */
for(i = 0; i < o->dict.nkv && strcmp(name, o->dict.kv[i].key) != 0; i++);
- return i < o->dict.nkv ? o->dict.kv[i].value : nil;
+ return pdfeval(i < o->dict.nkv ? o->dict.kv[i].value : &null); /* the value is resolved as well; a missing key yields &null */
+}
+
+vlong
+dictint(Object *o, char *name)	/* number stored under `name`; 0 when the key is missing or not Onum */
+{
+	Object *v = dictget(o, name);	/* dictget never returns nil, misses are &null */
+	return v->type != Onum ? 0 : v->num;
+}
+
+char *
+dictstring(Object *o, char *name)	/* string stored under `name`; "" when the key is missing or not Ostr */
+{
+	Object *v = dictget(o, name);	/* dictget never returns nil, misses are &null */
+	return v->type != Ostr ? "" : v->str;
+}
+
+Object *
+dictdict(Object *o, char *name)	/* dictionary stored under `name`; &null when the key is missing or not Odict */
+{
+	Object *v = dictget(o, name);	/* dictget never returns nil, misses are &null */
+	return v->type != Odict ? &null : v;
+}
+
+int
+dictints(Object *o, char *name, int *el, int nel)	/* copy up to nel numbers of the array under `name` into el */
+{
+	Object *arr, *item;
+	int n;
+
+	arr = dictget(o, name);
+	if(arr->type != Oarray){
+		werrstr("not an array");
+		return -1;
+	}
+
+	for(n = 0; n < nel && n < arr->array.ne; n++){
+		item = arr->array.e[n];
+		if(item->type != Onum){
+			werrstr("not an integer array");
+			return -1;
+		}
+		el[n] = item->num;
+	}
+	return n;	/* number of elements stored, at most nel */
}
--- a/eval.c
+++ b/eval.c
@@ -3,32 +3,50 @@
#include <bio.h>
#include "pdf.h"
-int
-pdfeval(Pdf *pdf, Object *o)
+/*
+ * Resolve an indirect reference.  nil yields &null and any object
+ * that is not Oindir is returned as is.  An Oindir object is looked
+ * up by id in o->pdf's xref table, parsed with pdfobj() and the
+ * result is moved into *o, so pointers to o held elsewhere (a dict
+ * slot, the caller's variable) stay valid.  Returns o on success and
+ * &null when the id is unknown or the seek or parse fails.
+ */
+Object *
+pdfeval(Object *o)
{
Object *d;
Xref *x;
int i;
- if(o == nil || o->type != Oindir)
- return 0;
+ if(o == nil)
+ return &null;
+ if(o->type != Oindir)
+ return o;
- for(i = 0; i < pdf->nxref && pdf->xref[i].id != o->indir.id; i++);
- if(i >= pdf->nxref){
+ for(i = 0; i < o->pdf->nxref && o->pdf->xref[i].id != o->indir.id; i++);
+ if(i >= o->pdf->nxref){
werrstr("no object id %d in xref", o->indir.id);
- return -1;
+ return &null;
}
- x = &pdf->xref[i];
+ x = &o->pdf->xref[i];
- if(Bseek(pdf->bio, x->off, 0) != x->off){
+ if(Bseek(o->pdf->bio, x->off, 0) != x->off){
werrstr("xref seek failed");
- return -1;
+ return &null;
}
- if((d = pdfobj(pdf, pdf->bio)) == nil)
- return -1;
+ if((d = pdfobj(o->pdf, o->pdf->bio)) == nil)
+ return &null;
+	if(d == &null)	/* shared sentinel: never zero or free it */
+		return d;
+	/*
+	 * Move the parsed value into *o instead of freeing o: callers such
+	 * as dictget() pass a pointer that is still stored in a dictionary.
+	 * The donor is zeroed before pdfobjfree() so the members now owned
+	 * by *o are not released along with it.
+	 */
*o = *d;
memset(d, 0, sizeof(*d));
pdfobjfree(d);
- return 0;
+
+ return o;
}
--- a/main.c
+++ b/main.c
@@ -47,9 +47,9 @@
sysfatal("%r");
if((pdf = pdfopen(fd)) == nil)
sysfatal("%r");
- if((v = pdfdictget(pdf->info, "Creator")) != nil)
+	if((v = dictget(pdf->info, "Creator"))->type == Ostr)	/* dictget never returns nil; only print real strings */
fprint(2, "creator: %s\n", v->str);
- if((v = pdfdictget(pdf->info, "Producer")) != nil)
+	if((v = dictget(pdf->info, "Producer"))->type == Ostr)	/* a missing key is &null, whose str is not valid */
fprint(2, "producer: %s\n", v->str);
pdfclose(pdf);
--- a/object.c
+++ b/object.c
@@ -9,7 +9,7 @@
Object *pdfarray(Pdf *pdf, Biobuf *b);
Object *pdfdict(Pdf *pdf, Biobuf *b);
-static Object null = {
+Object null = { /* shared "no value" object; no longer static so other files can return &null (declared extern in pdf.h) */
.type = Onull,
};
@@ -42,7 +42,7 @@
Object *
pdfobj(Pdf *pdf, void *b)
{
- Object *o, *o2, *m;
+ Object *o, *o2;
vlong off;
int c, tf;
Xref xref;
@@ -63,10 +63,6 @@
off = Boffset(b);
if(Bread(b, s, 7) == 7 && memcmp(s, "stream", 6) == 0 && isws(c = s[6])){
/* there IS a stream */
- if((m = pdfdictget(o, "Length")) == nil || pdfeval(pdf, m) != 0 || m->type != Onum){
- werrstr("stream has no valid /Length");
- goto err;
- }
if(c == '\r' && (c = Bgetc(b)) < 0)
goto err;
if(c != '\n'){
@@ -73,9 +69,9 @@
werrstr("stream has no newline after dict");
goto err;
}
- o->type = Ostream;
- o->stream.len = m->num;
o->stream.off = Boffset(b);
+ o->type = Ostream;
+ o->stream.len = dictint(o, "Length");
return o;
}
Bseek(b, off, 0);
@@ -87,15 +83,21 @@
case '(':
Bungetc(b);
- return pdfstring(b);
+ if((o = pdfstring(b)) != nil)
+ o->pdf = pdf;
+ return o;
case '/':
Bungetc(b);
- return pdfname(b);
+ if((o = pdfname(b)) != nil)
+ o->pdf = pdf;
+ return o;
case '[':
Bungetc(b);
- return pdfarray(pdf, b);
+ if((o = pdfarray(pdf, b)) != nil)
+ o->pdf = pdf;
+ return o;
case 'n':
off = Boffset(b);
@@ -126,15 +128,14 @@
goto unexpected;
bool:
Bungetc(b);
- if((o = malloc(sizeof(*o))) == nil)
+ if((o = calloc(1, sizeof(*o))) == nil)
goto err;
o->type = Obool;
+ o->pdf = pdf;
o->bool = tf;
return o;
default:
- if((o = malloc(sizeof(*o))) == nil)
- goto err;
if(!isdigit(c)){
unexpected:
Bungetc(b);
@@ -143,6 +144,9 @@
}
/* it could be a number or an indirect object */
Bungetc(b);
+ if((o = calloc(1, sizeof(*o))) == nil)
+ goto err;
+ o->pdf = pdf;
Bgetd(b, &o->num); /* get the first number */
off = Boffset(b); /* seek here if not an indirect object later */
--- a/pdf.c
+++ b/pdf.c
@@ -85,8 +85,8 @@
goto err;
}
- pdf->root = pdfdictget(o, "Root");
- pdf->info = pdfdictget(o, "Info");
+ pdf->root = dictget(o, "Root");
+ pdf->info = dictget(o, "Info");
pdfobjfree(o);
o = nil;
@@ -112,7 +112,7 @@
int xref0; /* 7.5.4 xref subsection first object number */
int nxref; /* 7.5.4 xref subsection number of objects */
int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
- int i, n, off;
+ int i, n, off, w[3];
Stream *stream;
fmtinstall('T', Tfmt);
@@ -191,14 +191,18 @@
}
}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
Bseek(b, xreftb, 0);
- if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(pdf, o)) == nil)
+ if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(o)) == nil){
+ werrstr("failed to stream xref: %r");
goto badxref;
+ }
+ if(dictints(o, "W", w, nelem(w)) != 3){
+ werrstr("W isn't 3 elements");
+ goto badxref;
+ }
streamclose(stream);
- pdf->root = pdfdictget(o, "Root");
- pdf->info = pdfdictget(o, "Info");
+ pdf->root = dictget(o, "Root");
+ pdf->info = dictget(o, "Info");
}
- if(pdfeval(pdf, pdf->root) != 0 || pdfeval(pdf, pdf->info) != 0)
- goto err;
fprint(2, "root %T\n", pdf->root);
fprint(2, "info %T\n", pdf->info);
--- a/pdf.h
+++ b/pdf.h
@@ -30,6 +30,7 @@
struct Object {
int type;
+ Pdf *pdf;
union {
int bool;
double num;
@@ -88,6 +89,8 @@
void *bio;
};
+extern Object null;
+
Pdf *pdfopen(int fd);
void pdfclose(Pdf *pdf);
@@ -95,17 +98,22 @@
void pdfobjfree(Object *o);
/*
- * If the object is indirect, resolve it. Operation is not recursive, ie
- * values of a dictionary won't be resolved automatically.
+ * Return a resolved object or &null if can't. Operation is
+ * not recursive, ie values of a dictionary won't be resolved
+ * automatically.
*/
-int pdfeval(Pdf *pdf, Object *o);
+Object *pdfeval(Object *o);
int isws(int c);
int isdelim(int c);
-Object *pdfdictget(Object *o, char *name);
+Object *dictget(Object *o, char *name); /* resolved value of key `name`; &null if absent or o is not a dict/stream */
+vlong dictint(Object *o, char *name); /* numeric value of the key; 0 if absent or not a number */
+char *dictstring(Object *o, char *name); /* string value of the key; "" if absent or not a string */
+Object *dictdict(Object *o, char *name); /* dictionary value of the key; &null if absent or not a dict */
+int dictints(Object *o, char *name, int *el, int nel); /* copies up to nel numbers of an array into el; returns the count or -1 */
-Stream *streamopen(Pdf *pdf, Object *o);
+Stream *streamopen(Object *o);
void streamclose(Stream *s);
Filter *filteropen(char *name, Object *o);
--- a/stream.c
+++ b/stream.c
@@ -14,7 +14,7 @@
}
Stream *
-streamopen(Pdf *pdf, Object *o)
+streamopen(Object *o)
{
Stream *s;
Buffer b, x;
@@ -23,21 +23,19 @@
int i, nflts;
s = nil;
- if(pdfeval(pdf, o) != 0 || o == nil || o->type != Ostream) /* FIXME open a string object as a stream as well? */
+	if((o = pdfeval(o))->type != Ostream) /* keep what pdfeval returns, it may not be the object passed in; FIXME open a string object as a stream as well? */
return nil;
bufinit(&b, nil, 0);
- if(Bseek(pdf->bio, o->stream.off, 0) != o->stream.off)
+ if(Bseek(o->pdf->bio, o->stream.off, 0) != o->stream.off)
return nil;
- if(bufreadn(&b, pdf->bio, o->stream.len) < 0)
+ if(bufreadn(&b, o->pdf->bio, o->stream.len) < 0)
goto err;
bufdump(&b);
/* see if there are any filters */
- if((of = pdfdictget(o, "Filter")) != nil){
- if(pdfeval(pdf, of) != 0)
- goto err;
- if(of->type == Oname){ /* one filter */
+	if((of = dictget(o, "Filter"))->type != Onull){ /* dictget reports a missing key as &null, never nil */
+		if(of->type == Oname){ /* one filter; dictget already resolved the value */
flts = &of;
nflts = 1;
}else if(of->type == Oarray){ /* array of filters */
@@ -71,7 +69,7 @@
}
s->bio = (uchar*)(s+1);
s->buf = b;
- Binit(s->bio, Bfildes(pdf->bio), OREAD);
+ Binit(s->bio, Bfildes(o->pdf->bio), OREAD);
Biofn(s->bio, bufiof);
bufdump(&s->buf);