ref: d9638664119a09f7fbab558635c2bdf16f5fd1c1
parent: f8f7ffe655e3a6439adf0614d0232e7520757566
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Thu Aug 27 20:43:29 EDT 2020
add more object types, parse file trailer
--- /dev/null
+++ b/array.c
@@ -1,0 +1,72 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include "pdf.h"
+
+/*
+ * 7.3.6 Array Objects
+ *
+ * Parses an array that starts with '[' at p, looking at no more than
+ * len bytes. Elements are parsed with pdfobject and stored in order.
+ * On success returns a new Oarray object and sets *e to point right
+ * after the closing ']'. On failure returns nil; *e may have been
+ * modified by a nested pdfobject call.
+ */
+Object *
+pdfarray(char *p, char **e, int len)
+{
+	Object *o, *m;
+	Object **a;
+
+	/* the shortest valid array is "[]" */
+	if(len < 2 || p[0] != '['){
+		werrstr("invalid array");
+		return nil;
+	}
+	p++;
+	len--;
+
+	/* zeroed, so the element list starts out empty */
+	if((o = calloc(1, sizeof(*o))) == nil)
+		return nil;
+	o->type = Oarray;
+
+	/*
+	 * Leave only through the closing ']' or an error: running out of
+	 * input before ']' means the array is truncated.
+	 */
+	for(;;){
+		for(; len > 0 && isws(*p); p++, len--);
+		if(len < 1){
+			werrstr("too short");
+			goto err;
+		}
+
+		if(p[0] == ']'){
+			p++;
+			break;
+		}
+
+		if((m = pdfobject(p, e, len)) == nil)
+			goto err;
+		len -= *e - p;
+		p = *e;
+
+		/* grow by one slot; o->array.e stays valid if realloc fails */
+		if((a = realloc(o->array.e, (o->array.ne+1)*sizeof(Object*))) == nil){
+			freeobject(m);
+			goto err;
+		}
+
+		o->array.e = a;
+		a[o->array.ne++] = m;
+	}
+
+	*e = p;
+
+	return o;
+err:
+	freeobject(o);
+
+	return nil;
+}
--- /dev/null
+++ b/dict.c
@@ -1,0 +1,94 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include "pdf.h"
+
+/*
+ * 7.3.7 Dictionary Objects
+ *
+ * Parses a dictionary that starts with "<<" at p, looking at no more
+ * than len bytes. Every entry is a name key followed by a value, both
+ * parsed with pdfobject; the key is stored as a copy of its name string.
+ * On success returns a new Odict object and sets *e to point right
+ * after the closing ">>". On failure returns nil; *e may have been
+ * modified by a nested pdfobject call.
+ */
+Object *
+pdfdict(char *p, char **e, int len)
+{
+	Object *o, *k, *v;
+	KeyValue *kv;
+	char *key;
+
+	if(len < 4 || p[0] != '<' || p[1] != '<'){
+		werrstr("invalid dict");
+		return nil;
+	}
+	p += 2;
+	len -= 2;
+
+	/* zeroed, so the entry list starts out empty */
+	if((o = calloc(1, sizeof(*o))) == nil)
+		return nil;
+	o->type = Odict;
+
+	/*
+	 * Leave only through the closing ">>" or an error: running out of
+	 * input before ">>" means the dictionary is truncated.
+	 */
+	for(;;){
+		for(; len > 0 && isws(*p); p++, len--);
+		/* two bytes are needed to check for ">>" */
+		if(len < 2){
+			werrstr("too short");
+			goto err;
+		}
+
+		if(p[0] == '>' && p[1] == '>'){
+			p += 2;
+			break;
+		}
+
+		if((k = pdfobject(p, e, len)) == nil)
+			goto err;
+		if(k->type != Oname){
+			freeobject(k);
+			werrstr("expected name as a key");
+			goto err;
+		}
+		len -= *e - p;
+		p = *e;
+
+		/* only the name string is kept, the key object itself is dropped */
+		key = strdup(k->name);
+		freeobject(k);
+		if(key == nil)
+			goto err;
+
+		if((v = pdfobject(p, e, len)) == nil){
+			free(key);
+			goto err;
+		}
+		len -= *e - p;
+		p = *e;
+
+		/* grow by one entry; o->dict.kv stays valid if realloc fails */
+		if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil){
+			free(key);
+			freeobject(v);
+			goto err;
+		}
+
+		o->dict.kv = kv;
+		kv[o->dict.nkv].key = key;
+		kv[o->dict.nkv++].value = v;
+	}
+
+	*e = p;
+
+	return o;
+err:
+	freeobject(o);
+
+	return nil;
+}
--- /dev/null
+++ b/misc.c
@@ -1,0 +1,40 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+/*
+ * 7.2.2 whitespace characters.
+ * Returns 1 if c is one of them, 0 otherwise; \0 is deliberately
+ * left out of the set.
+ */
+int
+isws(char c)
+{
+	switch(c){
+	case '\t':
+	case '\n':
+	case '\f':
+	case '\r':
+	case ' ':
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * 7.2.2 delimiter characters.
+ * Returns 1 if c is one of them, 0 otherwise.
+ */
+int
+isdelim(char c)
+{
+	switch(c){
+	case '(': case ')':
+	case '<': case '>':
+	case '[': case ']':
+	case '{': case '}':
+	case '/': case '%':
+		return 1;
+	}
+	return 0;
+}
--- a/mkfile
+++ b/mkfile
@@ -4,8 +4,11 @@
TARG=pdfs
OFILES=\
+ array.$O\
+ dict.$O\
filter.$O\
main.$O\
+ misc.$O\
name.$O\
object.$O\
pdf.$O\
--- a/name.c
+++ b/name.c
@@ -19,7 +19,9 @@
for(i = 1, o = 0; i < len; i++){
if(p[i] < '!' || p[i] > '~'){
- werrstr("invalid char");
+ if(isws(p[i]) || isdelim(p[i]))
+ break;
+ werrstr("invalid char %02x", p[i]);
goto err;
}
if(p[i] == '#'){
--- a/object.c
+++ b/object.c
@@ -7,11 +7,12 @@
pdfobject(char *p, char **e, int len)
{
Object *o;
+ char *s, *p0;
int sz;
o = nil;
for(; len > 0 && isws(*p); p++, len--);
- if(len < 2){
+ if(len < 1){
werrstr("too short");
goto err;
}
@@ -53,10 +54,34 @@
}
break;
+ case '[':
+ o = pdfarray(p, e, len);
+ break;
+
default:
if(isdigit(*p)){
- o->type = Onum;
- o->num = strtod(p, e);
+ if((o = malloc(sizeof(*o)+sz+1)) != nil){
+ o->type = Onum;
+ o->num = strtod(p, e);
+ sz = len - (*e - p);
+ p0 = *e;
+ s = p0;
+ for(; sz > 0 && isws(*s); s++, sz--);
+ if(sz > 0 && isdigit(*s)){
+ strtod(s, &p);
+ sz -= (p - s);
+ s = p;
+ for(; sz > 0 && isws(*s); s++, sz--);
+ if(sz > 0 && *s == 'R'){ /* indirect object */
+ sz--;
+ s++;
+ o->type = Oindir;
+ p0 = s;
+ len = sz;
+ }
+ }
+ *e = p0;
+ }
break;
}
werrstr("unexpected char %c", *p);
--- a/pdf.c
+++ b/pdf.c
@@ -4,25 +4,6 @@
#include <ctype.h>
#include "pdf.h"
-/* 7.2.2 whitespace */
-int
-isws(char c)
-{
- return /* \0 is missing on purpose */
- c == '\t' || c == '\n' || c == '\f' || c == '\r' ||
- c == ' ';
-}
-
-/* 7.2.2 delimeters */
-int
-isdelim(char c)
-{
- return
- c == '(' || c == ')' || c == '<' || c == '>' ||
- c == '[' || c == ']' || c == '{' || c == '}' ||
- c == '/' || c == '%';
-}
-
/*
* pre-1.5 xref section reader
* PDF>=1.5 may have BOTH (or either) old xref format and xref streams
@@ -111,16 +92,17 @@
}
for(i = 0, kv = o->dict.kv; i < o->dict.nkv; i++, kv++){
- if(strcmp(kv->key, "Root") == 0 && kv->value.type == Onum)
- pdf->root = kv->value.num;
- else if(strcmp(kv->key, "Info") == 0 && kv->value.type == Onum)
- pdf->info = kv->value.num;
+ fprint(2, "# %s %p\n", kv->key, kv->value);
+ if(strcmp(kv->key, "Root") == 0)
+ pdf->root = kv->value;
+ else if(strcmp(kv->key, "Info") == 0)
+ pdf->info = kv->value;
}
freeobject(o);
o = nil;
/* root is required */
- if(pdf->root == 0){
+ if(pdf->root == nil){
werrstr("no root");
goto err;
}
--- a/pdf.h
+++ b/pdf.h
@@ -32,12 +32,17 @@
KeyValue *kv;
int nkv;
}dict;
+
+ struct {
+ Object **e;
+ int ne;
+ }array;
};
};
struct KeyValue {
char *key;
- Object value;
+ Object *value;
};
struct Pdf {
@@ -45,8 +50,8 @@
Xref *xref;
int nxref; /* 7.5.4 xref subsection number of objects */
- u32int root; /* 7.5.5 root object */
- u32int info; /* 7.5.5 info dictionary */
+ Object *root; /* 7.5.5 root object */
+ Object *info; /* 7.5.5 info dictionary */
};
struct Xref {
@@ -76,6 +81,16 @@
* Works the same way as pdfstring, but for name objects.
*/
int pdfname(char *p, char **e, int len);
+
+/*
+ * 7.3.6 Array Objects
+ */
+Object *pdfarray(char *p, char **e, int len);
+
+/*
+ * 7.3.7 Dictionary Objects
+ */
+Object *pdfdict(char *p, char **e, int len);
int isws(char c);
int isdelim(char c);