shithub: pdffs

Download patch

ref: d9638664119a09f7fbab558635c2bdf16f5fd1c1
parent: f8f7ffe655e3a6439adf0614d0232e7520757566
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Thu Aug 27 20:43:29 EDT 2020

add more object types, parse file trailer

--- /dev/null
+++ b/array.c
@@ -1,0 +1,74 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include "pdf.h"
+
+/*
+ * 7.3.6 Array Objects
+ *
+ * Parses an array that starts at p[0] == '[' and spans at most len bytes.
+ * Elements are parsed with pdfobject() and appended to o->array.e in
+ * input order; whitespace between them is skipped.
+ *
+ * On success returns a newly allocated Oarray object and sets *e to the
+ * byte right after the closing ']'. On failure returns nil after
+ * calling freeobject() on the partially built array; *e may have been
+ * moved by pdfobject() and must not be relied upon.
+ */
+Object *
+pdfarray(char *p, char **e, int len)
+{
+	Object *o, *m;
+	Object **a;
+
+	o = nil;
+	/* the shortest valid array is "[]" */
+	if(len < 2 || p[0] != '['){
+		werrstr("invalid array");
+		goto err;
+	}
+	p++;
+	len--;
+
+	if((o = calloc(1, sizeof(*o))) == nil)
+		goto err;
+	o->type = Oarray;
+	/*
+	 * The only successful way out is the closing bracket: running out
+	 * of input before seeing it is reported as an error.
+	 */
+	for(;;){
+		for(; len > 0 && isws(*p); p++, len--);
+		if(len < 1){
+			werrstr("too short");
+			goto err;
+		}
+
+		if(p[0] == ']'){
+			p++;
+			break;
+		}
+
+		if((m = pdfobject(p, e, len)) == nil)
+			goto err;
+		len -= *e - p;
+		p = *e;
+
+		/* grow through a temporary so o->array.e stays valid on failure */
+		if((a = realloc(o->array.e, (o->array.ne+1)*sizeof(Object*))) == nil){
+			freeobject(m);
+			goto err;
+		}
+
+		o->array.e = a;
+		a[o->array.ne++] = m;
+	}
+
+	*e = p;
+
+	return o;
+err:
+	freeobject(o);
+
+	return nil;
+}
--- /dev/null
+++ b/dict.c
@@ -1,0 +1,99 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include "pdf.h"
+
+/*
+ * 7.3.7 Dictionary Objects
+ *
+ * Parses a dictionary that starts with "<<" at p and spans at most len
+ * bytes. Every entry is a key followed by a value, both parsed with
+ * pdfobject(); the key has to be a name. Entries are appended to
+ * o->dict.kv in input order, each with its own strdup'ed copy of the key.
+ *
+ * On success returns a newly allocated Odict object and sets *e to the
+ * byte right after the closing ">>". On failure returns nil after
+ * calling freeobject() on the partially built dictionary; *e may have
+ * been moved by pdfobject() and must not be relied upon.
+ */
+Object *
+pdfdict(char *p, char **e, int len)
+{
+	Object *o, *k, *v;
+	KeyValue *kv;
+	char *key;
+
+	o = nil;
+	/* the shortest valid dictionary is "<<>>" */
+	if(len < 4 || p[0] != '<' || p[1] != '<'){
+		werrstr("invalid dict");
+		goto err;
+	}
+	p += 2;
+	len -= 2;
+
+	if((o = calloc(1, sizeof(*o))) == nil)
+		goto err;
+	o->type = Odict;
+	/*
+	 * The only successful way out is the closing ">>": running out of
+	 * input before seeing it is reported as an error.
+	 */
+	for(;;){
+		for(; len > 0 && isws(*p); p++, len--);
+		/* two bytes are needed for ">>", which has to come eventually */
+		if(len < 2){
+			werrstr("too short");
+			goto err;
+		}
+
+		if(p[0] == '>' && p[1] == '>'){
+			p += 2;
+			break;
+		}
+
+		if((k = pdfobject(p, e, len)) == nil)
+			goto err;
+		if(k->type != Oname){
+			freeobject(k);
+			werrstr("expected name as a key");
+			goto err;
+		}
+		len -= *e - p;
+		p = *e;
+
+		if((v = pdfobject(p, e, len)) == nil){
+			freeobject(k);
+			goto err;
+		}
+		len -= *e - p;
+		p = *e;
+
+		/* only the name string is kept; the key object itself is dropped */
+		key = strdup(k->name);
+		freeobject(k);
+		if(key == nil){
+			freeobject(v);
+			goto err;
+		}
+
+		/* grow through a temporary so o->dict.kv stays valid on failure */
+		if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil){
+			free(key);
+			freeobject(v);
+			goto err;
+		}
+
+		o->dict.kv = kv;
+		kv[o->dict.nkv].key = key;
+		kv[o->dict.nkv++].value = v;
+	}
+
+	*e = p;
+
+	return o;
+err:
+	freeobject(o);
+
+	return nil;
+}
--- /dev/null
+++ b/misc.c
@@ -1,0 +1,40 @@
+#include <u.h>
+#include <libc.h>
+#include "pdf.h"
+
+/*
+ * 7.2.2 whitespace
+ * Returns 1 for tab, line feed, form feed, carriage return and space,
+ * 0 for anything else. \0 is left out on purpose.
+ */
+int
+isws(char c)
+{
+	switch(c){
+	case '\t':
+	case '\n':
+	case '\f':
+	case '\r':
+	case ' ':
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * 7.2.2 delimiters
+ * Returns 1 for any of ( ) < > [ ] { } / %, 0 for anything else.
+ */
+int
+isdelim(char c)
+{
+	switch(c){
+	case '(': case ')':
+	case '<': case '>':
+	case '[': case ']':
+	case '{': case '}':
+	case '/': case '%':
+		return 1;
+	}
+	return 0;
+}
--- a/mkfile
+++ b/mkfile
@@ -4,8 +4,11 @@
 TARG=pdfs
 
 OFILES=\
+	array.$O\
+	dict.$O\
 	filter.$O\
 	main.$O\
+	misc.$O\
 	name.$O\
 	object.$O\
 	pdf.$O\
--- a/name.c
+++ b/name.c
@@ -19,7 +19,9 @@
 
 	for(i = 1, o = 0; i < len; i++){
 		if(p[i] < '!' || p[i] > '~'){
-			werrstr("invalid char");
+			if(isws(p[i]) || isdelim(p[i]))
+				break;
+			werrstr("invalid char %02x", p[i]);
 			goto err;
 		}
 		if(p[i] == '#'){
--- a/object.c
+++ b/object.c
@@ -7,11 +7,12 @@
 pdfobject(char *p, char **e, int len)
 {
 	Object *o;
+	char *s, *p0;
 	int sz;
 
 	o = nil;
 	for(; len > 0 && isws(*p); p++, len--);
-	if(len < 2){
+	if(len < 1){
 		werrstr("too short");
 		goto err;
 	}
@@ -53,10 +54,34 @@
 		}
 		break;
 
+	case '[':
+		o = pdfarray(p, e, len);
+		break;
+
 	default:
 		if(isdigit(*p)){
-			o->type = Onum;
-			o->num = strtod(p, e);
+			if((o = malloc(sizeof(*o)+sz+1)) != nil){
+				o->type = Onum;
+				o->num = strtod(p, e);
+				sz = len - (*e - p);
+				p0 = *e;
+				s = p0;
+				for(; sz > 0 && isws(*s); s++, sz--);
+				if(sz > 0 && isdigit(*s)){
+					strtod(s, &p);
+					sz -= (p - s);
+					s = p;
+					for(; sz > 0 && isws(*s); s++, sz--);
+					if(sz > 0 && *s == 'R'){ /* indirect object */
+						sz--;
+						s++;
+						o->type = Oindir;
+						p0 = s;
+						len = sz;
+					}
+				}
+				*e = p0;
+			}
 			break;
 		}
 		werrstr("unexpected char %c", *p);
--- a/pdf.c
+++ b/pdf.c
@@ -4,25 +4,6 @@
 #include <ctype.h>
 #include "pdf.h"
 
-/* 7.2.2 whitespace */
-int
-isws(char c)
-{
-	return /* \0 is missing on purpose */
-		c == '\t' || c == '\n' || c == '\f' || c == '\r' ||
-		c == ' ';
-}
-
-/* 7.2.2 delimeters */
-int
-isdelim(char c)
-{
-	return
-		c == '(' || c == ')' || c == '<' || c == '>' ||
-		c == '[' || c == ']' || c == '{' || c == '}' ||
-		c == '/' || c == '%';
-}
-
 /*
  * pre-1.5 xref section reader
  * PDF>=1.5 may have BOTH (or either) old xref format and xref streams
@@ -111,16 +92,17 @@
 	}
 
 	for(i = 0, kv = o->dict.kv; i < o->dict.nkv; i++, kv++){
-		if(strcmp(kv->key, "Root") == 0 && kv->value.type == Onum)
-			pdf->root = kv->value.num;
-		else if(strcmp(kv->key, "Info") == 0 && kv->value.type == Onum)
-			pdf->info = kv->value.num;
+		fprint(2, "# %s %p\n", kv->key, kv->value);
+		if(strcmp(kv->key, "Root") == 0)
+			pdf->root = kv->value;
+		else if(strcmp(kv->key, "Info") == 0)
+			pdf->info = kv->value;
 	}
 	freeobject(o);
 	o = nil;
 
 	/* root is required */
-	if(pdf->root == 0){
+	if(pdf->root == nil){
 		werrstr("no root");
 		goto err;
 	}
--- a/pdf.h
+++ b/pdf.h
@@ -32,12 +32,17 @@
 			KeyValue *kv;
 			int nkv;
 		}dict;
+
+		struct {
+			Object **e;
+			int ne;
+		}array;
 	};
 };
 
 struct KeyValue {
 	char *key;
-	Object value;
+	Object *value;
 };
 
 struct Pdf {
@@ -45,8 +50,8 @@
 	Xref *xref;
 	int nxref; /* 7.5.4 xref subsection number of objects */
 
-	u32int root; /* 7.5.5 root object */
-	u32int info; /* 7.5.5 info dictionary */
+	Object *root; /* 7.5.5 root object */
+	Object *info; /* 7.5.5 info dictionary */
 };
 
 struct Xref {
@@ -76,6 +81,16 @@
  * Works the same way as pdfstring, but for name objects.
  */
 int pdfname(char *p, char **e, int len);
+
+/*
+ * 7.3.6 Array Objects
+ */
+Object *pdfarray(char *p, char **e, int len);
+
+/*
+ * 7.3.7 Dictionary Objects
+ */
+Object *pdfdict(char *p, char **e, int len);
 
 int isws(char c);
 int isdelim(char c);