shithub: pdffs

--- /dev/null

+++ b/array.c

@@ -1,0 +1,55 @@

+#include <u.h>

+#include <libc.h>

+#include <ctype.h>

+#include "pdf.h"

+/*
+ * 7.3.6 Array Objects
+ *
+ * Parses an array starting at p, of which at most len bytes are
+ * considered. p[0] has to be '['. Elements are parsed one by one
+ * with pdfobject() and appended to o->array.e until the closing ']'
+ * is found.
+ *
+ * On success the new Oarray object is returned and *e points right
+ * past the closing ']'. On failure nil is returned and whatever was
+ * built so far is handed to freeobject().
+ */
+Object *
+pdfarray(char *p, char **e, int len)
+{
+	Object *o, *m;
+	Object **a;
+
+	o = nil;
+	/* the shortest possible array is "[]" */
+	if(len < 2 || p[0] != '['){
+		werrstr("invalid array");
+		goto err;
+	}
+	p++;
+	len--;
+
+	/* do not touch o->type unless the allocation succeeded */
+	if((o = calloc(1, sizeof(*o))) == nil)
+		goto err;
+	o->type = Oarray;
+
+	/*
+	 * The only successful way out of this loop is the closing ']'.
+	 * Running out of input first is reported as an error instead of
+	 * returning an unterminated array.
+	 */
+	for(;;){
+		for(; len > 0 && isws(*p); p++, len--);
+		if(len < 1){
+			werrstr("too short");
+			goto err;
+		}
+
+		if(p[0] == ']'){
+			p++;
+			break;
+		}
+
+		if((m = pdfobject(p, e, len)) == nil)
+			goto err;
+		/* pdfobject left *e right after the element it consumed */
+		len -= *e - p;
+		p = *e;
+
+		/* grow through a temporary so o->array.e stays valid on failure */
+		if((a = realloc(o->array.e, (o->array.ne+1)*sizeof(Object*))) == nil){
+			freeobject(m);
+			goto err;
+		}
+		o->array.e = a;
+		a[o->array.ne++] = m;
+	}
+
+	*e = p;
+
+	return o;
+
+err:
+	freeobject(o);
+	return nil;
+}

--- /dev/null

+++ b/dict.c

@@ -1,0 +1,70 @@

+#include <u.h>

+#include <libc.h>

+#include <ctype.h>

+#include "pdf.h"

+/*
+ * 7.3.7 Dictionary Objects
+ *
+ * Parses a dictionary starting at p, of which at most len bytes are
+ * considered. The input has to start with "<<". Key/value pairs are
+ * parsed with pdfobject() until the closing ">>" is found; every key
+ * has to be a name object, its name is copied into the KeyValue entry
+ * and the key object itself is freed.
+ *
+ * On success the new Odict object is returned and *e points right
+ * past the closing ">>". On failure nil is returned and whatever was
+ * built so far is handed to freeobject().
+ */
+Object *
+pdfdict(char *p, char **e, int len)
+{
+	Object *o, *k, *v;
+	KeyValue *kv;
+	char *key;
+
+	o = nil;
+	/* the shortest possible dictionary is "<<>>" */
+	if(len < 4 || p[0] != '<' || p[1] != '<'){
+		werrstr("invalid dict");
+		goto err;
+	}
+	p += 2;
+	len -= 2;
+
+	/* do not touch o->type unless the allocation succeeded */
+	if((o = calloc(1, sizeof(*o))) == nil)
+		goto err;
+	o->type = Odict;
+
+	/*
+	 * The only successful way out of this loop is the closing ">>".
+	 * Running out of input first is reported as an error instead of
+	 * returning an unterminated dictionary.
+	 */
+	for(;;){
+		for(; len > 0 && isws(*p); p++, len--);
+		/* two bytes are needed to look at p[1] below */
+		if(len < 2){
+			werrstr("too short");
+			goto err;
+		}
+
+		if(p[0] == '>' && p[1] == '>'){
+			p += 2;
+			break;
+		}
+
+		if((k = pdfobject(p, e, len)) == nil)
+			goto err;
+		if(k->type != Oname){
+			freeobject(k);
+			werrstr("expected name as a key");
+			goto err;
+		}
+		len -= *e - p;
+		p = *e;
+
+		if((v = pdfobject(p, e, len)) == nil){
+			freeobject(k);
+			goto err;
+		}
+		len -= *e - p;
+		p = *e;
+
+		/* grow through a temporary so o->dict.kv stays valid on failure */
+		if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil){
+			freeobject(k);
+			freeobject(v);
+			goto err;
+		}
+		o->dict.kv = kv;
+
+		/* nkv is bumped only once both key and value are in place */
+		if((key = strdup(k->name)) == nil){
+			freeobject(k);
+			freeobject(v);
+			goto err;
+		}
+		freeobject(k);
+		kv[o->dict.nkv].key = key;
+		kv[o->dict.nkv++].value = v;
+	}
+
+	*e = p;
+
+	return o;
+
+err:
+	freeobject(o);
+	return nil;
+}

--- /dev/null

+++ b/misc.c

@@ -1,0 +1,22 @@

+#include <u.h>

+#include <libc.h>

+#include "pdf.h"

+/*
+ * 7.2.2 whitespace
+ *
+ * Returns 1 if c is a tab, line feed, form feed, carriage return or
+ * space, 0 otherwise. \0 is missing on purpose.
+ */
+int
+isws(char c)
+{
+	switch(c){
+	case '\t':
+	case '\n':
+	case '\f':
+	case '\r':
+	case ' ':
+		return 1;
+	}
+	return 0;
+}

+/*
+ * 7.2.2 delimiters
+ *
+ * Returns 1 if c is one of ( ) < > [ ] { } / %, 0 otherwise.
+ */
+int
+isdelim(char c)
+{
+	switch(c){
+	case '(':
+	case ')':
+	case '<':
+	case '>':
+	case '[':
+	case ']':
+	case '{':
+	case '}':
+	case '/':
+	case '%':
+		return 1;
+	}
+	return 0;
+}

--- a/mkfile

+++ b/mkfile

@@ -4,8 +4,11 @@

 TARG=pdfs

 OFILES=\

+	array.$O\

+	dict.$O\

 	filter.$O\

 	main.$O\

+	misc.$O\

 	name.$O\

 	object.$O\

 	pdf.$O\

--- a/name.c

+++ b/name.c

@@ -19,7 +19,9 @@

 	for(i = 1, o = 0; i < len; i++){

 		if(p[i] < '!' || p[i] > '~'){

-			werrstr("invalid char");

+			if(isws(p[i]) || isdelim(p[i]))

+				break;

+			werrstr("invalid char %02x", p[i]);

 			goto err;

 		if(p[i] == '#'){

--- a/object.c

+++ b/object.c

@@ -7,11 +7,12 @@

 pdfobject(char *p, char **e, int len)

 	Object *o;

+	char *s, *p0;

 	int sz;

 	o = nil;

 	for(; len > 0 && isws(*p); p++, len--);

-	if(len < 2){

+	if(len < 1){

 		werrstr("too short");

 		goto err;

@@ -53,10 +54,34 @@

 		break;

+	case '[':

+		o = pdfarray(p, e, len);

+		break;

 	default:

 		if(isdigit(*p)){

-			o->type = Onum;

-			o->num = strtod(p, e);

+			if((o = malloc(sizeof(*o)+sz+1)) != nil){

+				o->type = Onum;

+				o->num = strtod(p, e);

+				sz = len - (*e - p);

+				p0 = *e;

+				s = p0;

+				for(; sz > 0 && isws(*s); s++, sz--);

+				if(sz > 0 && isdigit(*s)){

+					strtod(s, &p);

+					sz -= (p - s);

+					s = p;

+					for(; sz > 0 && isws(*s); s++, sz--);

+					if(sz > 0 && *s == 'R'){ /* indirect object */

+						sz--;

+						s++;

+						o->type = Oindir;

+						p0 = s;

+						len = sz;

+					}

+				}

+				*e = p0;

+			}

 			break;

 		werrstr("unexpected char %c", *p);

--- a/pdf.c

+++ b/pdf.c

@@ -4,25 +4,6 @@

 #include <ctype.h>

 #include "pdf.h"

-/* 7.2.2 whitespace */

-int

-isws(char c)

-{

-	return /* \0 is missing on purpose */

-		c == '\t' || c == '\n' || c == '\f' || c == '\r' ||

-		c == ' ';

-}

-/* 7.2.2 delimeters */

-int

-isdelim(char c)

-{

-	return

-		c == '(' || c == ')' || c == '<' || c == '>' ||

-		c == '[' || c == ']' || c == '{' || c == '}' ||

-		c == '/' || c == '%';

-}

/*

  * pre-1.5 xref section reader

  * PDF>=1.5 may have BOTH (or either) old xref format and xref streams

@@ -111,16 +92,17 @@

 	for(i = 0, kv = o->dict.kv; i < o->dict.nkv; i++, kv++){

-		if(strcmp(kv->key, "Root") == 0 && kv->value.type == Onum)

-			pdf->root = kv->value.num;

-		else if(strcmp(kv->key, "Info") == 0 && kv->value.type == Onum)

-			pdf->info = kv->value.num;

+		fprint(2, "# %s %p\n", kv->key, kv->value);

+		if(strcmp(kv->key, "Root") == 0)

+			pdf->root = kv->value;

+		else if(strcmp(kv->key, "Info") == 0)

+			pdf->info = kv->value;

 	freeobject(o);

 	o = nil;

 	/* root is required */

-	if(pdf->root == 0){

+	if(pdf->root == nil){

 		werrstr("no root");

 		goto err;

--- a/pdf.h

+++ b/pdf.h

@@ -32,12 +32,17 @@

 			KeyValue *kv;

 			int nkv;

 		}dict;

+		struct {

+			Object **e;

+			int ne;

+		}array;

};

};

 struct KeyValue {

 	char *key;

-	Object value;

+	Object *value;

};

 struct Pdf {

@@ -45,8 +50,8 @@

 	Xref *xref;

 	int nxref; /* 7.5.4 xref subsection number of objects */

-	u32int root; /* 7.5.5 root object */

-	u32int info; /* 7.5.5 info dictionary */

+	Object *root; /* 7.5.5 root object */

+	Object *info; /* 7.5.5 info dictionary */

};

 struct Xref {

@@ -76,6 +81,16 @@

  * Works the same way as pdfstring, but for name objects.

*/

 int pdfname(char *p, char **e, int len);

+/*

+ * 7.3.6 Array Objects

+ */

+Object *pdfarray(char *p, char **e, int len);

+/*

+ * 7.3.7 Dictionary Objects

+ */

+Object *pdfdict(char *p, char **e, int len);

 int isws(char c);

 int isdelim(char c);

--

⑨