shithub: pdffs

Download patch

ref: a080ae88c6c65503575da220cb131ddada107bf6
parent: 34238e0feb181a0c120561486d4c86b054d112bc
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Fri Aug 28 22:45:56 EDT 2020

rewrite the API, support more object types and actual evaluation

--- a/array.c
+++ b/array.c
@@ -1,41 +1,35 @@
 #include <u.h>
 #include <libc.h>
-#include <ctype.h>
+#include <bio.h>
 #include "pdf.h"
 
+/* 7.3.6 Array Objects */
+
 Object *
-pdfarray(char *p, char **e, int len)
+pdfarray(Biobuf *b)
 {
 	Object *o, *m;
 	Object **a;
+	int c, noel;
 
-	o = nil;
-	if(len < 3 || p[0] != '['){
-		werrstr("invalid array");
-		goto err;
-	}
-	p++;
-	len--;
-
 	o = calloc(1, sizeof(*o));
 	o->type = Oarray;
-	for(; len >= 1;){
-		for(; len > 0 && isws(*p); p++, len--);
-		if(len < 1){
-			werrstr("too short");
+	Bgetc(b); /* throw away '[' */
+
+	for(noel = 0;;){
+		if((c = Bgetc(b)) < 0 || c == ']')
+			break;
+		if(noel){
+			werrstr("no ']'");
 			goto err;
 		}
 
-		if(p[0] == ']'){
-			p++;
-			break;
+		Bungetc(b);
+		if((m = pdfobject(b)) == nil){
+			noel = 1;
+			continue;
 		}
 
-		if((m = pdfobject(p, e, len)) == nil)
-			goto err;
-		len -= *e - p;
-		p = *e;
-
 		if((a = realloc(o->array.e, (o->array.ne+1)*sizeof(Object*))) == nil){
 			freeobject(m);
 			goto err;
@@ -45,11 +39,14 @@
 		a[o->array.ne++] = m;
 	}
 
-	*e = p;
+	if(c != ']'){
+		werrstr("no ']'");
+		goto err;
+	}
 
 	return o;
 err:
+	werrstr("array: %r");
 	freeobject(o);
-
 	return nil;
 }
--- a/dict.c
+++ b/dict.c
@@ -1,70 +1,77 @@
 #include <u.h>
 #include <libc.h>
-#include <ctype.h>
+#include <bio.h>
 #include "pdf.h"
 
+/* 7.3.7 Dictionary Objects */
+
 Object *
-pdfdict(char *p, char **e, int len)
+pdfdict(Biobuf *b)
 {
 	Object *o, *k, *v;
 	KeyValue *kv;
+	int c, nokey;
 
-	o = nil;
-	if(len < 4 || p[0] != '<' || p[1] != '<'){
-		werrstr("invalid dict");
-		goto err;
-	}
-	p += 2;
-	len -= 2;
+	/* skip '<<' */
+	Bseek(b, 2, 1);
 
+	k = v = nil;
 	o = calloc(1, sizeof(*o));
 	o->type = Odict;
-	for(; len >= 2;){
-		for(; len > 2 && isws(*p); p++, len--);
-		if(len < 1){
-			werrstr("too short");
+	for(nokey = 0;;){
+		if((c = Bgetc(b)) < 0)
 			goto err;
+		if(c == '>'){
+			if(Bgetc(b) == '>')
+				break;
+			werrstr("no '>>'");
+			goto err;
 		}
-
-		if(p[0] == '>' && p[1] == '>'){
-			p += 2;
-			break;
+		if(nokey){
+			werrstr("no '>>'");
+			goto err;
 		}
 
-		if((k = pdfobject(p, e, len)) == nil)
+		Bungetc(b);
+		if((k = pdfobject(b)) == nil){
+			nokey = 1;
+			continue;
+		}
+		if((v = pdfobject(b)) == nil)
 			goto err;
 		if(k->type != Oname){
-			freeobject(k);
 			werrstr("expected name as a key");
 			goto err;
 		}
-		len -= *e - p;
-		p = *e;
 
-		if((v = pdfobject(p, e, len)) == nil){
-			freeobject(k);
+		if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil)
 			goto err;
-		}
-		len -= *e - p;
-		p = *e;
 
-		if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil){
-			freeobject(k);
-			freeobject(v);
-			goto err;
-		}
-
 		o->dict.kv = kv;
 		kv[o->dict.nkv].key = strdup(k->name);
 		freeobject(k);
 		kv[o->dict.nkv++].value = v;
+		k = v = nil;
 	}
 
-	*e = p;
-
 	return o;
 err:
 	freeobject(o);
+	freeobject(k);
+	freeobject(v);
+	werrstr("dict: %r");
 
 	return nil;
+}
+
+Object *
+pdfdictget(Object *o, char *name)
+{
+	int i;
+
+	if(o == nil || o->type != Odict || name == nil)
+		return nil;
+	for(i = 0; i < o->dict.nkv && strcmp(name, o->dict.kv[i].key) != 0; i++);
+
+	return i < o->dict.nkv ? o->dict.kv[i].value : nil;
 }
--- a/eval.c
+++ b/eval.c
@@ -1,23 +1,34 @@
 #include <u.h>
 #include <libc.h>
+#include <bio.h>
 #include "pdf.h"
 
 int
 pdfeval(Pdf *pdf, Object *o)
 {
+	Object *d;
 	Xref *x;
 	int i;
 
-	if(o->type != Oindir)
+	if(o == nil || o->type != Oindir)
 		return 0;
 
-	for(i = 0; i < pdf->nxref; i++){
-		x = &pdf->xref[i];
-		if(x->id == o->indir.id)
-			return 0;
+	for(i = 0; i < pdf->nxref && pdf->xref[i].id != o->indir.id; i++);
+	if(i >= pdf->nxref){
+		werrstr("no object id %d in xref", o->indir.id);
+		return -1;
 	}
+	x = &pdf->xref[i];
 
-	werrstr("no object id %d in xref", o->indir.id);
+	if(Bseek(pdf->bio, x->off, 0) != x->off){
+		werrstr("xref seek failed");
+		return -1;
+	}
+	if((d = pdfobject(pdf->bio)) == nil)
+		return -1;
+	*o = *d; /* shallow: children now belong to *o. NOTE(review): inline str/name bytes past the struct are not copied */
+	if(d->type != Onull) /* pdfobject returns a shared static for null: never clear or free it */
+		free(d); /* release the shell only; freeobject() would also free the children just moved into *o */
 
-	return -1;
+	return 0;
 }
--- a/filter.c
+++ b/filter.c
@@ -1,5 +1,6 @@
 #include <u.h>
 #include <libc.h>
+#include <bio.h>
 #include "pdf.h"
 
 /*
--- a/main.c
+++ b/main.c
@@ -1,8 +1,11 @@
 #include <u.h>
 #include <libc.h>
 #include <thread.h>
+#include <bio.h>
 #include "pdf.h"
 
+int mainstacksize = 32768;
+
 static void
 usage(void)
 {
@@ -15,6 +18,7 @@
 {
 	int fd;
 	Pdf *pdf;
+	Object *v;
 
 	quotefmtinstall();
 
@@ -40,7 +44,11 @@
 	if((fd = open(argv[0], OREAD)) < 0)
 		sysfatal("%r");
 	if((pdf = pdfopen(fd)) == nil)
-		sysfatal("pdfopen: %r");
+		sysfatal("%r");
+	if((v = pdfdictget(pdf->info, "Creator")) != nil)
+		fprint(2, "creator: %s\n", v->str);
+	if((v = pdfdictget(pdf->info, "Producer")) != nil)
+		fprint(2, "producer: %s\n", v->str);
 	pdfclose(pdf);
 
 	threadexitsall(nil);
--- a/misc.c
+++ b/misc.c
@@ -1,10 +1,11 @@
 #include <u.h>
 #include <libc.h>
+#include <bio.h>
 #include "pdf.h"
 
 /* 7.2.2 whitespace */
 int
-isws(char c)
+isws(int c)
 {
 	return /* \0 is missing on purpose */
 		c == '\t' || c == '\n' || c == '\f' || c == '\r' ||
@@ -13,7 +14,7 @@
 
 /* 7.2.2 delimeters */
 int
-isdelim(char c)
+isdelim(int c)
 {
 	return
 		c == '(' || c == ')' || c == '<' || c == '>' ||
--- a/name.c
+++ b/name.c
@@ -1,104 +1,132 @@
 #include <u.h>
 #include <libc.h>
+#include <bio.h>
 #include "pdf.h"
 
-int
-pdfname(char *p, char **e, int len)
+/* 7.3.5 Name Objects */
+
+Object *
+pdfname(Biobuf *b)
 {
-	int i, o;
+	Object *o;
+	char *s, *r, hex[3];
+	int c, sz, maxsz;
 
-	if(len < 2){
-		werrstr("too short");
-		goto err;
-	}
+	Bgetc(b); /* skip '/' */
 
-	if(p[0] != '/'){
-		werrstr("invalid first char");
+	maxsz = 32;
+	if((s = malloc(maxsz)) == nil)
 		goto err;
-	}
 
-	for(i = 1, o = 0; i < len; i++){
-		if(p[i] < '!' || p[i] > '~'){
-			if(isws(p[i]) || isdelim(p[i]))
+	for(sz = 0;;){
+		if((c = Bgetc(b)) < 0){
+			if(c == Beof)
 				break;
-			werrstr("invalid char %02x", p[i]);
 			goto err;
 		}
-		if(p[i] == '#'){
-			i++;
-			if(i+2 > len){
-				werrstr("hex too short");
+
+		if(isws(c) || isdelim(c)){
+			Bungetc(b);
+			break;
+		}
+		if(c < '!' || c > '~'){
+			werrstr("invalid char %02x", c);
+			goto err;
+		}
+		if(c == '#'){
+			if((c = Bgetc(b)) < 0)
 				goto err;
-			}
-			if(dec16((uchar*)p+o, 1, p+i, 2) != 1){
+			hex[0] = c;
+			if((c = Bgetc(b)) < 0)
+				goto err;
+			hex[1] = c;
+			if(dec16((uchar*)hex, 1, hex, 2) != 1){
 				werrstr("invalid hex");
 				goto err;
 			}
-			o++;
-			i++;
-		}else{
-			p[o++] = p[i];
+			c = hex[0];
 		}
+		if(sz+1 >= maxsz){
+			maxsz *= 2;
+			if((r = realloc(s, maxsz)) == nil)
+				goto err;
+			s = r;
+		}
+		s[sz++] = c;
 	}
 
-	p[o] = 0;
-	*e = p + i;
+	if((o = malloc(sizeof(*o) + sz + 1)) != nil){
+		memmove(o->name, s, sz);
+		o->name[sz] = 0;
+		o->type = Oname;
+		free(s);
+		return o;
+	}
 
-	return o;
 err:
 	werrstr("name: %r");
-	return -1;
+	free(s);
+	return nil;
 }
 
 #ifdef TEST
 static struct {
-	char *i;
-	int   len;
-	char *o;
-	int   r;
-	int   e;
+	char *in;
+	char *out;
 }t[] = {
-	{"/SimpleName",            11, "SimpleName", 10},
-	{"WrongName",               9, nil,          -1},
-	{"/.$()",                   5, ".$()",        4},
-	{"/#30",                    4, "0",           1},
-	{"/#3",                     3, nil,          -1},
-	{"/#G0",                    4, nil,          -1},
-	{"/#",                      2, nil,          -1},
-	{"/Wrong Char",            11, nil,          -1},
-	{"/\xff",                   2, nil,          -1},
+	{"/SimpleName", "SimpleName"},
+	{"/.$()", ".$"},
+	{"/#30", "0"},
+	{"/#3", nil},
+	{"/#G0", nil},
+	{"/#", nil},
+	{"/Space Between", "Space"},
+	{"/Two/Names", "Two"},
+	{"/\xff", nil,},
 };
 
+static char *s;
+static int off, n;
+
+static int
+rd(Biobufhdr *, void *data, long sz)
+{
+	if(sz > n-off)
+		sz = n-off;
+	memmove(data, s+off, sz);
+	off += sz;
+	return sz;
+}
+
 void
 test_pdfname(void)
 {
-	char *e;
-	int i, r;
+	Object *o;
+	Biobuf b;
+	int i;
 
 	fprint(2, "pdfname\n");
 	for(i = 0; i < nelem(t); i++){
+		s = t[i].in;
+		n = strlen(s);
+		off = 0;
+		Binit(&b, -1, OREAD);
+		Biofn(&b, rd);
+
 		fprint(2, "\t%d: ", i);
-		r = pdfname(t[i].i, &e, t[i].len);
-		if(r != t[i].r){
-			fprint(2, "expected r=%d, got %d", t[i].r, r);
-			if(r < 0)
-				fprint(2, " (%r)\n");
-			else
-				fprint(2, "\n");
-			continue;
-		}else if(r >= 0){
-			if(t[i].i+t[i].len != e){
-				fprint(2, "expected e=%p, got %p\n", t[i].i+t[i].len, e);
-				continue;
-			}else if(strcmp(t[i].o, t[i].i) != 0){
-				fprint(2, "expected %q, got %q\n", t[i].o, t[i].i);
-				continue;
-			}
-		}
-		fprint(2, "OK");
-		if(r < 0)
-			fprint(2, " (%r)");
-		fprint(2, "\n");
+		o = pdfname(&b);
+		if(o == nil && t[i].out != nil)
+			fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
+		else if(o != nil && t[i].out == nil)
+			fprint(2, "ERROR: expected error, got %q\n", o->name);
+		else if(o == nil && t[i].out == nil)
+			fprint(2, "OK (%r)\n");
+		else if(strcmp(o->name, t[i].out) != 0)
+			fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->name);
+		else
+			fprint(2, "OK\n");
+		freeobject(o);
+		Bterm(&b);
 	}
 }
 #endif
--- a/object.c
+++ b/object.c
@@ -1,98 +1,133 @@
 #include <u.h>
 #include <libc.h>
 #include <ctype.h>
+#include <bio.h>
 #include "pdf.h"
 
+Object *pdfstring(Biobuf *b);
+Object *pdfname(Biobuf *b);
+Object *pdfarray(Biobuf *b);
+Object *pdfdict(Biobuf *b);
+
+static Object null = {
+	.type = Onull,
+};
+
+/* Parse one object of any type from b; returns nil on failure. A parsed "null" is a shared static object: do not modify it. */
 Object *
-pdfobject(char *p, char **e, int len)
+pdfobject(void *b)
 {
-	Object *o;
-	char *s, *p0;
-	int sz, gen;
+	Object *o, *o2;
+	vlong off;
+	int c, tf;
 
-	o = nil;
-	for(; len > 0 && isws(*p); p++, len--);
-	if(len < 1){
-		werrstr("too short");
+	o = o2 = nil;
+	do; while(isws(c = Bgetc(b)));
+	if(c < 0)
 		goto err;
-	}
 
-	if(*p < 1){
-		werrstr("unexpected non-ascii char");
-		goto err;
-	}
-
-	switch(*p){
+	switch(c){
 	case '<': /* dictionary or a string */
-		if(len < 2){
-			werrstr("too short");
-			goto err;
+		c = Bgetc(b);
+		if(c == '<'){
+			Bseek(b, -2, 1);
+			return pdfdict(b);
 		}
-		if(p[1] == '<'){ /* dictionary */
-			o = pdfdict(p, e, len);
-			break;
-		}
+		Bungetc(b);
 		/* fall through */
 
-	case '(': /* string */
-		if((sz = pdfstring(p, e, len)) < 0)
-			goto err;
-		if((o = malloc(sizeof(*o)+sz+1)) != nil){
-			o->type = Ostr;
-			o->str = (char*)(o+1);
-			strcpy(o->str, p);
-		}
-		break;
+	case '(':
+		Bungetc(b);
+		return pdfstring(b);
 
 	case '/':
-		if((sz = pdfname(p, e, len)) < 0)
-			goto err;
-		if((o = malloc(sizeof(*o)+sz+1)) != nil){
-			o->type = Oname;
-			o->str = (char*)(o+1);
-			strcpy(o->str, p);
-		}
-		break;
+		Bungetc(b);
+		return pdfname(b);
 
 	case '[':
-		o = pdfarray(p, e, len);
-		break;
+		Bungetc(b);
+		return pdfarray(b);
 
+	case 'n':
+		off = Boffset(b);
+		if(Bgetc(b) == 'u' && Bgetc(b) == 'l' && Bgetc(b) == 'l' && (isws(c = Bgetc(b)) || isdelim(c))){
+			Bungetc(b);
+			return &null;
+		}
+		Bseek(b, off, 0);
+		c = 'n';
+		goto unexpected;
+
+	case 't':
+		off = Boffset(b);
+		tf = 1;
+		if(Bgetc(b) == 'r' && Bgetc(b) == 'u' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
+			goto bool;
+		Bseek(b, off, 0);
+		c = 't';
+		goto unexpected;
+
+	case 'f':
+		off = Boffset(b);
+		tf = 0;
+		if(Bgetc(b) == 'a' && Bgetc(b) == 'l' && Bgetc(b) == 's' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
+			goto bool;
+		Bseek(b, off, 0);
+		c = 'f';
+		goto unexpected;
+bool:
+		Bungetc(b);
+		if((o = malloc(sizeof(*o))) == nil)
+			goto err;
+		o->type = Obool;
+		o->bool = tf;
+		return o;
+
 	default:
-		if(isdigit(*p)){
-			if((o = malloc(sizeof(*o))) != nil){
-				o->type = Onum;
-				o->num = strtod(p, e);
-				sz = len - (*e - p);
-				p0 = *e;
-				s = p0;
-				for(; sz > 0 && isws(*s); s++, sz--);
-				if(sz > 0 && isdigit(*s)){
-					gen = strtod(s, &p);
-					sz -= (p - s);
-					s = p;
-					for(; sz > 0 && isws(*s); s++, sz--);
-					if(sz > 0 && *s == 'R'){ /* indirect object */
-						s++;
-						o->type = Oindir;
-						o->indir.id = o->num;
-						o->indir.gen = gen;
-						p0 = s;
-					}
-				}
-				*e = p0;
+		if((o = malloc(sizeof(*o))) == nil)
+			goto err;
+		o->type = Onum; /* set right away: every "goto err" below passes o to freeobject(), which switches on the type */
+		if(!isdigit(c)){
+unexpected:
+			Bungetc(b);
+			werrstr("unexpected char '%c'", c);
+			goto err;
+		}
+		Bungetc(b); /* put the digit back: it could be a number or an indirect object */
+		Bgetd(b, &o->num); /* get the first number */
+		off = Boffset(b); /* seek here if not an indirect object later */
+
+		if((o2 = pdfobject(b)) != nil && o2->type == Onum){ /* second object is number too */
+			do; while(isws(c = Bgetc(b)));
+			if(c < 0)
+				goto err;
+			if(c == 'R'){ /* indirect object */
+				o->type = Oindir;
+				o->indir.id = o->num;
+				o->indir.gen = o2->num;
+				freeobject(o2);
+				return o;
 			}
-			break;
+			if(c == 'o' && Bgetc(b) == 'b' && Bgetc(b) == 'j'){ /* object */
+				freeobject(o2);
+				freeobject(o); /* the id is dropped along with the generation; FIXME put into a map */
+				return pdfobject(b);
+			}
 		}
-		werrstr("unexpected char %c", *p);
-		goto err;
-	}
 
-	if(o != nil)
+		/* just a number, go back and return it */
+		if(Bseek(b, off, 0) != off){
+			werrstr("seek failed");
+			goto err;
+		}
+		freeobject(o2); /* look-ahead is done with: the stream is back just after the number */
 		return o;
+	}
+
 err:
 	werrstr("object: %r");
 	freeobject(o);
+	freeobject(o2);
 	return nil;
 }
 
@@ -99,7 +134,32 @@
 void
 freeobject(Object *o)
 {
+	int i;
+
 	if(o == nil)
 		return;
+
+	switch(o->type){
+	case Onull:
+		return;
+
+	case Obool:
+	case Onum:
+	case Ostr:
+	case Oname:
+		break;
+
+	case Oarray:
+		for(i = 0; i < o->array.ne; i++)
+			freeobject(o->array.e[i]);
+		free(o->array.e);
+		break;
+
+	case Odict:
+	case Ostream:
+	case Oindir:
+		break;
+	}
+
 	free(o);
 }
--- a/pdf.c
+++ b/pdf.c
@@ -33,8 +33,8 @@
 	/* store non-free objects only */
 	newnxref = pdf->nxref;
 	for(e = s, i = 0; i < nxref; i++, e += 20){
-		if(e[10] != ' ' || e[18] != '\r' || e[19] != '\n'){
-			werrstr("invalid xref line");
+		if(!isspace(e[10]) || !isspace(e[18]) || !isspace(e[19])){
+			werrstr("invalid xref line (%d/%d)", i, nxref);
 			goto err;
 		}
 		xref.id = xref0 + i;
@@ -76,15 +76,11 @@
 trailerread(Pdf *pdf)
 {
 	int i;
-	char *s, *e;
 	Object *o;
 	KeyValue *kv;
 
-	o = nil;
-	if((s = Brdstr(pdf->bio, 0, 1)) == nil || (o = pdfobject(s, &e, Blinelen(pdf->bio))) == nil)
+	if((o = pdfobject(pdf->bio)) == nil)
 		goto err;
-	free(s);
-	s = nil;
 
 	if(o->type != Odict){
 		werrstr("isn't a dictionary");
@@ -92,7 +88,6 @@
 	}
 
 	for(i = 0, kv = o->dict.kv; i < o->dict.nkv; i++, kv++){
-		fprint(2, "# %s %p\n", kv->key, kv->value);
 		if(strcmp(kv->key, "Root") == 0)
 			pdf->root = kv->value;
 		else if(strcmp(kv->key, "Info") == 0)
@@ -112,7 +107,6 @@
 	return 0;
 err:
 	freeobject(o);
-	free(s);
 	return -1;
 }
 
@@ -127,6 +121,7 @@
 	int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
 	int i, n, off;
 
+	b = nil;
 	if((pdf = calloc(1, sizeof(*pdf))) == nil || (b = Bfdopen(fd, OREAD)) == nil)
 		goto err;
 	pdf->bio = b;
@@ -204,6 +199,7 @@
 
 	return pdf;
 err:
+	werrstr("pdfopen: %r [at %p]", (void*)Boffset(b));
 	pdfclose(pdf);
 	return nil;
 }
--- a/pdf.h
+++ b/pdf.h
@@ -20,8 +20,11 @@
 	union {
 		int bool;
 		double num;
-		char *str;
-		char *name;
+		struct {
+			int len;
+			char str[1];
+		};
+		char name[1];
 
 		struct {
 			u32int id;
@@ -50,8 +53,8 @@
 	Xref *xref;
 	int nxref; /* 7.5.4 xref subsection number of objects */
 
-	Object *root; /* 7.5.5 root object */
-	Object *info; /* 7.5.5 info dictionary */
+	Object *root; /* 7.7.2 root object */
+	Object *info; /* 14.3.3 info dictionary */
 };
 
 struct Xref {
@@ -63,45 +66,16 @@
 Pdf *pdfopen(int fd);
 void pdfclose(Pdf *pdf);
 
-/*
- * General function to parse an object of any type.
- */
-Object *pdfobject(char *p, char **e, int len);
-
+Object *pdfobject(void *b);
 void freeobject(Object *o);
 
 /*
- * 7.3.4 String Objects
- *
- * Rewrites the string in place with null termination and returns the
- * length in bytes, without the null terminator.
- * Returns < 0 if parsing failed.
- * (*e) is advanced to the position after the string pointed by (p).
- */
-int pdfstring(char *p, char **e, int len);
-
-/*
- * 7.3.5 Name Objects
- *
- * Works the same way as pdfstring, but for name objects.
- */
-int pdfname(char *p, char **e, int len);
-
-/*
- * 7.3.6 Array Objects
- */
-Object *pdfarray(char *p, char **e, int len);
-
-/*
- * 7.3.7 Dictionary Objects
- */
-Object *pdfdict(char *p, char **e, int len);
-
-/*
  * If the object is indirect, resolve it. Operation is not recursive, ie
  * values of a dictionary won't be resolved automatically.
  */
 int pdfeval(Pdf *pdf, Object *o);
 
-int isws(char c);
-int isdelim(char c);
+int isws(int c);
+int isdelim(int c);
+
+Object *pdfdictget(Object *o, char *name);
--- a/string.c
+++ b/string.c
@@ -1,8 +1,11 @@
 #include <u.h>
 #include <libc.h>
+#include <bio.h>
 #include "pdf.h"
 
-static int esc[] = {
+/* 7.3.4 String Objects */
+
+static char esc[] = {
 	['n'] = '\n',
 	['r'] = '\r',
 	['t'] = '\t',
@@ -14,175 +17,207 @@
 	['\n'] = -1,
 };
 
-static int
-pdfstringhex(char *p, char **e, int len)
+static Object *
+stringhex(Biobuf *b)
 {
-	int i;
+	char *s;
+	Object *o;
+	int len, n;
 
-	for(i = 1; i < len; i += 1){
-		if(p[i] == '>')
-			break;
+	if((s = Brdstr(b, '>', 0)) == nil)
+		return nil;
+	len = Blinelen(b) - 1;
+	if(s[len] != '>'){
+		werrstr("no '>'");
+		free(s);
+		return nil;
 	}
-	if(i >= len){
-		werrstr("hex not closed");
-		return -1;
-	}
-	p[i] = '0'; /* the final zero may be missing */
-	*e = p+i+1;
-	i = dec16((uchar*)p, i/2, p+1, i) == i/2 ? i/2 : -1;
-	if(i < 0)
+	s[len] = '0'; /* the final zero may be missing */
+	n = len/2;
+	o = nil;
+	if(dec16((uchar*)s, n, s+1, len) != n){
 		werrstr("invalid hex");
-	p[i] = 0;
-	return i;
+	}else if((o = malloc(sizeof(*o) + n + 1)) != nil){
+		memmove(o->str, s, n);
+		o->str[n] = 0;
+		o->len = n;
+		o->type = Ostr;
+	}
+
+	free(s);
+	return o;
 }
 
-int
-pdfstring(char *p, char **e, int len)
+Object *
+pdfstring(Biobuf *b)
 {
-	Rune r;
-	int c, j, i, o, n, paren;
+	Object *o;
+	char *s, *r;
 	char oct[4];
+	int i, c, paren, sz, maxsz;
 
-	if(len < 2){
-		werrstr("too short");
-		goto err;
-	}
+	maxsz = 64;
+	if((s = malloc(maxsz)) == nil)
+		return nil;
 
-	paren = 0;
-	for(i = o = 0; i < len;){
-		if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
-			werrstr("rune error at byte %d", n);
-			return -1;
-		}
+	for(paren = sz = 0;;){
+		if((c = Bgetc(b)) <= 0)
+			break;
 
-		if(i == 0){
-			if(r == '('){
-				paren = 1;
-				i++;
-				continue;
+		switch(c){
+		case '<':
+			if(sz == 0 && paren == 0){ /* only a leading '<' starts a hex string; inside parens it is literal */
+				Bungetc(b);
+				free(s); /* stringhex() reads into its own buffer */
+				return stringhex(b);
 			}
-			if(r == '<'){
-				len = pdfstringhex(p, e, len);
-				if(len < 0)
-					goto err;
-				return len;
-			}
-			werrstr("invalid first char");
-			goto err;
-		}
+			break;
 
-		if(r == '\\'){
-			if(++i >= len){
-				werrstr("escaped char out of string len");
-				goto err;
-			}
-			if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
-				werrstr("rune error at byte %d", i);
-				goto err;
-			}
-			if(r >= '0' && r <= '9'){ /* octal */
-				n = 0;
-				for(j = 0; j < 3 && i < len && p[i] >= '0' && p[i] <= '9'; j++, i++)
-					oct[j] = p[i];
-				oct[j] = 0;
-				c = strtol(oct, nil, 8);
-			}else if(r >= nelem(esc) || (c = esc[r]) == 0){
-				werrstr("unknown escape char at byte %d", i);
-				goto err;
-			}
-			r = c;
-			i += n;
-			if(c < 0)
-				continue;
-		}else if(r == '('){
+		case '(':
 			paren++;
-			i++;
 			continue;
-		}else if(r == ')'){
+
+		case ')':
 			paren--;
-			i++;
-			if(paren == 0)
+			if(paren < 1){
+				c = 0;
 				break;
+			}
 			continue;
-		}else{
-			i += n;
+
+		case '\\':
+			if((c = Bgetc(b)) <= 0)
+				break;
+			if(c >= '0' && c <= '7'){ /* octal */
+				oct[0] = c;
+				for(i = 1; i < 3 && (c = Bgetc(b)) >= '0' && c <= '7'; i++)
+					oct[i] = c;
+				if(c <= 0)
+					break;
+				if(c < '0' || c > '7')
+					Bungetc(b);
+				oct[i] = 0;
+				c = strtol(oct, nil, 8);
+			}else if(c >= nelem(esc) || esc[c] == 0){
+				werrstr("unknown escape char %c", c); /* c still holds the offending byte here */
+				goto err;
+			}else if((c = esc[c]) < 0) /* negative table entry (escaped newline): line continuation, store nothing */
+				continue;
+			break;
+
+		default:
+			if(paren < 1){
+				werrstr("unexpected char '%c'", c);
+				goto err;
+			}
+			break;
 		}
 
-		o += runetochar(p+o, &r);
+		if(c <= 0)
+			break;
+		if(sz+1 > maxsz){
+			maxsz *= 2;
+			if((r = realloc(s, maxsz)) == nil)
+				goto err;
+			s = r;
+		}
+		s[sz++] = c;
 	}
 
-	if(paren > 0){
-		werrstr("non-closed paren");
+	if(paren != 0){
+		werrstr("bad paren");
 		goto err;
 	}
+	if(c < 0){
+		werrstr("short");
+		goto err;
+	}
 
-	p[o] = 0;
-	*e = p + i;
+	if(c >= 0 && (o = malloc(sizeof(*o) + sz + 1)) != nil){
+		memmove(o->str, s, sz);
+		o->str[sz] = 0;
+		o->len = sz;
+		o->type = Ostr;
+		free(s);
+		return o;
+	}
 
-	return o;
 err:
+	free(s);
 	werrstr("string: %r");
-	return -1;
+	return nil;
 }
 
 #ifdef TEST
 static struct {
-	char *i;
-	int   len;
-	char *o;
-	int   r;
-	int   e;
+	char *in;
+	char *out;
 }t[] = {
-	{"(simple string)",        15, "simple string", 13},
-	{"(non-closed paren",      17, nil,             -1},
-	{"wrong first char",       16, nil,             -1},
-	{"(parens((()((())))()))", 22, "parens",         6},
-	{"(\\0053)",                7, "\x053",          2},
-	{"(\\053)",                 6, "+",              1},
-	{"(\\53)",                  5, "+",              1},
-	{"()",                      2, "",               0},
-	{")",                       1, nil,             -1},
-	{"(\\)\\()",                6, ")(",             2},
-	{"(\\\\)",                  4, "\\",             1},
-	{"a",                       1, nil,             -1},
-	{"(1\\\n2)",                6, "12",             2},
-	{"<323130>",                8, "210",            3},
-	{"<32313>",                 7, "210",            3},
-	{"<>",                      2, "",               0},
-	{"<",                       1, nil,             -1},
-	{"<zz>",                    4, nil,             -1},
+	{"", nil},
+	{"(test, success)", "test, success"},
+	{"(simple string)", "simple string"},
+	{"(non-closed paren", nil},
+	{"wrong first char", nil},
+	{"(parens((()((())))()))", "parens"},
+	{"(\\0053)", "\x053"},
+	{"(\\053)", "+"},
+	{"(\\53)", "+"},
+	{"()", ""},
+	{")", nil},
+	{"(\\)\\()", ")("},
+	{"(\\\\)", "\\"},
+	{"a", nil},
+	{"(1\\\n2)", "12"},
+	{"<323130>", "210"},
+	{"<32313>", "210"},
+	{"<>", ""},
+	{"<", nil},
+	{"<zz>", nil},
+	{">", nil},
 };
 
+static char *s;
+static int off, n;
+
+static int
+rd(Biobufhdr *, void *data, long sz)
+{
+	if(sz > n-off)
+		sz = n-off;
+	memmove(data, s+off, sz);
+	off += sz;
+	return sz;
+}
+
 void
 test_pdfstring(void)
 {
-	char *e;
-	int i, r;
+	Object *o;
+	Biobuf b;
+	int i;
 
 	fprint(2, "pdfstring\n");
 	for(i = 0; i < nelem(t); i++){
+		s = t[i].in;
+		n = strlen(s);
+		off = 0;
+		Binit(&b, -1, OREAD);
+		Biofn(&b, rd);
+
 		fprint(2, "\t%d: ", i);
-		r = pdfstring(t[i].i, &e, t[i].len);
-		if(r != t[i].r){
-			fprint(2, "expected r=%d, got %d", t[i].r, r);
-			if(r < 0)
-				fprint(2, " (%r)\n");
-			else
-				fprint(2, "\n");
-			continue;
-		}else if(r >= 0){
-			if(t[i].i+t[i].len != e){
-				fprint(2, "expected e=%p, got %p\n", t[i].i+t[i].len, e);
-				continue;
-			}else if(strcmp(t[i].o, t[i].i) != 0){
-				fprint(2, "expected %q, got %q\n", t[i].o, t[i].i);
-				continue;
-			}
-		}
-		fprint(2, "OK");
-		if(r < 0)
-			fprint(2, " (%r)");
-		fprint(2, "\n");
+		o = pdfstring(&b);
+		if(o == nil && t[i].out != nil)
+			fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
+		else if(o != nil && t[i].out == nil)
+			fprint(2, "ERROR: expected error, got %q\n", o->str);
+		else if(o == nil && t[i].out == nil)
+			fprint(2, "OK (%r)\n");
+		else if(strcmp(o->str, t[i].out) != 0)
+			fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->str);
+		else
+			fprint(2, "OK\n");
+		freeobject(o);
+		Bterm(&b);
 	}
 }
 #endif