shithub: pdffs

Download patch

ref: 2ff084d1629e80f99b35576f10ea87dc4d9f8941
parent: 474117ed563f8f84f11d3dcf90635c584be29ec1
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Tue Sep 1 12:19:34 EDT 2020

fix tons of bugs, use proper streaming

--- a/array.c
+++ b/array.c
@@ -1,12 +1,11 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include "pdf.h"
 
 /* 7.3.6 Array Objects */
 
 Object *
-pdfarray(Pdf *pdf, Biobuf *b)
+pdfarray(Pdf *pdf, Stream *s)
 {
 	Object *o, *m;
 	Object **a;
@@ -15,10 +14,10 @@
 	o = calloc(1, sizeof(*o));
 	o->pdf = pdf;
 	o->type = Oarray;
-	Bgetc(b); /* throw away '[' */
+	Sgetc(s); /* throw away '[' */
 
 	for(noel = 0;;){
-		if((c = Bgetc(b)) < 0 || c == ']')
+		if((c = Sgetc(s)) < 0 || c == ']')
 			break;
 		if(noel){
 			werrstr("no ']'");
@@ -25,8 +24,8 @@
 			goto err;
 		}
 
-		Bungetc(b);
-		if((m = pdfobj(pdf, b)) == nil){
+		Sungetc(s);
+		if((m = pdfobj(pdf, s)) == nil){
 			noel = 1;
 			continue;
 		}
@@ -65,7 +64,9 @@
 {
 	if(arraylen(o) <= i)
 		sysfatal("array: indexing out of range");
-	return o->type == Oarray ? o->array.e[i] : o;
+	o = o->type == Oarray ? o->array.e[i] : o;
+
+	return pdfeval(&o);
 }
 
 int
--- a/buffer.c
+++ b/buffer.c
@@ -64,7 +64,7 @@
 }
 
 int
-bufreadn(Buffer *b, Biobuf *bio, int sz)
+bufreadn(Buffer *b, Stream *s, int sz)
 {
 	int n, end;
 
@@ -71,7 +71,7 @@
 	if(bufgrow(b, sz) != 0)
 		return -1;
 	for(end = b->sz+sz; b->sz < end; b->sz += n){
-		if((n = Bread(bio, b->b+b->sz, sz)) < 1)
+		if((n = Sread(s, b->b+b->sz, sz)) < 1)
 			return -1;
 		sz -= n;
 	}
--- a/dict.c
+++ b/dict.c
@@ -1,12 +1,11 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include "pdf.h"
 
 /* 7.3.7 Dictionary Objects */
 
 Object *
-pdfdict(Pdf *pdf, Biobuf *b)
+pdfdict(Pdf *pdf, Stream *s)
 {
 	Object *o, *k, *v;
 	KeyValue *kv;
@@ -13,7 +12,7 @@
 	int c, nokey;
 
 	/* skip '<<' */
-	Bseek(b, 2, 1);
+	Sseek(s, 2, 1);
 
 	k = v = nil;
 	o = calloc(1, sizeof(*o));
@@ -20,10 +19,10 @@
 	o->type = Odict;
 	o->pdf = pdf;
 	for(nokey = 0;;){
-		if((c = Bgetc(b)) < 0)
+		if((c = Sgetc(s)) < 0)
 			goto err;
 		if(c == '>'){
-			if(Bgetc(b) == '>')
+			if(Sgetc(s) == '>')
 				break;
 			werrstr("no '>>'");
 			goto err;
@@ -33,8 +32,8 @@
 			goto err;
 		}
 
-		Bungetc(b);
-		if((k = pdfobj(pdf, b)) == nil){
+		Sungetc(s);
+		if((k = pdfobj(pdf, s)) == nil){
 			nokey = 1;
 			continue;
 		}
@@ -42,7 +41,7 @@
 			werrstr("expected name as a key");
 			goto err;
 		}
-		if((v = pdfobj(pdf, b)) == nil)
+		if((v = pdfobj(pdf, s)) == nil)
 			goto err;
 
 		if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil)
@@ -73,9 +72,14 @@
 	pdfeval(&o);
 	if((o->type != Ostream && o->type != Odict) || name == nil)
 		return &null;
-	for(i = 0; i < o->dict.nkv && strcmp(name, o->dict.kv[i].key) != 0; i++);
+	for(i = 0; i < o->dict.nkv; i++){
+		if(strcmp(name, o->dict.kv[i].key) == 0){
+			o = pdfeval(i < o->dict.nkv ? &o->dict.kv[i].value : nil);
+			return o;
+		}
+	}
 
-	return pdfeval(i < o->dict.nkv ? &o->dict.kv[i].value : nil);
+	return &null;
 }
 
 vlong
--- a/eval.c
+++ b/eval.c
@@ -1,8 +1,65 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include "pdf.h"
 
+static Object *
+evalobjstm(Pdf *pdf, Xref *x) /* resolve compressed xref entry x: open ObjStm x->objstm and parse object x->id out of it; &null on failure */
+{
+	Object *ostm, *o;
+	Stream *s;
+	Xref *xstm;
+	int i, off, nobj, first, index;
+
+	ostm = nil;
+	s = nil;
+	o = &null; /* also the result when no pair in the table matches x->id */
+	for(i = 0; i < pdf->nxref && pdf->xref[i].id != x->objstm; i++);
+	if(i >= pdf->nxref){
+		werrstr("no object id %d in xref", x->objstm);
+		goto err;
+	}
+	xstm = &pdf->xref[i];
+	if(Sseek(pdf->s, xstm->off, 0) != xstm->off){
+		werrstr("xref seek failed");
+		goto err;
+	}
+	if((ostm = pdfobj(pdf, pdf->s)) == nil)
+		goto err;
+	first = -1; /* keeps the message below meaningful when "N" is already invalid */
+	if((nobj = dictint(ostm, "N")) < 1 || (first = dictint(ostm, "First")) < 0){
+		werrstr("invalid ObjStm: nobj=%d first=%d", nobj, first);
+		goto err;
+	}
+
+	if((s = Sopen(ostm)) == nil)
+		goto err;
+	for(i = 0; i < nobj; i++){ /* the stream starts with nobj "id offset" pairs */
+		if(Sgeti(s, &index) != 1 || Sgeti(s, &off) != 1){ /* never compare against values that were not read */
+			werrstr("invalid ObjStm offset table");
+			goto err;
+		}
+		if(x->id == index){
+			off += first; /* table offsets are relative to "First" */
+			if(Sseek(s, off, 0) != off){
+				werrstr("xref obj seek failed");
+				goto err;
+			}
+			if((o = pdfobj(pdf, s)) == nil)
+				goto err;
+			o = pdfeval(&o);
+			break;
+		}
+	}
+	pdfobjfree(ostm); /* only needed to open s; o was parsed from the stream's own buffer */
+	Sclose(s);
+	return o;
+
+err:
+	pdfobjfree(ostm);
+	Sclose(s);
+	return &null;
+}
+
 Object *
 pdfeval(Object **oo)
 {
@@ -11,7 +68,7 @@
 	int i;
 
 	if(oo == nil)
-		return &null;
+		sysfatal("nil oo");
 	if(*oo == nil){
 		*oo = &null;
 		return &null;
@@ -20,19 +77,30 @@
 	if(o->type != Oindir)
 		return o;
 
-	for(i = 0; i < o->pdf->nxref && o->pdf->xref[i].id != o->indir.id; i++);
+	for(x = nil, i = 0; i < o->pdf->nxref; i++){
+		x = &o->pdf->xref[i];
+		if(x->id == o->indir.id)
+			break;
+	}
 	if(i >= o->pdf->nxref){
 		werrstr("no object id %d in xref", o->indir.id);
 		return &null;
 	}
-	x = &o->pdf->xref[i];
+	if(x->objstm > 0){
+		if((o = evalobjstm(o->pdf, x)) == &null)
+			werrstr("ObjStm: %r");
+		*oo = o;
+		return o;
+	}
 
-	if(Bseek(o->pdf->bio, x->off, 0) != x->off){
+	if(Sseek(o->pdf->s, x->off, 0) != x->off){
 		werrstr("xref seek failed");
 		return &null;
 	}
-	if((d = pdfobj(o->pdf, o->pdf->bio)) == nil)
+	if((d = pdfobj(o->pdf, o->pdf->s)) == nil){
+		werrstr("eval: %r [at %p]", (void*)x->off);
 		return &null;
+	}
 	*oo = d;
 	pdfobjfree(o);
 
--- a/f_flate.c
+++ b/f_flate.c
@@ -1,6 +1,5 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include <flate.h>
 #include "pdf.h"
 
--- a/filter.c
+++ b/filter.c
@@ -1,6 +1,5 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include "pdf.h"
 
 /* 7.4 Filters */
--- a/main.c
+++ b/main.c
@@ -20,6 +20,8 @@
 	Pdf *pdf;
 	Biobuf *b;
 	Object *v;
+	Stream *s;
+	int i, n;
 
 	quotefmtinstall();
 	inflateinit();
@@ -29,28 +31,33 @@
 		usage();
 	}ARGEND
 
-#ifdef TEST
-#define T(x) \
-	void x(void); \
-	x();
-
-	if(argc != 1){
-		T(test_pdfstring);
-		T(test_pdfname);
-		threadexitsall(nil);
-	}
-#endif
-
-	if(argc != 1)
+	if(argc < 1)
 		usage();
 	if((b = Bopen(argv[0], OREAD)) == nil)
 		sysfatal("%r");
 	if((pdf = pdfopen(b)) == nil)
 		sysfatal("%s: %r", argv[0]);
+	for(v = pdf->root, i = 1; i < argc; i++){
+		if(argv[i][0] == '['){
+			n = atoi(argv[i]+1);
+			v = arrayget(v, n);
+		}else if(argv[i][0] == '@' && argv[i][1] == 0 && v->type == Ostream){
+			if((s = Sopen(v)) == nil)
+				sysfatal("%r");
+			print("%.*s\n", s->buf.sz, s->buf.b);
+			Sclose(s);
+			break;
+		}else{
+			v = dictget(v, argv[i]);
+		}
+	}
+	print("%O\n", v);
+/*
 	if((v = dictget(pdf->info, "Creator")) != nil)
 		fprint(2, "creator: %s\n", v->str);
 	if((v = dictget(pdf->info, "Producer")) != nil)
 		fprint(2, "producer: %s\n", v->str);
+*/
 	pdfclose(pdf);
 
 	threadexitsall(nil);
--- a/misc.c
+++ b/misc.c
@@ -1,6 +1,5 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include "pdf.h"
 
 static char *otypes[] = {
@@ -15,17 +14,58 @@
 	[Oindir] = "indir",
 };
 
-static char *xtypes[] = {
-	[Xusual] = "usual",
-	[Xuncompressed] = "uncompressed",
-	[Xcompressed] = "compressed",
-};
-
 Object null = {
 	.type = Onull,
 };
 
 int
+Ofmt(Fmt *f) /* %O verb: print an Object* in a compact, human-readable form; returns the last fmtprint result */
+{
+	Object *o;
+	int i;
+
+	o = va_arg(f->args, Object*);
+	if(o == nil || o == &null)
+		return fmtprint(f, "null");
+	switch(o->type){
+	case Obool:
+		return fmtprint(f, o->bool ? "true" : "false");
+
+	case Onum:
+		return fmtprint(f, "%g", o->num);
+
+	case Ostr:
+		if(isutf8(o->str, o->len)) /* text that decodes as UTF-8 is quoted; anything else goes through the 'H' (encodefmt) verb */
+			return fmtprint(f, "%q", o->str);
+		return fmtprint(f, "<%.*H>", o->len, o->str);
+
+	case Oname:
+		return fmtprint(f, "/%s", o->name);
+
+	case Oarray:
+		fmtprint(f, "[");
+		for(i = 0; i < o->array.ne; i++) /* elements are printed recursively with %O, as stored */
+			fmtprint(f, "%s%O", i > 0 ? ", " : "", o->array.e[i]);
+		return fmtprint(f, "]");
+
+	case Ostream: /* FIXME dump the stream? */
+	case Odict: /* a stream is printed as its dictionary plus a "+stream" marker */
+		fmtprint(f, "<<");
+		for(i = 0; i < o->dict.nkv; i++)
+			fmtprint(f, "%s%s = %O", i > 0 ? ", " : "", o->dict.kv[i].key, o->dict.kv[i].value);
+		return fmtprint(f, ">>%s", o->type == Ostream ? "+stream" : "");
+
+	case Onull:
+		return fmtprint(f, "null");
+
+	case Oindir: /* unresolved reference: shown by id and generation, not followed */
+		return fmtprint(f, "@%d[gen=%d]", o->indir.id, o->indir.gen);
+
+	}
+	return fmtprint(f, "???"); /* type not covered by the switch */
+}
+
+int
 Tfmt(Fmt *f)
 {
 	Object *o;
@@ -45,15 +85,10 @@
 
 	x = va_arg(f->args, Xref);
 
-	switch(x.type){
-	case Xusual:
-		return fmtprint(f, "<%s id=%d gen=%d off=%d>", xtypes[x.type], x.id, x.gen, x.off);
-	case Xuncompressed:
-		return fmtprint(f, "<%s gen=%d off=%d>", xtypes[x.type], x.gen, x.off);
-	case Xcompressed:
-		return fmtprint(f, "<%s id=%d objnum=%d>", xtypes[x.type], x.id, x.objnum);
-	}
-	return -1;
+	if(x.objstm > 0)
+		return fmtprint(f, "<compressed id=%d objstm=%d index=%d>", x.id, x.objstm, x.index);
+
+	return fmtprint(f, "<uncompressed id=%d off=%d gen=%d>", x.id, x.off, x.gen);
 }
 
 int
@@ -74,13 +109,15 @@
 }
 
 int
-Bgetint(Biobuf *b, int *i)
+isutf8(char *s, int len) /* returns non-zero if the first len bytes of s decode as UTF-8 without hitting Runeerror */
 {
-	double d;
+	int i, n;
+	Rune r;
 
-	if(Bgetd(b, &d) != 1 || isNaN(d))
-		return -1;
-	*i = d;
+	for(i = 0; i < len; i += n, s += n){
+		if((n = chartorune(&r, s)) < 1 || r == Runeerror) /* stop at the first sequence that does not decode */
+			break;
+	}
 
-	return 1;
+	return i >= len; /* true only when the loop consumed every byte */
 }
--- a/mkfile
+++ b/mkfile
@@ -1,6 +1,5 @@
 </$objtype/mkfile
 
-CFLAGS=$CFLAGS -DTEST
 TARG=pdffs
 
 OFILES=\
--- a/name.c
+++ b/name.c
@@ -1,18 +1,17 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include "pdf.h"
 
 /* 7.3.5 Name Objects */
 
 Object *
-pdfname(Biobuf *b)
+pdfname(Stream *stream)
 {
 	Object *o;
 	char *s, *r, hex[3];
 	int c, sz, maxsz;
 
-	Bgetc(b); /* skip '/' */
+	Sgetc(stream); /* skip '/' */
 
 	maxsz = 32;
 	if((s = malloc(maxsz)) == nil)
@@ -19,14 +18,14 @@
 		goto err;
 
 	for(sz = 0;;){
-		if((c = Bgetc(b)) < 0){
-			if(c == Beof)
+		if((c = Sgetc(stream)) < 0){
+			if(c == -1)
 				break;
 			goto err;
 		}
 
 		if(isws(c) || isdelim(c)){
-			Bungetc(b);
+			Sungetc(stream);
 			break;
 		}
 		if(c < '!' || c > '~'){
@@ -34,10 +33,10 @@
 			goto err;
 		}
 		if(c == '#'){
-			if((c = Bgetc(b)) < 0)
+			if((c = Sgetc(stream)) < 0)
 				goto err;
 			hex[0] = c;
-			if((c = Bgetc(b)) < 0)
+			if((c = Sgetc(stream)) < 0)
 				goto err;
 			hex[1] = c;
 			if(dec16((uchar*)hex, 1, hex, 2) != 1){
@@ -67,65 +66,3 @@
 	free(s);
 	return nil;
 }
-
-#ifdef TEST
-static struct {
-	char *in;
-	char *out;
-}t[] = {
-	{"/SimpleName", "SimpleName"},
-	{"/.$()", ".$"},
-	{"/#30", "0"},
-	{"/#3", nil},
-	{"/#G0", nil},
-	{"/#", nil},
-	{"/Space Between", "Space"},
-	{"/Two/Names", "Two"},
-	{"/\xff", nil,},
-};
-
-static char *s;
-static int off, n;
-
-static int
-rd(Biobufhdr *, void *data, long sz)
-{
-	if(sz > n-off)
-		sz = n-off;
-	memmove(data, s+off, sz);
-	off += sz;
-	return sz;
-}
-
-void
-test_pdfname(void)
-{
-	Object *o;
-	Biobuf b;
-	int i;
-
-	fprint(2, "pdfname\n");
-	for(i = 0; i < nelem(t); i++){
-		s = t[i].in;
-		n = strlen(s);
-		off = 0;
-		Binit(&b, -1, OREAD);
-		Biofn(&b, rd);
-
-		fprint(2, "\t%d: ", i);
-		o = pdfname(&b);
-		if(o == nil && t[i].out != nil)
-			fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
-		else if(o != nil && t[i].out == nil)
-			fprint(2, "ERROR: expected error, got %q\n", o->name);
-		else if(o == nil && t[i].out == nil)
-			fprint(2, "OK (%r)\n");
-		else if(strcmp(o->name, t[i].out) != 0)
-			fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->name);
-		else
-			fprint(2, "OK\n");
-		pdfobjfree(o);
-		Bterm(&b);
-	}
-}
-#endif
--- a/object.c
+++ b/object.c
@@ -1,106 +1,105 @@
 #include <u.h>
 #include <libc.h>
 #include <ctype.h>
-#include <bio.h>
 #include "pdf.h"
 
-Object *pdfstring(Biobuf *b);
-Object *pdfname(Biobuf *b);
-Object *pdfarray(Pdf *pdf, Biobuf *b);
-Object *pdfdict(Pdf *pdf, Biobuf *b);
+Object *pdfstring(Stream *s);
+Object *pdfname(Stream *s);
+Object *pdfarray(Pdf *pdf, Stream *s);
+Object *pdfdict(Pdf *pdf, Stream *s);
 
 /* General function to parse an object of any type. */
 Object *
-pdfobj(Pdf *pdf, Biobuf *b)
+pdfobj(Pdf *pdf, Stream *s)
 {
 	Object *o, *o2;
 	vlong off;
 	int c, tf;
 	Xref xref;
-	char s[16];
+	char b[16];
 
 	o = o2 = nil;
-	do; while(isws(c = Bgetc(b)));
+	do; while(isws(c = Sgetc(s)));
 	if(c < 0)
 		goto err;
 
 	switch(c){
 	case '<': /* dictionary or a string */
-		c = Bgetc(b);
+		c = Sgetc(s);
 		if(c == '<'){
-			Bseek(b, -2, 1);
-			if((o = pdfdict(pdf, b)) != nil){
+			Sseek(s, -2, 1);
+			if((o = pdfdict(pdf, s)) != nil){
 				/* check for attached stream */
-				off = Boffset(b);
-				do; while(isws(Bgetc(b)));
-				Bungetc(b);
-				if(Bread(b, s, 7) == 7 && memcmp(s, "stream", 6) == 0 && isws(c = s[6])){
+				off = Soffset(s);
+				do; while(isws(Sgetc(s)));
+				Sungetc(s);
+				if(Sread(s, b, 7) == 7 && memcmp(b, "stream", 6) == 0 && isws(c = b[6])){
 					/* there IS a stream */
-					if(c == '\r' && (c = Bgetc(b)) < 0)
+					if(c == '\r' && (c = Sgetc(s)) < 0)
 						goto err;
 					if(c != '\n'){
 						werrstr("stream has no newline after dict");
 						goto err;
 					}
-					o->stream.off = Boffset(b);
+					o->stream.off = Soffset(s);
 					o->type = Ostream;
 					o->stream.len = dictint(o, "Length");
 					return o;
 				}
-				Bseek(b, off, 0);
+				Sseek(s, off, 0);
 				return o;
 			}
 		}
-		Bungetc(b);
+		Sungetc(s);
 		/* fall through */
 
 	case '(':
-		Bungetc(b);
-		if((o = pdfstring(b)) != nil)
+		Sungetc(s);
+		if((o = pdfstring(s)) != nil)
 			o->pdf = pdf;
 		return o;
 
 	case '/':
-		Bungetc(b);
-		if((o = pdfname(b)) != nil)
+		Sungetc(s);
+		if((o = pdfname(s)) != nil)
 			o->pdf = pdf;
 		return o;
 
 	case '[':
-		Bungetc(b);
-		if((o = pdfarray(pdf, b)) != nil)
+		Sungetc(s);
+		if((o = pdfarray(pdf, s)) != nil)
 			o->pdf = pdf;
 		return o;
 
 	case 'n':
-		off = Boffset(b);
-		if(Bgetc(b) == 'u' && Bgetc(b) == 'l' && Bgetc(b) == 'l' && (isws(c = Bgetc(b)) || isdelim(c))){
-			Bungetc(b);
+		off = Soffset(s);
+		if(Sgetc(s) == 'u' && Sgetc(s) == 'l' && Sgetc(s) == 'l' && (isws(c = Sgetc(s)) || isdelim(c))){
+			Sungetc(s);
 			return &null;
 		}
-		Bseek(b, off, 0);
+		Sseek(s, off, 0);
 		c = 'f';
 		goto unexpected;
 
 	case 't':
-		off = Boffset(b);
+		off = Soffset(s);
 		tf = 1;
-		if(Bgetc(b) == 'r' && Bgetc(b) == 'u' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
+		if(Sgetc(s) == 'r' && Sgetc(s) == 'u' && Sgetc(s) == 'e' && (isws(c = Sgetc(s)) || isdelim(c)))
 			goto bool;
-		Bseek(b, off, 0);
+		Sseek(s, off, 0);
 		c = 't';
 		goto unexpected;
 
 	case 'f':
-		off = Boffset(b);
+		off = Soffset(s);
 		tf = 0;
-		if(Bgetc(b) == 'a' && Bgetc(b) == 'l' && Bgetc(b) == 's' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
+		if(Sgetc(s) == 'a' && Sgetc(s) == 'l' && Sgetc(s) == 's' && Sgetc(s) == 'e' && (isws(c = Sgetc(s)) || isdelim(c)))
 			goto bool;
-		Bseek(b, off, 0);
+		Sseek(s, off, 0);
 		c = 'f';
 		goto unexpected;
 bool:
-		Bungetc(b);
+		Sungetc(s);
 		if((o = calloc(1, sizeof(*o))) == nil)
 			goto err;
 		o->type = Obool;
@@ -111,20 +110,20 @@
 	default:
 		if(!isdigit(c)){
 unexpected:
-			Bungetc(b);
+			Sungetc(s);
 			werrstr("unexpected char '%c'", c);
 			goto err;
 		}
 		 /* it could be a number or an indirect object */
-		Bungetc(b);
+		Sungetc(s);
 		if((o = calloc(1, sizeof(*o))) == nil)
 			goto err;
 		o->pdf = pdf;
-		Bgetd(b, &o->num); /* get the first number */
-		off = Boffset(b); /* seek here if not an indirect object later */
+		Sgetd(s, &o->num); /* get the first number */
+		off = Soffset(s); /* seek here if not an indirect object later */
 
-		if((o2 = pdfobj(pdf, b)) != nil && o2->type == Onum){ /* second object is number too */
-			do; while(isws(c = Bgetc(b)));
+		if((o2 = pdfobj(pdf, s)) != nil && o2->type == Onum){ /* second object is number too */
+			do; while(isws(c = Sgetc(s)));
 			if(c < 0)
 				goto err;
 			if(c == 'R'){ /* indirect object */
@@ -134,13 +133,13 @@
 				pdfobjfree(o2);
 				return o;
 			}
-			if(c == 'o' && Bgetc(b) == 'b' && Bgetc(b) == 'j'){ /* object */
+			if(c == 'o' && Sgetc(s) == 'b' && Sgetc(s) == 'j'){ /* object */
 				xref.id = o->num;
 				xref.gen = o2->num;
 				/* FIXME put into a map */
 				pdfobjfree(o);
 				pdfobjfree(o2);
-				if((o = pdfobj(pdf, b)) != nil)
+				if((o = pdfobj(pdf, s)) != nil)
 					return o;
 				o2 = nil;
 			}
@@ -148,7 +147,7 @@
 
 		/* just a number, go back and return it */
 		o->type = Onum;
-		if(Bseek(b, off, 0) != off){
+		if(Sseek(s, off, 0) != off){
 			werrstr("seek failed");
 			goto err;
 		}
--- a/pdf.c
+++ b/pdf.c
@@ -1,6 +1,5 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include <ctype.h>
 #include "pdf.h"
 
@@ -8,14 +7,16 @@
 trailerread(Pdf *pdf)
 {
 	Object *o;
+	int prev;
 
-	if((o = pdfobj(pdf, pdf->bio)) == nil)
+	if((o = pdfobj(pdf, pdf->s)) == nil)
 		goto err;
-
 	if(o->type != Odict){
 		werrstr("isn't a dictionary");
 		goto err;
 	}
+	if((prev = dictint(o, "Prev")) > 0 && (Sseek(pdf->s, prev, 0) < 0 || xrefreadold(pdf) != 0))
+		goto err;
 
 	pdf->root = pdfref(dictget(o, "Root"));
 	pdf->info = pdfref(dictget(o, "Info"));
@@ -28,24 +29,28 @@
 }
 
 Pdf *
-pdfopen(Biobuf *b)
+pdfopen(void *bio)
 {
 	Pdf *pdf;
 	Object *o;
+	Stream *stream;
 	char tmp[64], *s, *x;
 	int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
 	int i, n, off;
 
+	fmtinstall('H', encodefmt);
+	fmtinstall('O', Ofmt);
 	fmtinstall('T', Tfmt);
 	fmtinstall(L'⊗', ⊗fmt);
 
 	o = nil;
-	if((pdf = calloc(1, sizeof(*pdf))) == nil)
+	pdf = nil;
+	if((stream = Sbio(bio)) == nil || (pdf = calloc(1, sizeof(*pdf))) == nil)
 		goto err;
-	pdf->bio = b;
+	pdf->s = stream;
 
 	/* check header */
-	if(Bread(b, tmp, 8) != 8 ||
+	if(Sread(stream, tmp, 8) != 8 ||
 	   strncmp(tmp, "%PDF-", 5) != 0 || !isdigit(tmp[5]) || tmp[6] != '.' || !isdigit(tmp[7])){
 		werrstr("not a pdf");
 		goto err;
@@ -55,8 +60,8 @@
 
 	/* read a block of data */
 	n = sizeof(tmp)-1;
-	Bseek(b, -n, 2);
-	if(Bread(b, tmp, n) != n){
+	Sseek(stream, -n, 2);
+	if(Sread(stream, tmp, n) != n){
 badtrailer:
 		werrstr("invalid trailer");
 		goto err;
@@ -75,25 +80,27 @@
 		goto badtrailer;
 
 	/* read xref */
-	if(Bseek(b, xreftb, 0) != xreftb){
+	if(Sseek(stream, xreftb, 0) != xreftb){
 		werrstr("xref position out of range");
 		goto err;
 	}
 	for(;;){
-		off = Boffset(b);
-		if(Bread(b, tmp, sizeof(tmp)) < 8){
+		while(isspace(Sgetc(stream)));
+		Sungetc(stream);
+		off = Soffset(stream);
+		if(Sread(stream, tmp, sizeof(tmp)) < 8){
 badxref:
 			werrstr("invalid xref: %r");
 			goto err;
 		}
 		if(memcmp(tmp, "xref", 4) == 0){
-			if(Bseek(b, -sizeof(tmp)+5, 1) < 0 || xrefreadold(pdf) != 0)
+			if(Sseek(stream, -sizeof(tmp), 1) < 0 || xrefreadold(pdf) != 0)
 				goto err;
 			/* there could be more updates, try it */
 		}else if(memcmp(tmp, "trailer", 7) == 0){ /* 7.5.5 file trailer */
 			/* move to the trailer dictionary */
 			n = off + 8;
-			if(Bseek(b, n, 0) != n || trailerread(pdf) != 0){
+			if(Sseek(stream, n, 0) != n || trailerread(pdf) != 0){
 				werrstr("invalid trailer: %r");
 				goto err;
 			}
@@ -100,7 +107,7 @@
 			/* trailer is supposed to be the last thing */
 			break;
 		}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
-			if(Bseek(b, xreftb, 0) != xreftb)
+			if(Sseek(stream, xreftb, 0) != xreftb)
 				goto badxref;
 			if(xrefreadstream(pdf) != 0)
 				goto err;
@@ -110,13 +117,13 @@
 
 	/* root is required, info is optional */
 	if(pdf->root == &null){
-		werrstr("no root");
+		werrstr("no root: %r");
 		goto err;
 	}
 
 	return pdf;
 err:
-	werrstr("pdfopen: %r [at %p]", (void*)Boffset(b));
+	werrstr("pdfopen: %r [at %p]", (void*)Soffset(stream));
 	pdfclose(pdf);
 	pdfobjfree(o);
 	return nil;
@@ -127,8 +134,8 @@
 {
 	if(pdf == nil)
 		return;
-	if(pdf->bio != nil)
-		Bterm(pdf->bio);
+	if(pdf->s != nil)
+		Sclose(pdf->s);
 	free(pdf->xref);
 	free(pdf);
 }
--- a/pdf.h
+++ b/pdf.h
@@ -8,10 +8,6 @@
 	Ostream, /* 7.3.8 */
 	Onull,   /* 7.3.9 */
 	Oindir,  /* 7.3.10 */
-
-	Xusual = 0,
-	Xuncompressed,
-	Xcompressed,
 };
 
 typedef struct Buffer Buffer;
@@ -82,7 +78,7 @@
 };
 
 struct Pdf {
-	Biobuf *bio;
+	Stream *s;
 	Xref *xref;
 	int nxref; /* 7.5.4 xref subsection number of objects */
 
@@ -92,28 +88,34 @@
 
 struct Xref {
 	u32int id;
-	union{
-		u32int off;
-		u32int objnum;
+	union { /* which member is valid is decided by objstm below */
+		struct { /* uncompressed */
+			u32int off;
+			u16int gen;
+		};
+
+		struct { /* compressed, objstm > 0 */
+			u16int index; /* index within ObjStm */
+		};
 	};
-	u16int gen;
-	u16int type;
+	u32int objstm; /* > 0 means it's compressed and points to the ObjStm; it is an object number, so as wide as id */
 };
 
 struct Stream {
 	Buffer buf;
-	Biobuf *bio;
+	void *bio; /* Biobuf handed to Sbio for file-backed streams; nil when the stream reads from buf */
+	int linelen; /* length recorded by the last Srdstr call, reported by Slinelen */
 };
 
 extern Object null;
 
-Pdf *pdfopen(Biobuf *b);
+Pdf *pdfopen(void *bio);
 void pdfclose(Pdf *pdf);
 
 /*
  * Parse an object.
  */
-Object *pdfobj(Pdf *pdf, Biobuf *bio);
+Object *pdfobj(Pdf *pdf, Stream *s);
 
 /*
  * Deallocate the object and all its children. Refcount is
@@ -146,6 +148,8 @@
  */
 int isdelim(int c);
 
+int isutf8(char *s, int len);
+
 int arraylen(Object *o);
 Object *arrayget(Object *o, int i);
 int arrayint(Object *o, int i);
@@ -156,9 +160,19 @@
 Object *dictdict(Object *o, char *name);
 int dictints(Object *o, char *name, int *el, int nel);
 
-Stream *streamopen(Object *o);
-int streamsize(Stream *s);
-void streamclose(Stream *s);
+Stream *Sbio(void *bio);
+Stream *Sopen(Object *o);
+int Sread(Stream *s, void *b, int sz);
+int Sgetc(Stream *s);
+int Sungetc(Stream *s);
+int Ssize(Stream *s);
+int Soffset(Stream *s);
+int Sseek(Stream *s, int off, int whence);
+void Sclose(Stream *s);
+int Sgetd(Stream *s, double *d);
+int Sgeti(Stream *s, int *i);
+char *Srdstr(Stream *s, int delim, int zero);
+int Slinelen(Stream *s);
 
 Filter *filteropen(char *name, Object *o);
 int filterrun(Filter *f, Buffer *bi, Buffer *bo);
@@ -169,16 +183,17 @@
 int bufeof(Buffer *b);
 int bufleft(Buffer *b);
 uchar *bufdata(Buffer *b, int *sz);
-int bufreadn(Buffer *b, Biobuf *bio, int sz);
+int bufreadn(Buffer *b, Stream *s, int sz);
 int bufput(Buffer *b, uchar *d, int sz);
 int bufget(Buffer *b, uchar *d, int sz);
 void bufdump(Buffer *b);
 
+#pragma varargck type "O" Object*
 #pragma varargck type "T" Object*
 #pragma varargck type "⊗" Xref
+int Ofmt(Fmt *f);
 int Tfmt(Fmt *f);
 int ⊗fmt(Fmt *f);
-int Bgetint(Biobuf *b, int *i);
 
 int xrefreadold(Pdf *pdf);
 int xrefreadstream(Pdf *pdf);
--- a/stream.c
+++ b/stream.c
@@ -3,18 +3,20 @@
 #include <bio.h>
 #include "pdf.h"
 
-static int
-bufiof(Biobufhdr *b, void *data, long n)
+Stream *
+Sbio(void *bio) /* wrap an existing Biobuf in a Stream; returns nil if the allocation fails */
 {
 	Stream *s;
 
-	s = (Stream*)((char*)b - sizeof(*s));
+	if((s = calloc(1, sizeof(*s))) == nil)
+		return nil;
+	s->bio = bio; /* non-nil bio makes every S* call forward to the B* routines; Sclose calls Bterm on it */
 
-	return bufget(&s->buf, data, n);
+	return s;
 }
 
 Stream *
-streamopen(Object *o)
+Sopen(Object *o)
 {
 	Stream *s;
 	Buffer b, x;
@@ -23,13 +25,15 @@
 	int i, nflts;
 
 	s = nil;
-	if(pdfeval(&o)->type != Ostream) /* FIXME open a string object as a stream as well? */
+	if(pdfeval(&o)->type != Ostream){ /* FIXME open a string object as a stream as well? */
+		werrstr("not a stream");
 		return nil;
+	}
 
 	bufinit(&b, nil, 0);
-	if(Bseek(o->pdf->bio, o->stream.off, 0) != o->stream.off)
+	if(Sseek(o->pdf->s, o->stream.off, 0) != o->stream.off)
 		return nil;
-	if(bufreadn(&b, o->pdf->bio, o->stream.len) < 0)
+	if(bufreadn(&b, o->pdf->s, o->stream.len) < 0)
 		goto err;
 
 	/* see if there are any filters */
@@ -64,14 +68,11 @@
 		}
 	}
 
-	if((s = calloc(1, sizeof(*s)+sizeof(Biobuf))) == nil){
+	if((s = calloc(1, sizeof(*s))) == nil){
 		buffree(&b);
 		return nil;
 	}
-	s->bio = (Biobuf*)(s+1);
 	s->buf = b;
-	Binit(s->bio, Bfildes(o->pdf->bio), OREAD);
-	Biofn(s->bio, bufiof);
 
 	return s;
 err:
@@ -82,18 +83,164 @@
 }
 
 int
-streamsize(Stream *s)
+Sread(Stream *s, void *b, int sz) /* read up to sz bytes into b; the result is whatever Bread or bufget returns */
 {
+	return s->bio != nil ? Bread(s->bio, b, sz) : bufget(&s->buf, b, sz); /* file-backed vs. in-memory stream */
+}
+
+int
+Sgetc(Stream *s) /* next byte as a non-negative int, or a negative value when none could be read */
+{
+	int n;
+	uchar c;
+
+	if(s->bio != nil)
+		return Bgetc(s->bio);
+	if((n = bufget(&s->buf, &c, 1)) < 0)
+		return -2; /* bufget reported an error */
+
+	return n == 0 ? -1 : (int)c; /* nothing read: -1, the value pdfname treats as end of input */
+}
+
+int
+Sungetc(Stream *s) /* step back one byte in the stream */
+{
+	return s->bio != nil ? Bungetc(s->bio) : Sseek(s, -1, 1); /* in-memory case is a relative seek, so the result is the new offset */
+}
+
+int
+Soffset(Stream *s) /* current read position: Boffset for file-backed streams, buf.off otherwise */
+{
+	return s->bio != nil ? Boffset(s->bio) : s->buf.off;
+}
+
+int
+Ssize(Stream *s) /* whatever bufleft reports for the stream's buffer (presumably the unread byte count — confirm in buffer.c) */
+{
+	assert(s->bio == nil); /* only defined for in-memory streams */
 	return bufleft(&s->buf);
 }
 
+struct sgetd /* state shared between Sgetd and its charstod callback Sgetdf */
+{
+	Stream *s; /* stream the characters are pulled from */
+	int eof; /* set once Sgetc has returned a negative value */
+};
+
+static int
+Sgetdf(void *vp) /* character source handed to charstod by Sgetd; vp is a struct sgetd* */
+{
+	int c;
+	struct sgetd *sg = vp;
+
+	c = Sgetc(sg->s);
+	if(c < 0)
+		sg->eof = 1; /* remember that input ran out (or failed) while parsing */
+	return c;
+}
+
+int
+Sgetd(Stream *s, double *dp) /* parse a number at the current position into *dp; returns 1 on success, -1 otherwise */
+{
+	double d;
+	struct sgetd b;
+
+	b.s = s;
+	b.eof = 0;
+	d = charstod(Sgetdf, &b);
+	if(b.eof) /* NOTE(review): presumably also taken when a number is the very last thing in the stream — confirm */
+		return -1;
+	Sungetc(s); /* push back the last character charstod consumed */
+	*dp = d;
+
+	return 1;
+}
+
+int
+Sgeti(Stream *s, int *i) /* skip whitespace, read a number and store it truncated to int; 1 on success, negative otherwise */
+{
+	double d;
+	int res, c;
+
+	while(isws(c = Sgetc(s))); /* keep the character itself, not isws()'s verdict, so end of input is visible below */
+	if(c < 0)
+		return c;
+	Sungetc(s);
+	if((res = Sgetd(s, &d)) == 1)
+		*i = d; /* d is only defined when Sgetd succeeded; *i is left alone otherwise */
+
+	return res;
+}
+
+int
+Sseek(Stream *s, int off, int whence) /* whence: 0 absolute, 1 relative to the current offset, 2 relative to the end */
+{
+	if(s->bio != nil)
+		return Bseek(s->bio, off, whence);
+
+	if(whence == 1)
+		off += s->buf.off;
+	else if(whence == 2)
+		off += s->buf.sz;
+	if(off < 0){ /* out of range: set errstr and clamp; callers such as evalobjstm compare the result with what they asked for */
+		werrstr("seek: %d < 0", off);
+		off = 0;
+	}else if(off > s->buf.sz){
+		werrstr("seek: %d > %d", off, s->buf.sz);
+		off = s->buf.sz;
+	}
+
+	s->buf.off = off;
+
+	return off; /* resulting absolute offset within the buffer */
+}
+
+char *
+Srdstr(Stream *s, int delim, int zero) /* in-memory streams: malloc'd copy of the input through the next delim, nil if there is none */
+{
+	int i, len;
+	char *line;
+
+	if(s->bio != nil){ /* file-backed: defer to Brdstr and remember its Blinelen for Slinelen */
+		line = Brdstr(s->bio, delim, zero);
+		s->linelen = Blinelen(s->bio);
+		return line;
+	}
+
+	for(i = s->buf.off; i < s->buf.sz;){ /* leaves i one past the delimiter, or at buf.sz if none was found */
+		i++;
+		if(s->buf.b[i-1] == delim)
+			break;
+	}
+	if(i <= s->buf.off || s->buf.b[i-1] != delim) /* nothing left, or no delimiter; a delimiter in the very last byte is a match */
+		return nil;
+	len = i - s->buf.off;
+	if((line = malloc(len+1)) == nil)
+		return nil;
+	memmove(line, s->buf.b+s->buf.off, len);
+	s->buf.off += len; /* the delimiter is consumed either way */
+	if(line[len-1] == delim && zero) /* zero: drop the delimiter from the returned string */
+		len--;
+	line[len] = 0;
+	s->linelen = len;
+
+	return line;
+}
+
+int
+Slinelen(Stream *s) /* length stored by the most recent Srdstr call on s */
+{
+	return s->linelen;
+}
+
 void
-streamclose(Stream *s)
+Sclose(Stream *s) /* release the stream, its buffer and (if any) its Biobuf; nil is accepted */
 {
 	if(s == nil)
 		return;
 
 	buffree(&s->buf);
-	Bterm(s->bio);
+	if(s->bio != nil) /* streams created by Sopen have no Biobuf */
+		Bterm(s->bio);
 	free(s);
 }
--- a/string.c
+++ b/string.c
@@ -1,6 +1,5 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include "pdf.h"
 
 /* 7.3.4 String Objects */
@@ -18,15 +17,15 @@
 };
 
 static Object *
-stringhex(Biobuf *b)
+stringhex(Stream *stream)
 {
 	char *s;
 	Object *o;
 	int len, n;
 
-	if((s = Brdstr(b, '>', 0)) == nil)
+	if((s = Srdstr(stream, '>', 0)) == nil)
 		return nil;
-	len = Blinelen(b) - 1;
+	len = Slinelen(stream) - 1;
 	if(s[len] != '>'){
 		werrstr("no '>'");
 		free(s);
@@ -50,7 +49,7 @@
 }
 
 Object *
-pdfstring(Biobuf *b)
+pdfstring(Stream *stream)
 {
 	Object *o;
 	char *s, *r;
@@ -62,14 +61,14 @@
 		return nil;
 
 	for(paren = sz = 0;;){
-		if((c = Bgetc(b)) < 0)
+		if((c = Sgetc(stream)) < 0)
 			break;
 
 		switch(c){
 		case '<':
 			if(sz == 0){
-				Bungetc(b);
-				return stringhex(b);
+				Sungetc(stream);
+				return stringhex(stream);
 			}
 			break;
 
@@ -86,16 +85,16 @@
 			continue;
 
 		case '\\':
-			if((c = Bgetc(b)) <= 0)
+			if((c = Sgetc(stream)) <= 0)
 				break;
 			if(c >= '0' && c <= '7'){ /* octal */
 				oct[0] = c;
-				for(i = 1; i < 3 && (c = Bgetc(b)) >= '0' && c <= '7'; i++)
+				for(i = 1; i < 3 && (c = Sgetc(stream)) >= '0' && c <= '7'; i++)
 					oct[i] = c;
 				if(c <= 0)
 					break;
 				if(c < '0' || c > '7')
-					Bungetc(b);
+					Sungetc(stream);
 				oct[i] = 0;
 				c = strtol(oct, nil, 8);
 			}else if(c >= nelem(esc) || (c = esc[c]) == 0){
@@ -147,77 +146,3 @@
 	werrstr("string: %r");
 	return nil;
 }
-
-#ifdef TEST
-static struct {
-	char *in;
-	char *out;
-}t[] = {
-	{"", nil},
-	{"(test, success)", "test, success"},
-	{"(simple string)", "simple string"},
-	{"(non-closed paren", nil},
-	{"wrong first char", nil},
-	{"(parens((()((())))()))", "parens"},
-	{"(\\0053)", "\x053"},
-	{"(\\053)", "+"},
-	{"(\\53)", "+"},
-	{"()", ""},
-	{")", nil},
-	{"(\\)\\()", ")("},
-	{"(\\\\)", "\\"},
-	{"a", nil},
-	{"(1\\\n2)", "12"},
-	{"<323130>", "210"},
-	{"<32313>", "210"},
-	{"<>", ""},
-	{"<", nil},
-	{"<zz>", nil},
-	{">", nil},
-};
-
-static char *s;
-static int off, n;
-
-static int
-rd(Biobufhdr *, void *data, long sz)
-{
-	if(sz > n-off)
-		sz = n-off;
-	memmove(data, s+off, sz);
-	off += sz;
-	return sz;
-}
-
-void
-test_pdfstring(void)
-{
-	Object *o;
-	Biobuf b;
-	int i;
-
-	fprint(2, "pdfstring\n");
-	for(i = 0; i < nelem(t); i++){
-		s = t[i].in;
-		n = strlen(s);
-		off = 0;
-		Binit(&b, -1, OREAD);
-		Biofn(&b, rd);
-
-		fprint(2, "\t%d: ", i);
-		o = pdfstring(&b);
-		if(o == nil && t[i].out != nil)
-			fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
-		else if(o != nil && t[i].out == nil)
-			fprint(2, "ERROR: expected error, got %q\n", o->str);
-		else if(o == nil && t[i].out == nil)
-			fprint(2, "OK (%r)\n");
-		else if(strcmp(o->str, t[i].out) != 0)
-			fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->str);
-		else
-			fprint(2, "OK\n");
-		pdfobjfree(o);
-		Bterm(&b);
-	}
-}
-#endif
--- a/xref.c
+++ b/xref.c
@@ -1,6 +1,5 @@
 #include <u.h>
 #include <libc.h>
-#include <bio.h>
 #include <ctype.h>
 #include "pdf.h"
 
@@ -18,18 +17,21 @@
 	char *s, *e;
 	Xref *x;
 
-	if(Bgetint(pdf->bio, &xref0) != 1 || xref0 < 0){
+	Sseek(pdf->s, 4, 1);	
+	if(Sgeti(pdf->s, &xref0) != 1 || xref0 < 0){
 		werrstr("invalid xref0");
 		return -1;
 	}
-	if(Bgetint(pdf->bio, &nxref) != 1 || nxref < 0){
+	if(Sgeti(pdf->s, &nxref) != 1 || nxref < 0){
 		werrstr("invalid nxref");
 		return -1;
 	}
+	if(nxref < 1)
+		return 0;
 
 	/* skip whitespace and move to the first subsection */
-	while(isspace(Bgetc(pdf->bio)));
-	Bungetc(pdf->bio);
+	do; while(isspace(Sgetc(pdf->s)));
+	Sungetc(pdf->s);
 
 	s = nil;
 	if((x = realloc(pdf->xref, (pdf->nxref + nxref)*sizeof(Xref))) == nil)
@@ -41,12 +43,13 @@
 	if((s = malloc(sz)) == nil)
 		goto err;
 	for(i = 0; i < sz; i += n){
-		if((n = Bread(pdf->bio, s+i, sz-i)) < 1)
+		if((n = Sread(pdf->s, s+i, sz-i)) < 1)
 			goto err;
 	}
 
 	/* store non-free objects only */
 	newnxref = pdf->nxref;
+	xref.objstm = 0;
 	for(e = s, i = 0; i < nxref; i++, e += 20){
 		if(!isspace(e[10]) || !isspace(e[18]) || !isspace(e[19])){
 			werrstr("invalid xref line (%d/%d)", i, nxref);
@@ -54,8 +57,7 @@
 		}
 		xref.id = xref0 + i;
 		xref.off = strtoul(e, nil, 10);
-		/* FIXME xref.gen */
-		xref.type = Xusual;
+		xref.gen = strtoul(e+11, nil, 10);
 
 		/* search in already existing xrefs, update if found */
 		for(j = 0; j < pdf->nxref; j++){
@@ -107,19 +109,24 @@
 int
 xrefreadstream(Pdf *pdf)
 {
-	Object *o;
+	Object *o, *p, *index;
 	Stream *s;
 	Xref *x;
 	uchar buf[32];
-	int w[8], nw, i, c, n, nxref, newnxref, prev, extra;
+	int w[8], nw, c, n, nxref, newnxref, prev, extra;
+	int i, ni, nsubsec, subsec;
 
 	s = nil;
-	if((o = pdfobj(pdf, pdf->bio)) == nil){
+	if((o = pdfobj(pdf, pdf->s)) == nil){
 		werrstr("xref stream obj: %r");
 		goto err;
 	}
-	if((prev = dictint(o, "Prev")) > 0){
-		if(Bseek(pdf->bio, prev, 0) != prev){
+
+	index = dictget(o, "Index"); /* 7.5.8.2 subsection indexing */
+	nsubsec = arraylen(index) / 2;
+
+	if((prev = dictint(o, "Prev")) > 0){ /* 7.5.8.2 previous xref stream */
+		if(Sseek(pdf->s, prev, 0) != prev){
 			werrstr("xref stream prev seek failed");
 			goto err;
 		}
@@ -128,7 +135,7 @@
 			return -1;
 		}
 	}
-	if((s = streamopen(o)) == nil){
+	if((s = Sopen(o)) == nil){
 		werrstr("failed to stream xref: %r");
 		goto err;
 	}
@@ -143,11 +150,11 @@
 		werrstr("W is beyond imaginable: %d bytes", n);
 		goto err;
 	}
-	if((nxref = streamsize(s)/n) < 1){
+	if((nxref = Ssize(s)/n) < 1){
 		werrstr("no xref elements in the stream");
 		goto err;
 	}
-	extra = streamsize(s) % (nxref*n);
+	extra = Ssize(s) % (nxref*n);
 	if(extra != 0)
 		fprint(2, "extra %d bytes in xref stream", extra);
 
@@ -156,34 +163,41 @@
 		goto err;
 	pdf->xref = x;
 	x += pdf->nxref;
-	while(Bread(s->bio, buf, n) == n){ /* stop on short read or error */
+	i = 0;
+	for(ni = subsec = 0; Sread(s, buf, n) == n; ni--, i++){ /* stop on short read or error */
+		if(ni == 0 && nsubsec > 0){
+			i = arrayint(index, subsec*2+0); /* index of the first object */
+			ni = arrayint(index, subsec*2+1); /* number of objects in the subsection */
+			subsec++;
+		}
+
 		c = getint(buf, w[0], 1); /* default type is 1 */
-		if(c == 1){ /* not compressed */
+		if(c == 1){ /* uncompressed */
+			x->objstm = 0;
+			x->id = i;
 			x->off = getint(buf+w[0], w[1], 0);
 			x->gen = getint(buf+w[0]+w[1], w[2], 0);
-			x->type = Xuncompressed;
 			pdf->nxref++;
-			fprint(2, "xref %⊗\n", *x);
 			x++;
 		}else if(c == 2){ /* compressed */
-			x->objnum = getint(buf+w[0], w[1], 0);
-			x->id = getint(buf+w[0]+w[1], w[2], 0);
-			x->type = Xcompressed;
+			x->id = i;
+			x->objstm = getint(buf+w[0], w[1], 0);
+			x->index = getint(buf+w[0]+w[1], w[2], 0);
 			pdf->nxref++;
-			fprint(2, "xref %⊗\n", *x);
 			x++;
 		}
 	}
 
-	streamclose(s);
-	pdf->root = pdfref(dictget(o, "Root"));
-	pdf->info = pdfref(dictget(o, "Info"));
+	Sclose(s);
+	if((p = dictget(o, "Root")) != &null)
+		pdf->root = pdfref(p);
+	if((p = dictget(o, "Info")) != &null)
+		pdf->info = pdfref(p);
 	pdfobjfree(o);
 
 	return 0;
 err:
-	streamclose(s);
+	Sclose(s);
 	pdfobjfree(o);
 	return -1;
 }
-