ref: a080ae88c6c65503575da220cb131ddada107bf6
parent: 34238e0feb181a0c120561486d4c86b054d112bc
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Fri Aug 28 22:45:56 EDT 2020
rewrite the API, support more object types and actual evaluation
--- a/array.c
+++ b/array.c
@@ -1,41 +1,35 @@
#include <u.h>
#include <libc.h>
-#include <ctype.h>
+#include <bio.h>
#include "pdf.h"
+/* 7.3.6 Array Objects */
+
+/*
+ * Parses an array object from b, which must be positioned at its '['.
+ * Elements are parsed one at a time with pdfobject() and appended to
+ * o->array.e. Returns the new Oarray object, or nil with the error
+ * string set (prefixed with "array: ").
+ */
Object *
-pdfarray(char *p, char **e, int len)
+pdfarray(Biobuf *b)
{
Object *o, *m;
Object **a;
+ int c, noel;
- o = nil;
- if(len < 3 || p[0] != '['){
- werrstr("invalid array");
- goto err;
- }
- p++;
- len--;
-
- o = calloc(1, sizeof(*o));
+ /* freeobject(nil) is a no-op, so err: is safe when the allocation fails */
+ if((o = calloc(1, sizeof(*o))) == nil)
+ goto err;
o->type = Oarray;
- for(; len >= 1;){
- for(; len > 0 && isws(*p); p++, len--);
- if(len < 1){
- werrstr("too short");
+ Bgetc(b); /* throw away '[' */
+
+ /*
+ * noel ("no element") is set once pdfobject() fails: the only
+ * character accepted after that is the closing ']'.
+ */
+ for(noel = 0;;){
+ if((c = Bgetc(b)) < 0 || c == ']')
+ break;
+ if(noel){
+ werrstr("no ']'");
goto err;
}
- if(p[0] == ']'){
- p++;
- break;
+ Bungetc(b);
+ if((m = pdfobject(b)) == nil){
+ noel = 1;
+ continue;
}
- if((m = pdfobject(p, e, len)) == nil)
- goto err;
- len -= *e - p;
- p = *e;
-
if((a = realloc(o->array.e, (o->array.ne+1)*sizeof(Object*))) == nil){
freeobject(m);
goto err;
@@ -45,11 +39,14 @@
a[o->array.ne++] = m;
}
- *e = p;
+ /* the loop also ends on a read error or EOF; only ']' is a proper end */
+ if(c != ']'){
+ werrstr("no ']'");
+ goto err;
+ }
return o;
err:
+ werrstr("array: %r");
freeobject(o);
-
return nil;
}
--- a/dict.c
+++ b/dict.c
@@ -1,70 +1,77 @@
#include <u.h>
#include <libc.h>
-#include <ctype.h>
+#include <bio.h>
#include "pdf.h"
+/* 7.3.7 Dictionary Objects */
+
+/*
+ * Parses a dictionary object from b, which must be positioned at its
+ * "<<". Key/value pairs are parsed with pdfobject() until ">>" is
+ * seen; every key has to be a name. Returns the new Odict object, or
+ * nil with the error string set (prefixed with "dict: ").
+ */
Object *
-pdfdict(char *p, char **e, int len)
+pdfdict(Biobuf *b)
{
Object *o, *k, *v;
KeyValue *kv;
+ int c, nokey;
- o = nil;
- if(len < 4 || p[0] != '<' || p[1] != '<'){
- werrstr("invalid dict");
- goto err;
- }
- p += 2;
- len -= 2;
+ /* skip '<<' */
+ Bseek(b, 2, 1);
+ k = v = nil;
- o = calloc(1, sizeof(*o));
+ /* k and v are already nil, so err: is safe when the allocation fails */
+ if((o = calloc(1, sizeof(*o))) == nil)
+ goto err;
o->type = Odict;
- for(; len >= 2;){
- for(; len > 2 && isws(*p); p++, len--);
- if(len < 1){
- werrstr("too short");
+ /*
+ * nokey is set once a key fails to parse: the only thing accepted
+ * after that is the closing ">>".
+ */
+ for(nokey = 0;;){
+ if((c = Bgetc(b)) < 0)
goto err;
+ if(c == '>'){
+ if(Bgetc(b) == '>')
+ break;
+ werrstr("no '>>'");
+ goto err;
}
-
- if(p[0] == '>' && p[1] == '>'){
- p += 2;
- break;
+ if(nokey){
+ werrstr("no '>>'");
+ goto err;
}
- if((k = pdfobject(p, e, len)) == nil)
+ Bungetc(b);
+ if((k = pdfobject(b)) == nil){
+ nokey = 1;
+ continue;
+ }
+ if((v = pdfobject(b)) == nil)
goto err;
if(k->type != Oname){
- freeobject(k);
werrstr("expected name as a key");
goto err;
}
- len -= *e - p;
- p = *e;
- if((v = pdfobject(p, e, len)) == nil){
- freeobject(k);
+ if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil)
goto err;
- }
- len -= *e - p;
- p = *e;
- if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil){
- freeobject(k);
- freeobject(v);
- goto err;
- }
-
o->dict.kv = kv;
- kv[o->dict.nkv].key = strdup(k->name);
+ /* nkv is not bumped yet, so a failed copy leaves no half-filled entry behind */
+ if((kv[o->dict.nkv].key = strdup(k->name)) == nil)
+ goto err;
freeobject(k);
kv[o->dict.nkv++].value = v;
+ k = v = nil;
}
- *e = p;
-
return o;
err:
freeobject(o);
+ freeobject(k);
+ freeobject(v);
+ werrstr("dict: %r");
return nil;
+}
+
+/*
+ * Looks up name among the keys of dictionary o and returns the value
+ * stored under the first matching key. Returns nil when o is nil, o
+ * is not a dictionary, name is nil, or no key matches.
+ */
+Object *
+pdfdictget(Object *o, char *name)
+{
+ int i;
+
+ if(o == nil || name == nil || o->type != Odict)
+ return nil;
+
+ for(i = 0; i < o->dict.nkv; i++){
+ if(strcmp(name, o->dict.kv[i].key) == 0)
+ return o->dict.kv[i].value;
+ }
+
+ return nil;
}
--- a/eval.c
+++ b/eval.c
@@ -1,23 +1,34 @@
#include <u.h>
#include <libc.h>
+#include <bio.h>
#include "pdf.h"
+/*
+ * Resolves an indirect object in place: looks up o->indir.id in the
+ * xref table, seeks to the recorded offset, parses the object found
+ * there and copies it over *o.
+ * Returns 0 on success, and also when o is nil or not an indirect
+ * object (nothing to do); returns -1 on failure.
+ */
int
pdfeval(Pdf *pdf, Object *o)
{
+ Object *d;
Xref *x;
int i;
- if(o->type != Oindir)
+ if(o == nil || o->type != Oindir)
return 0;
- for(i = 0; i < pdf->nxref; i++){
- x = &pdf->xref[i];
- if(x->id == o->indir.id)
- return 0;
+ /* linear search for the xref entry with a matching id */
+ for(i = 0; i < pdf->nxref && pdf->xref[i].id != o->indir.id; i++);
+ if(i >= pdf->nxref){
+ werrstr("no object id %d in xref", o->indir.id);
+ return -1;
}
+ x = &pdf->xref[i];
- werrstr("no object id %d in xref", o->indir.id);
+ if(Bseek(pdf->bio, x->off, 0) != x->off){
+ werrstr("xref seek failed");
+ return -1;
+ }
+ if((d = pdfobject(pdf->bio)) == nil)
+ return -1;
+ /*
+ * NOTE(review): this copies sizeof(Object) bytes only. String and
+ * name data appears to be stored inline past the end of the struct,
+ * in which case it would be cut off here -- confirm.
+ */
+ *o = *d;
+ /*
+ * presumably d is cleared so that freeobject() does not release what
+ * *o now refers to; what freeobject() does with a zeroed type depends
+ * on the enum values -- TODO confirm. NOTE(review): pdfobject() can
+ * return its shared static null object, which would be written to here.
+ */
+ memset(d, 0, sizeof(*d));
+ freeobject(d);
- return -1;
+ return 0;
}
--- a/filter.c
+++ b/filter.c
@@ -1,5 +1,6 @@
#include <u.h>
#include <libc.h>
+#include <bio.h>
#include "pdf.h"
/*
--- a/main.c
+++ b/main.c
@@ -1,8 +1,11 @@
#include <u.h>
#include <libc.h>
#include <thread.h>
+#include <bio.h>
#include "pdf.h"
+int mainstacksize = 32768;
+
static void
usage(void)
{
@@ -15,6 +18,7 @@
{
int fd;
Pdf *pdf;
+ Object *v;
quotefmtinstall();
@@ -40,7 +44,11 @@
if((fd = open(argv[0], OREAD)) < 0)
sysfatal("%r");
if((pdf = pdfopen(fd)) == nil)
- sysfatal("pdfopen: %r");
+ sysfatal("%r");
+ if((v = pdfdictget(pdf->info, "Creator")) != nil)
+ fprint(2, "creator: %s\n", v->str);
+ if((v = pdfdictget(pdf->info, "Producer")) != nil)
+ fprint(2, "producer: %s\n", v->str);
pdfclose(pdf);
threadexitsall(nil);
--- a/misc.c
+++ b/misc.c
@@ -1,10 +1,11 @@
#include <u.h>
#include <libc.h>
+#include <bio.h>
#include "pdf.h"
/* 7.2.2 whitespace */
int
-isws(char c)
+isws(int c)
{
return /* \0 is missing on purpose */
c == '\t' || c == '\n' || c == '\f' || c == '\r' ||
@@ -13,7 +14,7 @@
/* 7.2.2 delimeters */
int
-isdelim(char c)
+isdelim(int c)
{
return
c == '(' || c == ')' || c == '<' || c == '>' ||
--- a/name.c
+++ b/name.c
@@ -1,104 +1,132 @@
#include <u.h>
#include <libc.h>
+#include <bio.h>
#include "pdf.h"
-int
-pdfname(char *p, char **e, int len)
+/* 7.3.5 Name Objects */
+
+/*
+ * Parses a name object from b, which must be positioned at its '/'.
+ * The name ends at EOF or at the first whitespace or delimiter, which
+ * is left unread. "#xx" escapes are decoded to the byte they encode;
+ * an escape that decodes to a null byte is rejected.
+ * Returns a new Oname object with a null-terminated name, or nil with
+ * the error string set (prefixed with "name: ").
+ */
+Object *
+pdfname(Biobuf *b)
{
- int i, o;
+ Object *o;
+ char *s, *r, hex[3];
+ int c, sz, maxsz;
- if(len < 2){
- werrstr("too short");
- goto err;
- }
+ Bgetc(b); /* skip '/' */
- if(p[0] != '/'){
- werrstr("invalid first char");
+ maxsz = 32;
+ if((s = malloc(maxsz)) == nil)
goto err;
- }
- for(i = 1, o = 0; i < len; i++){
- if(p[i] < '!' || p[i] > '~'){
- if(isws(p[i]) || isdelim(p[i]))
+ for(sz = 0;;){
+ if((c = Bgetc(b)) < 0){
+ if(c == Beof)
break;
- werrstr("invalid char %02x", p[i]);
goto err;
}
- if(p[i] == '#'){
- i++;
- if(i+2 > len){
- werrstr("hex too short");
+
+ if(isws(c) || isdelim(c)){
+ Bungetc(b);
+ break;
+ }
+ if(c < '!' || c > '~'){
+ werrstr("invalid char %02x", c);
+ goto err;
+ }
+ if(c == '#'){
+ /* set the error string explicitly: err: only prefixes whatever is there */
+ if((c = Bgetc(b)) < 0){
+ werrstr("hex too short");
goto err;
}
- if(dec16((uchar*)p+o, 1, p+i, 2) != 1){
+ hex[0] = c;
+ if((c = Bgetc(b)) < 0){
+ werrstr("hex too short");
+ goto err;
+ }
+ hex[1] = c;
+ /* decode the two digits in place; the byte ends up in hex[0] */
+ if(dec16((uchar*)hex, 1, hex, 2) != 1){
werrstr("invalid hex");
goto err;
}
- o++;
- i++;
- }else{
- p[o++] = p[i];
+ c = hex[0];
+ /* a null byte would cut the null-terminated name short */
+ if(c == 0){
+ werrstr("null char in name");
+ goto err;
+ }
}
+ /* grow the buffer by doubling */
+ if(sz+1 >= maxsz){
+ maxsz *= 2;
+ if((r = realloc(s, maxsz)) == nil)
+ goto err;
+ s = r;
+ }
+ s[sz++] = c;
}
- p[o] = 0;
- *e = p + i;
+ /* the name is stored inline, right after the object header */
+ if((o = malloc(sizeof(*o) + sz + 1)) != nil){
+ memmove(o->name, s, sz);
+ o->name[sz] = 0;
+ o->type = Oname;
+ free(s);
+ return o;
+ }
- return o;
err:
werrstr("name: %r");
- return -1;
+ free(s);
+ return nil;
}
#ifdef TEST
+/*
+ * Test cases for pdfname: "in" is the raw input fed to the parser,
+ * "out" is the expected name, or nil when an error is expected.
+ */
static struct {
- char *i;
- int len;
- char *o;
- int r;
- int e;
+ char *in;
+ char *out;
}t[] = {
- {"/SimpleName", 11, "SimpleName", 10},
- {"WrongName", 9, nil, -1},
- {"/.$()", 5, ".$()", 4},
- {"/#30", 4, "0", 1},
- {"/#3", 3, nil, -1},
- {"/#G0", 4, nil, -1},
- {"/#", 2, nil, -1},
- {"/Wrong Char", 11, nil, -1},
- {"/\xff", 2, nil, -1},
+ {"/SimpleName", "SimpleName"},
+ {"/.$()", ".$"},
+ {"/#30", "0"},
+ {"/#3", nil},
+ {"/#G0", nil},
+ {"/#", nil},
+ {"/Space Between", "Space"},
+ {"/Two/Names", "Two"},
+ {"/\xff", nil,},
};
+static char *s;
+static int off, n;
+
+/*
+ * Read callback for the test Biobuf: hands out up to sz bytes of the
+ * current test input s, starting at off, and advances off.
+ * Returns the number of bytes copied.
+ */
+static int
+rd(Biobufhdr *, void *data, long sz)
+{
+ long left;
+
+ left = n-off;
+ if(left < sz)
+ sz = left;
+ memmove(data, s+off, sz);
+ off += sz;
+
+ return sz;
+}
+
+/*
+ * Runs pdfname over every entry of t and prints one OK/ERROR line
+ * per case to file descriptor 2.
+ */
void
test_pdfname(void)
{
- char *e;
- int i, r;
+ Object *o;
+ Biobuf b;
+ int i;
fprint(2, "pdfname\n");
for(i = 0; i < nelem(t); i++){
+ /* point the rd callback at this case's input */
+ s = t[i].in;
+ n = strlen(s);
+ off = 0;
+ Binit(&b, -1, OREAD);
+ Biofn(&b, rd);
+
fprint(2, "\t%d: ", i);
- r = pdfname(t[i].i, &e, t[i].len);
- if(r != t[i].r){
- fprint(2, "expected r=%d, got %d", t[i].r, r);
- if(r < 0)
- fprint(2, " (%r)\n");
- else
- fprint(2, "\n");
- continue;
- }else if(r >= 0){
- if(t[i].i+t[i].len != e){
- fprint(2, "expected e=%p, got %p\n", t[i].i+t[i].len, e);
- continue;
- }else if(strcmp(t[i].o, t[i].i) != 0){
- fprint(2, "expected %q, got %q\n", t[i].o, t[i].i);
- continue;
- }
- }
- fprint(2, "OK");
- if(r < 0)
- fprint(2, " (%r)");
- fprint(2, "\n");
+ o = pdfname(&b);
+ /* a nil result is correct only when the case expects an error */
+ if(o == nil && t[i].out != nil)
+ fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
+ else if(o != nil && t[i].out == nil)
+ fprint(2, "ERROR: expected error, got %q\n", o->name);
+ else if(o == nil && t[i].out == nil)
+ fprint(2, "OK (%r)\n");
+ else if(strcmp(o->name, t[i].out) != 0)
+ fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->name);
+ else
+ fprint(2, "OK\n");
+ freeobject(o);
+ Bterm(&b);
}
}
#endif
--- a/object.c
+++ b/object.c
@@ -1,98 +1,133 @@
#include <u.h>
#include <libc.h>
#include <ctype.h>
+#include <bio.h>
#include "pdf.h"
+Object *pdfstring(Biobuf *b);
+Object *pdfname(Biobuf *b);
+Object *pdfarray(Biobuf *b);
+Object *pdfdict(Biobuf *b);
+
+static Object null = {
+ .type = Onull,
+};
+
+/*
+ * General function to parse an object of any type.
+ * Skips leading whitespace and dispatches on the first character:
+ * dictionaries, strings, names and arrays are handed to their own
+ * parsers; null, true, false and numbers are handled here. Two
+ * numbers followed by 'R' make an indirect reference; two numbers
+ * followed by "obj" introduce an object whose body is parsed and
+ * returned instead.
+ * Returns nil with the error string set (prefixed with "object: ").
+ */
Object *
-pdfobject(char *p, char **e, int len)
+pdfobject(void *b)
{
- Object *o;
- char *s, *p0;
- int sz, gen;
+ Object *o, *o2;
+ vlong off;
+ int c, tf;
- o = nil;
- for(; len > 0 && isws(*p); p++, len--);
- if(len < 1){
- werrstr("too short");
+ o = o2 = nil;
+ do; while(isws(c = Bgetc(b)));
+ if(c < 0)
goto err;
- }
- if(*p < 1){
- werrstr("unexpected non-ascii char");
- goto err;
- }
-
- switch(*p){
+ switch(c){
case '<': /* dictionary or a string */
- if(len < 2){
- werrstr("too short");
- goto err;
+ c = Bgetc(b);
+ if(c == '<'){
+ Bseek(b, -2, 1);
+ return pdfdict(b);
}
- if(p[1] == '<'){ /* dictionary */
- o = pdfdict(p, e, len);
- break;
- }
+ Bungetc(b);
/* fall through */
- case '(': /* string */
- if((sz = pdfstring(p, e, len)) < 0)
- goto err;
- if((o = malloc(sizeof(*o)+sz+1)) != nil){
- o->type = Ostr;
- o->str = (char*)(o+1);
- strcpy(o->str, p);
- }
- break;
+ case '(':
+ Bungetc(b);
+ return pdfstring(b);
case '/':
- if((sz = pdfname(p, e, len)) < 0)
- goto err;
- if((o = malloc(sizeof(*o)+sz+1)) != nil){
- o->type = Oname;
- o->str = (char*)(o+1);
- strcpy(o->str, p);
- }
- break;
+ Bungetc(b);
+ return pdfname(b);
case '[':
- o = pdfarray(p, e, len);
- break;
+ Bungetc(b);
+ return pdfarray(b);
+ case 'n':
+ off = Boffset(b);
+ if(Bgetc(b) == 'u' && Bgetc(b) == 'l' && Bgetc(b) == 'l' && (isws(c = Bgetc(b)) || isdelim(c))){
+ Bungetc(b);
+ /* all nulls share one static object; freeobject() leaves it alone */
+ return &null;
+ }
+ Bseek(b, off, 0);
+ c = 'n'; /* report the character that was actually read */
+ goto unexpected;
+
+ case 't':
+ off = Boffset(b);
+ tf = 1;
+ if(Bgetc(b) == 'r' && Bgetc(b) == 'u' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
+ goto bool;
+ Bseek(b, off, 0);
+ c = 't';
+ goto unexpected;
+
+ case 'f':
+ off = Boffset(b);
+ tf = 0;
+ if(Bgetc(b) == 'a' && Bgetc(b) == 'l' && Bgetc(b) == 's' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
+ goto bool;
+ Bseek(b, off, 0);
+ c = 'f';
+ goto unexpected;
+bool:
+ Bungetc(b);
+ if((o = malloc(sizeof(*o))) == nil)
+ goto err;
+ o->type = Obool;
+ o->bool = tf;
+ return o;
+
default:
- if(isdigit(*p)){
- if((o = malloc(sizeof(*o))) != nil){
- o->type = Onum;
- o->num = strtod(p, e);
- sz = len - (*e - p);
- p0 = *e;
- s = p0;
- for(; sz > 0 && isws(*s); s++, sz--);
- if(sz > 0 && isdigit(*s)){
- gen = strtod(s, &p);
- sz -= (p - s);
- s = p;
- for(; sz > 0 && isws(*s); s++, sz--);
- if(sz > 0 && *s == 'R'){ /* indirect object */
- s++;
- o->type = Oindir;
- o->indir.id = o->num;
- o->indir.gen = gen;
- p0 = s;
- }
- }
- *e = p0;
+ if((o = malloc(sizeof(*o))) == nil)
+ goto err;
+ /* freeobject() switches on the type, so set it before any goto err */
+ o->type = Onum;
+ if(!isdigit(c)){
+unexpected:
+ Bungetc(b);
+ werrstr("unexpected char '%c'", c);
+ goto err;
+ }
+ /* it could be a number or an indirect object */
+ Bungetc(b);
+ Bgetd(b, &o->num); /* get the first number */
+ off = Boffset(b); /* seek here if not an indirect object later */
+
+ if((o2 = pdfobject(b)) != nil && o2->type == Onum){ /* second object is number too */
+ do; while(isws(c = Bgetc(b)));
+ if(c < 0)
+ goto err;
+ if(c == 'R'){ /* indirect object */
+ o->type = Oindir;
+ o->indir.id = o->num;
+ o->indir.gen = o2->num;
+ freeobject(o2);
+ return o;
}
- break;
+ if(c == 'o' && Bgetc(b) == 'b' && Bgetc(b) == 'j'){ /* object */
+ freeobject(o2);
+ freeobject(o); /* the object number is not kept yet */
+ /* FIXME put into a map */
+ return pdfobject(b);
+ }
}
- werrstr("unexpected char %c", *p);
- goto err;
- }
- if(o != nil)
+ /* just a number: drop the lookahead object, go back and return it */
+ freeobject(o2);
+ o2 = nil; /* err: must not free it a second time */
+ o->type = Onum;
+ if(Bseek(b, off, 0) != off){
+ werrstr("seek failed");
+ goto err;
+ }
return o;
+ }
+
err:
werrstr("object: %r");
freeobject(o);
+ freeobject(o2);
return nil;
}
@@ -99,7 +134,32 @@
+/*
+ * Releases an object returned by pdfobject(). nil is accepted and
+ * ignored. Array elements are released recursively.
+ */
void
freeobject(Object *o)
{
+ int i;
+
if(o == nil)
return;
+
+ switch(o->type){
+ case Onull:
+ /* pdfobject() returns a pointer to a static null object: nothing to free */
+ return;
+
+ case Obool:
+ case Onum:
+ case Ostr:
+ case Oname:
+ break;
+
+ case Oarray:
+ for(i = 0; i < o->array.ne; i++)
+ freeobject(o->array.e[i]);
+ free(o->array.e);
+ break;
+
+ /*
+ * NOTE(review): for Odict the keys, the values and the kv array are
+ * not released here -- confirm whether something else owns them or
+ * this is a leak.
+ */
+ case Odict:
+ case Ostream:
+ case Oindir:
+ break;
+ }
+
free(o);
}
--- a/pdf.c
+++ b/pdf.c
@@ -33,8 +33,8 @@
/* store non-free objects only */
newnxref = pdf->nxref;
for(e = s, i = 0; i < nxref; i++, e += 20){
- if(e[10] != ' ' || e[18] != '\r' || e[19] != '\n'){
- werrstr("invalid xref line");
+ if(!isspace(e[10]) || !isspace(e[18]) || !isspace(e[19])){
+ werrstr("invalid xref line (%d/%d)", i, nxref);
goto err;
}
xref.id = xref0 + i;
@@ -76,15 +76,11 @@
trailerread(Pdf *pdf)
{
int i;
- char *s, *e;
Object *o;
KeyValue *kv;
- o = nil;
- if((s = Brdstr(pdf->bio, 0, 1)) == nil || (o = pdfobject(s, &e, Blinelen(pdf->bio))) == nil)
+ if((o = pdfobject(pdf->bio)) == nil)
goto err;
- free(s);
- s = nil;
if(o->type != Odict){
werrstr("isn't a dictionary");
@@ -92,7 +88,6 @@
}
for(i = 0, kv = o->dict.kv; i < o->dict.nkv; i++, kv++){
- fprint(2, "# %s %p\n", kv->key, kv->value);
if(strcmp(kv->key, "Root") == 0)
pdf->root = kv->value;
else if(strcmp(kv->key, "Info") == 0)
@@ -112,7 +107,6 @@
return 0;
err:
freeobject(o);
- free(s);
return -1;
}
@@ -127,6 +121,7 @@
int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
int i, n, off;
+ b = nil;
if((pdf = calloc(1, sizeof(*pdf))) == nil || (b = Bfdopen(fd, OREAD)) == nil)
goto err;
pdf->bio = b;
@@ -204,6 +199,7 @@
return pdf;
err:
+ /* b is still nil when allocating the Pdf or opening the Biobuf failed */
+ if(b != nil)
+ werrstr("pdfopen: %r [at %p]", (void*)Boffset(b));
+ else
+ werrstr("pdfopen: %r");
pdfclose(pdf);
return nil;
}
--- a/pdf.h
+++ b/pdf.h
@@ -20,8 +20,11 @@
union {
int bool;
double num;
- char *str;
- char *name;
+ struct {
+ int len;
+ char str[1];
+ };
+ char name[1];
struct {
u32int id;
@@ -50,8 +53,8 @@
Xref *xref;
int nxref; /* 7.5.4 xref subsection number of objects */
- Object *root; /* 7.5.5 root object */
- Object *info; /* 7.5.5 info dictionary */
+ Object *root; /* 7.7.2 root object */
+ Object *info; /* 14.3.3 info dictionary */
};
struct Xref {
@@ -63,45 +66,16 @@
Pdf *pdfopen(int fd);
void pdfclose(Pdf *pdf);
-/*
- * General function to parse an object of any type.
- */
-Object *pdfobject(char *p, char **e, int len);
-
+Object *pdfobject(void *b);
void freeobject(Object *o);
/*
- * 7.3.4 String Objects
- *
- * Rewrites the string in place with null termination and returns the
- * length in bytes, without the null terminator.
- * Returns < 0 if parsing failed.
- * (*e) is advanced to the position after the string pointed by (p).
- */
-int pdfstring(char *p, char **e, int len);
-
-/*
- * 7.3.5 Name Objects
- *
- * Works the same way as pdfstring, but for name objects.
- */
-int pdfname(char *p, char **e, int len);
-
-/*
- * 7.3.6 Array Objects
- */
-Object *pdfarray(char *p, char **e, int len);
-
-/*
- * 7.3.7 Dictionary Objects
- */
-Object *pdfdict(char *p, char **e, int len);
-
-/*
* If the object is indirect, resolve it. Operation is not recursive, ie
* values of a dictionary won't be resolved automatically.
*/
int pdfeval(Pdf *pdf, Object *o);
-int isws(char c);
-int isdelim(char c);
+int isws(int c);
+int isdelim(int c);
+
+Object *pdfdictget(Object *o, char *name);
--- a/string.c
+++ b/string.c
@@ -1,8 +1,11 @@
#include <u.h>
#include <libc.h>
+#include <bio.h>
#include "pdf.h"
-static int esc[] = {
+/* 7.3.4 String Objects */
+
+static char esc[] = {
['n'] = '\n',
['r'] = '\r',
['t'] = '\t',
@@ -14,175 +17,207 @@
['\n'] = -1,
};
+/*
+ * Parses a hexadecimal string "<...>" from b, which must be
+ * positioned at its '<'. Returns a new Ostr object holding the
+ * decoded bytes plus a terminating null, with len set to the number
+ * of decoded bytes, or nil on failure.
+ */
-static int
-pdfstringhex(char *p, char **e, int len)
+static Object *
+stringhex(Biobuf *b)
{
- int i;
+ char *s;
+ Object *o;
+ int len, n;
- for(i = 1; i < len; i += 1){
- if(p[i] == '>')
- break;
+ /* read everything up to and including the closing '>' */
+ if((s = Brdstr(b, '>', 0)) == nil)
+ return nil;
+ /* index of the last character read; it is not '>' if input ended early */
+ len = Blinelen(b) - 1;
+ if(s[len] != '>'){
+ werrstr("no '>'");
+ free(s);
+ return nil;
}
- if(i >= len){
- werrstr("hex not closed");
- return -1;
- }
- p[i] = '0'; /* the final zero may be missing */
- *e = p+i+1;
- i = dec16((uchar*)p, i/2, p+1, i) == i/2 ? i/2 : -1;
- if(i < 0)
+ s[len] = '0'; /* the final zero may be missing */
+ n = len/2;
+ o = nil;
+ /* decode in place: input starts at s+1 (after '<'), output at s */
+ if(dec16((uchar*)s, n, s+1, len) != n){
werrstr("invalid hex");
- p[i] = 0;
- return i;
+ }else if((o = malloc(sizeof(*o) + n + 1)) != nil){
+ memmove(o->str, s, n);
+ o->str[n] = 0;
+ o->len = n;
+ o->type = Ostr;
+ }
+
+ free(s);
+ return o;
}
+/*
+ * Parses a string object from b. A '<' seen before any parenthesis
+ * has been opened hands the input over to stringhex(); otherwise a
+ * literal string "(...)" is expected. Backslash escapes and octal
+ * codes are decoded, nested parentheses are not copied to the result.
+ * Returns a new Ostr object (null-terminated, len set to the number
+ * of bytes), or nil with the error string set.
+ */
-int
-pdfstring(char *p, char **e, int len)
+Object *
+pdfstring(Biobuf *b)
{
- Rune r;
- int c, j, i, o, n, paren;
+ Object *o;
+ char *s, *r;
char oct[4];
+ int i, c, paren, sz, maxsz;
- if(len < 2){
- werrstr("too short");
- goto err;
- }
+ maxsz = 64;
+ if((s = malloc(maxsz)) == nil)
+ return nil;
- paren = 0;
- for(i = o = 0; i < len;){
- if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
- werrstr("rune error at byte %d", n);
- return -1;
- }
+ for(paren = sz = 0;;){
+ if((c = Bgetc(b)) <= 0)
+ break;
- if(i == 0){
- if(r == '('){
- paren = 1;
- i++;
- continue;
+ switch(c){
+ case '<':
+ /*
+ * Hex string only while no parenthesis is open; inside a
+ * literal string '<' is an ordinary character. The scratch
+ * buffer is not needed by stringhex(), release it first.
+ */
+ if(paren < 1){
+ Bungetc(b);
+ free(s);
+ return stringhex(b);
}
- if(r == '<'){
- len = pdfstringhex(p, e, len);
- if(len < 0)
- goto err;
- return len;
- }
- werrstr("invalid first char");
- goto err;
- }
+ break;
- if(r == '\\'){
- if(++i >= len){
- werrstr("escaped char out of string len");
- goto err;
- }
- if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
- werrstr("rune error at byte %d", i);
- goto err;
- }
- if(r >= '0' && r <= '9'){ /* octal */
- n = 0;
- for(j = 0; j < 3 && i < len && p[i] >= '0' && p[i] <= '9'; j++, i++)
- oct[j] = p[i];
- oct[j] = 0;
- c = strtol(oct, nil, 8);
- }else if(r >= nelem(esc) || (c = esc[r]) == 0){
- werrstr("unknown escape char at byte %d", i);
- goto err;
- }
- r = c;
- i += n;
- if(c < 0)
- continue;
- }else if(r == '('){
+ case '(':
paren++;
- i++;
continue;
- }else if(r == ')'){
+
+ case ')':
paren--;
- i++;
- if(paren == 0)
+ /* c = 0 marks a regular end of the string for the checks below */
+ if(paren < 1){
+ c = 0;
break;
+ }
continue;
- }else{
- i += n;
+
+ case '\\':
+ if((c = Bgetc(b)) <= 0)
+ break;
+ if(c >= '0' && c <= '7'){ /* octal */
+ oct[0] = c;
+ for(i = 1; i < 3 && (c = Bgetc(b)) >= '0' && c <= '7'; i++)
+ oct[i] = c;
+ if(c <= 0)
+ break;
+ /* the character that ended the digits belongs to the string */
+ if(c < '0' || c > '7')
+ Bungetc(b);
+ oct[i] = 0;
+ c = strtol(oct, nil, 8);
+ }else if(c >= nelem(esc) || (c = esc[c]) == 0){
+ werrstr("unknown escape char %c", c);
+ goto err;
+ }else if(c < 0){
+ /* negative table entry: the escaped character produces no output */
+ continue;
+ }
+ break;
+
+ default:
+ if(paren < 1){
+ werrstr("unexpected char '%c'", c);
+ goto err;
+ }
+ break;
}
- o += runetochar(p+o, &r);
+ /*
+ * NOTE(review): an octal escape that evaluates to 0 also stops
+ * the loop here, with the parenthesis still open.
+ */
+ if(c <= 0)
+ break;
+ if(sz+1 > maxsz){
+ maxsz *= 2;
+ if((r = realloc(s, maxsz)) == nil)
+ goto err;
+ s = r;
+ }
+ s[sz++] = c;
}
- if(paren > 0){
- werrstr("non-closed paren");
+ if(paren != 0){
+ werrstr("bad paren");
goto err;
}
+ if(c < 0){
+ werrstr("short");
+ goto err;
+ }
- p[o] = 0;
- *e = p + i;
+ /* the bytes are stored inline, right after the object header */
+ if(c >= 0 && (o = malloc(sizeof(*o) + sz + 1)) != nil){
+ memmove(o->str, s, sz);
+ o->str[sz] = 0;
+ o->len = sz;
+ o->type = Ostr;
+ free(s);
+ return o;
+ }
- return o;
err:
+ free(s);
werrstr("string: %r");
- return -1;
+ return nil;
}
#ifdef TEST
+/*
+ * Test cases for pdfstring: "in" is the raw input fed to the parser,
+ * "out" is the expected string, or nil when an error is expected.
+ */
static struct {
- char *i;
- int len;
- char *o;
- int r;
- int e;
+ char *in;
+ char *out;
}t[] = {
- {"(simple string)", 15, "simple string", 13},
- {"(non-closed paren", 17, nil, -1},
- {"wrong first char", 16, nil, -1},
- {"(parens((()((())))()))", 22, "parens", 6},
- {"(\\0053)", 7, "\x053", 2},
- {"(\\053)", 6, "+", 1},
- {"(\\53)", 5, "+", 1},
- {"()", 2, "", 0},
- {")", 1, nil, -1},
- {"(\\)\\()", 6, ")(", 2},
- {"(\\\\)", 4, "\\", 1},
- {"a", 1, nil, -1},
- {"(1\\\n2)", 6, "12", 2},
- {"<323130>", 8, "210", 3},
- {"<32313>", 7, "210", 3},
- {"<>", 2, "", 0},
- {"<", 1, nil, -1},
- {"<zz>", 4, nil, -1},
+ {"", nil},
+ {"(test, success)", "test, success"},
+ {"(simple string)", "simple string"},
+ {"(non-closed paren", nil},
+ {"wrong first char", nil},
+ {"(parens((()((())))()))", "parens"},
+ {"(\\0053)", "\x053"},
+ {"(\\053)", "+"},
+ {"(\\53)", "+"},
+ {"()", ""},
+ {")", nil},
+ {"(\\)\\()", ")("},
+ {"(\\\\)", "\\"},
+ {"a", nil},
+ {"(1\\\n2)", "12"},
+ {"<323130>", "210"},
+ {"<32313>", "210"},
+ {"<>", ""},
+ {"<", nil},
+ {"<zz>", nil},
+ {">", nil},
};
+static char *s;
+static int off, n;
+
+/*
+ * Read callback for the test Biobuf: hands out up to sz bytes of the
+ * current test input s, starting at off, and advances off.
+ * Returns the number of bytes copied.
+ */
+static int
+rd(Biobufhdr *, void *data, long sz)
+{
+ long left;
+
+ left = n-off;
+ if(left < sz)
+ sz = left;
+ memmove(data, s+off, sz);
+ off += sz;
+
+ return sz;
+}
+
+/*
+ * Runs pdfstring over every entry of t and prints one OK/ERROR line
+ * per case to file descriptor 2.
+ */
void
test_pdfstring(void)
{
- char *e;
- int i, r;
+ Object *o;
+ Biobuf b;
+ int i;
fprint(2, "pdfstring\n");
for(i = 0; i < nelem(t); i++){
+ /* point the rd callback at this case's input */
+ s = t[i].in;
+ n = strlen(s);
+ off = 0;
+ Binit(&b, -1, OREAD);
+ Biofn(&b, rd);
+
fprint(2, "\t%d: ", i);
- r = pdfstring(t[i].i, &e, t[i].len);
- if(r != t[i].r){
- fprint(2, "expected r=%d, got %d", t[i].r, r);
- if(r < 0)
- fprint(2, " (%r)\n");
- else
- fprint(2, "\n");
- continue;
- }else if(r >= 0){
- if(t[i].i+t[i].len != e){
- fprint(2, "expected e=%p, got %p\n", t[i].i+t[i].len, e);
- continue;
- }else if(strcmp(t[i].o, t[i].i) != 0){
- fprint(2, "expected %q, got %q\n", t[i].o, t[i].i);
- continue;
- }
- }
- fprint(2, "OK");
- if(r < 0)
- fprint(2, " (%r)");
- fprint(2, "\n");
+ o = pdfstring(&b);
+ /* a nil result is correct only when the case expects an error */
+ if(o == nil && t[i].out != nil)
+ fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
+ else if(o != nil && t[i].out == nil)
+ fprint(2, "ERROR: expected error, got %q\n", o->str);
+ else if(o == nil && t[i].out == nil)
+ fprint(2, "OK (%r)\n");
+ else if(strcmp(o->str, t[i].out) != 0)
+ fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->str);
+ else
+ fprint(2, "OK\n");
+ freeobject(o);
+ Bterm(&b);
}
}
#endif