shithub: pdffs

ref: 745debcdfb49837a82d112c2623b7a763fdf2cd0
dir: pdffs/string.c

View raw version
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "pdf.h"

/* 7.3.4 String Objects */

static char esc[] = {
	['n'] = '\n',
	['r'] = '\r',
	['t'] = '\t',
	['b'] = '\b',
	['f'] = '\f',
	['('] = '(',
	[')'] = ')',
	['\\'] = '\\',
	['\n'] = -1,
};

static Object *
stringhex(Biobuf *b)
{
	char *s;
	Object *o;
	int len, n;

	if((s = Brdstr(b, '>', 0)) == nil)
		return nil;
	len = Blinelen(b) - 1;
	if(s[len] != '>'){
		werrstr("no '>'");
		free(s);
		return nil;
	}
	s[len] = '0'; /* the final zero may be missing */
	n = len/2;
	o = nil;
	if(dec16((uchar*)s, n, s+1, len) != n){
		werrstr("invalid hex");
	}else if((o = malloc(sizeof(*o))) != nil){
		o->str = s;
		s[n] = 0;
		o->len = n;
		o->type = Ostr;
		return o;
	}

	free(s);
	return o;
}

Object *
pdfstring(Biobuf *b)
{
	Object *o;
	char *s, *r;
	char oct[4];
	int i, c, paren, sz, maxsz;

	maxsz = 64;
	if((s = malloc(maxsz)) == nil)
		return nil;

	for(paren = sz = 0;;){
		if((c = Bgetc(b)) <= 0)
			break;

		switch(c){
		case '<':
			if(sz == 0){
				Bungetc(b);
				return stringhex(b);
			}
			break;

		case '(':
			paren++;
			continue;

		case ')':
			paren--;
			if(paren < 1){
				c = 0;
				break;
			}
			continue;

		case '\\':
			if((c = Bgetc(b)) <= 0)
				break;
			if(c >= '0' && c <= '7'){ /* octal */
				oct[0] = c;
				for(i = 1; i < 3 && (c = Bgetc(b)) >= '0' && c <= '7'; i++)
					oct[i] = c;
				if(c <= 0)
					break;
				if(c < '0' || c > '7')
					Bungetc(b);
				oct[i] = 0;
				c = strtol(oct, nil, 8);
			}else if(c >= nelem(esc) || (c = esc[c]) == 0){
				werrstr("unknown escape char %c", c);
				goto err;
			}else if(c < 0){
				continue;
			}
			break;

		default:
			if(paren < 1){
				werrstr("unexpected char '%c'", c);
				goto err;
			}
			break;
		}

		if(c <= 0)
			break;
		if(sz+1 > maxsz){
			maxsz *= 2;
			if((r = realloc(s, maxsz)) == nil)
				goto err;
			s = r;
		}
		s[sz++] = c;
	}

	if(paren != 0){
		werrstr("bad paren");
		goto err;
	}
	if(c < 0){
		werrstr("short");
		goto err;
	}

	if(c >= 0 && (o = malloc(sizeof(*o))) != nil){
		s[sz] = 0;
		o->str = s;
		o->len = sz;
		o->type = Ostr;
		return o;
	}

err:
	free(s);
	werrstr("string: %r");
	return nil;
}

#ifdef TEST
static struct {
	char *in;
	char *out;
}t[] = {
	{"", nil},
	{"(test, success)", "test, success"},
	{"(simple string)", "simple string"},
	{"(non-closed paren", nil},
	{"wrong first char", nil},
	{"(parens((()((())))()))", "parens"},
	{"(\\0053)", "\x053"},
	{"(\\053)", "+"},
	{"(\\53)", "+"},
	{"()", ""},
	{")", nil},
	{"(\\)\\()", ")("},
	{"(\\\\)", "\\"},
	{"a", nil},
	{"(1\\\n2)", "12"},
	{"<323130>", "210"},
	{"<32313>", "210"},
	{"<>", ""},
	{"<", nil},
	{"<zz>", nil},
	{">", nil},
};

static char *s;
static int off, n;

static int
rd(Biobufhdr *, void *data, long sz)
{
	if(sz > n-off)
		sz = n-off;
	memmove(data, s+off, sz);
	off += sz;
	return sz;
}

void
test_pdfstring(void)
{
	Object *o;
	Biobuf b;
	int i;

	fprint(2, "pdfstring\n");
	for(i = 0; i < nelem(t); i++){
		s = t[i].in;
		n = strlen(s);
		off = 0;
		Binit(&b, -1, OREAD);
		Biofn(&b, rd);

		fprint(2, "\t%d: ", i);
		o = pdfstring(&b);
		if(o == nil && t[i].out != nil)
			fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
		else if(o != nil && t[i].out == nil)
			fprint(2, "ERROR: expected error, got %q\n", o->str);
		else if(o == nil && t[i].out == nil)
			fprint(2, "OK (%r)\n");
		else if(strcmp(o->str, t[i].out) != 0)
			fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->str);
		else
			fprint(2, "OK\n");
		pdfobjfree(o);
		Bterm(&b);
	}
}
#endif