shithub: pdffs

ref: 3c27f041321b91dbf2bfd0ab9e4865e03854cd68
dir: pdffs/string.c

View raw version
#include <u.h>
#include <libc.h>
#include "pdf.h"

static int esc[] = {
	['n'] = '\n',
	['r'] = '\r',
	['t'] = '\t',
	['b'] = '\b',
	['f'] = '\f',
	['('] = '(',
	[')'] = ')',
	['\\'] = '\\',
	['\n'] = -1,
};

/*
 * Decode a hex string "<...>" in place.
 *
 * p points at the opening '<' (the caller has already checked it, so
 * scanning starts at p[1]); len is the number of bytes available at p.
 * The '>' terminator is overwritten with '0' so that an odd number of
 * digits is padded with a trailing zero nibble, then the digits are
 * decoded with dec16 to the start of the buffer and NUL-terminated.
 *
 * Returns the number of decoded bytes, or -1 with errstr set when the
 * string is not closed or dec16 does not produce the expected count.
 * *e is set just past the '>' once the terminator has been found.
 */
static int
pdfstringhex(char *p, char **e, int len)
{
	int i, n;

	for(i = 1; i < len; i++){
		if(p[i] == '>')
			break;
	}
	if(i >= len){
		werrstr("hex not closed");
		return -1;
	}
	p[i] = '0'; /* the final zero may be missing */
	*e = p+i+1;

	/* i chars (digits plus the padding '0') should give i/2 bytes */
	n = i/2;
	if(dec16((uchar*)p, n, p+1, i) != n){
		/* bail out before terminating: there is no valid index to write to */
		werrstr("invalid hex");
		return -1;
	}
	p[n] = 0;
	return n;
}

/*
 * Decode the PDF string object at p, in place.
 *
 * p must begin with '(' (literal string) or '<' (hex string, which is
 * handed to pdfstringhex); len is the number of bytes available at p.
 * The decoded text overwrites the start of the buffer and is
 * NUL-terminated.
 *
 * In a literal string the escapes listed in esc[] are resolved, as are
 * one to three octal digits (0-7) after a backslash; backslash-newline
 * is dropped.  Any other escape is an error.  Unescaped parentheses are
 * counted to find the closing one but are not copied to the output.
 * NOTE(review): the PDF spec appears to treat balanced inner
 * parentheses as string data -- confirm that dropping them is intended.
 *
 * Returns the decoded length in bytes and sets *e just past the
 * consumed input.  On error returns -1 with errstr set to
 * "string: <reason>"; *e is only meaningful on success.
 */
int
pdfstring(char *p, char **e, int len)
{
	Rune r;
	int c, j, i, o, n, paren;
	char oct[4];

	if(len < 2){
		werrstr("too short");
		goto err;
	}

	paren = 0;
	for(i = o = 0; i < len;){
		if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
			/* report the offset (not the rune length) and take the common error path */
			werrstr("rune error at byte %d", i);
			goto err;
		}

		if(i == 0){
			if(r == '('){
				paren = 1;
				i++;
				continue;
			}
			if(r == '<'){
				len = pdfstringhex(p, e, len);
				if(len < 0)
					goto err;
				return len;
			}
			werrstr("invalid first char");
			goto err;
		}

		if(r == '\\'){
			if(++i >= len){
				werrstr("escaped char out of string len");
				goto err;
			}
			if((n = chartorune(&r, p+i)) == 1 && r == Runeerror){
				werrstr("rune error at byte %d", i);
				goto err;
			}
			if(r >= '0' && r <= '7'){ /* octal: only 0-7 are digits */
				n = 0; /* the loop below advances i itself */
				for(j = 0; j < 3 && i < len && p[i] >= '0' && p[i] <= '7'; j++, i++)
					oct[j] = p[i];
				oct[j] = 0;
				c = strtol(oct, nil, 8);
			}else if(r >= nelem(esc) || (c = esc[r]) == 0){
				werrstr("unknown escape char at byte %d", i);
				goto err;
			}
			r = c;
			i += n;
			if(c < 0) /* escaped newline: emits nothing */
				continue;
		}else if(r == '('){
			paren++;
			i++;
			continue;
		}else if(r == ')'){
			paren--;
			i++;
			if(paren == 0) /* matching close of the opening paren */
				break;
			continue;
		}else{
			i += n;
		}

		/* output never overtakes input, so decoding in place is safe */
		o += runetochar(p+o, &r);
	}

	if(paren > 0){
		werrstr("non-closed paren");
		goto err;
	}

	p[o] = 0;
	*e = p + i;

	return o;
err:
	werrstr("string: %r");
	return -1;
}

#ifdef TEST
/*
 * Test vectors for pdfstring().  pdfstring() rewrites its input in
 * place, so test_pdfstring() compares the input buffer itself against
 * the expected output.
 * NOTE(review): this relies on string literals being writable -- confirm
 * for the target compiler.
 */
static struct {
	char *i;   /* input, passed to pdfstring() and decoded in place */
	int   len; /* length passed to pdfstring(); also the expected end offset */
	char *o;   /* expected decoded string, nil when an error is expected */
	int   r;   /* expected return value (-1 for error) */
	int   e;   /* not referenced by test_pdfstring() */
}t[] = {
	{"(simple string)",        15, "simple string", 13},
	{"(non-closed paren",      17, nil,             -1},
	{"wrong first char",       16, nil,             -1},
	{"(parens((()((())))()))", 22, "parens",         6},
	{"(\\0053)",                7, "\x053",          2},
	{"(\\053)",                 6, "+",              1},
	{"(\\53)",                  5, "+",              1},
	{"()",                      2, "",               0},
	{")",                       1, nil,             -1},
	{"(\\)\\()",                6, ")(",             2},
	{"(\\\\)",                  4, "\\",             1},
	{"a",                       1, nil,             -1},
	{"(1\\\n2)",                6, "12",             2},
	{"<323130>",                8, "210",            3},
	{"<32313>",                 7, "210",            3},
	{"<>",                      2, "",               0},
	{"<",                       1, nil,             -1},
	{"<zz>",                    4, nil,             -1},
};

/*
 * Run every vector in t[] through pdfstring() and report the outcome
 * on fd 2: the return value is checked first, then (for successful
 * decodes) the end pointer and the decoded text.
 */
void
test_pdfstring(void)
{
	char *end, *in, *want;
	int k, got, len;

	fprint(2, "pdfstring\n");
	for(k = 0; k < nelem(t); k++){
		in = t[k].i;
		len = t[k].len;
		want = t[k].o;

		fprint(2, "\t%d: ", k);
		got = pdfstring(in, &end, len);

		/* wrong return value */
		if(got != t[k].r){
			fprint(2, "expected r=%d, got %d", t[k].r, got);
			if(got < 0)
				fprint(2, " (%r)\n");
			else
				fprint(2, "\n");
			continue;
		}

		/* expected failure: show the error that was produced */
		if(got < 0){
			fprint(2, "OK");
			fprint(2, " (%r)");
			fprint(2, "\n");
			continue;
		}

		/* successful decode: end pointer, then contents */
		if(in+len != end){
			fprint(2, "expected e=%p, got %p\n", in+len, end);
			continue;
		}
		if(strcmp(want, in) != 0){
			fprint(2, "expected %q, got %q\n", want, in);
			continue;
		}

		fprint(2, "OK");
		fprint(2, "\n");
	}
}
#endif