ref: b574ea6ce2f6fb2aa129da26687d4affeb1faabc
parent: 8a7f9b4b5c158944978efb13bc03dfd6e42899d3
author: Noam Preil <noam@pixelhero.dev>
date: Mon Jul 19 20:33:14 EDT 2021
Significantly improved text output
--- a/main.c
+++ b/main.c
@@ -17,17 +17,21 @@
threadexitsall("usage");
}
-static void
+static int
dumppage(Object *page)
{
+ int ret;
Page p;
- pageinit(&p);
- if(pagerender(&p, page) && p.buf.sz != 0)
- write(1, p.buf.b, p.buf.sz);
+ pageinit(&p, page);
+ ret = pagerender(&p);
+ if(ret)
+ if(p.buf.sz != 0)
+ fprint(1, "%s", (char*)p.buf.b);
pagefree(&p);
+ return ret;
}
-static void
+static int
dumppages(Object *pages)
{
Object *page, *kids, *type;
@@ -39,13 +43,19 @@
// Must be a dict, either Page or Pages
type = dictget(page, "Type");
// MUST be a name.
- if(strcmp(type->name, "Pages") == 0)
- dumppages(page);
- else if(strcmp(type->name, "Page") == 0)
- dumppage(page);
+ if(strcmp(type->name, "Pages") == 0){
+ if(!dumppages(page))
+ return 0;
+ }
+ else if(strcmp(type->name, "Page") == 0){
+ if(!dumppage(page))
+ return 0;
+ print("\n");
+ }
else
sysfatal("Unexpected page node type '%s'", type->name);
}
+ return 1;
}
--- a/misc.c
+++ b/misc.c
@@ -50,6 +50,7 @@
return fmtprint(f, "%g", o->num.d);
case Ostr:
+ case Oop:
if(isutf8(o->str, o->len))
return fmtprint(f, "%q", o->str);
return fmtprint(f, "<%.*H>", o->len, o->str);
--- a/object.c
+++ b/object.c
@@ -8,6 +8,52 @@
Object *pdfarray(Pdf *pdf, Stream *s);
Object *pdfdict(Pdf *pdf, Stream *s);
+/* returns 1 if str is at the current position of the stream, and
+ is followed either by whitespace or, if delim is 1,
+ a delimiter.
+ strlen(str) must be in (0, 16): str plus the one byte after it is read into b[16].
+ on match, the stream seeks to right after the string.
+ otherwise, the stream position is unchanged; a bad length returns 0 without reading. */
+static int
+sismatch(Stream *s, char *str, int delim)
+{
+ long len = strlen(str);
+ vlong off = Soffset(s);
+ char b[16];
+ if(len == 0 || len >= 16) /* len + 1 bytes land in b, so len == 16 would overflow it */
+ return 0;
+ if(Sread(s, b, len + 1) == len + 1 && memcmp(b, str, len) == 0 && (isws(b[len]) || (delim && isdelim(b[len])))){
+ Sungetc(s); /* hand the terminating byte back to the caller */
+ return 1;
+ }
+
+ Sseek(s, off, 0);
+ return 0;
+}
+
+char *
+suntilend(Stream *s)
+{
+ int sz, c, full = 0;
+ char buf[8];
+ for(sz = 0; sz < 7;){
+ c = Sgetc(s);
+ if(c < 0)
+ break;
+ if(isws(c) || isdelim(c)){
+ Sungetc(s);
+ full = 1;
+ break;
+ }
+ buf[sz] = c;
+ sz += 1;
+ }
+ if(!full)
+ return nil;
+ buf[sz] = 0;
+ return strdup(buf);
+}
+
/* General function to parse an object of any type. */
Object *
pdfobj(Pdf *pdf, Stream *s)
@@ -14,9 +60,8 @@
{
Object *o, *o2;
vlong off;
- int c, tf;
+ int c;
Xref xref;
- char b[16];
o = o2 = nil;
do; while(isws(c = Sgetc(s)));
@@ -23,6 +68,30 @@
if(c < 0)
goto err;
+ if(isascii(c) && isalpha(c)){
+ Sungetc(s);
+ // bool, null, or op
+ if(sismatch(s, "null", 1)){
+ fprint(1, "NULL\n");
+ return &null;
+ }
+ if((o = calloc(1, sizeof(*o))) == nil)
+ goto err;
+ o->type = Obool;
+ o->pdf = pdf;
+ if(sismatch(s, "true", 1)){
+ o->bool = 1;
+ return o;
+ }
+ if(sismatch(s, "false", 1)){
+ o->bool = 0;
+ return o;
+ }
+ o->type = Oop;
+ o->str = suntilend(s);
+ return o;
+ }
+
switch(c){
case '<': /* dictionary or a string */
c = Sgetc(s);
@@ -33,7 +102,8 @@
off = Soffset(s);
do; while(isws(Sgetc(s)));
Sungetc(s);
- if(Sread(s, b, 7) == 7 && memcmp(b, "stream", 6) == 0 && isws(c = b[6])){
+ if(sismatch(s, "stream", 0)){
+ c = Sgetc(s);
/* there IS a stream */
if(c == '\r' && (c = Sgetc(s)) < 0)
goto err;
@@ -71,45 +141,8 @@
o->pdf = pdf;
return o;
- case 'n':
- off = Soffset(s);
- if(Sgetc(s) == 'u' && Sgetc(s) == 'l' && Sgetc(s) == 'l' && (isws(c = Sgetc(s)) || isdelim(c))){
- Sungetc(s);
- return &null;
- }
- Sseek(s, off, 0);
- c = 'n';
- goto unexpected;
-
- case 't':
- off = Soffset(s);
- tf = 1;
- if(Sgetc(s) == 'r' && Sgetc(s) == 'u' && Sgetc(s) == 'e' && (isws(c = Sgetc(s)) || isdelim(c)))
- goto bool;
- Sseek(s, off, 0);
- c = 't';
- goto unexpected;
-
- case 'f':
- off = Soffset(s);
- tf = 0;
- if(Sgetc(s) == 'a' && Sgetc(s) == 'l' && Sgetc(s) == 's' && Sgetc(s) == 'e' && (isws(c = Sgetc(s)) || isdelim(c)))
- goto bool;
- Sseek(s, off, 0);
- c = 'f';
- goto unexpected;
-bool:
- Sungetc(s);
- if((o = calloc(1, sizeof(*o))) == nil)
- goto err;
- o->type = Obool;
- o->pdf = pdf;
- o->bool = tf;
- return o;
-
default:
if(!isdigit(c) && c != '-'){
-unexpected:
Sungetc(s);
werrstr("unexpected char '%c' at %#x+%#x (%d left)", c, Sobjoffset(s), Soffset(s), Ssize(s));
goto err;
@@ -178,6 +211,7 @@
return;
case Ostr:
+ case Oop:
case Oname:
free(o->str);
break;
--- a/op.c
+++ b/op.c
@@ -12,6 +12,30 @@
typedef struct Op Op;
+static void
+matidentity(double *arr)
+{
+ double src[6] = {
+ 1, 0,
+ 0, 1,
+ 0, 0
+ };
+ memcpy(arr, src, sizeof(double) * 6);
+}
+
+static void
+matmult(double *m1, double *m2, double *out)
+{
+ double result[6];
+ result[0] = m1[0] * m2[0] + m1[1] * m2[2];
+ result[1] = m1[0] * m2[1] + m1[1] * m2[3];
+ result[2] = m1[2] * m2[0] + m1[3] * m2[2];
+ result[3] = m1[2] * m2[1] + m1[3] * m2[3];
+ result[4] = m1[4] * m2[0] + m1[5] * m2[2] + m2[4];
+ result[5] = m1[4] * m2[1] + m1[5] * m2[3] + m2[5];
+ memcpy(out, result, sizeof(double) * 6);
+}
+
struct Op {
char *s;
int (*f)(Op *op, Page *p);
@@ -20,6 +44,16 @@
};
static int
+flagless(Op *op)
+{
+ if(op->flags != 0){
+ fprint(2, "Op '%s' expected no flags\n", op->s);
+ return 0;
+ }
+ return 1;
+}
+
+static int
cobegin(Op *op, Page *p)
{
USED(op, p);
@@ -36,29 +70,54 @@
static int
gspush(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ USED(op);
+ GS *r = realloc(p->GS, sizeof(GS) * (p->nGS + 1));
+ if(r == nil)
+ return 0;
+ p->GS = r;
+ p->nGS += 1;
+ p->GSactive = &p->GS[p->nGS - 1];
+ *(p->GSactive) = p->GS[p->nGS - 2];
+ return 1;
}
static int
gspop(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ USED(op);
+ GS *r = realloc(p->GS, sizeof(GS) * (p->nGS - 1));
+ if(r == nil)
+ return 0;
+ p->GS = r;
+ p->nGS -= 1;
+ p->GSactive = &p->GS[p->nGS - 1];
+ return 1;
}
+/* six parameters give the inputs a,b,c,d,e,f for the matrix
+ [a b 0]
+ [c d 0]
+ [e f 1]
+ That matrix should be premultiplied with the current matrix
+ newCTM = input x oldCTM
+ (8.3.4)
+ */
static int
gsctm(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ double input[6];
+ int i;
+ for(i = 0; i < 6; i += 1)
+ input[i] = arrayget(p->stack, i)->num.d;
+ matmult(input, p->GSactive->CTM, p->GSactive->CTM);
+ return flagless(op);
}
static int
gswidth(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ p->GSactive->LW = arrayget(p->stack, 0)->num.i;
+ return flagless(op);
}
static int
@@ -99,8 +158,8 @@
static int
gsflatness(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ p->GSactive->FL = arrayget(p->stack, 0)->num.d;
+ return flagless(op);
}
static int
@@ -114,7 +173,7 @@
pcmove(Op *op, Page *p)
{
USED(op, p);
- return 0;
+ return 1;
}
static int
@@ -121,7 +180,7 @@
pcline(Op *op, Page *p)
{
USED(op, p);
- return 0;
+ return 1;
}
static int
@@ -128,7 +187,7 @@
pccurve(Op *op, Page *p)
{
USED(op, p);
- return 0;
+ return 1;
}
static int
@@ -149,7 +208,7 @@
ppstroke(Op *op, Page *p)
{
USED(op, p);
- return 0;
+ return 1;
}
static int
@@ -218,8 +277,21 @@
static int
cgray(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ int value = 255 * arrayget(p->stack, 0)->num.d;
+ int i;
+ u32int *color;
+ if(op->flags & Nonstroking){
+ color = &p->GSactive->NSC;
+ p->GSactive->NSCS = DeviceGray;
+ } else{
+ color = &p->GSactive->SC;
+ p->GSactive->SCS = DeviceGray;
+ }
+ *color = 0;
+ for(i = 0; i < 3; i += 1)
+ *color = (*color | value) << 8;
+ *color |= 255;
+ return 1;
}
static int
@@ -295,21 +367,51 @@
static int
tslead(Op *op, Page *p)
{
- int d = arrayget(p->stack, 0)->num.d / 20;
- while(d > 0){
- d -= 1;
- if(bufput(&p->buf, (uchar*)"\n", 1) == -1)
- sysfatal("OOM");
+ p->TS.TL = arrayget(p->stack, 0)->num.d;
+ return flagless(op);
+}
+
+static int
+fontwidths(Page *p)
+{ /* caches FirstChar, LastChar, Widths and MissingWidth of the active font in GSactive->Font */
+ Object *o;
+ int i;
+ if(p->GSactive->Font.widths != nil)
+ free(p->GSactive->Font.widths);
+ o = dictget(p->GSactive->Font.font, "FirstChar");
+ if(o == nil)
+ return 1;
+ p->GSactive->Font.first = o->num.i;
+ p->GSactive->Font.last = dictget(p->GSactive->Font.font, "LastChar")->num.i;
+ p->GSactive->Font.widths = calloc(p->GSactive->Font.last - p->GSactive->Font.first + 1, sizeof(int)); /* zeroed: Widths may hold fewer entries */
+ if(p->GSactive->Font.widths == nil){
+ print("Failed to allocate for (%d, %d): %d\n", p->GSactive->Font.first, p->GSactive->Font.last, p->GSactive->Font.last - p->GSactive->Font.first + 1);
+ return 1;
}
- USED(op, p);
- return 0;
+ o = dictget(p->GSactive->Font.font, "Widths");
+ if(o == nil)
+ return 0;
+ for(i = 0; i < arraylen(o) && i <= p->GSactive->Font.last - p->GSactive->Font.first; i += 1) /* never write past widths[] */
+ p->GSactive->Font.widths[i] = arrayget(o, i)->num.i;
+ o = dictget(p->GSactive->Font.font, "FontDescriptor");
+ p->GSactive->Font.defwidth = dictget(o, "MissingWidth")->num.i;
+ return 1;
}
static int
tsfontsz(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ char *name = arrayget(p->stack, 0)->name;
+ p->GSactive->Font.font = dictget(dictget(dictget(p->obj, "Resources"), "Font"), name);
+ if(p->GSactive->Font.font == nil){
+ werrstr("Font not found: '%s'", name);
+ return 0;
+ }
+ p->GSactive->Font.enc = dictget(p->GSactive->Font.font, "Encoding");
+ if(p->GSactive->Font.enc)
+ p->GSactive->Font.enc = dictget(p->GSactive->Font.enc, "Differences");
+ p->GSactive->Font.size = arrayget(p->stack, 1)->num.d;
+ return fontwidths(p) && flagless(op);
}
static int
@@ -329,79 +431,182 @@
static int
tobegin(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ if(p->TS.inobj){
+ werrstr("Text objects must not be nested");
+ return 0;
+ }
+ matidentity(p->TS.Tm);
+ matidentity(p->TS.Tlm);
+ p->TS.inobj = 1;
+ p->GSactive->Font.font = nil;
+ return flagless(op);
}
static int
toend(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ if(!p->TS.inobj){
+ werrstr("ET found without BT");
+ return 0;
+ }
+ p->TS.inobj = 0;
+ return flagless(op);
}
static int
+tmove(Page *p, double x, double y, int tlm)
+{
+ double shift[6] = {1, 0, 0, 1, x, y};
+ if(tlm){
+ matmult(shift, p->TS.Tlm, p->TS.Tlm);
+ memcpy(p->TS.Tm, p->TS.Tlm, sizeof(double) * 6);
+ } else{
+ matmult(shift, p->TS.Tm, p->TS.Tm);
+ }
+ return 1;
+}
+
+static int
tpmove(Op *op, Page *p)
{
Object *x, *y;
x = arrayget(p->stack, 0);
y = arrayget(p->stack, 1);
- if(y->num.d != 0){
- if(bufput(&p->buf, (uchar*)"\n", 1) == -1)
- sysfatal("OOM");
- }
- else if(x->num.d < 50)
- if(bufput(&p->buf, (uchar*)" ", 1) == -1)
- sysfatal("OOM");
- USED(op, p);
- return 0;
+ if(op->flags & Leading)
+ p->TS.TL = -y->num.d;
+ return tmove(p, x->num.d, y->num.d, 1);
}
static int
tpmatrix(Op *op, Page *p)
{
- USED(op, p);
- return 0;
+ int i;
+ for(i = 0; i < 6; i += 1){
+ p->TS.Tm[i] = arrayget(p->stack, i)->num.d;
+ p->TS.Tlm[i] = p->TS.Tm[i];
+ }
+ return flagless(op);
}
static int
tpmove0(Op *op, Page *p)
{
- USED(op, p);
- if(bufput(&p->buf, (uchar*)"\n", 1) == -1)
- sysfatal("OOM");
- return 0;
+ return tmove(p, 0, 0 - p->TS.TL, 1) && flagless(op);
}
static int
+writepatched(Page *p, uchar c)
+{
+ int i, len, d = 0;
+ Object *o;
+ if(p->GSactive->Font.enc != nil){
+ len = arraylen(p->GSactive->Font.enc);
+ for(i = 0; i < len; i += 1){
+ o = arrayget(p->GSactive->Font.enc, i);
+ if(o->type == Onum)
+ d = o->num.i;
+ else if(d == c){
+ if(strcmp(o->name, "endash") == 0)
+ return bufput(&p->buf, (uchar*)"-", 1) == 1;
+ if(strcmp(o->name, "fi") == 0)
+ return bufput(&p->buf, (uchar*)"fi", 2) == 2;
+ if(strcmp(o->name, "ff") == 0)
+ return bufput(&p->buf, (uchar*)"ff", 2) == 2;
+ if(strcmp(o->name, "ffi") == 0)
+ return bufput(&p->buf, (uchar*)"ffi", 3) == 3;
+ if(strcmp(o->name, "bullet") == 0)
+ return bufput(&p->buf, (uchar*)"•", strlen("•")) == 3;
+ if(strcmp(o->name, "quotedblleft") == 0)
+ return bufput(&p->buf, (uchar*)"\"", 1) == 1;
+ if(strcmp(o->name, "quotedblright") == 0)
+ return bufput(&p->buf, (uchar*)"\"", 1) == 1;
+ if(strcmp(o->name, "quoteleft") == 0)
+ return bufput(&p->buf, (uchar*)"'", 1) == 1;
+ if(strcmp(o->name, "quoteright") == 0)
+ return bufput(&p->buf, (uchar*)"'", 1) == 1;
+ fprint(2, "TODO: recognize glyph name '%s'\n", o->name);
+ return 1;
+ } else
+ d += 1;
+ }
+ }
+ return bufput(&p->buf, (uchar*)&c, 1) == 1;
+}
+
+/* Renders one character / glyph and updates the text state */
+static int
+tchar(Page *p, ulong c)
+{
+ double Trm[6] = {p->GSactive->Font.size, 0, 0, p->GSactive->Font.size, 0, 0};
+ double tx;
+ int i;
+ matmult(Trm, p->TS.Tm, Trm);
+ matmult(Trm, p->GSactive->CTM, Trm);
+ tx = p->GSactive->Font.size / 1000;
+ if(c >= p->GSactive->Font.first && c <= p->GSactive->Font.last)
+ tx = tx * (double)p->GSactive->Font.widths[c - p->GSactive->Font.first];
+ else
+ tx = tx * (double)p->GSactive->Font.defwidth;
+ // Check if whitespace is needed
+ if(p->buf.sz > 1){
+ if(p->TS.y != Trm[5]){
+ for(i = 0; i < (int)((p->TS.y - Trm[5]) / p->GSactive->Font.size); i += 1)
+ if(bufput(&p->buf, (uchar*)"\n", 1) != 1)
+ return 0;
+ }
+ if(Trm[4] - p->TS.x > 2.5){
+ if(bufput(&p->buf, (uchar*)" ", 1) != 1)
+ return 0;
+ }
+ }
+ if(!writepatched(p, c) || !tmove(p, tx, 0, 0))
+ return 0;
+ p->TS.x = Trm[4] + tx;
+ p->TS.y = Trm[5];
+ return 1;
+}
+
+static int
+tstr(Page *p, char *str, ulong len)
+{ /* shows len bytes of str one glyph at a time via tchar; returns 0 on the first failure, else 1 */
+ ulong i;
+ for(i = 0; i < len; i += 1)
+ if(!tchar(p, (uchar)str[i])) /* where char is signed, bytes >= 0x80 must not sign-extend into the ulong code */
+ return 0;
+ return 1;
+}
+
+static int
thshow(Op *op, Page *p)
{
+ if(op->flags != 0){
+ fprint(2, "TODO: thshow != Tj\n");
+ return 0;
+ }
Object *o = arrayget(p->stack, 0);
- if(bufput(&p->buf, (uchar*)o->str, o->len) == -1)
- sysfatal("OOM");
- USED(op);
- return 0;
+ if(!tstr(p, o->str, o->len))
+ return 0;
+ return 1;
}
static int
thshowarr(Op *op, Page *p)
{
- Object *arr = arrayget(p->stack, 0);
- Object *o;
+ Object *o, *arr = arrayget(p->stack, 0);
int i;
for(i = 0; i < arraylen(arr); i += 1){
o = arrayget(arr, i);
if(o->type == Ostr){
- if(bufput(&p->buf, (uchar*)o->str, o->len) == -1)
- sysfatal("OOM");
+ if(!tstr(p, o->str, o->len))
+ return 0;
}
- else if(o->num.d < -150){
- if(bufput(&p->buf, (uchar*)" ", 1) == -1)
- sysfatal("OOM");
+ else{
+ double shift = 0 - (p->GSactive->Font.size * o->num.d / 1000);
+ if(!tmove(p, shift, 0, 0))
+ return 0;
}
}
- USED(op);
- return 0;
+ return flagless(op);
}
static int
@@ -746,7 +951,7 @@
opignore(Op *op, Page *p)
{
USED(op, p);
- return 1;
+ return 0;
}
static Op ops[] = {
@@ -833,8 +1038,8 @@
/* 9.4.2 Text position operators */
{"Td", tpmove, 2,}, /* move, next line */
{"TD", tpmove, 2, Leading,}, /* move, next line, leading */
- {"Tm", tpmatrix, 6,}, /* (line) matrix */
- {"T*", tpmove0, 0, Leading,}, /* move, next line, leading */
+ {"Tm", tpmatrix, 6,}, /* set Tm and Tlm */
+ {"T*", tpmove0, 0,}, /* move, next line, leading */
/* 9.4.3 Text showing operators */
{"Tj", thshow, 1,}, /* show string */
@@ -902,24 +1107,15 @@
{nil, nil, 0,},
};
-// If an op is found at the current position in the stream, the associated Op is
-// returned and the stream is advanced. Otherwise, nil is returned and the stream
-// is left unchanged.
Op *
-opfind(Stream *s)
+opfind(char *name)
{
- int i;
- uint len;
+ int i = 0;
Op *op;
- char *b = (char*)s->buf.b + s->buf.off;
- i = 0;
while(ops[i].s != nil){
op = &ops[i];
- len = strlen(op->s);
- if(strncmp(op->s, b, len) == 0 && (isws(b[len]) || isdelim(b[len]))){
- s->buf.off += len;
+ if(strcmp(op->s, name) == 0)
return op;
- }
i += 1;
}
return nil;
@@ -926,32 +1122,77 @@
}
void
-pageinit(Page *page)
+pageinit(Page *page, Object *o)
{
bufinit(&page->buf, 0, 0);
// Stack is per-content-stream, so we don't create it here
page->stack = nil;
+ page->obj = o;
+ page->TS.inobj = 0;
+ page->TS.x = 0;
+ page->TS.y = 0;
}
void
-pagefree(Page *p)
+gsinit(Page *p, GS *gs)
{
- buffree(&p->buf);
- pdfobjfree(p->stack);
+ USED(p);
+ /* todo: actually initialize the full state */
+ /* CTM maps user coords to device coords.
+ TODO: use mediabox and screen info to init CTM
+ */
+ matidentity(gs->CTM);
+ gs->LW = 1;
+ gs->LC = 0;
+ gs->LJ = 0;
+ gs->ML = 10;
+ gs->SCS = gs->NSCS = DeviceGray;
+ // Alpha is lowest byte; this is (0, 0, 0, 255) == black
+ gs->SC = gs->NSC = 255;
+ gs->Font.font = nil;
+ gs->Font.enc = nil;
+ gs->Font.widths = nil;
}
-static void
-stackreset(Object *stack)
+void
+gsfree(GS gs)
{
+ free(gs.Font.widths);
+ pdfobjfree(gs.Font.font);
+ gs.Font.font = nil;
+ gs.Font.enc = nil;
+ gs.Font.widths = nil;
+}
+
+void
+pagegsclean(Page *p)
+{
int i;
- for(i = 0; i < stack->array.ne; i += 1)
- pdfobjfree(stack->array.e[i]);
- stack->array.ne = 0;
- free(stack->array.e);
- stack->array.e = nil;
+ p->GSactive = nil;
+ for(i = 0; i < p->nGS; i += 1)
+ gsfree(p->GS[i]);
+ free(p->GS);
+ p->GS = nil;
+ p->nGS = 0;
}
-static void
+static int
+stackreset(Page *p)
+{
+ pdfobjfree(p->stack);
+ p->stack = arraynew(p->obj->pdf);
+ return p->stack != nil;
+}
+
+void
+pagefree(Page *p)
+{
+ buffree(&p->buf);
+ pdfobjfree(p->stack);
+ pagegsclean(p);
+}
+
+static int
pagerendercontent(Page *p, Object *content)
{
Stream *s;
@@ -964,43 +1205,63 @@
}
p->stack = arraynew(content->pdf);
if(p->stack == nil)
- return;
+ return 0;
while(s->buf.off != s->buf.sz){
while(isws(s->buf.b[s->buf.off]) && s->buf.off != s->buf.sz)
s->buf.off += 1;
if(s->buf.off == s->buf.sz)
break;
- op = opfind(s);
- if(op != nil){
- op->f(op, p);
- stackreset(p->stack);
- } else{
- o = pdfobj(content->pdf, s);
- if(o == nil){
- fprint(2, "failed to read operand: %r\n");
- break;
+ o = pdfobj(content->pdf, s);
+ if(o == nil)
+ return 0;
+ if(o->type == Oop){
+ op = opfind(o->str);
+ if(op == nil){
+ fprint(2, "Unknown op: %s\n", o->str);
+ pdfobjfree(o);
+ return 0;
}
+ pdfobjfree(o);
+ if(!op->f(op, p)){
+ fprint(2, "'%s' failed!\n", op->s);
+ return 0;
+ }
+ if(!stackreset(p))
+ return 0;
+ } else{
if(!arrayadd(p->stack, o)){
fprint(2, "Failed to push operand to stack: %r\n");
- break;
+ return 0;
}
}
}
- if(bufput(&p->buf, (uchar*)"\n", 1) == -1)
- sysfatal("OOM");
+ if(bufput(&p->buf, (uchar*)"\n\0", 2) != 2)
+ return 0;
Sclose(s);
+ return 1;
}
int
-pagerender(Page *p, Object *o)
+pagerender(Page *p)
{
Object *content;
int i;
- content = dictget(o, "Contents");
- if(content->type == Oarray)
+ p->nGS = 1;
+ p->GS = malloc(sizeof(GS));
+ if(p->GS == nil){
+ werrstr("Out of memory");
+ return 0;
+ }
+ gsinit(p, p->GS);
+ content = dictget(p->obj, "Contents");
+ if(content->type == Oarray){
for(i = 0; i < arraylen(content); i += 1)
- pagerendercontent(p, arrayget(content, i));
+ if(!pagerendercontent(p, arrayget(content, i)))
+ return 0;
+ }
else if(content->type != Onull)
- pagerendercontent(p, content);
+ if(!pagerendercontent(p, content))
+ return 0;
+ pagegsclean(p);
return 1;
}
--- a/pdf.h
+++ b/pdf.h
@@ -8,10 +8,12 @@
Ostream, /* 7.3.8 */
Onull, /* 7.3.9 */
Oindir, /* 7.3.10 */
+ Oop, /* 7.8.2 */
};
typedef struct Buffer Buffer;
typedef struct Filter Filter;
+typedef struct TS TS;
typedef struct GS GS;
typedef struct GSD GSD;
typedef struct GSFont GSFont;
@@ -35,11 +37,6 @@
void *(*memimage)(Buffer *b);
};
-struct Page {
- Object *stack;
- Buffer buf;
-};
-
struct Filter {
char *name;
int (*readall)(void *aux, Buffer *bi, Buffer *bo);
@@ -104,16 +101,36 @@
struct GSFont {
Object *font;
double size;
+ Object *enc; /* TODO: drop enc, use the encoding table */
+ struct{
+ // If a character c is in [first, last], replace it with values[c], which may be multibyte.
+ int first, last;
+ char **values;
+ } encoding;
+ struct{
+ int first, last;
+ int *widths;
+ int defwidth;
+ };
};
+/* Color spaces; 8.6.3 / table 61 */
+typedef enum ColorSpace {
+ DeviceGray, DeviceRGB, DeviceCMYK, /* Device family */
+ CalGray, CalRGB, Lab, ICCBased, /* CIE-based family */
+ Pattern, Indexed, Separation, DeviceN, /* Special family */
+} ColorSpace;
+
struct GS {
+ double CTM[6]; /* current transformation matrix ; 8.3 */
Object *BG, *UCR, *UCR2, *TR, *TR2, *HT, *BM, *SMask, *UseBlackPTComp, *HTO;
int LW, LC, LJ, ML, RI, OP, op, OPM, SA, AIS, TK;
- double SM, CA, ca;
- struct {
- GSFont *Font;
- int nFont;
+ double SM, CA, ca, FL;
+ struct{ /* coloring info */
+ ColorSpace SCS, NSCS; /* stroking color space and nonstroking color space */
+ u32int SC, NSC;
};
+ GSFont Font;
struct {
GSD *d;
int nd;
@@ -120,6 +137,29 @@
};
};
+struct TS {
+ double Tm[6]; /* text matrix */
+ double Tlm[6]; /* text line matrix */
+ /* Tracks if we're in a text object; nesting is verboten */
+ int inobj;
+ double TL;
+ /* Temporary, for pdf2txt functionality: tracks the last character's position so we know whether whitespace is needed */
+ double x, y;
+};
+
+struct Page {
+ Object *obj;
+ Object *stack;
+ Buffer buf;
+ /* The graphical state stack. GSactive is always a shortcut for the top of the stack, GS[nGS - 1] */
+ struct{
+ GS *GS;
+ GS *GSactive;
+ int nGS;
+ };
+ TS TS;
+};
+
struct Pdf {
Stream *s;
Xref *xref;
@@ -246,8 +286,8 @@
int bufget(Buffer *b, uchar *d, int sz);
void bufdump(Buffer *b);
-void pageinit(Page *p);
-int pagerender(Page *p, Object *o);
+void pageinit(Page *p, Object *o);
+int pagerender(Page *p);
void pagefree(Page *p);
#pragma varargck type "O" Object*