ref: 3c27f041321b91dbf2bfd0ab9e4865e03854cd68
dir: /pdf.c/
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "pdf.h"
/*
 * pre-1.5 xref section reader
 * PDF>=1.5 may have BOTH (or either) old xref format and xref streams
 *
 * Reads one cross-reference subsection of nxref entries, 20 bytes
 * each, from the current position of pdf->bio and merges it into
 * pdf->xref. xref0 is the object number of the first entry; entry i
 * describes object xref0+i.
 *
 * Entries marked 'n' are appended, or update the offset of an id that
 * is already known. Entries marked 'f' drop an already known id.
 * Returns 0 on success and -1 on failure.
 */
static int
xrefread(Pdf *pdf, int xref0, int nxref)
{
	int i, j, sz, n, newnxref;
	Xref xref;
	char *s, *e;
	Xref *x;

	s = nil;

	/* nxref comes from the file: refuse counts that would overflow sz */
	if(nxref < 0 || nxref > 0x7fffffff/20){
		werrstr("invalid xref count");
		goto err;
	}

	/* make room for the worst case: every entry is new and in use */
	if((x = realloc(pdf->xref, (pdf->nxref + nxref)*sizeof(Xref))) == nil)
		goto err;
	pdf->xref = x;

	/* read the entire thing at once */
	sz = nxref*20;
	if((s = malloc(sz)) == nil)
		goto err;
	for(i = 0; i < sz; i += n){
		if((n = Bread(pdf->bio, s+i, sz-i)) < 1){
			if(n == 0)
				werrstr("unexpected end of file");
			goto err;
		}
	}

	/* only id and off are filled in below; keep any other field zeroed */
	memset(&xref, 0, sizeof(xref));

	/* store non-free objects only */
	newnxref = pdf->nxref;
	for(e = s, i = 0; i < nxref; i++, e += 20){
		/* 7.5.4: the two byte end of line is CR LF, SP LF or SP CR */
		if(e[10] != ' ' ||
		   !((e[18] == '\r' && e[19] == '\n') ||
		     (e[18] == ' ' && (e[19] == '\r' || e[19] == '\n')))){
			werrstr("invalid xref line");
			goto err;
		}

		xref.id = xref0 + i;
		xref.off = strtoul(e, nil, 10);

		/* search in already existing xrefs, update if found */
		for(j = 0; j < pdf->nxref; j++){
			if(pdf->xref[j].id != xref.id)
				continue;
			if(e[17] == 'f') /* it was freed; id 0 marks the slot for removal */
				pdf->xref[j].id = 0;
			else if(e[17] == 'n')
				pdf->xref[j].off = xref.off;
			break;
		}
		if(j >= pdf->nxref && e[17] == 'n') /* that's a new one, insert unless it's free */
			pdf->xref[newnxref++] = xref;
	}
	free(s);
	s = nil;

	/* scale down: squeeze out the slots marked with id 0 */
	for(i = j = 0; i < newnxref; i++){
		if(pdf->xref[i].id != 0)
			pdf->xref[j++] = pdf->xref[i];
	}
	pdf->nxref = j;
	if(j == 0){
		/* do not rely on what realloc does with a zero size */
		free(pdf->xref);
		pdf->xref = nil;
	}else if((x = realloc(pdf->xref, j*sizeof(Xref))) != nil){
		/* failing to shrink is harmless, the larger buffer stays valid */
		pdf->xref = x;
	}

	return 0;
err:
	free(s);
	return -1;
}
/*
 * Parse the trailer dictionary found at the current position of
 * pdf->bio and remember the values of its Root and Info keys in pdf.
 * Root is mandatory, Info is optional.
 * Returns 0 on success and -1 on failure.
 *
 * NOTE(review): pdf->root and pdf->info are taken straight from the
 * dictionary that is passed to freeobject right after the scan;
 * confirm that freeobject leaves these values alive.
 */
static int
trailerread(Pdf *pdf)
{
	Object *dict;
	KeyValue *ent;
	char *text, *rest;
	int k;

	dict = nil;
	text = Brdstr(pdf->bio, 0, 1);
	if(text != nil)
		dict = pdfobject(text, &rest, Blinelen(pdf->bio));
	if(dict == nil)
		goto fail;

	/* the raw text is not needed once it has been parsed */
	free(text);
	text = nil;

	if(dict->type != Odict){
		werrstr("isn't a dictionary");
		goto fail;
	}

	k = 0;
	ent = dict->dict.kv;
	while(k < dict->dict.nkv){
		fprint(2, "# %s %p\n", ent->key, ent->value);
		if(strcmp(ent->key, "Root") == 0){
			pdf->root = ent->value;
		}else if(strcmp(ent->key, "Info") == 0){
			pdf->info = ent->value;
		}
		k++;
		ent++;
	}

	freeobject(dict);
	dict = nil;

	/* a trailer without a root is unusable; a missing info is fine */
	if(pdf->root != nil)
		return 0;
	werrstr("no root");

fail:
	freeobject(dict);
	free(text);
	return -1;
}
/*
 * Open a PDF for reading from fd.
 *
 * Checks the "%PDF-x.y" header, locates "startxref" in the last bytes
 * of the file, reads every subsection of the classic xref table found
 * at that offset and then the trailer dictionary that follows it.
 * Only that one table and trailer are read. Xref streams (7.5.8) are
 * not implemented.
 *
 * Returns the new Pdf, or nil with the error string set. On failure
 * everything allocated here is released through pdfclose.
 */
Pdf *
pdfopen(int fd)
{
	Pdf *pdf;
	Biobuf *b;
	char tmp[64], *s, *x;
	int xref0;	/* 7.5.4 xref subsection first object number */
	int nxref;	/* 7.5.4 xref subsection number of objects */
	int xreftb;	/* 7.5.4 xref table offset from the beginning of the file */
	int i, n, off;
	int intable;	/* set once the "xref" keyword has been seen */

	if((pdf = calloc(1, sizeof(*pdf))) == nil || (b = Bfdopen(fd, OREAD)) == nil)
		goto err;
	pdf->bio = b;

	/* check header */
	if(Bread(b, tmp, 8) != 8 ||
	   strncmp(tmp, "%PDF-", 5) != 0 || !isdigit(tmp[5]) || tmp[6] != '.' || !isdigit(tmp[7])){
		werrstr("not a pdf");
		goto err;
	}

	/* 7.5.4, 7.5.8 xref table */
	/* read a block of data from the very end of the file */
	n = sizeof(tmp)-1;
	if(Bseek(b, -n, 2) < 0 || Bread(b, tmp, n) != n)
		goto badtrailer;
	tmp[n] = 0;

	/* search for a valid string that the block ends with */
	for(i = n-1, s = &tmp[i]; i > 0 && *s != 0; i--, s--);
	s++;

	/* find "startxref": its final 'f' is the last 'f' in the tail */
	if((x = strrchr(s, 'f')) == nil || !isws(x[1]) || x-8 < s+1 || memcmp(x-8, "startxref", 9) != 0)
		goto badtrailer;
	x++;
	if((xreftb = strtol(x, nil, 10)) < 1)
		goto badtrailer;

	/* read xref */
	if(Bseek(b, xreftb, 0) != xreftb){
		werrstr("xref position out of range");
		goto err;
	}

	/*
	 * 7.5.4: the "xref" keyword is followed by one or more
	 * subsections, each starting with a "first count" line.
	 * Only the first one is preceded by the keyword.
	 */
	intable = 0;
	for(;;){
		off = Bseek(b, 0, 1);
		n = Bread(b, tmp, sizeof(tmp)-1);
		if(n < 16)
			goto badxref;
		tmp[n] = 0;

		x = tmp;
		if(memcmp(tmp, "xref", 4) == 0){
			intable = 1;
			x += 4;
		}else if(!intable || !isdigit(tmp[0])){
			/* not a subsection header: trailer or something else */
			break;
		}

		nxref = -1;
		if((xref0 = strtol(x, &x, 10)) < 0 || (nxref = strtol(x, &x, 10)) < 1){
			werrstr("xref0=%d nxref=%d", xref0, nxref);
			goto badxref;
		}

		/* skip whitespace and move to the first entry of the subsection */
		for(; x < tmp+n && isws(*x); x++);
		n = x-tmp+off;
		if(Bseek(b, n, 0) != n || xrefread(pdf, xref0, nxref) != 0)
			goto badxref;
	}

	if(memcmp(tmp, "trailer", 7) != 0){
		/* could be 7.5.8 xref stream (since PDF 1.5) */
		werrstr("FIXME xref streams not implemented");
		goto err;
	}

	/* 7.5.5 file trailer: skip the keyword and any whitespace after it */
	for(x = tmp+7; x < tmp+n && isws(*x); x++);
	n = x-tmp+off;
	if(Bseek(b, n, 0) != n || trailerread(pdf) != 0){
		werrstr("invalid trailer: %r");
		goto err;
	}

	return pdf;

badtrailer:
	werrstr("invalid trailer");
	goto err;
badxref:
	/* wraps whatever error string the failing step left behind */
	werrstr("invalid xref: %r");
err:
	pdfclose(pdf);
	return nil;
}
/*
 * Release a Pdf: terminate its Biobuf if one is attached, then free
 * the xref array and the structure itself. A nil pdf is ignored.
 */
void
pdfclose(Pdf *pdf)
{
	if(pdf != nil){
		if(pdf->bio != nil){
			Bterm(pdf->bio);
		}
		free(pdf->xref);
		free(pdf);
	}
}