ref: d613b76a67a6dbb4d2c16a17e6994e7bdf2731df
dir: /pdf.c/
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "pdf.h"

int Tfmt(Fmt *f);
int ⊗fmt(Fmt *f);

/*
 * pre-1.5 xref section reader
 * PDF>=1.5 may have BOTH (or either) old xref format and xref streams
 *
 * Reads one subsection of nxref fixed-width entries, describing objects
 * xref0 .. xref0+nxref-1, from the current position of pdf->bio and
 * merges it into pdf->xref:
 *  - an 'n' (in use) entry updates the offset of an existing entry with
 *    the same id, or is appended when the id is not known yet;
 *  - an 'f' (free) entry marks an existing entry with the same id for
 *    removal and is otherwise ignored.
 * Afterwards entries whose id is 0 are dropped and the array is shrunk.
 *
 * Returns 0 on success, -1 on error.
 */
static int
xrefread(Pdf *pdf, int xref0, int nxref)
{
	enum {
		Entsz = 20,		/* bytes per xref entry, as the parser below assumes */
		Maxobj = 8388607,	/* indirect object limit listed in ISO 32000-1 Annex C */
	};
	int i, j, sz, n, newnxref;
	Xref xref;
	char *s, *e;
	Xref *x;

	if(nxref == 0)
		return 0;
	/* nxref comes straight from the file: keep the size arithmetic below in range */
	if(nxref < 0 || pdf->nxref < 0 || nxref > Maxobj - pdf->nxref){
		werrstr("xref subsection size out of range (%d)", nxref);
		return -1;
	}

	s = nil;
	/* make room for the worst case: every entry is new and in use */
	if((x = realloc(pdf->xref, (pdf->nxref + nxref)*sizeof(Xref))) == nil)
		goto err;
	pdf->xref = x;

	/* read the entire thing at once */
	sz = nxref*Entsz;
	if((s = malloc(sz)) == nil)
		goto err;
	for(i = 0; i < sz; i += n){
		if((n = Bread(pdf->bio, s+i, sz-i)) < 1){
			if(n == 0)
				werrstr("xref section truncated");
			goto err;
		}
	}

	/* store non-free objects only */
	newnxref = pdf->nxref;
	for(e = s, i = 0; i < nxref; i++, e += Entsz){
		/* the offset, generation and type fields must be whitespace-separated */
		if(!isspace((uchar)e[10]) || !isspace((uchar)e[18]) || !isspace((uchar)e[19])){
			werrstr("invalid xref line (%d/%d)", i, nxref);
			goto err;
		}
		/* start from zeroes so unset fields are never copied as garbage */
		memset(&xref, 0, sizeof(xref));
		xref.id = xref0 + i;
		xref.off = strtoul(e, nil, 10);
		/* FIXME xref.gen */
		xref.type = Xusual;

		/* search in already existing xrefs, update if found */
		for(j = 0; j < pdf->nxref; j++){
			if(pdf->xref[j].id != xref.id)
				continue;
			if(e[17] == 'f') /* it was freed */
				pdf->xref[j].id = 0;
			else if(e[17] == 'n')
				pdf->xref[j].off = xref.off;
			break;
		}
		if(j >= pdf->nxref && e[17] == 'n') /* that's a new one, insert unless it's free */
			pdf->xref[newnxref++] = xref;
	}
	free(s);

	/* scale down: compact away the entries that were marked free (id 0) */
	for(i = j = 0; i < newnxref; i++){
		if(pdf->xref[i].id != 0)
			pdf->xref[j++] = pdf->xref[i];
	}
	if(j == 0){
		/*
		 * nothing left: do not ask realloc for zero bytes, it may free
		 * the block and return nil, leaving pdf->xref dangling
		 */
		free(pdf->xref);
		pdf->xref = nil;
	}else if((x = realloc(pdf->xref, j*sizeof(Xref))) != nil)
		pdf->xref = x;
	/* a failed shrink is harmless: the old, larger block is still valid */
	pdf->nxref = j;

	return 0;
err:
	free(s);
	return -1;
}

/*
 * Parses the object at the current position of pdf->bio, which must be
 * a dictionary, and stores references to its "Root" and "Info" entries
 * in pdf->root and pdf->info.
 *
 * Returns 0 on success, -1 on error.
 */
static int
trailerread(Pdf *pdf)
{
	Object *o;

	if((o = pdfobj(pdf, pdf->bio)) == nil)
		return -1;
	if(o->type != Odict){
		werrstr("isn't a dictionary");
		pdfobjfree(o);
		return -1;
	}
	pdf->root = pdfref(dictget(o, "Root"));
	pdf->info = pdfref(dictget(o, "Info"));
	pdfobjfree(o);

	return 0;
}

/*
 * Decodes sz bytes at b as a big-endian integer.
 * A field of width 0 is absent and yields dflt instead.
 */
static int
getint(uchar *b, int sz, int dflt)
{
	uint x;
	int i;

	if(sz == 0)
		return dflt;
	/* accumulate unsigned so that wide fields wrap instead of overflowing a signed int */
	x = 0;
	for(i = 0; i < sz; i++)
		x = x<<8 | b[i];

	return x;
}

/*
7.5.8.3 */ static int xrefstreamread(Pdf *pdf) { Object *o; Stream *s; Xref *x; uchar buf[32]; int w[8], nw, i, c, n, nxref, newnxref, prev, extra; s = nil; if((o = pdfobj(pdf, pdf->bio)) == nil){ werrstr("xref stream obj: %r"); goto err; } if((prev = dictint(o, "Prev")) > 0){ if(Bseek(pdf->bio, prev, 0) != prev){ werrstr("xref stream prev seek failed"); goto err; } if(xrefstreamread(pdf) != 0){ pdfobjfree(o); return -1; } } if((s = streamopen(o)) == nil){ werrstr("failed to stream xref: %r"); goto err; } if((nw = dictints(o, "W", w, nelem(w))) < 3 || nw >= nelem(w)){ werrstr("nW=%d", nw); goto err; } for(n = i = 0; i < nw; i++) n += w[i]; /* size of each element. w[i] MAY be 0 */ if(n > sizeof(buf)){ werrstr("W is beyond imaginable: %d bytes", n); goto err; } if((nxref = streamsize(s)/n) < 1){ werrstr("no xref elements in the stream"); goto err; } extra = streamsize(s) % (nxref*n); if(extra != 0) fprint(2, "extra %d bytes in xref stream", extra); newnxref = pdf->nxref + nxref; if((x = realloc(pdf->xref, newnxref*sizeof(Xref))) == nil) goto err; pdf->xref = x; x += pdf->nxref; while(Bread(s->bio, buf, n) == n){ /* stop on short read or error */ c = getint(buf, w[0], 1); /* default type is 1 */ if(c == 1){ /* not compressed */ x->off = getint(buf+w[0], w[1], 0); x->gen = getint(buf+w[0]+w[1], w[2], 0); x->type = Xuncompressed; pdf->nxref++; fprint(2, "xref %⊗\n", *x); x++; }else if(c == 2){ /* compressed */ x->objnum = getint(buf+w[0], w[1], 0); x->id = getint(buf+w[0]+w[1], w[2], 0); x->type = Xcompressed; pdf->nxref++; fprint(2, "xref %⊗\n", *x); x++; } } streamclose(s); pdf->root = pdfref(dictget(o, "Root")); pdf->info = pdfref(dictget(o, "Info")); pdfobjfree(o); return 0; err: streamclose(s); pdfobjfree(o); return -1; } Pdf * pdfopen(Biobuf *b) { Pdf *pdf; Object *o; char tmp[64], *s, *x; int xref0; /* 7.5.4 xref subsection first object number */ int nxref; /* 7.5.4 xref subsection number of objects */ int xreftb; /* 7.5.4 xref table offset from the beginning of 
the file */ int i, n, off; fmtinstall('T', Tfmt); fmtinstall(L'⊗', ⊗fmt); o = nil; if((pdf = calloc(1, sizeof(*pdf))) == nil) goto err; pdf->bio = b; /* check header */ if(Bread(b, tmp, 8) != 8 || strncmp(tmp, "%PDF-", 5) != 0 || !isdigit(tmp[5]) || tmp[6] != '.' || !isdigit(tmp[7])){ werrstr("not a pdf"); goto err; } /* 7.5.4, 7.5.8 xref table */ /* read a block of data */ n = sizeof(tmp)-1; Bseek(b, -n, 2); if(Bread(b, tmp, n) != n){ badtrailer: werrstr("invalid trailer"); goto err; } tmp[n] = 0; /* search for a valid string that the block ends with */ for(i = n-1, s = &tmp[i]; i > 0 && *s != 0; i--, s--); s++; /* find "startxref" */ if((x = strrchr(s, 'f')) == nil || !isws(x[1]) || x-8 < s+1 || memcmp(x-8, "startxref", 9) != 0) goto badtrailer; x++; if((xreftb = strtol(x, nil, 10)) < 1) goto badtrailer; /* read xref */ if(Bseek(b, xreftb, 0) != xreftb){ werrstr("xref position out of range"); goto err; } morexref: off = Boffset(b); n = sizeof(tmp)-1; if((n = Bread(b, tmp, n)) < 16){ badxref: werrstr("invalid xref: %r"); goto err; } tmp[n] = 0; if(memcmp(tmp, "xref", 4) == 0){ /* 7.5.4 xref */ x = tmp+4; xref0 = strtol(x, &x, 10); nxref = strtol(x, &x, 10); /* skip whitespace and move to the first subsection */ for(; isws(*x) && x < tmp+n; x++); n = x-tmp+off; if(Bseek(b, n, 0) != n) goto badxref; if(xref0 >= 0 && nxref > 0 && xrefread(pdf, xref0, nxref) != 0) goto badxref; goto morexref; /* there could be more updates, try it */ }else if(memcmp(tmp, "trailer", 7) == 0){ /* 7.5.5 file trailer */ /* move to the trailer dictionary */ n = off + 8; if(Bseek(b, n, 0) != n || trailerread(pdf) != 0){ werrstr("invalid trailer: %r"); goto err; } }else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */ if(Bseek(b, xreftb, 0) != xreftb) goto badxref; if(xrefstreamread(pdf) != 0) goto err; } /* root is required, info is optional */ if(pdf->root == &null){ werrstr("no root"); goto err; } return pdf; err: werrstr("pdfopen: %r [at %p]", (void*)Boffset(b)); 
pdfclose(pdf); pdfobjfree(o); return nil; } void pdfclose(Pdf *pdf) { if(pdf == nil) return; if(pdf->bio != nil) Bterm(pdf->bio); free(pdf->xref); free(pdf); }