shithub: pdffs

Download patch

ref: a65b2474e954e41d6cc3b0d4542bc9da6bf108c1
parent: 7dd87721538f0392ba840134ecf3e7468b5d4a3f
author: Noam Preil <noam@pixelhero.dev>
date: Tue Jun 1 18:14:04 EDT 2021

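improve pdf2txt heuristics

tpmove now prints a newline when its second stack operand is nonzero,
and otherwise a space when the first operand is below 50. The cutoff
for printing a space on a numeric array element drops from -14 to -150.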

--- a/op.c
+++ b/op.c
@@ -342,6 +342,13 @@
 static int
 tpmove(Op *op, Page *p)
 {
+	Object *x, *y;
+	x = arrayget(p->stack, 0);
+	y = arrayget(p->stack, 1);
+	if(y->num.d != 0)
+		fprint(1, "\n");
+	else if(x->num.d < 50)
+		fprint(1, " ");
 	USED(op, p);
 	return 0;
 }
@@ -378,7 +385,7 @@
 		o = arrayget(arr, i);
 		if(o->type == Ostr)
 			fprint(1, "%s", o->str);
-		else if(o->num.d < -14)
+		else if(o->num.d < -150)
 			fprint(1, " ");
 	}
 	USED(op);