shithub: fnt

Download patch

ref: 0c863b1ce85c144b8359b75a631876bed095907c
parent: e20b6c643f188f387549db5af1891d8159db8a64
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Sun Jul 21 21:53:13 EDT 2024

rasterizer: slightly faster

    -p 32 -m Symbola.ttf >/dev/null

linux/amd64: 0m1.767s -> 0m1.552s
9front/arm64: 17.23u -> 14.82u

--- a/plan9/otfsys.h
+++ b/plan9/otfsys.h
@@ -2,6 +2,9 @@
 #include <u.h>
 #include <libc.h>
 
+#define unlikely(c) (c)	/* expands to the condition itself (no branch hint emitted); parenthesized so a compound argument stays a single operand */
+#define likely(c) (c)	/* identity counterpart of unlikely(); same parenthesization */
+
 #define PRIx8 "ux"
 #define PRIx16 "ux"
 #define PRIu16 "ud"
--- a/rast.c
+++ b/rast.c
@@ -83,7 +83,7 @@
 	Sval p, q, d;
 	int n;
 
-	if(is₀(a)){
+	if(unlikely(is₀(a))){
 		if(is₀(b))
 			return 0;
 		qs[0] = -c/b;
@@ -91,15 +91,8 @@
 	}
 
 	p = b/(2.0*a);
-	q = c/a;
-	d = p*p - q;
-
-	if(is₀(d)){
-		qs[0] = -p;
-		return qs[0] > 0 && qs[0] < 1;
-	}
-
-	if(d < 0.0)
+	d = p*p - c/a;
+	if(d < ε)
 		return 0;
 
 	d = sqrt(d);
@@ -127,18 +120,13 @@
 	int i, j, n, r;
 
 	/* transform */
-	for(i = 0; i < nelem(s.v); i += 2){
-		s.v[i+0] = s₀->v[i+0]*jj - px;
-		s.v[i+1] = s₀->v[i+1]*jj - py;
-	}
+	s.p0.x = s₀->p0.x*jj - px;
+	s.p0.y = s₀->p0.y*jj - py;
+	s.p1.x = s₀->p1.x*jj - px;
+	s.p1.y = s₀->p1.y*jj - py;
+	s.p2.x = s₀->p2.x*jj - px;
+	s.p2.y = s₀->p2.y*jj - py;
 
-	/* FIXME would it make things faster to do proper convex hull test here? */
-	if(s.p0.x <= 0 && s.p1.x <= 0 && s.p2.x <= 0 ||
-	   s.p0.x >= 1 && s.p1.x >= 1 && s.p2.x >= 1 ||
-	   s.p0.y <= 0 && s.p1.y <= 0 && s.p2.y <= 0 ||
-	   s.p0.y >= 1 && s.p1.y >= 1 && s.p2.y >= 1)
-		return 0;
-
 #define e(t,a) (s.p0.a*(1-t)*(1-t) + 2*s.p1.a*(1-t)*t + s.p2.a*t*t)
 #define within(v) ((w = e(v, x)) >= -ε && w <= 1+ε && (w = e(v, y)) >= -ε && w <= 1+ε)
 
@@ -146,14 +134,19 @@
 	n = 0;
 	if(s.p0.x >= 0 && s.p0.x <= 1 && s.p0.y >= 0 && s.p0.y <= 1)
 		qs[n++] = 0;
-	for(i = 0; i < 2; i++){
-		c = s.v0[i];
-		a = c - 2*s.v1[i] + s.v2[i];
-		b = 2*(s.v1[i] - c);
-		n += qslv(qs+n, a, b, c);
-		n += qslv(qs+n, a, b, c-1);
-	}
+
+	c = s.p0.x;
+	a = c - 2*s.p1.x + s.p2.x;
+	b = 2*(s.p1.x - c);
+	n += qslv(qs+n, a, b, c);
+	n += qslv(qs+n, a, b, c-1);
+	c = s.p0.y;
+	a = c - 2*s.p1.y + s.p2.y;
+	b = 2*(s.p1.y - c);
+	n += qslv(qs+n, a, b, c);
+	n += qslv(qs+n, a, b, c-1);
 	qsort(qs, n, sizeof(Sval), Svalcmp);
+
 	if(s.p2.x >= 0 && s.p2.x <= 1 && s.p2.y >= 0 && s.p2.y <= 1)
 		qs[n++] = 1;
 	j = 0;
@@ -267,11 +260,12 @@
 }
 
 static u64int
-qCxy(SegQ *s, int ns, int jj, int px, int py, Sval *c, u64int *m, u64int tm)
+Cxy(SegQ *s, int ns, int jj, int px, int py, Sval *c, u64int *m, u64int tm)
 {
 	int (*f)(SegQ*, int, Sval, Sval, Sval*, Sval*);
+	Sval K[4][2], L[4][2], q[6], j;
 	u64int tx₀, tx₁, z, all;
-	Sval K[4][2], L[4][2];
+	u8int w;
 	int i;
 
 	jj *= 2;
@@ -284,9 +278,20 @@
 		tx₀ = 1ULL<<(px*jj + py);
 		tx₁ = 1ULL<<((px+1)*jj + py);
 	}
+	j = 1.0/(Sval)jj;
+	q[0] = j*px;
+	q[1] = j*py;
+	q[2] = q[0]+j;
+	q[3] = q[1]+j;
+	q[4] = q[2]+j;
+	q[5] = q[3]+j;
 	all = 0;
 	for(i = 0; i < ns; i++, s++){
-		if((m[i] & tm) == 0)
+		if((m[i] & tm) == 0 ||
+		   s->p0.x <= q[0] && s->p1.x <= q[0] && s->p2.x <= q[0] ||
+		   s->p0.x >= q[4] && s->p1.x >= q[4] && s->p2.x >= q[4] ||
+		   s->p0.y <= q[1] && s->p1.y <= q[1] && s->p2.y <= q[1] ||
+		   s->p0.y >= q[5] && s->p1.y >= q[5] && s->p2.y >= q[5])
 			continue;
 
 		K[0][0] = K[0][1] = 0;
@@ -297,36 +302,33 @@
 		L[1][0] = L[1][1] = 0;
 		L[2][0] = L[2][1] = 0;
 		L[3][0] = L[3][1] = 0;
+		z = 0;
+		f = s->p1.x == s->p2.x && s->p1.y == s->p2.y ? lKL : qKL;
 
-		if(s->p1.x == s->p2.x && s->p1.y == s->p2.y)
-			f = lKL;
-		else
-			f = qKL;
+		w =
+			(s->p0.x <= q[2] || s->p1.x <= q[2] || s->p2.x <= q[2])<<0 |
+			(s->p0.x >= q[2] || s->p1.x >= q[2] || s->p2.x >= q[2])<<1 |
+			(s->p0.y <= q[3] || s->p1.y <= q[3] || s->p2.y <= q[3])<<2 |
+			(s->p0.y >= q[3] || s->p1.y >= q[3] || s->p2.y >= q[3])<<3;
 
-		z = 0;
 		if(tx₀ == 0){
-			z |= f(s, jj, px+0, py+0, K[0], L[0]);
-			z |= f(s, jj, px+0, py+1, K[1], L[1]);
-			z |= f(s, jj, px+1, py+0, K[2], L[2]);
-			z |= f(s, jj, px+1, py+1, K[3], L[3]);
+			if((w & 5) == 5) z |= f(s, jj, px+0, py+0, K[0], L[0]);
+			if((w & 6) == 6) z |= f(s, jj, px+1, py+0, K[2], L[2]);
+			if((w & 9) == 9) z |= f(s, jj, px+0, py+1, K[1], L[1]);
+			if((w & 10) == 10) z |= f(s, jj, px+1, py+1, K[3], L[3]);
 		}else{
-			z = 0;
-			if(f(s, jj, px+0, py+0, K[0], L[0]))
-				z |= tx₀;
-			if(f(s, jj, px+0, py+1, K[1], L[1]))
-				z |= tx₀<<1;
-			if(f(s, jj, px+1, py+0, K[2], L[2]))
-				z |= tx₁;
-			if(f(s, jj, px+1, py+1, K[3], L[3]))
-				z |= tx₁<<1;
+			if((w & 5) == 5 && f(s, jj, px+0, py+0, K[0], L[0])) z |= tx₀;
+			if((w & 6) == 6 && f(s, jj, px+1, py+0, K[2], L[2])) z |= tx₁;
+			if((w & 9) == 9 && f(s, jj, px+0, py+1, K[1], L[1])) z |= tx₀<<1;
+			if((w & 10) == 10 && f(s, jj, px+1, py+1, K[3], L[3])) z |= tx₁<<1;
 			m[ns+i] |= z;
 			all |= z;
 		}
 
 		if(z != 0){
-			c[0] += L[0][1] + L[2][1] + K[1][1] - L[1][1] + K[3][1] - L[3][1];
-			c[1] += L[0][0] + L[1][0] + K[2][0] - L[2][0] + K[3][0] - L[3][0];
-			c[2] += L[0][0] - L[1][0] + K[2][0] - L[2][0] - K[3][0] + L[3][0];
+			c[0] += L[0][1] - L[1][1] + L[2][1] - L[3][1] + K[1][1] + K[3][1];
+			c[1] += L[0][0] + L[1][0] - L[2][0] - L[3][0] + K[2][0] + K[3][0];
+			c[2] += L[0][0] - L[1][0] - L[2][0] + L[3][0] + K[2][0] - K[3][0];
 		}
 	}
 	return all;
@@ -395,7 +397,7 @@
 			for(py = 0; py < j²; py++, c += 3){
 				u64int tm = 1ULL<<(px*j² + py);
 				if(all & tm)
-					nall |= qCxy(seg, ns, j², px, py, c, ma, tm);
+					nall |= Cxy(seg, ns, j², px, py, c, ma, tm);
 			}
 		}
 		if(j != 3){
@@ -418,7 +420,7 @@
 			for(py = 0; py < j²; py++, c += 3){
 				u64int tm = tm₀ << (py>>(j-3));
 				if(all & tm)
-					qCxy(seg, ns, j², px, py, c, ma, tm);
+					Cxy(seg, ns, j², px, py, c, ma, tm);
 			}
 		}
 	}
--- a/unix/otfsys.h
+++ b/unix/otfsys.h
@@ -9,6 +9,8 @@
 #define nil NULL
 #define USED(x) (void)(x)
 #define nelem(a) (int)(sizeof(a)/sizeof((a)[0]))
+#define unlikely(c) __builtin_expect(!!(c), 0)	/* !! normalizes c to 0/1; tells the compiler the condition is expected to be false */
+#define likely(c) __builtin_expect(!!(c), 1)	/* !! normalizes c to 0/1; tells the compiler the condition is expected to be true */
 
 #define Runeerror ((Rune)0xfffd)