ref: 0c863b1ce85c144b8359b75a631876bed095907c
parent: e20b6c643f188f387549db5af1891d8159db8a64
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Sun Jul 21 21:53:13 EDT 2024
rasterizer: slightly faster; timing with -p 32 -m Symbola.ttf >/dev/null: linux/amd64: 0m1.767s -> 0m1.552s, 9front/arm64: 17.23u -> 14.82u
--- a/plan9/otfsys.h
+++ b/plan9/otfsys.h
@@ -2,6 +2,9 @@
#include <u.h>
#include <libc.h>
+#define unlikely(c) (c)	/* no-op hint; parenthesized so the argument cannot re-associate with its context */
+#define likely(c) (c)	/* no-op hint; parenthesized so the argument cannot re-associate with its context */
+
#define PRIx8 "ux"
#define PRIx16 "ux"
#define PRIu16 "ud"
--- a/rast.c
+++ b/rast.c
@@ -83,7 +83,7 @@
Sval p, q, d;
int n;
- if(is₀(a)){
+ if(unlikely(is₀(a))){
if(is₀(b))
return 0;
qs[0] = -c/b;
@@ -91,15 +91,8 @@
}
p = b/(2.0*a);
- q = c/a;
- d = p*p - q;
-
- if(is₀(d)){
- qs[0] = -p;
- return qs[0] > 0 && qs[0] < 1;
- }
-
- if(d < 0.0)
+ d = p*p - c/a;
+ if(d < ε)
return 0;
d = sqrt(d);
@@ -127,18 +120,13 @@
int i, j, n, r;
/* transform */
- for(i = 0; i < nelem(s.v); i += 2){
- s.v[i+0] = s₀->v[i+0]*jj - px;
- s.v[i+1] = s₀->v[i+1]*jj - py;
- }
+ s.p0.x = s₀->p0.x*jj - px;
+ s.p0.y = s₀->p0.y*jj - py;
+ s.p1.x = s₀->p1.x*jj - px;
+ s.p1.y = s₀->p1.y*jj - py;
+ s.p2.x = s₀->p2.x*jj - px;
+ s.p2.y = s₀->p2.y*jj - py;
- /* FIXME would it make things faster to do proper convex hull test here? */
- if(s.p0.x <= 0 && s.p1.x <= 0 && s.p2.x <= 0 ||
- s.p0.x >= 1 && s.p1.x >= 1 && s.p2.x >= 1 ||
- s.p0.y <= 0 && s.p1.y <= 0 && s.p2.y <= 0 ||
- s.p0.y >= 1 && s.p1.y >= 1 && s.p2.y >= 1)
- return 0;
-
#define e(t,a) (s.p0.a*(1-t)*(1-t) + 2*s.p1.a*(1-t)*t + s.p2.a*t*t)
#define within(v) ((w = e(v, x)) >= -ε && w <= 1+ε && (w = e(v, y)) >= -ε && w <= 1+ε)
@@ -146,14 +134,19 @@
n = 0;
if(s.p0.x >= 0 && s.p0.x <= 1 && s.p0.y >= 0 && s.p0.y <= 1)
qs[n++] = 0;
- for(i = 0; i < 2; i++){
- c = s.v0[i];
- a = c - 2*s.v1[i] + s.v2[i];
- b = 2*(s.v1[i] - c);
- n += qslv(qs+n, a, b, c);
- n += qslv(qs+n, a, b, c-1);
- }
+
+ c = s.p0.x;
+ a = c - 2*s.p1.x + s.p2.x;
+ b = 2*(s.p1.x - c);
+ n += qslv(qs+n, a, b, c);
+ n += qslv(qs+n, a, b, c-1);
+ c = s.p0.y;
+ a = c - 2*s.p1.y + s.p2.y;
+ b = 2*(s.p1.y - c);
+ n += qslv(qs+n, a, b, c);
+ n += qslv(qs+n, a, b, c-1);
qsort(qs, n, sizeof(Sval), Svalcmp);
+
if(s.p2.x >= 0 && s.p2.x <= 1 && s.p2.y >= 0 && s.p2.y <= 1)
qs[n++] = 1;
j = 0;
@@ -267,11 +260,12 @@
}
static u64int
-qCxy(SegQ *s, int ns, int jj, int px, int py, Sval *c, u64int *m, u64int tm)
+Cxy(SegQ *s, int ns, int jj, int px, int py, Sval *c, u64int *m, u64int tm)
{
int (*f)(SegQ*, int, Sval, Sval, Sval*, Sval*);
+ Sval K[4][2], L[4][2], q[6], j;
u64int tx₀, tx₁, z, all;
- Sval K[4][2], L[4][2];
+ u8int w;
int i;
jj *= 2;
@@ -284,9 +278,20 @@
tx₀ = 1ULL<<(px*jj + py);
tx₁ = 1ULL<<((px+1)*jj + py);
}
+ j = 1.0/(Sval)jj;
+ q[0] = j*px;
+ q[1] = j*py;
+ q[2] = q[0]+j;
+ q[3] = q[1]+j;
+ q[4] = q[2]+j;
+ q[5] = q[3]+j;
all = 0;
for(i = 0; i < ns; i++, s++){
- if((m[i] & tm) == 0)
+ if((m[i] & tm) == 0 ||
+ s->p0.x <= q[0] && s->p1.x <= q[0] && s->p2.x <= q[0] ||
+ s->p0.x >= q[4] && s->p1.x >= q[4] && s->p2.x >= q[4] ||
+ s->p0.y <= q[1] && s->p1.y <= q[1] && s->p2.y <= q[1] ||
+ s->p0.y >= q[5] && s->p1.y >= q[5] && s->p2.y >= q[5])
continue;
K[0][0] = K[0][1] = 0;
@@ -297,36 +302,33 @@
L[1][0] = L[1][1] = 0;
L[2][0] = L[2][1] = 0;
L[3][0] = L[3][1] = 0;
+ z = 0;
+ f = s->p1.x == s->p2.x && s->p1.y == s->p2.y ? lKL : qKL;
- if(s->p1.x == s->p2.x && s->p1.y == s->p2.y)
- f = lKL;
- else
- f = qKL;
+ w =
+ (s->p0.x <= q[2] || s->p1.x <= q[2] || s->p2.x <= q[2])<<0 |
+ (s->p0.x >= q[2] || s->p1.x >= q[2] || s->p2.x >= q[2])<<1 |
+ (s->p0.y <= q[3] || s->p1.y <= q[3] || s->p2.y <= q[3])<<2 |
+ (s->p0.y >= q[3] || s->p1.y >= q[3] || s->p2.y >= q[3])<<3;
- z = 0;
if(tx₀ == 0){
- z |= f(s, jj, px+0, py+0, K[0], L[0]);
- z |= f(s, jj, px+0, py+1, K[1], L[1]);
- z |= f(s, jj, px+1, py+0, K[2], L[2]);
- z |= f(s, jj, px+1, py+1, K[3], L[3]);
+ if((w & 5) == 5) z |= f(s, jj, px+0, py+0, K[0], L[0]);
+ if((w & 6) == 6) z |= f(s, jj, px+1, py+0, K[2], L[2]);
+ if((w & 9) == 9) z |= f(s, jj, px+0, py+1, K[1], L[1]);
+ if((w & 10) == 10) z |= f(s, jj, px+1, py+1, K[3], L[3]);
}else{
- z = 0;
- if(f(s, jj, px+0, py+0, K[0], L[0]))
- z |= tx₀;
- if(f(s, jj, px+0, py+1, K[1], L[1]))
- z |= tx₀<<1;
- if(f(s, jj, px+1, py+0, K[2], L[2]))
- z |= tx₁;
- if(f(s, jj, px+1, py+1, K[3], L[3]))
- z |= tx₁<<1;
+ if((w & 5) == 5 && f(s, jj, px+0, py+0, K[0], L[0])) z |= tx₀;
+ if((w & 6) == 6 && f(s, jj, px+1, py+0, K[2], L[2])) z |= tx₁;
+ if((w & 9) == 9 && f(s, jj, px+0, py+1, K[1], L[1])) z |= tx₀<<1;
+ if((w & 10) == 10 && f(s, jj, px+1, py+1, K[3], L[3])) z |= tx₁<<1;
m[ns+i] |= z;
all |= z;
}
if(z != 0){
- c[0] += L[0][1] + L[2][1] + K[1][1] - L[1][1] + K[3][1] - L[3][1];
- c[1] += L[0][0] + L[1][0] + K[2][0] - L[2][0] + K[3][0] - L[3][0];
- c[2] += L[0][0] - L[1][0] + K[2][0] - L[2][0] - K[3][0] + L[3][0];
+ c[0] += L[0][1] - L[1][1] + L[2][1] - L[3][1] + K[1][1] + K[3][1];
+ c[1] += L[0][0] + L[1][0] - L[2][0] - L[3][0] + K[2][0] + K[3][0];
+ c[2] += L[0][0] - L[1][0] - L[2][0] + L[3][0] + K[2][0] - K[3][0];
}
}
return all;
@@ -395,7 +397,7 @@
for(py = 0; py < j²; py++, c += 3){
u64int tm = 1ULL<<(px*j² + py);
if(all & tm)
- nall |= qCxy(seg, ns, j², px, py, c, ma, tm);
+ nall |= Cxy(seg, ns, j², px, py, c, ma, tm);
}
}
if(j != 3){
@@ -418,7 +420,7 @@
for(py = 0; py < j²; py++, c += 3){
u64int tm = tm₀ << (py>>(j-3));
if(all & tm)
- qCxy(seg, ns, j², px, py, c, ma, tm);
+ Cxy(seg, ns, j², px, py, c, ma, tm);
}
}
}
--- a/unix/otfsys.h
+++ b/unix/otfsys.h
@@ -9,6 +9,8 @@
#define nil NULL
#define USED(x) (void)(x)
#define nelem(a) (int)(sizeof(a)/sizeof((a)[0]))
+#define unlikely(c) __builtin_expect((c) != 0, 0)	/* branch hint: c normalized to 0/1, expected 0 */
+#define likely(c) __builtin_expect((c) != 0, 1)	/* branch hint: c normalized to 0/1, expected 1 */
#define Runeerror ((Rune)0xfffd)