ref: 68c95564fc32ef6ada2e7aed5afa7b25e353b9ad
parent: b75bca337a9cf272c8dfc42af11b867d11aeb804
author: David <gek@katherine>
date: Wed Feb 17 21:38:43 EST 2021
Micro-optimization that gains 50 FPS on my i7
--- a/README.md
+++ b/README.md
@@ -3,6 +3,8 @@
A rework of Fabrice Bellard's TinyGL (still compiling with -std=c99) to be
more useful as a software rasterizer.
+Tightly tweaked for performance.
+
Valgrind'd for memory leaks in the demos.
It's also lightning fast.
--- a/src/clip.c
+++ b/src/clip.c
@@ -11,13 +11,14 @@
#define CLIP_ZMAX (1 << 5)
void gl_transform_to_viewport(GLContext* c, GLVertex* v) {
- GLfloat winv;
/* coordinates */
- winv = 1.0 / v->pc.W;
+ {
+ GLfloat winv = 1.0 / v->pc.W;
v->zp.x = (GLint)(v->pc.X * winv * c->viewport.scale.X + c->viewport.trans.X);
v->zp.y = (GLint)(v->pc.Y * winv * c->viewport.scale.Y + c->viewport.trans.Y);
v->zp.z = (GLint)(v->pc.Z * winv * c->viewport.scale.Z + c->viewport.trans.Z);
+ }
/* color */
v->zp.r = (GLuint)(v->color.v[0] * 65535) & 65535;
v->zp.g = (GLuint)(v->color.v[1] * 65535) & 65535;
@@ -99,7 +100,7 @@
void gl_draw_line(GLContext* c, GLVertex* p1, GLVertex* p2) {
GLfloat dx, dy, dz, dw, x1, y1, z1, w1;
- GLfloat tmin, tmax;
+
GLVertex q1, q2;
GLint cc1, cc2;
@@ -127,8 +128,8 @@
z1 = p1->pc.Z;
w1 = p1->pc.W;
- tmin = 0;
- tmax = 1;
+ GLfloat tmin = 0;
+ GLfloat tmax = 1;
if (ClipLine1(dx + dw, -x1 - w1, &tmin, &tmax) && ClipLine1(-dx + dw, x1 - w1, &tmin, &tmax) && ClipLine1(dy + dw, -y1 - w1, &tmin, &tmax) &&
ClipLine1(-dy + dw, y1 - w1, &tmin, &tmax) && ClipLine1(dz + dw, -z1 - w1, &tmin, &tmax) && ClipLine1(-dz + dw, z1 - w1, &tmin, &tmax)) {
@@ -191,18 +192,29 @@
static inline void updateTmp(GLContext* c, GLVertex* q, GLVertex* p0, GLVertex* p1, GLfloat t) {
{
+
+
q->color.v[0] = p0->color.v[0] + (p1->color.v[0] - p0->color.v[0]) * t;
q->color.v[1] = p0->color.v[1] + (p1->color.v[1] - p0->color.v[1]) * t;
q->color.v[2] = p0->color.v[2] + (p1->color.v[2] - p0->color.v[2]) * t;
- q->zp.r = p0->zp.r + (p1->zp.r - p0->zp.r) * t;
- q->zp.g = p0->zp.g + (p1->zp.g - p0->zp.g) * t;
- q->zp.b = p0->zp.b + (p1->zp.b - p0->zp.b) * t;
- // q->color.v[3]=p0->color.v[3] + (p1->color.v[3] - p0->color.v[3])*t;
- // tgl_warning("\np0 Components are %f, %f, %f", p0->color.v[0], p0->color.v[1], p0->color.v[2]);
- // tgl_warning("\nZbuffer point r,g,b for p0 are: %d %d %d",p0->zp.r, p0->zp.g,p0->zp.b);
- // tgl_warning("\n~\nNew Components are %f, %f, %f", q->color.v[0], q->color.v[1], q->color.v[2]);
- // tgl_warning("\nZbuffer point r,g,b for new point are: %d %d %d",q->zp.r, q->zp.g,q->zp.b);
- /// *
+// q->zp.r = p0->zp.r + (p1->zp.r - p0->zp.r) * t;
+// q->zp.g = p0->zp.g + (p1->zp.g - p0->zp.g) * t;
+// q->zp.b = p0->zp.b + (p1->zp.b - p0->zp.b) * t;
+
+/*
+ v->zp.r = (GLuint)(v->color.v[0] * 65535) & 65535;
+ v->zp.g = (GLuint)(v->color.v[1] * 65535) & 65535;
+ v->zp.b = (GLuint)(v->color.v[2] * 65535) & 65535;
+
+*/
+/*
+ q->zp.r = 0xffFF * p0->color.v[0];
+ q->zp.g = 0xffFF * p0->color.v[1];
+ q->zp.b = 0xffFF * p0->color.v[2];
+*/
+// q->zp.r = (GLuint)(p0->color.v[0] * 65535) & 65535;
+// q->zp.g = (GLuint)(p0->color.v[1] * 65535) & 65535;
+// q->zp.b = (GLuint)(p0->color.v[2] * 65535) & 65535;
}
// */
if (c->texture_2d_enabled) {
@@ -219,7 +231,7 @@
void gl_draw_triangle(GLContext* c, GLVertex* p0, GLVertex* p1, GLVertex* p2) {
GLint co, c_and, cc[3], front;
- GLfloat norm;
+
cc[0] = p0->clip_code;
cc[1] = p1->clip_code;
@@ -229,7 +241,7 @@
/* we handle the non clipped case here to go faster */
if (co == 0) {
-
+ GLfloat norm;
norm = (GLfloat)(p1->zp.x - p0->zp.x) * (GLfloat)(p2->zp.y - p0->zp.y) - (GLfloat)(p2->zp.x - p0->zp.x) * (GLfloat)(p1->zp.y - p0->zp.y);
if (norm == 0)
@@ -270,9 +282,10 @@
static void gl_draw_triangle_clip(GLContext* c, GLVertex* p0, GLVertex* p1, GLVertex* p2, GLint clip_bit) {
GLint co, c_and, co1, cc[3], edge_flag_tmp, clip_mask;
- GLVertex tmp1, tmp2, *q[3];
- GLfloat tt;
+ //GLVertex tmp1, tmp2, *q[3];
+ GLVertex *q[3];
+
cc[0] = p0->clip_code;
cc[1] = p1->clip_code;
cc[2] = p2->clip_code;
@@ -281,6 +294,7 @@
if (co == 0) {
gl_draw_triangle(c, p0, p1, p2);
} else {
+
c_and = cc[0] & cc[1] & cc[2];
/* the triangle is completely outside */
if (c_and != 0)
@@ -294,10 +308,7 @@
/* this test can be true only in case of rounding errors */
if (clip_bit == 6) {
#if 0
- tgl_warning("Error:\n");
- tgl_warning("%f %f %f %f\n",p0->pc.X,p0->pc.Y,p0->pc.Z,p0->pc.W);
- tgl_warning("%f %f %f %f\n",p1->pc.X,p1->pc.Y,p1->pc.Z,p1->pc.W);
- tgl_warning("%f %f %f %f\n",p2->pc.X,p2->pc.Y,p2->pc.Z,p2->pc.W);
+ tgl_warning("Error:\n");tgl_warning("%f %f %f %f\n",p0->pc.X,p0->pc.Y,p0->pc.Z,p0->pc.W);tgl_warning("%f %f %f %f\n",p1->pc.X,p1->pc.Y,p1->pc.Z,p1->pc.W);tgl_warning("%f %f %f %f\n",p2->pc.X,p2->pc.Y,p2->pc.Z,p2->pc.W);
#endif
return;
}
@@ -307,7 +318,7 @@
if (co1) {
/* one point outside */
-
+
if (cc[0] & clip_mask) {
q[0] = p0;
q[1] = p1;
@@ -321,25 +332,26 @@
q[1] = p0;
q[2] = p1;
}
+ {GLVertex tmp1, tmp2;GLfloat tt;
+ tt = clip_proc[clip_bit](&tmp1.pc, &q[0]->pc, &q[1]->pc);
+ updateTmp(c, &tmp1, q[0], q[1], tt);
- tt = clip_proc[clip_bit](&tmp1.pc, &q[0]->pc, &q[1]->pc);
- updateTmp(c, &tmp1, q[0], q[1], tt);
+ tt = clip_proc[clip_bit](&tmp2.pc, &q[0]->pc, &q[2]->pc);
+ updateTmp(c, &tmp2, q[0], q[2], tt);
- tt = clip_proc[clip_bit](&tmp2.pc, &q[0]->pc, &q[2]->pc);
- updateTmp(c, &tmp2, q[0], q[2], tt);
+ tmp1.edge_flag = q[0]->edge_flag;
+ edge_flag_tmp = q[2]->edge_flag;
+ q[2]->edge_flag = 0;
+ gl_draw_triangle_clip(c, &tmp1, q[1], q[2], clip_bit + 1);
- tmp1.edge_flag = q[0]->edge_flag;
- edge_flag_tmp = q[2]->edge_flag;
- q[2]->edge_flag = 0;
- gl_draw_triangle_clip(c, &tmp1, q[1], q[2], clip_bit + 1);
-
- tmp2.edge_flag = 1;
- tmp1.edge_flag = 0;
- q[2]->edge_flag = edge_flag_tmp;
- gl_draw_triangle_clip(c, &tmp2, &tmp1, q[2], clip_bit + 1);
+ tmp2.edge_flag = 1;
+ tmp1.edge_flag = 0;
+ q[2]->edge_flag = edge_flag_tmp;
+ gl_draw_triangle_clip(c, &tmp2, &tmp1, q[2], clip_bit + 1);
+ }
} else {
/* two points outside */
-
+
if ((cc[0] & clip_mask) == 0) {
q[0] = p0;
q[1] = p1;
@@ -353,16 +365,17 @@
q[1] = p0;
q[2] = p1;
}
+ {GLVertex tmp1, tmp2;GLfloat tt;
+ tt = clip_proc[clip_bit](&tmp1.pc, &q[0]->pc, &q[1]->pc);
+ updateTmp(c, &tmp1, q[0], q[1], tt);
- tt = clip_proc[clip_bit](&tmp1.pc, &q[0]->pc, &q[1]->pc);
- updateTmp(c, &tmp1, q[0], q[1], tt);
+ tt = clip_proc[clip_bit](&tmp2.pc, &q[0]->pc, &q[2]->pc);
+ updateTmp(c, &tmp2, q[0], q[2], tt);
- tt = clip_proc[clip_bit](&tmp2.pc, &q[0]->pc, &q[2]->pc);
- updateTmp(c, &tmp2, q[0], q[2], tt);
-
- tmp1.edge_flag = 1;
- tmp2.edge_flag = q[2]->edge_flag;
- gl_draw_triangle_clip(c, q[0], &tmp1, &tmp2, clip_bit + 1);
+ tmp1.edge_flag = 1;
+ tmp2.edge_flag = q[2]->edge_flag;
+ gl_draw_triangle_clip(c, q[0], &tmp1, &tmp2, clip_bit + 1);
+ }
}
}
}
@@ -406,24 +419,24 @@
#endif
} else if (c->current_shade_model == GL_SMOOTH) {
//ZB_fillTriangleSmooth(c->zb, &p0->zp, &p1->zp, &p2->zp);
- #if TGL_FEATURE_BLEND == 1
- if(c->zb->enable_blend)
- ZB_fillTriangleSmooth(c->zb, &p0->zp, &p1->zp, &p2->zp);
- else
- ZB_fillTriangleSmoothNOBLEND(c->zb, &p0->zp, &p1->zp, &p2->zp);
- #else
- ZB_fillTriangleSmoothNOBLEND(c->zb, &p0->zp, &p1->zp, &p2->zp);
- #endif
+#if TGL_FEATURE_BLEND == 1
+ if(c->zb->enable_blend)
+ ZB_fillTriangleSmooth(c->zb, &p0->zp, &p1->zp, &p2->zp);
+ else
+ ZB_fillTriangleSmoothNOBLEND(c->zb, &p0->zp, &p1->zp, &p2->zp);
+#else
+ ZB_fillTriangleSmoothNOBLEND(c->zb, &p0->zp, &p1->zp, &p2->zp);
+#endif
} else {
//ZB_fillTriangleFlat(c->zb, &p0->zp, &p1->zp, &p2->zp);
- #if TGL_FEATURE_BLEND == 1
- if(c->zb->enable_blend)
- ZB_fillTriangleFlat(c->zb, &p0->zp, &p1->zp, &p2->zp);
- else
- ZB_fillTriangleFlatNOBLEND(c->zb, &p0->zp, &p1->zp, &p2->zp);
- #else
- ZB_fillTriangleFlatNOBLEND(c->zb, &p0->zp, &p1->zp, &p2->zp);
- #endif
+#if TGL_FEATURE_BLEND == 1
+ if(c->zb->enable_blend)
+ ZB_fillTriangleFlat(c->zb, &p0->zp, &p1->zp, &p2->zp);
+ else
+ ZB_fillTriangleFlatNOBLEND(c->zb, &p0->zp, &p1->zp, &p2->zp);
+#else
+ ZB_fillTriangleFlatNOBLEND(c->zb, &p0->zp, &p1->zp, &p2->zp);
+#endif
}
}