shithub: qk1

Download patch

ref: c483a0f2077d7f1325a801aa1e0d5301bc6eb527
parent: affa540c607a2f2f5906e5897ab84643f430ff5a
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Sun Dec 31 23:49:47 EST 2023

-O3 -march=native; produce mip/fullbright/additive specific surface lighting funcs

--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@
 BIN=${DESTDIR}${PREFIX}/bin
 EXTRA_CFLAGS=$$(pkg-config --cflags sdl2 openal)
 EXTRA_LDFLAGS=$$(pkg-config --libs sdl2 openal)
-CFLAGS?=-O2 -g
+CFLAGS?=-O3 -g -march=native
 CFLAGS+=-Wall -Wextra -Wno-unknown-pragmas -Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-microsoft-anon-tag
 CFLAGS+=-fms-extensions
 CFLAGS+=-I3rd/parg -Iunix -I. ${EXTRA_CFLAGS}
--- a/model.h
+++ b/model.h
@@ -28,11 +28,12 @@
 	vec3_t		position;
 } mvertex_t;
 
-#define	SIDE_FRONT	0
-#define	SIDE_BACK	1
-#define	SIDE_ON		2
+enum {
+	SIDE_FRONT, // 0, same value as the #define this replaces
+	SIDE_BACK, // 1
+	SIDE_ON, // 2
+};
 
-
 // plane_t structure
 // !!! if this is changed, it must be changed in asm_i386.h too !!!
 typedef struct mplane_s
@@ -44,9 +45,14 @@
 	byte	pad[2];
 } mplane_t;
 
+enum {
+	DRAWSURF_FULLBRIGHT = 1<<0, // bit flag kept in texture_t.drawsurf; set by the BSP loaders for textures that carry fullbright pixels
+};
+
 typedef struct texture_s
 {
 	char name[16];
+	int drawsurf;
 	int width, height;
 	int anim_total;				// total tenths in sequence ( 0 = no)
 	int anim_min, anim_max;		// time for this frame min <=time< max
--- a/model_bsp.c
+++ b/model_bsp.c
@@ -133,6 +133,12 @@
 			tx->height = h;
 			// the pixels immediately follow the structures
 			torgbx(p, tx->pixels, pixels);
+			for(j = 0; j < pixels; j++){ // scan the converted pixels, stopping at the first match
+				if(tx->pixels[j] != 0 && (tx->pixels[j] & 0xff000000) == 0){ // non-zero pixel whose top byte is clear
+					tx->drawsurf |= DRAWSURF_FULLBRIGHT;
+					break;
+				}
+			}
 			if(tx->name[0] == '{'){
 				for(j = 1; j < MIPLEVELS; j++){
 					w /= 2;
--- a/model_bsp30.c
+++ b/model_bsp30.c
@@ -175,6 +175,7 @@
 					}
 				}
 				if(strchr(tx->name, '~') != nil){
+					tx->drawsurf |= DRAWSURF_FULLBRIGHT;
 					/* last 32 colors are fullbright */
 					for(j = 0; j < pixels; j++){
 						if(p[j] >= palsz-32)
--- a/r_surf.c
+++ b/r_surf.c
@@ -5,15 +5,10 @@
 static int sourcetstep;
 static int surfrowbytes;	// used by ASM files
 static int r_stepback;
-static int r_numhblocks, r_numvblocks;
 static pixel_t *r_source, *r_sourcemax;
-static unsigned *r_lightptr[3];
-static int r_lightwidth;
 static pixel_t *pbasesource;
 static void *prowdestbase;
 
-static void R_DrawSurfaceBlock8(int mip);
-
 static unsigned blocklights[3][18*18];
 
 /*
@@ -193,7 +188,118 @@
 	return base;
 }
 
+inline pixel_t
+addlight(pixel_t x, int lr, int lg, int lb) // scales the three low bytes of x by lr/lg/lb; NOTE(review): the addlight_fN_aN variants generated further down appear to supersede this function - confirm it still has callers
+{
+	int r, g, b;
 
+	if((x & 0xff000000U) == 0) // top byte clear: pixel is returned unlit
+		return x;
+
+	if(currententity != nil && (currententity->effects & EF_ADDITIVE) != 0) // additive entities are returned unlit too; this global is read on every call
+		return x;
+
+	r = (x>>16) & 0xff;
+	g = (x>>8)  & 0xff;
+	b = (x>>0)  & 0xff;
+
+	r = (r * ((64<<8)-(lr & 0xffff))) >> (8+VID_CBITS); // only the low 16 bits of each light value take part
+	g = (g * ((64<<8)-(lg & 0xffff))) >> (8+VID_CBITS);
+	b = (b * ((64<<8)-(lb & 0xffff))) >> (8+VID_CBITS);
+	x = (x & 0xff000000) | r<<16 | g<<8 | b<<0; // keep the top byte, repack the scaled channels
+
+	return x;
+}
+
+#define fullbright 1 // variant 1 of 4: fullbright=1, additive=1
+#define additive 1
+#define addlight addlight_f1_a1
+#define DrawSurfaceBlock_m0 DrawSurfaceBlock_f1_a1_m0
+#define DrawSurfaceBlock_m1 DrawSurfaceBlock_f1_a1_m1
+#define DrawSurfaceBlock_m2 DrawSurfaceBlock_f1_a1_m2
+#define DrawSurfaceBlock_m3 DrawSurfaceBlock_f1_a1_m3
+#include "r_surf_x.h" // emits addlight plus one DrawSurfaceBlock per mip level 0..3 under the names defined above
+#undef fullbright
+#undef additive
+#undef addlight
+#undef DrawSurfaceBlock_m0
+#undef DrawSurfaceBlock_m1
+#undef DrawSurfaceBlock_m2
+#undef DrawSurfaceBlock_m3
+
+#define fullbright 0 // variant 2 of 4: fullbright=0, additive=1
+#define additive 1
+#define addlight addlight_f0_a1
+#define DrawSurfaceBlock_m0 DrawSurfaceBlock_f0_a1_m0
+#define DrawSurfaceBlock_m1 DrawSurfaceBlock_f0_a1_m1
+#define DrawSurfaceBlock_m2 DrawSurfaceBlock_f0_a1_m2
+#define DrawSurfaceBlock_m3 DrawSurfaceBlock_f0_a1_m3
+#include "r_surf_x.h"
+#undef fullbright
+#undef additive
+#undef addlight
+#undef DrawSurfaceBlock_m0
+#undef DrawSurfaceBlock_m1
+#undef DrawSurfaceBlock_m2
+#undef DrawSurfaceBlock_m3
+
+#define fullbright 1 // variant 3 of 4: fullbright=1, additive=0
+#define additive 0
+#define addlight addlight_f1_a0
+#define DrawSurfaceBlock_m0 DrawSurfaceBlock_f1_a0_m0
+#define DrawSurfaceBlock_m1 DrawSurfaceBlock_f1_a0_m1
+#define DrawSurfaceBlock_m2 DrawSurfaceBlock_f1_a0_m2
+#define DrawSurfaceBlock_m3 DrawSurfaceBlock_f1_a0_m3
+#include "r_surf_x.h"
+#undef fullbright
+#undef additive
+#undef addlight
+#undef DrawSurfaceBlock_m0
+#undef DrawSurfaceBlock_m1
+#undef DrawSurfaceBlock_m2
+#undef DrawSurfaceBlock_m3
+
+#define fullbright 0 // variant 4 of 4: fullbright=0, additive=0
+#define additive 0
+#define addlight addlight_f0_a0
+#define DrawSurfaceBlock_m0 DrawSurfaceBlock_f0_a0_m0
+#define DrawSurfaceBlock_m1 DrawSurfaceBlock_f0_a0_m1
+#define DrawSurfaceBlock_m2 DrawSurfaceBlock_f0_a0_m2
+#define DrawSurfaceBlock_m3 DrawSurfaceBlock_f0_a0_m3
+#include "r_surf_x.h"
+#undef fullbright
+#undef additive
+#undef addlight
+#undef DrawSurfaceBlock_m0
+#undef DrawSurfaceBlock_m1
+#undef DrawSurfaceBlock_m2
+#undef DrawSurfaceBlock_m3
+
+typedef void (*drawfunc)(unsigned *lp[3], unsigned lw, int nb); // lp: one light pointer per colour channel (3, matching the generated DrawSurfaceBlock_* functions), lw: light row stride, nb: vertical block count
+
+static const drawfunc drawsurf[2/*fullbright*/][2/*additive*/][4/*mipmap*/] = { // every index must stay within 0..1, 0..1 and 0..3 respectively
+	{
+		{
+			DrawSurfaceBlock_f0_a0_m0, DrawSurfaceBlock_f0_a0_m1,
+			DrawSurfaceBlock_f0_a0_m2, DrawSurfaceBlock_f0_a0_m3,
+		},
+		{
+			DrawSurfaceBlock_f0_a1_m0, DrawSurfaceBlock_f0_a1_m1,
+			DrawSurfaceBlock_f0_a1_m2, DrawSurfaceBlock_f0_a1_m3,
+		},
+	},
+	{
+		{
+			DrawSurfaceBlock_f1_a0_m0, DrawSurfaceBlock_f1_a0_m1,
+			DrawSurfaceBlock_f1_a0_m2, DrawSurfaceBlock_f1_a0_m3,
+		},
+		{
+			DrawSurfaceBlock_f1_a1_m0, DrawSurfaceBlock_f1_a1_m1,
+			DrawSurfaceBlock_f1_a1_m2, DrawSurfaceBlock_f1_a1_m3,
+		},
+	},
+};
+
 /*
 ===============
 R_DrawSurface
@@ -202,12 +308,15 @@
 void R_DrawSurface (void)
 {
 	pixel_t	*basetptr;
-	int				smax, tmax, twidth;
+	int				smax, tmax, twidth, lightwidth;
 	int				u, blockdivshift, blocksize;
 	int				soffset, basetoffset, texwidth;
 	int				horzblockstep;
+	int				r_numhblocks, r_numvblocks;
 	pixel_t	*pcolumndest;
 	texture_t		*mt;
+	drawfunc draw;
+	unsigned *lp[3];
 
 	// calculate the lightings
 	R_BuildLightMap ();
@@ -216,6 +325,8 @@
 
 	mt = r_drawsurf.texture;
 
+	draw = drawsurf[(mt->drawsurf & DRAWSURF_FULLBRIGHT) != 0][currententity != nil && (currententity->effects & EF_ADDITIVE) != 0][r_drawsurf.surfmip]; // mask the flag to 0/1: drawsurf is a bit field, so any other bit set would index past the table's first dimension
+
 	r_source = mt->pixels + mt->offsets[r_drawsurf.surfmip];
 
 	// the fractional light values should range from 0 to (VID_GRADES - 1) << 16
@@ -226,7 +337,7 @@
 	blocksize = 16 >> r_drawsurf.surfmip;
 	blockdivshift = 4 - r_drawsurf.surfmip;
 
-	r_lightwidth = (r_drawsurf.surf->extents[0]>>4)+1;
+	lightwidth = (r_drawsurf.surf->extents[0]>>4)+1;
 
 	r_numhblocks = r_drawsurf.surfwidth >> blockdivshift;
 	r_numvblocks = r_drawsurf.surfheight >> blockdivshift;
@@ -247,22 +358,19 @@
 
 	// << 16 components are to guarantee positive values for %
 	soffset = ((soffset >> r_drawsurf.surfmip) + (smax << 16)) % smax;
-	basetptr = &r_source[((((basetoffset >> r_drawsurf.surfmip)
-		+ (tmax << 16)) % tmax) * twidth)];
+	basetptr = &r_source[((((basetoffset >> r_drawsurf.surfmip) + (tmax << 16)) % tmax) * twidth)];
 
 	pcolumndest = r_drawsurf.surfdat;
 
-	for (u=0 ; u<r_numhblocks; u++)
-	{
-		r_lightptr[0] = blocklights[0] + u;
-		r_lightptr[1] = blocklights[1] + u;
-		r_lightptr[2] = blocklights[2] + u;
-
+	for (u=0 ; u<r_numhblocks; u++){
 		prowdestbase = pcolumndest;
 
 		pbasesource = basetptr + soffset;
+		lp[0] = blocklights[0]+u;
+		lp[1] = blocklights[1]+u;
+		lp[2] = blocklights[2]+u;
 
-		R_DrawSurfaceBlock8(r_drawsurf.surfmip);
+		draw(lp, lightwidth, r_numvblocks);
 
 		soffset = soffset + blocksize;
 		if (soffset >= smax)
@@ -269,78 +377,5 @@
 			soffset = 0;
 
 		pcolumndest += horzblockstep;
-	}
-}
-
-
-//=============================================================================
-
-inline pixel_t
-addlight(pixel_t x, int lr, int lg, int lb)
-{
-	int r, g, b;
-
-	if((x & 0xff000000U) == 0)
-		return x;
-
-	if(currententity != nil && (currententity->effects & EF_ADDITIVE) != 0)
-		return x;
-
-	r = (x>>16) & 0xff;
-	g = (x>>8)  & 0xff;
-	b = (x>>0)  & 0xff;
-
-	r = (r * ((64<<8)-(lr & 0xffff))) >> (8+VID_CBITS);
-	g = (g * ((64<<8)-(lg & 0xffff))) >> (8+VID_CBITS);
-	b = (b * ((64<<8)-(lb & 0xffff))) >> (8+VID_CBITS);
-	x = (x & 0xff000000) | r<<16 | g<<8 | b<<0;
-
-	return x;
-}
-
-static void
-R_DrawSurfaceBlock8(int mip)
-{
-	int b, v, i, j, lightstep[3], light[3], lightleft[3], lightright[3];
-	int lightleftstep[3], lightrightstep[3];
-	pixel_t	*psource, *prowdest;
-
-	psource = pbasesource;
-	prowdest = prowdestbase;
-
-	for (v=0 ; v<r_numvblocks ; v++)
-	{
-		for(j = 0; j < 3; j++){
-			lightleft[j] = r_lightptr[j][0];
-			lightright[j] = r_lightptr[j][1];
-			r_lightptr[j] += r_lightwidth;
-			lightleftstep[j] = (r_lightptr[j][0] - lightleft[j]) >> (4-mip);
-			lightrightstep[j] = (r_lightptr[j][1] - lightright[j]) >> (4-mip);
-		}
-
-		for (i=0 ; i<16>>mip ; i++)
-		{
-			for(j = 0; j < 3; j++){
-				lightstep[j] = (lightleft[j] - lightright[j]) >> (4-mip);
-				light[j] = lightright[j];
-			}
-
-			for(b = (16>>mip)-1; b >= 0; b--){
-				prowdest[b] = addlight(psource[b], light[0], light[1], light[2]);
-				light[0] += lightstep[0];
-				light[1] += lightstep[1];
-				light[2] += lightstep[2];
-			}
-
-			psource += sourcetstep;
-			prowdest += surfrowbytes;
-			for(j = 0; j < 3; j++){
-				lightright[j] += lightrightstep[j];
-				lightleft[j] += lightleftstep[j];
-			}
-		}
-
-		if (psource >= r_sourcemax)
-			psource -= r_stepback;
 	}
 }
--- /dev/null
+++ b/r_surf_block.h
@@ -1,0 +1,48 @@
+#define N 3 // number of light channels; light[0..2] are passed to addlight as lr, lg, lb
+static void
+DrawSurfaceBlock(unsigned *lp[N], unsigned lw, int nb) // lp: per-channel light pointers (advanced in place), lw: stride between light rows, nb: number of vertical blocks; name, mip and addlight are macros supplied by the including file
+{
+	int b, v, i, j, lightstep[N], light[N], lightleft[N], lightright[N];
+	int lightleftstep[N], lightrightstep[N];
+	pixel_t	*psource, *prowdest;
+
+	psource = pbasesource; // source and destination start from file-scope state set up by the caller
+	prowdest = prowdestbase;
+
+	for (v=0 ; v<nb ; v++)
+	{
+		for(j = 0; j < N; j++){
+			lightleft[j] = lp[j][0];
+			lightright[j] = lp[j][1];
+			lp[j] += lw; // move to the next light row; the caller's array is modified
+			lightleftstep[j] = ((int)lp[j][0] - lightleft[j]) >> (4-mip); // subtract as int: an unsigned difference is shifted logically, yielding a huge positive step whose accumulation below relies on signed overflow; assumes light values fit in int, as the assignments above already do
+			lightrightstep[j] = ((int)lp[j][1] - lightright[j]) >> (4-mip); // same signed difference for the right edge
+		}
+
+		for (i=0 ; i<16>>mip; i++) // one pass per pixel row of the block
+		{
+			for(j = 0; j < N; j++){
+				lightstep[j] = (lightleft[j] - lightright[j]) >> (4-mip);
+				light[j] = lightright[j];
+			}
+
+			for(b = (16>>mip)-1; b >= 0; b--){ // walk the row from its last pixel to its first, stepping light from the right value towards the left one
+				prowdest[b] = addlight(psource[b], light[0], light[1], light[2]);
+				for(j = 0; j < N; j++)
+					light[j] += lightstep[j];
+			}
+
+			psource += sourcetstep;
+			prowdest += surfrowbytes;
+
+			for(j = 0; j < N; j++){
+				lightright[j] += lightrightstep[j];
+				lightleft[j] += lightleftstep[j];
+			}
+		}
+
+		if (psource >= r_sourcemax) // step the source pointer back once it reaches r_sourcemax
+			psource -= r_stepback;
+	}
+}
+#undef N
--- /dev/null
+++ b/r_surf_light.h
@@ -1,0 +1,22 @@
+static inline pixel_t
+addlight(pixel_t x, int lr, int lg, int lb) // template body: the function name, fullbright and additive are macros the including file defines before each inclusion
+{
+	int r, g, b;
+
+	if(fullbright && (x & 0xff000000U) == 0) // only fullbright variants test the top byte; pixels with it clear are returned unlit
+		return x;
+
+	if(additive) // additive variants return every pixel unchanged
+		return x;
+
+	r = (x>>16) & 0xff;
+	g = (x>>8)  & 0xff;
+	b = (x>>0)  & 0xff;
+
+	r = (r * ((64<<8)-(lr & 0xffff))) >> (8+VID_CBITS); // only the low 16 bits of each light value take part
+	g = (g * ((64<<8)-(lg & 0xffff))) >> (8+VID_CBITS);
+	b = (b * ((64<<8)-(lb & 0xffff))) >> (8+VID_CBITS);
+	x = (x & 0xff000000) | r<<16 | g<<8 | b<<0; // keep the top byte, repack the scaled channels
+
+	return x;
+}
--- /dev/null
+++ b/r_surf_x.h
@@ -1,0 +1,25 @@
+#include "r_surf_light.h" // defines addlight under whatever name the includer mapped it to
+
+#define mip 0 // each group below stamps out one DrawSurfaceBlock with mip fixed as a constant
+#define DrawSurfaceBlock DrawSurfaceBlock_m0
+#include "r_surf_block.h"
+#undef mip
+#undef DrawSurfaceBlock
+
+#define mip 1
+#define DrawSurfaceBlock DrawSurfaceBlock_m1
+#include "r_surf_block.h"
+#undef mip
+#undef DrawSurfaceBlock
+
+#define mip 2
+#define DrawSurfaceBlock DrawSurfaceBlock_m2
+#include "r_surf_block.h"
+#undef mip
+#undef DrawSurfaceBlock
+
+#define mip 3
+#define DrawSurfaceBlock DrawSurfaceBlock_m3
+#include "r_surf_block.h"
+#undef mip
+#undef DrawSurfaceBlock