ref: cff9f164d3ee4e480522f28d440daaa841e8ce32
parent: d8f3a8ba78e93d6c421d0fad28cf3359ebb729da
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Thu May 11 10:54:33 EDT 2023
D_DrawSpans16 + loop unroll (thanks qbism & others)
--- a/d_init.c
+++ b/d_init.c
@@ -126,7 +126,7 @@
for (i=0 ; i<(NUM_MIPS-1) ; i++)
d_scalemip[i] = basemip[i] * d_mipscale.value;
- d_drawspans = D_DrawSpans8;
+ d_drawspans = D_DrawSpans16;
d_aflatcolor = 0;
}
--- a/d_scan.c
+++ b/d_scan.c
@@ -221,19 +221,18 @@
} while ((pspan = pspan->pnext) != nil);
}
-
/*
=============
-D_DrawSpans8
+D_DrawSpans16
=============
*/
-void D_DrawSpans8 (espan_t *pspan)
+void D_DrawSpans16 (espan_t *pspan) // qbism: s/t are recomputed every 16 steps instead of every 8
{
int count, spancount;
unsigned char *pbase, *pdest;
fixed16_t s, t, snext, tnext, sstep, tstep;
float sdivz, tdivz, zi, z, du, dv, spancountminus1;
- float sdivz8stepu, tdivz8stepu, zi8stepu;
+ float sdivzstepu, tdivzstepu, zistepu;
sstep = 0; // keep compiler happy
tstep = 0; // ditto
@@ -240,9 +239,9 @@
pbase = (unsigned char *)cacheblock;
- sdivz8stepu = d_sdivzstepu * 8;
- tdivz8stepu = d_tdivzstepu * 8;
- zi8stepu = d_zistepu * 8;
+ sdivzstepu = d_sdivzstepu * 16;
+ tdivzstepu = d_tdivzstepu * 16;
+ zistepu = d_zistepu * 16;
do
{
@@ -275,8 +274,8 @@
do
{
// calculate s and t at the far end of the span
- if (count >= 8)
- spancount = 8;
+ if (count >= 16)
+ spancount = 16;
else
spancount = count;
@@ -286,16 +285,16 @@
{
// calculate s/z, t/z, zi->fixed s and t at far end of span,
// calculate s and t steps across span by shifting
- sdivz += sdivz8stepu;
- tdivz += tdivz8stepu;
- zi += zi8stepu;
+ sdivz += sdivzstepu;
+ tdivz += tdivzstepu;
+ zi += zistepu;
z = (float)0x10000 / zi; // prescale to 16.16 fixed-point
snext = (int)(sdivz * z) + sadjust;
if (snext > bbextents)
snext = bbextents;
- else if (snext < 8)
- snext = 8; // prevent round-off error on <0 steps from
+ else if (snext < 16)
+ snext = 16; // prevent round-off error on <0 steps
// from causing overstepping & running off the
// edge of the texture
@@ -302,11 +301,11 @@
tnext = (int)(tdivz * z) + tadjust;
if (tnext > bbextentt)
tnext = bbextentt;
- else if (tnext < 8)
- tnext = 8; // guard against round-off error on <0 steps
+ else if (tnext < 16)
+ tnext = 16; // guard against round-off error on <0 steps
- sstep = (snext - s) >> 3;
- tstep = (tnext - t) >> 3;
+ sstep = (snext - s) >> 4;
+ tstep = (tnext - t) >> 4;
}
else
{
@@ -322,8 +321,8 @@
snext = (int)(sdivz * z) + sadjust;
if (snext > bbextents)
snext = bbextents;
- else if (snext < 8)
- snext = 8; // prevent round-off error on <0 steps from
+ else if (snext < 16)
+ snext = 16; // prevent round-off error on <0 steps
// from causing overstepping & running off the
// edge of the texture
@@ -330,8 +329,8 @@
tnext = (int)(tdivz * z) + tadjust;
if (tnext > bbextentt)
tnext = bbextentt;
- else if (tnext < 8)
- tnext = 8; // guard against round-off error on <0 steps
+ else if (tnext < 16)
+ tnext = 16; // guard against round-off error on <0 steps
if (spancount > 1)
{
@@ -340,12 +339,27 @@
}
}
- do
+ pdest += spancount; // step past the run first; the cases below store at negative offsets
+ switch (spancount) // enter at the case for this run length; each case falls through to the next
{
- *pdest++ = *(pbase + (s >> 16) + (t >> 16) * cachewidth);
- s += sstep;
- t += tstep;
- } while (--spancount > 0);
+ case 16: pdest[-16] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 15: pdest[-15] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 14: pdest[-14] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 13: pdest[-13] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 12: pdest[-12] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 11: pdest[-11] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 10: pdest[-10] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 9: pdest[-9] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 8: pdest[-8] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 7: pdest[-7] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 6: pdest[-6] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 5: pdest[-5] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 4: pdest[-4] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 3: pdest[-3] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 2: pdest[-2] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 1: pdest[-1] = pbase[(s >> 16) + (t >> 16) * cachewidth];
+ case 0: break;
+ }
s = snext;
t = tnext;
@@ -354,7 +368,6 @@
} while ((pspan = pspan->pnext) != nil);
}
-
/*
=============
--- a/qw/d_init.c
+++ b/qw/d_init.c
@@ -121,7 +121,7 @@
for (i=0 ; i<(NUM_MIPS-1) ; i++)
d_scalemip[i] = basemip[i] * d_mipscale.value;
- d_drawspans = D_DrawSpans8; /* no DrawSpans16 for non-asm heathens */
+ d_drawspans = D_DrawSpans16;
d_aflatcolor = 0;
}
--- a/qw/d_scan.c
+++ b/qw/d_scan.c
@@ -221,16 +221,16 @@
/*
=============
-D_DrawSpans8
+D_DrawSpans16
=============
*/
-void D_DrawSpans8 (espan_t *pspan)
+void D_DrawSpans16 (espan_t *pspan) // qbism: s/t are recomputed every 16 steps instead of every 8
{
int count, spancount;
unsigned char *pbase, *pdest;
fixed16_t s, t, snext, tnext, sstep, tstep;
float sdivz, tdivz, zi, z, du, dv, spancountminus1;
- float sdivz8stepu, tdivz8stepu, zi8stepu;
+ float sdivzstepu, tdivzstepu, zistepu;
sstep = 0; // keep compiler happy
tstep = 0; // ditto
@@ -237,9 +237,9 @@
pbase = (unsigned char *)cacheblock;
- sdivz8stepu = d_sdivzstepu * 8;
- tdivz8stepu = d_tdivzstepu * 8;
- zi8stepu = d_zistepu * 8;
+ sdivzstepu = d_sdivzstepu * 16;
+ tdivzstepu = d_tdivzstepu * 16;
+ zistepu = d_zistepu * 16;
do
{
@@ -272,8 +272,8 @@
do
{
// calculate s and t at the far end of the span
- if (count >= 8)
- spancount = 8;
+ if (count >= 16)
+ spancount = 16;
else
spancount = count;
@@ -283,16 +283,16 @@
{
// calculate s/z, t/z, zi->fixed s and t at far end of span,
// calculate s and t steps across span by shifting
- sdivz += sdivz8stepu;
- tdivz += tdivz8stepu;
- zi += zi8stepu;
+ sdivz += sdivzstepu;
+ tdivz += tdivzstepu;
+ zi += zistepu;
z = (float)0x10000 / zi; // prescale to 16.16 fixed-point
snext = (int)(sdivz * z) + sadjust;
if (snext > bbextents)
snext = bbextents;
- else if (snext < 8)
- snext = 8; // prevent round-off error on <0 steps from
+ else if (snext < 16)
+ snext = 16; // prevent round-off error on <0 steps
// from causing overstepping & running off the
// edge of the texture
@@ -299,11 +299,11 @@
tnext = (int)(tdivz * z) + tadjust;
if (tnext > bbextentt)
tnext = bbextentt;
- else if (tnext < 8)
- tnext = 8; // guard against round-off error on <0 steps
+ else if (tnext < 16)
+ tnext = 16; // guard against round-off error on <0 steps
- sstep = (snext - s) >> 3;
- tstep = (tnext - t) >> 3;
+ sstep = (snext - s) >> 4;
+ tstep = (tnext - t) >> 4;
}
else
{
@@ -319,8 +319,8 @@
snext = (int)(sdivz * z) + sadjust;
if (snext > bbextents)
snext = bbextents;
- else if (snext < 8)
- snext = 8; // prevent round-off error on <0 steps from
+ else if (snext < 16)
+ snext = 16; // prevent round-off error on <0 steps
// from causing overstepping & running off the
// edge of the texture
@@ -327,8 +327,8 @@
tnext = (int)(tdivz * z) + tadjust;
if (tnext > bbextentt)
tnext = bbextentt;
- else if (tnext < 8)
- tnext = 8; // guard against round-off error on <0 steps
+ else if (tnext < 16)
+ tnext = 16; // guard against round-off error on <0 steps
if (spancount > 1)
{
@@ -337,12 +337,28 @@
}
}
- do
+ pdest += spancount; // step past the run first; the cases below store at negative offsets
+
+ switch (spancount) // enter at the case for this run length; each case falls through to the next
{
- *pdest++ = *(pbase + (s >> 16) + (t >> 16) * cachewidth);
- s += sstep;
- t += tstep;
- } while (--spancount > 0);
+ case 16: pdest[-16] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 15: pdest[-15] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 14: pdest[-14] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 13: pdest[-13] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 12: pdest[-12] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 11: pdest[-11] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 10: pdest[-10] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 9: pdest[-9] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 8: pdest[-8] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 7: pdest[-7] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 6: pdest[-6] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 5: pdest[-5] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 4: pdest[-4] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 3: pdest[-3] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 2: pdest[-2] = pbase[(s >> 16) + (t >> 16) * cachewidth]; s += sstep; t += tstep;
+ case 1: pdest[-1] = pbase[(s >> 16) + (t >> 16) * cachewidth];
+ case 0: break;
+ }
s = snext;
t = tnext;
--- a/qw/r_surf.c
+++ b/qw/r_surf.c
@@ -307,8 +307,8 @@
*/
void R_DrawSurfaceBlock8_mip0 (void)
{
- int v, i, b, lightstep, lighttemp, light;
- unsigned char pix, *psource, *prowdest;
+ int v, i, lightstep, lighttemp, light;
+ unsigned char *psource, *prowdest;
psource = pbasesource;
prowdest = prowdestbase;
@@ -330,14 +330,23 @@
light = lightright;
- for (b=15; b>=0; b--)
- {
- pix = psource[b];
- prowdest[b] = ((unsigned char *)vid.colormap)
- [(light & 0xFF00) + pix];
- light += lightstep;
- }
-
+ prowdest[15] = vid.colormap[(light & 0xFF00) + psource[15]]; light += lightstep; // look up with the current light, then step it
+ prowdest[14] = vid.colormap[(light & 0xFF00) + psource[14]]; light += lightstep;
+ prowdest[13] = vid.colormap[(light & 0xFF00) + psource[13]]; light += lightstep;
+ prowdest[12] = vid.colormap[(light & 0xFF00) + psource[12]]; light += lightstep;
+ prowdest[11] = vid.colormap[(light & 0xFF00) + psource[11]]; light += lightstep;
+ prowdest[10] = vid.colormap[(light & 0xFF00) + psource[10]]; light += lightstep;
+ prowdest[9] = vid.colormap[(light & 0xFF00) + psource[9]]; light += lightstep;
+ prowdest[8] = vid.colormap[(light & 0xFF00) + psource[8]]; light += lightstep;
+ prowdest[7] = vid.colormap[(light & 0xFF00) + psource[7]]; light += lightstep;
+ prowdest[6] = vid.colormap[(light & 0xFF00) + psource[6]]; light += lightstep;
+ prowdest[5] = vid.colormap[(light & 0xFF00) + psource[5]]; light += lightstep;
+ prowdest[4] = vid.colormap[(light & 0xFF00) + psource[4]]; light += lightstep;
+ prowdest[3] = vid.colormap[(light & 0xFF00) + psource[3]]; light += lightstep;
+ prowdest[2] = vid.colormap[(light & 0xFF00) + psource[2]]; light += lightstep;
+ prowdest[1] = vid.colormap[(light & 0xFF00) + psource[1]]; light += lightstep;
+ prowdest[0] = vid.colormap[(light & 0xFF00) + psource[0]]; // last texel of the row: no further step needed
+
psource += sourcetstep;
lightright += lightrightstep;
lightleft += lightleftstep;
--- a/r_surf.c
+++ b/r_surf.c
@@ -294,8 +294,8 @@
*/
void R_DrawSurfaceBlock8_mip0 (void)
{
- int v, i, b, lightstep, lighttemp, light;
- unsigned char pix, *psource, *prowdest;
+ int v, i, lightstep, lighttemp, light;
+ unsigned char *psource, *prowdest;
psource = pbasesource;
prowdest = prowdestbase;
@@ -317,14 +317,23 @@
light = lightright;
- for (b=15; b>=0; b--)
- {
- pix = psource[b];
- prowdest[b] = ((unsigned char *)vid.colormap)
- [(light & 0xFF00) + pix];
- light += lightstep;
- }
-
+ prowdest[15] = vid.colormap[(light & 0xFF00) + psource[15]]; light += lightstep; // look up with the current light, then step it
+ prowdest[14] = vid.colormap[(light & 0xFF00) + psource[14]]; light += lightstep;
+ prowdest[13] = vid.colormap[(light & 0xFF00) + psource[13]]; light += lightstep;
+ prowdest[12] = vid.colormap[(light & 0xFF00) + psource[12]]; light += lightstep;
+ prowdest[11] = vid.colormap[(light & 0xFF00) + psource[11]]; light += lightstep;
+ prowdest[10] = vid.colormap[(light & 0xFF00) + psource[10]]; light += lightstep;
+ prowdest[9] = vid.colormap[(light & 0xFF00) + psource[9]]; light += lightstep;
+ prowdest[8] = vid.colormap[(light & 0xFF00) + psource[8]]; light += lightstep;
+ prowdest[7] = vid.colormap[(light & 0xFF00) + psource[7]]; light += lightstep;
+ prowdest[6] = vid.colormap[(light & 0xFF00) + psource[6]]; light += lightstep;
+ prowdest[5] = vid.colormap[(light & 0xFF00) + psource[5]]; light += lightstep;
+ prowdest[4] = vid.colormap[(light & 0xFF00) + psource[4]]; light += lightstep;
+ prowdest[3] = vid.colormap[(light & 0xFF00) + psource[3]]; light += lightstep;
+ prowdest[2] = vid.colormap[(light & 0xFF00) + psource[2]]; light += lightstep;
+ prowdest[1] = vid.colormap[(light & 0xFF00) + psource[1]]; light += lightstep;
+ prowdest[0] = vid.colormap[(light & 0xFF00) + psource[0]]; // last texel of the row: no further step needed
+
psource += sourcetstep;
lightright += lightrightstep;
lightleft += lightleftstep;