shithub: qk1

Download patch

ref: 49a810aeb5cb996bf9a6d4e3c38fc4d17fb77610
parent: 9510b4cae3b95ddea506206756b725a9c5c11148
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Mon Jan 1 22:08:22 EST 2024

spans rewrite WIP

--- a/Makefile
+++ b/Makefile
@@ -82,8 +82,6 @@
 	seprint.o\
 	snd_openal.o\
 	softfloat.o\
-	span.o\
-	span_alpha.o\
 	sv_main.o\
 	sv_move.o\
 	sv_phys.o\
--- a/d_alpha.c
+++ b/d_alpha.c
@@ -1,7 +1,7 @@
 #include "quakedef.h"
 
 pixel_t
-blendalpha(pixel_t ca, pixel_t cb, int alpha, uzint izi)
+blendalpha(pixel_t ca, pixel_t cb, int alpha)
 {
 	int a, b, c;
 
@@ -9,7 +9,6 @@
 		alpha = (ca >> 24)*alpha >> 8;
 
 	if(currententity != nil && currententity->effects & EF_ADDITIVE){
-		ca = R_BlendFog(ca, izi);
 		a = (alpha*((ca>> 0)&0xff) + 255*((cb>> 0)&0xff))>> 8;
 		b = (alpha*((ca>> 8)&0xff) + 255*((cb>> 8)&0xff))>> 8;
 		c = (alpha*((ca>>16)&0xff) + 255*((cb>>16)&0xff))>> 8;
@@ -16,13 +15,11 @@
 		return (cb & 0xff000000) | min(a, 255) | min(b, 255)<<8 | min(c, 255)<<16;
 	}
 
-	return R_BlendFog(
+	return
 		(cb & 0xff000000) |
 		((alpha*((ca>> 0)&0xff) + (255-alpha)*((cb>> 0)&0xff))>> 8) << 0 |
 		((alpha*((ca>> 8)&0xff) + (255-alpha)*((cb>> 8)&0xff))>> 8) << 8 |
-		((alpha*((ca>>16)&0xff) + (255-alpha)*((cb>>16)&0xff))>> 8) << 16,
-		izi
-	);
+		((alpha*((ca>>16)&0xff) + (255-alpha)*((cb>>16)&0xff))>> 8) << 16;
 }
 
 float
--- a/d_edge.c
+++ b/d_edge.c
@@ -1,7 +1,5 @@
 #include "quakedef.h"
 
-static int	miplevel;
-
 float scale_for_mip;
 
 // FIXME: should go away
@@ -25,16 +23,18 @@
 	return max(d_minmip, lmiplevel);
 }
 
-// FIXME: clean this up
 static void
 D_DrawSolidSurface(surf_t *surf, pixel_t color)
 {
 	espan_t *span;
 	pixel_t *pdest;
+	uzint *pz;
 	int u, u2;
 
 	for(span = surf->spans; span; span=span->pnext){
-		pdest = dvars.viewbuffer + dvars.width*span->v;
+		pdest = dvars.viewbuffer + span->v*dvars.width;
+		pz = dvars.zbuffer + span->v*dvars.width + span->u; // z-fill has to start at the span's first column, not at the row start
+		memset(pz, 0xfe, span->count*sizeof(*pz)); // every covered z entry becomes 0xfefefefe (negative, since uzint is s32int)
 		u2 = span->u + span->count - 1;
 		for(u = span->u; u <= u2; u++)
 			pdest[u] = color;
@@ -43,7 +43,7 @@
 
 
 static void
-D_CalcGradients(msurface_t *pface, vec3_t transformed_modelorg)
+D_CalcGradients(int miplevel, msurface_t *pface, vec3_t transformed_modelorg)
 {
 	float mipscale;
 	vec3_t p_temp1, p_saxis, p_taxis;
@@ -81,6 +81,10 @@
 	dvars.bbextentt = ((pface->extents[1] << 16) >> miplevel) - 1;
 }
 
+void dospan_turb(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, byte alpha, uzint *pz, int izi, int izistep);
+void dospan_alpha(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, byte alpha, uzint *pz, int izi, int izistep);
+void dospan(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, byte alpha, uzint *pz, int izi, int izistep);
+
 void
 D_DrawSurfaces (void)
 {
@@ -87,11 +91,12 @@
 	surf_t *s;
 	msurface_t *pface;
 	surfcache_t *pcurrentcache;
-	vec3_t world_transformed_modelorg, local_modelorg, transformed_modelorg;
+	vec3_t local_modelorg, transformed_modelorg, world_transformed_modelorg;
+	int miplevel;
 	byte alpha;
 	bool blend;
 
-	currententity = &cl_entities[0];
+	currententity = cl_entities;
 	TransformVector(modelorg, transformed_modelorg);
 	VectorCopy(transformed_modelorg, world_transformed_modelorg);
 
@@ -116,72 +121,39 @@
 		dvars.zistepv = s->d_zistepv;
 		dvars.ziorigin = s->d_ziorigin;
 
-		if(s->insubmodel){
-			// FIXME: we don't want to do all this for every polygon!
-			// TODO: store once at start of frame
-			currententity = s->entity;	//FIXME: make this passed in to
-										// R_RotateBmodel()
+		if(s->insubmodel && s->entity != currententity){
+			currententity = s->entity;
 			VectorSubtract(r_origin, currententity->origin, local_modelorg);
 			TransformVector(local_modelorg, transformed_modelorg);
-
-			R_RotateBmodel(s->entity);	// FIXME: don't mess with the frustum,
-								// make entity passed in
+			R_RotateBmodel(currententity);
 		}
 
+		pface = s->data;
 		if(s->flags & SURF_DRAWSKY){
 			D_DrawSkyScans8(s->spans);
-			dvars.ziorigin = -0.8;
-			D_DrawZSpans(s->spans);
 		}else if(s->flags & SURF_DRAWBACKGROUND){
-			// set up a gradient for the background surface that places it
-			// effectively at infinity distance from the viewpoint
-			dvars.zistepu = 0;
-			dvars.zistepv = 0;
-			dvars.ziorigin = -0.9;
-
 			D_DrawSolidSurface(s, q1pal[(int)r_clearcolor.value & 0xFF]);
-			D_DrawZSpans(s->spans);
 		}else if(s->flags & SURF_DRAWTURB){
-			pface = s->data;
-			miplevel = 0;
-			dvars.cacheblock = pface->texinfo->texture->pixels + pface->texinfo->texture->offsets[0];
-			dvars.cachewidth = 64;
-
-			D_CalcGradients (pface, transformed_modelorg);
-			Turbulent8 (s->spans, alpha);
-			if(!blend)
-				D_DrawZSpans (s->spans);
+			D_CalcGradients(0, pface, transformed_modelorg);
+			D_DrawSpans16(s->spans, pface->texinfo->texture->pixels, 64, alpha, SPAN_TURB);
 		}else{
-			pface = s->data;
 			miplevel = D_MipLevelForScale(s->nearzi * scale_for_mip * pface->texinfo->mipadjust);
 			if(s->flags & SURF_FENCE)
 				miplevel = max(miplevel-1, 0);
 
-			// FIXME: make this passed in to D_CacheSurface
 			pcurrentcache = D_CacheSurface(pface, miplevel);
-
-			dvars.cacheblock = pcurrentcache->pixels;
-			dvars.cachewidth = pcurrentcache->width;
-
-			D_CalcGradients(pface, transformed_modelorg);
-
-			D_DrawSpans16(s->spans, blend, alpha);
-			if(!blend)
-				D_DrawZSpans(s->spans);
+			D_CalcGradients(miplevel, pface, transformed_modelorg);
+			D_DrawSpans16(s->spans, pcurrentcache->pixels, pcurrentcache->width, alpha,
+				(alpha == 255 && s->flags & SURF_FENCE) ? SPAN_FENCE : (blend ? SPAN_BLEND : SPAN_SOLID)
+			);
 		}
-
-		if(s->insubmodel){
-			// restore the old drawing state
-			// FIXME: we don't want to do this every time!
-			// TODO: speed up
-			currententity = &cl_entities[0];
-			VectorCopy(world_transformed_modelorg, transformed_modelorg);
-			VectorCopy(base_vpn, vpn);
-			VectorCopy(base_vup, vup);
-			VectorCopy(base_vright, vright);
-			VectorCopy(base_modelorg, modelorg);
-			R_TransformFrustum();
-		}
 	}
-}
 
+	currententity = cl_entities;
+	VectorCopy(world_transformed_modelorg, transformed_modelorg);
+	VectorCopy(base_vpn, vpn);
+	VectorCopy(base_vup, vup);
+	VectorCopy(base_vright, vright);
+	VectorCopy(base_modelorg, modelorg);
+	R_TransformFrustum();
+}
--- a/d_init.c
+++ b/d_init.c
@@ -8,6 +8,8 @@
 int d_minmip;
 float d_scalemip[MIPLEVELS-1];
 
+extern int *r_turb_turb;
+
 static float basemip[MIPLEVELS-1] = {1.0, 0.5*0.8, 0.25*0.8};
 
 void
@@ -24,6 +26,8 @@
 D_SetupFrame(void)
 {
 	int i;
+
+	r_turb_turb = sintable + ((int)(cl.time*SPEED)&(CYCLE-1));
 
 	dvars.viewbuffer = r_dowarp ? r_warpbuffer : vid.buffer;
 	dvars.width = vid.width;
--- a/d_local.h
+++ b/d_local.h
@@ -58,8 +58,14 @@
 extern dvars_t dvars;
 extern skyvars_t skyvars;
 
-void D_DrawSpans16 (espan_t *pspans, bool blend, byte alpha);
-void D_DrawZSpans (espan_t *pspans);
+enum { // span drawing modes passed as the spanfunc argument of D_DrawSpans16
+	SPAN_BLEND, // skips non-opaque texels, z-tested, alpha-blended into the destination, z left untouched (dospan_blend)
+	SPAN_FENCE, // skips non-opaque texels, z-tested, writes both pixel and z (dospan_fence)
+	SPAN_SOLID, // writes pixel and z unconditionally (dospan_solid)
+	SPAN_TURB, // texture coordinates warped through the r_turb_turb table (dospan_turb)
+};
+
+void D_DrawSpans16(espan_t *pspan, pixel_t *pbase, int width, byte alpha, int spanfunc);
 void Turbulent8 (espan_t *pspan, byte alpha);
 
 void D_DrawSkyScans8 (espan_t *pspan);
@@ -77,5 +83,5 @@
 extern int d_minmip;
 extern float d_scalemip[3];
 
-pixel_t blendalpha(pixel_t ca, pixel_t cb, int alpha, uzint izi);
+pixel_t blendalpha(pixel_t ca, pixel_t cb, int alpha);
 float alphafor(int flags);
--- a/d_polyse.c
+++ b/d_polyse.c
@@ -123,7 +123,7 @@
 				pixel_t p = addlight(skintable[fv->t >> 16][fv->s >> 16], fv->l[0], fv->l[1], fv->l[2]);
 				int n = d_scantable[fv->v] + fv->u;
 				if(r_drawflags & DRAW_BLEND){
-					dvars.viewbuffer[n] = blendalpha(p, dvars.viewbuffer[n], alpha, z);
+					dvars.viewbuffer[n] = blendalpha(p, dvars.viewbuffer[n], alpha);
 				}else{
 					dvars.viewbuffer[n] = p;
 					*zbuf = z;
@@ -311,7 +311,7 @@
 		pixel_t p = addlight(skintable[new.t >> 16][new.s >> 16], l[0], l[1], l[2]);
 		int n = d_scantable[new.v] + new.u;
 		if(r_drawflags & DRAW_BLEND){
-			dvars.viewbuffer[n] = blendalpha(p, dvars.viewbuffer[n], alpha, z);
+			dvars.viewbuffer[n] = blendalpha(p, dvars.viewbuffer[n], alpha);
 		}else{
 			dvars.viewbuffer[n] = p;
 			*zbuf = z;
@@ -555,8 +555,7 @@
 						*lpdest = blendalpha(
 							p,
 							*lpdest,
-							alpha,
-							lzi
+							alpha
 						);
 					}else{
 						*lpdest = p;
--- a/d_scan.c
+++ b/d_scan.c
@@ -1,10 +1,7 @@
 #include "quakedef.h"
+#include "r_fog.h"
 
-static pixel_t *r_turb_pbase, *r_turb_pdest;
-static fixed16_t r_turb_s, r_turb_t, r_turb_sstep, r_turb_tstep;
-static int *r_turb_turb;
-static int r_turb_spancount;
-static uzint *r_turb_z;
+int *r_turb_turb;
 
 /*
 =============
@@ -16,12 +13,11 @@
 */
 void D_WarpScreen (void)
 {
-	int		w, h, u, v, *turb, *col;
-	pixel_t	*dest;
-	pixel_t	**row;
-	float	wratio, hratio;
-	static pixel_t	*rowptr[MAXHEIGHT+(AMP2*2)];
-	static int	column[MAXWIDTH+(AMP2*2)];
+	static pixel_t *rowptr[MAXHEIGHT+(AMP2*2)];
+	static int column[MAXWIDTH+(AMP2*2)];
+	int w, h, u, v, *turb, *col;
+	pixel_t	*dest, **row;
+	float wratio, hratio;
 
 	w = r_refdef.vrect.width;
 	h = r_refdef.vrect.height;
@@ -51,142 +47,178 @@
 	}
 }
 
-/*
-=============
-D_DrawTurbulent8Span
-=============
-*/
 static inline void
-D_DrawTurbulent8Span (int izi, byte alpha)
+dospan_solid(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, uzint *pz, uzint izi, int izistep)
 {
-	int sturb, tturb;
+	pixel_t pix;
+	do{
+		*pz++ = izi;
+		pix = pbase[(s >> 16) + (t >> 16) * width];
+		s += sstep;
+		t += tstep;
+		izi += izistep;
+		*pdest++ = pix;
+	}while(--spancount);
+}
 
+static inline void
+dospan_solid_f1(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, uzint *pz, uzint izi, int izistep, fog_t *fog)
+{
+	pixel_t pix;
 	do{
-		sturb = ((r_turb_s + r_turb_turb[(r_turb_t>>16)&(CYCLE-1)])>>16)&63;
-		tturb = ((r_turb_t + r_turb_turb[(r_turb_s>>16)&(CYCLE-1)])>>16)&63;
-		if(*r_turb_z <= izi || (r_drawflags & DRAW_BLEND) == 0)
-			*r_turb_pdest = blendalpha(*(r_turb_pbase + (tturb<<6) + sturb), *r_turb_pdest, alpha, izi);
-		r_turb_s += r_turb_sstep;
-		r_turb_t += r_turb_tstep;
-		r_turb_pdest++;
-		r_turb_z++;
-	}while(--r_turb_spancount > 0);
+		*pz++ = izi;
+		izi += izistep;
+		pix = pbase[(s >> 16) + (t >> 16) * width];
+		s += sstep;
+		t += tstep;
+		*pdest++ = blendfog(pix, *fog);
+		fogstep(*fog);
+	}while(--spancount);
 }
 
-
-void
-Turbulent8(espan_t *pspan, byte alpha)
+static inline void
+dospan_blend(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, byte alpha, uzint *pz, uzint izi, int izistep)
 {
-	int			count, spancountminus1;
-	fixed16_t	snext, tnext;
-	float		sdivz, tdivz, zi, z, du, dv;
-	float		sdivz16stepu, tdivz16stepu, zi16stepu;
+	pixel_t pix;
 
-	r_turb_turb = sintable + ((int)(cl.time*SPEED)&(CYCLE-1));
+	do{
+		pix = pbase[(s >> 16) + (t >> 16) * width];
+		s += sstep;
+		t += tstep;
+		if(opaque(pix) && *pz <= izi)
+			*pdest = blendalpha(pix, *pdest, alpha);
+		izi += izistep;
+		pdest++;
+		pz++;
+	}while(--spancount);
+}
 
-	r_turb_sstep = 0;	// keep compiler happy
-	r_turb_tstep = 0;	// ditto
+static inline void
+dospan_blend_f1(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, byte alpha, uzint *pz, uzint izi, int izistep, fog_t *fog)
+{
+	pixel_t pix;
 
-	r_turb_pbase = dvars.cacheblock;
+	do{
+		pix = pbase[(s >> 16) + (t >> 16) * width];
+		s += sstep;
+		t += tstep;
+		if(opaque(pix) && *pz <= izi)
+			*pdest = blendalpha(blendfog(pix, *fog), *pdest, alpha);
+		izi += izistep;
+		pdest++;
+		pz++;
+		fogstep(*fog);
+	}while(--spancount);
+}
 
-	sdivz16stepu = dvars.sdivzstepu * 16;
-	tdivz16stepu = dvars.tdivzstepu * 16;
-	zi16stepu = dvars.zistepu * 16;
+static inline void
+dospan_fence(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, uzint *pz, uzint izi, int izistep)
+{
+	pixel_t pix;
 
 	do{
-		r_turb_pdest = dvars.viewbuffer + pspan->v*dvars.width + pspan->u;
-		r_turb_z = dvars.zbuffer + pspan->v*dvars.width + pspan->u;
-		zi = dvars.ziorigin + pspan->v*dvars.zistepv + pspan->u*dvars.zistepu;
+		pix = pbase[(s >> 16) + (t >> 16) * width];
+		s += sstep;
+		t += tstep;
+		if(opaque(pix) && *pz <= izi){
+			*pdest = pix;
+			*pz = izi;
+		}
+		izi += izistep;
+		pdest++;
+		pz++;
+	}while(--spancount);
+}
 
-		count = pspan->count;
+static inline void
+dospan_fence_f1(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int width, uzint *pz, uzint izi, int izistep, fog_t *fog)
+{
+	pixel_t pix;
 
-		// calculate the initial s/z, t/z, 1/z, s, and t and clamp
-		du = pspan->u;
-		dv = pspan->v;
+	do{
+		pix = pbase[(s >> 16) + (t >> 16) * width];
+		s += sstep;
+		t += tstep;
+		if(opaque(pix) && *pz <= izi){
+			*pdest = blendfog(pix, *fog);
+			*pz = izi;
+		}
+		izi += izistep;
+		pdest++;
+		pz++;
+		fogstep(*fog);
+	}while(--spancount);
+}
 
-		sdivz = dvars.sdivzorigin + dv*dvars.sdivzstepv + du*dvars.sdivzstepu;
-		tdivz = dvars.tdivzorigin + dv*dvars.tdivzstepv + du*dvars.tdivzstepu;
-		z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+static void
+dospan_turb(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, byte alpha, uzint *pz, uzint izi, int izistep)
+{
+	int sturb, tturb;
+	bool noblend;
 
-		r_turb_s = (int)(sdivz * z) + dvars.sadjust;
-		r_turb_s = clamp(r_turb_s, 0, dvars.bbextents);
+	noblend = (r_drawflags & DRAW_BLEND) == 0;
+	s &= (CYCLE<<16)-1;
+	t &= (CYCLE<<16)-1;
 
-		r_turb_t = (int)(tdivz * z) + dvars.tadjust;
-		r_turb_t = clamp(r_turb_t, 0, dvars.bbextentt);
+	do{
+		if(noblend || *pz <= izi){
+			sturb = ((s + r_turb_turb[(t>>16)&(CYCLE-1)])>>16)&63;
+			tturb = ((t + r_turb_turb[(s>>16)&(CYCLE-1)])>>16)&63;
+			*pdest = blendalpha(*(pbase + (tturb<<6) + sturb), *pdest, alpha);
+			*pz = izi; // FIXME(sigrid): can always update this one?
+		}
+		s += sstep;
+		t += tstep;
+		izi += izistep;
+		pdest++;
+		pz++;
+	}while(--spancount > 0);
 
-		do{
-			// calculate s and t at the far end of the span
-			r_turb_spancount = min(count, 16);
-			count -= r_turb_spancount;
+}
 
-			if(count){
-				// calculate s/z, t/z, zi->fixed s and t at far end of span,
-				// calculate s and t steps across span by shifting
-				sdivz += sdivz16stepu;
-				tdivz += tdivz16stepu;
-				zi += zi16stepu;
-				z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+static void
+dospan_turb_f1(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, byte alpha, uzint *pz, uzint izi, int izistep, fog_t *fog)
+{
+	int sturb, tturb;
+	bool noblend;
 
-				snext = (int)(sdivz * z) + dvars.sadjust;
-				// prevent round-off error on <0 steps from causing overstepping & running off the edge of the texture
-				snext = clamp(snext, 16, dvars.bbextents);
+	noblend = (r_drawflags & DRAW_BLEND) == 0;
+	s &= (CYCLE<<16)-1;
+	t &= (CYCLE<<16)-1;
 
-				tnext = (int)(tdivz * z) + dvars.tadjust;
-				tnext = clamp(tnext, 16, dvars.bbextentt); // guard against round-off error on <0 steps
+	do{
+		if(noblend || *pz <= izi){
+			sturb = ((s + r_turb_turb[(t>>16)&(CYCLE-1)])>>16)&63;
+			tturb = ((t + r_turb_turb[(s>>16)&(CYCLE-1)])>>16)&63;
+			*pdest = blendalpha(blendfog(*(pbase + (tturb<<6) + sturb), *fog), *pdest, alpha);
+			*pz = izi; // FIXME(sigrid): can always update this one?
+		}
+		s += sstep;
+		t += tstep;
+		izi += izistep;
+		pdest++;
+		pz++;
+		fogstep(*fog);
+	}while(--spancount > 0);
 
-				r_turb_sstep = (snext - r_turb_s) >> 4;
-				r_turb_tstep = (tnext - r_turb_t) >> 4;
-			}else{
-				// calculate s/z, t/z, zi->fixed s and t at last pixel in span (so
-				// can't step off polygon), clamp, calculate s and t steps across
-				// span by division, biasing steps low so we don't run off the
-				// texture
-				spancountminus1 = r_turb_spancount - 1;
-				sdivz += dvars.sdivzstepu * spancountminus1;
-				tdivz += dvars.tdivzstepu * spancountminus1;
-				zi += dvars.zistepu * spancountminus1;
-				z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
-				snext = (int)(sdivz * z) + dvars.sadjust;
-				// prevent round-off error on <0 steps from causing overstepping & running off the edge of the texture
-				snext = clamp(snext, 16, dvars.bbextents);
-
-				tnext = (int)(tdivz * z) + dvars.tadjust;
-				tnext = clamp(tnext, 16, dvars.bbextentt); // guard against round-off error on <0 steps
-
-				if(r_turb_spancount > 1){
-					r_turb_sstep = (snext - r_turb_s) / spancountminus1;
-					r_turb_tstep = (tnext - r_turb_t) / spancountminus1;
-				}
-			}
-
-			r_turb_s = r_turb_s & ((CYCLE<<16)-1);
-			r_turb_t = r_turb_t & ((CYCLE<<16)-1);
-
-			D_DrawTurbulent8Span((int)(zi * 0x8000 * 0x10000), alpha);
-
-			r_turb_s = snext;
-			r_turb_t = tnext;
-
-		}while(count > 0);
-
-	}while((pspan = pspan->pnext) != nil);
 }
 
 void
-D_DrawSpans16(espan_t *pspan, bool blend, byte alpha) //qbism- up it from 8 to 16
+D_DrawSpans16(espan_t *pspan, pixel_t *pbase, int width, byte alpha, int spanfunc)
 {
 	int			count, spancount, izistep, spancountminus1;
-	pixel_t		*pbase, *pdest;
-	uzint		*pz;
+	pixel_t		*pdest;
+	uzint		*pz, izi;
 	fixed16_t	s, t, snext, tnext, sstep, tstep;
 	float		sdivz, tdivz, zi, z, du, dv;
 	float		sdivzstepu, tdivzstepu, zistepu;
+	fog_t fog;
+	bool fogged;
 
 	sstep = 0;	// keep compiler happy
 	tstep = 0;	// ditto
+	memset(&fog, 0, sizeof(fog));
 
-	pbase = dvars.cacheblock;
-
 	sdivzstepu = dvars.sdivzstepu * 16;
 	tdivzstepu = dvars.tdivzstepu * 16;
 	zistepu = dvars.zistepu * 16;
@@ -196,7 +228,7 @@
 		pdest = dvars.viewbuffer + pspan->v*dvars.width + pspan->u;
 		pz = dvars.zbuffer + pspan->v*dvars.width + pspan->u;
 		zi = dvars.ziorigin + pspan->v*dvars.zistepv + pspan->u*dvars.zistepu;
-
+		izi = zi * 0x8000 * 0x10000;
 		count = pspan->count;
 
 		// calculate the initial s/z, t/z, 1/z, s, and t and clamp
@@ -223,7 +255,8 @@
 				// calculate s and t steps across span by shifting
 				sdivz += sdivzstepu;
 				tdivz += tdivzstepu;
-				z = (float)0x10000 / (zi + zistepu);	// prescale to 16.16 fixed-point
+				// prescale to 16.16 fixed-point
+				z = (float)0x10000 / (zi + zistepu);
 
 				snext = (int)(sdivz * z) + dvars.sadjust;
 				// prevent round-off error on <0 steps from
@@ -245,7 +278,8 @@
 				spancountminus1 = spancount - 1;
 				sdivz += dvars.sdivzstepu * spancountminus1;
 				tdivz += dvars.tdivzstepu * spancountminus1;
-				z = (float)0x10000 / (zi + dvars.zistepu * spancountminus1);	// prescale to 16.16 fixed-point
+				// prescale to 16.16 fixed-point
+				z = (float)0x10000 / (zi + dvars.zistepu * spancountminus1);
 				snext = (int)(sdivz * z) + dvars.sadjust;
 				// prevent round-off error on <0 steps from
 				//  from causing overstepping & running off the
@@ -262,38 +296,45 @@
 				}
 			}
 
-			void dospan(pixel_t *, pixel_t *, int, int, int, int, int, int);
-			void dospan_alpha(pixel_t *, pixel_t *, int, int, int, int, int, int, byte, uzint *, int, int);
-			if(blend)
-				dospan_alpha(pdest, pbase, s, t, sstep, tstep, spancount, dvars.cachewidth, alpha, pz, (int)(zi * 0x8000 * 0x10000), izistep);
-			else
-				dospan(pdest, pbase, s, t, sstep, tstep, spancount, dvars.cachewidth);
+			fogged = isfogged() ? fogcalc(izi, izi + izistep*spancount, spancount, &fog) : false;
+			if(fogged){
+				switch(spanfunc){
+				case SPAN_SOLID:
+					dospan_solid_f1(pdest, pbase, s, t, sstep, tstep, spancount, width, pz, izi, izistep, &fog);
+					break;
+				case SPAN_TURB:
+					dospan_turb_f1(pdest, pbase, s, t, sstep, tstep, spancount, alpha, pz, izi, izistep, &fog);
+					break;
+				case SPAN_BLEND:
+					dospan_blend_f1(pdest, pbase, s, t, sstep, tstep, spancount, width, alpha, pz, izi, izistep, &fog);
+					break;
+				case SPAN_FENCE:
+					dospan_fence_f1(pdest, pbase, s, t, sstep, tstep, spancount, width, pz, izi, izistep, &fog);
+					break;
+				}
+			}else{
+				switch(spanfunc){
+				case SPAN_SOLID:
+					dospan_solid(pdest, pbase, s, t, sstep, tstep, spancount, width, pz, izi, izistep);
+					break;
+				case SPAN_TURB:
+					dospan_turb(pdest, pbase, s, t, sstep, tstep, spancount, alpha, pz, izi, izistep);
+					break;
+				case SPAN_BLEND:
+					dospan_blend(pdest, pbase, s, t, sstep, tstep, spancount, width, alpha, pz, izi, izistep);
+					break;
+				case SPAN_FENCE:
+					dospan_fence(pdest, pbase, s, t, sstep, tstep, spancount, width, pz, izi, izistep);
+					break;
+				}
+			}
+
 			pdest += spancount;
 			pz += spancount;
+			izi += izistep*spancount;
 			zi += zistepu;
 			s = snext;
 			t = tnext;
 		}while(count > 0);
-	}while((pspan = pspan->pnext) != nil);
-}
-
-void
-D_DrawZSpans(espan_t *pspan)
-{
-	int			count, izi, izistep;
-	uzint		*pz;
-	float		zi;
-
-	izistep = dvars.zistepu * 0x8000 * 0x10000;
-
-	do{
-		pz = dvars.zbuffer + pspan->v*dvars.width + pspan->u;
-		zi = dvars.ziorigin + pspan->v*dvars.zistepv + pspan->u*dvars.zistepu;
-		count = pspan->count;
-		izi = (int)(zi * 0x8000 * 0x10000);
-		do{
-			*pz++ = izi;
-			izi += izistep;
-		}while(--count > 0);
 	}while((pspan = pspan->pnext) != nil);
 }
--- a/d_sky.c
+++ b/d_sky.c
@@ -39,9 +39,12 @@
 void D_DrawSkyScans8 (espan_t *pspan)
 {
 	int count, spancount, u, v, spancountminus1;
-	pixel_t *pdest, m;
+	pixel_t *pdest, pix;
+	uzint *pz;
 	fixed16_t s[2], t[2], snext[2], tnext[2], sstep[2], tstep[2];
 	float skydist;
+	bool fog;
+	int c0, c1, c2, inva;
 
 	if(skyvars.source[0] == nil || skyvars.source[1] == nil)
 		return;
@@ -49,11 +52,19 @@
 	sstep[0] = sstep[1] = 0;	// keep compiler happy
 	tstep[0] = tstep[1] = 0;	// ditto
 	skydist = skyvars.time*skyvars.speed;	// TODO: add D_SetupFrame & set this there
+	if((fog = isskyfogged())){ // cache the premultiplied sky fog terms once per call
+		c0 = fogvars.skyc0;
+		c1 = fogvars.skyc1;
+		c2 = fogvars.skyc2; // each channel uses its own premultiplied term (bits 16..23 pair with skyc2)
+		inva = 255 - fogvars.sky; // weight left for the sky texel itself
+	}
 
 	do
 	{
-		pdest = dvars.viewbuffer + (dvars.width * pspan->v) + pspan->u;
+		pdest = dvars.viewbuffer + pspan->v*dvars.width + pspan->u;
 		count = pspan->count;
+		pz = dvars.zbuffer + pspan->v*dvars.width + pspan->u;
+		memset(pz, 0xff, count*sizeof(*pz));
 
 		// calculate the initial s & t
 		u = pspan->u;
@@ -62,15 +73,10 @@
 
 		do
 		{
-			if (count >= SKY_SPAN_MAX)
-				spancount = SKY_SPAN_MAX;
-			else
-				spancount = count;
-
+			spancount = min(count, SKY_SPAN_MAX);
 			count -= spancount;
 
-			if (count)
-			{
+			if(count){
 				u += spancount;
 
 				// calculate s and t at far end of span,
@@ -81,9 +87,7 @@
 				tstep[0] = (tnext[0] - t[0]) >> SKY_SPAN_SHIFT;
 				sstep[1] = (snext[1] - s[1]) >> SKY_SPAN_SHIFT;
 				tstep[1] = (tnext[1] - t[1]) >> SKY_SPAN_SHIFT;
-			}
-			else
-			{
+			}else{
 				// calculate s and t at last pixel in span,
 				// calculate s and t steps across span by division
 				spancountminus1 = (float)(spancount - 1);
@@ -100,14 +104,17 @@
 				}
 			}
 
-			do
-			{
-				m = skyvars.source[1][((t[1] & skyvars.tmask) >> skyvars.tshift) + ((s[1] & skyvars.smask) >> 16)];
-				if(opaque(m))
-					*pdest = m;
-				else
-					*pdest = skyvars.source[0][((t[0] & skyvars.tmask) >> skyvars.tshift) + ((s[0] & skyvars.smask) >> 16)];
-				pdest++;
+			do{
+				pix = skyvars.source[1][((t[1] & skyvars.tmask) >> skyvars.tshift) + ((s[1] & skyvars.smask) >> 16)];
+				if(!opaque(pix))
+					pix = skyvars.source[0][((t[0] & skyvars.tmask) >> skyvars.tshift) + ((s[0] & skyvars.smask) >> 16)];
+				if(fog){
+					pix =
+						((c0 + inva*((pix>> 0)&0xff)) >> 8) << 0 |
+						((c1 + inva*((pix>> 8)&0xff)) >> 8) << 8 |
+						((c2 + inva*((pix>>16)&0xff)) >> 8) << 16;
+				}
+				*pdest++ = pix;
 				s[0] += sstep[0];
 				t[0] += tstep[0];
 				s[1] += sstep[1];
--- a/d_sprite.c
+++ b/d_sprite.c
@@ -31,6 +31,11 @@
 	// we count on FP exceptions being turned off to avoid range problems
 	izistep = (int)(dvars.zistepu * 0x8000 * 0x10000);
 
+	if(pspan->v < 0){ // WIP debug guard: bail out instead of addressing rows above the buffers
+		fprintf(stderr, "%d %d %d %p\n", dvars.width, pspan->v, pspan->u, (void*)(dvars.zbuffer + dvars.width * pspan->v + pspan->u)); // %p requires a void* argument
+		return;
+	}
+
 	do
 	{
 		pdest = dvars.viewbuffer + dvars.width * pspan->v + pspan->u;
@@ -115,7 +120,7 @@
 				btemp = *(pbase + (s >> 16) + (t >> 16) * dvars.cachewidth);
 				if(opaque(btemp) && *pz <= izi){
 					if(r_drawflags & DRAW_BLEND){
-						*pdest = blendalpha(btemp, *pdest, alpha, izi);
+						*pdest = blendalpha(btemp, *pdest, alpha);
 					}else{
 						*pz = izi;
 						*pdest = btemp;
--- a/mkfile
+++ b/mkfile
@@ -77,8 +77,6 @@
 	snd_mix.$O\
 	snd_plan9.$O\
 	softfloat.$O\
-	span_alpha.$O\
-	span`{test -f span_$objtype.s && echo -n _$objtype}.$O\
 	sv_main.$O\
 	sv_move.$O\
 	sv_phys.$O\
--- a/r_fog.c
+++ b/r_fog.c
@@ -3,17 +3,20 @@
 static cvar_t r_fog = {"r_fog", "1", true};
 static cvar_t r_skyfog = {"r_skyfog", "0.5"};
 
-static struct {
-	float density;
-	byte c0, c1, c2, sky;
-	bool allowed;
-	int enabled;
-}r_fog_data;
+fogvars_t fogvars;
 
-enum {
-	Enfog = 1<<0,
-	Enskyfog = 1<<1,
-};
+static void
+r_skyfog_cb(cvar_t *var) // refreshes sky fog state from var->value; fog() also calls it directly after a color change
+{
+	if(var->value > 0.0)
+		fogvars.enabled |= Enskyfog;
+	else
+		fogvars.enabled &= ~Enskyfog;
+	fogvars.sky = 255 * clamp(var->value, 0.0, 1.0); // sky fog strength scaled to 0..255
+	fogvars.skyc0 = fogvars.sky * fogvars.c0; // strength times color, precomputed per channel
+	fogvars.skyc1 = fogvars.sky * fogvars.c1;
+	fogvars.skyc2 = fogvars.sky * fogvars.c2;
+}
 
 static void
 fog(void)
@@ -31,115 +34,50 @@
 		if(n == 2 && strncmp(s = Cmd_Argv(0), "gl_fog", 6) == 0 && s[6] != 'd'){ // Nehahra
 			x = 255 * clamp(x, 0.0, 1.0);
 			if(s[6] == 'r')
-				r_fog_data.c2 = x;
+				fogvars.c2 = x;
 			else if(s[6] == 'g')
-				r_fog_data.c1 = x;
+				fogvars.c1 = x;
 			else if(s[6] == 'b')
-				r_fog_data.c0 = x;
+				fogvars.c0 = x;
 			else if(s[6] == 'e'){
-				r_fog_data.enabled = x > 0 ? (Enfog | Enskyfog) : 0;
+				fogvars.enabled = x > 0 ? (Enfog | Enskyfog) : 0;
 				setcvar("r_skyfog", x > 0 ? "1" : "0");
 			}
 			return;
 		}
-		r_fog_data.density = clamp(x, 0.0, 1.0) * 0.016;
-		r_fog_data.density *= r_fog_data.density;
+		fogvars.density = clamp(x, 0.0, 1.0) * 0.016;
+		fogvars.density *= fogvars.density;
 		if(n == 2)
 			break;
 	case 4:
 		x = atof(Cmd_Argv(i++));
-		r_fog_data.c2 = 0xff * clamp(x, 0.0, 1.0);
+		fogvars.c2 = 0xff * clamp(x, 0.0, 1.0);
 		x = atof(Cmd_Argv(i++));
-		r_fog_data.c1 = 0xff * clamp(x, 0.0, 1.0);
+		fogvars.c1 = 0xff * clamp(x, 0.0, 1.0);
 		x = atof(Cmd_Argv(i));
-		r_fog_data.c0 = 0xff * clamp(x, 0.0, 1.0);
+		fogvars.c0 = 0xff * clamp(x, 0.0, 1.0);
+		r_skyfog_cb(&r_skyfog); /* recalculate sky fog */
 		break;
 	}
-	if(r_fog_data.density > 0.0)
-		r_fog_data.enabled |= Enfog;
+	if(fogvars.density > 0.0)
+		fogvars.enabled |= Enfog;
 	else
-		r_fog_data.enabled &= ~Enfog;
+		fogvars.enabled &= ~Enfog;
 }
 
 void
 R_ResetFog(void)
 {
-	r_fog_data.density = 0;
-	r_fog_data.c0 = r_fog_data.c1 = r_fog_data.c2 = 0x80;
-	r_fog_data.enabled = 0;
-	r_fog_data.allowed = r_fog.value > 0.0;
+	memset(&fogvars, 0, sizeof(fogvars));
+	fogvars.c0 = fogvars.c1 = fogvars.c2 = 0x80;
+	fogvars.allowed = r_fog.value > 0.0;
 	setcvar("r_skyfog", "0");
 }
 
-static inline pixel_t
-blend_fog(pixel_t pix, uzint z)
-{
-	byte a;
-	float d;
-
-	if(z > 65536){
-		d = 65536ULL*65536ULL / (u64int)z;
-		if((pix & ~0xffffff) == 0)
-			d /= 1.5;
-		d = 1.0 - exp2(-r_fog_data.density * d*d);
-		a = 255*d;
-	}else if(z < 0){
-		a = r_fog_data.sky;
-	}else{
-		a = 0;
-	}
-
-	if(a == 0)
-		return pix;
-
-	return
-		((a*r_fog_data.c0 + (255-a)*((pix>> 0)&0xff)) >> 8) << 0 |
-		((a*r_fog_data.c1 + (255-a)*((pix>> 8)&0xff)) >> 8) << 8 |
-		((a*r_fog_data.c2 + (255-a)*((pix>>16)&0xff)) >> 8) << 16;
-}
-
-pixel_t
-R_BlendFog(pixel_t pix, uzint z)
-{
-	if(r_fog_data.enabled && r_fog_data.allowed)
-		pix = blend_fog(pix, z);
-	return pix;
-}
-
-void
-R_DrawFog(void)
-{
-	pixel_t *pix;
-	int i, x, y;
-	uzint *z;
-
-	if(!r_fog_data.enabled || !r_fog_data.allowed)
-		return;
-
-	/* FIXME(sigrid): this is super slow */
-	for(y = r_refdef.vrect.y; y < r_refdef.vrectbottom; y++){
-		i = y * vid.width + r_refdef.vrect.x;
-		pix = vid.buffer + i;
-		z = dvars.zbuffer + i;
-		for(x = r_refdef.vrect.x; x < r_refdef.vrectright; x++, i++, pix++, z++)
-			*pix = blend_fog(*pix, *z);
-	}
-}
-
 static void
 r_fog_cb(cvar_t *var)
 {
-	r_fog_data.allowed = var->value > 0.0;
-}
-
-static void
-r_skyfog_cb(cvar_t *var)
-{
-	if(var->value > 0.0)
-		r_fog_data.enabled |= Enskyfog;
-	else
-		r_fog_data.enabled &= ~Enskyfog;
-	r_fog_data.sky = 255 * clamp(var->value, 0.0, 1.0);
+	fogvars.allowed = var->value > 0.0;
 }
 
 void
--- /dev/null
+++ b/r_fog.h
@@ -1,0 +1,57 @@
+#define fogstep(f) /* advance fog state f by one pixel: add each delta d[i] to its value v[i] */ \
+	do{ \
+		(f).v[0] += (f).d[0]; \
+		(f).v[1] += (f).d[1]; \
+		(f).v[2] += (f).d[2]; \
+		(f).v[3] += (f).d[3]; \
+	}while(0)
+
+#define fogshift 8
+
+static inline byte
+z2foga(uzint z) // converts a z-buffer value into a fog alpha byte using fogvars.density
+{
+	float d;
+
+	if(z <= 65536) // uzint is signed, so negative z values also end up here and get no fog
+		return 0;
+	d = 65536ULL*65536ULL / (u64int)z; // integer division, then stored as float; shrinks as z grows
+	d = 1.0 - exp2(-fogvars.density * d*d); // exponential-squared falloff
+	return 255*d; // truncated to byte
+}
+
+static inline pixel_t
+blendfog(pixel_t pix, fog_t fog) // mixes the fog terms in fog.v[] into the three low bytes of pix
+{
+	byte inva = 0xff - (fog.v[3]>>fogshift); // weight of the original pixel; v[3] holds fog alpha scaled by 1<<fogshift
+	return // only bits 0..23 of pix are read; its top byte is not carried into the result
+		((fog.v[0] + ((inva*((pix>> 0)&0xff))<<fogshift)) >> (8 + fogshift)) << 0 |
+		((fog.v[1] + ((inva*((pix>> 8)&0xff))<<fogshift)) >> (8 + fogshift)) << 8 |
+		((fog.v[2] + ((inva*((pix>>16)&0xff))<<fogshift)) >> (8 + fogshift)) << 16;
+}
+
+static inline bool
+fogcalc(uzint zi0, uzint zi1, int cnt, fog_t *f) // fills *f for linear fog interpolation from zi0 to zi1 over cnt pixels
+{
+	int end[3], v, e;
+
+	if((v = z2foga(zi0)) == 0 || (e = z2foga(zi1)) == 0) // no fog at either end: report false and leave *f untouched
+		return false;
+
+	v <<= fogshift; // add fractional bits so the per-pixel steps keep precision
+	e <<= fogshift;
+
+	end[0] = e * fogvars.c0; // fog terms at the far end of the run
+	end[1] = e * fogvars.c1;
+	end[2] = e * fogvars.c2;
+	f->v[0] = v * fogvars.c0; // fog terms at the first pixel
+	f->v[1] = v * fogvars.c1;
+	f->v[2] = v * fogvars.c2;
+	f->v[3] = v;
+	f->d[0] = (end[0] - f->v[0])/cnt; // per-pixel increments; cnt must be non-zero
+	f->d[1] = (end[1] - f->v[1])/cnt;
+	f->d[2] = (end[2] - f->v[2])/cnt;
+	f->d[3] = (e - v)/cnt;
+
+	return true;
+}
--- a/r_local.h
+++ b/r_local.h
@@ -102,8 +102,6 @@
 extern int r_drawflags;
 
 void R_ResetFog(void);
-pixel_t R_BlendFog(pixel_t pix, uzint z);
-void R_DrawFog(void);
 void R_InitFog(void);
 
 //
--- a/r_main.c
+++ b/r_main.c
@@ -728,8 +728,6 @@
 	if (r_dowarp)
 		D_WarpScreen ();
 
-	R_DrawFog();
-
 	V_SetContentsColor (r_viewleaf->contents);
 
 	if (r_reportsurfout.value && r_outofsurfaces)
--- a/r_shared.h
+++ b/r_shared.h
@@ -1,6 +1,34 @@
 // r_shared.h: general refresh-related stuff shared between the refresh and the
 // driver
 
+typedef s32int uzint;
+
+enum { // bits of fogvars.enabled
+	Enfog = 1<<0, // distance fog; set by the fog command when density > 0
+	Enskyfog = 1<<1, // sky fog; set when r_skyfog > 0
+};
+
+typedef struct fog_t fog_t;
+typedef struct fogvars_t fogvars_t;
+
+struct fog_t {
+	int v[4]; // current state: [0..2] fog alpha times color c0..c2, [3] fog alpha, scaled by 1<<fogshift
+	int d[4]; // per-pixel increments for v[], applied by fogstep()
+};
+
+struct fogvars_t {
+	float density; // squared, scaled density set by the fog command; Enfog is set only while this is > 0
+	byte c0, c1, c2, sky; // fog color channels (fog command stores r in c2, g in c1, b in c0) and sky fog strength 0..255
+	bool allowed; // r_fog cvar value > 0
+	int enabled; // bitmask of Enfog | Enskyfog
+	int skyc0, skyc1, skyc2; // sky * c0/c1/c2, recomputed in r_skyfog_cb
+};
+
+extern fogvars_t fogvars;
+
+#define isfogged() (fogvars.allowed && (fogvars.enabled & Enfog) != 0)
+#define isskyfogged() (fogvars.sky != 0 && fogvars.allowed && (fogvars.enabled & Enskyfog) != 0)
+
 // FIXME: clean up and move into d_iface.h
 
 #define	MAXVERTS	16					// max points in a surface polygon
@@ -12,8 +40,6 @@
 #define MAXDIMENSION	((MAXHEIGHT > MAXWIDTH) ? MAXHEIGHT : MAXWIDTH)
 
 #define SIN_BUFFER_SIZE	(MAXDIMENSION+CYCLE)
-
-typedef s32int uzint;
 
 //===================================================================
 
--- a/span.c
+++ /dev/null
@@ -1,11 +1,0 @@
-#include "quakedef.h"
-
-void
-dospan(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int cachewidth)
-{
-	do{
-		*pdest++ = pbase[(s >> 16) + (t >> 16) * cachewidth];
-		s += sstep;
-		t += tstep;
-	}while(--spancount);
-}
--- a/span_alpha.c
+++ /dev/null
@@ -1,33 +1,0 @@
-#include "quakedef.h"
-
-void
-dospan_alpha(pixel_t *pdest, pixel_t *pbase, int s, int t, int sstep, int tstep, int spancount, int cachewidth, u8int alpha, uzint *pz, int izi, int izistep)
-{
-	pixel_t pix;
-
-	if(alpha != 255){
-		do{
-			pix = pbase[(s >> 16) + (t >> 16) * cachewidth];
-			if(opaque(pix) && *pz <= izi)
-				*pdest = blendalpha(pix, *pdest, alpha, izi);
-			pdest++;
-			pz++;
-			izi += izistep;
-			s += sstep;
-			t += tstep;
-		}while(--spancount);
-	}else{
-		do{
-			pix = pbase[(s >> 16) + (t >> 16) * cachewidth];
-			if(opaque(pix) && *pz <= izi){
-				*pdest = pix;
-				*pz = izi;
-			}
-			pdest++;
-			pz++;
-			izi += izistep;
-			s += sstep;
-			t += tstep;
-		}while(--spancount);
-	}
-}