shithub: qk1

ref: a2f781fe0e786384ed36edb93aceb42b3b1b76ab
dir: /d_edge.c/

View raw version
#define _GNU_SOURCE
#include <pthread.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <limits.h>
#include "fast_barrier.h"
#include "quakedef.h"

// factor multiplied with a surface's nearzi and its texture's mipadjust
// when a mip level is picked for that surface; it is only read, never
// assigned, in the code visible here
float scale_for_mip;

// FIXME: should go away
// (prototype only: R_RotateBmodel is not defined in the code visible here)
void R_RotateBmodel(entity_t *entity, view_t *v);

/*
 * Picks the mip level for a given scale: the index of the first of the
 * three d_scalemip thresholds that scale reaches, or 3 when it reaches
 * none of them. The result is never lower than d_minmip.
 */
static int
D_MipLevelForScale(float scale)
{
	int level;

	// stop at the first threshold the scale is at least as large as
	for(level = 0; level < 3; level++){
		if(scale >= d_scalemip[level])
			break;
	}

	return max(d_minmip, level);
}

/*
 * Fills every span of surf whose row lies in [first, end) with a single
 * colour and stamps the matching depth buffer cells with 0xfe bytes.
 *
 * surf:  surface whose span list is walked
 * color: pixel value written to the framebuffer
 * first: first row (inclusive) this call may touch
 * end:   row limit (exclusive) this call may touch
 */
static void
D_DrawSolidSurface(surf_t *surf, pixel_t color, int first, int end)
{
	espan_t *span;
	pixel_t *pdest;
	uzint *pz;
	int u, u2;

	for(span = surf->spans; span; span=span->pnext){
		// rows outside the band belong to some other caller
		if(span->v < first || span->v >= end)
			continue;
		pdest = dvars.fb + span->v*dvars.w;
		// the depth run has to start at the span's own column, exactly like
		// the colour run below; starting at the row origin would overwrite
		// depth values of whatever was drawn at the left edge of the row
		pz = dvars.zb + span->v*dvars.w + span->u;
		memset(pz, 0xfe, span->count*sizeof(*pz));
		u2 = span->u + span->count - 1;
		for(u = span->u; u <= u2; u++)
			pdest[u] = color;
	}
}


/*
 * Fills in the texture mapping terms of tv for one face at one mip level.
 *
 * miplevel:             mip level the face is drawn at
 * pface:                face whose texinfo vectors, texturemins and extents are used
 * transformed_modelorg: model origin already transformed by the caller
 * v:                    view handed to TransformVector
 * tv:                   receives divz step/origin, adjust and bbextent for both the
 *                       s and the t axis
 */
static void
D_CalcGradients(int miplevel, msurface_t *pface, vec3_t transformed_modelorg, view_t *v, texvars_t *tv)
{
	vec3_t saxis, taxis, scaledorg;
	float mipscale, uscale, vscale, fixscale;

	// factors shared by both texture axes
	mipscale = 1.0 / (float)(1 << miplevel);
	uscale = xscaleinv * mipscale;
	vscale = yscaleinv * mipscale;
	fixscale = 0x10000*mipscale;
	VectorScale(transformed_modelorg, mipscale, scaledorg);

	// s axis
	TransformVector(pface->texinfo->vecs[0], saxis, v);
	tv->s.divz.stepu = saxis[0] * uscale;
	tv->s.divz.stepv = -saxis[1] * vscale;
	tv->s.divz.origin = saxis[2] * mipscale - xcenter * tv->s.divz.stepu - ycenter * tv->s.divz.stepv;
	tv->s.adjust = ((fixed16_t)(DotProduct (scaledorg, saxis) * 0x10000 + 0.5)) -
			((pface->texturemins[0] << 16) >> miplevel)
			+ pface->texinfo->vecs[0][3]*fixscale;
	// -1 (-epsilon) so we never wander off the edge of the texture
	tv->s.bbextent = ((pface->extents[0] << 16) >> miplevel) - 1;

	// t axis, same recipe with the second texinfo vector
	TransformVector(pface->texinfo->vecs[1], taxis, v);
	tv->t.divz.stepu = taxis[0] * uscale;
	tv->t.divz.stepv = -taxis[1] * vscale;
	tv->t.divz.origin = taxis[2] * mipscale - xcenter * tv->t.divz.stepu - ycenter * tv->t.divz.stepv;
	tv->t.adjust = ((fixed16_t)(DotProduct (scaledorg, taxis) * 0x10000 + 0.5)) -
			((pface->texturemins[1] << 16) >> miplevel)
			+ pface->texinfo->vecs[1][3]*fixscale;
	tv->t.bbextent = ((pface->extents[1] << 16) >> miplevel) - 1;
}

// barriers shared by D_DrawSurfaces and the span threads: every participant
// waits on spansgobrr before drawing and on spansgohome once it is done
static fast_barrier_t spansgobrr, spansgohome;
// spin lock taken and released through spancachelock, which is the lock
// callback handed to D_CacheSurface
static pthread_spinlock_t spancache;

typedef struct span_thread_t span_thread_t;

// one slot per drawing thread; slot 0 stands for the thread calling D_DrawSurfaces
struct span_thread_t {
	pthread_t tid; // filled in by pthread_create for slots other than 0
	int n; // index of the slot
	int first; // first row of the slot's band
	int end; // one past the last row of the slot's band
};

// number of drawing threads, the one calling D_DrawSurfaces included
static int nthreads = 8;
// set to true once the worker threads are created; not read in the code visible here
static bool spawned = false;
// view for the current call, stored by D_DrawSurfaces and read by spanthread
static view_t *v0;

/*
 * Lock callback passed to D_CacheSurface: a non-zero n takes the
 * spancache spin lock, zero releases it.
 */
static void
spancachelock(int n)
{
	if(n)
		pthread_spin_lock(&spancache);
	else
		pthread_spin_unlock(&spancache);
}

/*
 * Draws, in the calling thread, the spans of every surface in the current
 * surface list whose rows fall inside [first, end).
 *
 * v0:    view to draw with; it is copied, the original is never written
 * first: first row (inclusive) of the band
 * end:   row limit (exclusive) of the band
 */
static void
spannothread(view_t *v0, int first, int end)
{
	vec3_t relorg, torg, worldtorg;
	drawsurf_t ds = {0};
	surfcache_t *cache;
	msurface_t *face;
	texvars_t tv;
	espan_t *span;
	entity_t *ent;
	surf_t *s;
	view_t view;
	byte alpha;
	bool blend;
	int mip;

	// work on a private copy: R_RotateBmodel changes the view for submodels
	view = *v0;
	TransformVector(view.modelorg, torg, &view);
	VectorCopy(torg, worldtorg);

	// TODO: could preset a lot of this at mode set time
	for(s = surfaces+1; s < surface_p; s++){
		ent = s->entity;
		if(s->spans == nil)
			continue;
		// only surfaces whose combined draw flags match the current pass
		if((surfdrawflags(s->flags) | entdrawflags(ent)) != r_drawflags)
			continue;

		// skip the surface unless at least one span lies inside the band
		span = s->spans;
		while(span != nil && (span->v < first || span->v >= end))
			span = span->pnext;
		if(span == nil)
			continue;

		// entity alpha wins over the surface's own translucency
		if(enthasalpha(ent) && ent->alpha != 255){
			alpha = ent->alpha;
		}else{
			alpha = 255;
			if(s->flags & SURF_TRANS)
				alpha *= alphafor(s->flags);
		}
		if(alpha < 1)
			alpha = 255;

		tv.z.stepu = s->d_zistepu;
		tv.z.stepv = s->d_zistepv;
		tv.z.origin = s->d_ziorigin;

		if(insubmodel(s)){
			// submodels are drawn relative to their own origin and rotation
			VectorSubtract(view.org, ent->origin, relorg);
			TransformVector(relorg, torg, &view);
			R_RotateBmodel(ent, &view);
		}

		face = s->data;
		if(s->flags & SURF_DRAWSKY){
			D_DrawSkyScans8(s->spans, first, end);
		}else if(s->flags & SURF_DRAWBACKGROUND){
			D_DrawSolidSurface(s, q1pal[(int)r_clearcolor.value & 0xFF], first, end);
		}else if(s->flags & SURF_DRAWTURB){
			tv.p = face->texinfo->texture->pixels;
			tv.w = 64;
			D_CalcGradients(0, face, torg, &view, &tv);
			D_DrawSpans(s->spans, &tv, alpha, SPAN_TURB, first, end);
		}else{
			mip = D_MipLevelForScale(s->nearzi * scale_for_mip * face->texinfo->mipadjust);
			if(s->flags & SURF_FENCE)
				mip = max(mip-1, 0);

			cache = D_CacheSurface(s->entity, face, &ds, mip, spancachelock);
			tv.p = cache->pixels;
			tv.w = cache->width;
			D_CalcGradients(mip, face, torg, &view, &tv);
			blend = (s->flags & SURF_FENCE) || (r_drawflags & DRAW_BLEND);
			if(alpha == 255 && (s->flags & SURF_FENCE))
				D_DrawSpans(s->spans, &tv, alpha, SPAN_FENCE, first, end);
			else if(blend)
				D_DrawSpans(s->spans, &tv, alpha, SPAN_BLEND, first, end);
			else
				D_DrawSpans(s->spans, &tv, alpha, SPAN_SOLID, first, end);
		}

		if(insubmodel(s)){
			// back to the world view for the next surface
			VectorCopy(worldtorg, torg);
			view = *v0;
		}
	}
}

static void *
spanthread(void *th_)
{
	vec3_t local_modelorg, transformed_modelorg, world_transformed_modelorg;
	surfcache_t *pcurrentcache;
	span_thread_t *th = th_;
	drawsurf_t ds = {0};
	msurface_t *pface;
	int miplevel, ns;
	entity_t *e;
	texvars_t t;
	byte alpha;
	bool blend;
	surf_t *s;
	espan_t *sp;
	bool yes;
	view_t v;

	for(;;){
		fast_barrier_wait(&spansgobrr);

		//uvlong t0 = nanosec();
		memmove(&v, v0, sizeof(v));
		TransformVector(v.modelorg, transformed_modelorg, &v);
		VectorCopy(transformed_modelorg, world_transformed_modelorg);
		ns = 0;

		// TODO: could preset a lot of this at mode set time
		for(s = surfaces+1; s < surface_p; s++){
			e = s->entity;
			if(!s->spans || ((surfdrawflags(s->flags) | entdrawflags(e)) ^ r_drawflags))
				continue;
			yes = false;
			for(sp = s->spans; sp != nil; sp = sp->pnext){
				if(sp->v >= th->first && sp->v < th->end){
					yes = true;
					break;
				}
			}
			if(!yes)
				continue;
			ns++;
			alpha = 255;
			if(enthasalpha(e) && e->alpha != 255)
				alpha = e->alpha;
			else if(s->flags & SURF_TRANS)
				alpha *= alphafor(s->flags);
			if(alpha < 1)
				alpha = 255;

			t.z.stepu = s->d_zistepu;
			t.z.stepv = s->d_zistepv;
			t.z.origin = s->d_ziorigin;

			if(insubmodel(s)){
				VectorSubtract(v.org, e->origin, local_modelorg);
				TransformVector(local_modelorg, transformed_modelorg, &v);
				R_RotateBmodel(e, &v);
			}

			pface = s->data;
			if(s->flags & SURF_DRAWSKY){
				D_DrawSkyScans8(s->spans, th->first, th->end);
			}else if(s->flags & SURF_DRAWBACKGROUND){
				D_DrawSolidSurface(s, q1pal[(int)r_clearcolor.value & 0xFF], th->first, th->end);
			}else if(s->flags & SURF_DRAWTURB){
				t.p = pface->texinfo->texture->pixels;
				t.w = 64;
				D_CalcGradients(0, pface, transformed_modelorg, &v, &t);
				D_DrawSpans(s->spans, &t, alpha, SPAN_TURB, th->first, th->end);
			}else{
				miplevel = D_MipLevelForScale(s->nearzi * scale_for_mip * pface->texinfo->mipadjust);
				if(s->flags & SURF_FENCE)
					miplevel = max(miplevel-1, 0);

				pcurrentcache = D_CacheSurface(s->entity, pface, &ds, miplevel, spancachelock);
				t.p = pcurrentcache->pixels;
				t.w = pcurrentcache->width;
				D_CalcGradients(miplevel, pface, transformed_modelorg, &v, &t);
				blend = (s->flags & SURF_FENCE) || (r_drawflags & DRAW_BLEND);
				D_DrawSpans(s->spans, &t, alpha,
					(alpha == 255 && (s->flags & SURF_FENCE))
						? SPAN_FENCE
						: (blend ? SPAN_BLEND : SPAN_SOLID),
					th->first,
					th->end
				);
			}

			if(insubmodel(s)){
				VectorCopy(world_transformed_modelorg, transformed_modelorg);
				memmove(&v, v0, sizeof(v));
			}
		}

		///uvlong t1 = nanosec();
		///fprintf(stderr, "@%d %llu\n", th->n, t1-t0);
		//fprintf(stderr, "!%d %d\n", th->n, ns);
		fast_barrier_wait(&spansgohome);
	}

	return nil;
}

// slots for all drawing threads, allocated on the first multi-threaded call
static span_thread_t *threads;

/*
 * Draws the current surface list with the view v0_.
 *
 * On the first call with nthreads > 1 the worker threads and their barriers
 * are created; slot 0 is the calling thread itself. Whenever vid.height
 * changes the rows are split into one band per slot. Passes with DRAW_BLEND
 * set, and everything when nthreads < 2, are drawn by the calling thread
 * alone; otherwise the workers are released through spansgobrr, the caller
 * draws band 0 and everybody meets again on spansgohome.
 *
 * If the slot array cannot be allocated the function falls back to
 * single-threaded drawing for good. If a worker cannot be created the
 * process is aborted, since the barriers could never be satisfied.
 */
void
D_DrawSurfaces(view_t *v0_)
{
	static bool lockready = false;
	static int lastheight = -1;
	span_thread_t *t;
	int i, split, dt, n, y;

	// the spin lock must be initialised exactly once; lastheight stays at
	// -1 for as long as there are no thread slots, so it cannot be the guard
	if(!lockready){
		pthread_spin_init(&spancache, PTHREAD_PROCESS_PRIVATE);
		lockready = true;
	}

	if(nthreads > 1 && threads == nil){
		threads = calloc(nthreads, sizeof(*threads));
		if(threads == nil){
			// no memory for the slots: draw everything in this thread
			nthreads = 1;
		}else{
			pthread_barrierattr_t battr;
			cpu_set_t set;

			// an attribute object has to be initialised before it is modified
			pthread_barrierattr_init(&battr);
			pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_PRIVATE);
			fast_barrier_init(&spansgobrr, &battr, nthreads);
			fast_barrier_init(&spansgohome, &battr, nthreads);
			pthread_barrierattr_destroy(&battr);

			for(t = threads, i = 0; i < nthreads; i++, t++){
				t->n = i;
				// slot i is pinned to CPU 2*i; pinning is best effort, so
				// the results of the affinity calls are deliberately ignored
				CPU_ZERO(&set);
				CPU_SET(2*i, &set);
				if(i == 0){
					sched_setaffinity(getpid(), sizeof(set), &set);
				}else if(pthread_create(&t->tid, nil, spanthread, t) != 0){
					// both barriers expect nthreads participants; with a
					// worker missing every later wait would hang forever
					abort();
				}else{
					pthread_setaffinity_np(t->tid, sizeof(set), &set);
				}
			}
			spawned = true;
		}
	}
	if(threads != nil && lastheight != vid.height){
		// band heights start at dt*nthreads/2 rows, shrink by dt per slot
		// until they would reach zero and then grow again by dt per slot
		// (presumably because rows near the middle cost more to draw)
		lastheight = vid.height;
		split = (nthreads+2)*nthreads/8;
		dt = vid.height/2 / split;
		n = dt*nthreads/2;
		y = 0;
		for(t = threads, i = 0; i < nthreads; i++, t++){
			t->first = y;
			t->end = y = y + n;
			if((n -= dt) == 0){
				dt = -dt;
				n = -dt;
			}
		}
		// integer division may leave rows over; the last band takes them
		t[-1].end = vid.height;
	}

	v0 = v0_;
	if(nthreads < 2 || (r_drawflags & DRAW_BLEND) != 0){
		// overhead (lots of small objects + synchronization)
		// not worth it - run it all in the same thread
		spannothread(v0, 0, vid.height);
	}else{
		fast_barrier_wait(&spansgobrr);
		spannothread(v0, threads[0].first, threads[0].end);
		fast_barrier_wait(&spansgohome);
	}
}