ref: a2f781fe0e786384ed36edb93aceb42b3b1b76ab
dir: /d_edge.c/
#define _GNU_SOURCE
#include <pthread.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <limits.h>
#include "fast_barrier.h"
#include "quakedef.h"
// Factor multiplied with a surface's nearzi and its texture's mipadjust when
// the span renderers below pick a mip level. It is only read, never assigned,
// in the visible part of this file.
float scale_for_mip;
// FIXME: should go away
// (local prototype; presumably the declaration belongs in a shared header - confirm)
void R_RotateBmodel(entity_t *entity, view_t *v);
// Maps a scale value to a mip level: the index of the first d_scalemip
// threshold that scale reaches, or 3 when it reaches none of the three.
// The result is never lower than d_minmip.
static int
D_MipLevelForScale(float scale)
{
	int level;

	for(level = 0; level < 3; level++){
		if(scale >= d_scalemip[level])
			break;
	}
	return max(d_minmip, level);
}
// Fills every span of surf that lies on a row in [first, end) with a flat
// color in the frame buffer (dvars.fb) and stamps the same pixels of the
// depth buffer (dvars.zb) with the byte pattern 0xfe.
//
// surf:  surface whose span list is drawn
// color: pixel value written to the frame buffer
// first: first row (inclusive) this call may touch
// end:   row limit (exclusive) this call may touch
static void
D_DrawSolidSurface(surf_t *surf, pixel_t color, int first, int end)
{
	espan_t *span;
	pixel_t *pdest;
	uzint *pz;
	int u, u2;

	for(span = surf->spans; span; span = span->pnext){
		// rows outside the band belong to another caller
		if(span->v < first || span->v >= end)
			continue;
		pdest = dvars.fb + span->v*dvars.w;
		// the depth run has to start at the span's first column, exactly
		// like the color run below; starting at column 0 would overwrite
		// depth values of unrelated pixels at the beginning of the row
		pz = dvars.zb + span->v*dvars.w + span->u;
		memset(pz, 0xfe, span->count*sizeof(*pz));
		u2 = span->u + span->count - 1;
		for(u = span->u; u <= u2; u++)
			pdest[u] = color;
	}
}
// Computes the texture gradients of one face for the given mip level and
// stores them in tv.
//
// The face's two texture axes (texinfo->vecs[0] and vecs[1]) are run through
// TransformVector with view v and scaled by 1/(1 << miplevel). From them the
// function fills, for both tv->s and tv->t:
//   divz.stepu, divz.stepv, divz.origin - per-pixel steps and origin term
//   adjust   - 16.16 fixed-point offset built from transformed_modelorg,
//              the face's texturemins and the axis offset (vecs[n][3])
//   bbextent - 16.16 fixed-point extent of the face at this mip level
static void
D_CalcGradients(int miplevel, msurface_t *pface, vec3_t transformed_modelorg, view_t *v, texvars_t *tv)
{
	vec3_t scaledorg, saxis, taxis;
	float mipscale, ustep, vstep, fixscale;

	mipscale = 1.0 / (float)(1 << miplevel);

	TransformVector(pface->texinfo->vecs[0], saxis, v);
	TransformVector(pface->texinfo->vecs[1], taxis, v);

	ustep = xscaleinv * mipscale;
	vstep = yscaleinv * mipscale;

	// s axis
	tv->s.divz.stepu = saxis[0] * ustep;
	tv->s.divz.stepv = -saxis[1] * vstep;
	tv->s.divz.origin = saxis[2] * mipscale - xcenter * tv->s.divz.stepu - ycenter * tv->s.divz.stepv;

	// t axis
	tv->t.divz.stepu = taxis[0] * ustep;
	tv->t.divz.stepv = -taxis[1] * vstep;
	tv->t.divz.origin = taxis[2] * mipscale - xcenter * tv->t.divz.stepu - ycenter * tv->t.divz.stepv;

	VectorScale(transformed_modelorg, mipscale, scaledorg);
	fixscale = 0x10000*mipscale;

	tv->s.adjust = ((fixed16_t)(DotProduct (scaledorg, saxis) * 0x10000 + 0.5)) -
		((pface->texturemins[0] << 16) >> miplevel)
		+ pface->texinfo->vecs[0][3]*fixscale;
	tv->t.adjust = ((fixed16_t)(DotProduct (scaledorg, taxis) * 0x10000 + 0.5)) -
		((pface->texturemins[1] << 16) >> miplevel)
		+ pface->texinfo->vecs[1][3]*fixscale;

	// one unit (an epsilon in 16.16) short of the full extent, so that
	// sampling never steps past the edge of the texture
	tv->s.bbextent = ((pface->extents[0] << 16) >> miplevel) - 1;
	tv->t.bbextent = ((pface->extents[1] << 16) >> miplevel) - 1;
}
// Frame hand-shake between D_DrawSurfaces and the workers: every participant
// waits on spansgobrr before rendering its band and on spansgohome after it.
static fast_barrier_t spansgobrr, spansgohome;
// Spin lock taken and released through spancachelock(), the callback handed
// to D_CacheSurface.
static pthread_spinlock_t spancache;
typedef struct span_thread_t span_thread_t;
// Per-band bookkeeping; one entry per band, including the one rendered by the
// thread that calls D_DrawSurfaces (entry 0, for which no thread is created).
struct span_thread_t {
// handle filled in by pthread_create (left zeroed for entry 0)
pthread_t tid;
// index of this entry in the threads array
int n;
// first screen row of the band (inclusive)
int first;
// row limit of the band (exclusive)
int end;
};
// Number of bands the screen is split into; values below 2 make
// D_DrawSurfaces render everything on the calling thread.
static int nthreads = 8;
// Set to true once the workers have been created; not read anywhere in the
// visible part of this file.
static bool spawned = false;
// View of the frame being rendered; stored by D_DrawSurfaces before the start
// barrier and read by the workers after it.
static view_t *v0;
// Lock callback handed to D_CacheSurface: a nonzero n acquires the spancache
// spin lock, zero releases it.
static void
spancachelock(int n)
{
	if(n)
		pthread_spin_lock(&spancache);
	else
		pthread_spin_unlock(&spancache);
}
static void
spannothread(view_t *v0, int first, int end)
{
vec3_t local_modelorg, transformed_modelorg, world_transformed_modelorg;
surfcache_t *pcurrentcache;
drawsurf_t ds = {0};
msurface_t *pface;
int miplevel;
entity_t *e;
texvars_t t;
byte alpha;
bool blend;
surf_t *s;
view_t v;
espan_t *sp;
bool yes;
///uvlong t0 = nanosec();
memmove(&v, v0, sizeof(v));
TransformVector(v.modelorg, transformed_modelorg, &v);
VectorCopy(transformed_modelorg, world_transformed_modelorg);
// TODO: could preset a lot of this at mode set time
for(s = surfaces+1; s < surface_p; s++){
e = s->entity;
if(!s->spans || ((surfdrawflags(s->flags) | entdrawflags(e)) ^ r_drawflags))
continue;
yes = false;
for(sp = s->spans; sp != nil; sp = sp->pnext){
if(sp->v >= first && sp->v < end){
yes = true;
break;
}
}
if(!yes)
continue;
alpha = 255;
if(enthasalpha(e) && e->alpha != 255)
alpha = e->alpha;
else if(s->flags & SURF_TRANS)
alpha *= alphafor(s->flags);
if(alpha < 1)
alpha = 255;
t.z.stepu = s->d_zistepu;
t.z.stepv = s->d_zistepv;
t.z.origin = s->d_ziorigin;
if(insubmodel(s)){
VectorSubtract(v.org, e->origin, local_modelorg);
TransformVector(local_modelorg, transformed_modelorg, &v);
R_RotateBmodel(e, &v);
}
pface = s->data;
if(s->flags & SURF_DRAWSKY){
D_DrawSkyScans8(s->spans, first, end);
}else if(s->flags & SURF_DRAWBACKGROUND){
D_DrawSolidSurface(s, q1pal[(int)r_clearcolor.value & 0xFF], first, end);
}else if(s->flags & SURF_DRAWTURB){
t.p = pface->texinfo->texture->pixels;
t.w = 64;
D_CalcGradients(0, pface, transformed_modelorg, &v, &t);
D_DrawSpans(s->spans, &t, alpha, SPAN_TURB, first, end);
}else{
miplevel = D_MipLevelForScale(s->nearzi * scale_for_mip * pface->texinfo->mipadjust);
if(s->flags & SURF_FENCE)
miplevel = max(miplevel-1, 0);
pcurrentcache = D_CacheSurface(s->entity, pface, &ds, miplevel, spancachelock);
t.p = pcurrentcache->pixels;
t.w = pcurrentcache->width;
D_CalcGradients(miplevel, pface, transformed_modelorg, &v, &t);
blend = (s->flags & SURF_FENCE) || (r_drawflags & DRAW_BLEND);
D_DrawSpans(s->spans, &t, alpha,
(alpha == 255 && (s->flags & SURF_FENCE))
? SPAN_FENCE
: (blend ? SPAN_BLEND : SPAN_SOLID),
first,
end
);
}
if(insubmodel(s)){
VectorCopy(world_transformed_modelorg, transformed_modelorg);
memmove(&v, v0, sizeof(v));
}
}
///uvlong t1 = nanosec();
///if(first != 0 || end != vid.height)
/// fprintf(stderr, "@%d %llu\n", 0, t1-t0);
}
static void *
spanthread(void *th_)
{
vec3_t local_modelorg, transformed_modelorg, world_transformed_modelorg;
surfcache_t *pcurrentcache;
span_thread_t *th = th_;
drawsurf_t ds = {0};
msurface_t *pface;
int miplevel, ns;
entity_t *e;
texvars_t t;
byte alpha;
bool blend;
surf_t *s;
espan_t *sp;
bool yes;
view_t v;
for(;;){
fast_barrier_wait(&spansgobrr);
//uvlong t0 = nanosec();
memmove(&v, v0, sizeof(v));
TransformVector(v.modelorg, transformed_modelorg, &v);
VectorCopy(transformed_modelorg, world_transformed_modelorg);
ns = 0;
// TODO: could preset a lot of this at mode set time
for(s = surfaces+1; s < surface_p; s++){
e = s->entity;
if(!s->spans || ((surfdrawflags(s->flags) | entdrawflags(e)) ^ r_drawflags))
continue;
yes = false;
for(sp = s->spans; sp != nil; sp = sp->pnext){
if(sp->v >= th->first && sp->v < th->end){
yes = true;
break;
}
}
if(!yes)
continue;
ns++;
alpha = 255;
if(enthasalpha(e) && e->alpha != 255)
alpha = e->alpha;
else if(s->flags & SURF_TRANS)
alpha *= alphafor(s->flags);
if(alpha < 1)
alpha = 255;
t.z.stepu = s->d_zistepu;
t.z.stepv = s->d_zistepv;
t.z.origin = s->d_ziorigin;
if(insubmodel(s)){
VectorSubtract(v.org, e->origin, local_modelorg);
TransformVector(local_modelorg, transformed_modelorg, &v);
R_RotateBmodel(e, &v);
}
pface = s->data;
if(s->flags & SURF_DRAWSKY){
D_DrawSkyScans8(s->spans, th->first, th->end);
}else if(s->flags & SURF_DRAWBACKGROUND){
D_DrawSolidSurface(s, q1pal[(int)r_clearcolor.value & 0xFF], th->first, th->end);
}else if(s->flags & SURF_DRAWTURB){
t.p = pface->texinfo->texture->pixels;
t.w = 64;
D_CalcGradients(0, pface, transformed_modelorg, &v, &t);
D_DrawSpans(s->spans, &t, alpha, SPAN_TURB, th->first, th->end);
}else{
miplevel = D_MipLevelForScale(s->nearzi * scale_for_mip * pface->texinfo->mipadjust);
if(s->flags & SURF_FENCE)
miplevel = max(miplevel-1, 0);
pcurrentcache = D_CacheSurface(s->entity, pface, &ds, miplevel, spancachelock);
t.p = pcurrentcache->pixels;
t.w = pcurrentcache->width;
D_CalcGradients(miplevel, pface, transformed_modelorg, &v, &t);
blend = (s->flags & SURF_FENCE) || (r_drawflags & DRAW_BLEND);
D_DrawSpans(s->spans, &t, alpha,
(alpha == 255 && (s->flags & SURF_FENCE))
? SPAN_FENCE
: (blend ? SPAN_BLEND : SPAN_SOLID),
th->first,
th->end
);
}
if(insubmodel(s)){
VectorCopy(world_transformed_modelorg, transformed_modelorg);
memmove(&v, v0, sizeof(v));
}
}
///uvlong t1 = nanosec();
///fprintf(stderr, "@%d %llu\n", th->n, t1-t0);
//fprintf(stderr, "!%d %d\n", th->n, ns);
fast_barrier_wait(&spansgohome);
}
return nil;
}
// Band table: one span_thread_t per band, allocated by D_DrawSurfaces the
// first time it runs with nthreads > 1. Entry 0 is the calling thread's band.
static span_thread_t *threads;

// Renders the surfaces of the current frame for view v0_.
//
// On first use with nthreads > 1 this sets up the two frame barriers,
// allocates the band table and starts nthreads-1 workers running spanthread;
// band i is pinned to CPU 2*i (presumably to skip SMT siblings - confirm).
// Whenever vid.height changes, the rows are re-divided into nthreads bands
// whose heights step down by dt per band and then back up again (for
// nthreads = 8: 4,3,2,1,1,2,3,4 times dt); the last band is extended to
// vid.height to absorb rounding.
//
// The frame itself is drawn on the calling thread alone when nthreads < 2 or
// when r_drawflags has DRAW_BLEND set; otherwise the caller renders band 0
// between the start and finish barriers while the workers render the rest.
void
D_DrawSurfaces(view_t *v0_)
{
	static int lastheight = -1;
	static bool lockready = false;
	span_thread_t *t;
	int i, split, dt, n, y;

	// The lock gets its own one-shot flag: lastheight only changes once a
	// band table exists, so it cannot guard against initialising the spin
	// lock again on every frame when rendering single-threaded.
	if(!lockready){
		pthread_spin_init(&spancache, PTHREAD_PROCESS_PRIVATE);
		lockready = true;
	}

	if(nthreads > 1 && threads == nil){
		threads = calloc(nthreads, sizeof(*threads));
		if(threads == nil){
			// no memory for the band table: stay on the calling thread
			// from now on instead of dereferencing a nil table below
			nthreads = 1;
		}else{
			pthread_barrierattr_t battr;
			cpu_set_t set;

			// an attributes object must be initialised before it is modified
			pthread_barrierattr_init(&battr);
			pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_PRIVATE);
			fast_barrier_init(&spansgobrr, &battr, nthreads);
			fast_barrier_init(&spansgohome, &battr, nthreads);
			pthread_barrierattr_destroy(&battr);

			for(t = threads, i = 0; i < nthreads; i++, t++){
				t->n = i;
				CPU_ZERO(&set);
				CPU_SET(2*i, &set);
				if(i == 0){
					// band 0 is rendered by the caller; only pin it
					sched_setaffinity(getpid(), sizeof(set), &set);
				}else{
					// NOTE(review): the result of pthread_create is not
					// checked; a worker that fails to start would leave
					// the barriers one participant short
					pthread_create(&t->tid, nil, spanthread, t);
					pthread_setaffinity_np(t->tid, sizeof(set), &set);
				}
			}
			spawned = true;
		}
	}

	if(threads != nil && lastheight != vid.height){
		lastheight = vid.height;
		split = (nthreads+2)*nthreads/8;
		dt = vid.height/2 / split;
		n = dt*nthreads/2;
		y = 0;
		for(t = threads, i = 0; i < nthreads; i++, t++){
			t->first = y;
			t->end = y = y + n;
			// once the band height has shrunk to nothing, flip the
			// sign of the step so the following bands grow again
			if((n -= dt) == 0){
				dt = -dt;
				n = -dt;
			}
		}
		// the last band takes whatever rows integer rounding left over
		threads[nthreads-1].end = vid.height;
	}

	v0 = v0_;
	if(nthreads < 2 || (r_drawflags & DRAW_BLEND) != 0){
		// overhead (lots of small objects + synchronization)
		// not worth it - run it all in the same thread
		spannothread(v0, 0, vid.height);
	}else{
		fast_barrier_wait(&spansgobrr);
		spannothread(v0, threads[0].first, threads[0].end);
		fast_barrier_wait(&spansgohome);
	}
}