ref: 4d88a3779d1f5e15ea7ea3bca5330b9c8d1ef2fd
parent: 2c286986893435895528d59c7db624261ac5571b
author: rodri <rgl@antares-labs.eu>
date: Mon Apr 15 00:53:51 EDT 2024
implement a fully concurrent pipeline based on tiles. - got rid of the z-buffer lock to avoid contention. - minor improvements to fb.c
--- a/fb.c
+++ b/fb.c
@@ -13,26 +13,29 @@
{
Framebuf *fb;
- fb = ctl->fb[ctl->idx];
- lock(&ctl->swplk);
+ lock(ctl);
+ fb = ctl->getfb(ctl);
loadimage(dst, rectaddpt(fb->r, dst->r.min), byteaddr(fb->cb, fb->r.min), bytesperline(fb->r, fb->cb->depth)*Dy(fb->r));
- unlock(&ctl->swplk);
+ unlock(ctl);
}
static void
framebufctl_memdraw(Framebufctl *ctl, Memimage *dst)
{
- lock(&ctl->swplk);
- memimagedraw(dst, dst->r, ctl->fb[ctl->idx]->cb, ZP, nil, ZP, SoverD);
- unlock(&ctl->swplk);
+ Framebuf *fb;
+
+ lock(ctl); /* Framebufctl now embeds its Lock (graphics.h), replacing the swplk member */
+ fb = ctl->getfb(ctl); /* replaces the direct ctl->fb[ctl->idx] lookup; done while holding the lock */
+ memimagedraw(dst, dst->r, fb->cb, ZP, nil, ZP, SoverD); /* draw the color buffer onto dst with SoverD */
+ unlock(ctl);
}
static void
framebufctl_swap(Framebufctl *ctl)
{
- lock(&ctl->swplk);
+ lock(ctl); /* guards the front-buffer index flip with the Lock embedded in Framebufctl */
ctl->idx ^= 1;
- unlock(&ctl->swplk);
+ unlock(ctl);
}
static void
@@ -41,7 +44,7 @@
Framebuf *fb;
/* address the back buffer—resetting the front buffer is VERBOTEN */
- fb = ctl->fb[ctl->idx^1];
+ fb = ctl->getbb(ctl);
memsetd(fb->zbuf, Inf(-1), Dx(fb->r)*Dy(fb->r));
memfillcolor(fb->cb, DTransparent);
}
--- a/graphics.h
+++ b/graphics.h
@@ -154,8 +154,8 @@
Framebuf *fb;
Memimage *frag;
Renderjob *job;
-
Entity *entity;
+ OBJElem **eb, **ee;
uvlong uni_time;
@@ -177,11 +177,11 @@
struct Renderjob
{
+ Ref; /* embedded count replacing nrem: set to the stage's worker count when the end-of-job marker is fanned out, decref'd by each worker */
Framebuf *fb;
Scene *scene;
Shadertab *shaders;
Channel *donec;
- ulong nrem; /* remaining entities to process */
Renderjob *next;
};
@@ -190,15 +190,14 @@
{
Memimage *cb; /* color buffer */
double *zbuf; /* z/depth buffer */
- Lock zbuflk;
Rectangle r;
};
struct Framebufctl
{
+ Lock;
Framebuf *fb[2]; /* double buffering */
uint idx; /* front buffer index */
- Lock swplk;
void (*draw)(Framebufctl*, Image*);
void (*memdraw)(Framebufctl*, Memimage*);
--- a/internal.h
+++ b/internal.h
@@ -1,3 +1,21 @@
+typedef struct Tilerparam Tilerparam;
+typedef struct Rastertask Rastertask;
+
+struct Rastertask /* unit of work sent from tilerdurden to a rasterizer proc */
+{
+ SUparams *params; /* per-task copy freed by the rasterizer; a nil params->entity marks end of job */
+ Rectangle wr; /* working rect: rasterize clamps the triangle's bounding box to it */
+ Triangle t; /* vertices duplicated with dupvertex; the rasterizer releases them with delvattrs */
+};
+
+struct Tilerparam /* argument handed by entityproc to each tilerdurden proc */
+{
+ Channel *paramsc; /* input channel: SUparams* sent by entityproc */
+ Channel **tasksc; /* Channel*[nproc]: one Rastertask* channel per rasterizer */
+ Rectangle *wr; /* Rectangle[nproc]; NOTE(review): entityproc never assigns this and tilerdurden allocates its own array — confirm it is needed */
+ ulong nproc; /* number of entries in tasksc */
+};
+
/* alloc */
void *emalloc(ulong);
void *erealloc(void*, ulong);
--- a/render.c
+++ b/render.c
@@ -292,8 +292,10 @@
}
static void
-rasterize(SUparams *params, Triangle t)
+rasterize(Rastertask *task)
{
+ SUparams *params;
+ Triangle t;
FSparams fsp;
Triangle2 t₂;
Rectangle bbox;
@@ -302,6 +304,9 @@
Color c;
double z, depth;
+ params = task->params;
+ memmove(t, task->t, sizeof t);
+
t₂.p0 = Pt2(t[0].p.x, t[0].p.y, 1);
t₂.p1 = Pt2(t[1].p.x, t[1].p.y, 1);
t₂.p2 = Pt2(t[2].p.x, t[2].p.y, 1);
@@ -310,10 +315,10 @@
min(min(t₂.p0.x, t₂.p1.x), t₂.p2.x), min(min(t₂.p0.y, t₂.p1.y), t₂.p2.y),
max(max(t₂.p0.x, t₂.p1.x), t₂.p2.x)+1, max(max(t₂.p0.y, t₂.p1.y), t₂.p2.y)+1
);
- bbox.min.x = max(bbox.min.x, params->fb->r.min.x);
- bbox.min.y = max(bbox.min.y, params->fb->r.min.y);
- bbox.max.x = min(bbox.max.x, params->fb->r.max.x);
- bbox.max.y = min(bbox.max.y, params->fb->r.max.y);
+ bbox.min.x = max(bbox.min.x, task->wr.min.x);
+ bbox.min.y = max(bbox.min.y, task->wr.min.y);
+ bbox.max.x = min(bbox.max.x, task->wr.max.x);
+ bbox.max.y = min(bbox.max.y, task->wr.max.y);
fsp.su = params;
memset(&fsp.v, 0, sizeof fsp.v);
@@ -325,13 +330,9 @@
z = fberp(t[0].p.z, t[1].p.z, t[2].p.z, bc);
depth = fclamp(z, 0, 1);
- lock(¶ms->fb->zbuflk);
- if(depth <= params->fb->zbuf[p.x + p.y*Dx(params->fb->r)]){
- unlock(¶ms->fb->zbuflk);
+ if(depth <= params->fb->zbuf[p.x + p.y*Dx(params->fb->r)])
continue;
- }
params->fb->zbuf[p.x + p.y*Dx(params->fb->r)] = depth;
- unlock(¶ms->fb->zbuflk);
/* interpolate z⁻¹ and get actual z */
z = fberp(t[0].p.w, t[1].p.w, t[2].p.w, bc);
@@ -354,37 +355,93 @@
}
static void
-entityproc(void *arg)
+rasterizer(void *arg) /* proc body: receives Rastertasks from the channel passed in arg and rasterizes each one */
{
- Channel *paramsc;
+ Channel *taskc;
+ Rastertask *task;
SUparams *params;
Memimage *frag;
+
+ threadsetname("rasterizer");
+
+ taskc = arg;
+ frag = rgb(DBlack); /* obtained once, outside the loop, and reused by every task through params->frag */
+
+ while((task = recvp(taskc)) != nil){
+ params = task->params;
+ /* end of job: a task whose params carry no entity is the marker sent by tilerdurden */
+ if(params->entity == nil){
+ if(decref(params->job) < 1){ /* count reached zero: this is the last rasterizer to see the marker */
+ nbsend(params->job->donec, nil); /* NOTE(review): non-blocking send — confirm donec is buffered or always has a waiting receiver */
+ free(params); /* marker params are shared by all end-of-job tasks, so only the last holder frees them */
+ }
+ free(task);
+ continue;
+ }
+
+ params->frag = frag;
+ rasterize(task);
+
+ delvattrs(&task->t[0]); /* release the vertex copies tilerdurden made with dupvertex */
+ delvattrs(&task->t[1]);
+ delvattrs(&task->t[2]);
+ free(params); /* per-task SUparams copy allocated by tilerdurden */
+ free(task);
+ }
+}
+
+static void
+tilerdurden(void *arg)
+{
+ Tilerparam *tp;
+ SUparams *params, *newparams;
+ Rastertask *task;
VSparams vsp;
OBJVertex *verts, *tverts, *nverts; /* geometric, texture and normals vertices */
OBJIndexArray *idxtab;
- OBJElem **ep, **eb, **ee;
+ OBJElem **ep;
Point3 n; /* surface normal */
Triangle *t; /* triangles to raster */
+ Rectangle *wr;
+ Channel **taskc;
+ ulong Δx, nproc;
int i, nt;
- threadsetname("entityproc");
+ threadsetname("tilerdurden");
- paramsc = arg;
- frag = rgb(DBlack);
+ tp = arg;
t = emalloc(sizeof(*t)*16);
+ taskc = tp->tasksc;
+ nproc = tp->nproc;
+ wr = emalloc(nproc*sizeof(Rectangle));
- while((params = recvp(paramsc)) != nil){
- params->frag = frag;
+ while((params = recvp(tp->paramsc)) != nil){
+ /* end of job */
+ if(params->entity == nil){
+ if(decref(params->job) < 1){
+ params->job->ref = nproc;
+ for(i = 0; i < nproc; i++){
+ task = emalloc(sizeof *task);
+ memset(task, 0, sizeof *task);
+ task->params = params;
+ sendp(taskc[i], task);
+ }
+ }
+ continue;
+ }
vsp.su = params;
+ wr[0] = params->fb->r;
+ Δx = Dx(wr[0])/nproc;
+ wr[0].max.x = wr[0].min.x + Δx;
+ for(i = 1; i < nproc; i++)
+ wr[i] = rectaddpt(wr[i-1], Pt(Δx,0));
+
verts = params->entity->mdl->obj->vertdata[OBJVGeometric].verts;
tverts = params->entity->mdl->obj->vertdata[OBJVTexture].verts;
nverts = params->entity->mdl->obj->vertdata[OBJVNormal].verts;
- eb = params->entity->mdl->elems;
- ee = eb + params->entity->mdl->nelems;
-
- for(ep = eb; ep != ee; ep++){
+ for(ep = params->eb; ep != params->ee; ep++){
nt = 1; /* start with one. after clipping it might change */
idxtab = &(*ep)->indextab[OBJVGeometric];
@@ -466,7 +523,20 @@
t[nt][1].p = ndc2viewport(params->fb, t[nt][1].p);
t[nt][2].p = ndc2viewport(params->fb, t[nt][2].p);
- rasterize(params, t[nt]);
+ for(i = 0; i < nproc; i++)
+ if(ptinrect(Pt(t[nt][0].p.x,t[nt][0].p.y),wr[i]) ||
+ ptinrect(Pt(t[nt][1].p.x,t[nt][1].p.y),wr[i]) ||
+ ptinrect(Pt(t[nt][2].p.x,t[nt][2].p.y),wr[i])){
+ newparams = emalloc(sizeof *newparams);
+ *newparams = *params;
+ task = emalloc(sizeof *task);
+ task->params = newparams;
+ task->wr = wr[i];
+ task->t[0] = dupvertex(&t[nt][0]);
+ task->t[1] = dupvertex(&t[nt][1]);
+ task->t[2] = dupvertex(&t[nt][2]);
+ sendp(taskc[i], task);
+ }
//skiptri:
delvattrs(&t[nt][0]);
delvattrs(&t[nt][1]);
@@ -473,9 +543,74 @@
delvattrs(&t[nt][2]);
}
}
+ free(params);
+ }
+}
- if(--params->job->nrem < 1)
- nbsend(params->job->donec, nil);
+static void
+entityproc(void *arg) /* proc body: starts the tiler and rasterizer procs, then splits each entity's element list among the tilers */
+{
+ Channel *paramsin, **paramsout, **taskc;
+ Tilerparam *tp;
+ SUparams *params, *newparams;
+ OBJElem **eb, **ee;
+ char *nprocs;
+ ulong stride, nelems, nproc, nworkers;
+ int i;
+
+ threadsetname("entityproc");
+
+ paramsin = arg;
+ nprocs = getenv("NPROC"); /* worker count is derived from $NPROC */
+ if(nprocs == nil || (nproc = strtoul(nprocs, nil, 10)) < 2)
+ nproc = 1; /* unset or below 2: one tiler and one rasterizer */
+ else
+ nproc /= 2; /* halved because nproc tilers and nproc rasterizers are created below */
+ free(nprocs);
+
+ paramsout = emalloc(nproc*sizeof(*paramsout));
+ taskc = emalloc(nproc*sizeof(*taskc));
+ for(i = 0; i < nproc; i++){
+ paramsout[i] = chancreate(sizeof(SUparams*), 8);
+ tp = emalloc(sizeof *tp);
+ tp->paramsc = paramsout[i];
+ tp->tasksc = taskc; /* shared array; its slots are filled by the next loop, before any params are sent */
+ tp->nproc = nproc; /* NOTE(review): tp->wr is left unassigned here */
+ proccreate(tilerdurden, tp, mainstacksize);
+ }
+ for(i = 0; i < nproc; i++){
+ taskc[i] = chancreate(sizeof(Rastertask*), 32);
+ proccreate(rasterizer, taskc[i], mainstacksize);
+ }
+
+ while((params = recvp(paramsin)) != nil){
+ /* end of job: params without an entity are the marker */
+ if(params->entity == nil){
+ params->job->ref = nproc; /* one reference per tiler; each tiler decref's it on receiving the marker */
+ for(i = 0; i < nproc; i++)
+ sendp(paramsout[i], params); /* the same marker pointer goes to every tiler, so it is not freed here */
+ continue;
+ }
+
+ eb = params->entity->mdl->elems;
+ nelems = params->entity->mdl->nelems;
+ ee = eb + nelems;
+
+ if(nelems <= nproc){
+ nworkers = nelems; /* fewer elements than tilers: one element per worker */
+ stride = 1;
+ }else{
+ nworkers = nproc;
+ stride = nelems/nproc; /* integer division; the last worker picks up the remainder */
+ }
+
+ for(i = 0; i < nworkers; i++){
+ newparams = emalloc(sizeof *newparams);
+ *newparams = *params;
+ newparams->eb = eb + i*stride;
+ newparams->ee = i == nworkers-1? ee: newparams->eb + stride; /* last worker runs to the end of the element list */
+ sendp(paramsout[i], newparams);
+ }
free(params);
}
}
@@ -500,9 +635,13 @@
while((job = recvp(jobc)) != nil){
sc = job->scene;
- job->nrem = sc->nents;
time = nanosec();
+ if(sc->nents < 1){
+ nbsend(job->donec, nil);
+ continue;
+ }
+
for(ent = sc->ents.next; ent != &sc->ents; ent = ent->next){
params = emalloc(sizeof *params);
memset(params, 0, sizeof *params);
@@ -514,6 +653,11 @@
params->fshader = job->shaders->fshader;
sendp(paramsc, params);
}
+ /* mark end of job */
+ params = emalloc(sizeof *params);
+ memset(params, 0, sizeof *params);
+ params->job = job;
+ sendp(paramsc, params);
}
}