ref: c6ffefd38f9b69b560e543571e98549c3facd611
dir: /hj264.c/
#include "builtins.h"
#define MINIH264_IMPLEMENTATION
#define H264E_MAX_THREADS 7
#include "minih264e.h"
#include <thread.h>
#include <bio.h>
#include <draw.h>
#include <tos.h>
void npe_nsleep(uvlong ns);
uvlong npe_nanosec(void);
/*
 * Small helper macros.  max, min and clp expand their arguments more than
 * once, so the arguments must be free of side effects.
 */
#define max(a,b) ((a)>(b)?(a):(b))
#define min(a,b) ((a)<(b)?(a):(b))
/* clamp v to the range [a, b] */
#define clp(v,a,b) min((b), max((v),(a)))
/*
 * Round pointer p up to the next multiple of a (a has to be a power of two
 * for the bit trick to work).
 * NOTE: p and a are not parenthesized in the expansion, so the uintptr cast
 * binds only to the first operand of p: align(ptr+n, a) adds n as a byte
 * count whatever the pointed-to type of ptr is.  Keep that in mind before
 * "fixing" the parentheses, existing callers may depend on it.
 */
#define align(p,a) (void*)((((uintptr)p - 1) | (a-1)) + 1)
enum {
Align = 64,
Maxquality = 10,
Gop = 20,
};
typedef struct Hjob Hjob;
typedef struct Hjthread Hjthread;
typedef struct Hj264 Hj264;
typedef struct Img Img;
/* One unit of work handed to a worker proc: the worker calls run(arg). */
struct Hjob {
	void (*run)(void *);	/* function to execute */
	void *arg;	/* its single argument */
};
/* Per-worker state: an id and the two channels used to talk to the worker. */
struct Hjthread {
	int id;	/* worker index, also used in the thread name */
	Channel *job;	/* Hjob* sent to the worker; nil (or close) stops it */
	Channel *done;	/* worker sends here once at startup and after every job */
};
/*
 * Encoder instance.  Allocated as one block by hj264new: the struct is
 * followed by the YUV planes, the encoder's persistent state and its
 * scratch memory, each aligned to Align bytes inside the trailing buf.
 */
struct Hj264 {
	H264E_persist_t *persist;	/* encoder persistent state, points into buf */
	H264E_scratch_t *scratch;	/* encoder scratch memory, points into buf */
	H264E_run_param_t rp;	/* per-frame run parameters (speed, qp, frame size) */
	H264E_io_yuv_t yuv;	/* plane pointers and strides of the input picture */
	Biobuf out;	/* buffered output of the encoded stream */
	Channel *frame;	/* Img* frames queued for encthread */
	Channel *done;	/* encthread sends nil here when it has finished */
	Hjthread threads[H264E_MAX_THREADS];	/* worker procs */
	Hjob jobs[H264E_MAX_THREADS];	/* job slot of each worker */
	int nthreads;	/* number of workers in use */
	/*
	 * Start of the variable-sized tail.  Declared as a flexible array
	 * member (like Img.bgrx) rather than buf[1], since it is only ever
	 * used as the base address of the extra memory allocated past the
	 * struct.
	 */
	u8int buf[];
};
/* One raw input frame as read by imgread. */
struct Img {
	int w;	/* width in pixels */
	int h;	/* height in pixels */
	u8int bgrx[];	/* w*h pixels, 4 bytes each; bytes 0,1,2 are read as B,G,R */
};
/*
 * Convert a w×h image of 4-byte pixels (bytes 0,1,2 = B,G,R; byte 3 is
 * ignored) into the planar 4:2:0 picture described by io.
 *
 * Luma is computed for every pixel.  Chroma is not averaged: each U/V
 * sample is taken from the top-left pixel of its 2x2 block.  All three
 * conversions use integer weights scaled by 256 with rounding.
 *
 * Odd dimensions are handled: the last row/column simply has no partner.
 * The previous version always consumed pixels in pairs of rows and pairs
 * of columns, which read past the end of bgrx when h (or w) was odd.
 * The planes in io must be large enough for w×h luma samples and
 * ((w+1)/2)×((h+1)/2) chroma samples at the given strides.
 */
static void
xrgb2yuv(u8int *bgrx, int w, int h, H264E_io_yuv_t *io)
{
	u8int *py, *pu, *pv;
	int x, y, r, g, b;

#define YY ((( 77*r + 150*g + 29*b + 128) >> 8) + 0)
#define UU (((-43*r - 84*g + 127*b + 128) >> 8) + 128)
#define VV (((127*r - 106*g - 21*b + 128) >> 8) + 128)

	py = io->yuv[0];
	pu = io->yuv[1];
	pv = io->yuv[2];
	for(y = 0; y < h; y += 2){
		/* even row: luma everywhere, chroma from the even columns */
		for(x = 0; x < w; x++){
			b = bgrx[0];
			g = bgrx[1];
			r = bgrx[2];
			bgrx += 4;
			py[x] = YY;
			if((x & 1) == 0){
				pu[x/2] = UU;
				pv[x/2] = VV;
			}
		}
		py += io->stride[0];
		pu += io->stride[1];
		pv += io->stride[2];

		/* odd height: there is no second row to pair with, stop here */
		if(y+1 >= h)
			break;

		/* odd row: luma only */
		for(x = 0; x < w; x++){
			b = bgrx[0];
			g = bgrx[1];
			r = bgrx[2];
			bgrx += 4;
			py[x] = YY;
		}
		py += io->stride[0];
	}

#undef YY
#undef UU
#undef VV
}
#pragma varargck type "ℏ" int
/*
 * Format verb handler: consumes one int (an H264E_STATUS_* code) from the
 * argument list and prints its description, or "error N" for a code that
 * has no description here.  Returns whatever fmtprint returns.
 */
static int
hjerror(Fmt *f)
{
	char *msg;
	int status;

	status = va_arg(f->args, int);
	switch(status){
	case H264E_STATUS_SUCCESS:
		msg = "success";
		break;
	case H264E_STATUS_BAD_ARGUMENT:
		msg = "bad argument";
		break;
	case H264E_STATUS_BAD_PARAMETER:
		msg = "bad parameter";
		break;
	case H264E_STATUS_BAD_FRAME_TYPE:
		msg = "bad frame type";
		break;
	case H264E_STATUS_SIZE_NOT_MULTIPLE_16:
		msg = "size not multiple of 16";
		break;
	case H264E_STATUS_SIZE_NOT_MULTIPLE_2:
		msg = "size not multiple of 2";
		break;
	case H264E_STATUS_BAD_LUMA_ALIGN:
		msg = "bad luma alignment";
		break;
	case H264E_STATUS_BAD_LUMA_STRIDE:
		msg = "bad luma stride";
		break;
	case H264E_STATUS_BAD_CHROMA_ALIGN:
		msg = "bad chroma alignment";
		break;
	case H264E_STATUS_BAD_CHROMA_STRIDE:
		msg = "bad chroma stride";
		break;
	default:
		/* unknown code: fall back to the number itself */
		return fmtprint(f, "error %d", status);
	}
	return fmtprint(f, "%s", msg);
}
/*
 * Worker proc body.  p is the worker's Hjthread.
 *
 * Announces itself with a nil on the done channel, then executes every
 * Hjob received on the job channel and sends the job back on done when it
 * has run.  A nil from the job channel ends the loop; the worker then
 * frees both channels and exits.
 */
static void
threadf(void *p)
{
	Hjthread *self;
	Hjob *work;
	Channel *in, *out;

	self = p;
	threadsetname("hj264/%d", self->id);
	/* keep private copies: only these locals are used from here on */
	in = self->job;
	out = self->done;

	sendp(out, nil);
	while((work = recvp(in)) != nil){
		work->run(work->arg);
		sendp(out, work);
	}

	chanfree(out);
	chanfree(in);
	threadexits(nil);
}
/*
 * Run njob invocations of run, the n-th with argument arg[n], on the
 * worker procs of the Hj264 passed as p (installed as the encoder's
 * run_func_in_thread callback).
 *
 * Jobs are dispatched in batches of at most h->nthreads, one per worker;
 * the function waits for the whole batch to finish before starting the
 * next one and returns once every job has completed.
 */
static void
hjobsrun(void *p, void (*run)(void *), void **arg, int njob)
{
	Hj264 *h;
	Hjob *slot;
	int next, used;

	h = p;
	next = 0;
	while(next < njob){
		/* hand one job to each worker until we run out of either */
		used = 0;
		while(used < h->nthreads && next < njob){
			slot = &h->jobs[used];
			slot->run = run;
			slot->arg = arg[next];
			sendp(h->threads[used].job, slot);
			used++;
			next++;
		}
		/* collect completions, last dispatched worker first */
		while(used-- > 0)
			recvp(h->threads[used].done);
	}
}
/*
 * Encode the picture currently stored in h->yuv using h->rp.
 * On success returns 0 with *data and *sz set by H264E_encode.
 * On failure sets the error string and returns -1.
 */
static int
hj264_encode(Hj264 *h, u8int **data, int *sz)
{
	int status;

	status = H264E_encode(h->persist, h->scratch, &h->rp, &h->yuv, data, sz);
	if(status == 0)
		return 0;
	werrstr("H264E_encode: %ℏ", status);
	return -1;
}
/*
 * Create an encoder for ww×hh frames.
 *
 * nthreads is clamped to 1..H264E_MAX_THREADS and that many worker procs
 * are started.  denoise is passed as the temporal denoise flag.  kbps is
 * the target bitrate in kilobits per second and only sizes the VBV buffer
 * here (0 gives a zero-sized buffer).  The height is padded up to a
 * multiple of 16 for both the encoder and the YUV buffer.
 *
 * Everything (struct, YUV planes, persistent and scratch encoder memory)
 * lives in a single allocation released by free().
 *
 * Returns the new encoder, or nil on failure; the error string is set for
 * encoder and worker start-up failures.
 */
static Hj264 *
hj264new(int nthreads, int denoise, int kbps, int ww, int hh)
{
	int i, k, e, szscratch, szpersist, szyuv;
	H264E_create_param_t cp;
	Hjthread *t;
	u8int *p;
	Hj264 *h;

	nthreads = clp(nthreads, 1, H264E_MAX_THREADS);
	/* YUV logic requires alignment, allow height to be different (pad it) */
	hh = ((hh-1) | 15) + 1;

	memset(&cp, 0, sizeof(cp));
	cp.num_layers = 1;
	cp.gop = Gop;
	cp.max_threads = nthreads;
	cp.temporal_denoise_flag = denoise;
	cp.max_long_term_reference_frames = MAX_LONG_TERM_FRAMES;
	/*
	 * 2 seconds worth of stream: kbps*1000 bits/s -> /8 bytes/s -> *2.
	 * (Was kbps/1000*8/2, which is 0 for anything below 1000 kbps and
	 * far too small above it.)
	 */
	cp.vbv_size_bytes = kbps*(1000/8)*2;
	cp.width = ww;
	cp.height = hh;
	if((e = H264E_sizeof(&cp, &szpersist, &szscratch)) != 0){
		werrstr("H264E_sizeof: %ℏ", e);
		return nil;
	}

	/* FIXME not padding width yet, so it still has to be multiple of 16 */
	/* once we do that, put this line to where "hh" is aligned */
	ww = ((ww-1) | 15) + 1;
	szyuv = ww*hh*3/2;
	if((h = calloc(1, sizeof(*h) + Align+szyuv + Align+szpersist + Align+szscratch)) == nil)
		return nil;

	/* carve the tail up: Y, U, V planes, then persist, then scratch */
	p = align(h->buf, Align);
	h->yuv.yuv[0] = p;
	h->yuv.stride[0] = ww;
	h->yuv.yuv[1] = p + ww*hh;
	h->yuv.stride[1] = ww/2;
	h->yuv.yuv[2] = p + ww*hh*5/4;
	h->yuv.stride[2] = ww/2;
	/* all offsets are byte counts, so do the arithmetic on a u8int* */
	p += szyuv;
	p = align(p, Align);
	h->persist = (void*)p;
	p += szpersist;
	p = align(p, Align);
	h->scratch = (void*)p;

	cp.token = h;
	cp.run_func_in_thread = hjobsrun;
	if((e = H264E_init(h->persist, &cp)) != 0){
		werrstr("H264E_init: %ℏ", e);
		free(h);	/* was leaked on this path */
		return nil;
	}

	h->nthreads = nthreads;
	for(i = 0; i < nthreads; i++){
		t = &h->threads[i];
		t->id = i;
		t->job = chancreate(sizeof(void*), 0);
		t->done = chancreate(sizeof(void*), 0);
		if(t->job == nil || t->done == nil || proccreate(threadf, t, mainstacksize) < 0){
			werrstr("can't start worker %d: %r", i);
			/* this worker never ran, so its channels are still ours */
			if(t->job != nil)
				chanfree(t->job);
			if(t->done != nil)
				chanfree(t->done);
			/* running workers free their own channels once closed */
			for(k = 0; k < i; k++){
				chanclose(h->threads[k].done);
				chanclose(h->threads[k].job);
			}
			free(h);
			return nil;
		}
		/* wait for the worker's startup message */
		recvp(t->done);
	}

	return h;
}
/*
 * Shut down the workers of h and release the encoder.
 *
 * For every worker the done channel is closed first and the job channel
 * second; the order is kept as is on purpose, since a worker frees both
 * channels as soon as its receive on the job channel fails.
 * h->frame and h->done are not touched here.
 */
static void
hj264free(Hj264 *h)
{
	Hjthread *t, *end;

	end = h->threads + h->nthreads;
	for(t = h->threads; t < end; t++){
		chanclose(t->done);
		chanclose(t->job);
	}
	free(h);
}
/*
 * Encoder proc body.  p is the Hj264.
 *
 * Receives Img* frames from h->frame until a nil arrives, converts each
 * to YUV, frees it, encodes it and appends the result to h->out.  An
 * encoder error is fatal.  On exit the output is flushed and, if h->done
 * is set, a nil is sent on it to tell the producer we are finished.
 */
static void
encthread(void *p)
{
	u8int *data;
	Img *img;
	Hj264 *h;
	int sz;

	h = p;
	while((img = recvp(h->frame)) != nil){
		xrgb2yuv(img->bgrx, img->w, img->h, &h->yuv);
		free(img);
		if(hj264_encode(h, &data, &sz) != 0)
			sysfatal("hj264_encode: %r");
		if(Bwrite(&h->out, data, sz) != sz){
			/*
			 * Output is gone.  Close the frame channel so that a
			 * producer blocked (or about to block) in sendp gets an
			 * error back instead of waiting forever for a receiver
			 * that no longer exists; without this neither side ever
			 * reaches the h->done handshake below.
			 */
			chanclose(h->frame);
			break;
		}
	}

	Bflush(&h->out);
	if(h->done != nil)
		sendp(h->done, nil);
	threadexits(nil);
}
/*
 * Read one w×h frame of 4-byte pixels from file descriptor f.
 *
 * The pixel data is read with pread starting at offset 5*12, i.e. past a
 * header of five 12-byte fields, so the same descriptor can be read again
 * for the next frame.  Short reads are retried until the frame is complete.
 *
 * Returns a newly allocated Img the caller must free, or nil if memory
 * could not be allocated or the read failed or hit end of file.
 */
static Img *
imgread(int f, int w, int h)
{
	int r, n, e;
	Img *i;

	e = w*h*4;
	/* malloc result was previously dereferenced unchecked */
	if((i = malloc(sizeof(*i) + e)) == nil)
		return nil;
	i->w = w;
	i->h = h;
	for(n = 0; n < e; n += r){
		if((r = pread(f, i->bgrx+n, e-n, n+5*12)) <= 0){
			free(i);
			return nil;
		}
	}
	return i;
}
/* Print the command line synopsis on fd 2 and terminate with status "usage". */
static void
usage(void)
{
	fprint(
		2,
		"usage: %s [-d] [-f FPS] [-n THREADS] [-k KBPS] [-q 0…10] [-Q QP]\n",
		argv0
	);
	threadexitsall("usage");
}
static uvlong nframes, tstart;

/*
 * Final report and exit; also installed as a note handler by main.
 *
 * Prints the average frame rate (frames sent so far divided by whole
 * seconds elapsed since tstart) unless less than a second has passed,
 * flushes the output of the Hj264 stored in this thread's threaddata slot
 * and terminates every thread with msg as the exit status.
 * The return statement is only there to satisfy the handler signature.
 */
static int
done(void *, char *msg)
{
	uvlong secs;
	Hj264 *enc;

	secs = (npe_nanosec() - tstart)/1000000000ULL;
	if(secs != 0)
		fprint(2, "%llud fps\n", nframes / secs);

	enc = *threaddata();
	Bflush(&enc->out);

	threadexitsall(msg);
	return 1;
}
/*
 * hj264 [-d] [-f FPS] [-n THREADS] [-k KBPS] [-q 0…10] [-Q QP] file
 *
 * Repeatedly reads an x8r8g8b8 image from file, at most FPS times per
 * second, and writes the H.264 encoding of the frames to fd 1.
 *
 *	-d	enable temporal denoise
 *	-f	frames per second (default 30, must be positive)
 *	-n	number of worker procs (default $NPROC-1, or 1)
 *	-k	target bitrate in kbps; 0 (default) means fixed QP
 *	-q	quality 0…10 (default 10), mapped to encoder speed
 *	-Q	fixed QP used when no bitrate is given (default 33)
 *
 * Reading stops when a frame can't be read or can't be queued; the
 * encoder proc is then drained, the frame rate reported and the program
 * exits.
 */
int
main(int argc, char **argv)
{
	int nthreads, fps, kbps, denoise, quality, qp;
	char *s, tmp[61], *f[5];
	uvlong fstart, fend, elapsed;
	int ww, hh, in;
	Img *img;
	Hj264 *h;

	/* use NPROC-1 threads by default */
	nthreads = ((s = getenv("NPROC")) != nil) ? atoi(s)-1 : 1;
	denoise = 0;
	quality = 10;
	kbps = 0;
	fps = 30;
	qp = 33;
	ARGBEGIN{
	case 'd':
		denoise++;
		break;
	case 'f':
		fps = atoi(EARGF(usage()));
		break;
	case 'k':
		kbps = atoi(EARGF(usage()));
		break;
	case 'n':
		nthreads = atoi(EARGF(usage()));
		break;
	case 'q':
		quality = atoi(EARGF(usage()));
		break;
	case 'Q':
		qp = atoi(EARGF(usage()));
		break;
	default:
		usage();
	}ARGEND

	/* fps is used as a divisor below; 0 or garbage would trap */
	if(fps < 1)
		sysfatal("invalid fps: %d", fps);
	/* clamp on both sides so encode_speed stays within 0…Maxquality */
	quality = clp(quality, 0, Maxquality);
	if(kbps < 0)
		kbps = 0;
	if(argc < 1)
		usage();

	if((in = open(*argv, OREAD)) < 0)
		sysfatal("input: %r");
	fmtinstall(L'ℏ', hjerror);

	/* image header: channel descriptor followed by the rectangle */
	tmp[60] = 0;
	if(readn(in, tmp, 60) != 60 || tokenize(tmp, f, 5) != 5)
		sysfatal("invalid image");
	if(strcmp(f[0], "x8r8g8b8") != 0)
		sysfatal("only x8r8g8b8 is supported");
	ww = atoi(f[3]) - atoi(f[1]);
	hh = atoi(f[4]) - atoi(f[2]);
	if(ww & 15)
		sysfatal("frame width has to be multiple of 16");
	if(ww < 16 || hh < 16)
		sysfatal("frame too small: %dx%d", ww, hh);

	if((h = hj264new(nthreads, denoise, kbps, ww, hh)) == nil)
		sysfatal("hj264new: %r");
	if(Binit(&h->out, 1, OWRITE) < 0)
		sysfatal("Binit failed: %r");
	h->frame = chancreate(sizeof(void*), 1); /* FIXME this is wrong as the encoder might be too late */
	h->done = chancreate(sizeof(void*), 0);

	/* FIXME how about changing these on the fly? */
	h->rp.encode_speed = Maxquality - quality;
	h->rp.qp_min = h->rp.qp_max = qp;
	if(kbps > 0){
		h->rp.qp_min = 10;
		h->rp.qp_max = 50;
		h->rp.desired_frame_bytes = kbps*1000/8/fps;
	}

	proccreate(encthread, h, mainstacksize);
	/* the note handler finds the encoder through threaddata */
	*threaddata() = h;
	threadnotify(done, 1);

	tstart = npe_nanosec();
	for(nframes = 0;; nframes++){
		fstart = npe_nanosec();
		if((img = imgread(in, ww, hh)) == nil)
			break;
		if(sendp(h->frame, img) != 1)
			break;
		fend = npe_nanosec();
		/* sleep off whatever is left of this frame's time slot */
		if(1000000000ULL/fps > (fend - fstart))
			npe_nsleep(1000000000ULL/fps - (fend - fstart));
	}

	chanclose(h->frame);
	/* encthread flushes h->out before it answers on h->done */
	recvp(h->done);

	/*
	 * done() dereferences the encoder, so it must neither be called nor
	 * stay registered as a note handler once the encoder is freed (the
	 * old code freed h and then called done(), flushing freed memory).
	 * Deregister it, report the frame rate here and only then free.
	 */
	threadnotify(done, 0);
	if((elapsed = (npe_nanosec() - tstart)/1000000000ULL) != 0)
		fprint(2, "%llud fps\n", nframes / elapsed);
	hj264free(h);
	threadexitsall(nil);
	return 1;
}