shithub: hj264

ref: 71e5eaf30c76e8bbd82fe4c9592d4406d7fa1c08
dir: /hj264.c/

View raw version
#define MINIH264_IMPLEMENTATION
#define H264E_MAX_THREADS 7
#include "minih264e.h"
#include <thread.h>
#include <bio.h>
#include <draw.h>
#include <memdraw.h>
#include <tos.h>

/*
 * Small arithmetic helpers.  All of them are plain macros, so arguments
 * may be evaluated more than once: do not pass expressions with side effects.
 */
#define max(a,b) ((a)>(b)?(a):(b))
#define min(a,b) ((a)<(b)?(a):(b))
/* clamp v into the range [a, b] */
#define clp(v,a,b) min((b), max((v),(a)))
/*
 * Round p up to the next multiple of a (already-aligned values are kept);
 * the bit trick only gives a multiple of a when a is a power of two.
 *
 * CAUTION: p is not parenthesised, so the (uintptr) cast binds to the first
 * operand only.  align(ptr+n, a) therefore adds n as a plain byte count,
 * whatever the pointed-to type of ptr is.  hj264new passes
 * h->persist+szpersist and relies on exactly that; parenthesising p here
 * would turn it into element-sized pointer arithmetic.
 */
#define align(p,a) (void*)((((uintptr)p - 1) | (a-1)) + 1)

/* Tunables shared by the encoder setup and the command line handling. */
enum {
	Align = 64,		/* alignment hj264new applies to the YUV, persist and scratch areas */
	Maxquality = 10,	/* upper bound for -q; main sets encode_speed = Maxquality - quality */
	Gop = 20,		/* value stored in H264E_create_param_t.gop */
};

/* Short names for the structures defined below. */
typedef struct Hjob Hjob;
typedef struct Hjthread Hjthread;
typedef struct Hj264 Hj264;

/* One unit of work handed to a worker: the worker calls run(arg). */
struct Hjob {
	void (*run)(void *);	/* function to execute */
	void *arg;		/* its sole argument */
};

/* Per-worker state; see threadf for the worker side of the protocol. */
struct Hjthread {
	int id;		/* index of the worker, used in its thread name */
	Channel *job;	/* Hjob pointers sent to the worker */
	Channel *done;	/* worker reports here: once at startup, then once per finished job */
};

/*
 * Encoder instance.  Allocated by hj264new as a single block: the storage
 * for the YUV planes, persist and scratch areas follows the structure,
 * starting at buf.
 */
struct Hj264 {
	H264E_persist_t *persist;	/* encoder state passed to H264E_init/H264E_encode */
	H264E_scratch_t *scratch;	/* encoder work area passed to H264E_encode */
	H264E_run_param_t rp;		/* per-run parameters (speed, qp range, frame size target) */
	H264E_io_yuv_t yuv;		/* input planes and strides, filled by xrgb2yuv */
	Hjthread threads[H264E_MAX_THREADS];	/* workers; only the first nthreads are started */
	Hjob jobs[H264E_MAX_THREADS];	/* job slot i is used for worker i in hjobsrun */
	int nthreads;			/* number of workers actually running */
	u8int buf[1];			/* first byte of the trailing storage */
};

/*
 * Convert a 32-bit-per-pixel image to planar YUV with 2x2 subsampled chroma.
 *
 * src:    pixel data; each pixel is 4 bytes read as B, G, R (4th byte ignored)
 * stride: bytes per source row; stride/4 pixels are converted from each row
 * h:      number of source rows
 * io:     destination planes (yuv[0..2]) and their row strides
 *
 * Every pixel gets a luma sample.  Each chroma sample is taken from the
 * top-left pixel of its 2x2 block; nothing is averaged.
 *
 * Odd widths and heights are handled: only pixels inside the stride/4 x h
 * rectangle are read, and every row is addressed from src on its own, so
 * an odd width can no longer shift the following row by one pixel.
 */
static void
xrgb2yuv(u8int *src, int stride, int h, H264E_io_yuv_t *io)
{
	int x, y, w, r, g, b;
	u8int *bgrx, *yp, *up, *vp;

/* this is not the "full" swing, just sayin' */
#define YY ((( 66*r + 129*g +  25*b + 128) >> 8) +  16)
#define UU (((-38*r -  74*g + 112*b + 128) >> 8) + 128)
#define VV (((112*r -  94*g -  18*b + 128) >> 8) + 128)

	w = stride/4;
	yp = io->yuv[0];
	up = io->yuv[1];
	vp = io->yuv[2];

	for(y = 0; y < h; y++){
		bgrx = &src[y * stride];
		for(x = 0; x < w; x++){
			b = bgrx[0];
			g = bgrx[1];
			r = bgrx[2];
			bgrx += 4;
			yp[x] = YY;
			/* chroma only from the top-left pixel of each 2x2 block */
			if(((x | y) & 1) == 0){
				up[x/2] = UU;
				vp[x/2] = VV;
			}
		}
		yp += io->stride[0];
		/* one chroma row covers two luma rows */
		if(y & 1){
			up += io->stride[1];
			vp += io->stride[2];
		}
	}

#undef YY
#undef UU
#undef VV
}

/*
 * Body of a worker proc; p is the worker's Hjthread.
 *
 * The worker first sends nil on its done channel to announce that it is
 * running, then executes every job received on the job channel and sends
 * the finished job back on done.  A nil from recvp ends the loop
 * (presumably what happens once hj264free closes the channel), after
 * which the worker frees both of its channels and exits.
 */
static void
threadf(void *p)
{
	Hjthread *self;
	Channel *in, *out;
	Hjob *work;

	self = p;
	threadsetname("hj264/%d", self->id);

	/* keep private copies: the Hjthread lives inside the Hj264, which may be freed */
	in = self->job;
	out = self->done;

	sendp(out, nil);
	while((work = recvp(in)) != nil){
		work->run(work->arg);
		sendp(out, work);
	}

	chanfree(out);
	chanfree(in);

	threadexits(nil);
}

/*
 * Callback installed as run_func_in_thread: run njob invocations of
 * run(arg[i]) on the worker procs and return once all have finished.
 *
 * p is the Hj264 registered as the encoder token.  Jobs are issued in
 * batches of at most h->nthreads, one per worker, and each batch is fully
 * collected before the next one is started.
 */
static void
hjobsrun(void *p, void (*run)(void *), void **arg, int njob)
{
	Hj264 *enc;
	Hjob *slot;
	int next, used;

	enc = p;
	next = 0;
	while(next < njob){
		/* hand one job to each worker until workers or jobs run out */
		used = 0;
		while(used < enc->nthreads && next < njob){
			slot = &enc->jobs[used];
			slot->run = run;
			slot->arg = arg[next];
			sendp(enc->threads[used].job, slot);
			used++;
			next++;
		}

		/* wait for the batch, most recently started worker first */
		while(used-- > 0)
			recvp(enc->threads[used].done);
	}
}

/*
 * Encode the frame currently stored in h->yuv.
 *
 * On success returns 0 with *data and *sz set by H264E_encode.
 * On failure sets the error string and returns -1.
 */
static int
hj264_encode(Hj264 *h, u8int **data, int *sz)
{
	int status;

	status = H264E_encode(h->persist, h->scratch, &h->rp, &h->yuv, data, sz);
	if(status == 0)
		return 0;

	werrstr("H264E_encode: error %d", status);
	return -1;
}

static Hj264 *
hj264new(int nthreads, int denoise, int kbps, int ww, int hh)
{
	int i, e, szscratch, szpersist, szyuv;
	H264E_create_param_t cp;
	Hjthread *t;
	u8int *p;
	Hj264 *h;

	nthreads = clp(nthreads, 1, H264E_MAX_THREADS);

	memset(&cp, 0, sizeof(cp));
	cp.num_layers = 1;
	cp.gop = Gop;
	cp.max_threads = nthreads;
	cp.const_input_flag = 1;
	cp.temporal_denoise_flag = denoise;
	cp.vbv_size_bytes = kbps/1000*8/2; /* 2 seconds */
	cp.width = ww;
	cp.height = hh;

	if((e = H264E_sizeof(&cp, &szpersist, &szscratch)) != 0){
		werrstr("H264E_sizeof: error %d", e);
		return nil;
	}

	/* YUV logic requires alignment */
	ww = ((ww-1) | 15) + 1;
	hh = ((hh-1) | 15) + 1;
	szyuv = ww*hh*3/2;
	if((h = calloc(1, sizeof(*h) + Align+szyuv + Align+szpersist + Align+szscratch)) == nil)
		return nil;

	p = align(h->buf, Align);
	h->yuv.yuv[0] = p;
	h->yuv.stride[0] = ww;
	h->yuv.yuv[1] = p + ww*hh;
	h->yuv.stride[1] = ww/2;
	h->yuv.yuv[2] = p + ww*hh*5/4;
	h->yuv.stride[2] = ww/2;
	h->persist = align(p+szyuv, Align);
	h->scratch = align(h->persist+szpersist, Align);

	cp.token = h;
	cp.run_func_in_thread = hjobsrun;
	H264E_init(h->persist, &cp);

	h->nthreads = nthreads;
	for(i = 0; i < nthreads; i++){
		t = &h->threads[i];
		t->id = i;
		t->job = chancreate(sizeof(void*), 0);
		t->done = chancreate(sizeof(void*), 0);
		proccreate(threadf, t, mainstacksize);
		recvp(t->done);
	}

	return h;
}

/*
 * Shut down the workers of h and release the encoder.
 *
 * Closing the channels makes each worker leave its loop; the workers free
 * their own channels (see threadf) and are not waited for here.
 * A nil h is accepted and ignored, like free(nil).
 */
static void
hj264free(Hj264 *h)
{
	int i;

	if(h == nil)
		return;

	for(i = 0; i < h->nthreads; i++){
		chanclose(h->threads[i].done);
		chanclose(h->threads[i].job);
	}

	free(h);
}

/*
 * Nanoseconds elapsed since the first call.
 *
 * When _tos->cyclefreq is available the cycle counter is used and scaled
 * by that frequency; otherwise the function falls back to nsec() (the
 * first call then prints a notice and returns 0).  fasthz doubles as the
 * mode flag: 0 = not initialised, ~0 = nsec() fallback, anything else =
 * cycle counter frequency.
 */
static uvlong
nanosec(void)
{
	static uvlong fasthz, xstart;
	uvlong ticks, scale;

	if(fasthz == ~0ULL)
		return nsec() - xstart;

	if(fasthz == 0){
		if(_tos->cyclefreq == 0){
			xstart = nsec();
			fasthz = ~0ULL;
			fprint(2, "cyclefreq not available, falling back to nsec()\n");
			fprint(2, "you might want to disable aux/timesync\n");
			return 0;
		}
		cycles(&xstart);
		fasthz = _tos->cyclefreq;
	}
	cycles(&ticks);
	ticks -= xstart;

	/*
	 * this is ugly: move as many factors of ten as fit from the divisor
	 * into the tick count, so precision is kept without overflowing
	 */
	scale = 1000000000ULL;
	while(ticks < 0x1999999999999999ULL && scale > 1){
		scale /= 10ULL;
		ticks *= 10ULL;
	}

	return ticks / (fasthz / scale);
}

/*
 * Print the command synopsis on standard error and terminate all procs
 * with exit status "usage".  The trailing FILE operand is mandatory:
 * main calls usage when no argument is left after the options.
 */
static void
usage(void)
{
	fprint(2, "usage: %s [-d] [-f FPS] [-n THREADS] [-k KBPS] [-q 0…10] [-Q QP] FILE\n", argv0);
	threadexitsall("usage");
}

int
main(int argc, char **argv)
{
	int nthreads, fps, kbps, denoise, quality, qp;
	int ww, hh, in, sz, srcsz, nframes;
	uvlong start, end;
	u8int *data, *src;
	Memimage *im;
	Biobuf out;
	Hj264 *h;
	char *s;

	/* use NPROC-1 threads by default */
	nthreads = ((s = getenv("NPROC")) != nil) ? atoi(s)-1 : 1;
	denoise = 0;
	quality = 10;
	kbps = 0;
	fps = 30;
	qp = 33;
	ARGBEGIN{
	case 'd':
		denoise++;
		break;
	case 'f':
		fps = atoi(EARGF(usage()));
		break;
	case 'k':
		kbps = atoi(EARGF(usage()));
		break;
	case 'n':
		nthreads = atoi(EARGF(usage()));
		break;
	case 'q':
		quality = atoi(EARGF(usage()));
		break;
	case 'Q':
		qp = atoi(EARGF(usage()));
		break;
	default:
		usage();
	}ARGEND

	if(argc < 1)
		usage();
	if((in = open(*argv, OREAD)) < 0)
		sysfatal("input: %r");
	if(Binit(&out, 1, OWRITE) < 0)
		sysfatal("Binit failed: %r");

	memimageinit();
	nanosec();

	if(quality > Maxquality)
		quality = Maxquality;
	if(kbps < 0)
		kbps = 0;

	src = nil;
	srcsz = 0;
	h = nil;
	start = nanosec();
	for(nframes = 0;; nframes++){
		seek(in, 0, 0);
		if((im = readmemimage(in)) == nil)
			break;
		ww = Dx(im->r);
		hh = Dy(im->r);

		if(h == nil){
			srcsz = Dy(im->r)*(2+bytesperline(im->r, im->depth));
			if((src = malloc(srcsz)) == nil)
				sysfatal("memory");
			unloadmemimage(im, im->r, src, srcsz);

			if((h = hj264new(nthreads, denoise, kbps, ww, hh)) == nil)
				sysfatal("hj264new: %r");
			h->rp.encode_speed = Maxquality - quality;
			h->rp.qp_min = h->rp.qp_max = qp;
			if(kbps > 0){
				h->rp.qp_min = 10;
				h->rp.qp_max = 50;
				h->rp.desired_frame_bytes = kbps*1000/8/fps;
			}
		}

		unloadmemimage(im, im->r, src, srcsz);
		xrgb2yuv(src, bytesperline(im->r, im->depth), Dy(im->r), &h->yuv);
		freememimage(im);

		if(hj264_encode(h, &data, &sz) != 0)
			sysfatal("hj264_encode: %r");
		if(Bwrite(&out, data, sz) != sz)
			break;
		if(nanosec() - start > 4000000000ULL)
			break;
	}
	end = nanosec();
	fprint(2, "%d fps\n", (int)(nframes / ((end - start)/1000000000ULL)));

	/* FIXME flush on note */
	Bflush(&out);
	hj264free(h);

	threadexitsall(nil);

	return 0;
}