ref: aa0b72fe651b4241e529889865b47956acaee76b
dir: /hj264.c/
#define MINIH264_IMPLEMENTATION #define H264E_MAX_THREADS 7 #include "minih264e.h" #include <thread.h> #include <bio.h> #include <draw.h> #include <memdraw.h> #include <tos.h> void npe_nsleep(uvlong ns); #define max(a,b) ((a)>(b)?(a):(b)) #define min(a,b) ((a)<(b)?(a):(b)) #define clp(v,a,b) min((b), max((v),(a))) #define align(p,a) (void*)((((uintptr)p - 1) | (a-1)) + 1) enum { Align = 64, Maxquality = 10, Gop = 20, }; typedef struct Hjob Hjob; typedef struct Hjthread Hjthread; typedef struct Hj264 Hj264; struct Hjob { void (*run)(void *); void *arg; }; struct Hjthread { int id; Channel *job; Channel *done; }; struct Hj264 { H264E_persist_t *persist; H264E_scratch_t *scratch; H264E_run_param_t rp; H264E_io_yuv_t yuv; Biobuf out; Channel *frame; Hjthread threads[H264E_MAX_THREADS]; Hjob jobs[H264E_MAX_THREADS]; int nthreads; u8int buf[1]; }; static void xrgb2yuv(u8int *bgrx, int w, int h, H264E_io_yuv_t *io) { u8int *py, *pu, *pv; int x, y, r, g, b; py = io->yuv[0]; pu = io->yuv[1]; pv = io->yuv[2]; for(y = 0; y < h;){ for(x = 0; x < w;){ b = bgrx[0]; g = bgrx[1]; r = bgrx[2]; bgrx += 4; /* this is not the "full" swing, just sayin' */ #define YY ((( 66*r + 129*g + 25*b + 128) >> 8) + 16) #define UU (((-38*r - 74*g + 112*b + 128) >> 8) + 128) #define VV (((112*r - 94*g - 18*b + 128) >> 8) + 128) py[x] = YY; pu[x/2] = UU; pv[x/2] = VV; x++; b = bgrx[0]; g = bgrx[1]; r = bgrx[2]; bgrx += 4; py[x] = YY; x++; } py += io->stride[0]; y++; for(x = 0; x < w;){ b = bgrx[0]; g = bgrx[1]; r = bgrx[2]; bgrx += 4; py[x] = YY; x++; #undef YY #undef UU #undef VV } py += io->stride[0]; pu += io->stride[1]; pv += io->stride[2]; y++; } } static void threadf(void *p) { Hjthread *t; Hjob *j; Channel *job, *done; t = p; threadsetname("hj264/%d", t->id); job = t->job; done = t->done; for(sendp(done, nil); (j = recvp(job)) != nil; sendp(done, j)) j->run(j->arg); chanfree(done); chanfree(job); threadexits(nil); } static void hjobsrun(void *p, void (*run)(void *), void **arg, int njob) { int n, t; Hj264 *h; Hjob *j; h = p; for(n = 0; n < njob;){ for(t = 0; t < h->nthreads && n < njob; t++, n++){ j = &h->jobs[t]; j->run = run; j->arg = arg[n]; sendp(h->threads[t].job, j); } for(t--; t >= 0; t--) recvp(h->threads[t].done); } } static int hj264_encode(Hj264 *h, u8int **data, int *sz) { int e; if((e = H264E_encode(h->persist, h->scratch, &h->rp, &h->yuv, data, sz)) != 0){ werrstr("H264E_encode: error %d", e); return -1; } return 0; } static Hj264 * hj264new(int nthreads, int denoise, int kbps, int ww, int hh) { int i, e, szscratch, szpersist, szyuv; H264E_create_param_t cp; Hjthread *t; u8int *p; Hj264 *h; nthreads = clp(nthreads, 1, H264E_MAX_THREADS); memset(&cp, 0, sizeof(cp)); cp.num_layers = 1; cp.gop = Gop; cp.max_threads = nthreads; cp.const_input_flag = 1; cp.temporal_denoise_flag = denoise; cp.vbv_size_bytes = kbps/1000*8/2; /* 2 seconds */ cp.width = ww; cp.height = hh; if((e = H264E_sizeof(&cp, &szpersist, &szscratch)) != 0){ werrstr("H264E_sizeof: error %d", e); return nil; } /* YUV logic requires alignment */ ww = ((ww-1) | 15) + 1; hh = ((hh-1) | 15) + 1; szyuv = ww*hh*3/2; if((h = calloc(1, sizeof(*h) + Align+szyuv + Align+szpersist + Align+szscratch)) == nil) return nil; p = align(h->buf, Align); h->yuv.yuv[0] = p; h->yuv.stride[0] = ww; h->yuv.yuv[1] = p + ww*hh; h->yuv.stride[1] = ww/2; h->yuv.yuv[2] = p + ww*hh*5/4; h->yuv.stride[2] = ww/2; h->persist = align(p+szyuv, Align); h->scratch = align(h->persist+szpersist, Align); cp.token = h; cp.run_func_in_thread = hjobsrun; if((e = H264E_init(h->persist, &cp)) != 0){ werrstr("H264E_init: error %d", e); return nil; } h->nthreads = nthreads; for(i = 0; i < nthreads; i++){ t = &h->threads[i]; t->id = i; t->job = chancreate(sizeof(void*), 0); t->done = chancreate(sizeof(void*), 0); proccreate(threadf, t, mainstacksize); recvp(t->done); } return h; } static void hj264free(Hj264 *h) { int i; for(i = 0; i < h->nthreads; i++){ chanclose(h->threads[i].done); chanclose(h->threads[i].job); } free(h); } static uvlong nanosec(void) { static uvlong fasthz, xstart; uvlong x, div; if(fasthz == ~0ULL) return nsec() - xstart; if(fasthz == 0){ if(_tos->cyclefreq){ cycles(&xstart); fasthz = _tos->cyclefreq; } else { xstart = nsec(); fasthz = ~0ULL; fprint(2, "cyclefreq not available, falling back to nsec()\n"); fprint(2, "you might want to disable aux/timesync\n"); return 0; } } cycles(&x); x -= xstart; /* this is ugly */ for(div = 1000000000ULL; x < 0x1999999999999999ULL && div > 1 ; div /= 10ULL, x *= 10ULL); return x / (fasthz / div); } static void encthread(void *p) { u8int *src, *data; int srcsz, sz; Memimage *im; Hj264 *h; h = p; src = nil; for(;;){ if((im = recvp(h->frame)) == nil) break; srcsz = Dy(im->r)*(2+bytesperline(im->r, im->depth)); if(src == nil && (src = malloc(srcsz)) == nil) sysfatal("memory"); unloadmemimage(im, im->r, src, srcsz); xrgb2yuv(src, Dx(im->r), Dy(im->r), &h->yuv); freememimage(im); if(hj264_encode(h, &data, &sz) != 0) sysfatal("hj264_encode: %r"); if(Bwrite(&h->out, data, sz) != sz) break; } Bflush(&h->out); hj264free(h); threadexits(nil); } static void usage(void) { fprint(2, "usage: %s [-d] [-f FPS] [-n THREADS] [-k KBPS] [-q 0…10] [-Q QP]\n", argv0); threadexitsall("usage"); } int main(int argc, char **argv) { int nthreads, fps, kbps, denoise, quality, qp; uvlong start, end, fstart, fend; int ww, hh, in, nframes; Memimage *im; Hj264 *h; char *s; /* use NPROC-1 threads by default */ nthreads = ((s = getenv("NPROC")) != nil) ? atoi(s)-1 : 1; denoise = 0; quality = 10; kbps = 0; fps = 30; qp = 33; ARGBEGIN{ case 'd': denoise++; break; case 'f': fps = atoi(EARGF(usage())); break; case 'k': kbps = atoi(EARGF(usage())); break; case 'n': nthreads = atoi(EARGF(usage())); break; case 'q': quality = atoi(EARGF(usage())); break; case 'Q': qp = atoi(EARGF(usage())); break; default: usage(); }ARGEND if(argc < 1) usage(); if((in = open(*argv, OREAD)) < 0) sysfatal("input: %r"); memimageinit(); nanosec(); if(quality > Maxquality) quality = Maxquality; if(kbps < 0) kbps = 0; h = nil; start = nanosec(); for(nframes = 0;; nframes++){ fstart = nanosec(); seek(in, 0, 0); if((im = readmemimage(in)) == nil) break; ww = Dx(im->r); hh = Dy(im->r); if(h == nil){ if((h = hj264new(nthreads, denoise, kbps, ww, hh)) == nil) sysfatal("hj264new: %r"); if(Binit(&h->out, 1, OWRITE) < 0) sysfatal("Binit failed: %r"); h->frame = chancreate(sizeof(void*), fps); /* FIXME how about changing these on the fly? */ h->rp.encode_speed = Maxquality - quality; h->rp.qp_min = h->rp.qp_max = qp; if(kbps > 0){ h->rp.qp_min = 10; h->rp.qp_max = 50; h->rp.desired_frame_bytes = kbps*1000/8/fps; } proccreate(encthread, h, mainstacksize); } if(sendp(h->frame, im) != 1) break; fend = nanosec(); if(1000000000ULL/fps > (fend - fstart)) npe_nsleep(1000000000ULL/fps - (fend - fstart)); /* FIXME make a graceful shutdown on a note */ if(nanosec() - start > 4000000000ULL) break; } end = nanosec(); fprint(2, "%d fps\n", (int)(nframes / ((end - start)/1000000000ULL))); chanclose(h->frame); threadexitsall(nil); return 0; }