ref: 6cdf4cb5e83da3d89367cd2e2d8b1813b573146d
dir: /hj264.c/
#include "builtins.h"
#define MINIH264_IMPLEMENTATION
#define H264E_MAX_THREADS 7
#include "minih264e.h"
#include <thread.h>
#include <bio.h>
#include <draw.h>
#include <tos.h>
#include "yuv.h"

void npe_nsleep(uvlong ns);
uvlong npe_nanosec(void);

#define max(a,b) ((a)>(b)?(a):(b))
#define min(a,b) ((a)<(b)?(a):(b))
#define clp(v,a,b) min((b), max((v),(a)))
/*
 * Round address p up to the next multiple of a (a power of two).
 * Arguments are parenthesized so that an expression such as "ptr+n"
 * is evaluated with its own pointer type before the cast is applied.
 */
#define align(p,a) (void*)((((uintptr)(p) - 1) | ((a)-1)) + 1)

enum {
	FmtRaw,
	FmtIVF,

	Align = 64,
	Maxquality = 10,
};

typedef struct Hjob Hjob;
typedef struct Hjthread Hjthread;
typedef struct Hj264 Hj264;
typedef struct Img Img;

/* One unit of work handed to a worker: call run(arg). */
struct Hjob {
	void (*run)(void *);
	void *arg;
};

/* A worker and the two channels used to talk to it. */
struct Hjthread {
	int id;
	Channel *job;	/* main -> worker: Hjob* to execute, nil ends the worker */
	Channel *done;	/* worker -> main: sent once at startup and after every job */
};

/* Encoder state; yuv planes, persist and scratch areas all live in buf. */
struct Hj264 {
	H264E_persist_t *persist;
	H264E_scratch_t *scratch;
	H264E_run_param_t rp;
	H264E_io_yuv_t ioyuv;
	int w, h;
	YUV yuv;
	int fmt;
	Biobuf out;
	Channel *frame;	/* Img* to encode, closed to stop encthread */
	Channel *done;	/* encthread sends nil here when it is finished */

	Hjthread threads[H264E_MAX_THREADS];
	Hjob jobs[H264E_MAX_THREADS];
	int nthreads;
	u8int buf[1];
};

/* A captured frame: capture time in nanoseconds plus w*h 4-byte pixels. */
struct Img {
	uvlong ns;
	int w;
	int h;
	u8int bgrx[];
};

#pragma varargck type "ℏ" int

/*
 * Format verb for H264E status codes: prints a readable message for the
 * known codes and "error N" for anything else.
 */
static int
hjerror(Fmt *f)
{
	char *s;
	int e;

	s = nil;
	e = va_arg(f->args, int);
	switch(e){
	case H264E_STATUS_SUCCESS: s = "success"; break;
	case H264E_STATUS_BAD_ARGUMENT: s = "bad argument"; break;
	case H264E_STATUS_BAD_PARAMETER: s = "bad parameter"; break;
	case H264E_STATUS_BAD_FRAME_TYPE: s = "bad frame type"; break;
	case H264E_STATUS_SIZE_NOT_MULTIPLE_16: s = "size not multiple of 16"; break;
	case H264E_STATUS_SIZE_NOT_MULTIPLE_2: s = "size not multiple of 2"; break;
	case H264E_STATUS_BAD_LUMA_ALIGN: s = "bad luma alignment"; break;
	case H264E_STATUS_BAD_LUMA_STRIDE: s = "bad luma stride"; break;
	case H264E_STATUS_BAD_CHROMA_ALIGN: s = "bad chroma alignment"; break;
	case H264E_STATUS_BAD_CHROMA_STRIDE: s = "bad chroma stride"; break;
	}

	return s == nil ? fmtprint(f, "error %d", e) : fmtprint(f, "%s", s);
}

/*
 * Worker body: announce readiness on "done", then run every job received
 * on "job", acknowledging each one on "done". A nil from recvp ends the
 * loop, after which the worker frees both of its channels.
 */
static void
threadf(void *p)
{
	Hjthread *t;
	Hjob *j;
	Channel *job, *done;

	t = p;
	threadsetname("hj264/%d", t->id);

	/* keep local copies: the Hj264 holding t may be freed while we exit */
	job = t->job;
	done = t->done;
	for(sendp(done, nil); (j = recvp(job)) != nil; sendp(done, j))
		j->run(j->arg);

	chanfree(done);
	chanfree(job);

	threadexits(nil);
}

/*
 * Callback given to the encoder (cp.run_func_in_thread): run "run" once
 * for each of the njob arguments. Jobs are dispatched in batches of at
 * most nthreads; each batch is fully acknowledged before the next starts.
 */
static void
hjobsrun(void *p, void (*run)(void *), void **arg, int njob)
{
	int n, t;
	Hj264 *h;
	Hjob *j;

	h = p;
	for(n = 0; n < njob;){
		for(t = 0; t < h->nthreads && n < njob; t++, n++){
			j = &h->jobs[t];
			j->run = run;
			j->arg = arg[n];
			sendp(h->threads[t].job, j);
		}

		/* wait for exactly the workers that were given a job */
		for(t--; t >= 0; t--)
			recvp(h->threads[t].done);
	}
}

/*
 * Encode the frame currently stored in h->ioyuv.
 * On success returns 0 with *data and *sz set by H264E_encode;
 * on failure sets errstr and returns -1.
 */
static int
hj264_encode(Hj264 *h, u8int **data, int *sz)
{
	int e;

	if((e = H264E_encode(h->persist, h->scratch, &h->rp, &h->ioyuv, data, sz)) != 0){
		werrstr("H264E_encode: %ℏ", e);
		return -1;
	}

	return 0;
}

/*
 * Allocate and initialize an encoder for ww x hh frames and start its
 * worker procs. nthreads is clamped to 1..H264E_MAX_THREADS and the
 * height is rounded up to a multiple of 16.
 * Returns nil on failure; errstr is set explicitly when an H264E call fails.
 */
static Hj264 *
hj264new(int nthreads, int denoise, int kbps, int gop, int ww, int hh)
{
	int i, e, szscratch, szpersist, szyuv;
	H264E_create_param_t cp;
	Hjthread *t;
	u8int *p;
	Hj264 *h;

	nthreads = clp(nthreads, 1, H264E_MAX_THREADS);

	/* YUV logic requires alignment, allow height to be different (pad it) */
	hh = ((hh-1) | 15) + 1;

	memset(&cp, 0, sizeof(cp));
	cp.num_layers = 1;
	cp.gop = gop;
	cp.max_threads = nthreads;
	cp.temporal_denoise_flag = denoise;
	cp.max_long_term_reference_frames = MAX_LONG_TERM_FRAMES;
	/* kbit/s -> bytes/s, times 2 seconds of buffering */
	cp.vbv_size_bytes = kbps*1000/8*2;
	cp.width = ww;
	cp.height = hh;

	if((e = H264E_sizeof(&cp, &szpersist, &szscratch)) != 0){
		werrstr("H264E_sizeof: %ℏ", e);
		return nil;
	}

	/* FIXME not padding width yet, so it still has to be multiple of 16 */
	/* once we do that, put this line to where "hh" is aligned */
	ww = ((ww-1) | 15) + 1;
	szyuv = ww*hh*3/2;

	/* a single allocation holds the struct and three Align-ed areas */
	if((h = calloc(1, sizeof(*h) + Align+szyuv + Align+szpersist + Align+szscratch)) == nil)
		return nil;

	/* planar 4:2:0 layout: Y (ww*hh), then U and V (ww*hh/4 each) */
	p = align(h->buf, Align);
	h->ioyuv.yuv[0] = p;
	h->ioyuv.stride[0] = ww;
	h->ioyuv.yuv[1] = p + ww*hh;
	h->ioyuv.stride[1] = ww/2;
	h->ioyuv.yuv[2] = p + ww*hh*5/4;
	h->ioyuv.stride[2] = ww/2;
	h->yuv.y = h->ioyuv.yuv[0];
	h->yuv.ys = h->ioyuv.stride[0];
	h->yuv.u = h->ioyuv.yuv[1];
	h->yuv.us = h->ioyuv.stride[1];
	h->yuv.v = h->ioyuv.yuv[2];
	h->yuv.vs = h->ioyuv.stride[2];
	h->persist = align(p+szyuv, Align);
	/* szpersist is a byte count, so step in bytes, not in persist-sized units */
	h->scratch = align((u8int*)h->persist + szpersist, Align);
	h->w = ww;
	h->h = hh;

	cp.token = h;
	cp.run_func_in_thread = hjobsrun;
	if((e = H264E_init(h->persist, &cp)) != 0){
		/* no workers exist yet, so a plain free releases everything */
		free(h);
		werrstr("H264E_init: %ℏ", e);
		return nil;
	}

	h->nthreads = nthreads;
	for(i = 0; i < nthreads; i++){
		t = &h->threads[i];
		t->id = i;
		t->job = chancreate(sizeof(void*), 0);
		t->done = chancreate(sizeof(void*), 0);
		proccreate(threadf, t, mainstacksize);
		/* wait for the worker's startup message */
		recvp(t->done);
	}

	return h;
}

/*
 * Stop the workers by closing their channels and release the encoder.
 * The channels themselves are freed by the workers (see threadf).
 */
static void
hj264free(Hj264 *h)
{
	int i;

	for(i = 0; i < h->nthreads; i++){
		chanclose(h->threads[i].done);
		chanclose(h->threads[i].job);
	}

	free(h);
}

/*
 * Encoder proc: optionally writes an IVF file header, then converts,
 * encodes and writes every Img received on h->frame until the channel
 * yields nil or a write fails. Flushes the output and signals h->done.
 */
static void
encthread(void *p)
{
	u8int *data, v[20];
	uvlong ts;
	Img *img;
	Hj264 *h;
	int sz;

	h = p;
	if(h->fmt == FmtIVF){
		enum{
			Timedenum = 1000ULL,
		};

		/*
		 * The literal is split before "AVC1": in ISO C a \x escape
		 * consumes every following hex digit, and 'A' is one.
		 */
		Bwrite(&h->out, "DKIF\x00\x00\x20\x00" "AVC1", 12);
		v[0] = h->w;
		v[1] = h->w >> 8;
		v[2] = h->h;
		v[3] = h->h >> 8;
		v[4] = Timedenum;
		v[5] = Timedenum >> 8;
		v[6] = Timedenum >> 16;
		v[7] = Timedenum >> 24;
		v[8] = 1;
		v[9] = 0;
		v[10] = 0;
		v[11] = 0;
		memset(v+12, 0, 8); /* unknown duration */
		Bwrite(&h->out, v, sizeof(v));
	}

	for(;;){
		if((img = recvp(h->frame)) == nil)
			break;

		xrgb2yuv420(img->bgrx, img->w, img->h, &h->yuv);
		/* nanoseconds -> milliseconds, matching Timedenum above */
		ts = img->ns / 1000000ULL;
		free(img);

		if(hj264_encode(h, &data, &sz) != 0)
			sysfatal("hj264_encode: %r");

		if(h->fmt == FmtIVF){
			/* per-frame header: 4-byte size, 8-byte timestamp, little endian */
			v[0] = sz;
			v[1] = sz >> 8;
			v[2] = sz >> 16;
			v[3] = sz >> 24;
			v[4] = ts;
			v[5] = ts >> 8;
			v[6] = ts >> 16;
			v[7] = ts >> 24;
			v[8] = ts >> 32;
			v[9] = ts >> 40;
			v[10] = ts >> 48;
			v[11] = ts >> 56;
			if(Bwrite(&h->out, v, 12) != 12)
				break;
		}
		if(Bwrite(&h->out, data, sz) != sz)
			break;
	}

	Bflush(&h->out);

	if(h->done != nil)
		sendp(h->done, nil);

	threadexits(nil);
}

/*
 * Read one w x h frame (4 bytes per pixel) from f, skipping the 5*12
 * byte image header, and stamp it with the current time.
 * Returns a malloc'ed Img owned by the caller, or nil on allocation
 * failure, read error or short file.
 */
static Img *
imgread(int f, int w, int h)
{
	int r, n, e;
	Img *i;

	e = w*h*4;
	if((i = malloc(sizeof(*i) + e)) == nil)
		return nil;
	i->w = w;
	i->h = h;
	for(n = 0; n < e; n += r){
		if((r = pread(f, i->bgrx+n, e-n, n+5*12)) <= 0){
			free(i);
			return nil;
		}
	}
	i->ns = npe_nanosec();

	return i;
}

/* Print the command line synopsis and exit. */
static void
usage(void)
{
	fprint(2, "usage: %s [-D] [-f FPS] [-F FORMAT] [-g GOP] [-n THREADS] [-k KBPS] [-q 0…10] [-Q QP] FILE\n", argv0);
	threadexitsall("usage");
}

static uvlong nframes, tstart, debug;

/*
 * Exit path, used both as the note handler and at the end of main:
 * prints the average frame rate when debugging, flushes the output of
 * the encoder registered in this proc's procdata slot (if any) and exits.
 */
static int
done(void *, char *msg)
{
	uvlong s;
	Hj264 *h;

	if(debug){
		s = npe_nanosec() - tstart;
		s /= 1000000000ULL;
		if(s != 0)
			fprint(2, "%llud fps\n", nframes / s);
	}

	/* the slot is nil once main has freed the encoder */
	if((h = *procdata()) != nil)
		Bflush(&h->out);

	threadexitsall(msg);

	return 1;
}

/*
 * Parse options, read the image header from FILE, set up the encoder
 * and feed it one frame per 1/fps seconds until reading fails.
 */
int
main(int argc, char **argv)
{
	int nthreads, fps, kbps, denoise, quality, qp, gop;
	char *s, tmp[61], *f[5];
	uvlong fstart, fend;
	int ww, hh, in, fmt;
	Img *img;
	Hj264 *h;

	/* use NPROC-1 threads by default */
	nthreads = ((s = getenv("NPROC")) != nil) ? atoi(s)-1 : 1;
	denoise = 0;
	quality = 10;
	kbps = 0;
	fps = 30;
	qp = 33;
	gop = 20;
	fmt = FmtIVF;
	ARGBEGIN{
	case 'd':
		debug++;
		break;
	case 'D':
		denoise++;
		break;
	case 'f':
		fps = atoi(EARGF(usage()));
		break;
	case 'F':
		s = EARGF(usage());
		if(cistrcmp(s, "ivf") == 0)
			fmt = FmtIVF;
		else if(cistrcmp(s, "raw") == 0)
			fmt = FmtRaw;
		else
			sysfatal("unknown format %s", s);
		break;
	case 'g':
		gop = atoi(EARGF(usage()));
		break;
	case 'k':
		kbps = atoi(EARGF(usage()));
		break;
	case 'n':
		nthreads = atoi(EARGF(usage()));
		break;
	case 'q':
		quality = atoi(EARGF(usage()));
		break;
	case 'Q':
		qp = atoi(EARGF(usage()));
		break;
	default:
		usage();
	}ARGEND

	/* keep encode_speed (Maxquality - quality) within 0..Maxquality */
	quality = clp(quality, 0, Maxquality);
	if(kbps < 0)
		kbps = 0;
	/* fps is used as a divisor below */
	if(fps < 1)
		sysfatal("invalid fps %d", fps);

	if(argc != 1)
		usage();
	if((in = open(*argv, OREAD)) < 0)
		sysfatal("input: %r");

	fmtinstall(L'ℏ', hjerror);

	/* image header: channel descriptor and rectangle, 5 fields in 60 bytes */
	tmp[60] = 0;
	if(readn(in, tmp, 60) != 60 || tokenize(tmp, f, 5) != 5)
		sysfatal("invalid image");
	if(strcmp(f[0], "x8r8g8b8") != 0)
		sysfatal("only x8r8g8b8 is supported");
	ww = atoi(f[3]) - atoi(f[1]);
	hh = atoi(f[4]) - atoi(f[2]);
	if(ww & 15)
		sysfatal("frame width has to be multiple of 16");
	if(ww < 16 || hh < 16)
		sysfatal("frame too small: %dx%d", ww, hh);

	if((h = hj264new(nthreads, denoise, kbps, gop, ww, hh)) == nil)
		sysfatal("hj264new: %r");
	if(Binit(&h->out, 1, OWRITE) < 0)
		sysfatal("Binit failed: %r");
	h->frame = chancreate(sizeof(void*), 1); /* FIXME this is wrong as the encoder might be too late */
	h->done = chancreate(sizeof(void*), 0);
	h->fmt = fmt;

	/* FIXME how about changing these on the fly? */
	h->rp.encode_speed = Maxquality - quality;
	h->rp.qp_min = h->rp.qp_max = qp;
	if(kbps > 0){
		h->rp.qp_min = 10;
		h->rp.qp_max = 50;
		h->rp.desired_frame_bytes = kbps*1000/8/fps;
	}

	/* let done() find the encoder when it runs as a note handler */
	*procdata() = h;
	threadnotify(done, 1);
	proccreate(encthread, h, mainstacksize);

	tstart = npe_nanosec();
	for(nframes = 0;; nframes++){
		fstart = npe_nanosec();
		if((img = imgread(in, ww, hh)) == nil)
			break;
		if(sendp(h->frame, img) != 1)
			break;
		fend = npe_nanosec();

		/* sleep for whatever is left of this frame's time slot */
		if(1000000000ULL/fps > (fend - fstart))
			npe_nsleep(1000000000ULL/fps - (fend - fstart));
	}

	chanclose(h->frame);
	recvp(h->done);

	/*
	 * encthread flushed the output before signalling h->done; clear the
	 * slot so that done() does not touch the encoder after it is freed.
	 */
	*procdata() = nil;
	hj264free(h);

	return done(nil, nil);
}