ref: 0f7a97ba909929bf312f1c9df3e5fc411479981c
parent: 6bc4acf43c3f76fec817b5fb4e3b4ada4ae41ad6
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Sun Dec 9 04:52:09 EST 2018
Make static CDF tables source directly from rodata. Fixes #57, removes one rodata duplicate (vertical mv component is identical to horizontal one), and removes memcpy() of rodata tables to the source Dav1dThreadCdfContext for each frame where primary_ref is PRIMARY_REF_NONE.
--- a/src/cdf.c
+++ b/src/cdf.c
@@ -777,72 +777,42 @@
},
};
-static const CdfMvContext default_mv_cdf = {
- .comp = {
- { /* mv vertical component */
- .classes = {
- AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740,
- 32757, 32762, 32767)
- }, .class0 = {
- AOM_CDF2(216 * 128)
- }, .classN = {
- { AOM_CDF2(128 * 136) },
- { AOM_CDF2(128 * 140) },
- { AOM_CDF2(128 * 148) },
- { AOM_CDF2(128 * 160) },
- { AOM_CDF2(128 * 176) },
- { AOM_CDF2(128 * 192) },
- { AOM_CDF2(128 * 224) },
- { AOM_CDF2(128 * 234) },
- { AOM_CDF2(128 * 234) },
- { AOM_CDF2(128 * 240) }
- }, .class0_fp = {
- { AOM_CDF4(16384, 24576, 26624) },
- { AOM_CDF4(12288, 21248, 24128) }
- }, .classN_fp = {
- AOM_CDF4(8192, 17408, 21248)
- }, .class0_hp = {
- AOM_CDF2(160 * 128)
- }, .classN_hp = {
- AOM_CDF2(128 * 128)
- }, .sign = {
- AOM_CDF2(128 * 128)
- }
- }, { /* mv horizontal component */
- .classes = {
- AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740,
- 32757, 32762, 32767)
- }, .class0 = {
- AOM_CDF2(216 * 128)
- }, .classN = {
- { AOM_CDF2(128 * 136) },
- { AOM_CDF2(128 * 140) },
- { AOM_CDF2(128 * 148) },
- { AOM_CDF2(128 * 160) },
- { AOM_CDF2(128 * 176) },
- { AOM_CDF2(128 * 192) },
- { AOM_CDF2(128 * 224) },
- { AOM_CDF2(128 * 234) },
- { AOM_CDF2(128 * 234) },
- { AOM_CDF2(128 * 240) }
- }, .class0_fp = {
- { AOM_CDF4(16384, 24576, 26624) },
- { AOM_CDF4(12288, 21248, 24128) }
- }, .classN_fp = {
- AOM_CDF4(8192, 17408, 21248)
- }, .class0_hp = {
- AOM_CDF2(160 * 128)
- }, .classN_hp = {
- AOM_CDF2(128 * 128)
- }, .sign = {
- AOM_CDF2(128 * 128)
- },
- }
- }, .joint = {
- AOM_CDF4(4096, 11264, 19328)
+static const CdfMvComponent default_mv_component_cdf = {
+ .classes = {
+ AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740,
+ 32757, 32762, 32767)
+ }, .class0 = {
+ AOM_CDF2(216 * 128)
+ }, .classN = {
+ { AOM_CDF2(128 * 136) },
+ { AOM_CDF2(128 * 140) },
+ { AOM_CDF2(128 * 148) },
+ { AOM_CDF2(128 * 160) },
+ { AOM_CDF2(128 * 176) },
+ { AOM_CDF2(128 * 192) },
+ { AOM_CDF2(128 * 224) },
+ { AOM_CDF2(128 * 234) },
+ { AOM_CDF2(128 * 234) },
+ { AOM_CDF2(128 * 240) }
+ }, .class0_fp = {
+ { AOM_CDF4(16384, 24576, 26624) },
+ { AOM_CDF4(12288, 21248, 24128) }
+ }, .classN_fp = {
+ AOM_CDF4(8192, 17408, 21248)
+ }, .class0_hp = {
+ AOM_CDF2(160 * 128)
+ }, .classN_hp = {
+ AOM_CDF2(128 * 128)
+ }, .sign = {
+ AOM_CDF2(128 * 128)
}
};
+
+static const uint16_t default_mv_joint_cdf[N_MV_JOINTS + 1] = {
+ AOM_CDF4(4096, 11264, 19328)
+};
+
static const uint16_t default_kf_y_mode_cdf[5][5][N_INTRA_PRED_MODES + 1] = {
{
{ AOM_CDF13(15588, 17027, 19338, 20218, 20682, 21110, 21825, 23244,
@@ -4041,42 +4011,9 @@
}
};
-static inline int get_qcat_idx(int q) {
- if (q <= 20) return 0;
- if (q <= 60) return 1;
- if (q <= 120) return 2;
- return 3;
-}
-
-static CdfThreadContext cdf_init[4] = {
- [0] = { .cdf = NULL },
- [1] = { .cdf = NULL },
- [2] = { .cdf = NULL },
- [3] = { .cdf = NULL },
-};
-
-int dav1d_init_states(CdfThreadContext *const cdf, const int qidx) {
- const int qcat = get_qcat_idx(qidx);
- if (cdf_init[qcat].cdf) {
- dav1d_cdf_thread_ref(cdf, &cdf_init[qcat]);
- return 0;
- }
-
- int res = dav1d_cdf_thread_alloc(&cdf_init[qcat], NULL);
- if (res < 0) return res;
- cdf_init[qcat].cdf->m = av1_default_cdf;
- memcpy(cdf_init[qcat].cdf->kfym, default_kf_y_mode_cdf,
- sizeof(default_kf_y_mode_cdf));
- cdf_init[qcat].cdf->coef = av1_default_coef_cdf[qcat];
- cdf_init[qcat].cdf->mv = default_mv_cdf;
- cdf_init[qcat].cdf->dmv = default_mv_cdf;
- dav1d_cdf_thread_ref(cdf, &cdf_init[qcat]);
- return 0;
-}
-
-void dav1d_update_tile_cdf(const Dav1dFrameHeader *const hdr,
- CdfContext *const dst,
- const CdfContext *const src)
+void dav1d_cdf_thread_update(const Dav1dFrameHeader *const hdr,
+ CdfContext *const dst,
+ const CdfContext *const src)
{
int i, j, k, l;
@@ -4213,6 +4150,32 @@
/*
* CDF threading wrappers.
*/
+static inline int get_qcat_idx(int q) {
+ if (q <= 20) return 0;
+ if (q <= 60) return 1;
+ if (q <= 120) return 2;
+ return 3;
+}
+
+void dav1d_cdf_thread_init_static(CdfThreadContext *const cdf, const int qidx) {
+ cdf->ref = NULL;
+ cdf->data.qcat = get_qcat_idx(qidx);
+}
+
+void dav1d_cdf_thread_copy(CdfContext *const dst, const CdfThreadContext *const src) {
+ if (src->ref) {
+ memcpy(dst, src->data.cdf, sizeof(*dst));
+ } else {
+ dst->m = av1_default_cdf;
+ memcpy(dst->kfym, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
+ dst->coef = av1_default_coef_cdf[src->data.qcat];
+ memcpy(dst->mv.joint, default_mv_joint_cdf, sizeof(default_mv_joint_cdf));
+ memcpy(dst->dmv.joint, default_mv_joint_cdf, sizeof(default_mv_joint_cdf));
+ dst->mv.comp[0] = dst->mv.comp[1] = dst->dmv.comp[0] = dst->dmv.comp[1] =
+ default_mv_component_cdf;
+ }
+}
+
int dav1d_cdf_thread_alloc(CdfThreadContext *const cdf,
struct thread_data *const t)
{
@@ -4219,9 +4182,9 @@
cdf->ref = dav1d_ref_create(sizeof(CdfContext) +
(t != NULL) * sizeof(atomic_uint));
if (!cdf->ref) return -ENOMEM;
- cdf->cdf = cdf->ref->data;
+ cdf->data.cdf = cdf->ref->data;
if (t) {
- cdf->progress = (atomic_uint *) &cdf->cdf[1];
+ cdf->progress = (atomic_uint *) &cdf->data.cdf[1];
atomic_init(cdf->progress, 0);
cdf->t = t;
}
@@ -4231,13 +4194,15 @@
void dav1d_cdf_thread_ref(CdfThreadContext *const dst,
CdfThreadContext *const src)
{
- dav1d_ref_inc(src->ref);
*dst = *src;
+ if (src->ref)
+ dav1d_ref_inc(src->ref);
}
void dav1d_cdf_thread_unref(CdfThreadContext *const cdf) {
- dav1d_ref_dec(&cdf->ref);
+ if (cdf->ref) // release before the memset() below, which zeroes cdf->ref
+ dav1d_ref_dec(&cdf->ref); // skipped for static-table contexts (ref == NULL)
memset(cdf, 0, sizeof(*cdf));
}
void dav1d_cdf_thread_wait(CdfThreadContext *const cdf) {
--- a/src/cdf.h
+++ b/src/cdf.h
@@ -125,19 +125,22 @@
} CdfContext;
typedef struct CdfThreadContext {
- CdfContext *cdf;
Dav1dRef *ref; ///< allocation origin
+ union {
+ CdfContext *cdf; // if ref != NULL
+ unsigned qcat; // if ref == NULL, from static CDF tables
+ } data;
struct thread_data *t;
atomic_uint *progress;
} CdfThreadContext;
-int dav1d_init_states(CdfThreadContext *cdf, int qidx);
-void dav1d_update_tile_cdf(const Dav1dFrameHeader *hdr, CdfContext *dst,
- const CdfContext *src);
-
+void dav1d_cdf_thread_init_static(CdfThreadContext *cdf, int qidx);
int dav1d_cdf_thread_alloc(CdfThreadContext *cdf, struct thread_data *t);
+void dav1d_cdf_thread_copy(CdfContext *dst, const CdfThreadContext *src);
void dav1d_cdf_thread_ref(CdfThreadContext *dst, CdfThreadContext *src);
void dav1d_cdf_thread_unref(CdfThreadContext *cdf);
+void dav1d_cdf_thread_update(const Dav1dFrameHeader *hdr, CdfContext *dst,
+ const CdfContext *src);
/*
* These are binary signals (so a signal is either "done" or "not done").
--- a/src/decode.c
+++ b/src/decode.c
@@ -2215,7 +2215,7 @@
ts->frame_thread.pal_idx = &f->frame_thread.pal_idx[tile_start_off * 2];
ts->frame_thread.cf = &((int32_t *) f->frame_thread.cf)[tile_start_off * 3];
- ts->cdf = *f->in_cdf.cdf;
+ dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
ts->last_qidx = f->frame_hdr->quant.yac;
memset(ts->last_delta_lf, 0, sizeof(ts->last_delta_lf));
@@ -2762,7 +2762,7 @@
dav1d_cdf_thread_wait(&f->in_cdf);
if (f->frame_hdr->refresh_context)
- memcpy(f->out_cdf.cdf, f->in_cdf.cdf, sizeof(*f->in_cdf.cdf));
+ dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);
// parse individual tiles per tile group
int update_set = 0, tile_row = 0, tile_col = 0;
@@ -2914,8 +2914,8 @@
if (f->frame_thread.pass <= 1 && f->frame_hdr->refresh_context) {
// cdf update
if (update_set)
- dav1d_update_tile_cdf(f->frame_hdr, f->out_cdf.cdf,
- &f->ts[f->frame_hdr->tiling.update].cdf);
+ dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
+ &f->ts[f->frame_hdr->tiling.update].cdf);
dav1d_cdf_thread_signal(&f->out_cdf);
}
if (f->frame_thread.pass == 1) {
@@ -3105,8 +3105,7 @@
// setup entropy
if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
- res = dav1d_init_states(&f->in_cdf, f->frame_hdr->quant.yac);
- if (res < 0) goto error;
+ dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
} else {
const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
@@ -3277,7 +3276,7 @@
dav1d_thread_picture_unref(&c->refs[i].p);
dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);
- if (c->cdf[i].cdf) dav1d_cdf_thread_unref(&c->cdf[i]);
+ dav1d_cdf_thread_unref(&c->cdf[i]);
if (f->frame_hdr->refresh_context) {
dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
} else {
@@ -3306,8 +3305,7 @@
if (refresh_frame_flags & (1 << i)) {
if (c->refs[i].p.p.data[0])
dav1d_thread_picture_unref(&c->refs[i].p);
- if (c->cdf[i].cdf)
- dav1d_cdf_thread_unref(&c->cdf[i]);
+ dav1d_cdf_thread_unref(&c->cdf[i]);
dav1d_ref_dec(&c->refs[i].segmap);
dav1d_ref_dec(&c->refs[i].refmvs);
}
--- a/src/lib.c
+++ b/src/lib.c
@@ -399,8 +399,7 @@
dav1d_thread_picture_unref(&c->refs[i].p);
dav1d_ref_dec(&c->refs[i].segmap);
dav1d_ref_dec(&c->refs[i].refmvs);
- if (c->cdf[i].cdf)
- dav1d_cdf_thread_unref(&c->cdf[i]);
+ dav1d_cdf_thread_unref(&c->cdf[i]);
}
c->frame_hdr = NULL;
c->seq_hdr = NULL;
@@ -493,8 +492,7 @@
for (int n = 0; n < c->n_tile_data; n++)
dav1d_data_unref(&c->tile[n].data);
for (int n = 0; n < 8; n++) {
- if (c->cdf[n].cdf)
- dav1d_cdf_thread_unref(&c->cdf[n]);
+ dav1d_cdf_thread_unref(&c->cdf[n]);
if (c->refs[n].p.p.data[0])
dav1d_thread_picture_unref(&c->refs[n].p);
dav1d_ref_dec(&c->refs[n].refmvs);
--- a/src/obu.c
+++ b/src/obu.c
@@ -1251,8 +1251,7 @@
dav1d_thread_picture_unref(&c->refs[i].p);
dav1d_ref_dec(&c->refs[i].segmap);
dav1d_ref_dec(&c->refs[i].refmvs);
- if (c->cdf[i].cdf)
- dav1d_cdf_thread_unref(&c->cdf[i]);
+ dav1d_cdf_thread_unref(&c->cdf[i]);
}
}
dav1d_ref_dec(&c->seq_hdr_ref);
@@ -1394,7 +1393,7 @@
dav1d_thread_picture_unref(&c->refs[i].p);
dav1d_thread_picture_ref(&c->refs[i].p, &c->refs[r].p);
- if (c->cdf[i].cdf) dav1d_cdf_thread_unref(&c->cdf[i]);
+ dav1d_cdf_thread_unref(&c->cdf[i]);
dav1d_cdf_thread_ref(&c->cdf[i], &c->cdf[r]);
dav1d_ref_dec(&c->refs[i].segmap);