shithub: dav1d

Download patch

ref: 0f7a97ba909929bf312f1c9df3e5fc411479981c
parent: 6bc4acf43c3f76fec817b5fb4e3b4ada4ae41ad6
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Sun Dec 9 04:52:09 EST 2018

Make static CDF tables source directly from rodata

Fixes #57, removes one rodata duplicate (vertical mv component is
identical to horizontal one), and removes memcpy() of rodata tables
to the source CdfThreadContext for each frame where primary_ref
is PRIMARY_REF_NONE.

--- a/src/cdf.c
+++ b/src/cdf.c
@@ -777,72 +777,42 @@
     },
 };
 
-static const CdfMvContext default_mv_cdf = {
-    .comp = {
-        { /* mv vertical component */
-            .classes = {
-                AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740,
-                          32757, 32762, 32767)
-            }, .class0 = {
-                AOM_CDF2(216 * 128)
-            }, .classN = {
-                { AOM_CDF2(128 * 136) },
-                { AOM_CDF2(128 * 140) },
-                { AOM_CDF2(128 * 148) },
-                { AOM_CDF2(128 * 160) },
-                { AOM_CDF2(128 * 176) },
-                { AOM_CDF2(128 * 192) },
-                { AOM_CDF2(128 * 224) },
-                { AOM_CDF2(128 * 234) },
-                { AOM_CDF2(128 * 234) },
-                { AOM_CDF2(128 * 240) }
-            }, .class0_fp = {
-                { AOM_CDF4(16384, 24576, 26624) },
-                { AOM_CDF4(12288, 21248, 24128) }
-            }, .classN_fp = {
-                AOM_CDF4(8192, 17408, 21248)
-            }, .class0_hp = {
-                AOM_CDF2(160 * 128)
-            }, .classN_hp = {
-                AOM_CDF2(128 * 128)
-            }, .sign = {
-                AOM_CDF2(128 * 128)
-            }
-        }, { /* mv horizontal component */
-            .classes = {
-                AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740,
-                          32757, 32762, 32767)
-            }, .class0 = {
-                AOM_CDF2(216 * 128)
-            }, .classN = {
-                { AOM_CDF2(128 * 136) },
-                { AOM_CDF2(128 * 140) },
-                { AOM_CDF2(128 * 148) },
-                { AOM_CDF2(128 * 160) },
-                { AOM_CDF2(128 * 176) },
-                { AOM_CDF2(128 * 192) },
-                { AOM_CDF2(128 * 224) },
-                { AOM_CDF2(128 * 234) },
-                { AOM_CDF2(128 * 234) },
-                { AOM_CDF2(128 * 240) }
-            }, .class0_fp = {
-                { AOM_CDF4(16384, 24576, 26624) },
-                { AOM_CDF4(12288, 21248, 24128) }
-            }, .classN_fp = {
-                AOM_CDF4(8192, 17408, 21248)
-            }, .class0_hp = {
-                AOM_CDF2(160 * 128)
-            }, .classN_hp = {
-                AOM_CDF2(128 * 128)
-            }, .sign = {
-                AOM_CDF2(128 * 128)
-            },
-        }
-    }, .joint = {
-        AOM_CDF4(4096, 11264, 19328)
+static const CdfMvComponent default_mv_component_cdf = {
+    .classes = {
+        AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740,
+                  32757, 32762, 32767)
+    }, .class0 = {
+        AOM_CDF2(216 * 128)
+    }, .classN = {
+        { AOM_CDF2(128 * 136) },
+        { AOM_CDF2(128 * 140) },
+        { AOM_CDF2(128 * 148) },
+        { AOM_CDF2(128 * 160) },
+        { AOM_CDF2(128 * 176) },
+        { AOM_CDF2(128 * 192) },
+        { AOM_CDF2(128 * 224) },
+        { AOM_CDF2(128 * 234) },
+        { AOM_CDF2(128 * 234) },
+        { AOM_CDF2(128 * 240) }
+    }, .class0_fp = {
+        { AOM_CDF4(16384, 24576, 26624) },
+        { AOM_CDF4(12288, 21248, 24128) }
+    }, .classN_fp = {
+        AOM_CDF4(8192, 17408, 21248)
+    }, .class0_hp = {
+        AOM_CDF2(160 * 128)
+    }, .classN_hp = {
+        AOM_CDF2(128 * 128)
+    }, .sign = {
+        AOM_CDF2(128 * 128)
     }
 };
 
+
+static const uint16_t default_mv_joint_cdf[N_MV_JOINTS + 1] = {
+    AOM_CDF4(4096, 11264, 19328)
+};
+
 static const uint16_t default_kf_y_mode_cdf[5][5][N_INTRA_PRED_MODES + 1] = {
     {
         { AOM_CDF13(15588, 17027, 19338, 20218, 20682, 21110, 21825, 23244,
@@ -4041,42 +4011,9 @@
     }
 };
 
-static inline int get_qcat_idx(int q) {
-    if (q <= 20) return 0;
-    if (q <= 60) return 1;
-    if (q <= 120) return 2;
-    return 3;
-}
-
-static CdfThreadContext cdf_init[4] = {
-    [0] = { .cdf = NULL },
-    [1] = { .cdf = NULL },
-    [2] = { .cdf = NULL },
-    [3] = { .cdf = NULL },
-};
-
-int dav1d_init_states(CdfThreadContext *const cdf, const int qidx) {
-    const int qcat = get_qcat_idx(qidx);
-    if (cdf_init[qcat].cdf) {
-        dav1d_cdf_thread_ref(cdf, &cdf_init[qcat]);
-        return 0;
-    }
-
-    int res = dav1d_cdf_thread_alloc(&cdf_init[qcat], NULL);
-    if (res < 0) return res;
-    cdf_init[qcat].cdf->m = av1_default_cdf;
-    memcpy(cdf_init[qcat].cdf->kfym, default_kf_y_mode_cdf,
-           sizeof(default_kf_y_mode_cdf));
-    cdf_init[qcat].cdf->coef = av1_default_coef_cdf[qcat];
-    cdf_init[qcat].cdf->mv = default_mv_cdf;
-    cdf_init[qcat].cdf->dmv = default_mv_cdf;
-    dav1d_cdf_thread_ref(cdf, &cdf_init[qcat]);
-    return 0;
-}
-
-void dav1d_update_tile_cdf(const Dav1dFrameHeader *const hdr,
-                           CdfContext *const dst,
-                           const CdfContext *const src)
+void dav1d_cdf_thread_update(const Dav1dFrameHeader *const hdr,
+                             CdfContext *const dst,
+                             const CdfContext *const src)
 {
     int i, j, k, l;
 
@@ -4213,6 +4150,32 @@
 /*
  * CDF threading wrappers.
  */
+static inline int get_qcat_idx(int q) {
+    if (q <= 20) return 0;
+    if (q <= 60) return 1;
+    if (q <= 120) return 2;
+    return 3;
+}
+
+void dav1d_cdf_thread_init_static(CdfThreadContext *const cdf, const int qidx) {
+    cdf->ref = NULL;
+    cdf->data.qcat = get_qcat_idx(qidx);
+}
+
+void dav1d_cdf_thread_copy(CdfContext *const dst, const CdfThreadContext *const src) {
+    if (src->ref) {
+        memcpy(dst, src->data.cdf, sizeof(*dst));
+    } else {
+        dst->m = av1_default_cdf;
+        memcpy(dst->kfym, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
+        dst->coef = av1_default_coef_cdf[src->data.qcat];
+        memcpy(dst->mv.joint, default_mv_joint_cdf, sizeof(default_mv_joint_cdf));
+        memcpy(dst->dmv.joint, default_mv_joint_cdf, sizeof(default_mv_joint_cdf));
+        dst->mv.comp[0] = dst->mv.comp[1] = dst->dmv.comp[0] = dst->dmv.comp[1] =
+            default_mv_component_cdf;
+    }
+}
+
 int dav1d_cdf_thread_alloc(CdfThreadContext *const cdf,
                             struct thread_data *const t)
 {
@@ -4219,9 +4182,9 @@
     cdf->ref = dav1d_ref_create(sizeof(CdfContext) +
                                 (t != NULL) * sizeof(atomic_uint));
     if (!cdf->ref) return -ENOMEM;
-    cdf->cdf = cdf->ref->data;
+    cdf->data.cdf = cdf->ref->data;
     if (t) {
-        cdf->progress = (atomic_uint *) &cdf->cdf[1];
+        cdf->progress = (atomic_uint *) &cdf->data.cdf[1];
         atomic_init(cdf->progress, 0);
         cdf->t = t;
     }
@@ -4231,13 +4194,17 @@
 void dav1d_cdf_thread_ref(CdfThreadContext *const dst,
                           CdfThreadContext *const src)
 {
-    dav1d_ref_inc(src->ref);
     *dst = *src;
+    if (src->ref)
+        dav1d_ref_inc(src->ref);
 }
 
 void dav1d_cdf_thread_unref(CdfThreadContext *const cdf) {
-    dav1d_ref_dec(&cdf->ref);
+    // Release the reference before clearing: memset() zeroes cdf->ref, so
+    // testing it afterwards would never reach dav1d_ref_dec().
+    if (cdf->ref)
+        dav1d_ref_dec(&cdf->ref);
     memset(cdf, 0, sizeof(*cdf));
 }
 
 void dav1d_cdf_thread_wait(CdfThreadContext *const cdf) {
--- a/src/cdf.h
+++ b/src/cdf.h
@@ -125,19 +125,22 @@
 } CdfContext;
 
 typedef struct CdfThreadContext {
-    CdfContext *cdf;
     Dav1dRef *ref; ///< allocation origin
+    union {
+        CdfContext *cdf; // if ref != NULL
+        unsigned qcat; // if ref == NULL, from static CDF tables
+    } data;
     struct thread_data *t;
     atomic_uint *progress;
 } CdfThreadContext;
 
-int dav1d_init_states(CdfThreadContext *cdf, int qidx);
-void dav1d_update_tile_cdf(const Dav1dFrameHeader *hdr, CdfContext *dst,
-                         const CdfContext *src);
-
+void dav1d_cdf_thread_init_static(CdfThreadContext *cdf, int qidx);
 int dav1d_cdf_thread_alloc(CdfThreadContext *cdf, struct thread_data *t);
+void dav1d_cdf_thread_copy(CdfContext *dst, const CdfThreadContext *src);
 void dav1d_cdf_thread_ref(CdfThreadContext *dst, CdfThreadContext *src);
 void dav1d_cdf_thread_unref(CdfThreadContext *cdf);
+void dav1d_cdf_thread_update(const Dav1dFrameHeader *hdr, CdfContext *dst,
+                             const CdfContext *src);
 
 /*
  * These are binary signals (so a signal is either "done" or "not done").
--- a/src/decode.c
+++ b/src/decode.c
@@ -2215,7 +2215,7 @@
 
     ts->frame_thread.pal_idx = &f->frame_thread.pal_idx[tile_start_off * 2];
     ts->frame_thread.cf = &((int32_t *) f->frame_thread.cf)[tile_start_off * 3];
-    ts->cdf = *f->in_cdf.cdf;
+    dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
     ts->last_qidx = f->frame_hdr->quant.yac;
     memset(ts->last_delta_lf, 0, sizeof(ts->last_delta_lf));
 
@@ -2762,7 +2762,7 @@
 
     dav1d_cdf_thread_wait(&f->in_cdf);
     if (f->frame_hdr->refresh_context)
-        memcpy(f->out_cdf.cdf, f->in_cdf.cdf, sizeof(*f->in_cdf.cdf));
+        dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);
 
     // parse individual tiles per tile group
     int update_set = 0, tile_row = 0, tile_col = 0;
@@ -2914,8 +2914,8 @@
         if (f->frame_thread.pass <= 1 && f->frame_hdr->refresh_context) {
             // cdf update
             if (update_set)
-                dav1d_update_tile_cdf(f->frame_hdr, f->out_cdf.cdf,
-                                      &f->ts[f->frame_hdr->tiling.update].cdf);
+                dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
+                                        &f->ts[f->frame_hdr->tiling.update].cdf);
             dav1d_cdf_thread_signal(&f->out_cdf);
         }
         if (f->frame_thread.pass == 1) {
@@ -3105,8 +3105,7 @@
 
     // setup entropy
     if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
-        res = dav1d_init_states(&f->in_cdf, f->frame_hdr->quant.yac);
-        if (res < 0) goto error;
+        dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
     } else {
         const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
         dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
@@ -3277,7 +3276,7 @@
                 dav1d_thread_picture_unref(&c->refs[i].p);
             dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);
 
-            if (c->cdf[i].cdf) dav1d_cdf_thread_unref(&c->cdf[i]);
+            dav1d_cdf_thread_unref(&c->cdf[i]);
             if (f->frame_hdr->refresh_context) {
                 dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
             } else {
@@ -3306,8 +3305,7 @@
                 if (refresh_frame_flags & (1 << i)) {
                     if (c->refs[i].p.p.data[0])
                         dav1d_thread_picture_unref(&c->refs[i].p);
-                    if (c->cdf[i].cdf)
-                        dav1d_cdf_thread_unref(&c->cdf[i]);
+                    dav1d_cdf_thread_unref(&c->cdf[i]);
                     dav1d_ref_dec(&c->refs[i].segmap);
                     dav1d_ref_dec(&c->refs[i].refmvs);
                 }
--- a/src/lib.c
+++ b/src/lib.c
@@ -399,8 +399,7 @@
             dav1d_thread_picture_unref(&c->refs[i].p);
         dav1d_ref_dec(&c->refs[i].segmap);
         dav1d_ref_dec(&c->refs[i].refmvs);
-        if (c->cdf[i].cdf)
-            dav1d_cdf_thread_unref(&c->cdf[i]);
+        dav1d_cdf_thread_unref(&c->cdf[i]);
     }
     c->frame_hdr = NULL;
     c->seq_hdr = NULL;
@@ -493,8 +492,7 @@
     for (int n = 0; n < c->n_tile_data; n++)
         dav1d_data_unref(&c->tile[n].data);
     for (int n = 0; n < 8; n++) {
-        if (c->cdf[n].cdf)
-            dav1d_cdf_thread_unref(&c->cdf[n]);
+        dav1d_cdf_thread_unref(&c->cdf[n]);
         if (c->refs[n].p.p.data[0])
             dav1d_thread_picture_unref(&c->refs[n].p);
         dav1d_ref_dec(&c->refs[n].refmvs);
--- a/src/obu.c
+++ b/src/obu.c
@@ -1251,8 +1251,7 @@
                     dav1d_thread_picture_unref(&c->refs[i].p);
                 dav1d_ref_dec(&c->refs[i].segmap);
                 dav1d_ref_dec(&c->refs[i].refmvs);
-                if (c->cdf[i].cdf)
-                    dav1d_cdf_thread_unref(&c->cdf[i]);
+                dav1d_cdf_thread_unref(&c->cdf[i]);
             }
         }
         dav1d_ref_dec(&c->seq_hdr_ref);
@@ -1394,7 +1393,7 @@
                         dav1d_thread_picture_unref(&c->refs[i].p);
                     dav1d_thread_picture_ref(&c->refs[i].p, &c->refs[r].p);
 
-                    if (c->cdf[i].cdf) dav1d_cdf_thread_unref(&c->cdf[i]);
+                    dav1d_cdf_thread_unref(&c->cdf[i]);
                     dav1d_cdf_thread_ref(&c->cdf[i], &c->cdf[r]);
 
                     dav1d_ref_dec(&c->refs[i].segmap);