shithub: dav1d

Download patch

ref: ba5eecc43e2ef516311b155e4c00d8ccc385ef86
parent: e4fbbbce672937dfa53a9783803776b6d8c76a44
author: Henrik Gramner <gramner@twoorioles.com>
date: Thu Nov 8 12:05:22 EST 2018

Align blend masks

--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -557,18 +557,6 @@
     const Dav1dFrameContext *const f = t->f;
     const refmvs *const r = &f->mvs[t->by * f->b4_stride + t->bx];
     pixel *const lap = t->scratch.lap;
-    static const uint8_t obmc_mask_2[2] = { 19,  0 };
-    static const uint8_t obmc_mask_4[4] = { 25, 14,  5,  0 };
-    static const uint8_t obmc_mask_8[8] = { 28, 22, 16, 11,  7,  3,  0,  0 };
-    static const uint8_t obmc_mask_16[16] = { 30, 27, 24, 21, 18, 15, 12, 10,
-                                               8,  6,  4,  3,  0,  0,  0,  0 };
-    static const uint8_t obmc_mask_32[32] = { 31, 29, 28, 26, 24, 23, 21, 20,
-                                              19, 17, 16, 14, 13, 12, 11,  9,
-                                               8,  7,  6,  5,  4,  4,  3,  2,
-                                               0,  0,  0,  0,  0,  0,  0,  0 };
-    static const uint8_t *const obmc_masks[] = {
-        obmc_mask_2, obmc_mask_4, obmc_mask_8, obmc_mask_16, obmc_mask_32
-    };
     const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
     const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
     const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
@@ -593,7 +581,7 @@
                 if (res) return res;
                 f->dsp->mc.blend(&dst[x * h_mul], dst_stride, lap,
                                  h_mul * ow4, v_mul * oh4,
-                                 obmc_masks[imin(b_dim[3], 4) - ss_ver], 1);
+                                 &dav1d_obmc_masks[v_mul * oh4], 1);
                 i++;
             }
             x += imax(a_b_dim[0], 2);
@@ -615,9 +603,9 @@
                          &f->refp[l_r->ref[0] - 1],
                          dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
                 if (res) return res;
-                f->dsp->mc.blend(&dst[y * v_mul * PXSTRIDE(dst_stride)], dst_stride,
-                                 lap, h_mul * ow4, v_mul * oh4,
-                                 obmc_masks[imin(b_dim[2], 4) - ss_hor], 0);
+                f->dsp->mc.blend(&dst[y * v_mul * PXSTRIDE(dst_stride)],
+                                 dst_stride, lap, h_mul * ow4, v_mul * oh4,
+                                 &dav1d_obmc_masks[h_mul * ow4], 0);
                 i++;
             }
             y += imax(l_b_dim[1], 2);
--- a/src/tables.c
+++ b/src/tables.c
@@ -821,3 +821,19 @@
          14,   0,  12,   0,  11,   0,   9,   0
     }
 };
+
+const uint8_t ALIGN(dav1d_obmc_masks[64], 32) = {
+    /* Unused: padding so that the N-entry mask below starts at index N */
+     0,  0,
+    /* 2 */
+    19,  0,
+    /* 4 */
+    25, 14,  5,  0,
+    /* 8 */
+    28, 22, 16, 11,  7,  3,  0,  0,
+    /* 16 */
+    30, 27, 24, 21, 18, 15, 12, 10,  8,  6,  4,  3,  0,  0,  0,  0,
+    /* 32 */
+    31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11,  9,
+     8,  7,  6,  5,  4,  4,  3,  2,  0,  0,  0,  0,  0,  0,  0,  0,
+};
--- a/src/tables.h
+++ b/src/tables.h
@@ -116,4 +116,6 @@
 extern const int16_t dav1d_dr_intra_derivative[90];
 extern const int8_t dav1d_filter_intra_taps[5][64];
 
+extern const uint8_t dav1d_obmc_masks[64];
+
 #endif /* __DAV1D_SRC_TABLES_H__ */
--- a/src/wedge.c
+++ b/src/wedge.c
@@ -272,16 +272,18 @@
 #undef fill
 }
 
-static uint8_t ii_dc_mask[32 * 32];
-static uint8_t ii_nondc_mask_32x32[N_INTER_INTRA_PRED_MODES - 1][32 * 32];
-static uint8_t ii_nondc_mask_16x32[N_INTER_INTRA_PRED_MODES - 1][16 * 32];
-static uint8_t ii_nondc_mask_16x16[N_INTER_INTRA_PRED_MODES - 1][16 * 16];
-static uint8_t ii_nondc_mask_8x32[N_INTER_INTRA_PRED_MODES - 1][8 * 32];
-static uint8_t ii_nondc_mask_8x16[N_INTER_INTRA_PRED_MODES - 1][8 * 16];
-static uint8_t ii_nondc_mask_8x8[N_INTER_INTRA_PRED_MODES - 1][8 * 8];
-static uint8_t ii_nondc_mask_4x16[N_INTER_INTRA_PRED_MODES - 1][4 * 16];
-static uint8_t ii_nondc_mask_4x8[N_INTER_INTRA_PRED_MODES - 1][4 * 8];
-static uint8_t ii_nondc_mask_4x4[N_INTER_INTRA_PRED_MODES - 1][4 * 4];
+#define N_II_PRED_MODES (N_INTER_INTRA_PRED_MODES - 1) /* presumably the non-DC modes; II_DC_PRED uses ii_dc_mask -- confirm */
+static uint8_t ALIGN(ii_dc_mask[32 * 32], 32);
+static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 32);
+static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 32);
+static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 32);
+static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 32);
+static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 32);
+static uint8_t ALIGN(ii_nondc_mask_8x8  [N_II_PRED_MODES][ 8 *  8], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x8  [N_II_PRED_MODES][ 4 *  8], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x4  [N_II_PRED_MODES][ 4 *  4], 32);
+#undef N_II_PRED_MODES
 
 #define set1(sz) \
     [II_DC_PRED] = ii_dc_mask, \