ref: ba5eecc43e2ef516311b155e4c00d8ccc385ef86
parent: e4fbbbce672937dfa53a9783803776b6d8c76a44
author: Henrik Gramner <gramner@twoorioles.com>
date: Thu Nov 8 12:05:22 EST 2018
Align blend masks
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -557,18 +557,6 @@
const Dav1dFrameContext *const f = t->f;
const refmvs *const r = &f->mvs[t->by * f->b4_stride + t->bx];
pixel *const lap = t->scratch.lap;
- static const uint8_t obmc_mask_2[2] = { 19, 0 };
- static const uint8_t obmc_mask_4[4] = { 25, 14, 5, 0 };
- static const uint8_t obmc_mask_8[8] = { 28, 22, 16, 11, 7, 3, 0, 0 };
- static const uint8_t obmc_mask_16[16] = { 30, 27, 24, 21, 18, 15, 12, 10,
- 8, 6, 4, 3, 0, 0, 0, 0 };
- static const uint8_t obmc_mask_32[32] = { 31, 29, 28, 26, 24, 23, 21, 20,
- 19, 17, 16, 14, 13, 12, 11, 9,
- 8, 7, 6, 5, 4, 4, 3, 2,
- 0, 0, 0, 0, 0, 0, 0, 0 };
- static const uint8_t *const obmc_masks[] = {
- obmc_mask_2, obmc_mask_4, obmc_mask_8, obmc_mask_16, obmc_mask_32
- };
const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
@@ -593,7 +581,7 @@
if (res) return res;
f->dsp->mc.blend(&dst[x * h_mul], dst_stride, lap,
h_mul * ow4, v_mul * oh4,
- obmc_masks[imin(b_dim[3], 4) - ss_ver], 1);
+ &dav1d_obmc_masks[v_mul * oh4], 1);
i++;
}
x += imax(a_b_dim[0], 2);
@@ -615,9 +603,9 @@
&f->refp[l_r->ref[0] - 1],
dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
if (res) return res;
- f->dsp->mc.blend(&dst[y * v_mul * PXSTRIDE(dst_stride)], dst_stride,
- lap, h_mul * ow4, v_mul * oh4,
- obmc_masks[imin(b_dim[2], 4) - ss_hor], 0);
+ f->dsp->mc.blend(&dst[y * v_mul * PXSTRIDE(dst_stride)],
+ dst_stride, lap, h_mul * ow4, v_mul * oh4,
+ &dav1d_obmc_masks[h_mul * ow4], 0);
i++;
}
y += imax(l_b_dim[1], 2);
--- a/src/tables.c
+++ b/src/tables.c
@@ -821,3 +821,19 @@
14, 0, 12, 0, 11, 0, 9, 0
}
};
+
+const uint8_t ALIGN(dav1d_obmc_masks[64], 32) = {
+ /* Unused; pads the table so that the mask for size n begins at index n */
+ 0, 0,
+ /* size 2 (indices 2-3) */
+ 19, 0,
+ /* size 4 (indices 4-7) */
+ 25, 14, 5, 0,
+ /* size 8 (indices 8-15) */
+ 28, 22, 16, 11, 7, 3, 0, 0,
+ /* size 16 (indices 16-31) */
+ 30, 27, 24, 21, 18, 15, 12, 10, 8, 6, 4, 3, 0, 0, 0, 0,
+ /* size 32 (indices 32-63) */
+ 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9,
+ 8, 7, 6, 5, 4, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+};
--- a/src/tables.h
+++ b/src/tables.h
@@ -116,4 +116,6 @@
extern const int16_t dav1d_dr_intra_derivative[90];
extern const int8_t dav1d_filter_intra_taps[5][64];
+extern const uint8_t dav1d_obmc_masks[64]; /* mask for size n (2, 4, 8, 16, 32) starts at index n */
+
#endif /* __DAV1D_SRC_TABLES_H__ */
--- a/src/wedge.c
+++ b/src/wedge.c
@@ -272,16 +272,18 @@
#undef fill
}
-static uint8_t ii_dc_mask[32 * 32];
-static uint8_t ii_nondc_mask_32x32[N_INTER_INTRA_PRED_MODES - 1][32 * 32];
-static uint8_t ii_nondc_mask_16x32[N_INTER_INTRA_PRED_MODES - 1][16 * 32];
-static uint8_t ii_nondc_mask_16x16[N_INTER_INTRA_PRED_MODES - 1][16 * 16];
-static uint8_t ii_nondc_mask_8x32[N_INTER_INTRA_PRED_MODES - 1][8 * 32];
-static uint8_t ii_nondc_mask_8x16[N_INTER_INTRA_PRED_MODES - 1][8 * 16];
-static uint8_t ii_nondc_mask_8x8[N_INTER_INTRA_PRED_MODES - 1][8 * 8];
-static uint8_t ii_nondc_mask_4x16[N_INTER_INTRA_PRED_MODES - 1][4 * 16];
-static uint8_t ii_nondc_mask_4x8[N_INTER_INTRA_PRED_MODES - 1][4 * 8];
-static uint8_t ii_nondc_mask_4x4[N_INTER_INTRA_PRED_MODES - 1][4 * 4];
+#define N_II_PRED_MODES (N_INTER_INTRA_PRED_MODES - 1) /* presumably all modes except II_DC_PRED, which uses ii_dc_mask; confirm against the enum */
+static uint8_t ALIGN(ii_dc_mask[32 * 32], 32);
+static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 32);
+static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 32);
+static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 32);
+static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 32);
+static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 32);
+static uint8_t ALIGN(ii_nondc_mask_8x8 [N_II_PRED_MODES][ 8 * 8], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x8 [N_II_PRED_MODES][ 4 * 8], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x4 [N_II_PRED_MODES][ 4 * 4], 32);
+#undef N_II_PRED_MODES
#define set1(sz) \
[II_DC_PRED] = ii_dc_mask, \