ref: 98ed9be69b08f5438cce7e696b2c8eadfb3ce905
parent: 6ea3fda58c17ec8d55d7fc90eb5305ade3f4ebbc
author: Victorien Le Couviour--Tuffet <victorien@videolan.org>
date: Wed Apr 15 14:18:30 EDT 2020
Fix MC masks alignment for sizes >= 64 for AVX-512. Those need to be 64-byte aligned when w*h >= 64, as we will try to load by 64 bytes. (Also realigns the 4x4 masks to 16, as a 32-byte alignment is unnecessary.)
--- a/src/wedge.c
+++ b/src/wedge.c
@@ -83,35 +83,35 @@
{ WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
-static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 * 8], 32);
-static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 * 8], 32);
-static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 * 8 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 * 8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_8x8 [2 * 16 * 8 * 8], 32);
+static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 * 8], 64);
+static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 * 8], 64);
+static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 * 8 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 * 8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_8x8 [2 * 16 * 8 * 8], 64);
-static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 * 8], 32);
-static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 * 8 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 * 8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_422_8x8 [2 * 16 * 8 * 8], 32);
-static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 * 4 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 * 4 * 16], 32);
+static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 * 8], 64);
+static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 * 8 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 * 8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_422_8x8 [2 * 16 * 8 * 8], 64);
+static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 * 4 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 * 4 * 16], 64);
static uint8_t ALIGN(wedge_masks_422_4x8 [2 * 16 * 4 * 8], 32);
-static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 * 8], 32);
-static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 * 4], 32);
-static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 * 8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_420_8x8 [2 * 16 * 8 * 8], 32);
-static uint8_t ALIGN(wedge_masks_420_8x4 [2 * 16 * 8 * 4], 32);
-static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 * 4 * 16], 32);
+static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 * 8], 64);
+static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 * 4], 64);
+static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 * 8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_420_8x8 [2 * 16 * 8 * 8], 64);
+static uint8_t ALIGN(wedge_masks_420_8x4 [2 * 16 * 8 * 4], 64);
+static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 * 4 * 16], 64);
static uint8_t ALIGN(wedge_masks_420_4x8 [2 * 16 * 4 * 8], 32);
-static uint8_t ALIGN(wedge_masks_420_4x4 [2 * 16 * 4 * 4], 32);
+static uint8_t ALIGN(wedge_masks_420_4x4 [2 * 16 * 4 * 4], 16);
const uint8_t *dav1d_wedge_masks[N_BS_SIZES][3][2][16];
@@ -274,16 +274,16 @@
}
#define N_II_PRED_MODES (N_INTER_INTRA_PRED_MODES - 1)
-static uint8_t ALIGN(ii_dc_mask[32 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x8 [N_II_PRED_MODES][ 8 * 8], 32);
-static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 32);
+static uint8_t ALIGN(ii_dc_mask[32 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x8 [N_II_PRED_MODES][ 8 * 8], 64);
+static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 64);
static uint8_t ALIGN(ii_nondc_mask_4x8 [N_II_PRED_MODES][ 4 * 8], 32);
-static uint8_t ALIGN(ii_nondc_mask_4x4 [N_II_PRED_MODES][ 4 * 4], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x4 [N_II_PRED_MODES][ 4 * 4], 16);
#undef N_II_PRED_MODES
#define set1(sz) \