shithub: dav1d

--- a/src/tables.c

+++ b/src/tables.c

@@ -442,7 +442,7 @@

};

-const int8_t ALIGN(dav1d_mc_subpel_filters[5][15][8], 8) = {

+const int8_t ALIGN(dav1d_mc_subpel_filters[5+ARCH_X86_64][15][8], 8) = {

     [DAV1D_FILTER_8TAP_REGULAR] = {

         {   0,   1,  -3,  63,   4,  -1,   0,   0 },

         {   0,   1,  -5,  61,   9,  -2,   0,   0 },

@@ -524,6 +524,27 @@

         {   0,   0,   2,  20,  31,  11,   0,   0 },

         {   0,   0,   2,  18,  31,  13,   0,   0 },

         {   0,   0,   1,  17,  31,  15,   0,   0 }

+#if ARCH_X86_64

+    /* Scaled bilinear prediction is very rarely used, so rather than

+     * adding dedicated code for it, add a table entry holding bilinear

+     * coefficients that the put/prep_8tap_scaled code uses as its filter. */

+    }, [5] = {

+        {   0,   0,   0, 60,   4,   0,   0,   0 },

+        {   0,   0,   0, 56,   8,   0,   0,   0 },

+        {   0,   0,   0, 52,  12,   0,   0,   0 },

+        {   0,   0,   0, 48,  16,   0,   0,   0 },

+        {   0,   0,   0, 44,  20,   0,   0,   0 },

+        {   0,   0,   0, 40,  24,   0,   0,   0 },

+        {   0,   0,   0, 36,  28,   0,   0,   0 },

+        {   0,   0,   0, 32,  32,   0,   0,   0 },

+        {   0,   0,   0, 28,  36,   0,   0,   0 },

+        {   0,   0,   0, 24,  40,   0,   0,   0 },

+        {   0,   0,   0, 20,  44,   0,   0,   0 },

+        {   0,   0,   0, 16,  48,   0,   0,   0 },

+        {   0,   0,   0, 12,  52,   0,   0,   0 },

+        {   0,   0,   0,  8,  56,   0,   0,   0 },

+        {   0,   0,   0,  4,  60,   0,   0,   0 }

+#endif

};

--- a/src/tables.h

+++ b/src/tables.h

@@ -110,7 +110,7 @@

 extern const int16_t dav1d_sgr_params[16][4];

 extern const uint8_t dav1d_sgr_x_by_x[256];

-extern const int8_t dav1d_mc_subpel_filters[5][15][8];

+extern const int8_t dav1d_mc_subpel_filters[5+ARCH_X86_64][15][8];

 extern const int8_t dav1d_mc_warp_filter[193][8];

 extern const int8_t dav1d_resize_filter[64][8];

--- a/src/x86/mc.asm

+++ b/src/x86/mc.asm

@@ -5719,12 +5719,21 @@

 %undef isprep

 %endmacro

+%macro BILIN_SCALED_FN 1

+cglobal %1_bilin_scaled ; %1 is the prefix (instantiated with put and prep); this entry point has no filtering code of its own

+    mov                 t0d, (5*15 << 16) | 5*15 ; both 16-bit halves hold 5*15; presumably the row offset of table entry [5] (15 rows per filter) - confirm

+    mov                 t1d, (5*15 << 16) | 5*15 ; NOTE(review): t0d/t1d look like the two filter selectors read by 8tap_scaled - confirm against that code

+    jmp mangle(private_prefix %+ _%1_8tap_scaled %+ SUFFIX) ; tail-jump into the matching %1_8tap_scaled function, which does the actual work

+%endmacro

+%define PUT_8TAP_SCALED_FN FN put_8tap_scaled,

+%define PREP_8TAP_SCALED_FN FN prep_8tap_scaled,

 %if WIN64

 DECLARE_REG_TMP 6, 5

 %else

 DECLARE_REG_TMP 6, 8

 %endif

-%define PUT_8TAP_SCALED_FN FN put_8tap_scaled,

+BILIN_SCALED_FN put

 PUT_8TAP_SCALED_FN regular,        REGULAR, REGULAR

 PUT_8TAP_SCALED_FN regular_sharp,  REGULAR, SHARP

 PUT_8TAP_SCALED_FN regular_smooth, REGULAR, SMOOTH

@@ -5741,7 +5750,7 @@

 %else

 DECLARE_REG_TMP 6, 7

 %endif

-%define PREP_8TAP_SCALED_FN FN prep_8tap_scaled,

+BILIN_SCALED_FN prep

 PREP_8TAP_SCALED_FN regular,        REGULAR, REGULAR

 PREP_8TAP_SCALED_FN regular_sharp,  REGULAR, SHARP

 PREP_8TAP_SCALED_FN regular_smooth, REGULAR, SMOOTH

--- a/src/x86/mc_init_tmpl.c

+++ b/src/x86/mc_init_tmpl.c

@@ -99,6 +99,7 @@

 decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_avx2);

 decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_regular_avx2);

 decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_smooth_avx2);

+decl_mc_scaled_fn(dav1d_put_bilin_scaled_avx2);

 decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_avx2);

 decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_smooth_avx2);

@@ -109,6 +110,7 @@

 decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_avx2);

 decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_regular_avx2);

 decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_smooth_avx2);

+decl_mct_scaled_fn(dav1d_prep_bilin_scaled_avx2);

 decl_avg_fn(dav1d_avg_avx512icl);

 decl_avg_fn(dav1d_avg_avx2);

@@ -264,6 +266,7 @@

     init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);

     init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);

     init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);

+    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);

     init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);

     init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);

@@ -274,6 +277,7 @@

     init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);

     init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);

     init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);

+    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);

     c->avg = dav1d_avg_avx2;

     c->w_avg = dav1d_w_avg_avx2;

--

⑨