shithub: libvpx

--- a/test/partial_idct_test.cc

+++ b/test/partial_idct_test.cc

@@ -201,7 +201,19 @@

                       make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,

                                  &vpx_idct4x4_1_add_c, TX_4X4, 1)));

-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

+#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE

+#if CONFIG_VP9_HIGHBITDEPTH

+INSTANTIATE_TEST_CASE_P(

+    NEON, PartialIDctTest,

+    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,

+                                 &vpx_idct32x32_1_add_neon, TX_32X32, 1),

+                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,

+                                 &vpx_idct16x16_1_add_neon, TX_16X16, 1),

+                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,

+                                 &vpx_idct8x8_1_add_neon, TX_8X8, 1),

+                      make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,

+                                 &vpx_idct4x4_1_add_neon, TX_4X4, 1)));

+#else   // !CONFIG_VP9_HIGHBITDEPTH

 // 32x32_34_ 32x32_135_ are implemented using the 1024 version.

 INSTANTIATE_TEST_CASE_P(

     NEON, PartialIDctTest,

@@ -229,7 +241,8 @@

                                  &vpx_idct4x4_16_add_neon, TX_4X4, 16),

                       make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,

                                  &vpx_idct4x4_1_add_neon, TX_4X4, 1)));

-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

+#endif  // CONFIG_VP9_HIGHBITDEPTH

+#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 // 32x32_135_ is implemented using the 1024 version.

--- a/vpx_dsp/vpx_dsp.mk

+++ b/vpx_dsp/vpx_dsp.mk

@@ -199,23 +199,15 @@

 ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)

 ifeq ($(HAVE_NEON_ASM),yes)

-DSP_SRCS-yes  += arm/idct4x4_1_add_neon$(ASM)

 DSP_SRCS-yes  += arm/idct4x4_add_neon$(ASM)

-DSP_SRCS-yes  += arm/idct8x8_1_add_neon$(ASM)

 DSP_SRCS-yes  += arm/idct8x8_add_neon$(ASM)

-DSP_SRCS-yes  += arm/idct16x16_1_add_neon$(ASM)

 DSP_SRCS-yes  += arm/idct16x16_add_neon$(ASM)

-DSP_SRCS-yes  += arm/idct32x32_1_add_neon$(ASM)

 DSP_SRCS-yes  += arm/idct32x32_add_neon$(ASM)

 else

 ifeq ($(HAVE_NEON),yes)

-DSP_SRCS-yes  += arm/idct4x4_1_add_neon.c

 DSP_SRCS-yes  += arm/idct4x4_add_neon.c

-DSP_SRCS-yes  += arm/idct8x8_1_add_neon.c

 DSP_SRCS-yes  += arm/idct8x8_add_neon.c

-DSP_SRCS-yes  += arm/idct16x16_1_add_neon.c

 DSP_SRCS-yes  += arm/idct16x16_add_neon.c

-DSP_SRCS-yes  += arm/idct32x32_1_add_neon.c

 DSP_SRCS-yes  += arm/idct32x32_add_neon.c

 endif  # HAVE_NEON

 endif  # HAVE_NEON_ASM

@@ -233,7 +225,20 @@

 DSP_SRCS-$(HAVE_DSPR2) += mips/itrans16_dspr2.c

 DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_dspr2.c

 DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_cols_dspr2.c

-endif  # CONFIG_VP9_HIGHBITDEPTH

+endif  # !CONFIG_VP9_HIGHBITDEPTH

+ifeq ($(HAVE_NEON_ASM),yes)

+DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM)

+DSP_SRCS-yes += arm/idct8x8_1_add_neon$(ASM)

+DSP_SRCS-yes += arm/idct16x16_1_add_neon$(ASM)

+DSP_SRCS-yes += arm/idct32x32_1_add_neon$(ASM)

+else

+DSP_SRCS-$(HAVE_NEON) += arm/idct4x4_1_add_neon.c

+DSP_SRCS-$(HAVE_NEON) += arm/idct8x8_1_add_neon.c

+DSP_SRCS-$(HAVE_NEON) += arm/idct16x16_1_add_neon.c

+DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_1_add_neon.c

+endif  # HAVE_NEON_ASM

 endif  # CONFIG_VP9

 # quantization

--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl

+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -647,7 +647,7 @@

     specialize qw/vpx_idct4x4_16_add sse2/;

     add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vpx_idct4x4_1_add sse2/;

+    specialize qw/vpx_idct4x4_1_add neon sse2/;

     add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

     specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64";

@@ -656,7 +656,7 @@

     specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64";

     add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vpx_idct8x8_1_add sse2/;

+    specialize qw/vpx_idct8x8_1_add neon sse2/;

     add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

     specialize qw/vpx_idct16x16_256_add sse2/;

@@ -665,7 +665,7 @@

     specialize qw/vpx_idct16x16_10_add sse2/;

     add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vpx_idct16x16_1_add sse2/;

+    specialize qw/vpx_idct16x16_1_add neon sse2/;

     add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

     specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64";

@@ -679,7 +679,7 @@

     specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64";

     add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

-    specialize qw/vpx_idct32x32_1_add sse2/;

+    specialize qw/vpx_idct32x32_1_add neon sse2/;

     add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

     specialize qw/vpx_highbd_idct4x4_16_add sse2/;

--

⑨