shithub: libvpx

Download patch

ref: ce6318f254288e817e30cf29d59a0874cec781b6
parent: 770c6663d653539129885fde937ee37f4e184685
parent: f8d744d91a20c3bc80e82cf5f3aa7e78fe73d164
author: Johann Koenig <johannkoenig@google.com>
date: Wed Feb 1 22:03:58 EST 2017

Merge changes I43521ad3,I013659f6

* changes:
  satd highbd neon: use tran_low_t for coeff
  satd highbd sse2: use tran_low_t for coeff

--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -348,9 +348,6 @@
                       make_tuple(64, &vpx_int_pro_col_sse2,
                                  &vpx_int_pro_col_c)));
 
-// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
-// in place.
-#if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
                         ::testing::Values(make_tuple(16, &vpx_satd_sse2),
                                           make_tuple(64, &vpx_satd_sse2),
@@ -357,7 +354,6 @@
                                           make_tuple(256, &vpx_satd_sse2),
                                           make_tuple(1024, &vpx_satd_sse2)));
 #endif
-#endif
 
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
@@ -383,13 +379,11 @@
                       make_tuple(64, &vpx_int_pro_col_neon,
                                  &vpx_int_pro_col_c)));
 
-#if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
                         ::testing::Values(make_tuple(16, &vpx_satd_neon),
                                           make_tuple(64, &vpx_satd_neon),
                                           make_tuple(256, &vpx_satd_neon),
                                           make_tuple(1024, &vpx_satd_neon)));
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_NEON
 
 #if HAVE_MSA
@@ -416,6 +410,8 @@
                       make_tuple(64, &vpx_int_pro_col_msa,
                                  &vpx_int_pro_col_c)));
 
+// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
+// in place.
 #if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(MSA, SatdTest,
                         ::testing::Values(make_tuple(16, &vpx_satd_msa),
--- a/vpx_dsp/arm/avg_neon.c
+++ b/vpx_dsp/arm/avg_neon.c
@@ -15,6 +15,7 @@
 #include "./vpx_config.h"
 
 #include "vpx/vpx_integer.h"
+#include "vpx_dsp/arm/idct_neon.h"
 
 static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
   const uint32x4_t a = vpaddlq_u16(v_16x8);
@@ -64,13 +65,13 @@
 
 // coeff: 16 bits, dynamic range [-32640, 32640].
 // length: value range {16, 64, 256, 1024}.
-int vpx_satd_neon(const int16_t *coeff, int length) {
+int vpx_satd_neon(const tran_low_t *coeff, int length) {
   const int16x4_t zero = vdup_n_s16(0);
   int32x4_t accum = vdupq_n_s32(0);
 
   do {
-    const int16x8_t src0 = vld1q_s16(coeff);
-    const int16x8_t src8 = vld1q_s16(coeff + 8);
+    const int16x8_t src0 = load_tran_low_to_s16q(coeff);
+    const int16x8_t src8 = load_tran_low_to_s16q(coeff + 8);
     accum = vabal_s16(accum, vget_low_s16(src0), zero);
     accum = vabal_s16(accum, vget_high_s16(src0), zero);
     accum = vabal_s16(accum, vget_low_s16(src8), zero);
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -894,7 +894,7 @@
     specialize qw/vpx_hadamard_16x16 sse2 neon/;
 
     add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";
-    specialize qw/vpx_satd/;
+    specialize qw/vpx_satd sse2 neon/;
   } else {
     add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
     specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64";
--- a/vpx_dsp/x86/avg_intrin_sse2.c
+++ b/vpx_dsp/x86/avg_intrin_sse2.c
@@ -285,13 +285,13 @@
   }
 }
 
-int vpx_satd_sse2(const int16_t *coeff, int length) {
+int vpx_satd_sse2(const tran_low_t *coeff, int length) {
   int i;
   const __m128i zero = _mm_setzero_si128();
   __m128i accum = zero;
 
   for (i = 0; i < length; i += 8) {
-    const __m128i src_line = _mm_load_si128((const __m128i *)coeff);
+    const __m128i src_line = load_tran_low(coeff);
     const __m128i inv = _mm_sub_epi16(zero, src_line);
     const __m128i abs = _mm_max_epi16(src_line, inv);  // abs(src_line)
     const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero);