shithub: libvpx

Download patch

ref: ab5704f02ce471b3dd792dee2872d452b3537d30
parent: 20973508dab25c5a0a3e369d6b1074462a0c5643
parent: c39a05ff61db40d39717df583b8aefd42cc3a2bf
author: Scott LaVarnway <slavarnway@google.com>
date: Thu Aug 31 17:34:27 EDT 2017

Merge "vpxdsp: [x86] add highbd_dc_left_predictor functions"

--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -481,7 +481,8 @@
 
 #if HAVE_SSE2
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred4,
-                       vpx_highbd_dc_predictor_4x4_sse2, NULL,
+                       vpx_highbd_dc_predictor_4x4_sse2,
+                       vpx_highbd_dc_left_predictor_4x4_sse2,
                        vpx_highbd_dc_top_predictor_4x4_sse2, NULL,
                        vpx_highbd_v_predictor_4x4_sse2,
                        vpx_highbd_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL,
@@ -488,7 +489,8 @@
                        NULL, NULL, vpx_highbd_tm_predictor_4x4_c)
 
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
-                       vpx_highbd_dc_predictor_8x8_sse2, NULL,
+                       vpx_highbd_dc_predictor_8x8_sse2,
+                       vpx_highbd_dc_left_predictor_8x8_sse2,
                        vpx_highbd_dc_top_predictor_8x8_sse2, NULL,
                        vpx_highbd_v_predictor_8x8_sse2,
                        vpx_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
@@ -495,7 +497,8 @@
                        NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2)
 
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
-                       vpx_highbd_dc_predictor_16x16_sse2, NULL,
+                       vpx_highbd_dc_predictor_16x16_sse2,
+                       vpx_highbd_dc_left_predictor_16x16_sse2,
                        vpx_highbd_dc_top_predictor_16x16_sse2, NULL,
                        vpx_highbd_v_predictor_16x16_sse2,
                        vpx_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
@@ -502,7 +505,8 @@
                        NULL, NULL, NULL, vpx_highbd_tm_predictor_16x16_sse2)
 
 HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred32,
-                       vpx_highbd_dc_predictor_32x32_sse2, NULL,
+                       vpx_highbd_dc_predictor_32x32_sse2,
+                       vpx_highbd_dc_left_predictor_32x32_sse2,
                        vpx_highbd_dc_top_predictor_32x32_sse2, NULL,
                        vpx_highbd_v_predictor_32x32_sse2,
                        vpx_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -471,6 +471,14 @@
 INSTANTIATE_TEST_CASE_P(
     SSE2_TO_C_8, VP9HighbdIntraPredTest,
     ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
+                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
+                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2,
+                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2,
+                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 8),
         HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
                              &vpx_highbd_dc_predictor_4x4_c, 4, 8),
         HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
@@ -515,6 +523,14 @@
 INSTANTIATE_TEST_CASE_P(
     SSE2_TO_C_10, VP9HighbdIntraPredTest,
     ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
+                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
+                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2,
+                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2,
+                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 10),
         HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
                              &vpx_highbd_dc_predictor_4x4_c, 4, 10),
         HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
@@ -559,6 +575,14 @@
 INSTANTIATE_TEST_CASE_P(
     SSE2_TO_C_12, VP9HighbdIntraPredTest,
     ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
+                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
+                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2,
+                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2,
+                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 12),
         HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
                              &vpx_highbd_dc_predictor_4x4_c, 4, 12),
         HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -217,7 +217,7 @@
   specialize qw/vpx_highbd_dc_top_predictor_4x4 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
-  specialize qw/vpx_highbd_dc_left_predictor_4x4 neon/;
+  specialize qw/vpx_highbd_dc_left_predictor_4x4 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_128_predictor_4x4 neon/;
@@ -252,7 +252,7 @@
   specialize qw/vpx_highbd_dc_top_predictor_8x8 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
-  specialize qw/vpx_highbd_dc_left_predictor_8x8 neon/;
+  specialize qw/vpx_highbd_dc_left_predictor_8x8 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_128_predictor_8x8 neon/;
@@ -287,7 +287,7 @@
   specialize qw/vpx_highbd_dc_top_predictor_16x16 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
-  specialize qw/vpx_highbd_dc_left_predictor_16x16 neon/;
+  specialize qw/vpx_highbd_dc_left_predictor_16x16 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_128_predictor_16x16 neon/;
@@ -322,7 +322,7 @@
   specialize qw/vpx_highbd_dc_top_predictor_32x32 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
-  specialize qw/vpx_highbd_dc_left_predictor_32x32 neon/;
+  specialize qw/vpx_highbd_dc_left_predictor_32x32 neon sse2/;
 
   add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_128_predictor_32x32 neon/;
--- a/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c
+++ b/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c
@@ -177,6 +177,17 @@
   }
 }
 
+void vpx_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+                                           const uint16_t *above,  // unused
+                                           const uint16_t *left, int bd) {
+  const __m128i two = _mm_cvtsi32_si128(2);  // low 32 bits = 2, upper bits 0
+  const __m128i sum = dc_sum_4(left);  // presumably sums left[0..3]; helper not shown
+  const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);  // 16-bit lanes; lane 0 = (sum + 2) >> 2
+  (void)above;  // only `left` feeds the sum; mark as intentionally unused
+  (void)bd;  // bit depth is not read by this function
+  dc_store_4x4(dst, stride, &dc);  // presumably fills the 4x4 block at dst with dc; helper not shown
+}
+
 void vpx_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
@@ -210,6 +221,17 @@
   }
 }
 
+void vpx_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
+                                           const uint16_t *above,  // unused
+                                           const uint16_t *left, int bd) {
+  const __m128i four = _mm_cvtsi32_si128(4);  // low 32 bits = 4, upper bits 0
+  const __m128i sum = dc_sum_8(left);  // presumably sums left[0..7]; helper not shown
+  const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);  // 16-bit lanes; lane 0 = (sum + 4) >> 3
+  (void)above;  // only `left` feeds the sum; mark as intentionally unused
+  (void)bd;  // bit depth is not read by this function
+  dc_store_8x8(dst, stride, &dc);  // presumably fills the 8x8 block at dst with dc; helper not shown
+}
+
 void vpx_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
@@ -241,6 +263,17 @@
   }
 }
 
+void vpx_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
+                                             const uint16_t *above,  // unused
+                                             const uint16_t *left, int bd) {
+  const __m128i eight = _mm_cvtsi32_si128(8);  // low 32 bits = 8, upper bits 0
+  const __m128i sum = dc_sum_16(left);  // presumably sums left[0..15]; helper not shown
+  const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);  // 16-bit lanes; lane 0 = (sum + 8) >> 4, logical shift
+  (void)above;  // only `left` feeds the sum; mark as intentionally unused
+  (void)bd;  // bit depth is not read by this function
+  dc_store_16x16(dst, stride, &dc);  // presumably fills the 16x16 block at dst with dc; helper not shown
+}
+
 void vpx_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
                                             const uint16_t *above,
                                             const uint16_t *left, int bd) {
@@ -275,6 +308,17 @@
     _mm_store_si128((__m128i *)(dst + 16), dc_dup);
     _mm_store_si128((__m128i *)(dst + 24), dc_dup);
   }
+}
+
+void vpx_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
+                                             const uint16_t *above,  // unused
+                                             const uint16_t *left, int bd) {
+  const __m128i sixteen = _mm_cvtsi32_si128(16);  // low 32 bits = 16, upper bits 0
+  const __m128i sum = dc_sum_32(left);  // presumably sums left[0..31] into a 32-bit lane; helper not shown
+  const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);  // 32-bit lanes here (16-bit in smaller sizes); lane 0 = (sum + 16) >> 5
+  (void)above;  // only `left` feeds the sum; mark as intentionally unused
+  (void)bd;  // bit depth is not read by this function
+  dc_store_32x32(dst, stride, &dc);  // presumably fills the 32x32 block at dst with dc; helper not shown
 }
 
 void vpx_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,