ref: 0e95039bd9ad4108be635d38151f949f74e6a4de
parent: 6822fb2f09f580c3d7ddb7955d8177460cb63702
parent: bc4bcca3fdcc839794daf6cc0aa9eacc3befb854
author: Scott LaVarnway <slavarnway@google.com>
date: Wed Sep 6 17:53:32 EDT 2017
Merge "vpxdsp: [x86] add highbd_dc_128_predictor functions"
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -483,7 +483,8 @@
HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred4,
vpx_highbd_dc_predictor_4x4_sse2,
vpx_highbd_dc_left_predictor_4x4_sse2,
- vpx_highbd_dc_top_predictor_4x4_sse2, NULL,
+ vpx_highbd_dc_top_predictor_4x4_sse2,
+ vpx_highbd_dc_128_predictor_4x4_sse2,  // SSE2 dc_128 variant takes the argument position that was NULL before this change
vpx_highbd_v_predictor_4x4_sse2,
vpx_highbd_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL,
NULL, NULL, vpx_highbd_tm_predictor_4x4_c)
@@ -491,7 +492,8 @@
HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
vpx_highbd_dc_predictor_8x8_sse2,
vpx_highbd_dc_left_predictor_8x8_sse2,
- vpx_highbd_dc_top_predictor_8x8_sse2, NULL,
+ vpx_highbd_dc_top_predictor_8x8_sse2,
+ vpx_highbd_dc_128_predictor_8x8_sse2,  // SSE2 dc_128 variant takes the argument position that was NULL before this change
vpx_highbd_v_predictor_8x8_sse2,
vpx_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2)
@@ -499,7 +501,8 @@
HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
vpx_highbd_dc_predictor_16x16_sse2,
vpx_highbd_dc_left_predictor_16x16_sse2,
- vpx_highbd_dc_top_predictor_16x16_sse2, NULL,
+ vpx_highbd_dc_top_predictor_16x16_sse2,
+ vpx_highbd_dc_128_predictor_16x16_sse2,  // SSE2 dc_128 variant takes the argument position that was NULL before this change
vpx_highbd_v_predictor_16x16_sse2,
vpx_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
NULL, NULL, NULL, vpx_highbd_tm_predictor_16x16_sse2)
@@ -507,7 +510,8 @@
HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred32,
vpx_highbd_dc_predictor_32x32_sse2,
vpx_highbd_dc_left_predictor_32x32_sse2,
- vpx_highbd_dc_top_predictor_32x32_sse2, NULL,
+ vpx_highbd_dc_top_predictor_32x32_sse2,
+ vpx_highbd_dc_128_predictor_32x32_sse2,  // SSE2 dc_128 variant takes the argument position that was NULL before this change
vpx_highbd_v_predictor_32x32_sse2,
vpx_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
NULL, NULL, NULL, vpx_highbd_tm_predictor_32x32_sse2)
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -471,6 +471,14 @@
INSTANTIATE_TEST_CASE_P(
SSE2_TO_C_8, VP9HighbdIntraPredTest,
::testing::Values(
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
+ &vpx_highbd_dc_128_predictor_4x4_c, 4, 8),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
+ &vpx_highbd_dc_128_predictor_8x8_c, 8, 8),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
+ &vpx_highbd_dc_128_predictor_16x16_c, 16, 8),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
+ &vpx_highbd_dc_128_predictor_32x32_c, 32, 8),
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
&vpx_highbd_dc_left_predictor_4x4_c, 4, 8),
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
@@ -523,6 +531,14 @@
INSTANTIATE_TEST_CASE_P(
SSE2_TO_C_10, VP9HighbdIntraPredTest,
::testing::Values(
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
+ &vpx_highbd_dc_128_predictor_4x4_c, 4, 10),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
+ &vpx_highbd_dc_128_predictor_8x8_c, 8, 10),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
+ &vpx_highbd_dc_128_predictor_16x16_c, 16, 10),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
+ &vpx_highbd_dc_128_predictor_32x32_c, 32, 10),
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
&vpx_highbd_dc_left_predictor_4x4_c, 4, 10),
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
@@ -575,6 +591,14 @@
INSTANTIATE_TEST_CASE_P(
SSE2_TO_C_12, VP9HighbdIntraPredTest,
::testing::Values(
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2,
+ &vpx_highbd_dc_128_predictor_4x4_c, 4, 12),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2,
+ &vpx_highbd_dc_128_predictor_8x8_c, 8, 12),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2,
+ &vpx_highbd_dc_128_predictor_16x16_c, 16, 12),
+ HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2,
+ &vpx_highbd_dc_128_predictor_32x32_c, 32, 12),
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2,
&vpx_highbd_dc_left_predictor_4x4_c, 4, 12),
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2,
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -220,7 +220,7 @@
specialize qw/vpx_highbd_dc_left_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_128_predictor_4x4 neon/;
+ specialize qw/vpx_highbd_dc_128_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
@@ -255,7 +255,7 @@
specialize qw/vpx_highbd_dc_left_predictor_8x8 neon sse2/;
add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_128_predictor_8x8 neon/;
+ specialize qw/vpx_highbd_dc_128_predictor_8x8 neon sse2/;
add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
@@ -290,7 +290,7 @@
specialize qw/vpx_highbd_dc_left_predictor_16x16 neon sse2/;
add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_128_predictor_16x16 neon/;
+ specialize qw/vpx_highbd_dc_128_predictor_16x16 neon sse2/;
add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
@@ -325,7 +325,7 @@
specialize qw/vpx_highbd_dc_left_predictor_32x32 neon sse2/;
add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_128_predictor_32x32 neon/;
+ specialize qw/vpx_highbd_dc_128_predictor_32x32 neon sse2/;
} # CONFIG_VP9_HIGHBITDEPTH
#
--- a/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c
+++ b/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c
@@ -199,6 +199,16 @@
dc_store_4x4(dst, stride, &dc);
}
+void vpx_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,  // SSE2 "dc_128" predictor for 4x4 high-bitdepth blocks: the value handed to the store helper depends only on bd
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));  // half of the 2^bd sample range (128 when bd == 8), placed in the low 32 bits with the remaining bits zeroed
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);  // copy 16-bit lane 0 into lanes 0-3; the upper 64 bits pass through unchanged (still zero)
+ (void)above;  // neighbouring samples are not read; the cast marks the parameter as intentionally unused
+ (void)left;  // likewise unused
+ dc_store_4x4(dst, stride, &dc_dup);  // helper defined outside this hunk; presumably writes the value to every sample of the 4x4 block at dst -- confirm
+}
+
//------------------------------------------------------------------------------
// DC 8x8
@@ -243,6 +253,16 @@
dc_store_8x8(dst, stride, &dc);
}
+void vpx_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,  // SSE2 "dc_128" predictor for 8x8 high-bitdepth blocks: the value handed to the store helper depends only on bd
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));  // half of the 2^bd sample range (128 when bd == 8), placed in the low 32 bits with the remaining bits zeroed
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);  // copy 16-bit lane 0 into lanes 0-3 only; lanes 4-7 stay zero. NOTE(review): confirm dc_store_8x8 broadcasts to all eight lanes before storing an 8-sample row
+ (void)above;  // neighbouring samples are not read; the cast marks the parameter as intentionally unused
+ (void)left;  // likewise unused
+ dc_store_8x8(dst, stride, &dc_dup);  // helper defined outside this hunk; presumably writes the value to every sample of the 8x8 block at dst -- confirm
+}
+
//------------------------------------------------------------------------------
// DC 16x16
@@ -285,6 +305,16 @@
dc_store_16x16(dst, stride, &dc);
}
+void vpx_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,  // SSE2 "dc_128" predictor for 16x16 high-bitdepth blocks: the value handed to the store helper depends only on bd
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));  // half of the 2^bd sample range (128 when bd == 8), placed in the low 32 bits with the remaining bits zeroed
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);  // copy 16-bit lane 0 into lanes 0-3 only; lanes 4-7 stay zero. NOTE(review): confirm dc_store_16x16 broadcasts to all eight lanes before storing full rows
+ (void)above;  // neighbouring samples are not read; the cast marks the parameter as intentionally unused
+ (void)left;  // likewise unused
+ dc_store_16x16(dst, stride, &dc_dup);  // helper defined outside this hunk; presumably writes the value to every sample of the 16x16 block at dst -- confirm
+}
+
//------------------------------------------------------------------------------
// DC 32x32
@@ -330,4 +360,14 @@
(void)left;
(void)bd;
dc_store_32x32(dst, stride, &dc);
+}
+
+void vpx_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,  // SSE2 "dc_128" predictor for 32x32 high-bitdepth blocks: the value handed to the store helper depends only on bd
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));  // half of the 2^bd sample range (128 when bd == 8), placed in the low 32 bits with the remaining bits zeroed
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);  // copy 16-bit lane 0 into lanes 0-3 only; lanes 4-7 stay zero. NOTE(review): confirm dc_store_32x32 broadcasts to all eight lanes before storing full rows
+ (void)above;  // neighbouring samples are not read; the cast marks the parameter as intentionally unused
+ (void)left;  // likewise unused
+ dc_store_32x32(dst, stride, &dc_dup);  // helper defined outside this hunk; presumably writes the value to every sample of the 32x32 block at dst -- confirm
}