shithub: libvpx

Download patch

ref: a5d499e16570d00d5e1348b1c7977ced7af3670f
parent: 583859d7395ca70c3b1ca0acc1258a720470a807
parent: a3c2774126a00674ea70fe6e2d722ea36b9f3527
author: Scott LaVarnway <slavarnway@google.com>
date: Mon Jun 25 14:45:45 EDT 2018

Merge "Add vpx_highbd_avg_8x8, vpx_highbd_avg_4x4"

--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -22,6 +22,7 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+#include "vpx/vpx_codec.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/vpx_timer.h"
 
@@ -28,22 +29,18 @@
 using libvpx_test::ACMRandom;
 
 namespace {
+
+template <typename Pixel>
 class AverageTestBase : public ::testing::Test {
  public:
   AverageTestBase(int width, int height) : width_(width), height_(height) {}
 
-  static void SetUpTestCase() {
-    source_data_ = reinterpret_cast<uint8_t *>(
-        vpx_memalign(kDataAlignment, kDataBlockSize));
-  }
-
-  static void TearDownTestCase() {
+  virtual void TearDown() {
     vpx_free(source_data_);
     source_data_ = NULL;
+    libvpx_test::ClearSystemState();
   }
 
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
  protected:
   // Handle blocks up to 4 blocks 64x64 with stride up to 128
   static const int kDataAlignment = 16;
@@ -50,12 +47,16 @@
   static const int kDataBlockSize = 64 * 128;
 
   virtual void SetUp() {
+    source_data_ = reinterpret_cast<Pixel *>(
+        vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
+    ASSERT_TRUE(source_data_ != NULL);
     source_stride_ = (width_ + 31) & ~31;
+    bit_depth_ = 8;
     rnd_.Reset(ACMRandom::DeterministicSeed());
   }
 
   // Sum Pixels
-  static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
+  static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) {
     unsigned int average = 0;
     for (int h = 0; h < 8; ++h) {
       for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
@@ -63,7 +64,7 @@
     return ((average + 32) >> 6);
   }
 
-  static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
+  static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) {
     unsigned int average = 0;
     for (int h = 0; h < 4; ++h) {
       for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
@@ -71,7 +72,7 @@
     return ((average + 8) >> 4);
   }
 
-  void FillConstant(uint8_t fill_constant) {
+  void FillConstant(Pixel fill_constant) {
     for (int i = 0; i < width_ * height_; ++i) {
       source_data_[i] = fill_constant;
     }
@@ -79,13 +80,14 @@
 
   void FillRandom() {
     for (int i = 0; i < width_ * height_; ++i) {
-      source_data_[i] = rnd_.Rand8();
+      source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1);
     }
   }
 
   int width_, height_;
-  static uint8_t *source_data_;
+  Pixel *source_data_;
   int source_stride_;
+  int bit_depth_;
 
   ACMRandom rnd_;
 };
@@ -93,7 +95,7 @@
 
 typedef ::testing::tuple<int, int, int, int, AverageFunction> AvgFunc;
 
-class AverageTest : public AverageTestBase,
+class AverageTest : public AverageTestBase<uint8_t>,
                     public ::testing::WithParamInterface<AvgFunc> {
  public:
   AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
@@ -119,12 +121,40 @@
   }
 };
 
+#if CONFIG_VP9_HIGHBITDEPTH
+class AverageTestHBD : public AverageTestBase<uint16_t>,
+                       public ::testing::WithParamInterface<AvgFunc> {
+ public:
+  AverageTestHBD() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  void CheckAverages() {
+    const int block_size = GET_PARAM(3);
+    unsigned int expected = 0;
+    if (block_size == 8) {
+      expected =
+          ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_);
+    } else if (block_size == 4) {
+      expected =
+          ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_);
+    }
+
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(4)(
+        CONVERT_TO_BYTEPTR(source_data_ + GET_PARAM(2)), source_stride_));
+    unsigned int actual = GET_PARAM(4)(
+        CONVERT_TO_BYTEPTR(source_data_ + GET_PARAM(2)), source_stride_);
+
+    EXPECT_EQ(expected, actual);
+  }
+};
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
                               const int ref_stride, const int height);
 
 typedef ::testing::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
 
-class IntProRowTest : public AverageTestBase,
+class IntProRowTest : public AverageTestBase<uint8_t>,
                       public ::testing::WithParamInterface<IntProRowParam> {
  public:
   IntProRowTest()
@@ -135,6 +165,10 @@
 
  protected:
   virtual void SetUp() {
+    source_data_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
+    ASSERT_TRUE(source_data_ != NULL);
+
     hbuf_asm_ = reinterpret_cast<int16_t *>(
         vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
     hbuf_c_ = reinterpret_cast<int16_t *>(
@@ -142,6 +176,8 @@
   }
 
   virtual void TearDown() {
+    vpx_free(source_data_);
+    source_data_ = NULL;
     vpx_free(hbuf_c_);
     hbuf_c_ = NULL;
     vpx_free(hbuf_asm_);
@@ -166,7 +202,7 @@
 
 typedef ::testing::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
 
-class IntProColTest : public AverageTestBase,
+class IntProColTest : public AverageTestBase<uint8_t>,
                       public ::testing::WithParamInterface<IntProColParam> {
  public:
   IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) {
@@ -288,8 +324,6 @@
   ACMRandom rnd_;
 };
 
-uint8_t *AverageTestBase::source_data_ = NULL;
-
 TEST_P(AverageTest, MinValue) {
   FillConstant(0);
   CheckAverages();
@@ -308,7 +342,28 @@
     CheckAverages();
   }
 }
+#if CONFIG_VP9_HIGHBITDEPTH
+TEST_P(AverageTestHBD, MinValue) {
+  FillConstant(0);
+  CheckAverages();
+}
 
+TEST_P(AverageTestHBD, MaxValue) {
+  FillConstant((1 << VPX_BITS_12) - 1);
+  CheckAverages();
+}
+
+TEST_P(AverageTestHBD, Random) {
+  bit_depth_ = VPX_BITS_12;
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  for (int i = 0; i < 1000; i++) {
+    FillRandom();
+    CheckAverages();
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 TEST_P(IntProRowTest, MinValue) {
   FillConstant(0);
   RunComparison();
@@ -434,6 +489,20 @@
     C, AverageTest,
     ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c),
                       make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c)));
+
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    C, AverageTestHBD,
+    ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_c),
+                      make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_c)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AverageTestHBD,
+    ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_sse2),
+                      make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_sse2)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 
 INSTANTIATE_TEST_CASE_P(C, SatdTest,
                         ::testing::Values(make_tuple(16, &vpx_satd_c),
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -985,7 +985,11 @@
   # Avg
   #
   add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
+  specialize qw/vpx_highbd_avg_8x8 sse2/;
+
   add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
+  specialize qw/vpx_highbd_avg_4x4 sse2/;
+
   add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
 
   add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
--- a/vpx_dsp/x86/avg_intrin_sse2.c
+++ b/vpx_dsp/x86/avg_intrin_sse2.c
@@ -138,6 +138,56 @@
   return (avg + 8) >> 4;
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+unsigned int vpx_highbd_avg_8x8_sse2(const uint8_t *s8, int p) {
+  __m128i s0, s1;
+  unsigned int avg;
+  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
+  const __m128i zero = _mm_setzero_si128();
+  s0 = _mm_loadu_si128((const __m128i *)(s));
+  s1 = _mm_loadu_si128((const __m128i *)(s + p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_loadu_si128((const __m128i *)(s + 2 * p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_loadu_si128((const __m128i *)(s + 3 * p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_loadu_si128((const __m128i *)(s + 4 * p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_loadu_si128((const __m128i *)(s + 5 * p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_loadu_si128((const __m128i *)(s + 6 * p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_loadu_si128((const __m128i *)(s + 7 * p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpackhi_epi16(s0, zero);
+  s0 = _mm_unpacklo_epi16(s0, zero);
+  s0 = _mm_add_epi32(s0, s1);
+  s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 8));
+  s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 4));
+  avg = _mm_cvtsi128_si32(s0);
+
+  return (avg + 32) >> 6;
+}
+
+unsigned int vpx_highbd_avg_4x4_sse2(const uint8_t *s8, int p) {
+  __m128i s0, s1;
+  unsigned int avg;
+  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
+  s0 = _mm_loadl_epi64((const __m128i *)(s));
+  s1 = _mm_loadl_epi64((const __m128i *)(s + p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_loadl_epi64((const __m128i *)(s + 2 * p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_loadl_epi64((const __m128i *)(s + 3 * p));
+  s0 = _mm_adds_epu16(s0, s1);
+  s0 = _mm_add_epi16(s0, _mm_srli_si128(s0, 4));
+  s0 = _mm_add_epi16(s0, _mm_srli_si128(s0, 2));
+  avg = _mm_extract_epi16(s0, 0);
+
+  return (avg + 8) >> 4;
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 static void hadamard_col8_sse2(__m128i *in, int iter) {
   __m128i a0 = in[0];
   __m128i a1 = in[1];