shithub: libvpx

Download patch

ref: 3e0138edb72b2ef1a77d4b636d7c60f74a7abab9
parent: 4993158ee5120b05dfee5cba23d45369c97b7df6
author: James Zern <jzern@google.com>
date: Wed Nov 18 18:17:27 EST 2015

vp9_satd: return an int

the final sum may use up to 26 bits, which does not fit in the previous
int16_t return type

+ add a unit test
+ disable the sse2 version, as its result will roll over; this will be
fixed in a future commit

Change-Id: I2a49811dfaa06abfd9fa1e1e65ed7cd68e4c97ce

--- a/test/vp9_avg_test.cc
+++ b/test/vp9_avg_test.cc
@@ -194,7 +194,49 @@
   int16_t sum_c_;
 };
 
+typedef int (*SatdFunc)(const int16_t *coeffs, int length);
+typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;
 
+class SatdTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<SatdTestParam> {
+ protected:
+  virtual void SetUp() {
+    satd_size_ = GET_PARAM(0);
+    satd_func_ = GET_PARAM(1);
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    src_ = reinterpret_cast<int16_t*>(
+        vpx_memalign(16, sizeof(*src_) * satd_size_));
+    ASSERT_TRUE(src_ != NULL);
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+    vpx_free(src_);
+  }
+
+  void FillConstant(const int16_t val) {
+    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
+  }
+
+  void FillRandom() {
+    for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
+  }
+
+  void Check(const int expected) {
+    int total;
+    ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_));
+    EXPECT_EQ(expected, total);
+  }
+
+  int satd_size_;
+
+ private:
+  int16_t *src_;
+  SatdFunc satd_func_;
+  ACMRandom rnd_;
+};
+
 uint8_t* AverageTestBase::source_data_ = NULL;
 
 TEST_P(AverageTest, MinValue) {
@@ -246,6 +288,36 @@
   RunComparison();
 }
 
+
+TEST_P(SatdTest, MinValue) {
+  const int kMin = -32640;
+  const int expected = -kMin * satd_size_;
+  FillConstant(kMin);
+  Check(expected);
+}
+
+TEST_P(SatdTest, MaxValue) {
+  const int kMax = 32640;
+  const int expected = kMax * satd_size_;
+  FillConstant(kMax);
+  Check(expected);
+}
+
+TEST_P(SatdTest, Random) {
+  int expected;
+  switch (satd_size_) {
+    case 16: expected = 205298; break;
+    case 64: expected = 1113950; break;
+    case 256: expected = 4268415; break;
+    case 1024: expected = 16954082; break;
+    default:
+      FAIL() << "Invalid satd size (" << satd_size_
+             << ") valid: 16/64/256/1024";
+  }
+  FillRandom();
+  Check(expected);
+}
+
 using std::tr1::make_tuple;
 
 INSTANTIATE_TEST_CASE_P(
@@ -253,6 +325,14 @@
     ::testing::Values(
         make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
         make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
+
+INSTANTIATE_TEST_CASE_P(
+    C, SatdTest,
+    ::testing::Values(
+        make_tuple(16, &vp9_satd_c),
+        make_tuple(64, &vp9_satd_c),
+        make_tuple(256, &vp9_satd_c),
+        make_tuple(1024, &vp9_satd_c)));
 
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -209,8 +209,8 @@
 add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
 specialize qw/vp9_hadamard_16x16 sse2/;
 
-add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
-specialize qw/vp9_satd sse2/;
+add_proto qw/int vp9_satd/, "const int16_t *coeff, int length";
+specialize qw/vp9_satd/;
 
 add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
 specialize qw/vp9_int_pro_row sse2 neon/;
--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -117,7 +117,7 @@
 
 // coeff: 16 bits, dynamic range [-32640, 32640].
 // length: value range {16, 64, 256, 1024}.
-int16_t vp9_satd_c(const int16_t *coeff, int length) {
+int vp9_satd_c(const int16_t *coeff, int length) {
   int i;
   int satd = 0;
   for (i = 0; i < length; ++i)
@@ -124,7 +124,7 @@
     satd += abs(coeff[i]);
 
   // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
-  return (int16_t)satd;
+  return satd;
 }
 
 // Integer projection onto row vectors.
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -673,7 +673,7 @@
         if (*eob == 1)
           *rate += (int)abs(qcoeff[0]);
         else if (*eob > 1)
-          *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);
+          *rate += vp9_satd((const int16_t *)qcoeff, step << 4);
 
         *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
       }
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -283,7 +283,9 @@
   }
 }
 
-int16_t vp9_satd_sse2(const int16_t *coeff, int length) {
+// TODO(jingning): the sum needs to be accumulated in 32-bits to avoid rollover.
+#if 0
+int vp9_satd_sse2(const int16_t *coeff, int length) {
   int i;
   __m128i sum = _mm_load_si128((const __m128i *)coeff);
   __m128i sign = _mm_srai_epi16(sum, 15);
@@ -309,6 +311,7 @@
 
   return _mm_extract_epi16(sum, 0);
 }
+#endif
 
 void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
                           const int ref_stride, const int height) {