shithub: libvpx

--- /dev/null

+++ b/test/minmax_test.cc

@@ -1,0 +1,132 @@

+/*

+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <stdlib.h>

+#include <string.h>

+#include "third_party/googletest/src/include/gtest/gtest.h"

+#include "./vpx_dsp_rtcd.h"

+#include "vpx/vpx_integer.h"

+#include "test/acm_random.h"

+#include "test/register_state_check.h"

+namespace {

+using ::libvpx_test::ACMRandom;

+// Signature shared by every min/max implementation exercised below: two
+// 8-bit sample pointers with their row strides, and two out-parameters that
+// receive the minimum and maximum values.
+typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride,
+                           const uint8_t *b, int b_stride,
+                           int *min, int *max);

+// Value-parameterized fixture. Each instantiation supplies one MinMaxFunc
+// implementation, and every test begins from the same deterministic seed.
+class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
+ public:
+  virtual void SetUp() {
+    // Reseed before each test so the generated samples are reproducible.
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    mm_func_ = GetParam();
+  }
+
+ protected:
+  MinMaxFunc mm_func_;  // Implementation under test.
+  ACMRandom rnd_;       // Generator for pseudo-random sample values.
+};

+// Scalar reference model: walks an 8x8 block and reports the smallest and
+// largest absolute difference between co-located samples of |a| and |b|.
+// |a_stride| and |b_stride| are the distances between consecutive rows.
+void reference_minmax(const uint8_t *a, int a_stride,
+                      const uint8_t *b, int b_stride,
+                      int *min_ret, int *max_ret) {
+  int lowest = 255;  // Largest possible difference of two 8-bit samples.
+  int highest = 0;
+  for (int row = 0; row < 8; ++row) {
+    const uint8_t *const a_row = a + row * a_stride;
+    const uint8_t *const b_row = b + row * b_stride;
+    for (int col = 0; col < 8; ++col) {
+      const int delta = abs(a_row[col] - b_row[col]);
+      if (delta < lowest) lowest = delta;
+      if (delta > highest) highest = delta;
+    }
+  }
+  *min_ret = lowest;
+  *max_ret = highest;
+}

+// Plants one small difference at each of the 64 positions in turn and checks
+// that it is reported as the minimum while the maximum stays at 255.
+TEST_P(MinMaxTest, MinValue) {
+  // |a| is all zero, so each difference equals the matching byte of |b|.
+  const uint8_t a[64] = { 0 };
+  for (int i = 0; i < 64; i++) {
+    uint8_t b[64];
+    memset(b, 255, sizeof(b));
+    b[i] = i;  // The only sample below 255, so the minimum must be i.
+    int min, max;
+    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(255, max);
+    EXPECT_EQ(i, min);
+  }
+}

+// Plants a single difference at each of the 64 positions in turn and checks
+// that it is reported as the maximum while the minimum stays at 0.
+TEST_P(MinMaxTest, MaxValue) {
+  // |a| is all zero, so each difference equals the matching byte of |b|.
+  const uint8_t a[64] = { 0 };
+  for (int i = 0; i < 64; i++) {
+    uint8_t b[64] = { 0 };
+    b[i] = i;  // The only sample that can differ, so the maximum must be i.
+    int min, max;
+    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+    EXPECT_EQ(i, max);
+    EXPECT_EQ(0, min);
+  }
+}

+// Checks the implementation under test against reference_minmax() on one
+// contiguous (stride 8) 8x8 block of pseudo-random samples.
+TEST_P(MinMaxTest, CompareReference) {
+  uint8_t a[64], b[64];
+  // Samples are drawn alternately for |a| and |b|; keep this order so the
+  // data produced from the fixture's seed does not change.
+  for (int idx = 0; idx < 64; ++idx) {
+    a[idx] = rnd_.Rand8();
+    b[idx] = rnd_.Rand8();
+  }
+
+  int min, max;
+  ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+
+  int min_ref, max_ref;
+  reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
+
+  EXPECT_EQ(max_ref, max);
+  EXPECT_EQ(min_ref, min);
+}

+// Checks the implementation under test against reference_minmax() for every
+// pairing of row strides from 8 to 64 (in steps of 8) on the two inputs.
+TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
+  // Sized so that 8 rows at the largest stride (64) stay inside each buffer.
+  uint8_t a[8 * 64], b[8 * 64];
+  for (int i = 0; i < 8 * 64; i++) {
+    a[i] = rnd_.Rand8();
+    b[i] = rnd_.Rand8();
+  }
+  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
+    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
+      int min_ref, max_ref, min, max;
+      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
+      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
+      // Report the stride pair so a failure identifies the offending layout.
+      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+    }
+  }
+}

+// Run the MinMaxTest suite against the C implementation unconditionally, and
+// against each SIMD implementation only when the build defines its HAVE_*
+// flag as non-zero.
+INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
+                        ::testing::Values(&vpx_minmax_8x8_sse2));
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
+                        ::testing::Values(&vpx_minmax_8x8_neon));
+#endif

+}  // namespace

--- a/test/test.mk

+++ b/test/test.mk

@@ -144,6 +144,7 @@

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc

+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc

--- a/vpx_dsp/arm/avg_neon.c

+++ b/vpx_dsp/arm/avg_neon.c

@@ -197,3 +197,60 @@

     return s - ((t * t) >> shift_factor);

+// NEON implementation of vpx_minmax_8x8: writes to |*min| and |*max| the
+// smallest and largest absolute difference between co-located bytes of the
+// 8x8 blocks at |a| and |b|. |a_stride| and |b_stride| are the distances
+// between consecutive rows of each block.
+void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride,
+                         const uint8_t *b, int b_stride,
+                         int *min, int *max) {
+  // Load and concatenate: two 8-byte rows are packed into each 16-byte vector.
+  const uint8x16_t a01 = vcombine_u8(vld1_u8(a),
+                                     vld1_u8(a + a_stride));
+  const uint8x16_t a23 = vcombine_u8(vld1_u8(a + 2 * a_stride),
+                                     vld1_u8(a + 3 * a_stride));
+  const uint8x16_t a45 = vcombine_u8(vld1_u8(a + 4 * a_stride),
+                                     vld1_u8(a + 5 * a_stride));
+  const uint8x16_t a67 = vcombine_u8(vld1_u8(a + 6 * a_stride),
+                                     vld1_u8(a + 7 * a_stride));
+  const uint8x16_t b01 = vcombine_u8(vld1_u8(b),
+                                     vld1_u8(b + b_stride));
+  const uint8x16_t b23 = vcombine_u8(vld1_u8(b + 2 * b_stride),
+                                     vld1_u8(b + 3 * b_stride));
+  const uint8x16_t b45 = vcombine_u8(vld1_u8(b + 4 * b_stride),
+                                     vld1_u8(b + 5 * b_stride));
+  const uint8x16_t b67 = vcombine_u8(vld1_u8(b + 6 * b_stride),
+                                     vld1_u8(b + 7 * b_stride));
+  // Absolute difference.
+  const uint8x16_t ab01_diff = vabdq_u8(a01, b01);
+  const uint8x16_t ab23_diff = vabdq_u8(a23, b23);
+  const uint8x16_t ab45_diff = vabdq_u8(a45, b45);
+  const uint8x16_t ab67_diff = vabdq_u8(a67, b67);
+  // Lane-wise max and min across the Q vectors, reducing four vectors of
+  // differences down to one vector of maxima and one of minima.
+  const uint8x16_t ab0123_max = vmaxq_u8(ab01_diff, ab23_diff);
+  const uint8x16_t ab4567_max = vmaxq_u8(ab45_diff, ab67_diff);
+  const uint8x16_t ab0123_min = vminq_u8(ab01_diff, ab23_diff);
+  const uint8x16_t ab4567_min = vminq_u8(ab45_diff, ab67_diff);
+  const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max);
+  const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min);
+  // Split to D and start doing pairwise.
+  uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max));
+  uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min));
+  // Enough runs of vpmax/min propagate the max/min values to every position:
+  // each pairwise step halves the number of distinct candidates (8 -> 4 -> 2
+  // -> 1), so three steps suffice for eight lanes.
+  ab_max = vpmax_u8(ab_max, ab_max);
+  ab_min = vpmin_u8(ab_min, ab_min);
+  ab_max = vpmax_u8(ab_max, ab_max);
+  ab_min = vpmin_u8(ab_min, ab_min);
+  ab_max = vpmax_u8(ab_max, ab_max);
+  ab_min = vpmin_u8(ab_min, ab_min);
+  *min = *max = 0;  // Clear high bits
+  // Store directly to avoid costly neon->gpr transfer.
+  // NOTE(review): only the lowest-addressed byte of each int is written, which
+  // gives the intended value only when that byte is the least significant one
+  // (little-endian) -- confirm no big-endian NEON target uses this path.
+  vst1_lane_u8((uint8_t *)max, ab_max, 0);
+  vst1_lane_u8((uint8_t *)min, ab_min, 0);
+}

--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl

+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -1014,7 +1014,7 @@

   specialize qw/vpx_avg_4x4 sse2 neon msa/;

   add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";

-  specialize qw/vpx_minmax_8x8 sse2/;

+  specialize qw/vpx_minmax_8x8 sse2 neon/;

   add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";

   specialize qw/vpx_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";

--

⑨