shithub: libvpx

--- a/test/test.mk

+++ b/test/test.mk

@@ -129,6 +129,7 @@

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc

+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += vp9_intrapred_test.cc

 ifeq ($(CONFIG_VP9_ENCODER),yes)

--- /dev/null

+++ b/test/vp9_avg_test.cc

@@ -1,0 +1,150 @@

+/*

+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <string.h>

+#include <limits.h>

+#include <stdio.h>

+#include "./vpx_config.h"

+#if CONFIG_VP9_ENCODER

+#include "./vp9_rtcd.h"

+#endif

+#include "vpx_mem/vpx_mem.h"

+#include "test/acm_random.h"

+#include "test/clear_system_state.h"

+#include "test/register_state_check.h"

+#include "test/util.h"

+#include "third_party/googletest/src/include/gtest/gtest.h"

+using libvpx_test::ACMRandom;

+namespace {

+// Fixture shared by the 8x8 average tests. It owns one pixel buffer that is

+// allocated once per test case, and provides fill helpers plus a scalar

+// reference implementation to compare the functions under test against.

+class AverageTestBase : public ::testing::Test {

+ public:

+  AverageTestBase(int width, int height) : width_(width), height_(height) {}

+  // Allocates the buffer shared by all tests of the test case.

+  static void SetUpTestCase() {

+    source_data_ = reinterpret_cast<uint8_t*>(

+        vpx_memalign(kDataAlignment, kDataBlockSize));

+  }

+  // Releases the shared buffer and clears the dangling pointer.

+  static void TearDownTestCase() {

+    vpx_free(source_data_);

+    source_data_ = NULL;

+  }

+  virtual void TearDown() {

+    libvpx_test::ClearSystemState();

+  }

+ protected:

+  // The shared buffer holds 64 * 128 bytes (e.g. 64 rows at a stride of 128).

+  // Both constants are handed to vpx_memalign() in SetUpTestCase().

+  static const int kDataAlignment = 16;

+  static const int kDataBlockSize = 64 * 128;

+  virtual void SetUp() {

+    // The row stride is the block width rounded up to a multiple of 32.

+    source_stride_ = (width_ + 31) & ~31;

+    rnd_.Reset(ACMRandom::DeterministicSeed());

+  }

+  // Scalar reference: rounded mean of the 8x8 block of pixels starting at

+  // |source|, whose rows are |pitch| bytes apart.

+  unsigned int ReferenceAverage(const uint8_t* source, int pitch) {

+    unsigned int sum = 0;

+    for (int h = 0; h < 8; ++h) {

+      for (int w = 0; w < 8; ++w) {

+        sum += source[h * pitch + w];

+      }

+    }

+    // Adding 32 before dividing by 64 rounds to the nearest integer.

+    return (sum + 32) >> 6;

+  }

+  // Sets the first width_ * height_ bytes of the buffer to |fill_constant|.

+  // The fill is contiguous; it does not step by source_stride_.

+  void FillConstant(uint8_t fill_constant) {

+    memset(source_data_, fill_constant, width_ * height_);

+  }

+  // Sets the first width_ * height_ bytes of the buffer to random values.

+  // The fill is contiguous; it does not step by source_stride_.

+  void FillRandom() {

+    for (int i = 0; i < width_ * height_; ++i) {

+      source_data_[i] = rnd_.Rand8();

+    }

+  }

+  int width_, height_;

+  static uint8_t* source_data_;

+  int source_stride_;

+  ACMRandom rnd_;

+};

+// Signature of the functions under test: pointer to the top-left pixel of the

+// block and the distance in bytes between rows.

+typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);

+// Test parameter: (width, height, byte offset of the block inside the shared

+// buffer, function under test).

+typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;

+// Value-parameterized fixture: parameters 0 and 1 give the dimensions passed

+// to AverageTestBase, parameter 2 the offset of the block under test inside

+// the shared buffer, and parameter 3 the function under test.

+class AverageTest

+    : public AverageTestBase,

+      public ::testing::WithParamInterface<AvgFunc> {

+ public:

+  AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}

+ protected:

+  // Runs the function under test on the current buffer contents and expects

+  // the same result as ReferenceAverage(). The function is called twice:

+  // once under the register state check and once to capture its result.

+  void CheckAverages() {

+    const uint8_t* const block = source_data_ + GET_PARAM(2);

+    const AverageFunction avg_fn = GET_PARAM(3);

+    const unsigned int expected = ReferenceAverage(block, source_stride_);

+    ASM_REGISTER_STATE_CHECK(avg_fn(block, source_stride_));

+    const unsigned int actual = avg_fn(block, source_stride_);

+    EXPECT_EQ(expected, actual);

+  }

+};

+// Storage for the shared pixel buffer; set in SetUpTestCase() and released in

+// TearDownTestCase().

+uint8_t* AverageTestBase::source_data_ = NULL;

+// All pixels at the smallest 8-bit value.

+TEST_P(AverageTest, MinValue) {

+  FillConstant(0);

+  CheckAverages();

+}

+// All pixels at the largest 8-bit value.

+TEST_P(AverageTest, MaxValue) {

+  FillConstant(255);

+  CheckAverages();

+}

+TEST_P(AverageTest, Random) {

+  // Refill the buffer with new random pixels on each of the 1000 iterations

+  // and compare the function under test against the reference every time.

+  for (int i = 0; i < 1000; i++) {

+    FillRandom();

+    CheckAverages();

+  }

+}

+using std::tr1::make_tuple;

+// Each tuple is (width, height, byte offset of the 8x8 block inside the

+// shared buffer, function under test).

+// C implementation.

+INSTANTIATE_TEST_CASE_P(

+    C, AverageTest,

+    ::testing::Values(

+        make_tuple(16, 16, 1, &vp9_avg_8x8_c)));

+// SSE2 implementation, exercised at several block offsets; only compiled when

+// HAVE_SSE2 is set.

+#if HAVE_SSE2

+INSTANTIATE_TEST_CASE_P(

+    SSE2, AverageTest,

+    ::testing::Values(

+        make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),

+        make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),

+        make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));

+#endif

+}  // namespace

--- a/vp9/common/vp9_rtcd_defs.pl

+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -1110,6 +1110,10 @@

 add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";

 specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";

+# The SSE2 version of vp9_avg_8x8 is written with C intrinsics and is built

+# whenever SSE2 is available, so it is specialized on plain sse2 rather than

+# on the x86inc assembly setting.

+add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";

+specialize qw/vp9_avg_8x8 sse2/;

 # ENCODEMB INVOKE

 add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";

--- /dev/null

+++ b/vp9/encoder/vp9_avg.c

@@ -1,0 +1,19 @@

+/*

+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include "vpx_ports/mem.h"

+// Returns the rounded average of the 8x8 block of pixels starting at |s|,

+// where consecutive rows are |p| bytes apart.

+unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {

+  int total = 0;

+  int row;

+  for (row = 0; row < 8; ++row) {

+    const uint8_t *const line = s + row * p;

+    int col;

+    for (col = 0; col < 8; ++col) total += line[col];

+  }

+  // Adding 32 before the shift by 6 rounds the division by 64 to nearest.

+  return (total + 32) >> 6;

+}

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -396,10 +396,10 @@

   const int block_width = num_8x8_blocks_wide_lookup[bsize];

   const int block_height = num_8x8_blocks_high_lookup[bsize];

   // TODO(debargha): Choose this more intelligently.

-  const int64_t threshold_multiplier = 25;

-  int64_t threshold = threshold_multiplier * cpi->common.base_qindex;

+  const int64_t threshold_multiplier = cm->frame_type == KEY_FRAME ? 64 : 4;

+  int64_t threshold = threshold_multiplier *

+      vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);

   assert(block_height == block_width);

   tree_to_node(data, bsize, &vt);

   // Split none is available only if we have more than half a block size

@@ -511,10 +511,17 @@

         int y_idx = y16_idx + ((k >> 1) << 3);

         unsigned int sse = 0;

         int sum = 0;

-        if (x_idx < pixels_wide && y_idx < pixels_high)

-          vp9_get8x8var(s + y_idx * sp + x_idx, sp,

-                        d + y_idx * dp + x_idx, dp, &sse, &sum);

-        fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);

+        if (x_idx < pixels_wide && y_idx < pixels_high) {

+          int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);

+          int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);

+          sum = s_avg - d_avg;

+          sse = sum * sum;

+        }

+        // For an 8x8 block we have just one value, the average of all 64

+        // pixels, so use 1. This means, of course, that there is no variance

+        // in an 8x8 block.

+        fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);

@@ -530,8 +537,8 @@

   // Now go through the entire structure,  splitting every block size until

   // we get to one that's got a variance lower than our threshold,  or we

   // hit 8x8.

-  if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,

-                           mi_row, mi_col)) {

+  if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||

+      !set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) {

     for (i = 0; i < 4; ++i) {

       const int x32_idx = ((i & 1) << 2);

       const int y32_idx = ((i >> 1) << 2);

@@ -561,10 +568,10 @@

 #else

-          if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,

+          if (!set_vt_partitioning(cpi, &vt.split[i].split[j],

                                    BLOCK_16X16,

-                                   (mi_row + y32_idx + y16_idx),

-                                   (mi_col + x32_idx + x16_idx), 2)) {

+                                   mi_row + y32_idx + y16_idx,

+                                   mi_col + x32_idx + x16_idx)) {

             for (k = 0; k < 4; ++k) {

               const int x8_idx = (k & 1);

               const int y8_idx = (k >> 1);

@@ -2593,7 +2600,8 @@

       set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);

       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,

                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);

-    } else if (sf->partition_search_type == VAR_BASED_PARTITION) {

+      } else if (sf->partition_search_type == VAR_BASED_PARTITION &&

+                 cm->frame_type != KEY_FRAME ) {

       choose_partitioning(cpi, tile, mi_row, mi_col);

       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,

                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);

--- a/vp9/encoder/vp9_pickmode.c

+++ b/vp9/encoder/vp9_pickmode.c

@@ -235,6 +235,10 @@

               tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);

     else

       xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;

+    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&

+        xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)

+      xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;

   } else {

     xd->mi[0].src_mi->mbmi.tx_size =

         MIN(max_txsize_lookup[bsize],

@@ -611,7 +615,8 @@

         continue;

       if (this_mode == NEWMV) {

-        if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))

+        if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&

+            this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))

           continue;

         if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,

                                     &frame_mv[NEWMV][ref_frame],

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -249,6 +249,7 @@

     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;

     sf->frame_parameter_update = 0;

     sf->mv.search_method = FAST_HEX;

     sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;

     sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;

     sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;

@@ -278,12 +279,17 @@

       int i;

       // Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used

       for (i = 0; i < BLOCK_SIZES; ++i)

-        sf->inter_mode_mask[i] = INTER_ALL;

+        sf->inter_mode_mask[i] = INTER_NEAREST_NEAR_NEW;

     // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.

-    sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;

+    sf->partition_search_type = VAR_BASED_PARTITION;

     sf->search_type_check_frequency = 50;

+    sf->mv.search_method = NSTEP;

+    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;

+    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;

+    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;

+    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;

     sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;

@@ -291,7 +297,7 @@

     sf->reuse_inter_pred_sby = 1;

     // Increase mode checking threshold for NEWMV.

-    sf->elevate_newmv_thresh = 2000;

+    sf->elevate_newmv_thresh = 1000;

     sf->mv.reduce_first_step_size = 1;

--- a/vp9/encoder/vp9_speed_features.h

+++ b/vp9/encoder/vp9_speed_features.h

@@ -34,6 +34,9 @@

 enum {

   INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),

   INTER_NEAREST = (1 << NEARESTMV),

+  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),

+  INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV),

+  INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV),

   INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),

   INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV),

};

--- /dev/null

+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c

@@ -1,0 +1,40 @@

+/*

+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <immintrin.h>

+#include "vpx_ports/mem.h"

+// SSE2 version: returns the rounded average of the 8x8 block of pixels

+// starting at |s|, where consecutive rows are |p| bytes apart.

+unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {

+  const __m128i zero = _mm_setzero_si128();

+  // Load the 8 pixels of the first row and widen them to eight 16-bit lanes.

+  __m128i acc = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), zero);

+  int r;

+  // Add the remaining seven rows lane by lane. A column sum is at most

+  // 8 * 255, so the saturating add never actually saturates.

+  for (r = 1; r < 8; ++r) {

+    const __m128i px = _mm_loadl_epi64((const __m128i *)(s + r * p));

+    acc = _mm_adds_epu16(acc, _mm_unpacklo_epi8(px, zero));

+  }

+  // Horizontal reduction: fold lanes 4-7 onto 0-3, then 2-3 onto 0-1, then

+  // lane 1 onto lane 0, which ends up holding the sum of all 64 pixels.

+  acc = _mm_adds_epu16(acc, _mm_srli_si128(acc, 8));

+  acc = _mm_adds_epu16(acc, _mm_srli_epi64(acc, 32));

+  acc = _mm_adds_epu16(acc, _mm_srli_epi64(acc, 16));

+  // Adding 32 before the shift by 6 rounds the division by 64 to nearest.

+  return ((unsigned int)_mm_extract_epi16(acc, 0) + 32) >> 6;

+}

--- a/vp9/vp9cx.mk

+++ b/vp9/vp9cx.mk

@@ -17,6 +17,7 @@

 VP9_CX_SRCS-yes += vp9_cx_iface.c

+VP9_CX_SRCS-yes += encoder/vp9_avg.c

 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c

 VP9_CX_SRCS-yes += encoder/vp9_context_tree.c

 VP9_CX_SRCS-yes += encoder/vp9_context_tree.h

@@ -95,6 +96,7 @@

 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c

 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm

+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c

 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c

 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c

 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm

--

⑨