ref: 7f7d1357a2732e0a1c36f3baded7dd14f449e535
parent: 282c963923eb969c146d63e934bbece433a95282
parent: 868ecb55a1528ca3f19286e7d1551572bf89b642
author: John Koleszar <jkoleszar@google.com>
date: Fri Apr 12 11:33:04 EDT 2013
Merge branch 'experimental' into master VP9 preview bitstream 2, commit '868ecb55a1528ca3f19286e7d1551572bf89b642' Conflicts: vp9/vp9_common.mk Change-Id: I3f0f6e692c987ff24f98ceafbb86cb9cf64ad8d3
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,8 @@
*.d
*.o
*~
+/*.ivf
+/*.ivf.md5
/*-*.mk
/*.asm
/*.doxy
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -460,6 +460,7 @@
#ifndef VPX_CONFIG_H
#define VPX_CONFIG_H
#define RESTRICT ${RESTRICT}
+#define INLINE ${INLINE}
EOF
print_config_h ARCH "${TMP_H}" ${ARCH_LIST}
print_config_h HAVE "${TMP_H}" ${HAVE_LIST}
@@ -1005,12 +1006,6 @@
#error "not x32"
#endif
EOF
- soft_enable runtime_cpu_detect
- soft_enable mmx
- soft_enable sse
- soft_enable sse2
- soft_enable sse3
- soft_enable ssse3
case ${tgt_os} in
win*)
@@ -1064,9 +1059,15 @@
;;
esac
+ soft_enable runtime_cpu_detect
+ soft_enable mmx
+ soft_enable sse
+ soft_enable sse2
+ soft_enable sse3
+ soft_enable ssse3
# We can't use 'check_cflags' until the compiler is configured and CC is
# populated.
- if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4.1; then
+ if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then
RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 "
else
soft_enable sse4_1
@@ -1173,6 +1174,14 @@
EOF
[ -f "${TMP_O}" ] && od -A n -t x1 "${TMP_O}" | tr -d '\n' |
grep '4f *32 *42 *45' >/dev/null 2>&1 && enable big_endian
+
+ # Try to find which inline keywords are supported
+ check_cc <<EOF && INLINE="inline"
+ static inline function() {}
+EOF
+ check_cc <<EOF && INLINE="__attribute__((always_inline))"
+ static __attribute__((always_inline)) function() {}
+EOF
# Almost every platform uses pthreads.
if enabled multithread; then
--- a/configure
+++ b/configure
@@ -239,17 +239,18 @@
"
EXPERIMENT_LIST="
csm
- lossless
new_mvref
implicit_segmentation
newbintramodes
comp_interintra_pred
- tx64x64
- dwtdcthybrid
- cnvcontext
- newcoefcontext
enable_6tap
abovesprefmv
+ code_nonzerocount
+ useselectrefmv
+ modelcoefprob
+ loop_dering
+ implicit_compoundinter_weight
+ scatterscan
"
CONFIG_LIST="
external_build
@@ -647,6 +648,7 @@
enable solution
vs_version=${tgt_cc##vs}
all_targets="${all_targets} solution"
+ INLINE="__forceinline"
;;
esac
--- a/test/altref_test.cc
+++ b/test/altref_test.cc
@@ -8,9 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
-
+#include "test/util.h"
namespace {
// lookahead range: [kLookAheadMin, kLookAheadMax).
@@ -17,10 +18,10 @@
const int kLookAheadMin = 5;
const int kLookAheadMax = 26;
-class AltRefTest : public libvpx_test::EncoderTest,
- public ::testing::TestWithParam<int> {
+class AltRefTest : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWithParam<int> {
protected:
- AltRefTest() : altref_count_(0) {}
+ AltRefTest() : EncoderTest(GET_PARAM(0)), altref_count_(0) {}
virtual ~AltRefTest() {}
virtual void SetUp() {
@@ -58,7 +59,7 @@
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 1000;
- cfg_.g_lag_in_frames = GetParam();
+ cfg_.g_lag_in_frames = GET_PARAM(1);
libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
timebase.den, timebase.num, 0, 30);
@@ -66,6 +67,7 @@
EXPECT_GE(altref_count(), 1);
}
-INSTANTIATE_TEST_CASE_P(NonZeroLag, AltRefTest,
- ::testing::Range(kLookAheadMin, kLookAheadMax));
+
+VP8_INSTANTIATE_TEST_CASE(AltRefTest,
+ ::testing::Range(kLookAheadMin, kLookAheadMax));
} // namespace
--- /dev/null
+++ b/test/codec_factory.h
@@ -1,0 +1,232 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_CODEC_FACTORY_H_
+#define TEST_CODEC_FACTORY_H_
+
+extern "C" {
+#include "./vpx_config.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vpx_encoder.h"
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+#include "vpx/vp8cx.h"
+#endif
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#include "vpx/vp8dx.h"
+#endif
+}
+
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+namespace libvpx_test {
+
+class CodecFactory {
+ public:
+ CodecFactory() {}
+
+ virtual ~CodecFactory() {}
+
+ virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+ unsigned long deadline) const = 0;
+
+ virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
+ unsigned long deadline,
+ const unsigned long init_flags,
+ TwopassStatsStore *stats) const = 0;
+
+ virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg,
+ int usage) const = 0;
+};
+
+/* Provide CodecTestWith<n>Params classes for a variable number of parameters
+ * to avoid having to include a pointer to the CodecFactory in every test
+ * definition.
+ */
+template<class T1>
+class CodecTestWithParam : public ::testing::TestWithParam<
+ std::tr1::tuple< const libvpx_test::CodecFactory*, T1 > > {
+};
+
+template<class T1, class T2>
+class CodecTestWith2Params : public ::testing::TestWithParam<
+ std::tr1::tuple< const libvpx_test::CodecFactory*, T1, T2 > > {
+};
+
+template<class T1, class T2, class T3>
+class CodecTestWith3Params : public ::testing::TestWithParam<
+ std::tr1::tuple< const libvpx_test::CodecFactory*, T1, T2, T3 > > {
+};
+
+/*
+ * VP8 Codec Definitions
+ */
+#if CONFIG_VP8
+class VP8Decoder : public Decoder {
+ public:
+ VP8Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
+ : Decoder(cfg, deadline) {}
+
+ protected:
+ virtual const vpx_codec_iface_t* CodecInterface() const {
+#if CONFIG_VP8_DECODER
+ return &vpx_codec_vp8_dx_algo;
+#else
+ return NULL;
+#endif
+ }
+};
+
+class VP8Encoder : public Encoder {
+ public:
+ VP8Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline,
+ const unsigned long init_flags, TwopassStatsStore *stats)
+ : Encoder(cfg, deadline, init_flags, stats) {}
+
+ protected:
+ virtual const vpx_codec_iface_t* CodecInterface() const {
+#if CONFIG_VP8_ENCODER
+ return &vpx_codec_vp8_cx_algo;
+#else
+ return NULL;
+#endif
+ }
+};
+
+class VP8CodecFactory : public CodecFactory {
+ public:
+ VP8CodecFactory() : CodecFactory() {}
+
+ virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+ unsigned long deadline) const {
+#if CONFIG_VP8_DECODER
+ return new VP8Decoder(cfg, deadline);
+#else
+ return NULL;
+#endif
+ }
+
+ virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
+ unsigned long deadline,
+ const unsigned long init_flags,
+ TwopassStatsStore *stats) const {
+#if CONFIG_VP8_ENCODER
+ return new VP8Encoder(cfg, deadline, init_flags, stats);
+#else
+ return NULL;
+#endif
+ }
+
+ virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg,
+ int usage) const {
+#if CONFIG_VP8_ENCODER
+ return vpx_codec_enc_config_default(&vpx_codec_vp8_cx_algo, cfg, usage);
+#else
+ return VPX_CODEC_INCAPABLE;
+#endif
+ }
+};
+
+const libvpx_test::VP8CodecFactory kVP8;
+
+#define VP8_INSTANTIATE_TEST_CASE(test, params)\
+ INSTANTIATE_TEST_CASE_P(VP8, test, \
+ ::testing::Combine( \
+ ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
+ &libvpx_test::kVP8)), \
+ params))
+#else
+#define VP8_INSTANTIATE_TEST_CASE(test, params)
+#endif // CONFIG_VP8
+
+
+/*
+ * VP9 Codec Definitions
+ */
+#if CONFIG_VP9
+class VP9Decoder : public Decoder {
+ public:
+ VP9Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
+ : Decoder(cfg, deadline) {}
+
+ protected:
+ virtual const vpx_codec_iface_t* CodecInterface() const {
+#if CONFIG_VP9_DECODER
+ return &vpx_codec_vp9_dx_algo;
+#else
+ return NULL;
+#endif
+ }
+};
+
+class VP9Encoder : public Encoder {
+ public:
+ VP9Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline,
+ const unsigned long init_flags, TwopassStatsStore *stats)
+ : Encoder(cfg, deadline, init_flags, stats) {}
+
+ protected:
+ virtual const vpx_codec_iface_t* CodecInterface() const {
+#if CONFIG_VP9_ENCODER
+ return &vpx_codec_vp9_cx_algo;
+#else
+ return NULL;
+#endif
+ }
+};
+
+class VP9CodecFactory : public CodecFactory {
+ public:
+ VP9CodecFactory() : CodecFactory() {}
+
+ virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+ unsigned long deadline) const {
+#if CONFIG_VP9_DECODER
+ return new VP9Decoder(cfg, deadline);
+#else
+ return NULL;
+#endif
+ }
+
+ virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
+ unsigned long deadline,
+ const unsigned long init_flags,
+ TwopassStatsStore *stats) const {
+#if CONFIG_VP9_ENCODER
+ return new VP9Encoder(cfg, deadline, init_flags, stats);
+#else
+ return NULL;
+#endif
+ }
+
+ virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg,
+ int usage) const {
+#if CONFIG_VP9_ENCODER
+ return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage);
+#else
+ return VPX_CODEC_INCAPABLE;
+#endif
+ }
+};
+
+const libvpx_test::VP9CodecFactory kVP9;
+
+#define VP9_INSTANTIATE_TEST_CASE(test, params)\
+ INSTANTIATE_TEST_CASE_P(VP9, test, \
+ ::testing::Combine( \
+ ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
+ &libvpx_test::kVP9)), \
+ params))
+#else
+#define VP9_INSTANTIATE_TEST_CASE(test, params)
+#endif // CONFIG_VP9
+
+
+} // namespace libvpx_test
+
+#endif // TEST_CODEC_FACTORY_H_
--- a/test/config_test.cc
+++ b/test/config_test.cc
@@ -8,20 +8,22 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
+#include "test/util.h"
#include "test/video_source.h"
namespace {
class ConfigTest : public ::libvpx_test::EncoderTest,
- public ::testing::TestWithParam<enum libvpx_test::TestMode> {
- public:
- ConfigTest() : frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {}
-
+ public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
protected:
+ ConfigTest() : EncoderTest(GET_PARAM(0)),
+ frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {}
+
virtual void SetUp() {
InitializeConfig();
- SetMode(GetParam());
+ SetMode(GET_PARAM(1));
}
virtual void BeginPassHook(unsigned int /*pass*/) {
@@ -57,5 +59,5 @@
EXPECT_EQ(frame_count_in_, frame_count_out_);
}
-INSTANTIATE_TEST_CASE_P(OnePassModes, ConfigTest, ONE_PASS_TEST_MODES);
+VP8_INSTANTIATE_TEST_CASE(ConfigTest, ONE_PASS_TEST_MODES);
} // namespace
--- /dev/null
+++ b/test/convolve_test.cc
@@ -1,0 +1,509 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+extern "C" {
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_filter.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
+}
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h);
+
+struct ConvolveFunctions {
+ ConvolveFunctions(convolve_fn_t h8, convolve_fn_t h8_avg,
+ convolve_fn_t v8, convolve_fn_t v8_avg,
+ convolve_fn_t hv8, convolve_fn_t hv8_avg)
+ : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
+ hv8_avg_(hv8_avg) {}
+
+ convolve_fn_t h8_;
+ convolve_fn_t v8_;
+ convolve_fn_t hv8_;
+ convolve_fn_t h8_avg_;
+ convolve_fn_t v8_avg_;
+ convolve_fn_t hv8_avg_;
+};
+
+// Reference 8-tap subpixel filter, slightly modified to fit into this test.
+#define VP9_FILTER_WEIGHT 128
+#define VP9_FILTER_SHIFT 7
+static uint8_t clip_pixel(int x) {
+ return x < 0 ? 0 :
+ x > 255 ? 255 :
+ x;
+}
+
+static void filter_block2d_8_c(const uint8_t *src_ptr,
+ const unsigned int src_stride,
+ const int16_t *HFilter,
+ const int16_t *VFilter,
+ uint8_t *dst_ptr,
+ unsigned int dst_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
+ // Between passes, we use an intermediate buffer whose height is extended to
+ // have enough horizontally filtered values as input for the vertical pass.
+ // This buffer is allocated to be big enough for the largest block type we
+ // support.
+ const int kInterp_Extend = 4;
+ const unsigned int intermediate_height =
+ (kInterp_Extend - 1) + output_height + kInterp_Extend;
+
+ /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
+ * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
+ * + kInterp_Extend
+ * = 3 + 16 + 4
+ * = 23
+ * and filter_max_width = 16
+ */
+ uint8_t intermediate_buffer[23 * 16];
+ const int intermediate_next_stride = 1 - intermediate_height * output_width;
+
+ // Horizontal pass (src -> transposed intermediate).
+ {
+ uint8_t *output_ptr = intermediate_buffer;
+ const int src_next_row_stride = src_stride - output_width;
+ unsigned int i, j;
+ src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
+ for (i = 0; i < intermediate_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ // Apply filter...
+ int temp = ((int)src_ptr[0] * HFilter[0]) +
+ ((int)src_ptr[1] * HFilter[1]) +
+ ((int)src_ptr[2] * HFilter[2]) +
+ ((int)src_ptr[3] * HFilter[3]) +
+ ((int)src_ptr[4] * HFilter[4]) +
+ ((int)src_ptr[5] * HFilter[5]) +
+ ((int)src_ptr[6] * HFilter[6]) +
+ ((int)src_ptr[7] * HFilter[7]) +
+ (VP9_FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255...
+ *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
+ ++src_ptr;
+ output_ptr += intermediate_height;
+ }
+ src_ptr += src_next_row_stride;
+ output_ptr += intermediate_next_stride;
+ }
+ }
+
+ // Vertical pass (transposed intermediate -> dst).
+ {
+ uint8_t *src_ptr = intermediate_buffer;
+ const int dst_next_row_stride = dst_stride - output_width;
+ unsigned int i, j;
+ for (i = 0; i < output_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ // Apply filter...
+ int temp = ((int)src_ptr[0] * VFilter[0]) +
+ ((int)src_ptr[1] * VFilter[1]) +
+ ((int)src_ptr[2] * VFilter[2]) +
+ ((int)src_ptr[3] * VFilter[3]) +
+ ((int)src_ptr[4] * VFilter[4]) +
+ ((int)src_ptr[5] * VFilter[5]) +
+ ((int)src_ptr[6] * VFilter[6]) +
+ ((int)src_ptr[7] * VFilter[7]) +
+ (VP9_FILTER_WEIGHT >> 1); // Rounding
+
+ // Normalize back to 0-255...
+ *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
+ src_ptr += intermediate_height;
+ }
+ src_ptr += intermediate_next_stride;
+ dst_ptr += dst_next_row_stride;
+ }
+ }
+}
+
+static void block2d_average_c(uint8_t *src,
+ unsigned int src_stride,
+ uint8_t *output_ptr,
+ unsigned int output_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
+ unsigned int i, j;
+ for (i = 0; i < output_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
+ }
+ output_ptr += output_stride;
+ }
+}
+
+static void filter_average_block2d_8_c(const uint8_t *src_ptr,
+ const unsigned int src_stride,
+ const int16_t *HFilter,
+ const int16_t *VFilter,
+ uint8_t *dst_ptr,
+ unsigned int dst_stride,
+ unsigned int output_width,
+ unsigned int output_height) {
+ uint8_t tmp[16*16];
+
+ assert(output_width <= 16);
+ assert(output_height <= 16);
+ filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 16,
+ output_width, output_height);
+ block2d_average_c(tmp, 16, dst_ptr, dst_stride,
+ output_width, output_height);
+}
+
+class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
+ public:
+ static void SetUpTestCase() {
+ // Force input_ to be unaligned, output to be 16 byte aligned.
+ input_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, kOuterBlockSize * kOuterBlockSize + 1))
+ + 1;
+ output_ = reinterpret_cast<uint8_t*>(
+ vpx_memalign(kDataAlignment, kOuterBlockSize * kOuterBlockSize));
+ }
+
+ static void TearDownTestCase() {
+ vpx_free(input_ - 1);
+ input_ = NULL;
+ vpx_free(output_);
+ output_ = NULL;
+ }
+
+ protected:
+ static const int kDataAlignment = 16;
+ static const int kOuterBlockSize = 32;
+ static const int kInputStride = kOuterBlockSize;
+ static const int kOutputStride = kOuterBlockSize;
+ static const int kMaxDimension = 16;
+
+ int Width() const { return GET_PARAM(0); }
+ int Height() const { return GET_PARAM(1); }
+ int BorderLeft() const {
+ const int center = (kOuterBlockSize - Width()) / 2;
+ return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
+ }
+ int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
+
+ bool IsIndexInBorder(int i) {
+ return (i < BorderTop() * kOuterBlockSize ||
+ i >= (BorderTop() + Height()) * kOuterBlockSize ||
+ i % kOuterBlockSize < BorderLeft() ||
+ i % kOuterBlockSize >= (BorderLeft() + Width()));
+ }
+
+ virtual void SetUp() {
+ UUT_ = GET_PARAM(2);
+ memset(input_, 0, sizeof(input_));
+ /* Set up guard blocks for an inner block cetered in the outer block */
+ for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
+ if (IsIndexInBorder(i))
+ output_[i] = 255;
+ else
+ output_[i] = 0;
+ }
+
+ ::libvpx_test::ACMRandom prng;
+ for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i)
+ input_[i] = prng.Rand8();
+ }
+
+ void CheckGuardBlocks() {
+ for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i) {
+ if (IsIndexInBorder(i))
+ EXPECT_EQ(255, output_[i]);
+ }
+ }
+
+ uint8_t* input() {
+ return input_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ }
+
+ uint8_t* output() {
+ return output_ + BorderTop() * kOuterBlockSize + BorderLeft();
+ }
+
+ const ConvolveFunctions* UUT_;
+ static uint8_t* input_;
+ static uint8_t* output_;
+};
+uint8_t* ConvolveTest::input_ = NULL;
+uint8_t* ConvolveTest::output_ = NULL;
+
+TEST_P(ConvolveTest, GuardBlocks) {
+ CheckGuardBlocks();
+}
+
+TEST_P(ConvolveTest, CopyHoriz) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};
+
+ REGISTER_STATE_CHECK(
+ UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x])
+ << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, CopyVert) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};
+
+ REGISTER_STATE_CHECK(
+ UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x])
+ << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, Copy2D) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ const int16_t filter8[8] = {0, 0, 0, 128, 0, 0, 0, 0};
+
+ REGISTER_STATE_CHECK(
+ UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x])
+ << "(" << x << "," << y << ")";
+}
+
+const int16_t (*kTestFilterList[])[8] = {
+ vp9_bilinear_filters,
+ vp9_sub_pel_filters_6,
+ vp9_sub_pel_filters_8,
+ vp9_sub_pel_filters_8s,
+ vp9_sub_pel_filters_8lp
+};
+
+const int16_t kInvalidFilter[8] = { 0 };
+
+TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ uint8_t ref[kOutputStride * kMaxDimension];
+
+ const int kNumFilterBanks = sizeof(kTestFilterList) /
+ sizeof(kTestFilterList[0]);
+
+ for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+ const int16_t (*filters)[8] = kTestFilterList[filter_bank];
+ const int kNumFilters = 16;
+
+ for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+ for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+ filter_block2d_8_c(in, kInputStride,
+ filters[filter_x], filters[filter_y],
+ ref, kOutputStride,
+ Width(), Height());
+
+ if (filters == vp9_sub_pel_filters_8lp || (filter_x && filter_y))
+ REGISTER_STATE_CHECK(
+ UUT_->hv8_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+ else if (filter_y)
+ REGISTER_STATE_CHECK(
+ UUT_->v8_(in, kInputStride, out, kOutputStride,
+ kInvalidFilter, 16, filters[filter_y], 16,
+ Width(), Height()));
+ else
+ REGISTER_STATE_CHECK(
+ UUT_->h8_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, kInvalidFilter, 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x])
+ << "mismatch at (" << x << "," << y << "), "
+ << "filters (" << filter_bank << ","
+ << filter_x << "," << filter_y << ")";
+ }
+ }
+ }
+}
+
+TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+ uint8_t ref[kOutputStride * kMaxDimension];
+
+ // Populate ref and out with some random data
+ ::libvpx_test::ACMRandom prng;
+ for (int y = 0; y < Height(); ++y) {
+ for (int x = 0; x < Width(); ++x) {
+ const uint8_t r = prng.Rand8();
+
+ out[y * kOutputStride + x] = r;
+ ref[y * kOutputStride + x] = r;
+ }
+ }
+
+ const int kNumFilterBanks = sizeof(kTestFilterList) /
+ sizeof(kTestFilterList[0]);
+
+ for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+ const int16_t (*filters)[8] = kTestFilterList[filter_bank];
+ const int kNumFilters = 16;
+
+ for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+ for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+ filter_average_block2d_8_c(in, kInputStride,
+ filters[filter_x], filters[filter_y],
+ ref, kOutputStride,
+ Width(), Height());
+
+ if (filters == vp9_sub_pel_filters_8lp || (filter_x && filter_y))
+ REGISTER_STATE_CHECK(
+ UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+ else if (filter_y)
+ REGISTER_STATE_CHECK(
+ UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+ else
+ REGISTER_STATE_CHECK(
+ UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
+ filters[filter_x], 16, filters[filter_y], 16,
+ Width(), Height()));
+
+ CheckGuardBlocks();
+
+ for (int y = 0; y < Height(); ++y)
+ for (int x = 0; x < Width(); ++x)
+ ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x])
+ << "mismatch at (" << x << "," << y << "), "
+ << "filters (" << filter_bank << ","
+ << filter_x << "," << filter_y << ")";
+ }
+ }
+ }
+}
+
+DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
+ { 0, 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 128},
+ { 0, 0, 0, 128},
+ { 0, 0, 128},
+ { 0, 128},
+ { 128},
+ { 0, 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 0, 128},
+ { 0, 0, 0, 0, 128},
+ { 0, 0, 0, 128},
+ { 0, 0, 128},
+ { 0, 128},
+ { 128}
+};
+
+TEST_P(ConvolveTest, ChangeFilterWorks) {
+ uint8_t* const in = input();
+ uint8_t* const out = output();
+
+ REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
+ kChangeFilters[8], 17, kChangeFilters[4], 16,
+ Width(), Height()));
+
+ for (int x = 0; x < Width(); ++x) {
+ if (x < 8)
+ ASSERT_EQ(in[4], out[x]) << "x == " << x;
+ else
+ ASSERT_EQ(in[12], out[x]) << "x == " << x;
+ }
+
+ REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
+ kChangeFilters[4], 16, kChangeFilters[8], 17,
+ Width(), Height()));
+
+ for (int y = 0; y < Height(); ++y) {
+ if (y < 8)
+ ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+ else
+ ASSERT_EQ(in[12 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+ }
+
+ REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
+ kChangeFilters[8], 17, kChangeFilters[8], 17,
+ Width(), Height()));
+
+ for (int y = 0; y < Height(); ++y) {
+ for (int x = 0; x < Width(); ++x) {
+ const int ref_x = x < 8 ? 4 : 12;
+ const int ref_y = y < 8 ? 4 : 12;
+
+ ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
+ << "x == " << x << ", y == " << y;
+ }
+ }
+}
+
+
+using std::tr1::make_tuple;
+
+const ConvolveFunctions convolve8_c(
+ vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
+ vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
+ vp9_convolve8_c, vp9_convolve8_avg_c);
+
+INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
+ make_tuple(4, 4, &convolve8_c),
+ make_tuple(8, 4, &convolve8_c),
+ make_tuple(8, 8, &convolve8_c),
+ make_tuple(16, 8, &convolve8_c),
+ make_tuple(16, 16, &convolve8_c)));
+}
+
+#if HAVE_SSSE3
+const ConvolveFunctions convolve8_ssse3(
+ vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c,
+ vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c,
+ vp9_convolve8_ssse3, vp9_convolve8_avg_c);
+
+INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
+ make_tuple(4, 4, &convolve8_ssse3),
+ make_tuple(8, 4, &convolve8_ssse3),
+ make_tuple(8, 8, &convolve8_ssse3),
+ make_tuple(16, 8, &convolve8_ssse3),
+ make_tuple(16, 16, &convolve8_ssse3)));
+#endif
--- a/test/cq_test.cc
+++ b/test/cq_test.cc
@@ -9,9 +9,13 @@
*/
#include <cmath>
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
+#include "test/util.h"
+namespace {
+
// CQ level range: [kCQLevelMin, kCQLevelMax).
const int kCQLevelMin = 4;
const int kCQLevelMax = 63;
@@ -18,12 +22,13 @@
const int kCQLevelStep = 8;
const int kCQTargetBitrate = 2000;
-namespace {
-
-class CQTest : public libvpx_test::EncoderTest,
- public ::testing::TestWithParam<int> {
+class CQTest : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWithParam<int> {
protected:
- CQTest() : cq_level_(GetParam()) { init_flags_ = VPX_CODEC_USE_PSNR; }
+ CQTest() : EncoderTest(GET_PARAM(0)), cq_level_(GET_PARAM(1)) {
+ init_flags_ = VPX_CODEC_USE_PSNR;
+ }
+
virtual ~CQTest() {}
virtual void SetUp() {
@@ -100,7 +105,7 @@
EXPECT_GE(cq_psnr_lin, vbr_psnr_lin);
}
-INSTANTIATE_TEST_CASE_P(CQLevelRange, CQTest,
- ::testing::Range(kCQLevelMin, kCQLevelMax,
- kCQLevelStep));
+VP8_INSTANTIATE_TEST_CASE(CQTest,
+ ::testing::Range(kCQLevelMin, kCQLevelMax,
+ kCQLevelStep));
} // namespace
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -7,17 +7,23 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/util.h"
+
namespace {
class DatarateTest : public ::libvpx_test::EncoderTest,
- public ::testing::TestWithParam<enum libvpx_test::TestMode> {
+ public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ public:
+ DatarateTest() : EncoderTest(GET_PARAM(0)) {}
+
protected:
virtual void SetUp() {
InitializeConfig();
- SetMode(GetParam());
+ SetMode(GET_PARAM(1));
ResetModel();
}
@@ -174,5 +180,6 @@
}
}
-INSTANTIATE_TEST_CASE_P(AllModes, DatarateTest, ALL_TEST_MODES);
+VP8_INSTANTIATE_TEST_CASE(DatarateTest, ALL_TEST_MODES);
+
} // namespace
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -15,7 +15,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
-#include "vp9/common/entropy.h"
+#include "vp9/common/vp9_entropy.h"
#include "vp9_rtcd.h"
}
@@ -26,6 +26,15 @@
namespace {
+#ifdef _MSC_VER
+static int round(double x) {
+ if (x < 0)
+ return (int)ceil(x - 0.5);
+ else
+ return (int)floor(x + 0.5);
+}
+#endif
+
const double PI = 3.1415926535898;
void reference2_16x16_idct_2d(double *input, double *output) {
double x;
@@ -278,18 +287,10 @@
<< "Error: 16x16 IDCT has error " << error
<< " at index " << j;
}
-
- vp9_short_fdct16x16_c(in, out_c, 32);
- for (int j = 0; j < 256; ++j) {
- const double diff = coeff[j] - out_c[j];
- const double error = diff * diff;
- EXPECT_GE(1.0, error)
- << "Error: 16x16 FDCT has error " << error
- << " at index " << j;
- }
}
}
-
+#if 1
+// we need enable fdct test once we re-do the 16 point fdct.
TEST(VP9Fdct16x16Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
@@ -318,10 +319,10 @@
}
EXPECT_GE(1, max_error)
- << "Error: 16x16 FDCT/IDCT has an individual roundtrip error > 1";
+ << "Error: 16x16 FDCT/IDCT has an individual round trip error > 1";
- EXPECT_GE(count_test_block/10, total_error)
- << "Error: 16x16 FDCT/IDCT has average roundtrip error > 1/10 per block";
+ EXPECT_GE(count_test_block , total_error)
+ << "Error: 16x16 FDCT/IDCT has average round trip error > 1 per block";
}
TEST(VP9Fdct16x16Test, CoeffSizeCheck) {
@@ -353,4 +354,6 @@
}
}
}
+#endif
+
} // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -36,7 +36,6 @@
}
#endif
-#if !CONFIG_DWTDCTHYBRID
static const double kPi = 3.141592653589793238462643383279502884;
static void reference2_32x32_idct_2d(double *input, double *output) {
double x;
@@ -116,20 +115,9 @@
<< "Error: 3x32 IDCT has error " << error
<< " at index " << j;
}
-
- vp9_short_fdct32x32_c(in, out_c, 64);
- for (int j = 0; j < 1024; ++j) {
- const double diff = coeff[j] - out_c[j];
- const double error = diff * diff;
- EXPECT_GE(1.0, error)
- << "Error: 32x32 FDCT has error " << error
- << " at index " << j;
- }
}
}
-#else // CONFIG_DWTDCTHYBRID
- // TODO(rbultje/debargha): add DWT-specific tests
-#endif // CONFIG_DWTDCTHYBRID
+
TEST(VP9Fdct32x32Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
unsigned int max_error = 0;
@@ -160,8 +148,8 @@
EXPECT_GE(1u, max_error)
<< "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1";
- EXPECT_GE(count_test_block/10, total_error)
- << "Error: 32x32 FDCT/IDCT has average roundtrip error > 1/10 per block";
+ EXPECT_GE(count_test_block, total_error)
+ << "Error: 32x32 FDCT/IDCT has average roundtrip error > 1 per block";
}
TEST(VP9Fdct32x32Test, CoeffSizeCheck) {
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/register_state_check.h"
@@ -13,10 +14,10 @@
#include "test/video_source.h"
namespace libvpx_test {
-#if CONFIG_VP8_DECODER
vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, int size) {
vpx_codec_err_t res_dec;
+ InitOnce();
REGISTER_STATE_CHECK(res_dec = vpx_codec_decode(&decoder_,
cxdata, size, NULL, 0));
return res_dec;
@@ -24,15 +25,16 @@
void DecoderTest::RunLoop(CompressedVideoSource *video) {
vpx_codec_dec_cfg_t dec_cfg = {0};
- Decoder decoder(dec_cfg, 0);
+ Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
+ ASSERT_TRUE(decoder != NULL);
// Decode frames.
for (video->Begin(); video->cxdata(); video->Next()) {
- vpx_codec_err_t res_dec = decoder.DecodeFrame(video->cxdata(),
- video->frame_size());
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder.DecodeError();
+ vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),
+ video->frame_size());
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
- DxDataIterator dec_iter = decoder.GetDxData();
+ DxDataIterator dec_iter = decoder->GetDxData();
const vpx_image_t *img = NULL;
// Get decompressed data
@@ -39,6 +41,7 @@
while ((img = dec_iter.Next()))
DecompressedFrameHook(*img, video->frame_number());
}
+
+ delete decoder;
}
-#endif
} // namespace libvpx_test
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -14,10 +14,10 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx_config.h"
#include "vpx/vpx_decoder.h"
-#include "vpx/vp8dx.h"
namespace libvpx_test {
+class CodecFactory;
class CompressedVideoSource;
// Provides an object to handle decoding output
@@ -42,12 +42,11 @@
class Decoder {
public:
Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
- : cfg_(cfg), deadline_(deadline) {
+ : cfg_(cfg), deadline_(deadline), init_done_(false) {
memset(&decoder_, 0, sizeof(decoder_));
- Init();
}
- ~Decoder() {
+ virtual ~Decoder() {
vpx_codec_destroy(&decoder_);
}
@@ -62,37 +61,45 @@
}
void Control(int ctrl_id, int arg) {
+ InitOnce();
const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
}
void Control(int ctrl_id, const void *arg) {
+ InitOnce();
const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
}
- const char *DecodeError() {
+ const char* DecodeError() {
const char *detail = vpx_codec_error_detail(&decoder_);
return detail ? detail : vpx_codec_error(&decoder_);
}
protected:
- void Init() {
- const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,
- &vpx_codec_vp8_dx_algo,
- &cfg_, 0);
- ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
+ virtual const vpx_codec_iface_t* CodecInterface() const = 0;
+
+ void InitOnce() {
+ if (!init_done_) {
+ const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,
+ CodecInterface(),
+ &cfg_, 0);
+ ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
+ init_done_ = true;
+ }
}
vpx_codec_ctx_t decoder_;
vpx_codec_dec_cfg_t cfg_;
unsigned int deadline_;
+ bool init_done_;
};
// Common test functionality for all Decoder tests.
class DecoderTest {
public:
- // Main loop.
+ // Main decoding loop
virtual void RunLoop(CompressedVideoSource *video);
// Hook to be called on every decompressed frame.
@@ -100,9 +107,11 @@
const unsigned int frame_number) {}
protected:
- DecoderTest() {}
+ explicit DecoderTest(const CodecFactory *codec) : codec_(codec) {}
virtual ~DecoderTest() {}
+
+ const CodecFactory *codec_;
};
} // namespace libvpx_test
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -7,11 +7,11 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+
#include "vpx_config.h"
+#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
-#if CONFIG_VP8_DECODER
#include "test/decode_test_driver.h"
-#endif
#include "test/register_state_check.h"
#include "test/video_source.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
@@ -45,7 +45,7 @@
cfg_.g_h = img->d_h;
cfg_.g_timebase = video.timebase();
cfg_.rc_twopass_stats_in = stats_->buf();
- res = vpx_codec_enc_init(&encoder_, &vpx_codec_vp8_cx_algo, &cfg_,
+ res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
init_flags_);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
@@ -72,6 +72,11 @@
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
+void EncoderTest::InitializeConfig() {
+ const vpx_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0);
+ ASSERT_EQ(VPX_CODEC_OK, res);
+}
+
void EncoderTest::SetMode(TestMode mode) {
switch (mode) {
case kRealTime:
@@ -125,13 +130,17 @@
return match;
}
+void EncoderTest::MismatchHook(const vpx_image_t *img1,
+ const vpx_image_t *img2) {
+ ASSERT_TRUE(0) << "Encode/Decode mismatch found";
+}
+
void EncoderTest::RunLoop(VideoSource *video) {
-#if CONFIG_VP8_DECODER
vpx_codec_dec_cfg_t dec_cfg = {0};
-#endif
stats_.Reset();
+ ASSERT_TRUE(passes_ == 1 || passes_ == 2);
for (unsigned int pass = 0; pass < passes_; pass++) {
last_pts_ = 0;
@@ -143,34 +152,34 @@
cfg_.g_pass = VPX_RC_LAST_PASS;
BeginPassHook(pass);
- Encoder encoder(cfg_, deadline_, init_flags_, &stats_);
-#if CONFIG_VP8_DECODER
- Decoder decoder(dec_cfg, 0);
- bool has_cxdata = false;
-#endif
+ Encoder* const encoder = codec_->CreateEncoder(cfg_, deadline_, init_flags_,
+ &stats_);
+ ASSERT_TRUE(encoder != NULL);
+ Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
bool again;
for (again = true, video->Begin(); again; video->Next()) {
again = video->img() != NULL;
PreEncodeFrameHook(video);
- PreEncodeFrameHook(video, &encoder);
- encoder.EncodeFrame(video, frame_flags_);
+ PreEncodeFrameHook(video, encoder);
+ encoder->EncodeFrame(video, frame_flags_);
- CxDataIterator iter = encoder.GetCxData();
+ CxDataIterator iter = encoder->GetCxData();
+ bool has_cxdata = false;
+ bool has_dxdata = false;
while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {
+ pkt = MutateEncoderOutputHook(pkt);
again = true;
-#if CONFIG_VP8_DECODER
- vpx_codec_err_t res_dec;
-#endif
switch (pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT:
-#if CONFIG_VP8_DECODER
has_cxdata = true;
- res_dec = decoder.DecodeFrame((const uint8_t*)pkt->data.frame.buf,
- pkt->data.frame.sz);
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder.DecodeError();
-#endif
+ if (decoder && DoDecode()) {
+ vpx_codec_err_t res_dec = decoder->DecodeFrame(
+ (const uint8_t*)pkt->data.frame.buf, pkt->data.frame.sz);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
+ has_dxdata = true;
+ }
ASSERT_GE(pkt->data.frame.pts, last_pts_);
last_pts_ = pkt->data.frame.pts;
FramePktHook(pkt);
@@ -185,17 +194,19 @@
}
}
-#if CONFIG_VP8_DECODER
- if (has_cxdata) {
- const vpx_image_t *img_enc = encoder.GetPreviewFrame();
- DxDataIterator dec_iter = decoder.GetDxData();
+ if (has_dxdata && has_cxdata) {
+ const vpx_image_t *img_enc = encoder->GetPreviewFrame();
+ DxDataIterator dec_iter = decoder->GetDxData();
const vpx_image_t *img_dec = dec_iter.Next();
- if(img_enc && img_dec) {
+ if (img_enc && img_dec) {
const bool res = compare_img(img_enc, img_dec);
- ASSERT_TRUE(res)<< "Encoder/Decoder mismatch found.";
+ if (!res) { // Mismatch
+ MismatchHook(img_enc, img_dec);
+ }
}
+ if (img_dec)
+ DecompressedFrameHook(*img_dec, video->pts());
}
-#endif
if (!Continue())
break;
}
@@ -202,8 +213,13 @@
EndPassHook();
+ if (decoder)
+ delete decoder;
+ delete encoder;
+
if (!Continue())
break;
}
}
+
} // namespace libvpx_test
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -9,14 +9,16 @@
*/
#ifndef TEST_ENCODE_TEST_DRIVER_H_
#define TEST_ENCODE_TEST_DRIVER_H_
+
+#include "./vpx_config.h"
#include <string>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx/vpx_encoder.h"
-#include "vpx/vp8cx.h"
namespace libvpx_test {
+class CodecFactory;
class VideoSource;
enum TestMode {
@@ -36,7 +38,10 @@
::libvpx_test::kOnePassGood, \
::libvpx_test::kOnePassBest)
+#define TWO_PASS_TEST_MODES ::testing::Values(::libvpx_test::kTwoPassGood, \
+ ::libvpx_test::kTwoPassBest)
+
// Provides an object to handle the libvpx get_cx_data() iteration pattern
class CxDataIterator {
public:
@@ -83,7 +88,7 @@
public:
Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline,
const unsigned long init_flags, TwopassStatsStore *stats)
- : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) {
+ : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) {
memset(&encoder_, 0, sizeof(encoder_));
}
@@ -112,11 +117,18 @@
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
+ void Control(int ctrl_id, struct vpx_scaling_mode *arg) {
+ const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+ }
+
void set_deadline(unsigned long deadline) {
deadline_ = deadline;
}
protected:
+ virtual const vpx_codec_iface_t* CodecInterface() const = 0;
+
const char *EncoderError() {
const char *detail = vpx_codec_error_detail(&encoder_);
return detail ? detail : vpx_codec_error(&encoder_);
@@ -145,22 +157,19 @@
// classes directly, so that tests can be parameterized differently.
class EncoderTest {
protected:
- EncoderTest() : abort_(false), init_flags_(0), frame_flags_(0),
- last_pts_(0) {}
+ explicit EncoderTest(const CodecFactory *codec)
+ : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0),
+ last_pts_(0) {}
virtual ~EncoderTest() {}
// Initialize the cfg_ member with the default configuration.
- void InitializeConfig() {
- const vpx_codec_err_t res = vpx_codec_enc_config_default(
- &vpx_codec_vp8_cx_algo, &cfg_, 0);
- ASSERT_EQ(VPX_CODEC_OK, res);
- }
+ void InitializeConfig();
// Map the TestMode enum to the deadline_ and passes_ variables.
void SetMode(TestMode mode);
- // Main loop.
+ // Main loop
virtual void RunLoop(VideoSource *video);
// Hook to be called at the beginning of a pass.
@@ -181,6 +190,24 @@
// Hook to determine whether the encode loop should continue.
virtual bool Continue() const { return !abort_; }
+
+ const CodecFactory *codec_;
+ // Hook to determine whether to decode frame after encoding
+ virtual bool DoDecode() const { return 1; }
+
+ // Hook to handle encode/decode mismatch
+ virtual void MismatchHook(const vpx_image_t *img1,
+ const vpx_image_t *img2);
+
+ // Hook to be called on every decompressed frame.
+ virtual void DecompressedFrameHook(const vpx_image_t& img,
+ vpx_codec_pts_t pts) {}
+
+ // Hook that can modify the encoder's output data
+ virtual const vpx_codec_cx_pkt_t * MutateEncoderOutputHook(
+ const vpx_codec_cx_pkt_t *pkt) {
+ return pkt;
+ }
bool abort_;
vpx_codec_enc_cfg_t cfg_;
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -7,22 +7,37 @@
in the file PATENTS. All contributing project authors may
be found in the AUTHORS file in the root of the source tree.
*/
+
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
+#include "test/util.h"
namespace {
-class ErrorResilienceTest : public libvpx_test::EncoderTest,
- public ::testing::TestWithParam<int> {
+const int kMaxErrorFrames = 8;
+const int kMaxDroppableFrames = 8;
+
+class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
protected:
- ErrorResilienceTest() {
- psnr_ = 0.0;
- nframes_ = 0;
- encoding_mode_ = static_cast<libvpx_test::TestMode>(GetParam());
+ ErrorResilienceTest() : EncoderTest(GET_PARAM(0)),
+ psnr_(0.0),
+ nframes_(0),
+ mismatch_psnr_(0.0),
+ mismatch_nframes_(0),
+ encoding_mode_(GET_PARAM(1)) {
+ Reset();
}
+
virtual ~ErrorResilienceTest() {}
+ void Reset() {
+ error_nframes_ = 0;
+ droppable_nframes_ = 0;
+ }
+
virtual void SetUp() {
InitializeConfig();
SetMode(encoding_mode_);
@@ -31,6 +46,8 @@
virtual void BeginPassHook(unsigned int /*pass*/) {
psnr_ = 0.0;
nframes_ = 0;
+ mismatch_psnr_ = 0.0;
+ mismatch_nframes_ = 0;
}
virtual bool Continue() const {
@@ -42,6 +59,25 @@
nframes_++;
}
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
+ frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF);
+ if (droppable_nframes_ > 0 &&
+ (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
+ for (unsigned int i = 0; i < droppable_nframes_; ++i) {
+ if (droppable_frames_[i] == nframes_) {
+ std::cout << " Encoding droppable frame: "
+ << droppable_frames_[i] << "\n";
+ frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF);
+ return;
+ }
+ }
+ }
+ }
+
double GetAveragePsnr() const {
if (nframes_)
return psnr_ / nframes_;
@@ -48,9 +84,67 @@
return 0.0;
}
+ double GetAverageMismatchPsnr() const {
+ if (mismatch_nframes_)
+ return mismatch_psnr_ / mismatch_nframes_;
+ return 0.0;
+ }
+
+ virtual bool DoDecode() const {
+ if (error_nframes_ > 0 &&
+ (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
+ for (unsigned int i = 0; i < error_nframes_; ++i) {
+ if (error_frames_[i] == nframes_ - 1) {
+ std::cout << " Skipping decoding frame: "
+ << error_frames_[i] << "\n";
+ return 0;
+ }
+ }
+ }
+ return 1;
+ }
+
+ virtual void MismatchHook(const vpx_image_t *img1,
+ const vpx_image_t *img2) {
+ double mismatch_psnr = compute_psnr(img1, img2);
+ mismatch_psnr_ += mismatch_psnr;
+ ++mismatch_nframes_;
+ // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n";
+ }
+
+ void SetErrorFrames(int num, unsigned int *list) {
+ if (num > kMaxErrorFrames)
+ num = kMaxErrorFrames;
+ else if (num < 0)
+ num = 0;
+ error_nframes_ = num;
+ for (unsigned int i = 0; i < error_nframes_; ++i)
+ error_frames_[i] = list[i];
+ }
+
+ void SetDroppableFrames(int num, unsigned int *list) {
+ if (num > kMaxDroppableFrames)
+ num = kMaxDroppableFrames;
+ else if (num < 0)
+ num = 0;
+ droppable_nframes_ = num;
+ for (unsigned int i = 0; i < droppable_nframes_; ++i)
+ droppable_frames_[i] = list[i];
+ }
+
+ unsigned int GetMismatchFrames() {
+ return mismatch_nframes_;
+ }
+
private:
double psnr_;
unsigned int nframes_;
+ unsigned int error_nframes_;
+ unsigned int droppable_nframes_;
+ double mismatch_psnr_;
+ unsigned int mismatch_nframes_;
+ unsigned int error_frames_[kMaxErrorFrames];
+ unsigned int droppable_frames_[kMaxDroppableFrames];
libvpx_test::TestMode encoding_mode_;
};
@@ -85,6 +179,49 @@
}
}
-INSTANTIATE_TEST_CASE_P(OnOffTest, ErrorResilienceTest,
- ONE_PASS_TEST_MODES);
+TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
+ const vpx_rational timebase = { 33333333, 1000000000 };
+ cfg_.g_timebase = timebase;
+ cfg_.rc_target_bitrate = 500;
+
+ init_flags_ = VPX_CODEC_USE_PSNR;
+
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ timebase.den, timebase.num, 0, 30);
+
+ // Error resilient mode ON.
+ cfg_.g_error_resilient = 1;
+
+ // Set an arbitrary set of error frames same as droppable frames
+ unsigned int num_droppable_frames = 2;
+ unsigned int droppable_frame_list[] = {5, 16};
+ SetDroppableFrames(num_droppable_frames, droppable_frame_list);
+ SetErrorFrames(num_droppable_frames, droppable_frame_list);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ // Test that no mismatches have been found
+ std::cout << " Mismatch frames: "
+ << GetMismatchFrames() << "\n";
+ EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+
+ // reset previously set error/droppable frames
+ Reset();
+
+ // Now set an arbitrary set of error frames that are non-droppable
+ unsigned int num_error_frames = 3;
+ unsigned int error_frame_list[] = {3, 10, 20};
+ SetErrorFrames(num_error_frames, error_frame_list);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ // Test that dropping an arbitrary set of inter frames does not hurt too much
+ // Note the Average Mismatch PSNR is the average of the PSNR between
+ // decoded frame and encoder's version of the same frame for all frames
+ // with mismatch.
+ const double psnr_resilience_mismatch = GetAverageMismatchPsnr();
+ std::cout << " Mismatch PSNR: "
+ << psnr_resilience_mismatch << "\n";
+ EXPECT_GT(psnr_resilience_mismatch, 20.0);
+}
+
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTest, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTest, ONE_PASS_TEST_MODES);
+
} // namespace
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -25,7 +25,7 @@
namespace {
-TEST(Vp9FdctTest, SignBiasCheck) {
+TEST(Vp9Fdct4x4Test, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int16_t test_input_block[16];
int16_t test_output_block[16];
@@ -88,7 +88,7 @@
}
};
-TEST(Vp9FdctTest, RoundTripErrorCheck) {
+TEST(Vp9Fdct4x4Test, RoundTripErrorCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
@@ -120,7 +120,7 @@
}
// Because the bitstream is not frozen yet, use the idct in the codebase.
- vp9_short_idct4x4llm_c(test_temp_block, test_output_block, pitch);
+ vp9_short_idct4x4_c(test_temp_block, test_output_block, pitch);
for (int j = 0; j < 16; ++j) {
const int diff = test_input_block[j] - test_output_block[j];
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -149,7 +149,7 @@
// Initialize a test block with input range {-255, 255}.
for (int j = 0; j < 64; ++j)
- test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+ test_input_block[j] = rnd.Rand8() % 2 ? 255 : -256;
const int pitch = 16;
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -120,31 +120,6 @@
input[j] = rnd.Rand8() - rnd.Rand8();
const int pitch = 16;
- vp9_short_fdct8x8_c(input, output_c, pitch);
- reference_dct_2d(input, output_r);
-
- for (int j = 0; j < 64; ++j) {
- const double diff = output_c[j] - output_r[j];
- const double error = diff * diff;
- // An error in a DCT coefficient isn't that bad.
- // We care more about the reconstructed pixels.
- EXPECT_GE(2.0, error)
- << "Error: 8x8 FDCT/IDCT has error " << error
- << " at index " << j;
- }
-
-#if 0
- // Tests that the reference iDCT and fDCT match.
- reference_dct_2d(input, output_r);
- reference_idct_2d(output_r, output_c);
- for (int j = 0; j < 64; ++j) {
- const int diff = output_c[j] -input[j];
- const int error = diff * diff;
- EXPECT_EQ(0, error)
- << "Error: 8x8 FDCT/IDCT has error " << error
- << " at index " << j;
- }
-#endif
reference_dct_2d(input, output_r);
for (int j = 0; j < 64; ++j)
coeff[j] = round(output_r[j]);
--- /dev/null
+++ b/test/idct_test.cc
@@ -1,0 +1,118 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+extern "C" {
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
+}
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
+ int pred_stride, unsigned char *dst_ptr,
+ int dst_stride);
+namespace {
+class IDCTTest : public ::testing::TestWithParam<idct_fn_t> {
+ protected:
+ virtual void SetUp() {
+ int i;
+
+ UUT = GetParam();
+ memset(input, 0, sizeof(input));
+ /* Set up guard blocks */
+ for (i = 0; i < 256; i++)
+ output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
+ }
+
+ idct_fn_t UUT;
+ short input[16];
+ unsigned char output[256];
+ unsigned char predict[256];
+};
+
+TEST_P(IDCTTest, TestGuardBlocks) {
+ int i;
+
+ for (i = 0; i < 256; i++)
+ if ((i & 0xF) < 4 && i < 64)
+ EXPECT_EQ(0, output[i]) << i;
+ else
+ EXPECT_EQ(255, output[i]);
+}
+
+TEST_P(IDCTTest, TestAllZeros) {
+ int i;
+
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for (i = 0; i < 256; i++)
+ if ((i & 0xF) < 4 && i < 64)
+ EXPECT_EQ(0, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestAllOnes) {
+ int i;
+
+ input[0] = 4;
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for (i = 0; i < 256; i++)
+ if ((i & 0xF) < 4 && i < 64)
+ EXPECT_EQ(1, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestAddOne) {
+ int i;
+
+ for (i = 0; i < 256; i++)
+ predict[i] = i;
+ input[0] = 4;
+ REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
+
+ for (i = 0; i < 256; i++)
+ if ((i & 0xF) < 4 && i < 64)
+ EXPECT_EQ(i+1, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+}
+
+TEST_P(IDCTTest, TestWithData) {
+ int i;
+
+ for (i = 0; i < 16; i++)
+ input[i] = i;
+
+ REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+ for (i = 0; i < 256; i++)
+ if ((i & 0xF) > 3 || i > 63)
+ EXPECT_EQ(255, output[i]) << "i==" << i;
+ else if (i == 0)
+ EXPECT_EQ(11, output[i]) << "i==" << i;
+ else if (i == 34)
+ EXPECT_EQ(1, output[i]) << "i==" << i;
+ else if (i == 2 || i == 17 || i == 32)
+ EXPECT_EQ(3, output[i]) << "i==" << i;
+ else
+ EXPECT_EQ(0, output[i]) << "i==" << i;
+}
+
+INSTANTIATE_TEST_CASE_P(C, IDCTTest,
+ ::testing::Values(vp8_short_idct4x4llm_c));
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
+ ::testing::Values(vp8_short_idct4x4llm_mmx));
+#endif
+}
--- a/test/idctllm_test.cc
+++ /dev/null
@@ -1,126 +1,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-extern "C" {
-#include "vpx_config.h"
-#include "vp8_rtcd.h"
-}
-#include "test/register_state_check.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
- int pred_stride, unsigned char *dst_ptr,
- int dst_stride);
-namespace {
-class IDCTTest : public ::testing::TestWithParam<idct_fn_t>
-{
- protected:
- virtual void SetUp()
- {
- int i;
-
- UUT = GetParam();
- memset(input, 0, sizeof(input));
- /* Set up guard blocks */
- for(i=0; i<256; i++)
- output[i] = ((i&0xF)<4&&(i<64))?0:-1;
- }
-
- idct_fn_t UUT;
- short input[16];
- unsigned char output[256];
- unsigned char predict[256];
-};
-
-TEST_P(IDCTTest, TestGuardBlocks)
-{
- int i;
-
- for(i=0; i<256; i++)
- if((i&0xF) < 4 && i<64)
- EXPECT_EQ(0, output[i]) << i;
- else
- EXPECT_EQ(255, output[i]);
-}
-
-TEST_P(IDCTTest, TestAllZeros)
-{
- int i;
-
- REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
-
- for(i=0; i<256; i++)
- if((i&0xF) < 4 && i<64)
- EXPECT_EQ(0, output[i]) << "i==" << i;
- else
- EXPECT_EQ(255, output[i]) << "i==" << i;
-}
-
-TEST_P(IDCTTest, TestAllOnes)
-{
- int i;
-
- input[0] = 4;
- REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
-
- for(i=0; i<256; i++)
- if((i&0xF) < 4 && i<64)
- EXPECT_EQ(1, output[i]) << "i==" << i;
- else
- EXPECT_EQ(255, output[i]) << "i==" << i;
-}
-
-TEST_P(IDCTTest, TestAddOne)
-{
- int i;
-
- for(i=0; i<256; i++)
- predict[i] = i;
-
- input[0] = 4;
- REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
-
- for(i=0; i<256; i++)
- if((i&0xF) < 4 && i<64)
- EXPECT_EQ(i+1, output[i]) << "i==" << i;
- else
- EXPECT_EQ(255, output[i]) << "i==" << i;
-}
-
-TEST_P(IDCTTest, TestWithData)
-{
- int i;
-
- for(i=0; i<16; i++)
- input[i] = i;
-
- REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
-
- for(i=0; i<256; i++)
- if((i&0xF) > 3 || i>63)
- EXPECT_EQ(255, output[i]) << "i==" << i;
- else if(i == 0)
- EXPECT_EQ(11, output[i]) << "i==" << i;
- else if(i == 34)
- EXPECT_EQ(1, output[i]) << "i==" << i;
- else if(i == 2 || i == 17 || i == 32)
- EXPECT_EQ(3, output[i]) << "i==" << i;
- else
- EXPECT_EQ(0, output[i]) << "i==" << i;
-}
-
-INSTANTIATE_TEST_CASE_P(C, IDCTTest,
- ::testing::Values(vp8_short_idct4x4llm_c));
-#if HAVE_MMX
-INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
- ::testing::Values(vp8_short_idct4x4llm_mmx));
-#endif
-}
--- a/test/keyframe_test.cc
+++ b/test/keyframe_test.cc
@@ -9,18 +9,22 @@
*/
#include <climits>
#include <vector>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/util.h"
namespace {
class KeyframeTest : public ::libvpx_test::EncoderTest,
- public ::testing::TestWithParam<enum libvpx_test::TestMode> {
+ public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
protected:
+ KeyframeTest() : EncoderTest(GET_PARAM(0)) {}
+
virtual void SetUp() {
InitializeConfig();
- SetMode(GetParam());
+ SetMode(GET_PARAM(1));
kf_count_ = 0;
kf_count_max_ = INT_MAX;
kf_do_force_kf_ = false;
@@ -64,7 +68,7 @@
// In realtime mode - auto placed keyframes are exceedingly rare, don't
// bother with this check if(GetParam() > 0)
- if(GetParam() > 0)
+ if (GET_PARAM(1) > 0)
EXPECT_GT(kf_count_, 1);
}
@@ -126,7 +130,7 @@
// In realtime mode - auto placed keyframes are exceedingly rare, don't
// bother with this check
- if(GetParam() > 0)
+ if (GET_PARAM(1) > 0)
EXPECT_EQ(2u, kf_pts_list_.size()) << " Not the right number of keyframes ";
// Verify that keyframes match the file keyframes in the file.
@@ -141,5 +145,5 @@
}
}
-INSTANTIATE_TEST_CASE_P(AllModes, KeyframeTest, ALL_TEST_MODES);
+VP8_INSTANTIATE_TEST_CASE(KeyframeTest, ALL_TEST_MODES);
} // namespace
--- /dev/null
+++ b/test/md5_helper.h
@@ -1,0 +1,64 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef LIBVPX_TEST_MD5_HELPER_H_
+#define LIBVPX_TEST_MD5_HELPER_H_
+
+extern "C" {
+#include "./md5_utils.h"
+#include "vpx/vpx_decoder.h"
+}
+
+namespace libvpx_test {
+class MD5 {
+ public:
+ MD5() {
+ MD5Init(&md5_);
+ }
+
+ void Add(const vpx_image_t *img) {
+ for (int plane = 0; plane < 3; ++plane) {
+ uint8_t *buf = img->planes[plane];
+ const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
+ const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
+
+ for (int y = 0; y < h; ++y) {
+ MD5Update(&md5_, buf, w);
+ buf += img->stride[plane];
+ }
+ }
+ }
+
+ const char *Get(void) {
+ static const char hex[16] = {
+ '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+ };
+ uint8_t tmp[16];
+ MD5Context ctx_tmp = md5_;
+
+ MD5Final(tmp, &ctx_tmp);
+ for (int i = 0; i < 16; i++) {
+ res_[i * 2 + 0] = hex[tmp[i] >> 4];
+ res_[i * 2 + 1] = hex[tmp[i] & 0xf];
+ }
+ res_[32] = 0;
+
+ return res_;
+ }
+
+ protected:
+ char res_[33];
+ MD5Context md5_;
+};
+
+} // namespace libvpx_test
+
+#endif // LIBVPX_TEST_MD5_HELPER_H_
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -9,9 +9,12 @@
*/
#include <climits>
#include <vector>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
#include "test/video_source.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/util.h"
namespace {
@@ -49,8 +52,10 @@
};
class ResizeTest : public ::libvpx_test::EncoderTest,
- public ::testing::TestWithParam<enum libvpx_test::TestMode> {
+ public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
protected:
+ ResizeTest() : EncoderTest(GET_PARAM(0)) {}
+
struct FrameInfo {
FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
: pts(_pts), w(_w), h(_h) {}
@@ -62,7 +67,7 @@
virtual void SetUp() {
InitializeConfig();
- SetMode(GetParam());
+ SetMode(GET_PARAM(1));
}
virtual bool Continue() const {
@@ -69,15 +74,9 @@
return !HasFatalFailure() && !abort_;
}
- virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
- if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
- const unsigned char *buf =
- reinterpret_cast<const unsigned char *>(pkt->data.frame.buf);
- const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff;
- const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff;
-
- frame_info_list_.push_back(FrameInfo(pkt->data.frame.pts, w, h));
- }
+ virtual void DecompressedFrameHook(const vpx_image_t &img,
+ vpx_codec_pts_t pts) {
+ frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
}
std::vector< FrameInfo > frame_info_list_;
@@ -100,5 +99,53 @@
}
}
-INSTANTIATE_TEST_CASE_P(OnePass, ResizeTest, ONE_PASS_TEST_MODES);
+class ResizeInternalTest : public ResizeTest {
+ protected:
+ ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {}
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+ libvpx_test::Encoder *encoder) {
+ if (video->frame() == 3) {
+ struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
+ encoder->Control(VP8E_SET_SCALEMODE, &mode);
+ }
+ if (video->frame() == 6) {
+ struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
+ encoder->Control(VP8E_SET_SCALEMODE, &mode);
+ }
+ }
+
+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+ if (!frame0_psnr_)
+ frame0_psnr_ = pkt->data.psnr.psnr[0];
+ EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 1.0);
+ }
+
+ double frame0_psnr_;
+};
+
+TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 10);
+ init_flags_ = VPX_CODEC_USE_PSNR;
+ // q picked such that initial keyframe on this clip is ~30dB PSNR
+ cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
+ info != frame_info_list_.end(); ++info) {
+ const vpx_codec_pts_t pts = info->pts;
+ if (pts >= 3 && pts < 6) {
+ ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
+ ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
+ } else {
+ EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width";
+ EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height";
+ }
+ }
+}
+
+VP8_INSTANTIATE_TEST_CASE(ResizeTest, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
+ ::testing::Values(::libvpx_test::kOnePassBest));
} // namespace
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -15,8 +15,13 @@
extern "C" {
#include "./vpx_config.h"
+#if CONFIG_VP8_ENCODER
#include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
+//#include "vp8/common/blockd.h"
+#endif
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
#include "vpx_mem/vpx_mem.h"
}
@@ -32,14 +37,22 @@
int reference_stride,
unsigned int max_sad);
+typedef void (*sad_n_by_n_by_4_fn_t)(const uint8_t *src_ptr,
+ int src_stride,
+ const unsigned char * const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array);
+
using libvpx_test::ACMRandom;
namespace {
-class SADTest : public PARAMS(int, int, sad_m_by_n_fn_t) {
+class SADTestBase : public ::testing::Test {
public:
+ SADTestBase(int width, int height) : width_(width), height_(height) {}
+
static void SetUpTestCase() {
source_data_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, kDataBufferSize));
+ vpx_memalign(kDataAlignment, kDataBlockSize));
reference_data_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBufferSize));
}
@@ -52,36 +65,31 @@
}
protected:
+ // Handle blocks up to 4 blocks 64x64 with stride up to 128
static const int kDataAlignment = 16;
- static const int kDataBufferSize = 16 * 32;
+ static const int kDataBlockSize = 64 * 128;
+ static const int kDataBufferSize = 4 * kDataBlockSize;
virtual void SetUp() {
- sad_fn_ = GET_PARAM(2);
- height_ = GET_PARAM(1);
- width_ = GET_PARAM(0);
- source_stride_ = width_ * 2;
+ source_stride_ = (width_ + 31) & ~31;
reference_stride_ = width_ * 2;
rnd_.Reset(ACMRandom::DeterministicSeed());
}
- sad_m_by_n_fn_t sad_fn_;
- virtual unsigned int SAD(unsigned int max_sad) {
- unsigned int ret;
- REGISTER_STATE_CHECK(ret = sad_fn_(source_data_, source_stride_,
- reference_data_, reference_stride_,
- max_sad));
- return ret;
+ virtual uint8_t* GetReference(int block_idx) {
+ return reference_data_ + block_idx * kDataBlockSize;
}
// Sum of Absolute Differences. Given two blocks, calculate the absolute
// difference between two pixels in the same relative location; accumulate.
- unsigned int ReferenceSAD(unsigned int max_sad) {
+ unsigned int ReferenceSAD(unsigned int max_sad, int block_idx = 0) {
unsigned int sad = 0;
+ const uint8_t* const reference = GetReference(block_idx);
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
sad += abs(source_data_[h * source_stride_ + w]
- - reference_data_[h * reference_stride_ + w]);
+ - reference[h * reference_stride_ + w]);
}
if (sad > max_sad) {
break;
@@ -106,6 +114,32 @@
}
}
+ int width_, height_;
+ static uint8_t* source_data_;
+ int source_stride_;
+ static uint8_t* reference_data_;
+ int reference_stride_;
+
+ ACMRandom rnd_;
+};
+
+class SADTest : public SADTestBase,
+ public ::testing::WithParamInterface<
+ std::tr1::tuple<int, int, sad_m_by_n_fn_t> > {
+ public:
+ SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+ unsigned int SAD(unsigned int max_sad, int block_idx = 0) {
+ unsigned int ret;
+ const uint8_t* const reference = GetReference(block_idx);
+
+ REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+ reference, reference_stride_,
+ max_sad));
+ return ret;
+ }
+
void CheckSad(unsigned int max_sad) {
unsigned int reference_sad, exp_sad;
@@ -119,19 +153,38 @@
ASSERT_GE(exp_sad, reference_sad);
}
}
+};
- // Handle blocks up to 16x16 with stride up to 32
- int height_, width_;
- static uint8_t* source_data_;
- int source_stride_;
- static uint8_t* reference_data_;
- int reference_stride_;
+class SADx4Test : public SADTestBase,
+ public ::testing::WithParamInterface<
+ std::tr1::tuple<int, int, sad_n_by_n_by_4_fn_t> > {
+ public:
+ SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1)) {}
- ACMRandom rnd_;
+ protected:
+ void SADs(unsigned int *results) {
+ const uint8_t* refs[] = {GetReference(0), GetReference(1),
+ GetReference(2), GetReference(3)};
+
+ REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
+ refs, reference_stride_,
+ results));
+ }
+
+ void CheckSADs() {
+ unsigned int reference_sad, exp_sad[4];
+
+ SADs(exp_sad);
+ for (int block = 0; block < 4; block++) {
+ reference_sad = ReferenceSAD(UINT_MAX, block);
+
+ EXPECT_EQ(exp_sad[block], reference_sad) << "block " << block;
+ }
+ }
};
-uint8_t* SADTest::source_data_ = NULL;
-uint8_t* SADTest::reference_data_ = NULL;
+uint8_t* SADTestBase::source_data_ = NULL;
+uint8_t* SADTestBase::reference_data_ = NULL;
TEST_P(SADTest, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
@@ -139,6 +192,15 @@
CheckSad(UINT_MAX);
}
+TEST_P(SADx4Test, MaxRef) {
+ FillConstant(source_data_, source_stride_, 0);
+ FillConstant(GetReference(0), reference_stride_, 255);
+ FillConstant(GetReference(1), reference_stride_, 255);
+ FillConstant(GetReference(2), reference_stride_, 255);
+ FillConstant(GetReference(3), reference_stride_, 255);
+ CheckSADs();
+}
+
TEST_P(SADTest, MaxSrc) {
FillConstant(source_data_, source_stride_, 255);
FillConstant(reference_data_, reference_stride_, 0);
@@ -145,6 +207,15 @@
CheckSad(UINT_MAX);
}
+TEST_P(SADx4Test, MaxSrc) {
+ FillConstant(source_data_, source_stride_, 255);
+ FillConstant(GetReference(0), reference_stride_, 0);
+ FillConstant(GetReference(1), reference_stride_, 0);
+ FillConstant(GetReference(2), reference_stride_, 0);
+ FillConstant(GetReference(3), reference_stride_, 0);
+ CheckSADs();
+}
+
TEST_P(SADTest, ShortRef) {
int tmp_stride = reference_stride_;
reference_stride_ >>= 1;
@@ -154,6 +225,18 @@
reference_stride_ = tmp_stride;
}
+TEST_P(SADx4Test, ShortRef) {
+ int tmp_stride = reference_stride_;
+ reference_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ CheckSADs();
+ reference_stride_ = tmp_stride;
+}
+
TEST_P(SADTest, UnalignedRef) {
// The reference frame, but not the source frame, may be unaligned for
// certain types of searches.
@@ -165,6 +248,20 @@
reference_stride_ = tmp_stride;
}
+TEST_P(SADx4Test, UnalignedRef) {
+ // The reference frame, but not the source frame, may be unaligned for
+ // certain types of searches.
+ int tmp_stride = reference_stride_;
+ reference_stride_ -= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ CheckSADs();
+ reference_stride_ = tmp_stride;
+}
+
TEST_P(SADTest, ShortSrc) {
int tmp_stride = source_stride_;
source_stride_ >>= 1;
@@ -174,6 +271,18 @@
source_stride_ = tmp_stride;
}
+TEST_P(SADx4Test, ShortSrc) {
+ int tmp_stride = source_stride_;
+ source_stride_ >>= 1;
+ FillRandom(source_data_, source_stride_);
+ FillRandom(GetReference(0), reference_stride_);
+ FillRandom(GetReference(1), reference_stride_);
+ FillRandom(GetReference(2), reference_stride_);
+ FillRandom(GetReference(3), reference_stride_);
+ CheckSADs();
+ source_stride_ = tmp_stride;
+}
+
TEST_P(SADTest, MaxSAD) {
// Verify that, when max_sad is set, the implementation does not return a
// value lower than the reference.
@@ -184,18 +293,62 @@
using std::tr1::make_tuple;
+#if CONFIG_VP8_ENCODER && CONFIG_VP9_ENCODER
+#define VP8_VP9_SEPARATOR ,
+#else
+#define VP8_VP9_SEPARATOR
+#endif
+
+#if CONFIG_VP8_ENCODER
const sad_m_by_n_fn_t sad_16x16_c = vp8_sad16x16_c;
const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c;
const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c;
const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c;
const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c;
+#endif
+#if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_t sad_64x64_c_vp9 = vp9_sad64x64_c;
+const sad_m_by_n_fn_t sad_32x32_c_vp9 = vp9_sad32x32_c;
+const sad_m_by_n_fn_t sad_16x16_c_vp9 = vp9_sad16x16_c;
+const sad_m_by_n_fn_t sad_8x16_c_vp9 = vp9_sad8x16_c;
+const sad_m_by_n_fn_t sad_16x8_c_vp9 = vp9_sad16x8_c;
+const sad_m_by_n_fn_t sad_8x8_c_vp9 = vp9_sad8x8_c;
+const sad_m_by_n_fn_t sad_4x4_c_vp9 = vp9_sad4x4_c;
+#endif
INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::Values(
+#if CONFIG_VP8_ENCODER
make_tuple(16, 16, sad_16x16_c),
make_tuple(8, 16, sad_8x16_c),
make_tuple(16, 8, sad_16x8_c),
make_tuple(8, 8, sad_8x8_c),
- make_tuple(4, 4, sad_4x4_c)));
+ make_tuple(4, 4, sad_4x4_c)
+#endif
+ VP8_VP9_SEPARATOR
+#if CONFIG_VP9_ENCODER
+ make_tuple(64, 64, sad_64x64_c_vp9),
+ make_tuple(32, 32, sad_32x32_c_vp9),
+ make_tuple(16, 16, sad_16x16_c_vp9),
+ make_tuple(8, 16, sad_8x16_c_vp9),
+ make_tuple(16, 8, sad_16x8_c_vp9),
+ make_tuple(8, 8, sad_8x8_c_vp9),
+ make_tuple(4, 4, sad_4x4_c_vp9)
+#endif
+ ));
+#if CONFIG_VP9_ENCODER
+const sad_n_by_n_by_4_fn_t sad_64x64x4d_c = vp9_sad64x64x4d_c;
+const sad_n_by_n_by_4_fn_t sad_32x32x4d_c = vp9_sad32x32x4d_c;
+const sad_n_by_n_by_4_fn_t sad_16x16x4d_c = vp9_sad16x16x4d_c;
+const sad_n_by_n_by_4_fn_t sad_8x8x4d_c = vp9_sad8x8x4d_c;
+const sad_n_by_n_by_4_fn_t sad_4x4x4d_c = vp9_sad4x4x4d_c;
+INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::Values(
+ make_tuple(64, 64, sad_64x64x4d_c),
+ make_tuple(32, 32, sad_32x32x4d_c),
+ make_tuple(16, 16, sad_16x16x4d_c),
+ make_tuple(8, 8, sad_8x8x4d_c),
+ make_tuple(4, 4, sad_4x4x4d_c)));
+#endif
+
// ARM tests
#if HAVE_MEDIA
const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6;
@@ -219,31 +372,120 @@
// X86 tests
#if HAVE_MMX
+#if CONFIG_VP8_ENCODER
const sad_m_by_n_fn_t sad_16x16_mmx = vp8_sad16x16_mmx;
const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx;
const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx;
const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx;
const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx;
+#endif
+#if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_t sad_16x16_mmx_vp9 = vp9_sad16x16_mmx;
+const sad_m_by_n_fn_t sad_8x16_mmx_vp9 = vp9_sad8x16_mmx;
+const sad_m_by_n_fn_t sad_16x8_mmx_vp9 = vp9_sad16x8_mmx;
+const sad_m_by_n_fn_t sad_8x8_mmx_vp9 = vp9_sad8x8_mmx;
+const sad_m_by_n_fn_t sad_4x4_mmx_vp9 = vp9_sad4x4_mmx;
+#endif
+
INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::Values(
+#if CONFIG_VP8_ENCODER
make_tuple(16, 16, sad_16x16_mmx),
make_tuple(8, 16, sad_8x16_mmx),
make_tuple(16, 8, sad_16x8_mmx),
make_tuple(8, 8, sad_8x8_mmx),
- make_tuple(4, 4, sad_4x4_mmx)));
+ make_tuple(4, 4, sad_4x4_mmx)
#endif
+ VP8_VP9_SEPARATOR
+#if CONFIG_VP9_ENCODER
+ make_tuple(16, 16, sad_16x16_mmx_vp9),
+ make_tuple(8, 16, sad_8x16_mmx_vp9),
+ make_tuple(16, 8, sad_16x8_mmx_vp9),
+ make_tuple(8, 8, sad_8x8_mmx_vp9),
+ make_tuple(4, 4, sad_4x4_mmx_vp9)
+#endif
+ ));
+#endif
+
+#if HAVE_SSE
+#if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
+INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
+ make_tuple(4, 4, sad_4x4_sse_vp9)));
+
+const sad_n_by_n_by_4_fn_t sad_4x4x4d_sse = vp9_sad4x4x4d_sse;
+INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
+ make_tuple(4, 4, sad_4x4x4d_sse)));
+#endif
+#endif
+
#if HAVE_SSE2
+#if CONFIG_VP8_ENCODER
const sad_m_by_n_fn_t sad_16x16_wmt = vp8_sad16x16_wmt;
const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt;
const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt;
const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
+#endif
+#if CONFIG_VP9_ENCODER
+const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
+const sad_m_by_n_fn_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
+const sad_m_by_n_fn_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
+const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
+const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
+const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
+#endif
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::Values(
+#if CONFIG_VP8_ENCODER
make_tuple(16, 16, sad_16x16_wmt),
make_tuple(8, 16, sad_8x16_wmt),
make_tuple(16, 8, sad_16x8_wmt),
make_tuple(8, 8, sad_8x8_wmt),
- make_tuple(4, 4, sad_4x4_wmt)));
+ make_tuple(4, 4, sad_4x4_wmt)
#endif
+ VP8_VP9_SEPARATOR
+#if CONFIG_VP9_ENCODER
+ make_tuple(64, 64, sad_64x64_sse2_vp9),
+ make_tuple(32, 32, sad_32x32_sse2_vp9),
+ make_tuple(16, 16, sad_16x16_sse2_vp9),
+ make_tuple(8, 16, sad_8x16_sse2_vp9),
+ make_tuple(16, 8, sad_16x8_sse2_vp9),
+ make_tuple(8, 8, sad_8x8_sse2_vp9)
+#endif
+ ));
+
+#if CONFIG_VP9_ENCODER
+const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_32x32x4d_sse2 = vp9_sad32x32x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_16x16x4d_sse2 = vp9_sad16x16x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2;
+const sad_n_by_n_by_4_fn_t sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2;
+INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
+ make_tuple(64, 64, sad_64x64x4d_sse2),
+ make_tuple(32, 32, sad_32x32x4d_sse2),
+ make_tuple(16, 16, sad_16x16x4d_sse2),
+ make_tuple(16, 8, sad_16x8x4d_sse2),
+ make_tuple(8, 16, sad_8x16x4d_sse2),
+ make_tuple(8, 8, sad_8x8x4d_sse2)));
+#endif
+#endif
+
+#if HAVE_SSE3
+#if CONFIG_VP8_ENCODER
+const sad_n_by_n_by_4_fn_t sad_16x16x4d_sse3 = vp8_sad16x16x4d_sse3;
+const sad_n_by_n_by_4_fn_t sad_16x8x4d_sse3 = vp8_sad16x8x4d_sse3;
+const sad_n_by_n_by_4_fn_t sad_8x16x4d_sse3 = vp8_sad8x16x4d_sse3;
+const sad_n_by_n_by_4_fn_t sad_8x8x4d_sse3 = vp8_sad8x8x4d_sse3;
+const sad_n_by_n_by_4_fn_t sad_4x4x4d_sse3 = vp8_sad4x4x4d_sse3;
+INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
+ make_tuple(16, 16, sad_16x16x4d_sse3),
+ make_tuple(16, 8, sad_16x8x4d_sse3),
+ make_tuple(8, 16, sad_8x16x4d_sse3),
+ make_tuple(8, 8, sad_8x8x4d_sse3),
+ make_tuple(4, 4, sad_4x4x4d_sse3)));
+#endif
+#endif
+
#if HAVE_SSSE3
const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
--- /dev/null
+++ b/test/superframe_test.cc
@@ -1,0 +1,100 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <climits>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class SuperframeTest : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+ SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL),
+ last_sf_pts_(0) {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(GET_PARAM(1));
+ sf_count_ = 0;
+ sf_count_max_ = INT_MAX;
+ }
+
+ virtual void TearDown() {
+ delete modified_buf_;
+ }
+
+ virtual bool Continue() const {
+ return !HasFatalFailure() && !abort_;
+ }
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+ libvpx_test::Encoder *encoder) {
+ if (video->frame() == 1) {
+ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+ }
+ }
+
+ virtual const vpx_codec_cx_pkt_t * MutateEncoderOutputHook(
+ const vpx_codec_cx_pkt_t *pkt) {
+ if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
+ return pkt;
+
+ const uint8_t *buffer = reinterpret_cast<uint8_t*>(pkt->data.frame.buf);
+ const uint8_t marker = buffer[pkt->data.frame.sz - 1];
+ const int frames = (marker & 0x7) + 1;
+ const int mag = ((marker >> 3) & 3) + 1;
+ const unsigned int index_sz = 2 + mag * frames;
+ if ((marker & 0xe0) == 0xc0 &&
+ pkt->data.frame.sz >= index_sz &&
+ buffer[pkt->data.frame.sz - index_sz] == marker) {
+ // frame is a superframe. strip off the index.
+ if (modified_buf_)
+ delete modified_buf_;
+ modified_buf_ = new uint8_t[pkt->data.frame.sz - index_sz];
+ memcpy(modified_buf_, pkt->data.frame.buf,
+ pkt->data.frame.sz - index_sz);
+ modified_pkt_ = *pkt;
+ modified_pkt_.data.frame.buf = modified_buf_;
+ modified_pkt_.data.frame.sz -= index_sz;
+
+ sf_count_++;
+ last_sf_pts_ = pkt->data.frame.pts;
+ return &modified_pkt_;
+ }
+
+ // Make sure we do a few frames after the last SF
+ abort_ |= sf_count_ > sf_count_max_ &&
+ pkt->data.frame.pts - last_sf_pts_ >= 5;
+ return pkt;
+ }
+
+ int sf_count_;
+ int sf_count_max_;
+ vpx_codec_cx_pkt_t modified_pkt_;
+ uint8_t *modified_buf_;
+ vpx_codec_pts_t last_sf_pts_;
+};
+
+TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {
+ sf_count_max_ = 0; // early exit on successful test.
+ cfg_.g_lag_in_frames = 25;
+
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 40);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ EXPECT_EQ(sf_count_, 1);
+}
+
+VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
+ ::libvpx_test::kTwoPassGood));
+} // namespace
--- a/test/test.mk
+++ b/test/test.mk
@@ -1,7 +1,8 @@
LIBVPX_TEST_SRCS-yes += register_state_check.h
LIBVPX_TEST_SRCS-yes += test.mk
LIBVPX_TEST_SRCS-yes += acm_random.h
-
+LIBVPX_TEST_SRCS-yes += md5_helper.h
+LIBVPX_TEST_SRCS-yes += codec_factory.h
LIBVPX_TEST_SRCS-yes += test_libvpx.cc
LIBVPX_TEST_SRCS-yes += util.h
LIBVPX_TEST_SRCS-yes += video_source.h
@@ -15,17 +16,20 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h
+
+LIBVPX_TEST_SRCS-yes += encode_test_driver.cc
+LIBVPX_TEST_SRCS-yes += encode_test_driver.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c
+LIBVPX_TEST_SRCS-yes += decode_test_driver.cc
+LIBVPX_TEST_SRCS-yes += decode_test_driver.h
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h
+
+
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc
##
@@ -44,10 +48,10 @@
LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc
endif
-LIBVPX_TEST_SRCS-yes += idctllm_test.cc
+LIBVPX_TEST_SRCS-yes += idct_test.cc
LIBVPX_TEST_SRCS-yes += intrapred_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
-LIBVPX_TEST_SRCS-yes += sad_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
@@ -66,13 +70,18 @@
# IDCT test currently depends on FDCT function
LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
+LIBVPX_TEST_SRCS-yes += superframe_test.cc
+LIBVPX_TEST_SRCS-yes += tile_independence_test.cc
endif
+LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
+
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
-#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
+
endif # VP9
@@ -82,7 +91,8 @@
##
## TEST DATA
##
-LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
+
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -12,17 +12,15 @@
#include <cstdlib>
#include <string>
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
#include "test/decode_test_driver.h"
#include "test/ivf_video_source.h"
+#include "test/util.h"
+#include "test/md5_helper.h"
extern "C" {
-#include "./md5_utils.h"
#include "vpx_mem/vpx_mem.h"
}
-#if defined(_MSC_VER)
-#define snprintf sprintf_s
-#endif
-
namespace {
// There are 61 test vectors in total.
const char *kTestVectors[] = {
@@ -59,10 +57,10 @@
"vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf"
};
-class TestVectorTest : public libvpx_test::DecoderTest,
- public ::testing::TestWithParam<const char*> {
+class TestVectorTest : public ::libvpx_test::DecoderTest,
+ public ::libvpx_test::CodecTestWithParam<const char*> {
protected:
- TestVectorTest() : md5_file_(NULL) {}
+ TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) {}
virtual ~TestVectorTest() {
if (md5_file_)
@@ -85,31 +83,10 @@
ASSERT_NE(res, EOF) << "Read md5 data failed";
expected_md5[32] = '\0';
- MD5Context md5;
- MD5Init(&md5);
+ ::libvpx_test::MD5 md5_res;
+ md5_res.Add(&img);
+ const char *actual_md5 = md5_res.Get();
- // Compute and update md5 for each raw in decompressed data.
- for (int plane = 0; plane < 3; ++plane) {
- uint8_t *buf = img.planes[plane];
-
- for (unsigned int y = 0; y < (plane ? (img.d_h + 1) >> 1 : img.d_h);
- ++y) {
- MD5Update(&md5, buf, (plane ? (img.d_w + 1) >> 1 : img.d_w));
- buf += img.stride[plane];
- }
- }
-
- uint8_t md5_sum[16];
- MD5Final(md5_sum, &md5);
-
- char actual_md5[33];
- // Convert to get the actual md5.
- for (int i = 0; i < 16; i++) {
- snprintf(&actual_md5[i * 2], sizeof(actual_md5) - i * 2, "%02x",
- md5_sum[i]);
- }
- actual_md5[32] = '\0';
-
// Check md5 match.
ASSERT_STREQ(expected_md5, actual_md5)
<< "Md5 checksums don't match: frame number = " << frame_number;
@@ -124,7 +101,7 @@
// checksums match the correct md5 data, then the test is passed. Otherwise,
// the test failed.
TEST_P(TestVectorTest, MD5Match) {
- const std::string filename = GetParam();
+ const std::string filename = GET_PARAM(1);
// Open compressed video file.
libvpx_test::IVFVideoSource video(filename);
@@ -138,7 +115,7 @@
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
-INSTANTIATE_TEST_CASE_P(TestVectorSequence, TestVectorTest,
- ::testing::ValuesIn(kTestVectors));
+VP8_INSTANTIATE_TEST_CASE(TestVectorTest,
+ ::testing::ValuesIn(kTestVectors));
} // namespace
--- /dev/null
+++ b/test/tile_independence_test.cc
@@ -1,0 +1,102 @@
+/*
+ Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+
+ Use of this source code is governed by a BSD-style license
+ that can be found in the LICENSE file in the root of the source
+ tree. An additional intellectual property rights grant can be found
+ in the file PATENTS. All contributing project authors may
+ be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/md5_helper.h"
+extern "C" {
+#include "vpx_mem/vpx_mem.h"
+}
+
+namespace {
+class TileIndependenceTest : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWithParam<int> {
+ protected:
+ TileIndependenceTest() : EncoderTest(GET_PARAM(0)), n_tiles_(GET_PARAM(1)),
+ md5_fw_order_(), md5_inv_order_() {
+ init_flags_ = VPX_CODEC_USE_PSNR;
+ vpx_codec_dec_cfg_t cfg;
+ cfg.w = 704;
+ cfg.h = 144;
+ cfg.threads = 1;
+ fw_dec_ = codec_->CreateDecoder(cfg, 0);
+ inv_dec_ = codec_->CreateDecoder(cfg, 0);
+ inv_dec_->Control(VP9_INVERT_TILE_DECODE_ORDER, 1);
+ }
+
+ virtual ~TileIndependenceTest() {
+ delete fw_dec_;
+ delete inv_dec_;
+ }
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(libvpx_test::kTwoPassGood);
+ }
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+ libvpx_test::Encoder *encoder) {
+ if (video->frame() == 1) {
+ encoder->Control(VP9E_SET_TILE_COLUMNS, n_tiles_);
+ }
+ }
+
+ void UpdateMD5(::libvpx_test::Decoder *dec, const vpx_codec_cx_pkt_t *pkt,
+ ::libvpx_test::MD5 *md5) {
+ dec->DecodeFrame((uint8_t *) pkt->data.frame.buf, pkt->data.frame.sz);
+ const vpx_image_t *img = dec->GetDxData().Next();
+ md5->Add(img);
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ UpdateMD5(fw_dec_, pkt, &md5_fw_order_);
+ UpdateMD5(inv_dec_, pkt, &md5_inv_order_);
+ }
+
+ private:
+ int n_tiles_;
+ protected:
+ ::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
+ ::libvpx_test::Decoder *fw_dec_, *inv_dec_;
+};
+
+// run an encode with 2 or 4 tiles, and do the decode both in normal and
+// inverted tile ordering. Ensure that the MD5 of the output in both cases
+// is identical. If so, tiles are considered independent and the test passes.
+TEST_P(TileIndependenceTest, MD5Match) {
+ const vpx_rational timebase = { 33333333, 1000000000 };
+ cfg_.g_timebase = timebase;
+ cfg_.rc_target_bitrate = 500;
+ cfg_.g_lag_in_frames = 25;
+ cfg_.rc_end_usage = VPX_VBR;
+
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 144,
+ timebase.den, timebase.num, 0, 30);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ const char *md5_fw_str = md5_fw_order_.Get();
+ const char *md5_inv_str = md5_inv_order_.Get();
+
+ // could use ASSERT_EQ(!memcmp(.., .., 16) here, but this gives nicer
+ // output if it fails. Not sure if it's helpful since it's really just
+ // a MD5...
+ ASSERT_STREQ(md5_fw_str, md5_inv_str);
+}
+
+VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest,
+ ::testing::Range(0, 2, 1));
+
+} // namespace
--- a/test/util.h
+++ b/test/util.h
@@ -11,8 +11,38 @@
#ifndef TEST_UTIL_H_
#define TEST_UTIL_H_
+#include <stdio.h>
+#include <math.h>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx/vpx_image.h"
+
// Macros
#define PARAMS(...) ::testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
+
+static double compute_psnr(const vpx_image_t *img1,
+ const vpx_image_t *img2) {
+ assert((img1->fmt == img2->fmt) &&
+ (img1->d_w == img2->d_w) &&
+ (img1->d_h == img2->d_h));
+
+ const unsigned int width_y = img1->d_w;
+ const unsigned int height_y = img1->d_h;
+ unsigned int i, j;
+
+ int64_t sqrerr = 0;
+ for (i = 0; i < height_y; ++i)
+ for (j = 0; j < width_y; ++j) {
+ int64_t d = img1->planes[VPX_PLANE_Y][i * img1->stride[VPX_PLANE_Y] + j] -
+ img2->planes[VPX_PLANE_Y][i * img2->stride[VPX_PLANE_Y] + j];
+ sqrerr += d * d;
+ }
+ double mse = sqrerr / (width_y * height_y);
+ double psnr = 100.0;
+ if (mse > 0.0) {
+ psnr = 10 * log10(255.0 * 255.0 / mse);
+ }
+ return psnr;
+}
#endif // TEST_UTIL_H_
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -302,7 +302,7 @@
return 1;
}
-
+
int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
const uint8_t *source,
int64_t time_stamp)
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -50,7 +50,7 @@
unsigned __int64 Sectionbits[500];
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
int intra_mode_stats[10][10][10];
static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2];
extern unsigned int active_section;
@@ -531,7 +531,7 @@
vp8_convert_rfct_to_prob(cpi);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 1;
#endif
@@ -580,7 +580,7 @@
xd->mb_to_top_edge = -((mb_row * 16)) << 3;
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 9;
#endif
@@ -593,7 +593,7 @@
if (rf == INTRA_FRAME)
{
vp8_write(w, 0, cpi->prob_intra_coded);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 6;
#endif
write_ymode(w, mode, pc->fc.ymode_prob);
@@ -633,13 +633,13 @@
vp8_mv_ref_probs(mv_ref_p, ct);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
accum_mv_refs(mode, ct);
#endif
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 3;
#endif
@@ -649,7 +649,7 @@
{
case NEWMV:
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 5;
#endif
@@ -692,7 +692,7 @@
if (blockmode == NEW4X4)
{
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 11;
#endif
write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *) mvc);
@@ -769,7 +769,7 @@
const B_PREDICTION_MODE L = left_block_mode(m, i);
const int bm = m->bmi[i].as_mode;
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
++intra_mode_stats [A] [L] [bm];
#endif
@@ -1160,7 +1160,7 @@
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
++ tree_update_hist [i][j][k][t] [u];
#endif
@@ -1181,7 +1181,7 @@
while (++t < ENTROPY_NODES);
/* Accum token counts for generation of default statistics */
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
t = 0;
do
@@ -1527,7 +1527,7 @@
if (pc->frame_type != KEY_FRAME)
vp8_write_bit(bc, pc->refresh_last_frame);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
if (pc->frame_type == INTER_FRAME)
active_section = 0;
@@ -1550,7 +1550,7 @@
vp8_update_coef_probs(cpi);
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 2;
#endif
@@ -1561,7 +1561,7 @@
{
write_kfmodes(cpi);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 8;
#endif
}
@@ -1569,7 +1569,7 @@
{
pack_inter_mode_mvs(cpi);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 1;
#endif
}
@@ -1687,7 +1687,7 @@
#endif
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
void print_tree_update_probs()
{
int i, j, k, l;
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -16,7 +16,7 @@
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
unsigned int active_section = 0;
#endif
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -67,7 +67,7 @@
unsigned int lowvalue = br->lowvalue;
register unsigned int shift;
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
#if defined(SECTIONBITS_OUTPUT)
if (bit)
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -16,7 +16,7 @@
#include <math.h>
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
extern unsigned int active_section;
#endif
@@ -359,7 +359,7 @@
vp8_writer *const w = cpi->bc;
MV_CONTEXT *mvc = cpi->common.fc.mvc;
int flags[2] = {0, 0};
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 4;
#endif
write_component_probs(
@@ -374,7 +374,7 @@
if (flags[0] || flags[1])
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
active_section = 5;
#endif
}
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -18,7 +18,7 @@
#include <math.h>
#include "vp8/common/findnearmv.h"
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
@@ -1912,7 +1912,7 @@
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
void print_mode_context(void)
{
FILE *f = fopen("modecont.c", "w");
@@ -1965,8 +1965,8 @@
fclose(f);
}
-/* MV ref count ENTROPY_STATS stats code */
-#ifdef ENTROPY_STATS
+/* MV ref count VP8_ENTROPY_STATS stats code */
+#ifdef VP8_ENTROPY_STATS
void init_mv_ref_counts()
{
vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
@@ -2020,6 +2020,6 @@
}
}
-#endif/* END MV ref count ENTROPY_STATS stats code */
+#endif/* END MV ref count VP8_ENTROPY_STATS stats code */
#endif
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -15,7 +15,7 @@
#include "block.h"
#include "vp8/common/variance.h"
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
extern void init_mv_ref_counts();
extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
#endif
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -111,7 +111,7 @@
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
extern int intra_mode_stats[10][10][10];
#endif
@@ -1805,7 +1805,7 @@
else
cpi->cyclic_refresh_map = (signed char *) NULL;
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
init_context_counters();
#endif
@@ -1923,7 +1923,7 @@
cpi->mb.rd_thresh_mult[i] = 128;
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
init_mv_ref_counts();
#endif
@@ -2060,7 +2060,7 @@
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
print_context_counters();
print_tree_update_probs();
print_mode_context();
@@ -2242,7 +2242,7 @@
}
#endif
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
{
int i, j, k;
FILE *fmode = fopen("modecontext.c", "w");
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -20,7 +20,7 @@
/* Global event counters used for accumulating statistics across several
compressions, then generating context.c = initial stats. */
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
_int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
#endif
void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) ;
@@ -413,7 +413,7 @@
}
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
void init_context_counters(void)
{
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -33,7 +33,7 @@
int rd_cost_mby(MACROBLOCKD *);
-#ifdef ENTROPY_STATS
+#ifdef VP8_ENTROPY_STATS
void init_context_counters();
void print_context_counters();
--- a/vp8/encoder/x86/quantize_sse2.c
+++ /dev/null
@@ -1,229 +1,0 @@
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_config.h"
-#include "vp8_rtcd.h"
-#include "vpx_ports/x86.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vp8/encoder/block.h"
-#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
-
-#include <mmintrin.h> /* MMX */
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#define SELECT_EOB(i, z) \
- do { \
- short boost = *zbin_boost_ptr; \
- int cmp = (x[z] < boost) | (y[z] == 0); \
- zbin_boost_ptr++; \
- if (cmp) \
- goto select_eob_end_##i; \
- qcoeff_ptr[z] = y[z]; \
- eob = i; \
- zbin_boost_ptr = b->zrun_zbin_boost; \
- select_eob_end_##i:; \
- } while (0)
-
-void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
-{
- char eob = 0;
- short *zbin_boost_ptr = b->zrun_zbin_boost;
- short *qcoeff_ptr = d->qcoeff;
- DECLARE_ALIGNED_ARRAY(16, short, x, 16);
- DECLARE_ALIGNED_ARRAY(16, short, y, 16);
-
- __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;
- __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
- __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
- __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
- __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
- __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
- __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
- __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
- __m128i round0 = _mm_load_si128((__m128i *)(b->round));
- __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
- __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
- __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
- __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
- __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
-
- vpx_memset(qcoeff_ptr, 0, 32);
-
- /* Duplicate to all lanes. */
- zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
- zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
-
- /* Sign of z: z >> 15 */
- sz0 = _mm_srai_epi16(z0, 15);
- sz1 = _mm_srai_epi16(z1, 15);
-
- /* x = abs(z): (z ^ sz) - sz */
- x0 = _mm_xor_si128(z0, sz0);
- x1 = _mm_xor_si128(z1, sz1);
- x0 = _mm_sub_epi16(x0, sz0);
- x1 = _mm_sub_epi16(x1, sz1);
-
- /* zbin[] + zbin_extra */
- zbin0 = _mm_add_epi16(zbin0, zbin_extra);
- zbin1 = _mm_add_epi16(zbin1, zbin_extra);
-
- /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
- * the equation because boost is the only value which can change:
- * x - (zbin[] + extra) >= boost */
- x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
- x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
-
- _mm_store_si128((__m128i *)(x), x_minus_zbin0);
- _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);
-
- /* All the remaining calculations are valid whether they are done now with
- * simd or later inside the loop one at a time. */
- x0 = _mm_add_epi16(x0, round0);
- x1 = _mm_add_epi16(x1, round1);
-
- y0 = _mm_mulhi_epi16(x0, quant0);
- y1 = _mm_mulhi_epi16(x1, quant1);
-
- y0 = _mm_add_epi16(y0, x0);
- y1 = _mm_add_epi16(y1, x1);
-
- /* Instead of shifting each value independently we convert the scaling
- * factor with 1 << (16 - shift) so we can use multiply/return high half. */
- y0 = _mm_mulhi_epi16(y0, quant_shift0);
- y1 = _mm_mulhi_epi16(y1, quant_shift1);
-
- /* Return the sign: (y ^ sz) - sz */
- y0 = _mm_xor_si128(y0, sz0);
- y1 = _mm_xor_si128(y1, sz1);
- y0 = _mm_sub_epi16(y0, sz0);
- y1 = _mm_sub_epi16(y1, sz1);
-
- _mm_store_si128((__m128i *)(y), y0);
- _mm_store_si128((__m128i *)(y + 8), y1);
-
- zbin_boost_ptr = b->zrun_zbin_boost;
-
- /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
- SELECT_EOB(1, 0);
- SELECT_EOB(2, 1);
- SELECT_EOB(3, 4);
- SELECT_EOB(4, 8);
- SELECT_EOB(5, 5);
- SELECT_EOB(6, 2);
- SELECT_EOB(7, 3);
- SELECT_EOB(8, 6);
- SELECT_EOB(9, 9);
- SELECT_EOB(10, 12);
- SELECT_EOB(11, 13);
- SELECT_EOB(12, 10);
- SELECT_EOB(13, 7);
- SELECT_EOB(14, 11);
- SELECT_EOB(15, 14);
- SELECT_EOB(16, 15);
-
- y0 = _mm_load_si128((__m128i *)(d->qcoeff));
- y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));
-
- /* dqcoeff = qcoeff * dequant */
- y0 = _mm_mullo_epi16(y0, dequant0);
- y1 = _mm_mullo_epi16(y1, dequant1);
-
- _mm_store_si128((__m128i *)(d->dqcoeff), y0);
- _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);
-
- *d->eob = eob;
-}
-
-void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
-{
- __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
- __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
- __m128i round0 = _mm_load_si128((__m128i *)(b->round));
- __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
- __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
- __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
- __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
- __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
- __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));
- __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));
-
- __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;
-
- /* sign of z: z >> 15 */
- sz0 = _mm_srai_epi16(z0, 15);
- sz1 = _mm_srai_epi16(z1, 15);
-
- /* x = abs(z): (z ^ sz) - sz */
- x0 = _mm_xor_si128(z0, sz0);
- x1 = _mm_xor_si128(z1, sz1);
- x0 = _mm_sub_epi16(x0, sz0);
- x1 = _mm_sub_epi16(x1, sz1);
-
- /* x += round */
- x0 = _mm_add_epi16(x0, round0);
- x1 = _mm_add_epi16(x1, round1);
-
- /* y = (x * quant) >> 16 */
- y0 = _mm_mulhi_epi16(x0, quant_fast0);
- y1 = _mm_mulhi_epi16(x1, quant_fast1);
-
- /* x = abs(y) = (y ^ sz) - sz */
- y0 = _mm_xor_si128(y0, sz0);
- y1 = _mm_xor_si128(y1, sz1);
- x0 = _mm_sub_epi16(y0, sz0);
- x1 = _mm_sub_epi16(y1, sz1);
-
- /* qcoeff = x */
- _mm_store_si128((__m128i *)(d->qcoeff), x0);
- _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
-
- /* x * dequant */
- xdq0 = _mm_mullo_epi16(x0, dequant0);
- xdq1 = _mm_mullo_epi16(x1, dequant1);
-
- /* dqcoeff = x * dequant */
- _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);
- _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);
-
- /* build a mask for the zig zag */
- zeros = _mm_setzero_si128();
-
- x0 = _mm_cmpeq_epi16(x0, zeros);
- x1 = _mm_cmpeq_epi16(x1, zeros);
-
- ones = _mm_cmpeq_epi16(zeros, zeros);
-
- x0 = _mm_xor_si128(x0, ones);
- x1 = _mm_xor_si128(x1, ones);
-
- x0 = _mm_and_si128(x0, inv_zig_zag0);
- x1 = _mm_and_si128(x1, inv_zig_zag1);
-
- x0 = _mm_max_epi16(x0, x1);
-
- /* now down to 8 */
- x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110
-
- x0 = _mm_max_epi16(x0, x1);
-
- /* only 4 left */
- x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110
-
- x0 = _mm_max_epi16(x0, x1);
-
- /* okay, just 2! */
- x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001
-
- x0 = _mm_max_epi16(x0, x1);
-
- *d->eob = 0xFF & _mm_cvtsi128_si32(x0);
-}
--- /dev/null
+++ b/vp8/encoder/x86/quantize_sse2_intrinsics.c
@@ -1,0 +1,229 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vpx_ports/x86.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp8/encoder/block.h"
+#include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */
+
+#include <mmintrin.h> /* MMX */
+#include <xmmintrin.h> /* SSE */
+#include <emmintrin.h> /* SSE2 */
+
+#define SELECT_EOB(i, z) \
+ do { \
+ short boost = *zbin_boost_ptr; \
+ int cmp = (x[z] < boost) | (y[z] == 0); \
+ zbin_boost_ptr++; \
+ if (cmp) \
+ goto select_eob_end_##i; \
+ qcoeff_ptr[z] = y[z]; \
+ eob = i; \
+ zbin_boost_ptr = b->zrun_zbin_boost; \
+ select_eob_end_##i:; \
+ } while (0)
+
+void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+ char eob = 0;
+ short *zbin_boost_ptr = b->zrun_zbin_boost;
+ short *qcoeff_ptr = d->qcoeff;
+ DECLARE_ALIGNED_ARRAY(16, short, x, 16);
+ DECLARE_ALIGNED_ARRAY(16, short, y, 16);
+
+ __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1;
+ __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift));
+ __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8));
+ __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+ __m128i z1 = _mm_load_si128((__m128i *)(b->coeff+8));
+ __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra);
+ __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin));
+ __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8));
+ __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+ __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+ __m128i quant0 = _mm_load_si128((__m128i *)(b->quant));
+ __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8));
+ __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+ __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+
+ vpx_memset(qcoeff_ptr, 0, 32);
+
+ /* Duplicate to all lanes. */
+ zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0);
+ zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra);
+
+ /* Sign of z: z >> 15 */
+ sz0 = _mm_srai_epi16(z0, 15);
+ sz1 = _mm_srai_epi16(z1, 15);
+
+ /* x = abs(z): (z ^ sz) - sz */
+ x0 = _mm_xor_si128(z0, sz0);
+ x1 = _mm_xor_si128(z1, sz1);
+ x0 = _mm_sub_epi16(x0, sz0);
+ x1 = _mm_sub_epi16(x1, sz1);
+
+ /* zbin[] + zbin_extra */
+ zbin0 = _mm_add_epi16(zbin0, zbin_extra);
+ zbin1 = _mm_add_epi16(zbin1, zbin_extra);
+
+ /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance
+ * the equation because boost is the only value which can change:
+ * x - (zbin[] + extra) >= boost */
+ x_minus_zbin0 = _mm_sub_epi16(x0, zbin0);
+ x_minus_zbin1 = _mm_sub_epi16(x1, zbin1);
+
+ _mm_store_si128((__m128i *)(x), x_minus_zbin0);
+ _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1);
+
+ /* All the remaining calculations are valid whether they are done now with
+ * simd or later inside the loop one at a time. */
+ x0 = _mm_add_epi16(x0, round0);
+ x1 = _mm_add_epi16(x1, round1);
+
+ y0 = _mm_mulhi_epi16(x0, quant0);
+ y1 = _mm_mulhi_epi16(x1, quant1);
+
+ y0 = _mm_add_epi16(y0, x0);
+ y1 = _mm_add_epi16(y1, x1);
+
+ /* Instead of shifting each value independently we convert the scaling
+ * factor with 1 << (16 - shift) so we can use multiply/return high half. */
+ y0 = _mm_mulhi_epi16(y0, quant_shift0);
+ y1 = _mm_mulhi_epi16(y1, quant_shift1);
+
+ /* Return the sign: (y ^ sz) - sz */
+ y0 = _mm_xor_si128(y0, sz0);
+ y1 = _mm_xor_si128(y1, sz1);
+ y0 = _mm_sub_epi16(y0, sz0);
+ y1 = _mm_sub_epi16(y1, sz1);
+
+ _mm_store_si128((__m128i *)(y), y0);
+ _mm_store_si128((__m128i *)(y + 8), y1);
+
+ zbin_boost_ptr = b->zrun_zbin_boost;
+
+ /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */
+ SELECT_EOB(1, 0);
+ SELECT_EOB(2, 1);
+ SELECT_EOB(3, 4);
+ SELECT_EOB(4, 8);
+ SELECT_EOB(5, 5);
+ SELECT_EOB(6, 2);
+ SELECT_EOB(7, 3);
+ SELECT_EOB(8, 6);
+ SELECT_EOB(9, 9);
+ SELECT_EOB(10, 12);
+ SELECT_EOB(11, 13);
+ SELECT_EOB(12, 10);
+ SELECT_EOB(13, 7);
+ SELECT_EOB(14, 11);
+ SELECT_EOB(15, 14);
+ SELECT_EOB(16, 15);
+
+ y0 = _mm_load_si128((__m128i *)(d->qcoeff));
+ y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8));
+
+ /* dqcoeff = qcoeff * dequant */
+ y0 = _mm_mullo_epi16(y0, dequant0);
+ y1 = _mm_mullo_epi16(y1, dequant1);
+
+ _mm_store_si128((__m128i *)(d->dqcoeff), y0);
+ _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1);
+
+ *d->eob = eob;
+}
+
+void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
+{
+ __m128i z0 = _mm_load_si128((__m128i *)(b->coeff));
+ __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8));
+ __m128i round0 = _mm_load_si128((__m128i *)(b->round));
+ __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8));
+ __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast));
+ __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8));
+ __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant));
+ __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8));
+ __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag));
+ __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8));
+
+ __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones;
+
+ /* sign of z: z >> 15 */
+ sz0 = _mm_srai_epi16(z0, 15);
+ sz1 = _mm_srai_epi16(z1, 15);
+
+ /* x = abs(z): (z ^ sz) - sz */
+ x0 = _mm_xor_si128(z0, sz0);
+ x1 = _mm_xor_si128(z1, sz1);
+ x0 = _mm_sub_epi16(x0, sz0);
+ x1 = _mm_sub_epi16(x1, sz1);
+
+ /* x += round */
+ x0 = _mm_add_epi16(x0, round0);
+ x1 = _mm_add_epi16(x1, round1);
+
+ /* y = (x * quant) >> 16 */
+ y0 = _mm_mulhi_epi16(x0, quant_fast0);
+ y1 = _mm_mulhi_epi16(x1, quant_fast1);
+
+ /* x = abs(y) = (y ^ sz) - sz */
+ y0 = _mm_xor_si128(y0, sz0);
+ y1 = _mm_xor_si128(y1, sz1);
+ x0 = _mm_sub_epi16(y0, sz0);
+ x1 = _mm_sub_epi16(y1, sz1);
+
+ /* qcoeff = x */
+ _mm_store_si128((__m128i *)(d->qcoeff), x0);
+ _mm_store_si128((__m128i *)(d->qcoeff + 8), x1);
+
+ /* x * dequant */
+ xdq0 = _mm_mullo_epi16(x0, dequant0);
+ xdq1 = _mm_mullo_epi16(x1, dequant1);
+
+ /* dqcoeff = x * dequant */
+ _mm_store_si128((__m128i *)(d->dqcoeff), xdq0);
+ _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1);
+
+ /* build a mask for the zig zag */
+ zeros = _mm_setzero_si128();
+
+ x0 = _mm_cmpeq_epi16(x0, zeros);
+ x1 = _mm_cmpeq_epi16(x1, zeros);
+
+ ones = _mm_cmpeq_epi16(zeros, zeros);
+
+ x0 = _mm_xor_si128(x0, ones);
+ x1 = _mm_xor_si128(x1, ones);
+
+ x0 = _mm_and_si128(x0, inv_zig_zag0);
+ x1 = _mm_and_si128(x1, inv_zig_zag1);
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* now down to 8 */
+ x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* only 4 left */
+ x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ /* okay, just 2! */
+ x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001
+
+ x0 = _mm_max_epi16(x0, x1);
+
+ *d->eob = 0xFF & _mm_cvtsi128_si32(x0);
+}
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -684,6 +684,8 @@
yv12->u_buffer = img->planes[VPX_PLANE_U];
yv12->v_buffer = img->planes[VPX_PLANE_V];
+ yv12->y_crop_width = img->d_w;
+ yv12->y_crop_height = img->d_h;
yv12->y_width = img->d_w;
yv12->y_height = img->d_h;
yv12->uv_width = (1 + yv12->y_width) / 2;
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -790,6 +790,8 @@
yv12->u_buffer = img->planes[VPX_PLANE_U];
yv12->v_buffer = img->planes[VPX_PLANE_V];
+ yv12->y_crop_width = img->d_w;
+ yv12->y_crop_height = img->d_h;
yv12->y_width = img->d_w;
yv12->y_height = img->d_h;
yv12->uv_width = yv12->y_width / 2;
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -89,12 +89,12 @@
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2_intrinsics.c
# TODO(johann) make this generic
ifeq ($(HAVE_SSE2),yes)
-vp8/encoder/x86/quantize_sse2.c.o: CFLAGS += -msse2
-vp8/encoder/x86/quantize_sse2.c.d: CFLAGS += -msse2
+vp8/encoder/x86/quantize_sse2_intrinsics.c.o: CFLAGS += -msse2
+vp8/encoder/x86/quantize_sse2_intrinsics.c.d: CFLAGS += -msse2
endif
ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
--- a/vp9/common/generic/vp9_systemdependent.c
+++ b/vp9/common/generic/vp9_systemdependent.c
@@ -11,8 +11,6 @@
#include "./vpx_config.h"
#include "vp9_rtcd.h"
-#include "vp9/common/vp9_subpixel.h"
-#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
void vp9_machine_specific_config(VP9_COMMON *ctx) {
--- /dev/null
+++ b/vp9/common/ppc/vp9_idct_altivec.asm
@@ -1,0 +1,189 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ .globl short_idct4x4_ppc
+
+.macro load_c V, LABEL, OFF, R0, R1
+ lis \R0, \LABEL@ha
+ la \R1, \LABEL@l(\R0)
+ lvx \V, \OFF, \R1
+.endm
+
+;# r3 short *input
+;# r4 short *output
+;# r5 int pitch
+ .align 2
+short_idct4x4_ppc:
+ mfspr r11, 256 ;# get old VRSAVE
+ oris r12, r11, 0xfff8
+ mtspr 256, r12 ;# set VRSAVE
+
+ load_c v8, sinpi8sqrt2, 0, r9, r10
+ load_c v9, cospi8sqrt2minus1, 0, r9, r10
+ load_c v10, hi_hi, 0, r9, r10
+ load_c v11, lo_lo, 0, r9, r10
+ load_c v12, shift_16, 0, r9, r10
+
+ li r10, 16
+ lvx v0, 0, r3 ;# input ip[0], ip[ 4]
+ lvx v1, r10, r3 ;# input ip[8], ip[12]
+
+ ;# first pass
+ vupkhsh v2, v0
+ vupkhsh v3, v1
+ vaddsws v6, v2, v3 ;# a1 = ip[0]+ip[8]
+ vsubsws v7, v2, v3 ;# b1 = ip[0]-ip[8]
+
+ vupklsh v0, v0
+ vmulosh v4, v0, v8
+ vsraw v4, v4, v12
+ vaddsws v4, v4, v0 ;# ip[ 4] * sin(pi/8) * sqrt(2)
+
+ vupklsh v1, v1
+ vmulosh v5, v1, v9
+ vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2)
+ vaddsws v5, v5, v1
+
+ vsubsws v4, v4, v5 ;# c1
+
+ vmulosh v3, v1, v8
+ vsraw v3, v3, v12
+ vaddsws v3, v3, v1 ;# ip[12] * sin(pi/8) * sqrt(2)
+
+ vmulosh v5, v0, v9
+ vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2)
+ vaddsws v5, v5, v0
+
+ vaddsws v3, v3, v5 ;# d1
+
+ vaddsws v0, v6, v3 ;# a1 + d1
+ vsubsws v3, v6, v3 ;# a1 - d1
+
+ vaddsws v1, v7, v4 ;# b1 + c1
+ vsubsws v2, v7, v4 ;# b1 - c1
+
+ ;# transpose input
+ vmrghw v4, v0, v1 ;# a0 b0 a1 b1
+ vmrghw v5, v2, v3 ;# c0 d0 c1 d1
+
+ vmrglw v6, v0, v1 ;# a2 b2 a3 b3
+ vmrglw v7, v2, v3 ;# c2 d2 c3 d3
+
+ vperm v0, v4, v5, v10 ;# a0 b0 c0 d0
+ vperm v1, v4, v5, v11 ;# a1 b1 c1 d1
+
+ vperm v2, v6, v7, v10 ;# a2 b2 c2 d2
+ vperm v3, v6, v7, v11 ;# a3 b3 c3 d3
+
+ ;# second pass
+ vaddsws v6, v0, v2 ;# a1 = ip[0]+ip[8]
+ vsubsws v7, v0, v2 ;# b1 = ip[0]-ip[8]
+
+ vmulosh v4, v1, v8
+ vsraw v4, v4, v12
+ vaddsws v4, v4, v1 ;# ip[ 4] * sin(pi/8) * sqrt(2)
+
+ vmulosh v5, v3, v9
+ vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2)
+ vaddsws v5, v5, v3
+
+ vsubsws v4, v4, v5 ;# c1
+
+ vmulosh v2, v3, v8
+ vsraw v2, v2, v12
+ vaddsws v2, v2, v3 ;# ip[12] * sin(pi/8) * sqrt(2)
+
+ vmulosh v5, v1, v9
+ vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2)
+ vaddsws v5, v5, v1
+
+ vaddsws v3, v2, v5 ;# d1
+
+ vaddsws v0, v6, v3 ;# a1 + d1
+ vsubsws v3, v6, v3 ;# a1 - d1
+
+ vaddsws v1, v7, v4 ;# b1 + c1
+ vsubsws v2, v7, v4 ;# b1 - c1
+
+ vspltish v6, 4
+ vspltish v7, 3
+
+ vpkswss v0, v0, v1
+ vpkswss v1, v2, v3
+
+ vaddshs v0, v0, v6
+ vaddshs v1, v1, v6
+
+ vsrah v0, v0, v7
+ vsrah v1, v1, v7
+
+ ;# transpose output
+ vmrghh v2, v0, v1 ;# a0 c0 a1 c1 a2 c2 a3 c3
+ vmrglh v3, v0, v1 ;# b0 d0 b1 d1 b2 d2 b3 d3
+
+ vmrghh v0, v2, v3 ;# a0 b0 c0 d0 a1 b1 c1 d1
+ vmrglh v1, v2, v3 ;# a2 b2 c2 d2 a3 b3 c3 d3
+
+ stwu r1,-416(r1) ;# create space on the stack
+
+ stvx v0, 0, r1
+ lwz r6, 0(r1)
+ stw r6, 0(r4)
+ lwz r6, 4(r1)
+ stw r6, 4(r4)
+
+ add r4, r4, r5
+
+ lwz r6, 8(r1)
+ stw r6, 0(r4)
+ lwz r6, 12(r1)
+ stw r6, 4(r4)
+
+ add r4, r4, r5
+
+ stvx v1, 0, r1
+ lwz r6, 0(r1)
+ stw r6, 0(r4)
+ lwz r6, 4(r1)
+ stw r6, 4(r4)
+
+ add r4, r4, r5
+
+ lwz r6, 8(r1)
+ stw r6, 0(r4)
+ lwz r6, 12(r1)
+ stw r6, 4(r4)
+
+ addi r1, r1, 416 ;# recover stack
+
+ mtspr 256, r11 ;# reset old VRSAVE
+
+ blr
+
+ .align 4
+sinpi8sqrt2:
+ .short 35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468
+
+ .align 4
+cospi8sqrt2minus1:
+ .short 20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091
+
+ .align 4
+shift_16:
+ .long 16, 16, 16, 16
+
+ .align 4
+hi_hi:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+
+ .align 4
+lo_lo:
+ .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31
--- a/vp9/common/ppc/vp9_idctllm_altivec.asm
+++ /dev/null
@@ -1,189 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- .globl short_idct4x4llm_ppc
-
-.macro load_c V, LABEL, OFF, R0, R1
- lis \R0, \LABEL@ha
- la \R1, \LABEL@l(\R0)
- lvx \V, \OFF, \R1
-.endm
-
-;# r3 short *input
-;# r4 short *output
-;# r5 int pitch
- .align 2
-short_idct4x4llm_ppc:
- mfspr r11, 256 ;# get old VRSAVE
- oris r12, r11, 0xfff8
- mtspr 256, r12 ;# set VRSAVE
-
- load_c v8, sinpi8sqrt2, 0, r9, r10
- load_c v9, cospi8sqrt2minus1, 0, r9, r10
- load_c v10, hi_hi, 0, r9, r10
- load_c v11, lo_lo, 0, r9, r10
- load_c v12, shift_16, 0, r9, r10
-
- li r10, 16
- lvx v0, 0, r3 ;# input ip[0], ip[ 4]
- lvx v1, r10, r3 ;# input ip[8], ip[12]
-
- ;# first pass
- vupkhsh v2, v0
- vupkhsh v3, v1
- vaddsws v6, v2, v3 ;# a1 = ip[0]+ip[8]
- vsubsws v7, v2, v3 ;# b1 = ip[0]-ip[8]
-
- vupklsh v0, v0
- vmulosh v4, v0, v8
- vsraw v4, v4, v12
- vaddsws v4, v4, v0 ;# ip[ 4] * sin(pi/8) * sqrt(2)
-
- vupklsh v1, v1
- vmulosh v5, v1, v9
- vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2)
- vaddsws v5, v5, v1
-
- vsubsws v4, v4, v5 ;# c1
-
- vmulosh v3, v1, v8
- vsraw v3, v3, v12
- vaddsws v3, v3, v1 ;# ip[12] * sin(pi/8) * sqrt(2)
-
- vmulosh v5, v0, v9
- vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2)
- vaddsws v5, v5, v0
-
- vaddsws v3, v3, v5 ;# d1
-
- vaddsws v0, v6, v3 ;# a1 + d1
- vsubsws v3, v6, v3 ;# a1 - d1
-
- vaddsws v1, v7, v4 ;# b1 + c1
- vsubsws v2, v7, v4 ;# b1 - c1
-
- ;# transpose input
- vmrghw v4, v0, v1 ;# a0 b0 a1 b1
- vmrghw v5, v2, v3 ;# c0 d0 c1 d1
-
- vmrglw v6, v0, v1 ;# a2 b2 a3 b3
- vmrglw v7, v2, v3 ;# c2 d2 c3 d3
-
- vperm v0, v4, v5, v10 ;# a0 b0 c0 d0
- vperm v1, v4, v5, v11 ;# a1 b1 c1 d1
-
- vperm v2, v6, v7, v10 ;# a2 b2 c2 d2
- vperm v3, v6, v7, v11 ;# a3 b3 c3 d3
-
- ;# second pass
- vaddsws v6, v0, v2 ;# a1 = ip[0]+ip[8]
- vsubsws v7, v0, v2 ;# b1 = ip[0]-ip[8]
-
- vmulosh v4, v1, v8
- vsraw v4, v4, v12
- vaddsws v4, v4, v1 ;# ip[ 4] * sin(pi/8) * sqrt(2)
-
- vmulosh v5, v3, v9
- vsraw v5, v5, v12 ;# ip[12] * cos(pi/8) * sqrt(2)
- vaddsws v5, v5, v3
-
- vsubsws v4, v4, v5 ;# c1
-
- vmulosh v2, v3, v8
- vsraw v2, v2, v12
- vaddsws v2, v2, v3 ;# ip[12] * sin(pi/8) * sqrt(2)
-
- vmulosh v5, v1, v9
- vsraw v5, v5, v12 ;# ip[ 4] * cos(pi/8) * sqrt(2)
- vaddsws v5, v5, v1
-
- vaddsws v3, v2, v5 ;# d1
-
- vaddsws v0, v6, v3 ;# a1 + d1
- vsubsws v3, v6, v3 ;# a1 - d1
-
- vaddsws v1, v7, v4 ;# b1 + c1
- vsubsws v2, v7, v4 ;# b1 - c1
-
- vspltish v6, 4
- vspltish v7, 3
-
- vpkswss v0, v0, v1
- vpkswss v1, v2, v3
-
- vaddshs v0, v0, v6
- vaddshs v1, v1, v6
-
- vsrah v0, v0, v7
- vsrah v1, v1, v7
-
- ;# transpose output
- vmrghh v2, v0, v1 ;# a0 c0 a1 c1 a2 c2 a3 c3
- vmrglh v3, v0, v1 ;# b0 d0 b1 d1 b2 d2 b3 d3
-
- vmrghh v0, v2, v3 ;# a0 b0 c0 d0 a1 b1 c1 d1
- vmrglh v1, v2, v3 ;# a2 b2 c2 d2 a3 b3 c3 d3
-
- stwu r1,-416(r1) ;# create space on the stack
-
- stvx v0, 0, r1
- lwz r6, 0(r1)
- stw r6, 0(r4)
- lwz r6, 4(r1)
- stw r6, 4(r4)
-
- add r4, r4, r5
-
- lwz r6, 8(r1)
- stw r6, 0(r4)
- lwz r6, 12(r1)
- stw r6, 4(r4)
-
- add r4, r4, r5
-
- stvx v1, 0, r1
- lwz r6, 0(r1)
- stw r6, 0(r4)
- lwz r6, 4(r1)
- stw r6, 4(r4)
-
- add r4, r4, r5
-
- lwz r6, 8(r1)
- stw r6, 0(r4)
- lwz r6, 12(r1)
- stw r6, 4(r4)
-
- addi r1, r1, 416 ;# recover stack
-
- mtspr 256, r11 ;# reset old VRSAVE
-
- blr
-
- .align 4
-sinpi8sqrt2:
- .short 35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468
-
- .align 4
-cospi8sqrt2minus1:
- .short 20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091
-
- .align 4
-shift_16:
- .long 16, 16, 16, 16
-
- .align 4
-hi_hi:
- .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
-
- .align 4
-lo_lo:
- .byte 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31
--- a/vp9/common/ppc/vp9_systemdependent.c
+++ b/vp9/common/ppc/vp9_systemdependent.c
@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/common/vp9_subpixel.h"
#include "vp9/common/vp9_loopfilter.h"
#include "recon.h"
#include "vp9/common/vp9_onyxc_int.h"
@@ -17,33 +16,29 @@
void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch);
void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch);
-extern void (*vp9_post_proc_down_and_across)(
- unsigned char *src_ptr,
- unsigned char *dst_ptr,
- int src_pixels_per_line,
- int dst_pixels_per_line,
- int rows,
- int cols,
- int flimit
-);
+extern void (*vp9_post_proc_down_and_across)(unsigned char *src_ptr,
+ unsigned char *dst_ptr,
+ int src_pixels_per_line,
+ int dst_pixels_per_line,
+ int rows, int cols, int flimit);
-extern void (*vp9_mbpost_proc_down)(unsigned char *dst, int pitch, int rows, int cols, int flimit);
-extern void vp9_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, int flimit);
-extern void (*vp9_mbpost_proc_across_ip)(unsigned char *src, int pitch, int rows, int cols, int flimit);
-extern void vp9_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit);
+extern void (*vp9_mbpost_proc_down)(unsigned char *dst, int pitch,
+ int rows, int cols, int flimit);
+extern void vp9_mbpost_proc_down_c(unsigned char *dst, int pitch,
+ int rows, int cols, int flimit);
+extern void (*vp9_mbpost_proc_across_ip)(unsigned char *src, int pitch,
+ int rows, int cols, int flimit);
+extern void vp9_mbpost_proc_across_ip_c(unsigned char *src, int pitch,
+ int rows, int cols, int flimit);
+extern void vp9_post_proc_down_and_across_c(unsigned char *src_ptr,
+ unsigned char *dst_ptr,
+ int src_pixels_per_line,
+ int dst_pixels_per_line,
+ int rows, int cols, int flimit);
+void vp9_plane_add_noise_c(unsigned char *start,
+ unsigned int width, unsigned int height,
+ int pitch, int q, int a);
-extern void vp9_post_proc_down_and_across_c
-(
- unsigned char *src_ptr,
- unsigned char *dst_ptr,
- int src_pixels_per_line,
- int dst_pixels_per_line,
- int rows,
- int cols,
- int flimit
-);
-void vp9_plane_add_noise_c(unsigned char *Start, unsigned int Width, unsigned int Height, int Pitch, int q, int a);
-
extern copy_mem_block_function *vp9_copy_mem16x16;
extern copy_mem_block_function *vp9_copy_mem8x8;
extern copy_mem_block_function *vp9_copy_mem8x4;
@@ -60,11 +55,14 @@
extern copy_mem_block_function copy_mem16x16_ppc;
-void recon_b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
-void recon2b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
-void recon4b_ppc(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
+void recon_b_ppc(short *diff_ptr, unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int stride);
+void recon2b_ppc(short *diff_ptr, unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int stride);
+void recon4b_ppc(short *diff_ptr, unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int stride);
-extern void short_idct4x4llm_ppc(short *input, short *output, int pitch);
+extern void short_idct4x4_ppc(short *input, short *output, int pitch);
// Generic C
extern subpixel_predict_function vp9_sixtap_predict_c;
@@ -80,12 +78,15 @@
extern copy_mem_block_function vp9_copy_mem8x8_c;
extern copy_mem_block_function vp9_copy_mem8x4_c;
-void vp9_recon_b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
-void vp9_recon2b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
-void vp9_recon4b_c(short *diff_ptr, unsigned char *pred_ptr, unsigned char *dst_ptr, int stride);
+void vp9_recon_b_c(short *diff_ptr, unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int stride);
+void vp9_recon2b_c(short *diff_ptr, unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int stride);
+void vp9_recon4b_c(short *diff_ptr, unsigned char *pred_ptr,
+ unsigned char *dst_ptr, int stride);
-extern void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch);
-extern void vp9_short_idct4x4llm_c(short *input, short *output, int pitch);
+extern void vp9_short_idct4x4_1_c(short *input, short *output, int pitch);
+extern void vp9_short_idct4x4_c(short *input, short *output, int pitch);
extern void vp8_dc_only_idct_c(short input_dc, short *output, int pitch);
// PPC
@@ -140,8 +141,8 @@
vp9_sixtap_predict8x4 = sixtap_predict8x4_ppc;
vp9_sixtap_predict = sixtap_predict_ppc;
- vp8_short_idct4x4_1 = vp9_short_idct4x4llm_1_c;
- vp8_short_idct4x4 = short_idct4x4llm_ppc;
+ vp8_short_idct4x4_1 = vp9_short_idct4x4_1_c;
+ vp8_short_idct4x4 = short_idct4x4_ppc;
vp8_dc_only_idct = vp8_dc_only_idct_c;
vp8_lf_mbvfull = loop_filter_mbv_ppc;
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -67,20 +67,16 @@
int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) {
int i;
+ int aligned_width, aligned_height;
vp9_de_alloc_frame_buffers(oci);
/* our internal buffers are always multiples of 16 */
- if ((width & 0xf) != 0)
- width += 16 - (width & 0xf);
+ aligned_width = (width + 15) & ~15;
+ aligned_height = (height + 15) & ~15;
- if ((height & 0xf) != 0)
- height += 16 - (height & 0xf);
-
-
for (i = 0; i < NUM_YV12_BUFFERS; i++) {
oci->fb_idx_ref_cnt[i] = 0;
- oci->yv12_fb[i].flags = 0;
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height,
VP9BORDERINPIXELS) < 0) {
vp9_de_alloc_frame_buffers(oci);
@@ -88,16 +84,17 @@
}
}
- oci->new_fb_idx = 0;
- oci->lst_fb_idx = 1;
- oci->gld_fb_idx = 2;
- oci->alt_fb_idx = 3;
+ oci->new_fb_idx = NUM_YV12_BUFFERS - 1;
+ oci->fb_idx_ref_cnt[oci->new_fb_idx] = 1;
- oci->fb_idx_ref_cnt[0] = 1;
- oci->fb_idx_ref_cnt[1] = 1;
- oci->fb_idx_ref_cnt[2] = 1;
- oci->fb_idx_ref_cnt[3] = 1;
+ for (i = 0; i < 3; i++)
+ oci->active_ref_idx[i] = i;
+ for (i = 0; i < NUM_REF_FRAMES; i++) {
+ oci->ref_frame_map[i] = i;
+ oci->fb_idx_ref_cnt[i] = 1;
+ }
+
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16,
VP9BORDERINPIXELS) < 0) {
vp9_de_alloc_frame_buffers(oci);
@@ -110,8 +107,8 @@
return 1;
}
- oci->mb_rows = height >> 4;
- oci->mb_cols = width >> 4;
+ oci->mb_rows = aligned_height >> 4;
+ oci->mb_cols = aligned_width >> 4;
oci->MBs = oci->mb_rows * oci->mb_cols;
oci->mode_info_stride = oci->mb_cols + 1;
oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
@@ -134,7 +131,8 @@
oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1;
- oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
+ oci->above_context =
+ vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * (3 + oci->mb_cols), 1);
if (!oci->above_context) {
vp9_de_alloc_frame_buffers(oci);
@@ -146,6 +144,7 @@
return 0;
}
+
void vp9_setup_version(VP9_COMMON *cm) {
if (cm->version & 0x4) {
if (!CONFIG_EXPERIMENTAL)
@@ -204,9 +203,6 @@
/* Initialise reference frame sign bias structure to defaults */
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
- /* Default disable buffer to buffer copying */
- oci->copy_buffer_to_gf = 0;
- oci->copy_buffer_to_arf = 0;
oci->kf_ymode_probs_update = 0;
}
@@ -220,8 +216,4 @@
vp9_entropy_mode_init();
vp9_entropy_mv_init();
-
-#if CONFIG_NEWCOEFCONTEXT
- vp9_init_neighbors();
-#endif
}
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -12,15 +12,431 @@
#include "vp9/common/vp9_blockd.h"
#include "vpx_mem/vpx_mem.h"
-const uint8_t vp9_block2left[TX_SIZE_MAX_SB][25] = {
- {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8},
- {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}
+const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24] = {
+ { 0, 0, 0, 0,
+ 1, 1, 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3, 3,
+ 4, 4,
+ 5, 5,
+ 6, 6,
+ 7, 7 },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 2, 2, 2, 2,
+ 4, 4,
+ 4, 4,
+ 6, 6,
+ 6, 6 },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0 },
};
-const uint8_t vp9_block2above[TX_SIZE_MAX_SB][25] = {
- {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8},
- {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8},
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6, 8}
+const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24] = {
+ { 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 4, 5,
+ 4, 5,
+ 6, 7,
+ 6, 7 },
+ { 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 0, 0, 0, 0,
+ 2, 2, 2, 2,
+ 4, 4,
+ 4, 4,
+ 6, 6,
+ 6, 6 },
+ { 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0 },
};
+
+#define S(x) x + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)
+const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3),
+ 4, 4, 4, 4,
+ 5, 5, 5, 5,
+ S(4), S(4), S(4), S(4),
+ S(5), S(5), S(5), S(5),
+ 6, 6, 6, 6,
+ 7, 7, 7, 7,
+ S(6), S(6), S(6), S(6),
+ S(7), S(7), S(7), S(7) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ S(4), S(4), S(4), S(4),
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6),
+ S(6), S(6), S(6), S(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6 },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96] = {
+ { 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 4, 5, S(4), S(5),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7),
+ 6, 7, S(6), S(7) },
+ { 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 0, 0, 0, 0, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6),
+ 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 4, 4, 4, 4,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6,
+ 6, 6, 6, 6 },
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+
+#define T(x) x + 2 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT))
+#define U(x) x + 3 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT))
+const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ S(5), S(5), S(5), S(5), S(5), S(5), S(5), S(5),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(5), T(5), T(5), T(5), T(5), T(5), T(5), T(5),
+ U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4),
+ U(5), U(5), U(5), U(5), U(5), U(5), U(5), U(5),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6),
+ S(7), S(7), S(7), S(7), S(7), S(7), S(7), S(7),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(7), T(7), T(7), T(7), T(7), T(7), T(7), T(7),
+ U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6),
+ U(7), U(7), U(7), U(7), U(7), U(7), U(7), U(7) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4),
+ U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6),
+ S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6),
+ U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6 },
+};
+const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384] = {
+ { 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 4, 5, S(4), S(5), T(4), T(5), U(4), U(5),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7),
+ 6, 7, S(6), S(7), T(6), T(7), U(6), U(7) },
+ { 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2),
+ T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 4, 4, 4, 4, S(4), S(4), S(4), S(4),
+ T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6),
+ 6, 6, 6, 6, S(6), S(6), S(6), S(6),
+ T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0),
+ T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
+ U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6),
+ T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6 },
+};
+#undef U
+#undef T
+#undef S
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -16,9 +16,9 @@
#include "./vpx_config.h"
#include "vpx_scale/yv12config.h"
+#include "vp9/common/vp9_convolve.h"
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_treecoder.h"
-#include "vp9/common/vp9_subpixel.h"
#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
@@ -47,27 +47,13 @@
#define MAX_MV_REFS 9
#define MAX_MV_REF_CANDIDATES 4
-#if CONFIG_DWTDCTHYBRID
-#define DWT_MAX_LENGTH 64
-#define DWT_TYPE 26 // 26/53/97
-#define DWT_PRECISION_BITS 2
-#define DWT_PRECISION_RND ((1 << DWT_PRECISION_BITS) / 2)
-
-#define DWTDCT16X16 0
-#define DWTDCT16X16_LEAN 1
-#define DWTDCT8X8 2
-#define DWTDCT_TYPE DWTDCT16X16_LEAN
-#endif
-
typedef struct {
int r, c;
} POS;
-typedef enum PlaneType {
- PLANE_TYPE_Y_NO_DC = 0,
- PLANE_TYPE_Y2,
- PLANE_TYPE_UV,
+typedef enum {
PLANE_TYPE_Y_WITH_DC,
+ PLANE_TYPE_UV,
} PLANE_TYPE;
typedef char ENTROPY_CONTEXT;
@@ -75,10 +61,9 @@
ENTROPY_CONTEXT y1[4];
ENTROPY_CONTEXT u[2];
ENTROPY_CONTEXT v[2];
- ENTROPY_CONTEXT y2;
} ENTROPY_CONTEXT_PLANES;
-#define VP9_COMBINEENTROPYCONTEXTS( Dest, A, B) \
+#define VP9_COMBINEENTROPYCONTEXTS(Dest, A, B) \
Dest = ((A)!=0) + ((B)!=0);
typedef enum {
@@ -86,8 +71,7 @@
INTER_FRAME = 1
} FRAME_TYPE;
-typedef enum
-{
+typedef enum {
#if CONFIG_ENABLE_6TAP
SIXTAP,
#endif
@@ -98,8 +82,7 @@
SWITCHABLE /* should be the last one */
} INTERPOLATIONFILTERTYPE;
-typedef enum
-{
+typedef enum {
DC_PRED, /* average of above and left pixels */
V_PRED, /* vertical prediction */
H_PRED, /* horizontal prediction */
@@ -125,10 +108,8 @@
SEG_LVL_ALT_Q = 0, // Use alternate Quantizer ....
SEG_LVL_ALT_LF = 1, // Use alternate loop filter value...
SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame
- SEG_LVL_MODE = 3, // Optional Segment mode
- SEG_LVL_EOB = 4, // EOB end stop marker.
- SEG_LVL_TRANSFORM = 5, // Block transform size.
- SEG_LVL_MAX = 6 // Number of MB level features supported
+ SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode
+ SEG_LVL_MAX = 4 // Number of MB level features supported
} SEG_LVL_FEATURES;
// Segment level features.
@@ -155,10 +136,7 @@
#define VP9_MVREFS (1 + SPLITMV - NEARESTMV)
-#if CONFIG_LOSSLESS
-#define WHT_UPSCALE_FACTOR 3
-#define Y2_WHT_UPSCALE_FACTOR 2
-#endif
+#define WHT_UPSCALE_FACTOR 2
typedef enum {
B_DC_PRED, /* average of above and left pixels */
@@ -219,10 +197,7 @@
B_PREDICTION_MODE context;
#endif
} as_mode;
- struct {
- int_mv first;
- int_mv second;
- } as_mv;
+ int_mv as_mv[2]; // first, second inter predictor motion vectors
};
typedef enum {
@@ -274,6 +249,9 @@
INTERPOLATIONFILTERTYPE interp_filter;
BLOCK_SIZE_TYPE sb_type;
+#if CONFIG_CODE_NONZEROCOUNT
+ uint16_t nzcs[256+64*2];
+#endif
} MB_MODE_INFO;
typedef struct {
@@ -298,36 +276,44 @@
int dst;
int dst_stride;
- int eob;
-
union b_mode_info bmi;
} BLOCKD;
-typedef struct superblockd {
- /* 32x32 Y and 16x16 U/V. No 2nd order transform yet. */
- DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]);
- DECLARE_ALIGNED(16, int16_t, qcoeff[32*32+16*16*2]);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]);
-} SUPERBLOCKD;
+struct scale_factors {
+ int x_num;
+ int x_den;
+ int x_offset_q4;
+ int x_step_q4;
+ int y_num;
+ int y_den;
+ int y_offset_q4;
+ int y_step_q4;
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+ convolve_fn_t predict[2][2][8]; // horiz, vert, weight (0 - 7)
+#else
+ convolve_fn_t predict[2][2][2]; // horiz, vert, avg
+#endif
+};
typedef struct macroblockd {
- DECLARE_ALIGNED(16, int16_t, diff[400]); /* from idct diff */
- DECLARE_ALIGNED(16, uint8_t, predictor[384]);
- DECLARE_ALIGNED(16, int16_t, qcoeff[400]);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[400]);
- DECLARE_ALIGNED(16, uint16_t, eobs[25]);
+ DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */
+ DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks
+ DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]);
+ DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]);
+ DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]);
+#if CONFIG_CODE_NONZEROCOUNT
+ DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]);
+#endif
- SUPERBLOCKD sb_coeff_data;
-
- /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
- BLOCKD block[25];
+ /* 16 Y blocks, 4 U, 4 V, each with 16 entries. */
+ BLOCKD block[24];
int fullpixel_mask;
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
- struct {
- uint8_t *y_buffer, *u_buffer, *v_buffer;
- } second_pre;
+ YV12_BUFFER_CONFIG second_pre;
YV12_BUFFER_CONFIG dst;
+ struct scale_factors scale_factor[2];
+ struct scale_factors scale_factor_uv[2];
MODE_INFO *prev_mode_info_context;
MODE_INFO *mode_info_context;
@@ -337,8 +323,9 @@
int up_available;
int left_available;
+ int right_available;
- /* Y,U,V,Y2 */
+ /* Y,U,V */
ENTROPY_CONTEXT_PLANES *above_context;
ENTROPY_CONTEXT_PLANES *left_context;
@@ -359,6 +346,7 @@
// Probability Tree used to code Segment number
vp9_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];
+ vp9_prob mb_segment_mispred_tree_probs[MAX_MB_SEGMENTS];
#if CONFIG_NEW_MVREF
vp9_prob mb_mv_ref_probs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES-1];
@@ -387,21 +375,20 @@
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
+ int lossless;
/* Inverse transform function pointers. */
- void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch);
- void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch);
- void (*inv_walsh4x4_1)(int16_t *in, int16_t *out);
- void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out);
+ void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch);
+ void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*itxm_add)(int16_t *input, const int16_t *dq,
+ uint8_t *pred, uint8_t *output, int pitch, int stride, int eob);
+ void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,
+ uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd);
+ void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,
+ uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,
+ struct macroblockd *xd);
+ struct subpix_fn_table subpix;
- vp9_subpix_fn_t subpixel_predict4x4;
- vp9_subpix_fn_t subpixel_predict8x4;
- vp9_subpix_fn_t subpixel_predict8x8;
- vp9_subpix_fn_t subpixel_predict16x16;
- vp9_subpix_fn_t subpixel_predict_avg4x4;
- vp9_subpix_fn_t subpixel_predict_avg8x4;
- vp9_subpix_fn_t subpixel_predict_avg8x8;
- vp9_subpix_fn_t subpixel_predict_avg16x16;
int allow_high_precision_mv;
int corrupted;
@@ -412,74 +399,46 @@
} MACROBLOCKD;
-#define ACTIVE_HT 110 // quantization stepsize threshold
+#define ACTIVE_HT 110 // quantization stepsize threshold
-#define ACTIVE_HT8 300
+#define ACTIVE_HT8 300
#define ACTIVE_HT16 300
// convert MB_PREDICTION_MODE to B_PREDICTION_MODE
static B_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
- B_PREDICTION_MODE b_mode;
switch (mode) {
- case DC_PRED:
- b_mode = B_DC_PRED;
- break;
- case V_PRED:
- b_mode = B_VE_PRED;
- break;
- case H_PRED:
- b_mode = B_HE_PRED;
- break;
- case TM_PRED:
- b_mode = B_TM_PRED;
- break;
- case D45_PRED:
- b_mode = B_LD_PRED;
- break;
- case D135_PRED:
- b_mode = B_RD_PRED;
- break;
- case D117_PRED:
- b_mode = B_VR_PRED;
- break;
- case D153_PRED:
- b_mode = B_HD_PRED;
- break;
- case D27_PRED:
- b_mode = B_HU_PRED;
- break;
- case D63_PRED:
- b_mode = B_VL_PRED;
- break;
- default :
- // for debug purpose, to be removed after full testing
- assert(0);
- break;
+ case DC_PRED: return B_DC_PRED;
+ case V_PRED: return B_VE_PRED;
+ case H_PRED: return B_HE_PRED;
+ case TM_PRED: return B_TM_PRED;
+ case D45_PRED: return B_LD_PRED;
+ case D135_PRED: return B_RD_PRED;
+ case D117_PRED: return B_VR_PRED;
+ case D153_PRED: return B_HD_PRED;
+ case D27_PRED: return B_HU_PRED;
+ case D63_PRED: return B_VL_PRED;
+ default:
+ assert(0);
+ return B_MODE_COUNT; // Dummy value
}
- return b_mode;
}
// transform mapping
static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) {
- // map transform type
- TX_TYPE tx_type;
switch (bmode) {
case B_TM_PRED :
case B_RD_PRED :
- tx_type = ADST_ADST;
- break;
+ return ADST_ADST;
case B_VE_PRED :
case B_VR_PRED :
- tx_type = ADST_DCT;
- break;
+ return ADST_DCT;
case B_HE_PRED :
case B_HD_PRED :
case B_HU_PRED :
- tx_type = DCT_ADST;
- break;
+ return DCT_ADST;
#if CONFIG_NEWBINTRAMODES
case B_CONTEXT_PRED:
@@ -487,33 +446,41 @@
break;
#endif
- default :
- tx_type = DCT_DCT;
- break;
+ default:
+ return DCT_DCT;
}
- return tx_type;
}
-extern const uint8_t vp9_block2left[TX_SIZE_MAX_SB][25];
-extern const uint8_t vp9_block2above[TX_SIZE_MAX_SB][25];
+extern const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24];
+extern const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24];
+extern const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96];
+extern const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96];
+extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384];
+extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384];
-#define USE_ADST_FOR_I16X16_8X8 0
-#define USE_ADST_FOR_I16X16_4X4 0
+#define USE_ADST_FOR_I16X16_8X8 1
+#define USE_ADST_FOR_I16X16_4X4 1
#define USE_ADST_FOR_I8X8_4X4 1
#define USE_ADST_PERIPHERY_ONLY 1
+#define USE_ADST_FOR_SB 1
+#define USE_ADST_FOR_REMOTE_EDGE 0
-static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
+static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
// TODO(debargha): explore different patterns for ADST usage when blocksize
// is smaller than the prediction size
TX_TYPE tx_type = DCT_DCT;
- int ib = (int)(b - xd->block);
- if (ib >= 16)
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+#if !USE_ADST_FOR_SB
+ if (sb_type)
return tx_type;
- // TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.sb_type)
+#endif
+ if (ib >= (16 << (2 * sb_type))) // no chroma adst
return tx_type;
+ if (xd->lossless)
+ return DCT_DCT;
if (xd->mode_info_context->mbmi.mode == B_PRED &&
xd->q_index < ACTIVE_HT) {
+ const BLOCKD *b = &xd->block[ib];
tx_type = txfm_map(
#if CONFIG_NEWBINTRAMODES
b->bmi.as_mode.first == B_CONTEXT_PRED ? b->bmi.as_mode.context :
@@ -521,16 +488,32 @@
b->bmi.as_mode.first);
} else if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
xd->q_index < ACTIVE_HT) {
+ const BLOCKD *b = &xd->block[ib];
+ const int ic = (ib & 10);
#if USE_ADST_FOR_I8X8_4X4
#if USE_ADST_PERIPHERY_ONLY
// Use ADST for periphery blocks only
- int ic = (ib & 10);
+ const int inner = ib & 5;
b += ic - ib;
- tx_type = (ic != 10) ?
- txfm_map(pred_mode_conv((MB_PREDICTION_MODE)b->bmi.as_mode.first)) :
- DCT_DCT;
+ tx_type = txfm_map(pred_mode_conv(
+ (MB_PREDICTION_MODE)b->bmi.as_mode.first));
+#if USE_ADST_FOR_REMOTE_EDGE
+ if (inner == 5)
+ tx_type = DCT_DCT;
#else
+ if (inner == 1) {
+ if (tx_type == ADST_ADST) tx_type = ADST_DCT;
+ else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
+ } else if (inner == 4) {
+ if (tx_type == ADST_ADST) tx_type = DCT_ADST;
+ else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
+ } else if (inner == 5) {
+ tx_type = DCT_DCT;
+ }
+#endif
+#else
// Use ADST
+ b += ic - ib;
tx_type = txfm_map(pred_mode_conv(
(MB_PREDICTION_MODE)b->bmi.as_mode.first));
#endif
@@ -542,10 +525,23 @@
xd->q_index < ACTIVE_HT) {
#if USE_ADST_FOR_I16X16_4X4
#if USE_ADST_PERIPHERY_ONLY
- // Use ADST for periphery blocks only
- tx_type = (ib < 4 || ((ib & 3) == 0)) ?
- txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)) : DCT_DCT;
+ const int hmax = 4 << sb_type;
+ tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+#if USE_ADST_FOR_REMOTE_EDGE
+ if ((ib & (hmax - 1)) != 0 && ib >= hmax)
+ tx_type = DCT_DCT;
#else
+ if (ib >= 1 && ib < hmax) {
+ if (tx_type == ADST_ADST) tx_type = ADST_DCT;
+ else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
+ } else if (ib >= 1 && (ib & (hmax - 1)) == 0) {
+ if (tx_type == ADST_ADST) tx_type = DCT_ADST;
+ else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
+ } else if (ib != 0) {
+ tx_type = DCT_DCT;
+ }
+#endif
+#else
// Use ADST
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
#endif
@@ -557,18 +553,20 @@
return tx_type;
}
-static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
+static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
// TODO(debargha): explore different patterns for ADST usage when blocksize
// is smaller than the prediction size
TX_TYPE tx_type = DCT_DCT;
- int ib = (int)(b - xd->block);
- if (ib >= 16)
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+#if !USE_ADST_FOR_SB
+ if (sb_type)
return tx_type;
- // TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.sb_type)
+#endif
+ if (ib >= (16 << (2 * sb_type))) // no chroma adst
return tx_type;
if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
xd->q_index < ACTIVE_HT8) {
+ const BLOCKD *b = &xd->block[ib];
// TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged
// or the relationship otherwise modified to address this type conversion.
tx_type = txfm_map(pred_mode_conv(
@@ -575,12 +573,25 @@
(MB_PREDICTION_MODE)b->bmi.as_mode.first));
} else if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
xd->q_index < ACTIVE_HT8) {
-#if USE_ADST_FOR_I8X8_4X4
+#if USE_ADST_FOR_I16X16_8X8
#if USE_ADST_PERIPHERY_ONLY
- // Use ADST for periphery blocks only
- tx_type = (ib != 10) ?
- txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)) : DCT_DCT;
+ const int hmax = 4 << sb_type;
+ tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+#if USE_ADST_FOR_REMOTE_EDGE
+ if ((ib & (hmax - 1)) != 0 && ib >= hmax)
+ tx_type = DCT_DCT;
#else
+ if (ib >= 1 && ib < hmax) {
+ if (tx_type == ADST_ADST) tx_type = ADST_DCT;
+ else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
+ } else if (ib >= 1 && (ib & (hmax - 1)) == 0) {
+ if (tx_type == ADST_ADST) tx_type = DCT_ADST;
+ else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
+ } else if (ib != 0) {
+ tx_type = DCT_DCT;
+ }
+#endif
+#else
// Use ADST
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
#endif
@@ -592,63 +603,73 @@
return tx_type;
}
-static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
+static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) {
TX_TYPE tx_type = DCT_DCT;
- int ib = (int)(b - xd->block);
- if (ib >= 16)
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+#if !USE_ADST_FOR_SB
+ if (sb_type)
return tx_type;
- // TODO(rbultje, debargha): Explore ADST usage for superblocks
- if (xd->mode_info_context->mbmi.sb_type)
+#endif
+ if (ib >= (16 << (2 * sb_type)))
return tx_type;
if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
xd->q_index < ACTIVE_HT16) {
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+#if USE_ADST_PERIPHERY_ONLY
+ if (sb_type) {
+ const int hmax = 4 << sb_type;
+#if USE_ADST_FOR_REMOTE_EDGE
+ if ((ib & (hmax - 1)) != 0 && ib >= hmax)
+ tx_type = DCT_DCT;
+#else
+ if (ib >= 1 && ib < hmax) {
+ if (tx_type == ADST_ADST) tx_type = ADST_DCT;
+ else if (tx_type == DCT_ADST) tx_type = DCT_DCT;
+ } else if (ib >= 1 && (ib & (hmax - 1)) == 0) {
+ if (tx_type == ADST_ADST) tx_type = DCT_ADST;
+ else if (tx_type == ADST_DCT) tx_type = DCT_DCT;
+ } else if (ib != 0) {
+ tx_type = DCT_DCT;
+ }
+#endif
+ }
+#endif
}
return tx_type;
}
-static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) {
- TX_TYPE tx_type = DCT_DCT;
- int ib = (int)(b - xd->block);
- if (ib >= 16)
- return tx_type;
- if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) {
- tx_type = get_tx_type_16x16(xd, b);
- }
- if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
- ib = (ib & 8) + ((ib & 4) >> 1);
- tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
- }
- if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
- tx_type = get_tx_type_4x4(xd, b);
- }
- return tx_type;
-}
+void vp9_build_block_doffsets(MACROBLOCKD *xd);
+void vp9_setup_block_dptrs(MACROBLOCKD *xd);
-static int get_2nd_order_usage(const MACROBLOCKD *xd) {
- int has_2nd_order = (xd->mode_info_context->mbmi.mode != SPLITMV &&
- xd->mode_info_context->mbmi.mode != I8X8_PRED &&
- xd->mode_info_context->mbmi.mode != B_PRED &&
- xd->mode_info_context->mbmi.txfm_size != TX_16X16);
- if (has_2nd_order)
- has_2nd_order = (get_tx_type(xd, xd->block) == DCT_DCT);
- return has_2nd_order;
-}
-
-extern void vp9_build_block_doffsets(MACROBLOCKD *xd);
-extern void vp9_setup_block_dptrs(MACROBLOCKD *xd);
-
static void update_blockd_bmi(MACROBLOCKD *xd) {
- int i;
- int is_4x4;
- is_4x4 = (xd->mode_info_context->mbmi.mode == SPLITMV) ||
- (xd->mode_info_context->mbmi.mode == I8X8_PRED) ||
- (xd->mode_info_context->mbmi.mode == B_PRED);
+ const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
- if (is_4x4) {
- for (i = 0; i < 16; i++) {
+ if (mode == SPLITMV || mode == I8X8_PRED || mode == B_PRED) {
+ int i;
+ for (i = 0; i < 16; i++)
xd->block[i].bmi = xd->mode_info_context->bmi[i];
- }
}
+}
+
+static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) {
+ TX_SIZE tx_size_uv;
+ if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ tx_size_uv = xd->mode_info_context->mbmi.txfm_size;
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ if (xd->mode_info_context->mbmi.txfm_size == TX_32X32)
+ tx_size_uv = TX_16X16;
+ else
+ tx_size_uv = xd->mode_info_context->mbmi.txfm_size;
+ } else {
+ if (xd->mode_info_context->mbmi.txfm_size == TX_16X16)
+ tx_size_uv = TX_8X8;
+ else if (xd->mode_info_context->mbmi.txfm_size == TX_8X8 &&
+ (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV))
+ tx_size_uv = TX_4X4;
+ else
+ tx_size_uv = xd->mode_info_context->mbmi.txfm_size;
+ }
+ return tx_size_uv;
}
#endif // VP9_COMMON_VP9_BLOCKD_H_
--- a/vp9/common/vp9_coefupdateprobs.h
+++ b/vp9/common/vp9_coefupdateprobs.h
@@ -9,12 +9,25 @@
*/
#ifndef VP9_COMMON_VP9_COEFUPDATEPROBS_H_
-#define VP9_COMMON_VP9_COEFUPDATEPROBS_H__
+#define VP9_COMMON_VP9_COEFUPDATEPROBS_H_
/* Update probabilities for the nodes in the token entropy tree.
Generated file included by vp9_entropy.c */
-#define COEF_UPDATE_PROB 252
-#define COEF_UPDATE_PROB_8X8 252
-#define COEF_UPDATE_PROB_16X16 252
+
+static const vp9_prob vp9_coef_update_prob[ENTROPY_NODES] = {
+ 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252
+};
+
+#if CONFIG_CODE_NONZEROCOUNT
+#define NZC_UPDATE_PROB_4X4 252
+#define NZC_UPDATE_PROB_8X8 252
+#define NZC_UPDATE_PROB_16X16 252
+#define NZC_UPDATE_PROB_32X32 252
+#define NZC_UPDATE_PROB_PCAT 252
+#endif
+
+#if CONFIG_MODELCOEFPROB
+#define COEF_MODEL_UPDATE_PROB 16
+#endif
#endif // VP9_COMMON_VP9_COEFUPDATEPROBS_H__
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -11,10 +11,11 @@
#ifndef VP9_COMMON_VP9_COMMON_H_
#define VP9_COMMON_VP9_COMMON_H_
-#include <assert.h>
-#include "vpx_config.h"
/* Interface header for common constant data structures and lookup tables */
+#include <assert.h>
+
+#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h"
@@ -24,26 +25,34 @@
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-/* Only need this for fixed-size arrays, for structs just assign. */
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
-#define vp9_copy(Dest, Src) { \
- assert(sizeof(Dest) == sizeof(Src)); \
- vpx_memcpy(Dest, Src, sizeof(Src)); \
+/* If we don't want to use ROUND_POWER_OF_TWO macro
+static INLINE int16_t round_power_of_two(int16_t value, int n) {
+ return (value + (1 << (n - 1))) >> n;
+}*/
+
+// Only need this for fixed-size arrays, for structs just assign.
+#define vp9_copy(dest, src) { \
+ assert(sizeof(dest) == sizeof(src)); \
+ vpx_memcpy(dest, src, sizeof(src)); \
}
-/* Use this for variably-sized arrays. */
-
-#define vp9_copy_array(Dest, Src, N) { \
- assert(sizeof(*Dest) == sizeof(*Src)); \
- vpx_memcpy(Dest, Src, N * sizeof(*Src)); \
+// Use this for variably-sized arrays.
+#define vp9_copy_array(dest, src, n) { \
+ assert(sizeof(*dest) == sizeof(*src)); \
+ vpx_memcpy(dest, src, n * sizeof(*src)); \
}
-#define vp9_zero(Dest) vpx_memset(&Dest, 0, sizeof(Dest));
+#define vp9_zero(dest) vpx_memset(&dest, 0, sizeof(dest));
+#define vp9_zero_array(dest, n) vpx_memset(dest, 0, n * sizeof(*dest));
-#define vp9_zero_array(Dest, N) vpx_memset(Dest, 0, N * sizeof(*Dest));
-
-static __inline uint8_t clip_pixel(int val) {
+static INLINE uint8_t clip_pixel(int val) {
return (val > 255) ? 255u : (val < 0) ? 0u : val;
+}
+
+static INLINE int clamp(int value, int low, int high) {
+ return value < low ? low : (value > high ? high : value);
}
#endif // VP9_COMMON_VP9_COMMON_H_
--- /dev/null
+++ b/vp9/common/vp9_convolve.c
@@ -1,0 +1,850 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vp9/common/vp9_convolve.h"
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_common.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+#define VP9_FILTER_WEIGHT 128
+#define VP9_FILTER_SHIFT 7
+
+/* Assume a bank of 16 filters to choose from. There are two implementations
+ * for filter wrapping behavior, since we want to be able to pick which filter
+ * to start with. We could either:
+ *
+ * 1) make filter_ a pointer to the base of the filter array, and then add an
+ * additional offset parameter, to choose the starting filter.
+ * 2) use a pointer to 2 periods worth of filters, so that even if the original
+ * phase offset is at 15/16, we'll have valid data to read. The filter
+ * tables become [32][8], and the second half is duplicated.
+ * 3) fix the alignment of the filter tables, so that we know the 0/16 is
+ * always 256 byte aligned.
+ *
+ * Implementations 2 and 3 are likely preferable, as they avoid an extra 2
+ * parameters, and switching between them is trivial, with the
+ * ALIGN_FILTERS_256 macro, below.
+ */
+ #define ALIGN_FILTERS_256 1
+
+static void convolve_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x0, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps) {
+ int x, y, k, sum;
+ const int16_t *filter_x_base = filter_x0;
+
+#if ALIGN_FILTERS_256
+ filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source line */
+ src -= taps / 2 - 1;
+
+ for (y = 0; y < h; ++y) {
+ /* Pointer to filter to use */
+ const int16_t *filter_x = filter_x0;
+
+ /* Initial phase offset */
+ int x0_q4 = (filter_x - filter_x_base) / taps;
+ int x_q4 = x0_q4;
+
+ for (x = 0; x < w; ++x) {
+ /* Per-pixel src offset */
+ int src_x = (x_q4 - x0_q4) >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[src_x + k] * filter_x[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT);
+
+ /* Adjust source and filter to use for the next pixel */
+ x_q4 += x_step_q4;
+ filter_x = filter_x_base + (x_q4 & 0xf) * taps;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+static void convolve_avg_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x0, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps) {
+ int x, y, k, sum;
+ const int16_t *filter_x_base = filter_x0;
+
+#if ALIGN_FILTERS_256
+ filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source line */
+ src -= taps / 2 - 1;
+
+ for (y = 0; y < h; ++y) {
+ /* Pointer to filter to use */
+ const int16_t *filter_x = filter_x0;
+
+ /* Initial phase offset */
+ int x0_q4 = (filter_x - filter_x_base) / taps;
+ int x_q4 = x0_q4;
+
+ for (x = 0; x < w; ++x) {
+ /* Per-pixel src offset */
+ int src_x = (x_q4 - x0_q4) >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[src_x + k] * filter_x[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
+
+ /* Adjust source and filter to use for the next pixel */
+ x_q4 += x_step_q4;
+ filter_x = filter_x_base + (x_q4 & 0xf) * taps;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+
+static inline uint8_t combine_qtr(uint8_t a, uint8_t b) {
+ return (((a) + (b) * 3 + 2) >> 2);
+}
+
+static inline uint8_t combine_3qtr(uint8_t a, uint8_t b) {
+ return (((a) * 3 + (b) + 2) >> 2);
+}
+
+static inline uint8_t combine_1by8(uint8_t a, uint8_t b) {
+ return (((a) * 1 + (b) * 7 + 4) >> 3);
+}
+
+static inline uint8_t combine_3by8(uint8_t a, uint8_t b) {
+ return (((a) * 3 + (b) * 5 + 4) >> 3);
+}
+
+static inline uint8_t combine_5by8(uint8_t a, uint8_t b) {
+ return (((a) * 5 + (b) * 3 + 4) >> 3);
+}
+
+static inline uint8_t combine_7by8(uint8_t a, uint8_t b) {
+ return (((a) * 7 + (b) * 1 + 4) >> 3);
+}
+
+// TODO(debargha): Implment with a separate weight parameter
+static void convolve_wtd_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x0, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps,
+ uint8_t (*combine)(uint8_t a, uint8_t b)) {
+ int x, y, k, sum;
+ const int16_t *filter_x_base = filter_x0;
+
+#if ALIGN_FILTERS_256
+ filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source line */
+ src -= taps / 2 - 1;
+
+ for (y = 0; y < h; ++y) {
+ /* Pointer to filter to use */
+ const int16_t *filter_x = filter_x0;
+
+ /* Initial phase offset */
+ int x0_q4 = (filter_x - filter_x_base) / taps;
+ int x_q4 = x0_q4;
+
+ for (x = 0; x < w; ++x) {
+ /* Per-pixel src offset */
+ int src_x = (x_q4 - x0_q4) >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[src_x + k] * filter_x[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[x] = combine(dst[x], clip_pixel(sum >> VP9_FILTER_SHIFT));
+
+ /* Adjust source and filter to use for the next pixel */
+ x_q4 += x_step_q4;
+ filter_x = filter_x_base + (x_q4 & 0xf) * taps;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+#endif
+
+static void convolve_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y0, int y_step_q4,
+ int w, int h, int taps) {
+ int x, y, k, sum;
+
+ const int16_t *filter_y_base = filter_y0;
+
+#if ALIGN_FILTERS_256
+ filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source column */
+ src -= src_stride * (taps / 2 - 1);
+ for (x = 0; x < w; ++x) {
+ /* Pointer to filter to use */
+ const int16_t *filter_y = filter_y0;
+
+ /* Initial phase offset */
+ int y0_q4 = (filter_y - filter_y_base) / taps;
+ int y_q4 = y0_q4;
+
+ for (y = 0; y < h; ++y) {
+ /* Per-pixel src offset */
+ int src_y = (y_q4 - y0_q4) >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[(src_y + k) * src_stride] * filter_y[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT);
+
+ /* Adjust source and filter to use for the next pixel */
+ y_q4 += y_step_q4;
+ filter_y = filter_y_base + (y_q4 & 0xf) * taps;
+ }
+ ++src;
+ ++dst;
+ }
+}
+
+static void convolve_avg_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y0, int y_step_q4,
+ int w, int h, int taps) {
+ int x, y, k, sum;
+
+ const int16_t *filter_y_base = filter_y0;
+
+#if ALIGN_FILTERS_256
+ filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source column */
+ src -= src_stride * (taps / 2 - 1);
+ for (x = 0; x < w; ++x) {
+ /* Pointer to filter to use */
+ const int16_t *filter_y = filter_y0;
+
+ /* Initial phase offset */
+ int y0_q4 = (filter_y - filter_y_base) / taps;
+ int y_q4 = y0_q4;
+
+ for (y = 0; y < h; ++y) {
+ /* Per-pixel src offset */
+ int src_y = (y_q4 - y0_q4) >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[(src_y + k) * src_stride] * filter_y[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[y * dst_stride] =
+ (dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
+
+ /* Adjust source and filter to use for the next pixel */
+ y_q4 += y_step_q4;
+ filter_y = filter_y_base + (y_q4 & 0xf) * taps;
+ }
+ ++src;
+ ++dst;
+ }
+}
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void convolve_wtd_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y0, int y_step_q4,
+ int w, int h, int taps,
+ uint8_t (*combine)(uint8_t a, uint8_t b)) {
+ int x, y, k, sum;
+
+ const int16_t *filter_y_base = filter_y0;
+
+#if ALIGN_FILTERS_256
+ filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
+#endif
+
+ /* Adjust base pointer address for this source column */
+ src -= src_stride * (taps / 2 - 1);
+ for (x = 0; x < w; ++x) {
+ /* Pointer to filter to use */
+ const int16_t *filter_y = filter_y0;
+
+ /* Initial phase offset */
+ int y0_q4 = (filter_y - filter_y_base) / taps;
+ int y_q4 = y0_q4;
+
+ for (y = 0; y < h; ++y) {
+ /* Per-pixel src offset */
+ int src_y = (y_q4 - y0_q4) >> 4;
+
+ for (sum = 0, k = 0; k < taps; ++k) {
+ sum += src[(src_y + k) * src_stride] * filter_y[k];
+ }
+ sum += (VP9_FILTER_WEIGHT >> 1);
+ dst[y * dst_stride] = combine(dst[y * dst_stride],
+ clip_pixel(sum >> VP9_FILTER_SHIFT));
+
+ /* Adjust source and filter to use for the next pixel */
+ y_q4 += y_step_q4;
+ filter_y = filter_y_base + (y_q4 & 0xf) * taps;
+ }
+ ++src;
+ ++dst;
+ }
+}
+#endif
+
+static void convolve_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps) {
+ /* Fixed size intermediate buffer places limits on parameters.
+ * Maximum intermediate_height is 39, for y_step_q4 == 32,
+ * h == 16, taps == 8.
+ */
+ uint8_t temp[16 * 39];
+ int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
+
+ assert(w <= 16);
+ assert(h <= 16);
+ assert(taps <= 8);
+ assert(y_step_q4 <= 32);
+
+ if (intermediate_height < h)
+ intermediate_height = h;
+
+ convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
+ temp, 16,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, intermediate_height, taps);
+ convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, taps);
+}
+
+static void convolve_avg_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int taps) {
+ /* Fixed size intermediate buffer places limits on parameters.
+ * Maximum intermediate_height is 39, for y_step_q4 == 32,
+ * h == 16, taps == 8.
+ */
+ uint8_t temp[16 * 39];
+ int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
+
+ assert(w <= 16);
+ assert(h <= 16);
+ assert(taps <= 8);
+ assert(y_step_q4 <= 32);
+
+ if (intermediate_height < h)
+ intermediate_height = h;
+
+ convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
+ temp, 16,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, intermediate_height, taps);
+ convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, taps);
+}
+
+void vp9_convolve8_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+void vp9_convolve8_avg_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+void vp9_convolve8_1by8_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_1by8);
+}
+
+void vp9_convolve8_qtr_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_qtr);
+}
+
+void vp9_convolve8_3by8_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_3by8);
+}
+
+void vp9_convolve8_5by8_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_5by8);
+}
+
+void vp9_convolve8_3qtr_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_3qtr);
+}
+
+void vp9_convolve8_7by8_horiz_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_7by8);
+}
+#endif
+
+void vp9_convolve8_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+void vp9_convolve8_avg_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_avg_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+void vp9_convolve8_1by8_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_1by8);
+}
+
+void vp9_convolve8_qtr_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_qtr);
+}
+
+void vp9_convolve8_3by8_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_3by8);
+}
+
+void vp9_convolve8_5by8_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_5by8);
+}
+
+void vp9_convolve8_3qtr_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_3qtr);
+}
+
+void vp9_convolve8_7by8_vert_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_wtd_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8, combine_7by8);
+}
+#endif
+
+void vp9_convolve8_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ convolve_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, 8);
+}
+
+void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_avg(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameter should be removed! */
+ NULL, 0, /* These unused parameter should be removed! */
+ w, h);
+}
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+void vp9_convolve8_1by8_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_1by8(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameter should be removed! */
+ NULL, 0, /* These unused parameter should be removed! */
+ w, h);
+}
+
+void vp9_convolve8_qtr_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_qtr(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameter should be removed! */
+ NULL, 0, /* These unused parameter should be removed! */
+ w, h);
+}
+
+void vp9_convolve8_3by8_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_3by8(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameter should be removed! */
+ NULL, 0, /* These unused parameter should be removed! */
+ w, h);
+}
+
+void vp9_convolve8_5by8_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_5by8(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameter should be removed! */
+ NULL, 0, /* These unused parameter should be removed! */
+ w, h);
+}
+
+void vp9_convolve8_3qtr_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_3qtr(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameter should be removed! */
+ NULL, 0, /* These unused parameter should be removed! */
+ w, h);
+}
+
+void vp9_convolve8_7by8_c(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Fixed size intermediate buffer places limits on parameters. */
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
+ assert(w <= 16);
+ assert(h <= 16);
+
+ vp9_convolve8(src, src_stride,
+ temp, 16,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+ vp9_convolve_7by8(temp, 16,
+ dst, dst_stride,
+ NULL, 0, /* These unused parameter should be removed! */
+ NULL, 0, /* These unused parameter should be removed! */
+ w, h);
+}
+#endif
+
+void vp9_convolve_copy(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h) {
+ if (w == 16 && h == 16) {
+ vp9_copy_mem16x16(src, src_stride, dst, dst_stride);
+ } else if (w == 8 && h == 8) {
+ vp9_copy_mem8x8(src, src_stride, dst, dst_stride);
+ } else if (w == 8 && h == 4) {
+ vp9_copy_mem8x4(src, src_stride, dst, dst_stride);
+ } else {
+ int r;
+
+ for (r = h; r > 0; --r) {
+ memcpy(dst, src, w);
+ src += src_stride;
+ dst += dst_stride;
+ }
+ }
+}
+
+void vp9_convolve_avg(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h) {
+ int x, y;
+
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ dst[x] = (dst[x] + src[x] + 1) >> 1;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+void vp9_convolve_1by8(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h) {
+ int x, y;
+
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ dst[x] = combine_1by8(dst[x], src[x]);
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+void vp9_convolve_qtr(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h) {
+ int x, y;
+
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ dst[x] = combine_qtr(dst[x], src[x]);
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+void vp9_convolve_3by8(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h) {
+ int x, y;
+
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ dst[x] = combine_3by8(dst[x], src[x]);
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+void vp9_convolve_5by8(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h) {
+ int x, y;
+
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ dst[x] = combine_5by8(dst[x], src[x]);
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+void vp9_convolve_3qtr(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h) {
+ int x, y;
+
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ dst[x] = combine_3qtr(dst[x], src[x]);
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
+void vp9_convolve_7by8(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h) {
+ int x, y;
+
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) {
+ dst[x] = combine_7by8(dst[x], src[x]);
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+#endif
--- /dev/null
+++ b/vp9/common/vp9_convolve.h
@@ -1,0 +1,85 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP9_COMMON_CONVOLVE_H_
+#define VP9_COMMON_CONVOLVE_H_
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block copy conforming to the convolution prototype
+void vp9_convolve_copy(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block average conforming to the convolution prototype
+void vp9_convolve_avg(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+// Not a convolution, a block wtd (1/8, 7/8) average for (dst, src)
+void vp9_convolve_1by8(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block wtd (1/4, 3/4) average for (dst, src)
+void vp9_convolve_qtr(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block wtd (3/8, 5/8) average for (dst, src)
+void vp9_convolve_3by8(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block wtd (5/8, 3/8) average for (dst, src)
+void vp9_convolve_5by8(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block wtd (3/4, 1/4) average for (dst, src)
+void vp9_convolve_3qtr(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+
+// Not a convolution, a block wtd (7/8, 1/8) average for (dst, src)
+void vp9_convolve_7by8(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h);
+#endif
+
+struct subpix_fn_table {
+ const int16_t (*filter_x)[8];
+ const int16_t (*filter_y)[8];
+};
+
+#endif // VP9_COMMON_CONVOLVE_H_
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -9,6 +9,7 @@
*/
#include <stdio.h>
+
#include "vp9/common/vp9_blockd.h"
void vp9_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols,
@@ -18,8 +19,7 @@
int mb_index = 0;
FILE *mvs = fopen("mvs.stt", "a");
- /* print out the macroblock Y modes */
- mb_index = 0;
+ // Print out the macroblock Y modes
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
for (mb_row = 0; mb_row < rows; mb_row++) {
@@ -129,8 +129,8 @@
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
bindex = (b_row & 3) * 4 + (b_col & 3);
fprintf(mvs, "%3d:%-3d ",
- mi[mb_index].bmi[bindex].as_mv.first.as_mv.row,
- mi[mb_index].bmi[bindex].as_mv.first.as_mv.col);
+ mi[mb_index].bmi[bindex].as_mv[0].as_mv.row,
+ mi[mb_index].bmi[bindex].as_mv[0].as_mv.col);
}
--- a/vp9/common/vp9_default_coef_probs.h
+++ b/vp9/common/vp9_default_coef_probs.h
@@ -11,1201 +11,987 @@
/*Generated file, included by vp9_entropy.c*/
-
-static const vp9_coeff_probs default_coef_probs_4x4[BLOCK_TYPES_4X4] = {
+// NOTE: When the CONFIG_MODELCOEFPROB experiment is on, only the first
+// 2 or 3 from each row is actually used depending on whether
+// UNCONSTRAINDED_NODES is 2 or 3. If this experiment is merged
+// the tables below should be shortened accordingly.
+static const vp9_coeff_probs default_coef_probs_4x4[BLOCK_TYPES] = {
{ /* block Type 0 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 224, 180, 254, 255, 234, 224, 255, 227, 128, 128, 128 },
- { 187, 178, 250, 255, 226, 218, 255, 229, 255, 255, 128 },
- { 145, 171, 243, 253, 219, 211, 254, 226, 255, 224, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 1, 187, 252, 255, 231, 220, 255, 229, 255, 255, 128 },
- { 129, 174, 244, 254, 225, 216, 253, 219, 255, 255, 128 },
- { 16, 131, 193, 251, 205, 205, 254, 222, 255, 255, 128 },
- { 2, 93, 136, 236, 159, 179, 255, 197, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 1, 188, 254, 255, 241, 236, 254, 220, 255, 255, 128 },
- { 133, 165, 249, 255, 236, 220, 252, 220, 255, 255, 128 },
- { 20, 112, 203, 254, 217, 214, 255, 224, 255, 255, 128 },
- { 4, 61, 106, 240, 155, 189, 252, 202, 255, 255, 128 }
- }, { /* Coeff Band 4 */
- { 1, 168, 252, 255, 239, 228, 253, 217, 255, 255, 128 },
- { 158, 163, 247, 255, 231, 221, 255, 242, 128, 128, 128 },
- { 23, 127, 205, 253, 212, 224, 255, 234, 255, 255, 128 },
- { 2, 83, 141, 237, 176, 210, 245, 207, 255, 255, 128 }
- }, { /* Coeff Band 5 */
- { 1, 233, 254, 255, 243, 241, 255, 213, 128, 128, 128 },
- { 155, 213, 253, 255, 240, 221, 216, 112, 255, 255, 128 },
- { 41, 159, 237, 254, 229, 216, 255, 161, 128, 128, 128 },
- { 11, 95, 176, 244, 194, 191, 255, 167, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 1, 160, 253, 255, 238, 231, 255, 230, 255, 255, 128 },
- { 174, 152, 248, 255, 230, 223, 255, 223, 255, 255, 128 },
- { 86, 125, 213, 253, 207, 207, 254, 224, 255, 171, 128 },
- { 39, 89, 156, 240, 168, 190, 251, 181, 255, 255, 128 }
- }, { /* Coeff Band 7 */
- { 1, 101, 255, 255, 243, 244, 255, 255, 128, 128, 128 },
- { 230, 66, 255, 255, 238, 238, 128, 128, 128, 128, 128 },
- { 151, 92, 229, 255, 224, 197, 128, 128, 128, 128, 128 },
- { 109, 57, 171, 255, 73, 255, 128, 128, 128, 128, 128 }
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 208, 32, 178, 198, 161, 167, 196, 147, 244, 194, 210 },
+ { 102, 43, 132, 185, 148, 162, 185, 141, 237, 181, 215 },
+ { 15, 36, 68, 143, 119, 151, 169, 133, 230, 173, 214 }
+ }, { /* Coeff Band 1 */
+ { 71, 91, 178, 226, 169, 176, 232, 170, 252, 219, 231 },
+ { 72, 88, 174, 226, 168, 176, 232, 170, 252, 219, 234 },
+ { 40, 79, 154, 222, 161, 174, 231, 169, 251, 219, 238 },
+ { 21, 68, 126, 211, 144, 167, 230, 167, 252, 219, 236 },
+ { 7, 49, 84, 175, 121, 152, 223, 151, 251, 218, 237 },
+ { 1, 20, 32, 100, 97, 140, 163, 116, 237, 186, 222 }
+ }, { /* Coeff Band 2 */
+ { 108, 110, 206, 237, 182, 183, 239, 181, 252, 221, 245 },
+ { 72, 98, 191, 236, 180, 182, 240, 183, 252, 223, 239 },
+ { 26, 77, 152, 230, 166, 179, 239, 181, 252, 222, 241 },
+ { 7, 57, 106, 212, 141, 167, 236, 173, 252, 223, 243 },
+ { 1, 35, 60, 171, 110, 149, 225, 155, 251, 218, 240 },
+ { 1, 14, 22, 90, 86, 134, 163, 116, 238, 181, 233 }
+ }, { /* Coeff Band 3 */
+ { 105, 139, 222, 245, 196, 192, 245, 195, 253, 229, 255 },
+ { 76, 118, 205, 245, 192, 192, 247, 198, 254, 230, 255 },
+ { 21, 88, 164, 240, 175, 186, 246, 197, 255, 232, 255 },
+ { 5, 63, 118, 222, 149, 172, 242, 185, 255, 230, 254 },
+ { 1, 42, 74, 186, 120, 157, 227, 161, 253, 220, 250 },
+ { 1, 18, 30, 97, 92, 136, 163, 118, 244, 184, 244 }
+ }, { /* Coeff Band 4 */
+ { 143, 117, 233, 251, 207, 201, 250, 210, 255, 239, 128 },
+ { 99, 104, 214, 249, 200, 199, 251, 211, 255, 238, 255 },
+ { 26, 81, 170, 245, 183, 192, 250, 206, 255, 242, 255 },
+ { 6, 60, 116, 226, 151, 176, 242, 187, 255, 235, 255 },
+ { 1, 38, 65, 178, 114, 153, 224, 157, 254, 224, 255 },
+ { 1, 15, 26, 86, 88, 133, 163, 110, 251, 197, 252 }
+ }, { /* Coeff Band 5 */
+ { 155, 74, 238, 252, 215, 206, 252, 223, 255, 255, 128 },
+ { 152, 64, 223, 250, 205, 201, 254, 219, 255, 255, 128 },
+ { 67, 55, 182, 246, 187, 192, 251, 210, 255, 240, 128 },
+ { 27, 44, 127, 227, 155, 176, 244, 186, 255, 240, 255 },
+ { 9, 27, 69, 176, 115, 152, 227, 154, 255, 229, 255 },
+ { 2, 11, 28, 91, 84, 133, 177, 115, 254, 210, 255 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 207, 112, 234, 244, 192, 193, 246, 194, 255, 237, 255 },
+ { 145, 120, 212, 233, 178, 183, 232, 177, 252, 216, 228 },
+ { 77, 114, 177, 214, 164, 174, 210, 159, 245, 199, 230 }
+ }, { /* Coeff Band 1 */
+ { 93, 174, 243, 248, 205, 200, 245, 195, 255, 232, 255 },
+ { 100, 144, 231, 248, 204, 200, 244, 193, 255, 232, 255 },
+ { 28, 101, 186, 247, 194, 199, 244, 194, 255, 232, 255 },
+ { 9, 73, 132, 238, 155, 186, 245, 197, 255, 232, 250 },
+ { 2, 44, 76, 187, 112, 151, 240, 172, 255, 235, 249 },
+ { 1, 19, 33, 98, 92, 138, 176, 113, 252, 208, 249 }
+ }, { /* Coeff Band 2 */
+ { 116, 175, 246, 250, 212, 202, 248, 198, 255, 238, 255 },
+ { 78, 142, 231, 250, 208, 203, 249, 200, 255, 241, 255 },
+ { 14, 93, 177, 245, 186, 196, 248, 198, 255, 241, 255 },
+ { 4, 65, 122, 227, 148, 177, 244, 186, 255, 241, 243 },
+ { 1, 38, 69, 180, 111, 152, 235, 162, 255, 237, 247 },
+ { 1, 18, 30, 101, 89, 133, 190, 116, 255, 219, 246 }
+ }, { /* Coeff Band 3 */
+ { 138, 183, 249, 253, 220, 209, 252, 210, 255, 251, 128 },
+ { 93, 147, 237, 252, 213, 209, 253, 213, 255, 251, 128 },
+ { 21, 104, 187, 247, 185, 196, 252, 210, 255, 249, 128 },
+ { 6, 73, 131, 225, 147, 174, 248, 190, 255, 248, 128 },
+ { 1, 47, 83, 189, 119, 155, 239, 167, 255, 246, 128 },
+ { 1, 26, 44, 130, 96, 139, 209, 129, 255, 235, 255 }
+ }, { /* Coeff Band 4 */
+ { 188, 143, 252, 255, 228, 218, 253, 218, 255, 209, 128 },
+ { 137, 124, 241, 253, 215, 211, 254, 221, 255, 255, 128 },
+ { 32, 89, 188, 248, 186, 198, 254, 216, 255, 253, 128 },
+ { 7, 61, 122, 231, 146, 176, 252, 201, 255, 250, 128 },
+ { 1, 34, 66, 186, 103, 149, 246, 176, 255, 249, 128 },
+ { 1, 18, 34, 115, 91, 134, 217, 124, 255, 233, 255 }
+ }, { /* Coeff Band 5 */
+ { 198, 92, 253, 255, 231, 222, 255, 230, 128, 128, 128 },
+ { 189, 79, 244, 254, 220, 217, 255, 237, 255, 255, 128 },
+ { 78, 61, 200, 252, 196, 207, 255, 231, 255, 255, 128 },
+ { 34, 50, 146, 242, 161, 187, 255, 222, 255, 255, 128 },
+ { 11, 38, 93, 215, 122, 159, 253, 202, 255, 255, 128 },
+ { 1, 31, 55, 143, 102, 143, 227, 148, 255, 238, 128 }
+ }
}
}, { /* block Type 1 */
- { /* Coeff Band 0 */
- { 148, 109, 219, 239, 203, 184, 222, 172, 238, 203, 192 },
- { 101, 110, 206, 229, 181, 178, 224, 171, 250, 206, 180 },
- { 67, 108, 186, 222, 172, 174, 216, 167, 246, 195, 221 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 184, 249, 254, 226, 220, 253, 241, 255, 255, 128 },
- { 84, 182, 244, 254, 222, 218, 254, 217, 255, 255, 128 },
- { 56, 147, 210, 252, 208, 210, 253, 218, 255, 255, 128 },
- { 32, 124, 170, 233, 165, 178, 249, 196, 255, 253, 128 }
- }, { /* Coeff Band 2 */
- { 1, 182, 242, 245, 208, 194, 239, 179, 255, 238, 128 },
- { 28, 170, 230, 241, 202, 192, 243, 171, 255, 243, 128 },
- { 16, 109, 165, 231, 182, 184, 237, 168, 255, 249, 255 },
- { 2, 76, 113, 202, 141, 172, 221, 160, 252, 227, 255 }
- }, { /* Coeff Band 3 */
- { 1, 195, 249, 254, 230, 239, 251, 211, 255, 255, 128 },
- { 39, 164, 242, 254, 224, 222, 255, 235, 255, 255, 128 },
- { 16, 111, 179, 251, 204, 197, 251, 234, 255, 209, 128 },
- { 3, 84, 130, 225, 155, 176, 226, 196, 255, 238, 128 }
- }, { /* Coeff Band 4 */
- { 1, 180, 248, 254, 227, 219, 254, 211, 255, 255, 128 },
- { 38, 170, 242, 253, 222, 214, 254, 242, 255, 255, 128 },
- { 5, 111, 176, 250, 204, 197, 255, 208, 128, 128, 128 },
- { 1, 75, 120, 233, 146, 186, 250, 203, 255, 255, 128 }
- }, { /* Coeff Band 5 */
- { 1, 183, 251, 255, 232, 223, 252, 229, 255, 255, 128 },
- { 51, 158, 245, 255, 230, 224, 255, 239, 128, 128, 128 },
- { 13, 80, 158, 253, 206, 216, 255, 233, 128, 128, 128 },
- { 4, 39, 76, 212, 107, 153, 252, 206, 255, 255, 128 }
- }, { /* Coeff Band 6 */
- { 1, 181, 252, 254, 231, 214, 242, 225, 255, 236, 128 },
- { 81, 167, 247, 254, 229, 217, 252, 226, 255, 255, 128 },
- { 20, 122, 195, 253, 213, 212, 249, 211, 255, 238, 128 },
- { 18, 100, 153, 231, 158, 182, 244, 203, 255, 219, 128 }
- }, { /* Coeff Band 7 */
- { 1, 100, 254, 255, 242, 246, 255, 230, 128, 128, 128 },
- { 177, 62, 250, 255, 246, 210, 255, 255, 128, 128, 128 },
- { 65, 58, 186, 255, 227, 241, 255, 219, 128, 128, 128 },
- { 45, 23, 118, 244, 162, 208, 255, 228, 128, 128, 128 }
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 207, 35, 219, 243, 195, 192, 243, 188, 251, 232, 238 },
+ { 126, 46, 182, 230, 177, 182, 228, 171, 248, 214, 232 },
+ { 51, 47, 125, 196, 147, 166, 206, 151, 245, 199, 229 }
+ }, { /* Coeff Band 1 */
+ { 114, 124, 220, 244, 197, 192, 242, 189, 253, 226, 255 },
+ { 142, 116, 213, 243, 194, 191, 241, 188, 252, 226, 255 },
+ { 81, 101, 190, 242, 188, 190, 242, 190, 253, 229, 255 },
+ { 42, 83, 155, 235, 166, 183, 241, 190, 253, 227, 246 },
+ { 16, 62, 104, 205, 133, 161, 238, 176, 254, 227, 250 },
+ { 6, 40, 60, 132, 109, 145, 190, 128, 248, 202, 239 }
+ }, { /* Coeff Band 2 */
+ { 139, 149, 228, 248, 205, 198, 244, 196, 255, 223, 255 },
+ { 115, 127, 221, 248, 202, 198, 245, 198, 255, 228, 255 },
+ { 43, 100, 189, 246, 195, 195, 244, 196, 254, 234, 228 },
+ { 13, 77, 141, 238, 168, 187, 243, 191, 255, 232, 255 },
+ { 3, 49, 88, 203, 125, 160, 237, 178, 253, 227, 251 },
+ { 1, 23, 41, 118, 97, 136, 191, 127, 250, 207, 247 }
+ }, { /* Coeff Band 3 */
+ { 119, 185, 236, 251, 216, 205, 249, 202, 253, 237, 255 },
+ { 89, 140, 224, 251, 211, 205, 250, 208, 255, 241, 255 },
+ { 34, 105, 189, 248, 195, 197, 250, 208, 255, 245, 255 },
+ { 14, 78, 142, 235, 166, 182, 246, 194, 255, 242, 255 },
+ { 5, 49, 90, 196, 128, 160, 235, 165, 255, 237, 255 },
+ { 1, 22, 41, 114, 97, 139, 180, 124, 252, 201, 249 }
+ }, { /* Coeff Band 4 */
+ { 162, 142, 244, 254, 228, 215, 255, 230, 128, 128, 128 },
+ { 129, 120, 231, 253, 216, 210, 255, 228, 255, 255, 128 },
+ { 44, 90, 189, 249, 195, 199, 253, 217, 255, 240, 128 },
+ { 14, 65, 132, 234, 158, 181, 249, 203, 255, 248, 128 },
+ { 3, 38, 72, 188, 112, 154, 239, 171, 255, 243, 128 },
+ { 1, 17, 39, 110, 86, 141, 201, 123, 255, 240, 128 }
+ }, { /* Coeff Band 5 */
+ { 167, 96, 247, 255, 230, 218, 249, 231, 255, 255, 128 },
+ { 163, 84, 234, 253, 214, 209, 255, 231, 255, 255, 128 },
+ { 70, 63, 185, 249, 189, 197, 255, 230, 255, 255, 128 },
+ { 30, 44, 132, 238, 157, 180, 251, 210, 255, 220, 128 },
+ { 13, 30, 80, 195, 121, 153, 243, 179, 255, 224, 128 },
+ { 5, 13, 38, 103, 109, 128, 196, 147, 255, 255, 128 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 242, 90, 246, 244, 200, 192, 242, 189, 255, 234, 255 },
+ { 186, 102, 228, 233, 187, 182, 231, 172, 254, 225, 252 },
+ { 102, 108, 203, 228, 181, 180, 218, 167, 243, 201, 223 }
+ }, { /* Coeff Band 1 */
+ { 152, 169, 250, 253, 223, 209, 251, 208, 255, 250, 128 },
+ { 164, 149, 242, 253, 222, 209, 249, 207, 253, 238, 255 },
+ { 63, 108, 204, 252, 215, 211, 251, 211, 255, 242, 128 },
+ { 39, 83, 153, 248, 175, 199, 250, 214, 255, 245, 128 },
+ { 31, 66, 108, 214, 130, 161, 251, 196, 255, 237, 128 },
+ { 27, 65, 71, 150, 112, 149, 213, 133, 255, 230, 255 }
+ }, { /* Coeff Band 2 */
+ { 161, 174, 250, 254, 226, 215, 254, 226, 255, 230, 128 },
+ { 133, 150, 239, 254, 222, 213, 254, 225, 255, 255, 128 },
+ { 32, 105, 197, 252, 206, 207, 253, 220, 255, 255, 128 },
+ { 10, 78, 147, 245, 173, 193, 253, 212, 255, 255, 128 },
+ { 2, 49, 99, 221, 133, 164, 250, 198, 255, 252, 128 },
+ { 1, 26, 53, 154, 96, 135, 234, 142, 255, 240, 128 }
+ }, { /* Coeff Band 3 */
+ { 160, 187, 251, 255, 234, 223, 255, 233, 128, 128, 128 },
+ { 131, 155, 241, 255, 228, 222, 255, 232, 255, 255, 128 },
+ { 42, 108, 198, 253, 207, 212, 255, 234, 255, 255, 128 },
+ { 18, 81, 151, 246, 176, 194, 254, 222, 255, 255, 128 },
+ { 9, 60, 112, 225, 144, 167, 252, 199, 255, 255, 128 },
+ { 5, 35, 49, 163, 113, 150, 237, 118, 255, 255, 128 }
+ }, { /* Coeff Band 4 */
+ { 195, 141, 253, 255, 242, 232, 255, 255, 128, 128, 128 },
+ { 169, 128, 245, 255, 235, 227, 255, 248, 128, 128, 128 },
+ { 62, 91, 204, 255, 216, 220, 255, 233, 128, 128, 128 },
+ { 23, 70, 150, 248, 178, 202, 255, 223, 128, 128, 128 },
+ { 2, 44, 78, 220, 110, 164, 255, 209, 128, 128, 128 },
+ { 1, 1, 128, 255, 255, 128, 128, 128, 128, 128, 128 }
+ }, { /* Coeff Band 5 */
+ { 195, 104, 253, 255, 246, 246, 255, 171, 128, 128, 128 },
+ { 197, 92, 248, 255, 239, 228, 255, 239, 128, 128, 128 },
+ { 88, 71, 214, 255, 219, 220, 255, 244, 128, 128, 128 },
+ { 39, 56, 160, 250, 187, 204, 255, 255, 128, 128, 128 },
+ { 18, 28, 90, 217, 81, 137, 255, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ }
}
- }, { /* block Type 2 */
- { /* Coeff Band 0 */
- { 242, 73, 238, 244, 198, 192, 241, 189, 253, 226, 247 },
- { 171, 70, 204, 231, 180, 183, 228, 172, 247, 215, 221 },
- { 73, 62, 144, 202, 153, 169, 207, 153, 245, 199, 230 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 163, 241, 245, 201, 192, 243, 191, 255, 229, 255 },
- { 165, 147, 230, 245, 201, 193, 244, 193, 255, 231, 255 },
- { 76, 109, 191, 243, 190, 193, 243, 192, 255, 231, 255 },
- { 22, 63, 111, 202, 138, 164, 225, 164, 252, 218, 248 }
- }, { /* Coeff Band 2 */
- { 1, 113, 225, 245, 201, 195, 238, 185, 254, 225, 255 },
- { 122, 105, 195, 236, 183, 186, 235, 180, 254, 227, 252 },
- { 38, 79, 135, 217, 154, 172, 229, 171, 253, 220, 250 },
- { 9, 53, 78, 161, 121, 151, 202, 141, 251, 207, 244 }
- }, { /* Coeff Band 3 */
- { 1, 150, 238, 250, 213, 202, 244, 194, 255, 236, 255 },
- { 140, 132, 223, 247, 204, 199, 243, 193, 255, 234, 255 },
- { 51, 101, 182, 240, 188, 189, 240, 186, 255, 232, 255 },
- { 6, 59, 100, 201, 137, 165, 225, 161, 252, 221, 249 }
- }, { /* Coeff Band 4 */
- { 1, 151, 233, 248, 205, 199, 248, 196, 255, 243, 255 },
- { 133, 140, 214, 244, 193, 193, 245, 194, 255, 236, 255 },
- { 27, 104, 168, 235, 172, 183, 243, 187, 254, 235, 255 },
- { 2, 61, 101, 202, 135, 164, 229, 167, 254, 223, 255 }
- }, { /* Coeff Band 5 */
- { 1, 227, 246, 254, 225, 215, 254, 217, 255, 255, 128 },
- { 132, 195, 239, 253, 219, 210, 252, 212, 255, 255, 128 },
- { 49, 143, 214, 251, 207, 204, 253, 212, 255, 238, 128 },
- { 11, 93, 151, 235, 169, 185, 247, 190, 255, 238, 128 }
- }, { /* Coeff Band 6 */
- { 1, 143, 237, 251, 213, 203, 249, 203, 255, 243, 128 },
- { 137, 120, 216, 246, 198, 196, 248, 199, 255, 240, 255 },
- { 50, 94, 166, 233, 169, 181, 245, 189, 255, 240, 255 },
- { 9, 56, 97, 190, 129, 158, 228, 159, 255, 226, 255 }
- }, { /* Coeff Band 7 */
- { 1, 96, 245, 254, 229, 216, 255, 212, 255, 255, 128 },
- { 179, 81, 234, 253, 217, 209, 255, 230, 255, 255, 128 },
- { 105, 56, 192, 248, 192, 197, 252, 212, 255, 205, 128 },
- { 53, 32, 133, 228, 151, 177, 250, 192, 255, 255, 128 }
- }
- }, { /* block Type 3 */
- { /* Coeff Band 0 */
- { 209, 89, 216, 242, 191, 190, 245, 191, 240, 235, 168 },
- { 142, 96, 196, 229, 173, 180, 233, 175, 247, 220, 174 },
- { 66, 89, 157, 205, 155, 171, 209, 156, 243, 200, 197 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 159, 235, 246, 202, 197, 237, 186, 248, 223, 223 },
- { 96, 137, 223, 247, 203, 198, 242, 188, 241, 202, 209 },
- { 22, 95, 167, 243, 184, 196, 237, 187, 247, 221, 221 },
- { 3, 51, 81, 192, 125, 158, 220, 164, 242, 211, 197 }
- }, { /* Coeff Band 2 */
- { 1, 145, 226, 244, 196, 194, 240, 191, 247, 225, 233 },
- { 66, 127, 203, 240, 188, 189, 239, 188, 248, 225, 220 },
- { 9, 83, 136, 224, 159, 176, 235, 177, 247, 223, 207 },
- { 2, 46, 71, 169, 121, 152, 210, 149, 241, 212, 199 }
- }, { /* Coeff Band 3 */
- { 1, 174, 238, 249, 209, 201, 245, 198, 241, 196, 241 },
- { 76, 151, 223, 247, 203, 197, 245, 194, 243, 202, 198 },
- { 12, 102, 170, 240, 183, 187, 242, 191, 247, 225, 209 },
- { 1, 52, 85, 202, 135, 162, 225, 168, 240, 209, 221 }
- }, { /* Coeff Band 4 */
- { 1, 140, 230, 247, 204, 198, 242, 190, 249, 209, 248 },
- { 94, 126, 213, 244, 195, 194, 240, 190, 247, 210, 237 },
- { 13, 95, 159, 232, 171, 181, 237, 179, 245, 205, 237 },
- { 1, 51, 83, 186, 128, 158, 216, 154, 240, 193, 229 }
- }, { /* Coeff Band 5 */
- { 1, 218, 244, 251, 214, 202, 243, 199, 253, 214, 255 },
- { 91, 194, 238, 249, 210, 200, 247, 203, 251, 223, 255 },
- { 18, 140, 207, 247, 198, 194, 246, 203, 252, 213, 255 },
- { 3, 76, 126, 223, 156, 172, 233, 185, 251, 206, 255 }
- }, { /* Coeff Band 6 */
- { 1, 135, 235, 250, 210, 203, 246, 206, 251, 219, 241 },
- { 105, 120, 214, 246, 196, 196, 245, 195, 250, 216, 243 },
- { 24, 91, 154, 231, 166, 180, 241, 183, 250, 214, 242 },
- { 3, 53, 84, 183, 127, 157, 218, 153, 244, 195, 237 }
- }, { /* Coeff Band 7 */
- { 1, 83, 246, 252, 215, 208, 246, 206, 255, 237, 128 },
- { 184, 61, 233, 250, 208, 204, 245, 198, 254, 227, 255 },
- { 83, 58, 190, 246, 189, 195, 244, 198, 255, 229, 128 },
- { 41, 38, 125, 214, 144, 169, 229, 171, 251, 216, 255 }
- }
}
};
-static const vp9_coeff_probs default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4] = {
+static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES] = {
{ /* block Type 0 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 196, 40, 199, 180, 158, 161, 172, 135, 226, 183, 140 },
+ { 83, 38, 128, 153, 142, 157, 155, 128, 222, 164, 202 },
+ { 10, 29, 55, 116, 113, 146, 150, 122, 223, 169, 200 }
+ }, { /* Coeff Band 1 */
+ { 33, 114, 160, 211, 155, 169, 223, 162, 248, 212, 215 },
+ { 69, 107, 155, 210, 154, 169, 224, 163, 248, 212, 216 },
+ { 30, 91, 138, 207, 150, 168, 223, 162, 248, 212, 216 },
+ { 12, 74, 115, 200, 140, 164, 222, 160, 249, 212, 219 },
+ { 4, 52, 80, 172, 121, 153, 216, 149, 249, 212, 226 },
+ { 1, 27, 40, 105, 101, 141, 157, 120, 231, 177, 210 }
+ }, { /* Coeff Band 2 */
+ { 38, 159, 190, 227, 171, 177, 229, 172, 250, 214, 237 },
+ { 34, 130, 182, 229, 173, 180, 231, 174, 249, 215, 234 },
+ { 10, 97, 153, 226, 164, 178, 232, 175, 250, 215, 241 },
+ { 3, 71, 115, 213, 145, 170, 230, 171, 251, 217, 235 },
+ { 1, 41, 68, 172, 114, 152, 219, 154, 250, 212, 235 },
+ { 1, 16, 27, 88, 90, 135, 155, 113, 235, 180, 216 }
+ }, { /* Coeff Band 3 */
+ { 41, 184, 214, 238, 187, 186, 235, 180, 252, 217, 236 },
+ { 24, 142, 199, 241, 188, 189, 237, 184, 252, 220, 235 },
+ { 6, 97, 159, 235, 172, 184, 239, 185, 252, 221, 243 },
+ { 1, 63, 110, 214, 144, 170, 234, 174, 253, 223, 243 },
+ { 1, 32, 58, 166, 109, 149, 218, 152, 251, 215, 238 },
+ { 1, 12, 21, 78, 85, 131, 152, 109, 236, 180, 224 }
+ }, { /* Coeff Band 4 */
+ { 54, 207, 231, 245, 201, 193, 238, 186, 252, 221, 220 },
+ { 32, 156, 213, 246, 198, 195, 242, 192, 252, 224, 245 },
+ { 7, 98, 164, 240, 177, 187, 243, 193, 252, 227, 244 },
+ { 2, 62, 108, 216, 143, 170, 237, 177, 254, 227, 248 },
+ { 1, 32, 57, 165, 108, 148, 219, 152, 252, 217, 243 },
+ { 1, 13, 22, 79, 87, 132, 153, 109, 240, 182, 232 }
+ }, { /* Coeff Band 5 */
+ { 89, 208, 239, 250, 216, 200, 240, 190, 255, 222, 219 },
+ { 53, 155, 223, 250, 209, 202, 245, 199, 253, 225, 246 },
+ { 12, 102, 170, 243, 183, 192, 246, 198, 254, 230, 255 },
+ { 3, 67, 111, 218, 144, 171, 239, 180, 254, 231, 248 },
+ { 1, 38, 60, 164, 108, 148, 221, 152, 253, 220, 246 },
+ { 1, 18, 26, 81, 88, 132, 157, 108, 245, 188, 241 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 205, 121, 244, 237, 187, 188, 229, 174, 248, 215, 228 },
+ { 140, 120, 211, 219, 174, 177, 207, 158, 241, 195, 214 },
+ { 51, 100, 152, 198, 155, 168, 199, 148, 240, 193, 207 }
+ }, { /* Coeff Band 1 */
+ { 66, 196, 236, 247, 202, 197, 243, 193, 254, 228, 246 },
+ { 99, 164, 223, 246, 199, 196, 243, 193, 254, 226, 255 },
+ { 29, 122, 187, 244, 187, 194, 244, 193, 255, 227, 239 },
+ { 14, 95, 145, 234, 156, 181, 244, 194, 254, 229, 246 },
+ { 6, 68, 97, 190, 123, 155, 240, 168, 254, 232, 245 },
+ { 3, 43, 50, 112, 105, 143, 170, 118, 245, 195, 230 }
+ }, { /* Coeff Band 2 */
+ { 66, 202, 238, 248, 206, 199, 245, 196, 254, 233, 244 },
+ { 45, 155, 218, 248, 200, 199, 245, 197, 254, 229, 208 },
+ { 6, 96, 163, 242, 178, 191, 245, 196, 254, 233, 228 },
+ { 2, 64, 110, 224, 142, 175, 242, 185, 254, 232, 247 },
+ { 1, 34, 61, 172, 103, 147, 232, 164, 254, 226, 244 },
+ { 1, 13, 24, 82, 85, 133, 165, 105, 248, 199, 242 }
+ }, { /* Coeff Band 3 */
+ { 66, 204, 242, 251, 213, 204, 248, 204, 255, 236, 255 },
+ { 38, 158, 222, 251, 206, 205, 249, 206, 255, 238, 255 },
+ { 6, 95, 166, 244, 178, 194, 249, 205, 255, 236, 255 },
+ { 2, 61, 111, 223, 141, 173, 244, 187, 255, 237, 255 },
+ { 1, 31, 59, 171, 104, 149, 230, 158, 255, 230, 252 },
+ { 1, 12, 22, 82, 79, 128, 171, 111, 251, 203, 249 }
+ }, { /* Coeff Band 4 */
+ { 63, 214, 245, 252, 219, 208, 249, 206, 255, 241, 128 },
+ { 38, 164, 228, 252, 210, 208, 251, 212, 255, 245, 255 },
+ { 5, 101, 174, 246, 182, 196, 251, 207, 255, 244, 255 },
+ { 1, 64, 116, 224, 142, 174, 246, 190, 255, 241, 228 },
+ { 1, 34, 63, 172, 105, 148, 233, 160, 255, 235, 237 },
+ { 1, 14, 26, 88, 85, 130, 177, 110, 252, 210, 250 }
+ }, { /* Coeff Band 5 */
+ { 91, 214, 246, 254, 226, 213, 251, 210, 255, 239, 255 },
+ { 55, 162, 233, 253, 215, 210, 253, 216, 255, 244, 128 },
+ { 10, 104, 179, 247, 184, 196, 252, 212, 255, 247, 255 },
+ { 2, 67, 119, 226, 143, 173, 249, 195, 255, 245, 255 },
+ { 1, 37, 66, 175, 106, 149, 237, 164, 255, 240, 255 },
+ { 1, 16, 30, 96, 87, 132, 188, 113, 255, 222, 255 }
+ }
}
}, { /* block Type 1 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 211, 32, 212, 235, 185, 184, 223, 167, 239, 210, 182 },
+ { 121, 47, 171, 224, 171, 180, 211, 162, 238, 195, 221 },
+ { 40, 51, 118, 203, 145, 168, 211, 160, 246, 200, 236 }
+ }, { /* Coeff Band 1 */
+ { 71, 129, 209, 244, 192, 194, 242, 188, 255, 230, 255 },
+ { 118, 122, 206, 244, 192, 192, 241, 187, 254, 227, 255 },
+ { 53, 104, 184, 241, 186, 190, 241, 184, 254, 232, 255 },
+ { 20, 81, 148, 234, 168, 183, 240, 183, 254, 231, 240 },
+ { 3, 47, 82, 197, 127, 160, 234, 166, 254, 228, 251 },
+ { 1, 18, 28, 96, 88, 134, 174, 116, 247, 194, 247 }
+ }, { /* Coeff Band 2 */
+ { 86, 162, 220, 247, 203, 198, 245, 193, 255, 237, 255 },
+ { 84, 134, 216, 247, 201, 197, 244, 192, 255, 233, 255 },
+ { 26, 102, 186, 243, 190, 192, 244, 192, 255, 232, 255 },
+ { 7, 75, 135, 231, 163, 181, 240, 183, 255, 234, 255 },
+ { 1, 46, 79, 193, 121, 157, 233, 168, 255, 225, 242 },
+ { 1, 20, 35, 113, 94, 136, 191, 123, 252, 209, 250 }
+ }, { /* Coeff Band 3 */
+ { 89, 191, 232, 250, 211, 203, 248, 202, 255, 230, 128 },
+ { 67, 148, 223, 250, 207, 201, 250, 207, 255, 247, 255 },
+ { 19, 105, 183, 245, 189, 193, 249, 202, 255, 244, 255 },
+ { 5, 72, 127, 228, 156, 177, 245, 186, 255, 238, 255 },
+ { 1, 44, 76, 190, 119, 156, 234, 167, 255, 231, 255 },
+ { 1, 21, 36, 116, 92, 138, 195, 128, 250, 208, 241 }
+ }, { /* Coeff Band 4 */
+ { 94, 210, 236, 252, 215, 206, 253, 209, 255, 247, 128 },
+ { 68, 153, 224, 251, 209, 204, 251, 213, 255, 240, 128 },
+ { 14, 103, 178, 246, 188, 195, 251, 209, 255, 239, 128 },
+ { 2, 70, 122, 230, 154, 177, 247, 194, 255, 239, 128 },
+ { 1, 42, 72, 189, 115, 153, 234, 166, 255, 229, 255 },
+ { 1, 19, 34, 104, 98, 143, 180, 124, 252, 200, 255 }
+ }, { /* Coeff Band 5 */
+ { 87, 200, 238, 254, 226, 214, 250, 212, 255, 226, 128 },
+ { 55, 151, 225, 253, 217, 212, 253, 217, 255, 233, 128 },
+ { 11, 106, 179, 249, 193, 200, 252, 213, 255, 247, 128 },
+ { 2, 72, 124, 232, 155, 180, 246, 195, 255, 230, 128 },
+ { 1, 42, 70, 182, 114, 153, 232, 163, 255, 236, 255 },
+ { 1, 17, 28, 95, 92, 137, 170, 115, 252, 208, 228 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 238, 66, 250, 245, 205, 193, 232, 180, 254, 228, 255 },
+ { 178, 84, 226, 237, 192, 185, 230, 176, 253, 217, 251 },
+ { 76, 83, 168, 218, 166, 173, 225, 162, 252, 220, 243 }
+ }, { /* Coeff Band 1 */
+ { 137, 176, 246, 252, 218, 207, 251, 208, 255, 238, 128 },
+ { 176, 160, 237, 252, 217, 206, 249, 209, 255, 247, 128 },
+ { 68, 128, 205, 251, 209, 207, 251, 207, 255, 248, 128 },
+ { 40, 105, 167, 246, 172, 192, 252, 215, 255, 247, 128 },
+ { 22, 84, 131, 214, 144, 164, 249, 185, 255, 250, 255 },
+ { 11, 60, 91, 161, 130, 155, 194, 133, 253, 214, 255 }
+ }, { /* Coeff Band 2 */
+ { 124, 192, 247, 253, 223, 210, 254, 215, 255, 255, 128 },
+ { 103, 161, 234, 253, 218, 209, 253, 214, 255, 255, 128 },
+ { 19, 108, 190, 250, 202, 202, 251, 213, 255, 241, 128 },
+ { 6, 74, 131, 242, 165, 191, 251, 207, 255, 244, 128 },
+ { 1, 41, 72, 198, 111, 151, 249, 185, 255, 248, 128 },
+ { 1, 14, 24, 82, 90, 140, 185, 96, 254, 224, 255 }
+ }, { /* Coeff Band 3 */
+ { 118, 200, 248, 254, 228, 216, 254, 222, 255, 213, 128 },
+ { 91, 166, 235, 254, 220, 212, 254, 223, 255, 233, 128 },
+ { 16, 110, 186, 251, 197, 201, 255, 225, 255, 255, 128 },
+ { 3, 72, 124, 239, 160, 186, 253, 209, 255, 239, 128 },
+ { 1, 39, 66, 198, 106, 151, 248, 191, 255, 247, 128 },
+ { 1, 14, 19, 94, 74, 124, 209, 109, 255, 245, 128 }
+ }, { /* Coeff Band 4 */
+ { 112, 213, 248, 255, 231, 218, 255, 234, 255, 255, 128 },
+ { 80, 172, 234, 254, 220, 216, 255, 233, 255, 255, 128 },
+ { 11, 112, 182, 251, 195, 204, 255, 231, 255, 224, 128 },
+ { 2, 73, 126, 241, 159, 186, 254, 219, 255, 255, 128 },
+ { 1, 40, 69, 207, 111, 159, 249, 191, 255, 255, 128 },
+ { 1, 16, 24, 83, 78, 138, 230, 134, 255, 239, 128 }
+ }, { /* Coeff Band 5 */
+ { 100, 209, 245, 255, 236, 225, 248, 231, 255, 192, 128 },
+ { 65, 164, 232, 255, 226, 221, 255, 240, 255, 255, 128 },
+ { 11, 117, 186, 253, 203, 209, 255, 240, 255, 255, 128 },
+ { 2, 83, 136, 245, 167, 191, 253, 222, 255, 255, 128 },
+ { 1, 55, 88, 213, 122, 157, 248, 182, 255, 255, 128 },
+ { 1, 10, 38, 58, 85, 43, 198, 107, 255, 255, 128 }
+ }
}
- }, { /* block Type 2 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }
- }, { /* block Type 3 */
- { /* Coeff Band 0 */
- { 191, 34, 178, 193, 160, 173, 196, 142, 247, 191, 244 },
- { 84, 45, 129, 187, 145, 170, 189, 145, 240, 186, 212 },
- { 14, 36, 69, 149, 120, 154, 177, 136, 231, 177, 196 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 76, 169, 226, 167, 180, 227, 171, 247, 218, 226 },
- { 72, 75, 162, 226, 166, 181, 231, 172, 242, 200, 219 },
- { 30, 63, 130, 218, 153, 175, 226, 170, 247, 216, 219 },
- { 5, 39, 67, 156, 119, 151, 194, 140, 239, 202, 216 }
- }, { /* Coeff Band 2 */
- { 1, 79, 182, 228, 175, 183, 224, 170, 247, 215, 220 },
- { 69, 77, 168, 224, 170, 180, 223, 168, 246, 215, 223 },
- { 24, 63, 126, 209, 153, 171, 219, 160, 247, 215, 225 },
- { 3, 35, 58, 151, 115, 151, 191, 138, 240, 199, 220 }
- }, { /* Coeff Band 3 */
- { 1, 139, 213, 238, 194, 192, 234, 180, 244, 193, 236 },
- { 82, 127, 204, 238, 190, 186, 234, 175, 244, 191, 235 },
- { 26, 93, 161, 230, 173, 179, 233, 178, 249, 217, 241 },
- { 3, 48, 78, 186, 132, 158, 212, 157, 244, 205, 233 }
- }, { /* Coeff Band 4 */
- { 1, 100, 208, 233, 180, 182, 238, 175, 250, 206, 225 },
- { 84, 87, 184, 230, 175, 180, 236, 179, 250, 209, 243 },
- { 14, 61, 111, 217, 146, 171, 236, 174, 249, 207, 245 },
- { 1, 32, 49, 150, 106, 142, 212, 145, 242, 191, 237 }
- }, { /* Coeff Band 5 */
- { 1, 130, 223, 241, 192, 189, 231, 176, 250, 209, 246 },
- { 101, 120, 207, 239, 188, 187, 240, 196, 250, 202, 255 },
- { 19, 90, 155, 232, 169, 181, 238, 190, 250, 207, 249 },
- { 1, 54, 86, 197, 130, 161, 220, 170, 248, 196, 248 }
- }, { /* Coeff Band 6 */
- { 1, 103, 208, 236, 183, 185, 235, 190, 243, 202, 219 },
- { 95, 92, 185, 230, 175, 181, 233, 174, 242, 203, 225 },
- { 24, 72, 131, 213, 152, 171, 226, 164, 241, 202, 220 },
- { 3, 45, 74, 169, 123, 154, 204, 145, 238, 188, 222 }
- }, { /* Coeff Band 7 */
- { 1, 63, 236, 247, 205, 194, 241, 189, 252, 222, 255 },
- { 151, 48, 224, 245, 200, 193, 240, 187, 255, 234, 255 },
- { 76, 45, 178, 240, 180, 189, 239, 182, 253, 231, 255 },
- { 38, 31, 111, 187, 125, 154, 217, 155, 253, 214, 255 }
- }
}
};
-static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES_8X8] = {
+static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES] = {
{ /* block Type 0 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 179, 203, 246, 252, 217, 208, 249, 197, 238, 237, 255 },
- { 136, 193, 232, 247, 202, 199, 245, 194, 255, 235, 255 },
- { 66, 170, 209, 244, 190, 191, 250, 199, 255, 242, 192 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 1, 191, 232, 250, 204, 201, 248, 199, 254, 243, 213 },
- { 50, 161, 209, 247, 196, 197, 250, 206, 253, 240, 213 },
- { 6, 118, 160, 239, 173, 186, 249, 203, 254, 235, 255 },
- { 2, 90, 110, 211, 141, 166, 242, 181, 254, 235, 255 }
- }, { /* Coeff Band 3 */
- { 1, 209, 242, 254, 223, 215, 253, 218, 255, 253, 128 },
- { 58, 168, 227, 253, 216, 211, 254, 226, 255, 251, 128 },
- { 7, 111, 178, 249, 195, 202, 253, 222, 254, 240, 255 },
- { 2, 63, 103, 226, 142, 175, 250, 202, 255, 246, 128 }
- }, { /* Coeff Band 4 */
- { 1, 207, 241, 252, 213, 205, 252, 215, 255, 228, 255 },
- { 55, 171, 225, 251, 209, 205, 251, 212, 254, 234, 255 },
- { 5, 108, 173, 247, 187, 195, 251, 211, 255, 231, 128 },
- { 2, 56, 97, 220, 138, 169, 248, 191, 253, 237, 255 }
- }, { /* Coeff Band 5 */
- { 1, 211, 245, 255, 227, 219, 255, 233, 255, 255, 128 },
- { 58, 175, 228, 254, 217, 215, 255, 231, 255, 255, 128 },
- { 6, 124, 181, 249, 191, 199, 255, 222, 255, 251, 128 },
- { 2, 85, 122, 227, 149, 172, 250, 195, 255, 245, 128 }
- }, { /* Coeff Band 6 */
- { 1, 216, 246, 255, 231, 217, 254, 220, 255, 250, 128 },
- { 74, 177, 236, 254, 222, 214, 254, 221, 255, 255, 128 },
- { 13, 125, 192, 250, 200, 203, 254, 217, 255, 245, 128 },
- { 2, 70, 114, 227, 147, 175, 251, 198, 255, 240, 128 }
- }, { /* Coeff Band 7 */
- { 1, 199, 246, 255, 238, 229, 255, 226, 255, 255, 128 },
- { 132, 162, 240, 255, 229, 222, 255, 239, 255, 255, 128 },
- { 79, 125, 207, 253, 213, 214, 255, 232, 255, 255, 128 },
- { 41, 89, 149, 240, 161, 187, 250, 216, 255, 255, 128 }
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 8, 26, 101, 170, 141, 159, 166, 138, 205, 164, 158 },
+ { 2, 25, 67, 119, 124, 152, 121, 123, 189, 145, 175 },
+ { 1, 15, 28, 67, 102, 139, 95, 107, 191, 136, 187 }
+ }, { /* Coeff Band 1 */
+ { 22, 73, 118, 160, 137, 157, 175, 132, 242, 184, 229 },
+ { 43, 73, 116, 160, 137, 157, 177, 132, 242, 185, 231 },
+ { 24, 66, 105, 158, 134, 156, 175, 133, 242, 185, 232 },
+ { 9, 54, 85, 150, 126, 153, 175, 132, 242, 185, 231 },
+ { 2, 34, 54, 123, 109, 145, 168, 124, 242, 183, 231 },
+ { 1, 14, 22, 63, 93, 134, 108, 103, 214, 149, 206 }
+ }, { /* Coeff Band 2 */
+ { 34, 123, 149, 186, 148, 163, 195, 143, 245, 195, 233 },
+ { 34, 106, 147, 189, 149, 164, 198, 146, 246, 197, 234 },
+ { 10, 81, 123, 186, 143, 162, 200, 147, 246, 198, 235 },
+ { 2, 56, 87, 170, 127, 156, 201, 143, 248, 202, 234 },
+ { 1, 35, 56, 138, 109, 146, 187, 133, 246, 196, 233 },
+ { 1, 17, 27, 80, 93, 135, 136, 109, 229, 168, 215 }
+ }, { /* Coeff Band 3 */
+ { 27, 159, 171, 208, 161, 171, 211, 155, 249, 205, 239 },
+ { 17, 119, 162, 213, 160, 172, 218, 160, 250, 210, 238 },
+ { 3, 81, 128, 207, 149, 168, 220, 161, 250, 213, 238 },
+ { 1, 53, 87, 183, 128, 158, 217, 153, 251, 214, 239 },
+ { 1, 31, 52, 143, 106, 145, 199, 137, 249, 205, 235 },
+ { 1, 14, 24, 77, 89, 133, 142, 109, 234, 174, 215 }
+ }, { /* Coeff Band 4 */
+ { 24, 189, 200, 224, 177, 178, 221, 164, 250, 212, 234 },
+ { 14, 136, 184, 230, 176, 181, 228, 172, 252, 215, 231 },
+ { 2, 87, 140, 222, 159, 176, 230, 172, 252, 218, 238 },
+ { 1, 54, 90, 193, 130, 161, 223, 160, 252, 217, 241 },
+ { 1, 28, 49, 142, 103, 144, 202, 139, 250, 208, 233 },
+ { 1, 12, 21, 73, 87, 132, 141, 106, 234, 176, 209 }
+ }, { /* Coeff Band 5 */
+ { 32, 220, 227, 242, 199, 190, 234, 180, 251, 220, 232 },
+ { 12, 155, 200, 242, 190, 191, 240, 187, 252, 225, 230 },
+ { 1, 90, 144, 231, 164, 180, 240, 184, 253, 229, 239 },
+ { 1, 53, 90, 198, 130, 162, 230, 165, 253, 226, 238 },
+ { 1, 28, 50, 145, 103, 144, 207, 140, 251, 213, 236 },
+ { 1, 13, 22, 74, 88, 132, 142, 107, 233, 176, 216 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 5, 61, 234, 230, 183, 183, 212, 164, 241, 199, 205 },
+ { 3, 65, 184, 199, 164, 170, 182, 145, 232, 175, 223 },
+ { 1, 56, 104, 154, 137, 158, 156, 131, 221, 165, 210 }
+ }, { /* Coeff Band 1 */
+ { 46, 183, 210, 229, 181, 182, 222, 165, 252, 214, 251 },
+ { 122, 166, 202, 228, 179, 181, 223, 164, 252, 217, 250 },
+ { 49, 125, 177, 225, 172, 179, 223, 163, 252, 215, 253 },
+ { 22, 99, 142, 216, 155, 173, 222, 164, 252, 215, 250 },
+ { 8, 69, 95, 180, 127, 156, 220, 153, 252, 214, 250 },
+ { 2, 38, 51, 112, 109, 144, 159, 118, 243, 184, 232 }
+ }, { /* Coeff Band 2 */
+ { 56, 196, 218, 236, 187, 185, 231, 172, 254, 223, 239 },
+ { 38, 141, 195, 235, 182, 185, 233, 174, 254, 225, 232 },
+ { 7, 93, 147, 225, 164, 178, 233, 173, 255, 226, 248 },
+ { 2, 63, 101, 201, 137, 165, 227, 162, 254, 225, 248 },
+ { 1, 39, 61, 159, 110, 148, 213, 146, 254, 218, 247 },
+ { 1, 20, 33, 98, 95, 136, 166, 115, 247, 192, 231 }
+ }, { /* Coeff Band 3 */
+ { 44, 206, 223, 240, 193, 189, 235, 177, 255, 231, 224 },
+ { 27, 147, 200, 240, 188, 189, 238, 181, 255, 229, 239 },
+ { 4, 93, 147, 230, 165, 180, 238, 180, 255, 231, 237 },
+ { 1, 58, 95, 201, 134, 164, 229, 164, 255, 228, 254 },
+ { 1, 32, 52, 152, 105, 146, 212, 142, 254, 221, 255 },
+ { 1, 14, 23, 81, 87, 133, 156, 109, 248, 191, 236 }
+ }, { /* Coeff Band 4 */
+ { 39, 216, 227, 244, 200, 194, 237, 179, 255, 231, 255 },
+ { 22, 152, 204, 243, 192, 193, 240, 186, 255, 231, 240 },
+ { 2, 92, 148, 232, 167, 183, 239, 182, 255, 232, 255 },
+ { 1, 55, 91, 200, 132, 164, 229, 164, 255, 230, 255 },
+ { 1, 28, 47, 144, 99, 142, 211, 141, 255, 222, 251 },
+ { 1, 13, 21, 75, 86, 131, 152, 103, 249, 193, 242 }
+ }, { /* Coeff Band 5 */
+ { 34, 228, 234, 249, 213, 201, 246, 194, 255, 239, 255 },
+ { 13, 161, 208, 247, 198, 197, 248, 197, 255, 243, 255 },
+ { 1, 95, 148, 234, 166, 183, 246, 190, 255, 243, 236 },
+ { 1, 55, 90, 199, 128, 161, 237, 168, 255, 239, 255 },
+ { 1, 30, 51, 147, 102, 144, 218, 142, 255, 232, 254 },
+ { 1, 16, 25, 86, 88, 131, 168, 109, 252, 207, 245 }
+ }
}
}, { /* block Type 1 */
- { /* Coeff Band 0 */
- { 138, 65, 189, 212, 172, 169, 200, 153, 233, 182, 214 },
- { 93, 60, 162, 203, 160, 169, 200, 153, 239, 190, 213 },
- { 66, 55, 141, 195, 152, 166, 199, 152, 238, 190, 212 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 102, 221, 247, 205, 198, 248, 201, 255, 235, 128 },
- { 122, 95, 215, 247, 200, 197, 248, 200, 254, 227, 255 },
- { 60, 81, 166, 241, 177, 190, 245, 193, 255, 246, 255 },
- { 32, 61, 108, 195, 133, 159, 230, 163, 254, 230, 238 }
- }, { /* Coeff Band 2 */
- { 1, 58, 203, 242, 194, 193, 229, 177, 253, 225, 249 },
- { 113, 62, 192, 237, 184, 187, 231, 181, 253, 220, 249 },
- { 50, 50, 135, 225, 159, 177, 229, 172, 254, 222, 241 },
- { 24, 34, 82, 185, 125, 152, 223, 158, 253, 212, 219 }
- }, { /* Coeff Band 3 */
- { 1, 1, 220, 253, 218, 209, 251, 213, 255, 255, 128 },
- { 154, 1, 216, 252, 211, 206, 252, 212, 255, 252, 128 },
- { 102, 1, 157, 249, 184, 200, 253, 214, 255, 247, 128 },
- { 68, 1, 101, 213, 129, 161, 247, 186, 255, 237, 255 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 204, 33, 217, 233, 185, 184, 199, 165, 204, 163, 162 },
+ { 93, 48, 151, 209, 157, 171, 193, 161, 203, 167, 189 },
+ { 18, 43, 86, 173, 126, 156, 203, 149, 231, 193, 200 }
+ }, { /* Coeff Band 1 */
+ { 43, 121, 184, 233, 173, 182, 235, 187, 248, 211, 237 },
+ { 93, 117, 177, 232, 170, 180, 235, 182, 246, 204, 224 },
+ { 33, 101, 158, 229, 165, 179, 235, 182, 245, 207, 236 },
+ { 11, 81, 129, 221, 153, 173, 233, 179, 246, 203, 229 },
+ { 2, 51, 82, 188, 124, 158, 224, 162, 248, 206, 228 },
+ { 1, 18, 29, 88, 93, 137, 141, 116, 222, 161, 217 }
+ }, { /* Coeff Band 2 */
+ { 63, 154, 199, 239, 184, 187, 236, 187, 248, 209, 221 },
+ { 53, 128, 191, 239, 182, 188, 236, 188, 251, 209, 255 },
+ { 14, 99, 160, 235, 172, 184, 235, 187, 249, 207, 240 },
+ { 4, 75, 122, 219, 150, 173, 226, 177, 250, 204, 240 },
+ { 1, 47, 77, 176, 121, 154, 207, 153, 245, 197, 237 },
+ { 1, 18, 30, 84, 95, 136, 138, 112, 229, 167, 228 }
+ }, { /* Coeff Band 3 */
+ { 48, 193, 210, 245, 194, 194, 241, 196, 252, 213, 255 },
+ { 26, 145, 201, 245, 194, 196, 240, 195, 251, 215, 240 },
+ { 6, 104, 165, 241, 179, 190, 239, 191, 253, 222, 255 },
+ { 1, 73, 120, 218, 151, 174, 227, 172, 251, 219, 248 },
+ { 1, 42, 69, 167, 118, 153, 205, 146, 251, 206, 245 },
+ { 1, 16, 27, 84, 89, 133, 148, 112, 240, 179, 238 }
+ }, { /* Coeff Band 4 */
+ { 47, 213, 225, 248, 203, 199, 240, 194, 254, 211, 255 },
+ { 32, 153, 212, 248, 201, 199, 241, 196, 251, 226, 255 },
+ { 6, 102, 168, 240, 181, 190, 240, 187, 251, 225, 238 },
+ { 1, 66, 111, 211, 146, 169, 229, 167, 255, 224, 244 },
+ { 1, 36, 60, 157, 110, 148, 209, 143, 252, 215, 255 },
+ { 1, 16, 27, 83, 90, 133, 152, 111, 244, 184, 250 }
+ }, { /* Coeff Band 5 */
+ { 46, 225, 232, 252, 219, 208, 247, 204, 254, 233, 255 },
+ { 24, 162, 214, 250, 208, 204, 247, 201, 254, 236, 255 },
+ { 3, 106, 165, 242, 182, 191, 245, 196, 255, 231, 255 },
+ { 1, 66, 108, 213, 142, 169, 235, 175, 255, 226, 247 },
+ { 1, 35, 59, 158, 108, 147, 216, 146, 254, 220, 255 },
+ { 1, 16, 27, 85, 90, 131, 159, 110, 248, 191, 252 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 229, 28, 245, 227, 195, 182, 200, 145, 253, 186, 255 },
+ { 151, 44, 210, 214, 180, 175, 193, 146, 247, 185, 254 },
+ { 55, 48, 131, 183, 148, 163, 194, 138, 249, 201, 246 }
+ }, { /* Coeff Band 1 */
+ { 126, 165, 239, 250, 206, 204, 248, 193, 255, 255, 128 },
+ { 199, 158, 231, 248, 206, 198, 247, 200, 243, 255, 255 },
+ { 102, 136, 209, 248, 203, 197, 247, 201, 255, 244, 128 },
+ { 64, 116, 181, 245, 185, 196, 248, 201, 255, 233, 128 },
+ { 44, 98, 151, 233, 162, 179, 248, 195, 255, 242, 128 },
+ { 44, 81, 119, 204, 140, 165, 222, 163, 252, 217, 255 }
+ }, { /* Coeff Band 2 */
+ { 108, 185, 239, 252, 216, 209, 248, 205, 255, 230, 128 },
+ { 91, 155, 224, 252, 211, 205, 251, 211, 255, 230, 128 },
+ { 20, 116, 185, 248, 194, 196, 252, 206, 255, 255, 128 },
+ { 8, 86, 141, 239, 168, 185, 248, 196, 255, 247, 128 },
+ { 3, 50, 92, 206, 125, 164, 242, 176, 255, 246, 128 },
+ { 1, 21, 40, 131, 85, 141, 200, 131, 247, 236, 255 }
+ }, { /* Coeff Band 3 */
+ { 94, 198, 243, 254, 226, 215, 254, 220, 255, 255, 128 },
+ { 67, 164, 228, 253, 217, 208, 250, 216, 255, 213, 128 },
+ { 14, 120, 185, 250, 196, 205, 248, 205, 255, 255, 128 },
+ { 4, 83, 134, 238, 161, 181, 250, 202, 255, 233, 128 },
+ { 1, 48, 82, 196, 119, 157, 248, 178, 255, 255, 128 },
+ { 1, 26, 38, 96, 84, 132, 221, 110, 255, 209, 128 }
+ }, { /* Coeff Band 4 */
+ { 82, 210, 245, 255, 230, 215, 246, 221, 255, 255, 128 },
+ { 55, 170, 231, 254, 222, 213, 255, 220, 255, 255, 128 },
+ { 8, 118, 184, 251, 200, 207, 255, 219, 255, 255, 128 },
+ { 2, 78, 126, 239, 156, 185, 251, 216, 255, 255, 128 },
+ { 1, 43, 68, 189, 108, 151, 247, 187, 255, 228, 128 },
+ { 1, 34, 40, 121, 114, 102, 205, 96, 255, 255, 128 }
+ }, { /* Coeff Band 5 */
+ { 65, 228, 241, 255, 231, 214, 253, 222, 255, 255, 128 },
+ { 33, 173, 226, 254, 222, 216, 255, 231, 255, 255, 128 },
+ { 5, 120, 180, 251, 197, 205, 251, 226, 255, 233, 128 },
+ { 1, 81, 130, 240, 159, 187, 251, 206, 255, 205, 128 },
+ { 1, 51, 78, 198, 119, 168, 238, 181, 255, 171, 128 },
+ { 1, 18, 49, 183, 119, 160, 255, 171, 128, 128, 128 }
+ }
}
- }, { /* block Type 2 */
- { /* Coeff Band 0 */
- { 229, 64, 235, 236, 189, 190, 227, 179, 247, 203, 226 },
- { 148, 70, 194, 228, 175, 182, 216, 170, 238, 192, 224 },
- { 53, 63, 134, 207, 150, 169, 213, 161, 247, 204, 232 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 173, 234, 244, 201, 193, 239, 180, 252, 214, 255 },
- { 160, 156, 222, 243, 200, 193, 237, 179, 253, 216, 255 },
- { 55, 119, 187, 240, 189, 192, 236, 180, 253, 226, 255 },
- { 14, 65, 105, 193, 142, 165, 205, 151, 249, 200, 250 }
- }, { /* Coeff Band 2 */
- { 1, 124, 218, 246, 195, 196, 242, 198, 254, 229, 255 },
- { 85, 114, 180, 240, 179, 187, 239, 191, 253, 223, 239 },
- { 18, 81, 128, 220, 152, 173, 232, 176, 252, 221, 254 },
- { 2, 42, 64, 150, 115, 149, 192, 137, 247, 197, 247 }
- }, { /* Coeff Band 3 */
- { 1, 164, 230, 251, 210, 204, 245, 201, 255, 238, 255 },
- { 96, 137, 210, 248, 199, 199, 244, 198, 254, 218, 255 },
- { 20, 97, 169, 240, 179, 188, 242, 190, 254, 228, 255 },
- { 2, 58, 95, 197, 137, 164, 220, 158, 252, 217, 248 }
- }, { /* Coeff Band 4 */
- { 1, 193, 236, 245, 203, 194, 243, 191, 254, 223, 255 },
- { 86, 163, 217, 241, 190, 188, 242, 189, 253, 220, 255 },
- { 14, 108, 161, 228, 167, 178, 238, 180, 253, 224, 255 },
- { 1, 51, 84, 186, 127, 159, 216, 155, 251, 208, 243 }
- }, { /* Coeff Band 5 */
- { 1, 183, 235, 248, 209, 197, 244, 195, 253, 236, 239 },
- { 79, 144, 208, 243, 193, 190, 244, 191, 254, 231, 255 },
- { 13, 100, 151, 227, 163, 176, 240, 180, 255, 233, 244 },
- { 1, 48, 77, 171, 121, 153, 214, 150, 252, 214, 245 }
- }, { /* Coeff Band 6 */
- { 1, 202, 234, 252, 215, 207, 248, 207, 254, 242, 255 },
- { 75, 153, 216, 249, 203, 201, 248, 203, 255, 239, 255 },
- { 11, 104, 168, 241, 179, 189, 245, 194, 255, 237, 128 },
- { 1, 57, 95, 201, 134, 163, 229, 165, 254, 223, 246 }
- }, { /* Coeff Band 7 */
- { 1, 184, 236, 254, 222, 212, 254, 225, 255, 255, 128 },
- { 74, 149, 220, 252, 210, 208, 253, 223, 255, 249, 128 },
- { 18, 109, 175, 247, 184, 195, 253, 211, 255, 250, 128 },
- { 3, 64, 113, 219, 144, 171, 246, 187, 255, 250, 128 }
- }
- }, { /* block Type 3 */
- { /* Coeff Band 0 */
- { 140, 101, 214, 227, 176, 182, 218, 167, 233, 205, 164 },
- { 96, 101, 176, 204, 161, 173, 193, 152, 223, 182, 182 },
- { 27, 84, 123, 176, 140, 162, 190, 142, 238, 189, 210 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 178, 218, 240, 189, 189, 238, 184, 250, 232, 189 },
- { 69, 146, 204, 239, 187, 189, 238, 183, 251, 226, 221 },
- { 16, 98, 157, 234, 170, 185, 237, 183, 252, 220, 218 },
- { 3, 49, 78, 172, 122, 154, 204, 150, 242, 198, 207 }
- }, { /* Coeff Band 2 */
- { 1, 165, 207, 230, 179, 181, 234, 172, 252, 228, 218 },
- { 25, 130, 175, 224, 169, 177, 232, 169, 252, 230, 207 },
- { 4, 81, 118, 205, 144, 167, 227, 162, 252, 225, 219 },
- { 2, 51, 63, 150, 114, 148, 197, 138, 244, 202, 204 }
- }, { /* Coeff Band 3 */
- { 1, 181, 222, 247, 200, 197, 246, 199, 252, 232, 228 },
- { 25, 142, 200, 244, 190, 193, 245, 195, 253, 233, 204 },
- { 3, 90, 146, 233, 166, 181, 242, 188, 252, 229, 216 },
- { 1, 47, 79, 188, 124, 157, 222, 162, 245, 213, 203 }
- }, { /* Coeff Band 4 */
- { 1, 179, 220, 242, 195, 191, 237, 182, 251, 217, 231 },
- { 27, 144, 200, 241, 188, 190, 238, 185, 250, 224, 235 },
- { 3, 93, 149, 230, 166, 180, 235, 180, 249, 222, 221 },
- { 1, 47, 79, 181, 125, 157, 211, 154, 241, 205, 198 }
- }, { /* Coeff Band 5 */
- { 1, 176, 222, 247, 202, 198, 247, 199, 252, 234, 219 },
- { 24, 139, 197, 244, 190, 192, 246, 196, 253, 232, 220 },
- { 2, 89, 140, 229, 161, 178, 243, 185, 253, 233, 234 },
- { 1, 49, 76, 176, 121, 154, 214, 153, 243, 209, 208 }
- }, { /* Coeff Band 6 */
- { 1, 197, 233, 251, 213, 205, 247, 206, 249, 222, 247 },
- { 35, 159, 216, 249, 203, 201, 246, 203, 250, 222, 223 },
- { 4, 108, 167, 240, 178, 188, 244, 195, 248, 220, 235 },
- { 1, 58, 93, 198, 133, 161, 220, 167, 233, 195, 221 }
- }, { /* Coeff Band 7 */
- { 1, 188, 240, 253, 221, 209, 248, 207, 252, 223, 255 },
- { 84, 153, 227, 251, 212, 205, 247, 205, 254, 215, 255 },
- { 25, 117, 182, 244, 186, 192, 243, 198, 250, 209, 255 },
- { 7, 72, 108, 197, 138, 162, 203, 161, 240, 178, 247 }
- }
}
};
-static const vp9_coeff_probs default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8] = {
+static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES] = {
{ /* block Type 0 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 37, 34, 137, 205, 154, 170, 151, 159, 109, 172, 44 },
+ { 3, 26, 60, 113, 123, 154, 100, 124, 152, 131, 144 },
+ { 1, 13, 23, 54, 102, 139, 71, 106, 146, 123, 148 }
+ }, { /* Coeff Band 1 */
+ { 26, 77, 122, 152, 144, 160, 143, 129, 216, 158, 201 },
+ { 43, 76, 123, 152, 142, 159, 145, 129, 218, 160, 204 },
+ { 25, 67, 112, 150, 141, 159, 144, 128, 218, 159, 204 },
+ { 9, 54, 90, 143, 134, 156, 144, 127, 218, 159, 204 },
+ { 2, 32, 52, 116, 114, 148, 138, 123, 217, 158, 207 },
+ { 1, 10, 15, 44, 91, 133, 75, 99, 172, 128, 169 }
+ }, { /* Coeff Band 2 */
+ { 32, 122, 143, 163, 145, 161, 162, 131, 226, 171, 206 },
+ { 46, 105, 143, 168, 148, 161, 165, 133, 228, 174, 204 },
+ { 17, 79, 116, 164, 142, 161, 166, 134, 229, 174, 206 },
+ { 4, 53, 78, 143, 125, 153, 163, 129, 232, 175, 213 },
+ { 1, 29, 44, 105, 105, 142, 147, 120, 228, 168, 211 },
+ { 1, 12, 18, 52, 91, 133, 92, 100, 193, 140, 183 }
+ }, { /* Coeff Band 3 */
+ { 33, 157, 160, 182, 149, 163, 185, 141, 236, 185, 218 },
+ { 20, 116, 152, 188, 152, 165, 191, 144, 238, 188, 217 },
+ { 4, 74, 114, 180, 141, 162, 192, 143, 240, 191, 219 },
+ { 1, 44, 69, 148, 119, 151, 183, 134, 243, 192, 227 },
+ { 1, 25, 40, 110, 101, 141, 162, 121, 238, 181, 223 },
+ { 1, 12, 18, 56, 89, 132, 103, 101, 206, 148, 196 }
+ }, { /* Coeff Band 4 */
+ { 25, 183, 174, 207, 159, 171, 205, 156, 243, 194, 228 },
+ { 13, 124, 159, 209, 157, 171, 213, 160, 243, 200, 228 },
+ { 2, 75, 117, 199, 143, 166, 215, 158, 246, 205, 230 },
+ { 1, 45, 73, 165, 119, 153, 204, 144, 248, 205, 231 },
+ { 1, 26, 43, 120, 101, 141, 178, 127, 242, 192, 226 },
+ { 1, 12, 19, 59, 89, 132, 112, 102, 215, 154, 201 }
+ }, { /* Coeff Band 5 */
+ { 13, 232, 223, 239, 196, 188, 225, 172, 248, 209, 226 },
+ { 4, 155, 187, 237, 184, 187, 233, 180, 250, 216, 232 },
+ { 1, 86, 131, 222, 156, 175, 233, 176, 251, 218, 237 },
+ { 1, 49, 79, 181, 123, 157, 218, 155, 251, 214, 237 },
+ { 1, 26, 43, 125, 100, 141, 188, 130, 246, 199, 231 },
+ { 1, 12, 20, 62, 88, 131, 119, 102, 222, 161, 209 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 51, 37, 227, 237, 205, 184, 200, 162, 231, 187, 207 },
+ { 9, 36, 172, 204, 176, 173, 171, 145, 217, 167, 197 },
+ { 21, 26, 112, 162, 145, 162, 155, 133, 215, 165, 191 }
+ }, { /* Coeff Band 1 */
+ { 79, 169, 219, 223, 176, 177, 222, 161, 248, 213, 244 },
+ { 177, 166, 216, 222, 175, 178, 222, 161, 246, 212, 226 },
+ { 119, 141, 196, 222, 174, 176, 220, 163, 250, 212, 236 },
+ { 63, 117, 165, 217, 163, 175, 218, 161, 248, 209, 231 },
+ { 30, 87, 117, 192, 138, 162, 216, 157, 247, 211, 224 },
+ { 14, 56, 60, 119, 111, 146, 156, 123, 227, 171, 220 }
+ }, { /* Coeff Band 2 */
+ { 88, 195, 225, 229, 181, 181, 229, 171, 252, 212, 221 },
+ { 66, 145, 202, 229, 177, 180, 230, 172, 253, 220, 255 },
+ { 12, 97, 152, 221, 162, 174, 230, 169, 253, 218, 249 },
+ { 3, 66, 103, 198, 138, 165, 223, 159, 253, 219, 251 },
+ { 1, 38, 61, 158, 110, 148, 209, 146, 252, 212, 238 },
+ { 1, 19, 30, 94, 94, 136, 160, 114, 244, 185, 236 }
+ }, { /* Coeff Band 3 */
+ { 79, 211, 228, 235, 186, 184, 233, 176, 255, 225, 255 },
+ { 50, 151, 205, 235, 182, 185, 237, 177, 254, 223, 255 },
+ { 7, 95, 149, 225, 162, 176, 236, 177, 254, 229, 219 },
+ { 1, 62, 98, 198, 134, 164, 228, 162, 254, 224, 238 },
+ { 1, 35, 57, 156, 108, 148, 211, 143, 253, 215, 238 },
+ { 1, 17, 26, 87, 89, 135, 161, 113, 246, 189, 237 }
+ }, { /* Coeff Band 4 */
+ { 68, 225, 230, 239, 190, 187, 238, 180, 252, 234, 255 },
+ { 39, 156, 206, 239, 185, 187, 241, 187, 254, 231, 255 },
+ { 4, 94, 147, 229, 163, 178, 242, 183, 255, 236, 224 },
+ { 1, 58, 94, 200, 132, 163, 232, 166, 254, 230, 255 },
+ { 1, 32, 52, 153, 104, 146, 214, 144, 253, 222, 236 },
+ { 1, 15, 24, 84, 89, 131, 159, 109, 247, 192, 240 }
+ }, { /* Coeff Band 5 */
+ { 45, 248, 234, 248, 208, 198, 244, 193, 255, 233, 255 },
+ { 19, 169, 204, 246, 195, 195, 246, 199, 255, 233, 255 },
+ { 2, 98, 145, 235, 166, 183, 245, 192, 255, 235, 255 },
+ { 1, 59, 92, 205, 131, 164, 236, 172, 254, 231, 250 },
+ { 1, 33, 52, 152, 103, 145, 216, 144, 253, 221, 240 },
+ { 1, 15, 24, 83, 87, 133, 156, 110, 246, 191, 242 }
+ }
}
}, { /* block Type 1 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 179, 23, 200, 222, 180, 182, 150, 152, 148, 135, 125 },
+ { 60, 33, 113, 185, 143, 166, 168, 144, 189, 168, 152 },
+ { 8, 31, 59, 137, 114, 150, 163, 132, 206, 171, 169 }
+ }, { /* Coeff Band 1 */
+ { 27, 103, 158, 215, 157, 174, 209, 165, 239, 191, 233 },
+ { 90, 101, 159, 213, 156, 173, 212, 164, 230, 185, 237 },
+ { 39, 91, 146, 212, 155, 169, 212, 165, 232, 186, 207 },
+ { 16, 75, 120, 203, 144, 169, 210, 161, 233, 189, 227 },
+ { 3, 48, 76, 167, 120, 154, 199, 146, 236, 190, 218 },
+ { 1, 18, 26, 72, 95, 137, 113, 109, 197, 146, 186 }
+ }, { /* Coeff Band 2 */
+ { 45, 137, 177, 218, 166, 174, 206, 163, 234, 184, 214 },
+ { 47, 117, 167, 218, 166, 176, 206, 164, 234, 182, 229 },
+ { 16, 90, 136, 211, 153, 172, 205, 162, 236, 192, 231 },
+ { 6, 65, 100, 188, 136, 162, 193, 155, 237, 177, 228 },
+ { 1, 37, 58, 137, 113, 150, 166, 134, 229, 167, 234 },
+ { 1, 13, 19, 55, 90, 132, 93, 103, 196, 137, 202 }
+ }, { /* Coeff Band 3 */
+ { 36, 171, 194, 227, 177, 179, 208, 165, 244, 196, 245 },
+ { 19, 129, 178, 227, 175, 184, 214, 165, 246, 188, 255 },
+ { 5, 90, 139, 217, 158, 174, 213, 166, 246, 198, 255 },
+ { 1, 59, 93, 182, 134, 162, 193, 150, 242, 188, 241 },
+ { 1, 31, 49, 122, 108, 145, 160, 127, 235, 172, 229 },
+ { 1, 10, 18, 54, 89, 132, 101, 99, 213, 144, 217 }
+ }, { /* Coeff Band 4 */
+ { 37, 197, 210, 233, 187, 186, 216, 172, 250, 202, 255 },
+ { 20, 142, 191, 234, 183, 186, 219, 170, 249, 207, 246 },
+ { 3, 93, 144, 222, 163, 176, 219, 170, 249, 204, 224 },
+ { 1, 56, 88, 179, 130, 159, 199, 148, 246, 197, 243 },
+ { 1, 29, 47, 123, 104, 144, 172, 127, 244, 185, 234 },
+ { 1, 14, 22, 66, 91, 130, 120, 103, 225, 158, 221 }
+ }, { /* Coeff Band 5 */
+ { 19, 227, 223, 245, 203, 194, 238, 187, 251, 225, 217 },
+ { 6, 152, 192, 242, 189, 190, 241, 190, 253, 225, 255 },
+ { 1, 89, 138, 228, 161, 177, 239, 181, 254, 224, 248 },
+ { 1, 52, 84, 188, 127, 157, 224, 159, 253, 222, 247 },
+ { 1, 29, 47, 132, 102, 140, 196, 132, 251, 208, 244 },
+ { 1, 14, 23, 71, 90, 133, 134, 103, 239, 174, 233 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 205, 14, 245, 235, 216, 189, 190, 146, 249, 201, 255 },
+ { 97, 19, 213, 210, 194, 174, 176, 139, 241, 183, 250 },
+ { 31, 20, 144, 183, 160, 167, 171, 132, 240, 184, 253 }
+ }, { /* Coeff Band 1 */
+ { 137, 182, 245, 254, 221, 216, 255, 160, 128, 128, 128 },
+ { 231, 185, 242, 251, 218, 205, 255, 233, 128, 128, 128 },
+ { 170, 175, 229, 252, 205, 209, 255, 211, 128, 128, 128 },
+ { 107, 157, 213, 250, 199, 205, 251, 207, 255, 255, 128 },
+ { 77, 126, 183, 243, 182, 183, 252, 206, 255, 255, 128 },
+ { 69, 96, 149, 229, 157, 170, 247, 169, 255, 255, 128 }
+ }, { /* Coeff Band 2 */
+ { 107, 196, 241, 252, 211, 208, 255, 210, 128, 128, 128 },
+ { 92, 162, 221, 249, 203, 195, 255, 199, 128, 128, 128 },
+ { 20, 108, 181, 244, 190, 191, 250, 200, 255, 255, 128 },
+ { 7, 80, 132, 241, 172, 197, 253, 191, 255, 255, 128 },
+ { 2, 43, 75, 219, 122, 150, 255, 203, 128, 128, 128 },
+ { 1, 15, 48, 98, 51, 192, 255, 160, 128, 128, 128 }
+ }, { /* Coeff Band 3 */
+ { 107, 202, 244, 254, 226, 215, 255, 192, 128, 128, 128 },
+ { 77, 167, 224, 252, 215, 212, 255, 235, 128, 128, 128 },
+ { 14, 117, 179, 249, 191, 196, 255, 212, 128, 128, 128 },
+ { 3, 84, 134, 237, 160, 194, 248, 216, 255, 255, 128 },
+ { 1, 57, 84, 216, 145, 136, 255, 161, 128, 128, 128 },
+ { 1, 1, 1, 255, 128, 255, 128, 128, 128, 128, 128 }
+ }, { /* Coeff Band 4 */
+ { 88, 219, 248, 255, 239, 225, 255, 255, 128, 128, 128 },
+ { 61, 178, 234, 255, 227, 227, 255, 217, 128, 128, 128 },
+ { 6, 127, 188, 252, 201, 211, 255, 244, 128, 128, 128 },
+ { 1, 83, 130, 248, 173, 197, 255, 175, 128, 128, 128 },
+ { 1, 61, 66, 211, 121, 188, 255, 213, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ }, { /* Coeff Band 5 */
+ { 73, 243, 250, 255, 244, 220, 255, 205, 128, 128, 128 },
+ { 42, 197, 242, 255, 237, 227, 242, 166, 255, 255, 128 },
+ { 10, 137, 197, 252, 214, 199, 255, 238, 128, 128, 128 },
+ { 2, 85, 134, 242, 163, 185, 224, 238, 255, 255, 128 },
+ { 1, 70, 69, 199, 110, 64, 255, 213, 128, 128, 128 },
+ { 1, 1, 1, 1, 128, 128, 255, 1, 128, 128, 128 }
+ }
}
- }, { /* block Type 2 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ }
+};
+
+#if CONFIG_CODE_NONZEROCOUNT
+
+// TODO(debargha): Remove the macro and count tables after experimentation
+#define NZC_DEFAULT_COUNTS /* Uncomment to use counts as defaults */
+
+#ifdef NZC_DEFAULT_COUNTS
+static const unsigned int default_nzc_counts_4x4[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC4X4_TOKENS] = {
+ {
+ {
+ { 967652, 29023, 15039, 6952, 1568, 116 },
+ { 289116, 22938, 4522, 1935, 520, 47 }
+ }, {
+ { 967652, 29023, 15039, 6952, 1568, 116 },
+ { 689116, 22938, 4522, 1935, 520, 47 }
+ },
+ }, {
+ {
+ { 124684, 37167, 15270, 8483, 1777, 102 },
+ { 10405, 12395, 3401, 3574, 2461, 771 }
+ }, {
+ { 124684, 37167, 15270, 8483, 1777, 102 },
+ { 20405, 12395, 3401, 3574, 2461, 771 }
}
- }, { /* block Type 3 */
- { /* Coeff Band 0 */
- { 118, 27, 105, 170, 137, 166, 183, 137, 243, 189, 241 },
- { 44, 34, 85, 142, 127, 158, 161, 128, 232, 174, 213 },
- { 8, 26, 47, 104, 108, 145, 143, 117, 226, 168, 207 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 134, 172, 217, 163, 175, 226, 167, 251, 220, 204 },
- { 56, 129, 168, 217, 161, 174, 223, 164, 249, 218, 223 },
- { 20, 110, 151, 215, 158, 174, 221, 165, 249, 209, 221 },
- { 2, 59, 88, 169, 128, 157, 192, 143, 239, 189, 214 }
- }, { /* Coeff Band 2 */
- { 1, 65, 126, 191, 140, 163, 218, 153, 252, 218, 229 },
- { 21, 57, 92, 175, 126, 156, 214, 148, 252, 218, 229 },
- { 4, 44, 66, 148, 114, 148, 200, 136, 251, 211, 228 },
- { 1, 28, 42, 108, 104, 141, 158, 119, 235, 180, 210 }
- }, { /* Coeff Band 3 */
- { 1, 114, 172, 227, 166, 177, 236, 178, 252, 226, 233 },
- { 41, 94, 152, 218, 156, 172, 233, 172, 251, 223, 231 },
- { 9, 69, 116, 202, 142, 165, 226, 162, 251, 221, 227 },
- { 1, 36, 60, 151, 113, 148, 195, 140, 241, 198, 211 }
- }, { /* Coeff Band 4 */
- { 1, 186, 200, 227, 174, 178, 230, 169, 248, 210, 238 },
- { 27, 148, 181, 221, 167, 176, 226, 166, 250, 218, 228 },
- { 3, 96, 139, 208, 154, 170, 219, 161, 249, 214, 229 },
- { 1, 44, 70, 156, 120, 152, 188, 139, 239, 193, 200 }
- }, { /* Coeff Band 5 */
- { 1, 169, 203, 238, 186, 186, 238, 184, 252, 224, 230 },
- { 32, 119, 173, 232, 172, 181, 236, 182, 252, 222, 237 },
- { 6, 84, 128, 215, 150, 170, 232, 172, 251, 221, 235 },
- { 1, 49, 78, 167, 124, 154, 200, 145, 243, 198, 217 }
- }, { /* Coeff Band 6 */
- { 1, 193, 215, 244, 197, 195, 239, 192, 249, 213, 240 },
- { 52, 136, 193, 239, 184, 189, 237, 189, 248, 211, 226 },
- { 13, 90, 146, 227, 162, 178, 233, 182, 248, 211, 231 },
- { 1, 49, 79, 177, 124, 156, 201, 154, 234, 188, 212 }
- }, { /* Coeff Band 7 */
- { 1, 189, 238, 248, 219, 196, 232, 180, 253, 211, 255 },
- { 104, 148, 224, 245, 211, 194, 225, 171, 251, 206, 255 },
- { 43, 116, 190, 231, 179, 183, 217, 168, 249, 199, 255 },
- { 13, 65, 92, 154, 131, 152, 167, 132, 238, 174, 243 }
+ }, {
+ {
+ { 4100, 22976, 15627, 16137, 7982, 1793 },
+ { 4249, 3084, 2131, 4081, 6439, 1653 }
+ }, {
+ { 21100, 22976, 15627, 16137, 7982, 1793 },
+ { 4249, 3084, 2131, 4081, 2439, 1653 }
}
}
};
-static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES_16X16] = {
- { /* block Type 0 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+
+static const unsigned int default_nzc_counts_8x8[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC8X8_TOKENS] = {
+ {
+ {
+ { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10 },
+ { 72052, 30468, 6973, 3250, 1500, 750, 375, 5 },
+ }, {
+ { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10 },
+ { 192052, 30468, 6973, 3250, 1500, 750, 375, 5 },
}
- }, { /* block Type 1 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ }, {
+ {
+ { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7 },
+ { 23772, 23120, 13127, 8115, 4000, 2000, 200, 6 },
+ }, {
+ { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7 },
+ { 23772, 23120, 13127, 8115, 4000, 2000, 200, 6 },
}
- }, { /* block Type 2 */
- { /* Coeff Band 0 */
- { 223, 34, 236, 234, 193, 185, 216, 169, 239, 189, 229 },
- { 125, 40, 195, 221, 173, 175, 209, 165, 220, 181, 196 },
- { 41, 37, 127, 185, 145, 162, 191, 150, 227, 180, 219 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 160, 224, 239, 193, 190, 213, 178, 244, 174, 255 },
- { 199, 154, 212, 238, 190, 190, 210, 173, 246, 183, 249 },
- { 88, 122, 178, 234, 180, 187, 213, 174, 244, 182, 247 },
- { 27, 69, 100, 174, 139, 165, 159, 142, 225, 157, 240 }
- }, { /* Coeff Band 2 */
- { 1, 118, 207, 237, 179, 185, 234, 189, 241, 194, 237 },
- { 86, 103, 161, 227, 163, 176, 231, 183, 241, 196, 234 },
- { 19, 69, 113, 205, 140, 166, 220, 169, 240, 188, 242 },
- { 3, 32, 49, 106, 111, 144, 132, 121, 225, 151, 237 }
- }, { /* Coeff Band 3 */
- { 1, 160, 218, 245, 197, 195, 235, 189, 254, 218, 255 },
- { 90, 127, 193, 240, 186, 189, 235, 187, 251, 217, 230 },
- { 18, 92, 148, 229, 164, 179, 228, 180, 254, 212, 229 },
- { 2, 50, 79, 163, 126, 156, 186, 140, 247, 191, 236 }
- }, { /* Coeff Band 4 */
- { 1, 196, 231, 240, 203, 191, 225, 171, 253, 214, 255 },
- { 71, 167, 210, 234, 194, 188, 218, 165, 253, 215, 236 },
- { 11, 119, 165, 217, 171, 177, 213, 155, 252, 209, 255 },
- { 1, 46, 70, 145, 121, 153, 180, 131, 249, 192, 246 }
- }, { /* Coeff Band 5 */
- { 1, 176, 223, 242, 202, 194, 222, 169, 253, 211, 244 },
- { 62, 131, 191, 233, 185, 186, 219, 164, 251, 211, 252 },
- { 7, 89, 133, 207, 156, 173, 211, 157, 251, 206, 247 },
- { 1, 36, 56, 127, 113, 147, 166, 125, 243, 183, 242 }
- }, { /* Coeff Band 6 */
- { 1, 203, 232, 249, 213, 202, 245, 193, 254, 237, 255 },
- { 51, 155, 212, 245, 199, 195, 244, 192, 254, 234, 255 },
- { 7, 101, 158, 233, 170, 181, 244, 185, 253, 242, 255 },
- { 1, 49, 82, 185, 123, 157, 226, 156, 252, 225, 240 }
- }, { /* Coeff Band 7 */
- { 1, 222, 233, 252, 220, 207, 247, 206, 255, 240, 128 },
- { 40, 159, 216, 250, 205, 201, 248, 207, 249, 219, 255 },
- { 6, 106, 163, 240, 176, 188, 247, 198, 251, 222, 255 },
- { 1, 51, 88, 196, 127, 159, 232, 169, 252, 214, 255 }
+ }, {
+ {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17 },
+ { 11612, 13874, 13329, 13022, 6500, 3250, 300, 12 },
+ }, {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17 },
+ { 11612, 13874, 13329, 13022, 6500, 3250, 300, 12 },
}
- }, { /* block Type 3 */
- { /* Coeff Band 0 */
- { 14, 78, 225, 217, 173, 181, 198, 153, 228, 185, 176 },
- { 9, 74, 179, 191, 157, 171, 178, 143, 229, 175, 209 },
- { 3, 48, 92, 128, 130, 155, 135, 123, 220, 155, 219 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 178, 209, 214, 173, 175, 208, 152, 252, 210, 237 },
- { 142, 151, 193, 212, 170, 175, 209, 151, 251, 208, 237 },
- { 38, 105, 150, 206, 159, 173, 208, 151, 250, 209, 238 },
- { 5, 44, 61, 128, 114, 147, 167, 125, 239, 184, 217 }
- }, { /* Coeff Band 2 */
- { 1, 154, 195, 202, 166, 173, 184, 144, 245, 184, 236 },
- { 49, 110, 150, 188, 155, 168, 180, 141, 244, 183, 239 },
- { 4, 63, 90, 158, 132, 157, 171, 134, 243, 179, 239 },
- { 1, 25, 37, 93, 104, 141, 133, 114, 231, 161, 226 }
- }, { /* Coeff Band 3 */
- { 1, 184, 201, 223, 173, 177, 224, 164, 253, 220, 238 },
- { 42, 127, 170, 215, 164, 173, 223, 162, 253, 219, 233 },
- { 4, 75, 114, 195, 142, 164, 218, 155, 253, 217, 235 },
- { 1, 32, 50, 128, 108, 144, 180, 127, 247, 197, 219 }
- }, { /* Coeff Band 4 */
- { 1, 190, 207, 232, 181, 184, 228, 172, 251, 216, 212 },
- { 35, 136, 180, 227, 173, 180, 227, 171, 251, 216, 218 },
- { 2, 85, 131, 214, 154, 173, 224, 166, 250, 214, 225 },
- { 1, 44, 71, 162, 120, 153, 195, 143, 240, 195, 197 }
- }, { /* Coeff Band 5 */
- { 1, 185, 201, 230, 177, 180, 232, 172, 253, 225, 235 },
- { 27, 122, 165, 221, 164, 175, 230, 169, 253, 224, 220 },
- { 1, 72, 108, 197, 139, 163, 224, 159, 253, 224, 226 },
- { 1, 33, 51, 132, 107, 144, 186, 130, 245, 201, 206 }
- }, { /* Coeff Band 6 */
- { 1, 203, 214, 240, 193, 191, 235, 178, 252, 225, 224 },
- { 20, 140, 188, 235, 182, 186, 234, 177, 252, 226, 226 },
- { 1, 85, 132, 218, 155, 174, 230, 170, 251, 224, 227 },
- { 1, 39, 62, 154, 114, 150, 199, 141, 241, 203, 214 }
- }, { /* Coeff Band 7 */
- { 1, 217, 224, 244, 202, 193, 241, 187, 252, 227, 239 },
- { 22, 151, 200, 239, 187, 188, 240, 184, 252, 226, 237 },
- { 2, 90, 138, 222, 158, 174, 237, 176, 252, 226, 239 },
- { 1, 41, 66, 163, 116, 151, 206, 146, 243, 201, 230 }
- }
}
};
-static const vp9_coeff_probs default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16] = {
- { /* block Type 0 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+
+static const unsigned int default_nzc_counts_16x16[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC16X16_TOKENS] = {
+ {
+ {
+ { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2 },
+ { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 },
+ }, {
+ { 32988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2 },
+ { 92052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 },
}
- }, { /* block Type 1 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ }, {
+ {
+ { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2 },
+ { 47772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 },
+ }, {
+ { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2 },
+ { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 },
}
- }, { /* block Type 2 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ }, {
+ {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 },
+ { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 },
+ }, {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 },
+ { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 },
}
- }, { /* block Type 3 */
- { /* Coeff Band 0 */
- { 3, 29, 86, 140, 130, 163, 135, 131, 190, 148, 186 },
- { 1, 26, 61, 105, 124, 156, 105, 119, 178, 138, 173 },
- { 1, 15, 28, 60, 105, 142, 80, 105, 173, 128, 178 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 130, 142, 172, 141, 161, 191, 140, 244, 193, 216 },
- { 61, 124, 141, 173, 141, 161, 190, 139, 244, 194, 215 },
- { 28, 103, 124, 171, 138, 160, 190, 140, 243, 194, 225 },
- { 1, 36, 51, 111, 109, 144, 152, 120, 227, 173, 205 }
- }, { /* Coeff Band 2 */
- { 1, 60, 125, 153, 143, 159, 156, 127, 234, 170, 233 },
- { 22, 48, 78, 129, 124, 152, 151, 123, 234, 170, 233 },
- { 3, 32, 46, 98, 107, 142, 138, 114, 232, 165, 232 },
- { 1, 15, 23, 61, 96, 135, 101, 103, 210, 144, 213 }
- }, { /* Coeff Band 3 */
- { 1, 102, 144, 182, 146, 162, 194, 143, 246, 196, 239 },
- { 34, 76, 116, 171, 136, 159, 192, 140, 246, 195, 239 },
- { 4, 51, 81, 153, 124, 153, 184, 135, 246, 192, 239 },
- { 1, 23, 37, 98, 102, 140, 142, 116, 230, 167, 227 }
- }, { /* Coeff Band 4 */
- { 1, 165, 171, 214, 163, 174, 214, 160, 245, 203, 219 },
- { 16, 120, 154, 210, 158, 172, 212, 159, 245, 201, 219 },
- { 1, 80, 122, 199, 147, 167, 208, 154, 244, 200, 223 },
- { 1, 40, 65, 145, 118, 151, 171, 135, 226, 175, 202 }
- }, { /* Coeff Band 5 */
- { 1, 146, 162, 215, 159, 172, 226, 165, 251, 218, 231 },
- { 16, 92, 131, 205, 147, 167, 224, 162, 252, 217, 228 },
- { 2, 60, 92, 182, 129, 158, 216, 152, 251, 214, 234 },
- { 1, 32, 50, 126, 107, 144, 176, 128, 240, 189, 216 }
- }, { /* Coeff Band 6 */
- { 1, 178, 186, 224, 172, 178, 224, 167, 251, 214, 232 },
- { 14, 118, 158, 215, 160, 173, 223, 164, 250, 214, 228 },
- { 2, 70, 109, 194, 139, 164, 217, 156, 250, 213, 227 },
- { 1, 32, 51, 129, 108, 146, 175, 128, 240, 187, 218 }
- }, { /* Coeff Band 7 */
- { 1, 210, 214, 240, 192, 188, 235, 182, 251, 221, 228 },
- { 22, 140, 187, 233, 177, 183, 234, 178, 251, 219, 233 },
- { 3, 82, 130, 215, 152, 171, 229, 171, 250, 217, 232 },
- { 1, 38, 63, 154, 115, 149, 195, 141, 240, 196, 219 }
- }
}
};
-static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = {
- { /* block Type 0 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+
+static const unsigned int default_nzc_counts_32x32[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC32X32_TOKENS] = {
+ {
+ {
+ { 72988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2, 1, 0 },
+ { 52052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1, 0, 0 },
+ }, {
+ { 72988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2, 1, 0 },
+ { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1, 0, 0 },
}
- }, { /* block Type 1 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ }, {
+ {
+ { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2, 1, 0 },
+ { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2, 1, 0 },
+ }, {
+ { 21533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2, 1, 0 },
+ { 27772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2, 1, 0 },
}
- }, { /* block Type 2 */
- { /* Coeff Band 0 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 2 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 3 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 4 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 5 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 6 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 7 */
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+ }, {
+ {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 },
+ { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3, 2, 1 },
+ }, {
+ { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 },
+ { 9612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3, 2, 1 },
}
- }, { /* block Type 3 */
- { /* Coeff Band 0 */
- { 8, 40, 224, 217, 183, 181, 180, 148, 200, 180, 123 },
- { 6, 37, 178, 193, 173, 171, 160, 139, 205, 166, 173 },
- { 3, 27, 93, 133, 143, 159, 115, 125, 183, 141, 178 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
- }, { /* Coeff Band 1 */
- { 1, 170, 209, 202, 172, 175, 179, 143, 238, 181, 214 },
- { 184, 164, 199, 199, 169, 173, 180, 143, 238, 184, 217 },
- { 99, 128, 165, 194, 161, 171, 180, 142, 239, 182, 219 },
- { 17, 49, 59, 102, 117, 148, 122, 116, 208, 152, 191 }
- }, { /* Coeff Band 2 */
- { 1, 136, 200, 197, 172, 172, 168, 142, 226, 170, 216 },
- { 66, 104, 146, 175, 152, 165, 163, 139, 225, 170, 219 },
- { 11, 52, 83, 144, 130, 156, 151, 130, 222, 165, 216 },
- { 1, 16, 25, 65, 99, 137, 96, 106, 190, 138, 184 }
- }, { /* Coeff Band 3 */
- { 1, 180, 203, 198, 166, 170, 190, 143, 241, 190, 227 },
- { 74, 125, 161, 187, 154, 165, 187, 142, 241, 189, 224 },
- { 15, 70, 98, 163, 133, 157, 182, 137, 241, 187, 226 },
- { 1, 25, 37, 89, 104, 140, 128, 113, 218, 158, 206 }
- }, { /* Coeff Band 4 */
- { 1, 191, 208, 213, 169, 173, 212, 156, 246, 206, 217 },
- { 53, 136, 170, 205, 159, 170, 211, 156, 246, 205, 208 },
- { 3, 75, 112, 189, 140, 163, 209, 151, 246, 205, 215 },
- { 1, 32, 51, 127, 108, 145, 171, 128, 231, 183, 197 }
- }, { /* Coeff Band 5 */
- { 1, 183, 195, 202, 161, 168, 206, 150, 247, 202, 229 },
- { 42, 113, 144, 190, 147, 163, 203, 148, 247, 202, 229 },
- { 2, 56, 82, 160, 124, 153, 195, 140, 246, 200, 229 },
- { 1, 22, 34, 93, 99, 138, 143, 115, 227, 170, 206 }
- }, { /* Coeff Band 6 */
- { 1, 202, 193, 221, 168, 175, 227, 167, 251, 217, 236 },
- { 26, 122, 158, 213, 157, 171, 225, 165, 251, 216, 242 },
- { 1, 68, 105, 194, 136, 162, 221, 158, 251, 215, 239 },
- { 1, 32, 51, 131, 107, 145, 179, 130, 240, 188, 231 }
- }, { /* Coeff Band 7 */
- { 1, 234, 212, 243, 195, 192, 240, 187, 253, 226, 227 },
- { 14, 141, 186, 237, 181, 186, 239, 184, 253, 226, 233 },
- { 1, 85, 132, 221, 155, 174, 235, 176, 253, 224, 226 },
- { 1, 39, 65, 159, 115, 150, 202, 144, 245, 202, 214 }
- }
}
};
+
+#else
+
+static const vp9_prob default_nzc_probs_4x4[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC4X4_TOKENS] = {
+ {
+ {
+ { 219, 162, 179, 142, 242, },
+ { 214, 253, 228, 246, 255, },
+ }, {
+ { 225, 236, 190, 229, 253, },
+ { 251, 253, 240, 248, 255, },
+ },
+ }, {
+ {
+ { 106, 126, 158, 126, 244, },
+ { 118, 241, 201, 240, 255, },
+ }, {
+ { 165, 179, 143, 189, 242, },
+ { 173, 239, 192, 255, 128, },
+ },
+ }, {
+ {
+ { 42 , 78 , 153, 92 , 223, },
+ { 128, 128, 128, 128, 128, },
+ }, {
+ { 76 , 68 , 126, 110, 216, },
+ { 128, 128, 128, 128, 128, },
+ },
+ },
+};
+
+static const vp9_prob default_nzc_probs_8x8[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC8X8_TOKENS] = {
+ {
+ {
+ { 134, 139, 170, 178, 142, 197, 255, },
+ { 167, 224, 199, 252, 205, 255, 128, },
+ }, {
+ { 181, 210, 180, 241, 190, 235, 255, },
+ { 234, 251, 235, 252, 219, 255, 128, },
+ },
+ }, {
+ {
+ { 33 , 64 , 155, 143, 86 , 216, 255, },
+ { 73 , 160, 167, 251, 153, 255, 128, },
+ }, {
+ { 79 , 104, 153, 195, 119, 246, 255, },
+ { 149, 183, 186, 249, 203, 255, 128, },
+ },
+ }, {
+ {
+ { 10 , 25 , 156, 61 , 69 , 156, 254, },
+ { 32 , 1 , 128, 146, 64 , 255, 128, },
+ }, {
+ { 37 , 48 , 143, 113, 81 , 202, 255, },
+ { 1 , 255, 128, 128, 128, 128, 128, },
+ },
+ },
+};
+
+static const vp9_prob default_nzc_probs_16x16[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC16X16_TOKENS] = {
+ {
+ {
+ { 11 , 188, 210, 167, 141, 143, 152, 255, 128, },
+ { 171, 201, 203, 244, 207, 255, 255, 128, 128, },
+ }, {
+ { 23 , 217, 207, 251, 198, 255, 219, 128, 128, },
+ { 235, 249, 229, 255, 199, 128, 128, 128, 128, },
+ },
+ }, {
+ {
+ { 9 , 45 , 168, 85 , 66 , 221, 139, 246, 255, },
+ { 51 , 110, 163, 238, 94 , 255, 255, 128, 128, },
+ }, {
+ { 4 , 149, 175, 240, 149, 255, 205, 128, 128, },
+ { 141, 217, 186, 255, 128, 128, 128, 128, 128, },
+ },
+ }, {
+ {
+ { 1 , 12 , 173, 6 , 68 , 145, 41 , 204, 255, },
+ { 39 , 47 , 128, 199, 110, 255, 128, 128, 128, },
+ }, {
+ { 1 , 121, 171, 149, 115, 242, 159, 255, 128, },
+ { 1 , 255, 255, 128, 128, 128, 128, 128, 128, },
+ },
+ },
+};
+
+static const vp9_prob default_nzc_probs_32x32[MAX_NZC_CONTEXTS]
+ [REF_TYPES]
+ [BLOCK_TYPES]
+ [NZC32X32_TOKENS] = {
+ {
+ {
+ { 11 , 216, 195, 201, 160, 247, 217, 255, 255, 128, 128, },
+ { 177, 240, 239, 255, 192, 128, 128, 128, 128, 128, 128, },
+ }, {
+ { 48 , 235, 213, 235, 199, 255, 255, 128, 128, 128, 128, },
+ { 205, 255, 248, 128, 128, 128, 128, 128, 128, 128, 128, },
+ },
+ }, {
+ {
+ { 6 , 96 , 138, 99 , 125, 248, 188, 255, 128, 128, 128, },
+ { 17 , 53 , 43 , 189, 1 , 255, 171, 128, 128, 128, 128, },
+ }, {
+ { 5 , 187, 235, 232, 117, 255, 219, 128, 128, 128, 128, },
+ { 146, 255, 255, 128, 128, 128, 128, 128, 128, 128, 128, },
+ },
+ }, {
+ {
+ { 1 , 7 , 93 , 14 , 100, 30 , 85 , 65 , 81 , 210, 255, },
+ { 1 , 1 , 128, 26 , 1 , 218, 78 , 255, 255, 128, 128, },
+ }, {
+ { 4 , 148, 206, 137, 160, 255, 255, 128, 128, 128, 128, },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, },
+ },
+ },
+};
+#endif
+
+static const vp9_prob default_nzc_pcat_probs[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA]
+ [NZC_BITS_EXTRA] = {
+ // Bit probabilities are in least to most significance order
+ {
+ {176, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4
+ {164, 192, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8
+ {154, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16
+ {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32
+ {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64
+ {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128
+ {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256
+ {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512
+ {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024
+ }, {
+ {168, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4
+ {152, 184, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8
+ {152, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16
+ {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32
+ {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64
+ {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128
+ {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256
+ {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512
+ {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024
+ }, {
+ {160, 128, 128, 128, 128, 128, 128, 128, 128}, // 3 - 4
+ {152, 176, 128, 128, 128, 128, 128, 128, 128}, // 5 - 8
+ {150, 184, 208, 128, 128, 128, 128, 128, 128}, // 9 - 16
+ {144, 176, 200, 216, 128, 128, 128, 128, 128}, // 17 - 32
+ {140, 172, 192, 208, 224, 128, 128, 128, 128}, // 33 - 64
+ {136, 168, 188, 200, 220, 232, 128, 128, 128}, // 65 - 128
+ {132, 164, 184, 196, 216, 228, 240, 128, 128}, // 129 - 256
+ {130, 162, 178, 194, 212, 226, 240, 248, 128}, // 257 - 512
+ {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024
+ },
+};
+
+#endif // CONFIG_CODE_NONZEROCOUNT
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -41,15 +41,176 @@
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]) = {
- 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7
+// Unified coefficient band structure used by all block sizes
+DECLARE_ALIGNED(16, const int, vp9_coef_bands8x8[64]) = {
+ 0, 1, 2, 3, 4, 4, 5, 5,
+ 1, 2, 3, 4, 4, 5, 5, 5,
+ 2, 3, 4, 4, 5, 5, 5, 5,
+ 3, 4, 4, 5, 5, 5, 5, 5,
+ 4, 4, 5, 5, 5, 5, 5, 5,
+ 4, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5
};
+DECLARE_ALIGNED(16, const int, vp9_coef_bands4x4[16]) = {
+ 0, 1, 2, 3,
+ 1, 2, 3, 4,
+ 2, 3, 4, 5,
+ 3, 4, 5, 5
+};
-DECLARE_ALIGNED(16, const uint8_t, vp9_prev_token_class[MAX_ENTROPY_TOKENS]) = {
- 0, 1, 2, 2, 3, 3, 3, 3, 3, 3, 3, 0
+DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[MAX_ENTROPY_TOKENS]) = {
+ 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5
};
+#if CONFIG_SCATTERSCAN
DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]) = {
+ 0, 4, 1, 5,
+ 8, 2, 12, 9,
+ 3, 6, 13, 10,
+ 7, 14, 11, 15,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_col_scan_4x4[16]) = {
+ 0, 4, 8, 1,
+ 12, 5, 9, 2,
+ 13, 6, 10, 3,
+ 7, 14, 11, 15,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]) = {
+ 0, 1, 4, 2,
+ 5, 3, 6, 8,
+ 9, 7, 12, 10,
+ 13, 11, 14, 15,
+};
+
+DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = {
+ 0, 8, 1, 16, 9, 2, 17, 24,
+ 10, 3, 18, 25, 32, 11, 4, 26,
+ 33, 19, 40, 12, 34, 27, 5, 41,
+ 20, 48, 13, 35, 42, 28, 21, 6,
+ 49, 56, 36, 43, 29, 7, 14, 50,
+ 57, 44, 22, 37, 15, 51, 58, 30,
+ 45, 23, 52, 59, 38, 31, 60, 53,
+ 46, 39, 61, 54, 47, 62, 55, 63,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]) = {
+ 0, 8, 16, 1, 24, 9, 32, 17,
+ 2, 40, 25, 10, 33, 18, 48, 3,
+ 26, 41, 11, 56, 19, 34, 4, 49,
+ 27, 42, 12, 35, 20, 57, 50, 28,
+ 5, 43, 13, 36, 58, 51, 21, 44,
+ 6, 29, 59, 37, 14, 52, 22, 7,
+ 45, 60, 30, 15, 38, 53, 23, 46,
+ 31, 61, 39, 54, 47, 62, 55, 63,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]) = {
+ 0, 1, 2, 8, 9, 3, 16, 10,
+ 4, 17, 11, 24, 5, 18, 25, 12,
+ 19, 26, 32, 6, 13, 20, 33, 27,
+ 7, 34, 40, 21, 28, 41, 14, 35,
+ 48, 42, 29, 36, 49, 22, 43, 15,
+ 56, 37, 50, 44, 30, 57, 23, 51,
+ 58, 45, 38, 52, 31, 59, 53, 46,
+ 60, 39, 61, 47, 54, 55, 62, 63,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = {
+ 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80,
+ 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52,
+ 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69,
+ 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146,
+ 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25,
+ 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119,
+ 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194,
+ 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59,
+ 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13,
+ 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169,
+ 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108,
+ 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140,
+ 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141,
+ 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142,
+ 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251,
+ 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, 255,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]) = {
+ 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81,
+ 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4,
+ 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21,
+ 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85,
+ 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179,
+ 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24,
+ 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227,
+ 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167,
+ 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229,
+ 74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59,
+ 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170,
+ 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202,
+ 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125,
+ 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79,
+ 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236,
+ 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, 255,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]) = {
+ 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20,
+ 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52,
+ 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69,
+ 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100,
+ 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102,
+ 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160,
+ 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176,
+ 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136,
+ 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166,
+ 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108,
+ 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170,
+ 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186,
+ 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110,
+ 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158,
+ 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175,
+ 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255,
+};
+
+DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
+ 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, 68, 131, 37, 100,
+ 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197,
+ 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, 41, 417, 199, 136,
+ 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451,
+ 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, 453, 139, 44, 234,
+ 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 392, 203, 108, 546, 485, 576, 298, 235, 140, 361, 516, 330, 172, 547, 45, 424, 455, 267, 393, 577,
+ 486, 77, 204, 517, 362, 548, 608, 14, 456, 299, 578, 109, 236, 425, 394, 487, 609, 331, 141, 579, 518, 46, 268, 15, 173, 549, 610, 640, 363, 78, 519, 488,
+ 300, 205, 16, 457, 580, 426, 550, 395, 110, 237, 611, 641, 332, 672, 142, 642, 269, 458, 47, 581, 427, 489, 174, 364, 520, 612, 551, 673, 79, 206, 301, 643,
+ 704, 17, 111, 490, 674, 238, 582, 48, 521, 613, 333, 396, 459, 143, 270, 552, 644, 705, 736, 365, 80, 675, 583, 175, 428, 706, 112, 302, 207, 614, 553, 49,
+ 645, 522, 737, 397, 768, 144, 334, 18, 676, 491, 239, 615, 707, 584, 81, 460, 176, 271, 738, 429, 113, 800, 366, 208, 523, 708, 646, 554, 677, 769, 19, 145,
+ 585, 739, 240, 303, 50, 461, 616, 398, 647, 335, 492, 177, 82, 770, 832, 555, 272, 430, 678, 209, 709, 114, 740, 801, 617, 51, 304, 679, 524, 367, 586, 241,
+ 20, 146, 771, 864, 83, 802, 648, 493, 399, 273, 336, 710, 178, 462, 833, 587, 741, 115, 305, 711, 368, 525, 618, 803, 210, 896, 680, 834, 772, 52, 649, 147,
+ 431, 494, 556, 242, 400, 865, 337, 21, 928, 179, 742, 84, 463, 274, 369, 804, 650, 557, 743, 960, 835, 619, 773, 306, 211, 526, 432, 992, 588, 712, 116, 243,
+ 866, 495, 681, 558, 805, 589, 401, 897, 53, 338, 148, 682, 867, 464, 275, 22, 370, 433, 307, 620, 527, 836, 774, 651, 713, 744, 85, 180, 621, 465, 929, 775,
+ 496, 898, 212, 339, 244, 402, 590, 117, 559, 714, 434, 23, 868, 930, 806, 683, 528, 652, 371, 961, 149, 837, 54, 899, 745, 276, 993, 497, 403, 622, 181, 776,
+ 746, 529, 560, 435, 86, 684, 466, 308, 591, 653, 715, 807, 340, 869, 213, 962, 245, 838, 561, 931, 808, 592, 118, 498, 372, 623, 685, 994, 467, 654, 747, 900,
+ 716, 277, 150, 55, 24, 404, 530, 839, 777, 655, 182, 963, 840, 686, 778, 309, 870, 341, 87, 499, 809, 624, 593, 436, 717, 932, 214, 246, 995, 718, 625, 373,
+ 562, 25, 119, 901, 531, 468, 964, 748, 810, 278, 779, 500, 563, 656, 405, 687, 871, 872, 594, 151, 933, 749, 841, 310, 657, 626, 595, 437, 688, 183, 996, 965,
+ 902, 811, 342, 750, 689, 719, 532, 56, 215, 469, 934, 374, 247, 720, 780, 564, 781, 842, 406, 26, 751, 903, 873, 57, 279, 627, 501, 658, 843, 997, 812, 904,
+ 88, 813, 438, 752, 935, 936, 311, 596, 533, 690, 343, 966, 874, 89, 120, 470, 721, 875, 659, 782, 565, 998, 375, 844, 845, 27, 628, 967, 121, 905, 968, 152,
+ 937, 814, 753, 502, 691, 783, 184, 153, 722, 407, 58, 815, 999, 660, 597, 723, 534, 906, 216, 439, 907, 248, 185, 876, 846, 692, 784, 629, 90, 969, 280, 754,
+ 938, 939, 217, 847, 566, 471, 785, 816, 877, 1000, 249, 878, 661, 503, 312, 970, 755, 122, 817, 281, 344, 786, 598, 724, 28, 59, 29, 154, 535, 630, 376, 1001,
+ 313, 908, 186, 91, 848, 849, 345, 909, 940, 879, 408, 818, 693, 1002, 971, 941, 567, 377, 218, 756, 910, 787, 440, 123, 880, 725, 662, 250, 819, 1003, 282, 972,
+ 850, 599, 472, 409, 155, 441, 942, 757, 788, 694, 911, 881, 314, 631, 973, 504, 187, 1004, 346, 473, 851, 943, 820, 726, 60, 505, 219, 378, 912, 974, 30, 31,
+ 536, 882, 1005, 92, 251, 663, 944, 913, 283, 695, 883, 568, 1006, 975, 410, 442, 945, 789, 852, 537, 1007, 124, 315, 61, 758, 821, 600, 914, 976, 569, 474, 347,
+ 156, 1008, 915, 93, 977, 506, 946, 727, 379, 884, 188, 632, 601, 1009, 790, 853, 978, 947, 220, 411, 125, 633, 664, 759, 252, 443, 916, 538, 157, 822, 62, 570,
+ 979, 284, 1010, 885, 948, 189, 475, 94, 316, 665, 696, 1011, 854, 791, 980, 221, 348, 63, 917, 602, 380, 507, 253, 126, 697, 823, 634, 285, 728, 949, 886, 95,
+ 158, 539, 1012, 317, 412, 444, 760, 571, 190, 981, 729, 918, 127, 666, 349, 381, 476, 855, 761, 1013, 603, 222, 159, 698, 950, 508, 254, 792, 286, 635, 887, 793,
+ 413, 191, 982, 445, 540, 318, 730, 667, 223, 824, 919, 1014, 350, 477, 572, 255, 825, 951, 762, 509, 604, 856, 382, 699, 287, 319, 636, 983, 794, 414, 541, 731,
+ 857, 888, 351, 446, 573, 1015, 668, 889, 478, 826, 383, 763, 605, 920, 510, 637, 415, 700, 921, 858, 447, 952, 542, 795, 479, 953, 732, 890, 669, 574, 511, 984,
+ 827, 985, 922, 1016, 764, 606, 543, 701, 859, 638, 1017, 575, 796, 954, 733, 891, 670, 607, 828, 986, 765, 923, 639, 1018, 702, 860, 955, 671, 892, 734, 797, 703,
+ 987, 829, 1019, 766, 924, 735, 861, 956, 988, 893, 767, 798, 830, 1020, 925, 957, 799, 862, 831, 989, 894, 1021, 863, 926, 895, 958, 990, 1022, 927, 959, 991, 1023,
+};
+#else // CONFIG_SCATTERSCAN
+DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]) = {
0, 1, 4, 8,
5, 2, 3, 6,
9, 12, 13, 10,
@@ -70,17 +231,6 @@
12, 13, 14, 15
};
-DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]) = {
- 0, 1, 2, 3, 5, 4, 4, 5,
- 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7
-};
-
DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]) = {
0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
@@ -88,26 +238,28 @@
58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
};
-// Table can be optimized.
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]) = {
- 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]) = {
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 1, 9, 17, 25, 33, 41, 49, 57,
+ 2, 10, 18, 26, 34, 42, 50, 58,
+ 3, 11, 19, 27, 35, 43, 51, 59,
+ 4, 12, 20, 28, 36, 44, 52, 60,
+ 5, 13, 21, 29, 37, 45, 53, 61,
+ 6, 14, 22, 30, 38, 46, 54, 62,
+ 7, 15, 23, 31, 39, 47, 55, 63,
};
+DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+};
+
DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]) = {
0, 1, 16, 32, 17, 2, 3, 18,
33, 48, 64, 49, 34, 19, 4, 5,
@@ -143,692 +295,42 @@
237, 252, 253, 238, 223, 239, 254, 255,
};
-#if CONFIG_DWTDCTHYBRID
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4, 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 256, 225, 194, 163,
- 132, 101, 70, 39, 8, 9, 40, 71,
- 102, 133, 164, 195, 226, 257, 288, 320,
- 289, 258, 227, 196, 165, 134, 103, 72,
- 41, 10, 11, 42, 73, 104, 135, 166,
- 197, 228, 259, 290, 321, 352, 384, 353,
- 322, 291, 260, 229, 198, 167, 136, 105,
- 74, 43, 12, 13, 44, 75, 106, 137,
- 168, 199, 230, 261, 292, 323, 354, 385,
- 416, 448, 417, 386, 355, 324, 293, 262,
- 231, 200, 169, 138, 107, 76, 45, 14,
- 15, 46, 77, 108, 139, 170, 201, 232,
- 263, 294, 325, 356, 387, 418, 449, 480,
- 481, 450, 419, 388, 357, 326, 295, 264,
- 233, 202, 171, 140, 109, 78, 47, 79,
- 110, 141, 172, 203, 234, 265, 296, 327,
- 358, 389, 420, 451, 482, 483, 452, 421,
- 390, 359, 328, 297, 266, 235, 204, 173,
- 142, 111, 143, 174, 205, 236, 267, 298,
- 329, 360, 391, 422, 453, 484, 485, 454,
- 423, 392, 361, 330, 299, 268, 237, 206,
- 175, 207, 238, 269, 300, 331, 362, 393,
- 424, 455, 486, 487, 456, 425, 394, 363,
- 332, 301, 270, 239, 271, 302, 333, 364,
- 395, 426, 457, 488, 489, 458, 427, 396,
- 365, 334, 303, 335, 366, 397, 428, 459,
- 490, 491, 460, 429, 398, 367, 399, 430,
- 461, 492, 493, 462, 431, 463, 494, 495,
-
- 16, 512, 528, 17, 513, 529, 48, 544,
- 560, 80, 576, 592, 49, 545, 561, 18,
- 514, 530, 19, 515, 531, 50, 546, 562,
- 81, 577, 593, 112, 608, 624, 144, 640,
- 656, 113, 609, 625, 82, 578, 594, 51,
- 547, 563, 20, 516, 532, 21, 517, 533,
- 52, 548, 564, 83, 579, 595, 114, 610,
- 626, 145, 641, 657, 176, 672, 688, 208,
- 704, 720, 177, 673, 689, 146, 642, 658,
- 115, 611, 627, 84, 580, 596, 53, 549,
- 565, 22, 518, 534, 23, 519, 535, 54,
- 550, 566, 85, 581, 597, 116, 612, 628,
- 147, 643, 659, 178, 674, 690, 209, 705,
- 721, 240, 736, 752, 272, 768, 784, 241,
- 737, 753, 210, 706, 722, 179, 675, 691,
- 148, 644, 660, 117, 613, 629, 86, 582,
- 598, 55, 551, 567, 24, 520, 536, 25,
- 521, 537, 56, 552, 568, 87, 583, 599,
- 118, 614, 630, 149, 645, 661, 180, 676,
- 692, 211, 707, 723, 242, 738, 754, 273,
- 769, 785, 304, 800, 816, 336, 832, 848,
- 305, 801, 817, 274, 770, 786, 243, 739,
- 755, 212, 708, 724, 181, 677, 693, 150,
- 646, 662, 119, 615, 631, 88, 584, 600,
- 57, 553, 569, 26, 522, 538, 27, 523,
- 539, 58, 554, 570, 89, 585, 601, 120,
- 616, 632, 151, 647, 663, 182, 678, 694,
- 213, 709, 725, 244, 740, 756, 275, 771,
- 787, 306, 802, 818, 337, 833, 849, 368,
- 864, 880, 400, 896, 912, 369, 865, 881,
- 338, 834, 850, 307, 803, 819, 276, 772,
- 788, 245, 741, 757, 214, 710, 726, 183,
-
- 679, 695, 152, 648, 664, 121, 617, 633,
- 90, 586, 602, 59, 555, 571, 28, 524,
- 540, 29, 525, 541, 60, 556, 572, 91,
- 587, 603, 122, 618, 634, 153, 649, 665,
- 184, 680, 696, 215, 711, 727, 246, 742,
- 758, 277, 773, 789, 308, 804, 820, 339,
- 835, 851, 370, 866, 882, 401, 897, 913,
- 432, 928, 944, 464, 960, 976, 433, 929,
- 945, 402, 898, 914, 371, 867, 883, 340,
- 836, 852, 309, 805, 821, 278, 774, 790,
- 247, 743, 759, 216, 712, 728, 185, 681,
- 697, 154, 650, 666, 123, 619, 635, 92,
- 588, 604, 61, 557, 573, 30, 526, 542,
- 31, 527, 543, 62, 558, 574, 93, 589,
- 605, 124, 620, 636, 155, 651, 667, 186,
- 682, 698, 217, 713, 729, 248, 744, 760,
- 279, 775, 791, 310, 806, 822, 341, 837,
- 853, 372, 868, 884, 403, 899, 915, 434,
- 930, 946, 465, 961, 977, 496, 992, 1008,
- 497, 993, 1009, 466, 962, 978, 435, 931,
- 947, 404, 900, 916, 373, 869, 885, 342,
- 838, 854, 311, 807, 823, 280, 776, 792,
- 249, 745, 761, 218, 714, 730, 187, 683,
- 699, 156, 652, 668, 125, 621, 637, 94,
- 590, 606, 63, 559, 575, 95, 591, 607,
- 126, 622, 638, 157, 653, 669, 188, 684,
- 700, 219, 715, 731, 250, 746, 762, 281,
- 777, 793, 312, 808, 824, 343, 839, 855,
- 374, 870, 886, 405, 901, 917, 436, 932,
- 948, 467, 963, 979, 498, 994, 1010, 499,
- 995, 1011, 468, 964, 980, 437, 933, 949,
- 406, 902, 918, 375, 871, 887, 344, 840,
-
- 856, 313, 809, 825, 282, 778, 794, 251,
- 747, 763, 220, 716, 732, 189, 685, 701,
- 158, 654, 670, 127, 623, 639, 159, 655,
- 671, 190, 686, 702, 221, 717, 733, 252,
- 748, 764, 283, 779, 795, 314, 810, 826,
- 345, 841, 857, 376, 872, 888, 407, 903,
- 919, 438, 934, 950, 469, 965, 981, 500,
- 996, 1012, 501, 997, 1013, 470, 966, 982,
- 439, 935, 951, 408, 904, 920, 377, 873,
- 889, 346, 842, 858, 315, 811, 827, 284,
- 780, 796, 253, 749, 765, 222, 718, 734,
- 191, 687, 703, 223, 719, 735, 254, 750,
- 766, 285, 781, 797, 316, 812, 828, 347,
- 843, 859, 378, 874, 890, 409, 905, 921,
- 440, 936, 952, 471, 967, 983, 502, 998,
- 1014, 503, 999, 1015, 472, 968, 984, 441,
- 937, 953, 410, 906, 922, 379, 875, 891,
- 348, 844, 860, 317, 813, 829, 286, 782,
- 798, 255, 751, 767, 287, 783, 799, 318,
- 814, 830, 349, 845, 861, 380, 876, 892,
- 411, 907, 923, 442, 938, 954, 473, 969,
- 985, 504, 1000, 1016, 505, 1001, 1017, 474,
- 970, 986, 443, 939, 955, 412, 908, 924,
- 381, 877, 893, 350, 846, 862, 319, 815,
- 831, 351, 847, 863, 382, 878, 894, 413,
- 909, 925, 444, 940, 956, 475, 971, 987,
- 506, 1002, 1018, 507, 1003, 1019, 476, 972,
- 988, 445, 941, 957, 414, 910, 926, 383,
- 879, 895, 415, 911, 927, 446, 942, 958,
- 477, 973, 989, 508, 1004, 1020, 509, 1005,
- 1021, 478, 974, 990, 447, 943, 959, 479,
- 975, 991, 510, 1006, 1022, 511, 1007, 1023,
-};
-
-#elif DWTDCT_TYPE == DWTDCT16X16
-
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6,
- 6, 6, 6,
- 6,
- 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4,
- 16, 512, 528,
- 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 256, 225, 194, 163,
- 132, 101, 70, 39, 8, 9, 40, 71,
- 102, 133, 164, 195, 226, 257, 288, 320,
- 289, 258, 227, 196, 165, 134, 103, 72,
- 41, 10, 11, 42, 73, 104, 135, 166,
- 197, 228, 259, 290, 321, 352, 384, 353,
- 322, 291, 260, 229, 198, 167, 136, 105,
- 74, 43, 12, 13, 44, 75, 106, 137,
- 168, 199, 230, 261, 292, 323, 354, 385,
- 416, 448, 417, 386, 355, 324, 293, 262,
- 231, 200, 169, 138, 107, 76, 45, 14,
- 15, 46, 77, 108, 139, 170, 201, 232,
- 263, 294, 325, 356, 387, 418, 449, 480,
- 481, 450, 419, 388, 357, 326, 295, 264,
- 233, 202, 171, 140, 109, 78, 47, 79,
- 110, 141, 172, 203, 234, 265, 296, 327,
- 358, 389, 420, 451, 482, 483, 452, 421,
- 390, 359, 328, 297, 266, 235, 204, 173,
- 142, 111, 143, 174, 205, 236, 267, 298,
- 329, 360, 391, 422, 453, 484, 485, 454,
- 423, 392, 361, 330, 299, 268, 237, 206,
- 175, 207, 238, 269, 300, 331, 362, 393,
- 424, 455, 486, 487, 456, 425, 394, 363,
- 332, 301, 270, 239, 271, 302, 333, 364,
- 395, 426, 457, 488, 489, 458, 427, 396,
- 365, 334, 303, 335, 366, 397, 428, 459,
- 490, 491, 460, 429, 398, 367, 399, 430,
- 461, 492, 493, 462, 431, 463, 494, 495,
-
- 17, 513, 529, 48, 544,
- 560, 80, 576, 592, 49, 545, 561, 18,
- 514, 530, 19, 515, 531, 50, 546, 562,
- 81, 577, 593, 112, 608, 624, 144, 640,
- 656, 113, 609, 625, 82, 578, 594, 51,
- 547, 563, 20, 516, 532, 21, 517, 533,
- 52, 548, 564, 83, 579, 595, 114, 610,
- 626, 145, 641, 657, 176, 672, 688, 208,
- 704, 720, 177, 673, 689, 146, 642, 658,
- 115, 611, 627, 84, 580, 596, 53, 549,
- 565, 22, 518, 534, 23, 519, 535, 54,
- 550, 566, 85, 581, 597, 116, 612, 628,
- 147, 643, 659, 178, 674, 690, 209, 705,
- 721, 240, 736, 752, 272, 768, 784, 241,
- 737, 753, 210, 706, 722, 179, 675, 691,
- 148, 644, 660, 117, 613, 629, 86, 582,
- 598, 55, 551, 567, 24, 520, 536, 25,
- 521, 537, 56, 552, 568, 87, 583, 599,
- 118, 614, 630, 149, 645, 661, 180, 676,
- 692, 211, 707, 723, 242, 738, 754, 273,
- 769, 785, 304, 800, 816, 336, 832, 848,
- 305, 801, 817, 274, 770, 786, 243, 739,
- 755, 212, 708, 724, 181, 677, 693, 150,
- 646, 662, 119, 615, 631, 88, 584, 600,
- 57, 553, 569, 26, 522, 538, 27, 523,
- 539, 58, 554, 570, 89, 585, 601, 120,
- 616, 632, 151, 647, 663, 182, 678, 694,
- 213, 709, 725, 244, 740, 756, 275, 771,
- 787, 306, 802, 818, 337, 833, 849, 368,
- 864, 880, 400, 896, 912, 369, 865, 881,
- 338, 834, 850, 307, 803, 819, 276, 772,
- 788, 245, 741, 757, 214, 710, 726, 183,
-
- 679, 695, 152, 648, 664, 121, 617, 633,
- 90, 586, 602, 59, 555, 571, 28, 524,
- 540, 29, 525, 541, 60, 556, 572, 91,
- 587, 603, 122, 618, 634, 153, 649, 665,
- 184, 680, 696, 215, 711, 727, 246, 742,
- 758, 277, 773, 789, 308, 804, 820, 339,
- 835, 851, 370, 866, 882, 401, 897, 913,
- 432, 928, 944, 464, 960, 976, 433, 929,
- 945, 402, 898, 914, 371, 867, 883, 340,
- 836, 852, 309, 805, 821, 278, 774, 790,
- 247, 743, 759, 216, 712, 728, 185, 681,
- 697, 154, 650, 666, 123, 619, 635, 92,
- 588, 604, 61, 557, 573, 30, 526, 542,
- 31, 527, 543, 62, 558, 574, 93, 589,
- 605, 124, 620, 636, 155, 651, 667, 186,
- 682, 698, 217, 713, 729, 248, 744, 760,
- 279, 775, 791, 310, 806, 822, 341, 837,
- 853, 372, 868, 884, 403, 899, 915, 434,
- 930, 946, 465, 961, 977, 496, 992, 1008,
- 497, 993, 1009, 466, 962, 978, 435, 931,
- 947, 404, 900, 916, 373, 869, 885, 342,
- 838, 854, 311, 807, 823, 280, 776, 792,
- 249, 745, 761, 218, 714, 730, 187, 683,
- 699, 156, 652, 668, 125, 621, 637, 94,
- 590, 606, 63, 559, 575, 95, 591, 607,
- 126, 622, 638, 157, 653, 669, 188, 684,
- 700, 219, 715, 731, 250, 746, 762, 281,
- 777, 793, 312, 808, 824, 343, 839, 855,
- 374, 870, 886, 405, 901, 917, 436, 932,
- 948, 467, 963, 979, 498, 994, 1010, 499,
- 995, 1011, 468, 964, 980, 437, 933, 949,
- 406, 902, 918, 375, 871, 887, 344, 840,
-
- 856, 313, 809, 825, 282, 778, 794, 251,
- 747, 763, 220, 716, 732, 189, 685, 701,
- 158, 654, 670, 127, 623, 639, 159, 655,
- 671, 190, 686, 702, 221, 717, 733, 252,
- 748, 764, 283, 779, 795, 314, 810, 826,
- 345, 841, 857, 376, 872, 888, 407, 903,
- 919, 438, 934, 950, 469, 965, 981, 500,
- 996, 1012, 501, 997, 1013, 470, 966, 982,
- 439, 935, 951, 408, 904, 920, 377, 873,
- 889, 346, 842, 858, 315, 811, 827, 284,
- 780, 796, 253, 749, 765, 222, 718, 734,
- 191, 687, 703, 223, 719, 735, 254, 750,
- 766, 285, 781, 797, 316, 812, 828, 347,
- 843, 859, 378, 874, 890, 409, 905, 921,
- 440, 936, 952, 471, 967, 983, 502, 998,
- 1014, 503, 999, 1015, 472, 968, 984, 441,
- 937, 953, 410, 906, 922, 379, 875, 891,
- 348, 844, 860, 317, 813, 829, 286, 782,
- 798, 255, 751, 767, 287, 783, 799, 318,
- 814, 830, 349, 845, 861, 380, 876, 892,
- 411, 907, 923, 442, 938, 954, 473, 969,
- 985, 504, 1000, 1016, 505, 1001, 1017, 474,
- 970, 986, 443, 939, 955, 412, 908, 924,
- 381, 877, 893, 350, 846, 862, 319, 815,
- 831, 351, 847, 863, 382, 878, 894, 413,
- 909, 925, 444, 940, 956, 475, 971, 987,
- 506, 1002, 1018, 507, 1003, 1019, 476, 972,
- 988, 445, 941, 957, 414, 910, 926, 383,
- 879, 895, 415, 911, 927, 446, 942, 958,
- 477, 973, 989, 508, 1004, 1020, 509, 1005,
- 1021, 478, 974, 990, 447, 943, 959, 479,
- 975, 991, 510, 1006, 1022, 511, 1007, 1023,
-};
-
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5,
- 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-
-DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34,
- 65, 96, 128, 97, 66, 35, 4, 5,
- 36, 67, 98, 129, 160, 192, 161, 130,
- 99, 68, 37, 6, 7, 38, 69, 100,
- 131, 162, 193, 224, 225, 194, 163, 132,
- 101, 70, 39, 71, 102, 133, 164, 195,
- 226, 227, 196, 165, 134, 103, 135, 166,
- 197, 228, 229, 198, 167, 199, 230, 231,
-
- 8, 256, 264, 9, 257, 265, 40, 288, 296, 72, 320, 328,
- 41, 289, 297, 10, 258, 266, 11, 259, 267, 42, 290, 298,
- 73, 321, 329, 104, 352, 360, 136, 384, 392, 105, 353, 361,
- 74, 322, 330, 43, 291, 299, 12, 260, 268, 13, 261, 269,
- 44, 292, 300, 75, 323, 331, 106, 354, 362, 137, 385, 393,
- 168, 416, 424, 200, 448, 456, 169, 417, 425, 138, 386, 394,
- 107, 355, 363, 76, 324, 332, 45, 293, 301, 14, 262, 270,
- 15, 263, 271, 46, 294, 302, 77, 325, 333, 108, 356, 364,
- 139, 387, 395, 170, 418, 426, 201, 449, 457, 232, 480, 488,
- 233, 481, 489, 202, 450, 458, 171, 419, 427, 140, 388, 396,
- 109, 357, 365, 78, 326, 334, 47, 295, 303, 79, 327, 335,
- 110, 358, 366, 141, 389, 397, 172, 420, 428, 203, 451, 459,
- 234, 482, 490, 235, 483, 491, 204, 452, 460, 173, 421, 429,
- 142, 390, 398, 111, 359, 367, 143, 391, 399, 174, 422, 430,
- 205, 453, 461, 236, 484, 492, 237, 485, 493, 206, 454, 462,
- 175, 423, 431, 207, 455, 463, 238, 486, 494, 239, 487, 495,
-
- 16, 512, 528, 17, 513, 529, 18, 514,
- 530, 19, 515, 531, 20, 516, 532, 21,
- 517, 533, 22, 518, 534, 23, 519, 535,
- 24, 520, 536, 25, 521, 537, 26, 522,
- 538, 27, 523, 539, 28, 524, 540, 29,
- 525, 541, 30, 526, 542, 31, 527, 543,
- 48, 544, 560, 49, 545, 561, 50, 546,
- 562, 51, 547, 563, 52, 548, 564, 53,
- 549, 565, 54, 550, 566, 55, 551, 567,
- 56, 552, 568, 57, 553, 569, 58, 554,
- 570, 59, 555, 571, 60, 556, 572, 61,
- 557, 573, 62, 558, 574, 63, 559, 575,
- 80, 576, 592, 81, 577, 593, 82, 578,
- 594, 83, 579, 595, 84, 580, 596, 85,
- 581, 597, 86, 582, 598, 87, 583, 599,
- 88, 584, 600, 89, 585, 601, 90, 586,
- 602, 91, 587, 603, 92, 588, 604, 93,
- 589, 605, 94, 590, 606, 95, 591, 607,
- 112, 608, 624, 113, 609, 625, 114, 610,
- 626, 115, 611, 627, 116, 612, 628, 117,
- 613, 629, 118, 614, 630, 119, 615, 631,
- 120, 616, 632, 121, 617, 633, 122, 618,
- 634, 123, 619, 635, 124, 620, 636, 125,
- 621, 637, 126, 622, 638, 127, 623, 639,
- 144, 640, 656, 145, 641, 657, 146, 642,
- 658, 147, 643, 659, 148, 644, 660, 149,
- 645, 661, 150, 646, 662, 151, 647, 663,
- 152, 648, 664, 153, 649, 665, 154, 650,
- 666, 155, 651, 667, 156, 652, 668, 157,
- 653, 669, 158, 654, 670, 159, 655, 671,
- 176, 672, 688, 177, 673, 689, 178, 674,
- 690, 179, 675, 691, 180, 676, 692, 181,
- 677, 693, 182, 678, 694, 183, 679, 695,
- 184, 680, 696, 185, 681, 697, 186, 682,
- 698, 187, 683, 699, 188, 684, 700, 189,
- 685, 701, 190, 686, 702, 191, 687, 703,
- 208, 704, 720, 209, 705, 721, 210, 706,
- 722, 211, 707, 723, 212, 708, 724, 213,
- 709, 725, 214, 710, 726, 215, 711, 727,
- 216, 712, 728, 217, 713, 729, 218, 714,
- 730, 219, 715, 731, 220, 716, 732, 221,
- 717, 733, 222, 718, 734, 223, 719, 735,
- 240, 736, 752, 241, 737, 753, 242, 738,
- 754, 243, 739, 755, 244, 740, 756, 245,
- 741, 757, 246, 742, 758, 247, 743, 759,
- 248, 744, 760, 249, 745, 761, 250, 746,
- 762, 251, 747, 763, 252, 748, 764, 253,
- 749, 765, 254, 750, 766, 255, 751, 767,
- 272, 768, 784, 273, 769, 785, 274, 770,
- 786, 275, 771, 787, 276, 772, 788, 277,
- 773, 789, 278, 774, 790, 279, 775, 791,
- 280, 776, 792, 281, 777, 793, 282, 778,
- 794, 283, 779, 795, 284, 780, 796, 285,
- 781, 797, 286, 782, 798, 287, 783, 799,
- 304, 800, 816, 305, 801, 817, 306, 802,
- 818, 307, 803, 819, 308, 804, 820, 309,
- 805, 821, 310, 806, 822, 311, 807, 823,
- 312, 808, 824, 313, 809, 825, 314, 810,
- 826, 315, 811, 827, 316, 812, 828, 317,
- 813, 829, 318, 814, 830, 319, 815, 831,
- 336, 832, 848, 337, 833, 849, 338, 834,
- 850, 339, 835, 851, 340, 836, 852, 341,
- 837, 853, 342, 838, 854, 343, 839, 855,
- 344, 840, 856, 345, 841, 857, 346, 842,
- 858, 347, 843, 859, 348, 844, 860, 349,
- 845, 861, 350, 846, 862, 351, 847, 863,
- 368, 864, 880, 369, 865, 881, 370, 866,
- 882, 371, 867, 883, 372, 868, 884, 373,
- 869, 885, 374, 870, 886, 375, 871, 887,
- 376, 872, 888, 377, 873, 889, 378, 874,
- 890, 379, 875, 891, 380, 876, 892, 381,
- 877, 893, 382, 878, 894, 383, 879, 895,
- 400, 896, 912, 401, 897, 913, 402, 898,
- 914, 403, 899, 915, 404, 900, 916, 405,
- 901, 917, 406, 902, 918, 407, 903, 919,
- 408, 904, 920, 409, 905, 921, 410, 906,
- 922, 411, 907, 923, 412, 908, 924, 413,
- 909, 925, 414, 910, 926, 415, 911, 927,
- 432, 928, 944, 433, 929, 945, 434, 930,
- 946, 435, 931, 947, 436, 932, 948, 437,
- 933, 949, 438, 934, 950, 439, 935, 951,
- 440, 936, 952, 441, 937, 953, 442, 938,
- 954, 443, 939, 955, 444, 940, 956, 445,
- 941, 957, 446, 942, 958, 447, 943, 959,
- 464, 960, 976, 465, 961, 977, 466, 962,
- 978, 467, 963, 979, 468, 964, 980, 469,
- 965, 981, 470, 966, 982, 471, 967, 983,
- 472, 968, 984, 473, 969, 985, 474, 970,
- 986, 475, 971, 987, 476, 972, 988, 477,
- 973, 989, 478, 974, 990, 479, 975, 991,
- 496, 992, 1008, 497, 993, 1009, 498, 994,
- 1010, 499, 995, 1011, 500, 996, 1012, 501,
- 997, 1013, 502, 998, 1014, 503, 999, 1015,
- 504, 1000, 1016, 505, 1001, 1017, 506, 1002,
- 1018, 507, 1003, 1019, 508, 1004, 1020, 509,
- 1005, 1021, 510, 1006, 1022, 511, 1007, 1023,
+DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]) = {
+ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
+ 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241,
+ 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242,
+ 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243,
+ 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244,
+ 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245,
+ 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246,
+ 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+ 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248,
+ 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249,
+ 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250,
+ 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251,
+ 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252,
+ 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253,
+ 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254,
+ 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255,
};
-#endif
-#else
-
-DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]) = {
- 0, 1, 2, 3, 5, 4, 4, 5, 5, 3, 6, 3, 5, 4, 6, 6,
- 6, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]) = {
@@ -865,7 +367,7 @@
951, 920, 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890, 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767, 799, 830, 861, 892,
923, 954, 985, 1016, 1017, 986, 955, 924, 893, 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023,
};
-#endif // CONFIG_DWTDCTHYBRID
+#endif // CONFIG_SCATTERSCAN
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
@@ -898,6 +400,1661 @@
254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
};
+#if CONFIG_CODE_NONZEROCOUNT
+const vp9_tree_index vp9_nzc4x4_tree[2 * NZC4X4_NODES] = {
+ -NZC_0, 2,
+ 4, 6,
+ -NZC_1, -NZC_2,
+ -NZC_3TO4, 8,
+ -NZC_5TO8, -NZC_9TO16,
+};
+struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS];
+
+const vp9_tree_index vp9_nzc8x8_tree[2 * NZC8X8_NODES] = {
+ -NZC_0, 2,
+ 4, 6,
+ -NZC_1, -NZC_2,
+ 8, 10,
+ -NZC_3TO4, -NZC_5TO8,
+ -NZC_9TO16, 12,
+ -NZC_17TO32, -NZC_33TO64,
+};
+struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS];
+
+const vp9_tree_index vp9_nzc16x16_tree[2 * NZC16X16_NODES] = {
+ -NZC_0, 2,
+ 4, 6,
+ -NZC_1, -NZC_2,
+ 8, 10,
+ -NZC_3TO4, -NZC_5TO8,
+ 12, 14,
+ -NZC_9TO16, -NZC_17TO32,
+ -NZC_33TO64, 16,
+ -NZC_65TO128, -NZC_129TO256,
+};
+struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS];
+
+const vp9_tree_index vp9_nzc32x32_tree[2 * NZC32X32_NODES] = {
+ -NZC_0, 2,
+ 4, 6,
+ -NZC_1, -NZC_2,
+ 8, 10,
+ -NZC_3TO4, -NZC_5TO8,
+ 12, 14,
+ -NZC_9TO16, -NZC_17TO32,
+ 16, 18,
+ -NZC_33TO64, -NZC_65TO128,
+ -NZC_129TO256, 20,
+ -NZC_257TO512, -NZC_513TO1024,
+};
+struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS];
+
+const int vp9_extranzcbits[NZC32X32_TOKENS] = {
+ 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+};
+
+const int vp9_basenzcvalue[NZC32X32_TOKENS] = {
+ 0, 1, 2, 3, 5, 9, 17, 33, 65, 129, 257, 513
+};
+
+#endif // CONFIG_CODE_NONZEROCOUNT
+
+#if CONFIG_MODELCOEFPROB
+
+const vp9_prob vp9_modelcoefprobs_gg875[COEFPROB_MODELS][ENTROPY_NODES - 1] = {
+ // Probs generated with a Generalized Gaussian (with shape parameter 0.875)
+ // source model with varying quantizer step size for a uniform quantizer
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use
+ {1, 2, 6, 86, 129, 11, 87, 42, 92, 52,},
+ {2, 4, 12, 87, 129, 22, 89, 75, 97, 91,},
+ {3, 6, 17, 88, 130, 32, 90, 102, 102, 121,},
+ {4, 8, 22, 89, 131, 41, 91, 125, 107, 145,},
+ {5, 10, 28, 90, 131, 50, 93, 144, 112, 164,},
+ {6, 12, 33, 90, 132, 59, 94, 160, 117, 180,},
+ {7, 14, 38, 91, 132, 67, 95, 173, 122, 193,},
+ {8, 15, 42, 92, 133, 75, 97, 185, 126, 204,},
+ {9, 17, 47, 92, 133, 82, 98, 195, 131, 212,},
+ {10, 19, 52, 93, 134, 89, 99, 203, 135, 220,},
+ {11, 21, 56, 94, 134, 96, 101, 211, 140, 226,},
+ {12, 23, 60, 95, 135, 102, 102, 217, 144, 231,},
+ {13, 25, 65, 95, 135, 109, 103, 222, 148, 235,},
+ {14, 26, 69, 96, 136, 115, 105, 227, 153, 238,},
+ {15, 28, 73, 97, 136, 120, 106, 231, 157, 241,},
+ {16, 30, 77, 97, 137, 126, 107, 234, 161, 244,},
+ {17, 32, 81, 98, 138, 131, 108, 237, 164, 246,},
+ {18, 34, 85, 99, 138, 136, 110, 240, 168, 247,},
+ {19, 35, 89, 100, 139, 141, 111, 242, 172, 249,},
+ {20, 37, 92, 100, 139, 145, 112, 244, 175, 250,},
+ {21, 39, 96, 101, 140, 150, 113, 246, 179, 251,},
+ {22, 41, 99, 102, 140, 154, 115, 247, 182, 252,},
+ {23, 42, 103, 102, 141, 158, 116, 248, 185, 252,},
+ {24, 44, 106, 103, 141, 162, 117, 249, 188, 253,},
+ {25, 46, 110, 104, 142, 166, 118, 250, 191, 253,},
+ {26, 48, 113, 104, 142, 170, 120, 251, 194, 254,},
+ {27, 49, 116, 105, 143, 173, 121, 252, 197, 254,},
+ {28, 51, 119, 106, 143, 176, 122, 252, 200, 254,},
+ {29, 53, 122, 107, 144, 180, 123, 253, 202, 255,},
+ {30, 54, 125, 107, 144, 183, 125, 253, 205, 255,},
+ {31, 56, 128, 108, 145, 186, 126, 254, 207, 255,},
+ {32, 58, 131, 109, 145, 189, 127, 254, 209, 255,},
+ {33, 59, 134, 109, 146, 191, 128, 254, 212, 255,},
+ {34, 61, 137, 110, 146, 194, 130, 254, 214, 255,},
+ {35, 62, 139, 111, 147, 196, 131, 255, 216, 255,},
+ {36, 64, 142, 112, 147, 199, 132, 255, 218, 255,},
+ {37, 66, 145, 112, 148, 201, 134, 255, 220, 255,},
+ {38, 67, 147, 113, 148, 203, 135, 255, 221, 255,},
+ {39, 69, 150, 114, 149, 206, 136, 255, 223, 255,},
+ {40, 70, 152, 114, 149, 208, 137, 255, 225, 255,},
+ {41, 72, 155, 115, 150, 210, 138, 255, 226, 255,},
+ {42, 74, 157, 116, 150, 212, 140, 255, 228, 255,},
+ {43, 75, 159, 117, 151, 213, 141, 255, 229, 255,},
+ {44, 77, 161, 117, 151, 215, 142, 255, 230, 255,},
+ {45, 78, 164, 118, 152, 217, 143, 255, 232, 255,},
+ {46, 80, 166, 119, 152, 219, 145, 255, 233, 255,},
+ {47, 81, 168, 120, 153, 220, 146, 255, 234, 255,},
+ {48, 83, 170, 120, 153, 222, 147, 255, 235, 255,},
+ {49, 84, 172, 121, 154, 223, 148, 255, 236, 255,},
+ {50, 86, 174, 122, 154, 225, 150, 255, 237, 255,},
+ {51, 87, 176, 123, 155, 226, 151, 255, 238, 255,},
+ {52, 89, 178, 123, 155, 227, 152, 255, 239, 255,},
+ {53, 90, 180, 124, 156, 228, 153, 255, 240, 255,},
+ {54, 92, 182, 125, 156, 230, 154, 255, 241, 255,},
+ {55, 93, 183, 126, 157, 231, 156, 255, 242, 255,},
+ {56, 95, 185, 126, 157, 232, 157, 255, 242, 255,},
+ {57, 96, 187, 127, 158, 233, 158, 255, 243, 255,},
+ {58, 98, 189, 128, 158, 234, 159, 255, 244, 255,},
+ {59, 99, 190, 129, 159, 235, 160, 255, 244, 255,},
+ {60, 101, 192, 129, 159, 236, 162, 255, 245, 255,},
+ {61, 102, 193, 130, 160, 237, 163, 255, 246, 255,},
+ {62, 104, 195, 131, 160, 238, 164, 255, 246, 255,},
+ {63, 105, 197, 132, 161, 238, 165, 255, 247, 255,},
+ {64, 106, 198, 132, 162, 239, 166, 255, 247, 255,},
+ {65, 108, 199, 133, 162, 240, 167, 255, 248, 255,},
+ {66, 109, 201, 134, 163, 241, 169, 255, 248, 255,},
+ {67, 111, 202, 135, 163, 241, 170, 255, 249, 255,},
+ {68, 112, 204, 135, 164, 242, 171, 255, 249, 255,},
+ {69, 113, 205, 136, 164, 243, 172, 255, 249, 255,},
+ {70, 115, 206, 137, 165, 243, 173, 255, 250, 255,},
+ {71, 116, 208, 138, 165, 244, 174, 255, 250, 255,},
+ {72, 117, 209, 138, 166, 244, 175, 255, 250, 255,},
+ {73, 119, 210, 139, 166, 245, 177, 255, 251, 255,},
+ {74, 120, 211, 140, 167, 245, 178, 255, 251, 255,},
+ {75, 121, 212, 141, 167, 246, 179, 255, 251, 255,},
+ {76, 123, 214, 142, 168, 246, 180, 255, 252, 255,},
+ {77, 124, 215, 142, 168, 247, 181, 255, 252, 255,},
+ {78, 125, 216, 143, 169, 247, 182, 255, 252, 255,},
+ {79, 127, 217, 144, 170, 248, 183, 255, 252, 255,},
+ {80, 128, 218, 145, 170, 248, 184, 255, 253, 255,},
+ {81, 129, 219, 146, 171, 248, 185, 255, 253, 255,},
+ {82, 131, 220, 146, 171, 249, 186, 255, 253, 255,},
+ {83, 132, 221, 147, 172, 249, 187, 255, 253, 255,},
+ {84, 133, 222, 148, 172, 249, 188, 255, 253, 255,},
+ {85, 134, 223, 149, 173, 250, 189, 255, 253, 255,},
+ {86, 136, 224, 149, 173, 250, 190, 255, 254, 255,},
+ {87, 137, 225, 150, 174, 250, 191, 255, 254, 255,},
+ {88, 138, 226, 151, 174, 251, 192, 255, 254, 255,},
+ {89, 139, 226, 152, 175, 251, 193, 255, 254, 255,},
+ {90, 141, 227, 153, 175, 251, 194, 255, 254, 255,},
+ {91, 142, 228, 153, 176, 251, 195, 255, 254, 255,},
+ {92, 143, 229, 154, 177, 252, 196, 255, 254, 255,},
+ {93, 144, 230, 155, 177, 252, 197, 255, 254, 255,},
+ {94, 146, 230, 156, 178, 252, 198, 255, 255, 255,},
+ {95, 147, 231, 157, 178, 252, 199, 255, 255, 255,},
+ {96, 148, 232, 157, 179, 252, 200, 255, 255, 255,},
+ {97, 149, 233, 158, 179, 253, 201, 255, 255, 255,},
+ {98, 150, 233, 159, 180, 253, 202, 255, 255, 255,},
+ {99, 152, 234, 160, 180, 253, 203, 255, 255, 255,},
+ {100, 153, 235, 161, 181, 253, 204, 255, 255, 255,},
+ {101, 154, 235, 161, 182, 253, 205, 255, 255, 255,},
+ {102, 155, 236, 162, 182, 253, 206, 255, 255, 255,},
+ {103, 156, 236, 163, 183, 254, 207, 255, 255, 255,},
+ {104, 157, 237, 164, 183, 254, 207, 255, 255, 255,},
+ {105, 159, 238, 165, 184, 254, 208, 255, 255, 255,},
+ {106, 160, 238, 166, 184, 254, 209, 255, 255, 255,},
+ {107, 161, 239, 166, 185, 254, 210, 255, 255, 255,},
+ {108, 162, 239, 167, 185, 254, 211, 255, 255, 255,},
+ {109, 163, 240, 168, 186, 254, 212, 255, 255, 255,},
+ {110, 164, 240, 169, 187, 254, 212, 255, 255, 255,},
+ {111, 165, 241, 170, 187, 254, 213, 255, 255, 255,},
+ {112, 166, 241, 170, 188, 255, 214, 255, 255, 255,},
+ {113, 167, 242, 171, 188, 255, 215, 255, 255, 255,},
+ {114, 169, 242, 172, 189, 255, 216, 255, 255, 255,},
+ {115, 170, 243, 173, 189, 255, 216, 255, 255, 255,},
+ {116, 171, 243, 174, 190, 255, 217, 255, 255, 255,},
+ {117, 172, 244, 174, 190, 255, 218, 255, 255, 255,},
+ {118, 173, 244, 175, 191, 255, 219, 255, 255, 255,},
+ {119, 174, 244, 176, 192, 255, 219, 255, 255, 255,},
+ {120, 175, 245, 177, 192, 255, 220, 255, 255, 255,},
+ {121, 176, 245, 178, 193, 255, 221, 255, 255, 255,},
+ {122, 177, 245, 178, 193, 255, 222, 255, 255, 255,},
+ {123, 178, 246, 179, 194, 255, 222, 255, 255, 255,},
+ {124, 179, 246, 180, 194, 255, 223, 255, 255, 255,},
+ {125, 180, 247, 181, 195, 255, 224, 255, 255, 255,},
+ {126, 181, 247, 182, 196, 255, 224, 255, 255, 255,},
+ {127, 182, 247, 182, 196, 255, 225, 255, 255, 255,},
+ {128, 183, 247, 183, 197, 255, 226, 255, 255, 255,},
+ {129, 184, 248, 184, 197, 255, 226, 255, 255, 255,},
+ {130, 185, 248, 185, 198, 255, 227, 255, 255, 255,},
+ {131, 186, 248, 186, 198, 255, 228, 255, 255, 255,},
+ {132, 187, 249, 186, 199, 255, 228, 255, 255, 255,},
+ {133, 188, 249, 187, 200, 255, 229, 255, 255, 255,},
+ {134, 189, 249, 188, 200, 255, 230, 255, 255, 255,},
+ {135, 190, 249, 189, 201, 255, 230, 255, 255, 255,},
+ {136, 191, 250, 190, 201, 255, 231, 255, 255, 255,},
+ {137, 192, 250, 190, 202, 255, 231, 255, 255, 255,},
+ {138, 193, 250, 191, 202, 255, 232, 255, 255, 255,},
+ {139, 194, 250, 192, 203, 255, 232, 255, 255, 255,},
+ {140, 195, 251, 193, 204, 255, 233, 255, 255, 255,},
+ {141, 195, 251, 194, 204, 255, 234, 255, 255, 255,},
+ {142, 196, 251, 194, 205, 255, 234, 255, 255, 255,},
+ {143, 197, 251, 195, 205, 255, 235, 255, 255, 255,},
+ {144, 198, 251, 196, 206, 255, 235, 255, 255, 255,},
+ {145, 199, 252, 197, 206, 255, 236, 255, 255, 255,},
+ {146, 200, 252, 197, 207, 255, 236, 255, 255, 255,},
+ {147, 201, 252, 198, 208, 255, 237, 255, 255, 255,},
+ {148, 202, 252, 199, 208, 255, 237, 255, 255, 255,},
+ {149, 203, 252, 200, 209, 255, 238, 255, 255, 255,},
+ {150, 203, 252, 201, 209, 255, 238, 255, 255, 255,},
+ {151, 204, 253, 201, 210, 255, 239, 255, 255, 255,},
+ {152, 205, 253, 202, 210, 255, 239, 255, 255, 255,},
+ {153, 206, 253, 203, 211, 255, 239, 255, 255, 255,},
+ {154, 207, 253, 204, 212, 255, 240, 255, 255, 255,},
+ {155, 208, 253, 204, 212, 255, 240, 255, 255, 255,},
+ {156, 209, 253, 205, 213, 255, 241, 255, 255, 255,},
+ {157, 209, 253, 206, 213, 255, 241, 255, 255, 255,},
+ {158, 210, 254, 207, 214, 255, 242, 255, 255, 255,},
+ {159, 211, 254, 207, 214, 255, 242, 255, 255, 255,},
+ {160, 212, 254, 208, 215, 255, 242, 255, 255, 255,},
+ {161, 213, 254, 209, 215, 255, 243, 255, 255, 255,},
+ {162, 213, 254, 210, 216, 255, 243, 255, 255, 255,},
+ {163, 214, 254, 210, 217, 255, 244, 255, 255, 255,},
+ {164, 215, 254, 211, 217, 255, 244, 255, 255, 255,},
+ {165, 216, 254, 212, 218, 255, 244, 255, 255, 255,},
+ {166, 216, 254, 212, 218, 255, 245, 255, 255, 255,},
+ {167, 217, 254, 213, 219, 255, 245, 255, 255, 255,},
+ {168, 218, 254, 214, 219, 255, 245, 255, 255, 255,},
+ {169, 219, 255, 215, 220, 255, 246, 255, 255, 255,},
+ {170, 219, 255, 215, 221, 255, 246, 255, 255, 255,},
+ {171, 220, 255, 216, 221, 255, 246, 255, 255, 255,},
+ {172, 221, 255, 217, 222, 255, 247, 255, 255, 255,},
+ {173, 222, 255, 217, 222, 255, 247, 255, 255, 255,},
+ {174, 222, 255, 218, 223, 255, 247, 255, 255, 255,},
+ {175, 223, 255, 219, 223, 255, 248, 255, 255, 255,},
+ {176, 224, 255, 220, 224, 255, 248, 255, 255, 255,},
+ {177, 224, 255, 220, 224, 255, 248, 255, 255, 255,},
+ {178, 225, 255, 221, 225, 255, 248, 255, 255, 255,},
+ {179, 226, 255, 222, 225, 255, 249, 255, 255, 255,},
+ {180, 226, 255, 222, 226, 255, 249, 255, 255, 255,},
+ {181, 227, 255, 223, 227, 255, 249, 255, 255, 255,},
+ {182, 228, 255, 224, 227, 255, 249, 255, 255, 255,},
+ {183, 228, 255, 224, 228, 255, 250, 255, 255, 255,},
+ {184, 229, 255, 225, 228, 255, 250, 255, 255, 255,},
+ {185, 230, 255, 226, 229, 255, 250, 255, 255, 255,},
+ {186, 230, 255, 226, 229, 255, 250, 255, 255, 255,},
+ {187, 231, 255, 227, 230, 255, 251, 255, 255, 255,},
+ {188, 232, 255, 228, 230, 255, 251, 255, 255, 255,},
+ {189, 232, 255, 228, 231, 255, 251, 255, 255, 255,},
+ {190, 233, 255, 229, 231, 255, 251, 255, 255, 255,},
+ {191, 233, 255, 229, 232, 255, 251, 255, 255, 255,},
+ {192, 234, 255, 230, 232, 255, 252, 255, 255, 255,},
+ {193, 234, 255, 231, 233, 255, 252, 255, 255, 255,},
+ {194, 235, 255, 231, 233, 255, 252, 255, 255, 255,},
+ {195, 236, 255, 232, 234, 255, 252, 255, 255, 255,},
+ {196, 236, 255, 232, 234, 255, 252, 255, 255, 255,},
+ {197, 237, 255, 233, 235, 255, 252, 255, 255, 255,},
+ {198, 237, 255, 234, 235, 255, 253, 255, 255, 255,},
+ {199, 238, 255, 234, 236, 255, 253, 255, 255, 255,},
+ {200, 238, 255, 235, 236, 255, 253, 255, 255, 255,},
+ {201, 239, 255, 235, 237, 255, 253, 255, 255, 255,},
+ {202, 239, 255, 236, 237, 255, 253, 255, 255, 255,},
+ {203, 240, 255, 237, 238, 255, 253, 255, 255, 255,},
+ {204, 240, 255, 237, 238, 255, 254, 255, 255, 255,},
+ {205, 241, 255, 238, 239, 255, 254, 255, 255, 255,},
+ {206, 241, 255, 238, 239, 255, 254, 255, 255, 255,},
+ {207, 242, 255, 239, 240, 255, 254, 255, 255, 255,},
+ {208, 242, 255, 239, 240, 255, 254, 255, 255, 255,},
+ {209, 243, 255, 240, 241, 255, 254, 255, 255, 255,},
+ {210, 243, 255, 240, 241, 255, 254, 255, 255, 255,},
+ {211, 244, 255, 241, 242, 255, 254, 255, 255, 255,},
+ {212, 244, 255, 241, 242, 255, 254, 255, 255, 255,},
+ {213, 245, 255, 242, 243, 255, 255, 255, 255, 255,},
+ {214, 245, 255, 242, 243, 255, 255, 255, 255, 255,},
+ {215, 246, 255, 243, 244, 255, 255, 255, 255, 255,},
+ {216, 246, 255, 243, 244, 255, 255, 255, 255, 255,},
+ {217, 246, 255, 244, 244, 255, 255, 255, 255, 255,},
+ {218, 247, 255, 244, 245, 255, 255, 255, 255, 255,},
+ {219, 247, 255, 245, 245, 255, 255, 255, 255, 255,},
+ {220, 248, 255, 245, 246, 255, 255, 255, 255, 255,},
+ {221, 248, 255, 246, 246, 255, 255, 255, 255, 255,},
+ {222, 248, 255, 246, 247, 255, 255, 255, 255, 255,},
+ {223, 249, 255, 247, 247, 255, 255, 255, 255, 255,},
+ {224, 249, 255, 247, 247, 255, 255, 255, 255, 255,},
+ {225, 250, 255, 247, 248, 255, 255, 255, 255, 255,},
+ {226, 250, 255, 248, 248, 255, 255, 255, 255, 255,},
+ {227, 250, 255, 248, 249, 255, 255, 255, 255, 255,},
+ {228, 251, 255, 249, 249, 255, 255, 255, 255, 255,},
+ {229, 251, 255, 249, 249, 255, 255, 255, 255, 255,},
+ {230, 251, 255, 249, 250, 255, 255, 255, 255, 255,},
+ {231, 251, 255, 250, 250, 255, 255, 255, 255, 255,},
+ {232, 252, 255, 250, 250, 255, 255, 255, 255, 255,},
+ {233, 252, 255, 251, 251, 255, 255, 255, 255, 255,},
+ {234, 252, 255, 251, 251, 255, 255, 255, 255, 255,},
+ {235, 253, 255, 251, 251, 255, 255, 255, 255, 255,},
+ {236, 253, 255, 252, 252, 255, 255, 255, 255, 255,},
+ {237, 253, 255, 252, 252, 255, 255, 255, 255, 255,},
+ {238, 253, 255, 252, 252, 255, 255, 255, 255, 255,},
+ {239, 254, 255, 253, 253, 255, 255, 255, 255, 255,},
+ {240, 254, 255, 253, 253, 255, 255, 255, 255, 255,},
+ {241, 254, 255, 253, 253, 255, 255, 255, 255, 255,},
+ {242, 254, 255, 253, 254, 255, 255, 255, 255, 255,},
+ {243, 254, 255, 254, 254, 255, 255, 255, 255, 255,},
+ {244, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
+ {245, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
+ {246, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
+ {247, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {248, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {249, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {250, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {251, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {252, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {253, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {254, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+};
+
+const vp9_prob vp9_modelcoefprobs_gg75[COEFPROB_MODELS][ENTROPY_NODES - 1] = {
+ // Probs generated with a Generalized Gaussian (with shape parameter 0.75)
+ // source model with varying quantizer step size for a uniform quantizer
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use
+ {1, 2, 6, 87, 129, 11, 88, 39, 93, 47,},
+ {2, 4, 11, 88, 130, 21, 89, 68, 98, 79,},
+ {3, 6, 16, 89, 131, 30, 91, 92, 103, 105,},
+ {4, 8, 21, 90, 131, 38, 92, 112, 107, 126,},
+ {5, 10, 26, 90, 132, 46, 94, 129, 111, 143,},
+ {6, 11, 31, 91, 133, 54, 95, 143, 115, 157,},
+ {7, 13, 35, 92, 133, 61, 96, 156, 119, 170,},
+ {8, 15, 40, 93, 134, 68, 97, 167, 123, 180,},
+ {9, 17, 44, 94, 134, 74, 98, 177, 126, 189,},
+ {10, 19, 48, 94, 135, 80, 100, 185, 130, 197,},
+ {11, 20, 52, 95, 135, 86, 101, 192, 133, 204,},
+ {12, 22, 56, 96, 136, 92, 102, 199, 137, 210,},
+ {13, 24, 60, 96, 136, 97, 103, 205, 140, 215,},
+ {14, 26, 64, 97, 137, 103, 104, 210, 143, 219,},
+ {15, 27, 68, 98, 137, 108, 105, 215, 146, 223,},
+ {16, 29, 71, 98, 138, 112, 106, 219, 149, 227,},
+ {17, 31, 75, 99, 138, 117, 107, 223, 152, 230,},
+ {18, 32, 78, 100, 139, 121, 108, 226, 155, 233,},
+ {19, 34, 82, 100, 139, 126, 109, 229, 158, 235,},
+ {20, 36, 85, 101, 140, 130, 110, 231, 161, 238,},
+ {21, 37, 88, 102, 140, 134, 111, 234, 164, 239,},
+ {22, 39, 91, 102, 141, 138, 112, 236, 167, 241,},
+ {23, 40, 94, 103, 141, 141, 113, 238, 169, 243,},
+ {24, 42, 97, 104, 142, 145, 114, 240, 172, 244,},
+ {25, 44, 100, 104, 142, 149, 115, 241, 174, 245,},
+ {26, 45, 103, 105, 143, 152, 116, 243, 177, 246,},
+ {27, 47, 106, 105, 143, 155, 117, 244, 179, 247,},
+ {28, 48, 109, 106, 143, 158, 118, 245, 182, 248,},
+ {29, 50, 112, 107, 144, 161, 119, 246, 184, 249,},
+ {30, 52, 115, 107, 144, 164, 120, 247, 186, 250,},
+ {31, 53, 117, 108, 145, 167, 121, 248, 188, 250,},
+ {32, 55, 120, 109, 145, 170, 122, 249, 190, 251,},
+ {33, 56, 122, 109, 146, 173, 123, 249, 192, 252,},
+ {34, 58, 125, 110, 146, 175, 124, 250, 194, 252,},
+ {35, 59, 127, 110, 147, 178, 125, 251, 196, 252,},
+ {36, 61, 130, 111, 147, 180, 126, 251, 198, 253,},
+ {37, 62, 132, 112, 147, 183, 127, 251, 200, 253,},
+ {38, 64, 135, 112, 148, 185, 128, 252, 202, 253,},
+ {39, 65, 137, 113, 148, 187, 129, 252, 204, 254,},
+ {40, 67, 139, 114, 149, 189, 130, 253, 205, 254,},
+ {41, 68, 141, 114, 149, 191, 131, 253, 207, 254,},
+ {42, 70, 144, 115, 150, 193, 132, 253, 209, 254,},
+ {43, 71, 146, 115, 150, 195, 133, 254, 210, 254,},
+ {44, 72, 148, 116, 151, 197, 134, 254, 212, 255,},
+ {45, 74, 150, 117, 151, 199, 135, 254, 213, 255,},
+ {46, 75, 152, 117, 151, 201, 136, 254, 215, 255,},
+ {47, 77, 154, 118, 152, 202, 137, 254, 216, 255,},
+ {48, 78, 156, 119, 152, 204, 138, 254, 217, 255,},
+ {49, 80, 158, 119, 153, 206, 139, 255, 219, 255,},
+ {50, 81, 160, 120, 153, 207, 140, 255, 220, 255,},
+ {51, 82, 162, 120, 154, 209, 141, 255, 221, 255,},
+ {52, 84, 164, 121, 154, 210, 142, 255, 222, 255,},
+ {53, 85, 165, 122, 155, 212, 143, 255, 224, 255,},
+ {54, 87, 167, 122, 155, 213, 144, 255, 225, 255,},
+ {55, 88, 169, 123, 155, 215, 145, 255, 226, 255,},
+ {56, 89, 171, 124, 156, 216, 146, 255, 227, 255,},
+ {57, 91, 172, 124, 156, 217, 146, 255, 228, 255,},
+ {58, 92, 174, 125, 157, 218, 147, 255, 229, 255,},
+ {59, 93, 176, 126, 157, 220, 148, 255, 230, 255,},
+ {60, 95, 177, 126, 158, 221, 149, 255, 231, 255,},
+ {61, 96, 179, 127, 158, 222, 150, 255, 232, 255,},
+ {62, 97, 180, 127, 159, 223, 151, 255, 232, 255,},
+ {63, 99, 182, 128, 159, 224, 152, 255, 233, 255,},
+ {64, 100, 183, 129, 159, 225, 153, 255, 234, 255,},
+ {65, 101, 185, 129, 160, 226, 154, 255, 235, 255,},
+ {66, 103, 186, 130, 160, 227, 155, 255, 236, 255,},
+ {67, 104, 188, 131, 161, 228, 156, 255, 236, 255,},
+ {68, 105, 189, 131, 161, 229, 157, 255, 237, 255,},
+ {69, 106, 190, 132, 162, 230, 158, 255, 238, 255,},
+ {70, 108, 192, 133, 162, 231, 159, 255, 238, 255,},
+ {71, 109, 193, 133, 162, 231, 159, 255, 239, 255,},
+ {72, 110, 194, 134, 163, 232, 160, 255, 240, 255,},
+ {73, 111, 196, 134, 163, 233, 161, 255, 240, 255,},
+ {74, 113, 197, 135, 164, 234, 162, 255, 241, 255,},
+ {75, 114, 198, 136, 164, 235, 163, 255, 241, 255,},
+ {76, 115, 199, 136, 165, 235, 164, 255, 242, 255,},
+ {77, 116, 200, 137, 165, 236, 165, 255, 243, 255,},
+ {78, 118, 202, 138, 166, 237, 166, 255, 243, 255,},
+ {79, 119, 203, 138, 166, 237, 167, 255, 244, 255,},
+ {80, 120, 204, 139, 167, 238, 168, 255, 244, 255,},
+ {81, 121, 205, 140, 167, 239, 168, 255, 244, 255,},
+ {82, 123, 206, 140, 167, 239, 169, 255, 245, 255,},
+ {83, 124, 207, 141, 168, 240, 170, 255, 245, 255,},
+ {84, 125, 208, 142, 168, 240, 171, 255, 246, 255,},
+ {85, 126, 209, 142, 169, 241, 172, 255, 246, 255,},
+ {86, 127, 210, 143, 169, 241, 173, 255, 247, 255,},
+ {87, 129, 211, 144, 170, 242, 174, 255, 247, 255,},
+ {88, 130, 212, 144, 170, 242, 175, 255, 247, 255,},
+ {89, 131, 213, 145, 171, 243, 175, 255, 248, 255,},
+ {90, 132, 214, 146, 171, 243, 176, 255, 248, 255,},
+ {91, 133, 215, 146, 171, 244, 177, 255, 248, 255,},
+ {92, 134, 216, 147, 172, 244, 178, 255, 249, 255,},
+ {93, 136, 217, 148, 172, 245, 179, 255, 249, 255,},
+ {94, 137, 218, 148, 173, 245, 180, 255, 249, 255,},
+ {95, 138, 219, 149, 173, 245, 181, 255, 249, 255,},
+ {96, 139, 220, 150, 174, 246, 181, 255, 250, 255,},
+ {97, 140, 220, 150, 174, 246, 182, 255, 250, 255,},
+ {98, 141, 221, 151, 175, 247, 183, 255, 250, 255,},
+ {99, 142, 222, 152, 175, 247, 184, 255, 250, 255,},
+ {100, 144, 223, 152, 176, 247, 185, 255, 251, 255,},
+ {101, 145, 224, 153, 176, 248, 186, 255, 251, 255,},
+ {102, 146, 224, 154, 177, 248, 186, 255, 251, 255,},
+ {103, 147, 225, 154, 177, 248, 187, 255, 251, 255,},
+ {104, 148, 226, 155, 177, 248, 188, 255, 252, 255,},
+ {105, 149, 226, 156, 178, 249, 189, 255, 252, 255,},
+ {106, 150, 227, 156, 178, 249, 190, 255, 252, 255,},
+ {107, 151, 228, 157, 179, 249, 191, 255, 252, 255,},
+ {108, 152, 229, 158, 179, 250, 191, 255, 252, 255,},
+ {109, 153, 229, 158, 180, 250, 192, 255, 252, 255,},
+ {110, 154, 230, 159, 180, 250, 193, 255, 253, 255,},
+ {111, 155, 231, 160, 181, 250, 194, 255, 253, 255,},
+ {112, 157, 231, 160, 181, 251, 195, 255, 253, 255,},
+ {113, 158, 232, 161, 182, 251, 195, 255, 253, 255,},
+ {114, 159, 232, 162, 182, 251, 196, 255, 253, 255,},
+ {115, 160, 233, 162, 183, 251, 197, 255, 253, 255,},
+ {116, 161, 234, 163, 183, 251, 198, 255, 253, 255,},
+ {117, 162, 234, 164, 184, 252, 198, 255, 254, 255,},
+ {118, 163, 235, 165, 184, 252, 199, 255, 254, 255,},
+ {119, 164, 235, 165, 185, 252, 200, 255, 254, 255,},
+ {120, 165, 236, 166, 185, 252, 201, 255, 254, 255,},
+ {121, 166, 236, 167, 186, 252, 201, 255, 254, 255,},
+ {122, 167, 237, 167, 186, 252, 202, 255, 254, 255,},
+ {123, 168, 237, 168, 186, 253, 203, 255, 254, 255,},
+ {124, 169, 238, 169, 187, 253, 204, 255, 254, 255,},
+ {125, 170, 238, 169, 187, 253, 204, 255, 254, 255,},
+ {126, 171, 239, 170, 188, 253, 205, 255, 254, 255,},
+ {127, 172, 239, 171, 188, 253, 206, 255, 254, 255,},
+ {128, 173, 240, 171, 189, 253, 207, 255, 255, 255,},
+ {129, 174, 240, 172, 189, 253, 207, 255, 255, 255,},
+ {130, 175, 241, 173, 190, 253, 208, 255, 255, 255,},
+ {131, 176, 241, 174, 190, 254, 209, 255, 255, 255,},
+ {132, 177, 241, 174, 191, 254, 209, 255, 255, 255,},
+ {133, 178, 242, 175, 191, 254, 210, 255, 255, 255,},
+ {134, 179, 242, 176, 192, 254, 211, 255, 255, 255,},
+ {135, 180, 243, 176, 192, 254, 212, 255, 255, 255,},
+ {136, 180, 243, 177, 193, 254, 212, 255, 255, 255,},
+ {137, 181, 243, 178, 193, 254, 213, 255, 255, 255,},
+ {138, 182, 244, 179, 194, 254, 214, 255, 255, 255,},
+ {139, 183, 244, 179, 194, 254, 214, 255, 255, 255,},
+ {140, 184, 244, 180, 195, 254, 215, 255, 255, 255,},
+ {141, 185, 245, 181, 195, 254, 216, 255, 255, 255,},
+ {142, 186, 245, 181, 196, 255, 216, 255, 255, 255,},
+ {143, 187, 245, 182, 196, 255, 217, 255, 255, 255,},
+ {144, 188, 246, 183, 197, 255, 218, 255, 255, 255,},
+ {145, 189, 246, 183, 197, 255, 218, 255, 255, 255,},
+ {146, 190, 246, 184, 198, 255, 219, 255, 255, 255,},
+ {147, 191, 247, 185, 198, 255, 220, 255, 255, 255,},
+ {148, 191, 247, 186, 199, 255, 220, 255, 255, 255,},
+ {149, 192, 247, 186, 199, 255, 221, 255, 255, 255,},
+ {150, 193, 248, 187, 200, 255, 221, 255, 255, 255,},
+ {151, 194, 248, 188, 200, 255, 222, 255, 255, 255,},
+ {152, 195, 248, 188, 201, 255, 223, 255, 255, 255,},
+ {153, 196, 248, 189, 201, 255, 223, 255, 255, 255,},
+ {154, 197, 249, 190, 202, 255, 224, 255, 255, 255,},
+ {155, 198, 249, 191, 202, 255, 224, 255, 255, 255,},
+ {156, 198, 249, 191, 203, 255, 225, 255, 255, 255,},
+ {157, 199, 249, 192, 203, 255, 226, 255, 255, 255,},
+ {158, 200, 250, 193, 204, 255, 226, 255, 255, 255,},
+ {159, 201, 250, 193, 204, 255, 227, 255, 255, 255,},
+ {160, 202, 250, 194, 205, 255, 227, 255, 255, 255,},
+ {161, 203, 250, 195, 206, 255, 228, 255, 255, 255,},
+ {162, 203, 250, 196, 206, 255, 228, 255, 255, 255,},
+ {163, 204, 251, 196, 207, 255, 229, 255, 255, 255,},
+ {164, 205, 251, 197, 207, 255, 229, 255, 255, 255,},
+ {165, 206, 251, 198, 208, 255, 230, 255, 255, 255,},
+ {166, 207, 251, 198, 208, 255, 231, 255, 255, 255,},
+ {167, 207, 251, 199, 209, 255, 231, 255, 255, 255,},
+ {168, 208, 252, 200, 209, 255, 232, 255, 255, 255,},
+ {169, 209, 252, 201, 210, 255, 232, 255, 255, 255,},
+ {170, 210, 252, 201, 210, 255, 233, 255, 255, 255,},
+ {171, 211, 252, 202, 211, 255, 233, 255, 255, 255,},
+ {172, 211, 252, 203, 211, 255, 234, 255, 255, 255,},
+ {173, 212, 252, 203, 212, 255, 234, 255, 255, 255,},
+ {174, 213, 252, 204, 212, 255, 235, 255, 255, 255,},
+ {175, 214, 253, 205, 213, 255, 235, 255, 255, 255,},
+ {176, 214, 253, 206, 213, 255, 236, 255, 255, 255,},
+ {177, 215, 253, 206, 214, 255, 236, 255, 255, 255,},
+ {178, 216, 253, 207, 214, 255, 237, 255, 255, 255,},
+ {179, 217, 253, 208, 215, 255, 237, 255, 255, 255,},
+ {180, 217, 253, 208, 216, 255, 237, 255, 255, 255,},
+ {181, 218, 253, 209, 216, 255, 238, 255, 255, 255,},
+ {182, 219, 254, 210, 217, 255, 238, 255, 255, 255,},
+ {183, 220, 254, 211, 217, 255, 239, 255, 255, 255,},
+ {184, 220, 254, 211, 218, 255, 239, 255, 255, 255,},
+ {185, 221, 254, 212, 218, 255, 240, 255, 255, 255,},
+ {186, 222, 254, 213, 219, 255, 240, 255, 255, 255,},
+ {187, 222, 254, 213, 219, 255, 241, 255, 255, 255,},
+ {188, 223, 254, 214, 220, 255, 241, 255, 255, 255,},
+ {189, 224, 254, 215, 220, 255, 241, 255, 255, 255,},
+ {190, 225, 254, 215, 221, 255, 242, 255, 255, 255,},
+ {191, 225, 254, 216, 221, 255, 242, 255, 255, 255,},
+ {192, 226, 254, 217, 222, 255, 243, 255, 255, 255,},
+ {193, 227, 255, 218, 223, 255, 243, 255, 255, 255,},
+ {194, 227, 255, 218, 223, 255, 243, 255, 255, 255,},
+ {195, 228, 255, 219, 224, 255, 244, 255, 255, 255,},
+ {196, 229, 255, 220, 224, 255, 244, 255, 255, 255,},
+ {197, 229, 255, 220, 225, 255, 244, 255, 255, 255,},
+ {198, 230, 255, 221, 225, 255, 245, 255, 255, 255,},
+ {199, 230, 255, 222, 226, 255, 245, 255, 255, 255,},
+ {200, 231, 255, 222, 226, 255, 246, 255, 255, 255,},
+ {201, 232, 255, 223, 227, 255, 246, 255, 255, 255,},
+ {202, 232, 255, 224, 228, 255, 246, 255, 255, 255,},
+ {203, 233, 255, 224, 228, 255, 247, 255, 255, 255,},
+ {204, 234, 255, 225, 229, 255, 247, 255, 255, 255,},
+ {205, 234, 255, 226, 229, 255, 247, 255, 255, 255,},
+ {206, 235, 255, 227, 230, 255, 248, 255, 255, 255,},
+ {207, 235, 255, 227, 230, 255, 248, 255, 255, 255,},
+ {208, 236, 255, 228, 231, 255, 248, 255, 255, 255,},
+ {209, 237, 255, 229, 231, 255, 248, 255, 255, 255,},
+ {210, 237, 255, 229, 232, 255, 249, 255, 255, 255,},
+ {211, 238, 255, 230, 233, 255, 249, 255, 255, 255,},
+ {212, 238, 255, 231, 233, 255, 249, 255, 255, 255,},
+ {213, 239, 255, 231, 234, 255, 250, 255, 255, 255,},
+ {214, 239, 255, 232, 234, 255, 250, 255, 255, 255,},
+ {215, 240, 255, 233, 235, 255, 250, 255, 255, 255,},
+ {216, 241, 255, 233, 235, 255, 250, 255, 255, 255,},
+ {217, 241, 255, 234, 236, 255, 251, 255, 255, 255,},
+ {218, 242, 255, 235, 236, 255, 251, 255, 255, 255,},
+ {219, 242, 255, 235, 237, 255, 251, 255, 255, 255,},
+ {220, 243, 255, 236, 237, 255, 251, 255, 255, 255,},
+ {221, 243, 255, 236, 238, 255, 252, 255, 255, 255,},
+ {222, 244, 255, 237, 239, 255, 252, 255, 255, 255,},
+ {223, 244, 255, 238, 239, 255, 252, 255, 255, 255,},
+ {224, 245, 255, 238, 240, 255, 252, 255, 255, 255,},
+ {225, 245, 255, 239, 240, 255, 252, 255, 255, 255,},
+ {226, 246, 255, 240, 241, 255, 253, 255, 255, 255,},
+ {227, 246, 255, 240, 241, 255, 253, 255, 255, 255,},
+ {228, 247, 255, 241, 242, 255, 253, 255, 255, 255,},
+ {229, 247, 255, 242, 242, 255, 253, 255, 255, 255,},
+ {230, 248, 255, 242, 243, 255, 253, 255, 255, 255,},
+ {231, 248, 255, 243, 244, 255, 254, 255, 255, 255,},
+ {232, 248, 255, 243, 244, 255, 254, 255, 255, 255,},
+ {233, 249, 255, 244, 245, 255, 254, 255, 255, 255,},
+ {234, 249, 255, 245, 245, 255, 254, 255, 255, 255,},
+ {235, 250, 255, 245, 246, 255, 254, 255, 255, 255,},
+ {236, 250, 255, 246, 246, 255, 254, 255, 255, 255,},
+ {237, 251, 255, 246, 247, 255, 255, 255, 255, 255,},
+ {238, 251, 255, 247, 247, 255, 255, 255, 255, 255,},
+ {239, 251, 255, 248, 248, 255, 255, 255, 255, 255,},
+ {240, 252, 255, 248, 248, 255, 255, 255, 255, 255,},
+ {241, 252, 255, 249, 249, 255, 255, 255, 255, 255,},
+ {242, 252, 255, 249, 249, 255, 255, 255, 255, 255,},
+ {243, 253, 255, 250, 250, 255, 255, 255, 255, 255,},
+ {244, 253, 255, 250, 250, 255, 255, 255, 255, 255,},
+ {245, 253, 255, 251, 251, 255, 255, 255, 255, 255,},
+ {246, 254, 255, 251, 251, 255, 255, 255, 255, 255,},
+ {247, 254, 255, 252, 252, 255, 255, 255, 255, 255,},
+ {248, 254, 255, 252, 252, 255, 255, 255, 255, 255,},
+ {249, 255, 255, 253, 253, 255, 255, 255, 255, 255,},
+ {250, 255, 255, 253, 253, 255, 255, 255, 255, 255,},
+ {251, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
+ {252, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
+ {253, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {254, 255, 255, 255, 255, 255, 255, 255, 255, 255,},
+ {255, 255, 255, 255, 255, 255, 255, 255, 255, 255,}
+};
+
+const vp9_prob vp9_modelcoefprobs_gg625[COEFPROB_MODELS][ENTROPY_NODES - 1] = {
+ // Probs generated with a Generalized Gaussian (with shape parameter 0.625)
+ // source model with varying quantizer step size for a uniform quantizer
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use
+ {1, 2, 6, 88, 130, 10, 88, 35, 94, 40,},
+ {2, 4, 11, 89, 131, 19, 90, 60, 99, 67,},
+ {3, 6, 15, 90, 132, 27, 92, 80, 103, 88,},
+ {4, 7, 20, 91, 132, 34, 93, 97, 107, 105,},
+ {5, 9, 24, 92, 133, 41, 94, 112, 110, 120,},
+ {6, 11, 28, 93, 134, 48, 95, 125, 113, 132,},
+ {7, 13, 33, 93, 134, 54, 97, 136, 116, 143,},
+ {8, 14, 36, 94, 135, 60, 98, 146, 119, 152,},
+ {9, 16, 40, 95, 135, 65, 99, 155, 122, 161,},
+ {10, 18, 44, 95, 136, 70, 100, 163, 125, 168,},
+ {11, 19, 48, 96, 136, 75, 101, 170, 127, 175,},
+ {12, 21, 51, 97, 137, 80, 102, 176, 130, 181,},
+ {13, 23, 55, 97, 137, 85, 102, 182, 132, 187,},
+ {14, 24, 58, 98, 138, 89, 103, 188, 135, 192,},
+ {15, 26, 61, 99, 138, 94, 104, 193, 137, 196,},
+ {16, 27, 64, 99, 139, 98, 105, 197, 140, 201,},
+ {17, 29, 67, 100, 139, 102, 106, 201, 142, 205,},
+ {18, 30, 70, 101, 140, 106, 107, 205, 144, 208,},
+ {19, 32, 73, 101, 140, 109, 108, 209, 146, 211,},
+ {20, 34, 76, 102, 140, 113, 109, 212, 148, 214,},
+ {21, 35, 79, 102, 141, 116, 109, 215, 151, 217,},
+ {22, 37, 82, 103, 141, 120, 110, 218, 153, 220,},
+ {23, 38, 85, 103, 142, 123, 111, 220, 155, 222,},
+ {24, 40, 87, 104, 142, 126, 112, 223, 157, 224,},
+ {25, 41, 90, 105, 143, 129, 113, 225, 159, 226,},
+ {26, 42, 93, 105, 143, 132, 113, 227, 161, 228,},
+ {27, 44, 95, 106, 143, 135, 114, 229, 162, 230,},
+ {28, 45, 98, 106, 144, 138, 115, 230, 164, 232,},
+ {29, 47, 100, 107, 144, 141, 116, 232, 166, 233,},
+ {30, 48, 103, 107, 145, 144, 117, 234, 168, 235,},
+ {31, 50, 105, 108, 145, 146, 117, 235, 170, 236,},
+ {32, 51, 107, 108, 145, 149, 118, 236, 171, 237,},
+ {33, 52, 110, 109, 146, 151, 119, 238, 173, 238,},
+ {34, 54, 112, 110, 146, 154, 120, 239, 175, 239,},
+ {35, 55, 114, 110, 147, 156, 120, 240, 176, 240,},
+ {36, 57, 116, 111, 147, 158, 121, 241, 178, 241,},
+ {37, 58, 119, 111, 147, 161, 122, 242, 180, 242,},
+ {38, 59, 121, 112, 148, 163, 123, 243, 181, 243,},
+ {39, 61, 123, 112, 148, 165, 123, 244, 183, 244,},
+ {40, 62, 125, 113, 148, 167, 124, 244, 184, 245,},
+ {41, 63, 127, 113, 149, 169, 125, 245, 186, 245,},
+ {42, 65, 129, 114, 149, 171, 126, 246, 187, 246,},
+ {43, 66, 131, 114, 150, 173, 126, 246, 188, 247,},
+ {44, 67, 133, 115, 150, 175, 127, 247, 190, 247,},
+ {45, 69, 135, 115, 150, 177, 128, 247, 191, 248,},
+ {46, 70, 136, 116, 151, 178, 129, 248, 193, 248,},
+ {47, 71, 138, 116, 151, 180, 129, 248, 194, 249,},
+ {48, 73, 140, 117, 151, 182, 130, 249, 195, 249,},
+ {49, 74, 142, 118, 152, 184, 131, 249, 197, 250,},
+ {50, 75, 144, 118, 152, 185, 131, 250, 198, 250,},
+ {51, 76, 145, 119, 153, 187, 132, 250, 199, 250,},
+ {52, 78, 147, 119, 153, 188, 133, 251, 200, 251,},
+ {53, 79, 149, 120, 153, 190, 134, 251, 201, 251,},
+ {54, 80, 151, 120, 154, 192, 134, 251, 203, 251,},
+ {55, 82, 152, 121, 154, 193, 135, 251, 204, 252,},
+ {56, 83, 154, 121, 154, 194, 136, 252, 205, 252,},
+ {57, 84, 155, 122, 155, 196, 136, 252, 206, 252,},
+ {58, 85, 157, 122, 155, 197, 137, 252, 207, 252,},
+ {59, 86, 158, 123, 156, 199, 138, 252, 208, 252,},
+ {60, 88, 160, 123, 156, 200, 139, 253, 209, 253,},
+ {61, 89, 162, 124, 156, 201, 139, 253, 210, 253,},
+ {62, 90, 163, 124, 157, 202, 140, 253, 211, 253,},
+ {63, 91, 164, 125, 157, 204, 141, 253, 212, 253,},
+ {64, 93, 166, 125, 157, 205, 141, 253, 213, 253,},
+ {65, 94, 167, 126, 158, 206, 142, 254, 214, 254,},
+ {66, 95, 169, 126, 158, 207, 143, 254, 215, 254,},
+ {67, 96, 170, 127, 158, 208, 143, 254, 216, 254,},
+ {68, 97, 172, 127, 159, 209, 144, 254, 217, 254,},
+ {69, 98, 173, 128, 159, 210, 145, 254, 218, 254,},
+ {70, 100, 174, 128, 160, 212, 146, 254, 219, 254,},
+ {71, 101, 176, 129, 160, 213, 146, 254, 220, 254,},
+ {72, 102, 177, 130, 160, 214, 147, 254, 220, 254,},
+ {73, 103, 178, 130, 161, 215, 148, 255, 221, 255,},
+ {74, 104, 179, 131, 161, 216, 148, 255, 222, 255,},
+ {75, 105, 181, 131, 161, 217, 149, 255, 223, 255,},
+ {76, 107, 182, 132, 162, 217, 150, 255, 224, 255,},
+ {77, 108, 183, 132, 162, 218, 150, 255, 224, 255,},
+ {78, 109, 184, 133, 163, 219, 151, 255, 225, 255,},
+ {79, 110, 185, 133, 163, 220, 152, 255, 226, 255,},
+ {80, 111, 187, 134, 163, 221, 153, 255, 227, 255,},
+ {81, 112, 188, 134, 164, 222, 153, 255, 227, 255,},
+ {82, 113, 189, 135, 164, 223, 154, 255, 228, 255,},
+ {83, 115, 190, 135, 164, 223, 155, 255, 229, 255,},
+ {84, 116, 191, 136, 165, 224, 155, 255, 229, 255,},
+ {85, 117, 192, 136, 165, 225, 156, 255, 230, 255,},
+ {86, 118, 193, 137, 165, 226, 157, 255, 231, 255,},
+ {87, 119, 194, 137, 166, 226, 157, 255, 231, 255,},
+ {88, 120, 195, 138, 166, 227, 158, 255, 232, 255,},
+ {89, 121, 196, 139, 167, 228, 159, 255, 232, 255,},
+ {90, 122, 197, 139, 167, 229, 159, 255, 233, 255,},
+ {91, 123, 198, 140, 167, 229, 160, 255, 234, 255,},
+ {92, 124, 199, 140, 168, 230, 161, 255, 234, 255,},
+ {93, 125, 200, 141, 168, 231, 162, 255, 235, 255,},
+ {94, 127, 201, 141, 168, 231, 162, 255, 235, 255,},
+ {95, 128, 202, 142, 169, 232, 163, 255, 236, 255,},
+ {96, 129, 203, 142, 169, 232, 164, 255, 236, 255,},
+ {97, 130, 204, 143, 170, 233, 164, 255, 237, 255,},
+ {98, 131, 205, 143, 170, 234, 165, 255, 237, 255,},
+ {99, 132, 206, 144, 170, 234, 166, 255, 238, 255,},
+ {100, 133, 207, 144, 171, 235, 166, 255, 238, 255,},
+ {101, 134, 208, 145, 171, 235, 167, 255, 239, 255,},
+ {102, 135, 209, 146, 171, 236, 168, 255, 239, 255,},
+ {103, 136, 209, 146, 172, 236, 168, 255, 240, 255,},
+ {104, 137, 210, 147, 172, 237, 169, 255, 240, 255,},
+ {105, 138, 211, 147, 173, 237, 170, 255, 240, 255,},
+ {106, 139, 212, 148, 173, 238, 170, 255, 241, 255,},
+ {107, 140, 213, 148, 173, 238, 171, 255, 241, 255,},
+ {108, 141, 213, 149, 174, 239, 172, 255, 242, 255,},
+ {109, 142, 214, 149, 174, 239, 172, 255, 242, 255,},
+ {110, 143, 215, 150, 175, 240, 173, 255, 242, 255,},
+ {111, 144, 216, 151, 175, 240, 174, 255, 243, 255,},
+ {112, 145, 217, 151, 175, 240, 174, 255, 243, 255,},
+ {113, 146, 217, 152, 176, 241, 175, 255, 244, 255,},
+ {114, 147, 218, 152, 176, 241, 176, 255, 244, 255,},
+ {115, 148, 219, 153, 176, 242, 177, 255, 244, 255,},
+ {116, 149, 219, 153, 177, 242, 177, 255, 245, 255,},
+ {117, 150, 220, 154, 177, 242, 178, 255, 245, 255,},
+ {118, 151, 221, 155, 178, 243, 179, 255, 245, 255,},
+ {119, 152, 222, 155, 178, 243, 179, 255, 245, 255,},
+ {120, 153, 222, 156, 178, 244, 180, 255, 246, 255,},
+ {121, 154, 223, 156, 179, 244, 181, 255, 246, 255,},
+ {122, 155, 224, 157, 179, 244, 181, 255, 246, 255,},
+ {123, 156, 224, 157, 180, 245, 182, 255, 247, 255,},
+ {124, 157, 225, 158, 180, 245, 183, 255, 247, 255,},
+ {125, 158, 225, 159, 180, 245, 183, 255, 247, 255,},
+ {126, 159, 226, 159, 181, 246, 184, 255, 247, 255,},
+ {127, 160, 227, 160, 181, 246, 185, 255, 248, 255,},
+ {128, 161, 227, 160, 182, 246, 185, 255, 248, 255,},
+ {129, 162, 228, 161, 182, 246, 186, 255, 248, 255,},
+ {130, 163, 228, 161, 182, 247, 187, 255, 248, 255,},
+ {131, 164, 229, 162, 183, 247, 187, 255, 249, 255,},
+ {132, 165, 230, 163, 183, 247, 188, 255, 249, 255,},
+ {133, 166, 230, 163, 184, 248, 189, 255, 249, 255,},
+ {134, 166, 231, 164, 184, 248, 189, 255, 249, 255,},
+ {135, 167, 231, 164, 184, 248, 190, 255, 250, 255,},
+ {136, 168, 232, 165, 185, 248, 191, 255, 250, 255,},
+ {137, 169, 232, 166, 185, 248, 191, 255, 250, 255,},
+ {138, 170, 233, 166, 186, 249, 192, 255, 250, 255,},
+ {139, 171, 233, 167, 186, 249, 192, 255, 250, 255,},
+ {140, 172, 234, 167, 187, 249, 193, 255, 251, 255,},
+ {141, 173, 234, 168, 187, 249, 194, 255, 251, 255,},
+ {142, 174, 235, 169, 187, 250, 194, 255, 251, 255,},
+ {143, 175, 235, 169, 188, 250, 195, 255, 251, 255,},
+ {144, 176, 236, 170, 188, 250, 196, 255, 251, 255,},
+ {145, 177, 236, 170, 189, 250, 196, 255, 251, 255,},
+ {146, 177, 237, 171, 189, 250, 197, 255, 252, 255,},
+ {147, 178, 237, 172, 189, 251, 198, 255, 252, 255,},
+ {148, 179, 238, 172, 190, 251, 198, 255, 252, 255,},
+ {149, 180, 238, 173, 190, 251, 199, 255, 252, 255,},
+ {150, 181, 238, 173, 191, 251, 200, 255, 252, 255,},
+ {151, 182, 239, 174, 191, 251, 200, 255, 252, 255,},
+ {152, 183, 239, 175, 192, 251, 201, 255, 252, 255,},
+ {153, 184, 240, 175, 192, 252, 202, 255, 252, 255,},
+ {154, 184, 240, 176, 193, 252, 202, 255, 253, 255,},
+ {155, 185, 240, 177, 193, 252, 203, 255, 253, 255,},
+ {156, 186, 241, 177, 193, 252, 203, 255, 253, 255,},
+ {157, 187, 241, 178, 194, 252, 204, 255, 253, 255,},
+ {158, 188, 242, 178, 194, 252, 205, 255, 253, 255,},
+ {159, 189, 242, 179, 195, 252, 205, 255, 253, 255,},
+ {160, 190, 242, 180, 195, 253, 206, 255, 253, 255,},
+ {161, 190, 243, 180, 196, 253, 207, 255, 253, 255,},
+ {162, 191, 243, 181, 196, 253, 207, 255, 254, 255,},
+ {163, 192, 243, 182, 197, 253, 208, 255, 254, 255,},
+ {164, 193, 244, 182, 197, 253, 209, 255, 254, 255,},
+ {165, 194, 244, 183, 197, 253, 209, 255, 254, 255,},
+ {166, 195, 244, 184, 198, 253, 210, 255, 254, 255,},
+ {167, 196, 245, 184, 198, 253, 210, 255, 254, 255,},
+ {168, 196, 245, 185, 199, 253, 211, 255, 254, 255,},
+ {169, 197, 245, 186, 199, 254, 212, 255, 254, 255,},
+ {170, 198, 246, 186, 200, 254, 212, 255, 254, 255,},
+ {171, 199, 246, 187, 200, 254, 213, 255, 254, 255,},
+ {172, 200, 246, 188, 201, 254, 214, 255, 254, 255,},
+ {173, 200, 246, 188, 201, 254, 214, 255, 254, 255,},
+ {174, 201, 247, 189, 202, 254, 215, 255, 254, 255,},
+ {175, 202, 247, 189, 202, 254, 215, 255, 255, 255,},
+ {176, 203, 247, 190, 203, 254, 216, 255, 255, 255,},
+ {177, 204, 248, 191, 203, 254, 217, 255, 255, 255,},
+ {178, 204, 248, 191, 204, 254, 217, 255, 255, 255,},
+ {179, 205, 248, 192, 204, 254, 218, 255, 255, 255,},
+ {180, 206, 248, 193, 204, 254, 218, 255, 255, 255,},
+ {181, 207, 249, 194, 205, 255, 219, 255, 255, 255,},
+ {182, 208, 249, 194, 205, 255, 220, 255, 255, 255,},
+ {183, 208, 249, 195, 206, 255, 220, 255, 255, 255,},
+ {184, 209, 249, 196, 206, 255, 221, 255, 255, 255,},
+ {185, 210, 250, 196, 207, 255, 221, 255, 255, 255,},
+ {186, 211, 250, 197, 207, 255, 222, 255, 255, 255,},
+ {187, 211, 250, 198, 208, 255, 223, 255, 255, 255,},
+ {188, 212, 250, 198, 208, 255, 223, 255, 255, 255,},
+ {189, 213, 250, 199, 209, 255, 224, 255, 255, 255,},
+ {190, 214, 251, 200, 209, 255, 224, 255, 255, 255,},
+ {191, 215, 251, 200, 210, 255, 225, 255, 255, 255,},
+ {192, 215, 251, 201, 211, 255, 225, 255, 255, 255,},
+ {193, 216, 251, 202, 211, 255, 226, 255, 255, 255,},
+ {194, 217, 251, 203, 212, 255, 227, 255, 255, 255,},
+ {195, 218, 252, 203, 212, 255, 227, 255, 255, 255,},
+ {196, 218, 252, 204, 213, 255, 228, 255, 255, 255,},
+ {197, 219, 252, 205, 213, 255, 228, 255, 255, 255,},
+ {198, 220, 252, 205, 214, 255, 229, 255, 255, 255,},
+ {199, 221, 252, 206, 214, 255, 229, 255, 255, 255,},
+ {200, 221, 252, 207, 215, 255, 230, 255, 255, 255,},
+ {201, 222, 252, 208, 215, 255, 231, 255, 255, 255,},
+ {202, 223, 253, 208, 216, 255, 231, 255, 255, 255,},
+ {203, 223, 253, 209, 216, 255, 232, 255, 255, 255,},
+ {204, 224, 253, 210, 217, 255, 232, 255, 255, 255,},
+ {205, 225, 253, 211, 218, 255, 233, 255, 255, 255,},
+ {206, 226, 253, 211, 218, 255, 233, 255, 255, 255,},
+ {207, 226, 253, 212, 219, 255, 234, 255, 255, 255,},
+ {208, 227, 253, 213, 219, 255, 234, 255, 255, 255,},
+ {209, 228, 254, 214, 220, 255, 235, 255, 255, 255,},
+ {210, 228, 254, 214, 220, 255, 236, 255, 255, 255,},
+ {211, 229, 254, 215, 221, 255, 236, 255, 255, 255,},
+ {212, 230, 254, 216, 222, 255, 237, 255, 255, 255,},
+ {213, 230, 254, 217, 222, 255, 237, 255, 255, 255,},
+ {214, 231, 254, 217, 223, 255, 238, 255, 255, 255,},
+ {215, 232, 254, 218, 223, 255, 238, 255, 255, 255,},
+ {216, 233, 254, 219, 224, 255, 239, 255, 255, 255,},
+ {217, 233, 254, 220, 225, 255, 239, 255, 255, 255,},
+ {218, 234, 255, 220, 225, 255, 240, 255, 255, 255,},
+ {219, 235, 255, 221, 226, 255, 240, 255, 255, 255,},
+ {220, 235, 255, 222, 226, 255, 241, 255, 255, 255,},
+ {221, 236, 255, 223, 227, 255, 241, 255, 255, 255,},
+ {222, 237, 255, 224, 228, 255, 242, 255, 255, 255,},
+ {223, 237, 255, 224, 228, 255, 242, 255, 255, 255,},
+ {224, 238, 255, 225, 229, 255, 243, 255, 255, 255,},
+ {225, 238, 255, 226, 230, 255, 243, 255, 255, 255,},
+ {226, 239, 255, 227, 230, 255, 244, 255, 255, 255,},
+ {227, 240, 255, 228, 231, 255, 244, 255, 255, 255,},
+ {228, 240, 255, 228, 232, 255, 245, 255, 255, 255,},
+ {229, 241, 255, 229, 232, 255, 245, 255, 255, 255,},
+ {230, 242, 255, 230, 233, 255, 246, 255, 255, 255,},
+ {231, 242, 255, 231, 234, 255, 246, 255, 255, 255,},
+ {232, 243, 255, 232, 234, 255, 247, 255, 255, 255,},
+ {233, 243, 255, 233, 235, 255, 247, 255, 255, 255,},
+ {234, 244, 255, 233, 236, 255, 247, 255, 255, 255,},
+ {235, 245, 255, 234, 236, 255, 248, 255, 255, 255,},
+ {236, 245, 255, 235, 237, 255, 248, 255, 255, 255,},
+ {237, 246, 255, 236, 238, 255, 249, 255, 255, 255,},
+ {238, 247, 255, 237, 239, 255, 249, 255, 255, 255,},
+ {239, 247, 255, 238, 239, 255, 250, 255, 255, 255,},
+ {240, 248, 255, 239, 240, 255, 250, 255, 255, 255,},
+ {241, 248, 255, 240, 241, 255, 251, 255, 255, 255,},
+ {242, 249, 255, 241, 242, 255, 251, 255, 255, 255,},
+ {243, 249, 255, 241, 243, 255, 251, 255, 255, 255,},
+ {244, 250, 255, 242, 243, 255, 252, 255, 255, 255,},
+ {245, 251, 255, 243, 244, 255, 252, 255, 255, 255,},
+ {246, 251, 255, 244, 245, 255, 253, 255, 255, 255,},
+ {247, 252, 255, 245, 246, 255, 253, 255, 255, 255,},
+ {248, 252, 255, 246, 247, 255, 253, 255, 255, 255,},
+ {249, 253, 255, 247, 248, 255, 254, 255, 255, 255,},
+ {250, 253, 255, 248, 249, 255, 254, 255, 255, 255,},
+ {251, 254, 255, 249, 250, 255, 254, 255, 255, 255,},
+ {252, 254, 255, 251, 251, 255, 255, 255, 255, 255,},
+ {253, 255, 255, 252, 252, 255, 255, 255, 255, 255,},
+ {254, 255, 255, 253, 253, 255, 255, 255, 255, 255,},
+ {255, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
+};
+
+const vp9_prob vp9_modelcoefprobs_gg875p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = {
+ // Probs generated with a Generalized Gaussian (with shape parameter 0.625)
+ // source model with varying quantizer step size for a uniform quantizer
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use
+ {1, 1, 3, 86, 128, 6, 86, 22, 89, 28,},
+ {1, 2, 6, 86, 129, 11, 87, 42, 92, 52,},
+ {2, 3, 9, 87, 129, 17, 88, 59, 94, 73,},
+ {2, 4, 12, 87, 129, 22, 89, 75, 97, 92,},
+ {3, 5, 14, 88, 130, 27, 89, 90, 100, 108,},
+ {3, 6, 17, 88, 130, 33, 90, 103, 102, 122,},
+ {4, 7, 20, 88, 130, 37, 91, 115, 105, 135,},
+ {4, 8, 23, 89, 131, 42, 92, 126, 108, 147,},
+ {5, 9, 25, 89, 131, 47, 92, 137, 110, 157,},
+ {5, 10, 28, 90, 131, 52, 93, 146, 113, 167,},
+ {6, 11, 31, 90, 132, 56, 94, 154, 115, 175,},
+ {6, 12, 33, 90, 132, 60, 94, 162, 118, 183,},
+ {7, 13, 36, 91, 132, 65, 95, 170, 120, 190,},
+ {7, 14, 39, 91, 132, 69, 96, 176, 123, 196,},
+ {8, 15, 41, 92, 133, 73, 96, 182, 125, 201,},
+ {8, 16, 44, 92, 133, 77, 97, 188, 128, 206,},
+ {9, 17, 46, 92, 133, 81, 98, 193, 130, 211,},
+ {9, 18, 49, 93, 134, 85, 99, 198, 133, 215,},
+ {10, 19, 51, 93, 134, 89, 99, 203, 135, 219,},
+ {10, 20, 54, 93, 134, 92, 100, 207, 137, 222,},
+ {11, 21, 56, 94, 134, 96, 101, 211, 140, 226,},
+ {12, 22, 58, 94, 135, 100, 101, 214, 142, 228,},
+ {12, 23, 61, 95, 135, 103, 102, 217, 145, 231,},
+ {13, 24, 63, 95, 135, 106, 103, 220, 147, 233,},
+ {13, 25, 66, 95, 136, 110, 103, 223, 149, 235,},
+ {14, 26, 68, 96, 136, 113, 104, 226, 151, 237,},
+ {14, 27, 70, 96, 136, 116, 105, 228, 154, 239,},
+ {15, 28, 72, 97, 136, 119, 106, 230, 156, 241,},
+ {15, 29, 75, 97, 137, 122, 106, 232, 158, 242,},
+ {16, 30, 77, 97, 137, 125, 107, 234, 160, 243,},
+ {17, 31, 79, 98, 137, 128, 108, 236, 163, 245,},
+ {17, 32, 81, 98, 138, 131, 108, 237, 165, 246,},
+ {18, 33, 83, 99, 138, 134, 109, 239, 167, 247,},
+ {18, 34, 86, 99, 138, 137, 110, 240, 169, 248,},
+ {19, 35, 88, 99, 138, 140, 111, 242, 171, 248,},
+ {19, 36, 90, 100, 139, 142, 111, 243, 173, 249,},
+ {20, 37, 92, 100, 139, 145, 112, 244, 175, 250,},
+ {20, 38, 94, 101, 139, 148, 113, 245, 177, 250,},
+ {21, 39, 96, 101, 140, 150, 113, 246, 179, 251,},
+ {22, 40, 98, 101, 140, 153, 114, 246, 181, 251,},
+ {22, 41, 100, 102, 140, 155, 115, 247, 183, 252,},
+ {23, 42, 102, 102, 140, 157, 116, 248, 185, 252,},
+ {23, 43, 104, 103, 141, 160, 116, 249, 186, 253,},
+ {24, 44, 106, 103, 141, 162, 117, 249, 188, 253,},
+ {25, 45, 108, 103, 141, 164, 118, 250, 190, 253,},
+ {25, 46, 110, 104, 142, 166, 119, 250, 192, 253,},
+ {26, 47, 112, 104, 142, 168, 119, 251, 193, 254,},
+ {26, 48, 114, 105, 142, 171, 120, 251, 195, 254,},
+ {27, 49, 116, 105, 143, 173, 121, 252, 197, 254,},
+ {27, 50, 118, 105, 143, 175, 122, 252, 198, 254,},
+ {28, 51, 119, 106, 143, 177, 122, 252, 200, 254,},
+ {29, 52, 121, 106, 143, 179, 123, 253, 201, 255,},
+ {29, 53, 123, 107, 144, 180, 124, 253, 203, 255,},
+ {30, 54, 125, 107, 144, 182, 125, 253, 204, 255,},
+ {30, 55, 127, 108, 144, 184, 125, 253, 206, 255,},
+ {31, 56, 128, 108, 145, 186, 126, 254, 207, 255,},
+ {32, 57, 130, 108, 145, 188, 127, 254, 209, 255,},
+ {32, 58, 132, 109, 145, 189, 128, 254, 210, 255,},
+ {33, 59, 134, 109, 146, 191, 128, 254, 211, 255,},
+ {33, 60, 135, 110, 146, 193, 129, 254, 213, 255,},
+ {34, 61, 137, 110, 146, 194, 130, 254, 214, 255,},
+ {35, 62, 139, 111, 146, 196, 131, 255, 215, 255,},
+ {35, 63, 140, 111, 147, 197, 131, 255, 216, 255,},
+ {36, 64, 142, 112, 147, 199, 132, 255, 218, 255,},
+ {37, 65, 144, 112, 147, 200, 133, 255, 219, 255,},
+ {37, 66, 145, 112, 148, 202, 134, 255, 220, 255,},
+ {38, 67, 147, 113, 148, 203, 135, 255, 221, 255,},
+ {38, 68, 148, 113, 148, 204, 135, 255, 222, 255,},
+ {39, 69, 150, 114, 149, 206, 136, 255, 223, 255,},
+ {40, 70, 151, 114, 149, 207, 137, 255, 224, 255,},
+ {40, 71, 153, 115, 149, 208, 138, 255, 225, 255,},
+ {41, 72, 154, 115, 150, 210, 138, 255, 226, 255,},
+ {42, 73, 156, 116, 150, 211, 139, 255, 227, 255,},
+ {42, 74, 157, 116, 150, 212, 140, 255, 228, 255,},
+ {43, 75, 159, 117, 151, 213, 141, 255, 229, 255,},
+ {44, 76, 160, 117, 151, 214, 142, 255, 230, 255,},
+ {44, 77, 162, 117, 151, 216, 142, 255, 231, 255,},
+ {45, 78, 163, 118, 152, 217, 143, 255, 231, 255,},
+ {45, 79, 165, 118, 152, 218, 144, 255, 232, 255,},
+ {46, 80, 166, 119, 152, 219, 145, 255, 233, 255,},
+ {47, 81, 167, 119, 153, 220, 146, 255, 234, 255,},
+ {47, 82, 169, 120, 153, 221, 146, 255, 235, 255,},
+ {48, 83, 170, 120, 153, 222, 147, 255, 235, 255,},
+ {49, 84, 171, 121, 154, 223, 148, 255, 236, 255,},
+ {49, 85, 173, 121, 154, 224, 149, 255, 237, 255,},
+ {50, 86, 174, 122, 154, 225, 150, 255, 237, 255,},
+ {51, 87, 175, 122, 155, 225, 150, 255, 238, 255,},
+ {51, 88, 177, 123, 155, 226, 151, 255, 239, 255,},
+ {52, 89, 178, 123, 155, 227, 152, 255, 239, 255,},
+ {53, 90, 179, 124, 156, 228, 153, 255, 240, 255,},
+ {53, 91, 180, 124, 156, 229, 154, 255, 240, 255,},
+ {54, 92, 182, 125, 156, 230, 154, 255, 241, 255,},
+ {55, 93, 183, 125, 157, 230, 155, 255, 241, 255,},
+ {55, 94, 184, 126, 157, 231, 156, 255, 242, 255,},
+ {56, 95, 185, 126, 157, 232, 157, 255, 242, 255,},
+ {57, 96, 187, 127, 158, 233, 158, 255, 243, 255,},
+ {57, 97, 188, 127, 158, 233, 159, 255, 243, 255,},
+ {58, 98, 189, 128, 158, 234, 159, 255, 244, 255,},
+ {59, 99, 190, 128, 159, 235, 160, 255, 244, 255,},
+ {60, 100, 191, 129, 159, 235, 161, 255, 245, 255,},
+ {60, 101, 192, 129, 160, 236, 162, 255, 245, 255,},
+ {61, 102, 193, 130, 160, 237, 163, 255, 246, 255,},
+ {62, 103, 194, 131, 160, 237, 164, 255, 246, 255,},
+ {62, 104, 196, 131, 161, 238, 164, 255, 246, 255,},
+ {63, 105, 197, 132, 161, 238, 165, 255, 247, 255,},
+ {64, 106, 198, 132, 161, 239, 166, 255, 247, 255,},
+ {64, 107, 199, 133, 162, 239, 167, 255, 247, 255,},
+ {65, 108, 200, 133, 162, 240, 168, 255, 248, 255,},
+ {66, 109, 201, 134, 163, 241, 168, 255, 248, 255,},
+ {67, 110, 202, 134, 163, 241, 169, 255, 248, 255,},
+ {67, 111, 203, 135, 163, 242, 170, 255, 249, 255,},
+ {68, 112, 204, 135, 164, 242, 171, 255, 249, 255,},
+ {69, 113, 205, 136, 164, 242, 172, 255, 249, 255,},
+ {69, 114, 206, 137, 164, 243, 173, 255, 250, 255,},
+ {70, 115, 207, 137, 165, 243, 173, 255, 250, 255,},
+ {71, 116, 208, 138, 165, 244, 174, 255, 250, 255,},
+ {72, 117, 208, 138, 166, 244, 175, 255, 250, 255,},
+ {72, 118, 209, 139, 166, 245, 176, 255, 251, 255,},
+ {73, 119, 210, 139, 166, 245, 177, 255, 251, 255,},
+ {74, 120, 211, 140, 167, 245, 178, 255, 251, 255,},
+ {75, 121, 212, 141, 167, 246, 178, 255, 251, 255,},
+ {75, 122, 213, 141, 168, 246, 179, 255, 251, 255,},
+ {76, 123, 214, 142, 168, 246, 180, 255, 252, 255,},
+ {77, 124, 215, 142, 168, 247, 181, 255, 252, 255,},
+ {78, 125, 215, 143, 169, 247, 182, 255, 252, 255,},
+ {78, 126, 216, 144, 169, 247, 182, 255, 252, 255,},
+ {79, 127, 217, 144, 170, 248, 183, 255, 252, 255,},
+ {80, 128, 218, 145, 170, 248, 184, 255, 253, 255,},
+ {81, 129, 219, 145, 170, 248, 185, 255, 253, 255,},
+ {82, 130, 219, 146, 171, 249, 186, 255, 253, 255,},
+ {82, 131, 220, 147, 171, 249, 187, 255, 253, 255,},
+ {83, 132, 221, 147, 172, 249, 187, 255, 253, 255,},
+ {84, 133, 222, 148, 172, 249, 188, 255, 253, 255,},
+ {85, 134, 222, 148, 173, 250, 189, 255, 253, 255,},
+ {85, 135, 223, 149, 173, 250, 190, 255, 254, 255,},
+ {86, 136, 224, 150, 173, 250, 191, 255, 254, 255,},
+ {87, 137, 225, 150, 174, 250, 191, 255, 254, 255,},
+ {88, 138, 225, 151, 174, 251, 192, 255, 254, 255,},
+ {89, 139, 226, 152, 175, 251, 193, 255, 254, 255,},
+ {89, 140, 227, 152, 175, 251, 194, 255, 254, 255,},
+ {90, 141, 227, 153, 176, 251, 195, 255, 254, 255,},
+ {91, 142, 228, 153, 176, 251, 195, 255, 254, 255,},
+ {92, 143, 229, 154, 176, 252, 196, 255, 254, 255,},
+ {93, 144, 229, 155, 177, 252, 197, 255, 254, 255,},
+ {93, 145, 230, 155, 177, 252, 198, 255, 255, 255,},
+ {94, 146, 231, 156, 178, 252, 199, 255, 255, 255,},
+ {95, 147, 231, 157, 178, 252, 199, 255, 255, 255,},
+ {96, 148, 232, 157, 179, 252, 200, 255, 255, 255,},
+ {97, 149, 232, 158, 179, 253, 201, 255, 255, 255,},
+ {98, 150, 233, 159, 180, 253, 202, 255, 255, 255,},
+ {99, 151, 234, 159, 180, 253, 202, 255, 255, 255,},
+ {99, 152, 234, 160, 181, 253, 203, 255, 255, 255,},
+ {100, 153, 235, 161, 181, 253, 204, 255, 255, 255,},
+ {101, 154, 235, 162, 182, 253, 205, 255, 255, 255,},
+ {102, 155, 236, 162, 182, 253, 206, 255, 255, 255,},
+ {103, 156, 236, 163, 183, 254, 206, 255, 255, 255,},
+ {104, 157, 237, 164, 183, 254, 207, 255, 255, 255,},
+ {105, 158, 237, 164, 183, 254, 208, 255, 255, 255,},
+ {105, 159, 238, 165, 184, 254, 209, 255, 255, 255,},
+ {106, 160, 238, 166, 184, 254, 209, 255, 255, 255,},
+ {107, 161, 239, 166, 185, 254, 210, 255, 255, 255,},
+ {108, 162, 239, 167, 185, 254, 211, 255, 255, 255,},
+ {109, 163, 240, 168, 186, 254, 212, 255, 255, 255,},
+ {110, 164, 240, 169, 186, 254, 212, 255, 255, 255,},
+ {111, 165, 241, 169, 187, 254, 213, 255, 255, 255,},
+ {112, 166, 241, 170, 187, 255, 214, 255, 255, 255,},
+ {113, 167, 242, 171, 188, 255, 215, 255, 255, 255,},
+ {114, 168, 242, 172, 189, 255, 215, 255, 255, 255,},
+ {114, 169, 242, 172, 189, 255, 216, 255, 255, 255,},
+ {115, 170, 243, 173, 190, 255, 217, 255, 255, 255,},
+ {116, 171, 243, 174, 190, 255, 217, 255, 255, 255,},
+ {117, 172, 244, 175, 191, 255, 218, 255, 255, 255,},
+ {118, 173, 244, 175, 191, 255, 219, 255, 255, 255,},
+ {119, 174, 244, 176, 192, 255, 220, 255, 255, 255,},
+ {120, 175, 245, 177, 192, 255, 220, 255, 255, 255,},
+ {121, 176, 245, 178, 193, 255, 221, 255, 255, 255,},
+ {122, 177, 245, 178, 193, 255, 222, 255, 255, 255,},
+ {123, 178, 246, 179, 194, 255, 222, 255, 255, 255,},
+ {124, 179, 246, 180, 194, 255, 223, 255, 255, 255,},
+ {125, 180, 247, 181, 195, 255, 224, 255, 255, 255,},
+ {126, 181, 247, 182, 196, 255, 224, 255, 255, 255,},
+ {127, 182, 247, 182, 196, 255, 225, 255, 255, 255,},
+ {128, 183, 247, 183, 197, 255, 226, 255, 255, 255,},
+ {129, 184, 248, 184, 197, 255, 226, 255, 255, 255,},
+ {130, 185, 248, 185, 198, 255, 227, 255, 255, 255,},
+ {131, 186, 248, 186, 198, 255, 228, 255, 255, 255,},
+ {132, 187, 249, 186, 199, 255, 228, 255, 255, 255,},
+ {133, 188, 249, 187, 200, 255, 229, 255, 255, 255,},
+ {134, 189, 249, 188, 200, 255, 230, 255, 255, 255,},
+ {135, 190, 249, 189, 201, 255, 230, 255, 255, 255,},
+ {136, 191, 250, 190, 201, 255, 231, 255, 255, 255,},
+ {137, 192, 250, 191, 202, 255, 231, 255, 255, 255,},
+ {138, 193, 250, 191, 203, 255, 232, 255, 255, 255,},
+ {139, 194, 250, 192, 203, 255, 233, 255, 255, 255,},
+ {140, 195, 251, 193, 204, 255, 233, 255, 255, 255,},
+ {142, 196, 251, 194, 204, 255, 234, 255, 255, 255,},
+ {143, 197, 251, 195, 205, 255, 234, 255, 255, 255,},
+ {144, 198, 251, 196, 206, 255, 235, 255, 255, 255,},
+ {145, 199, 252, 197, 206, 255, 236, 255, 255, 255,},
+ {146, 200, 252, 197, 207, 255, 236, 255, 255, 255,},
+ {147, 201, 252, 198, 208, 255, 237, 255, 255, 255,},
+ {148, 202, 252, 199, 208, 255, 237, 255, 255, 255,},
+ {149, 203, 252, 200, 209, 255, 238, 255, 255, 255,},
+ {151, 204, 253, 201, 210, 255, 238, 255, 255, 255,},
+ {152, 205, 253, 202, 210, 255, 239, 255, 255, 255,},
+ {153, 206, 253, 203, 211, 255, 239, 255, 255, 255,},
+ {154, 207, 253, 204, 212, 255, 240, 255, 255, 255,},
+ {155, 208, 253, 205, 212, 255, 241, 255, 255, 255,},
+ {157, 209, 253, 206, 213, 255, 241, 255, 255, 255,},
+ {158, 210, 253, 206, 214, 255, 242, 255, 255, 255,},
+ {159, 211, 254, 207, 214, 255, 242, 255, 255, 255,},
+ {160, 212, 254, 208, 215, 255, 243, 255, 255, 255,},
+ {162, 213, 254, 209, 216, 255, 243, 255, 255, 255,},
+ {163, 214, 254, 210, 217, 255, 244, 255, 255, 255,},
+ {164, 215, 254, 211, 217, 255, 244, 255, 255, 255,},
+ {165, 216, 254, 212, 218, 255, 244, 255, 255, 255,},
+ {167, 217, 254, 213, 219, 255, 245, 255, 255, 255,},
+ {168, 218, 254, 214, 219, 255, 245, 255, 255, 255,},
+ {169, 219, 255, 215, 220, 255, 246, 255, 255, 255,},
+ {171, 220, 255, 216, 221, 255, 246, 255, 255, 255,},
+ {172, 221, 255, 217, 222, 255, 247, 255, 255, 255,},
+ {174, 222, 255, 218, 223, 255, 247, 255, 255, 255,},
+ {175, 223, 255, 219, 223, 255, 248, 255, 255, 255,},
+ {177, 224, 255, 220, 224, 255, 248, 255, 255, 255,},
+ {178, 225, 255, 221, 225, 255, 248, 255, 255, 255,},
+ {179, 226, 255, 222, 226, 255, 249, 255, 255, 255,},
+ {181, 227, 255, 223, 227, 255, 249, 255, 255, 255,},
+ {182, 228, 255, 224, 227, 255, 250, 255, 255, 255,},
+ {184, 229, 255, 225, 228, 255, 250, 255, 255, 255,},
+ {186, 230, 255, 226, 229, 255, 250, 255, 255, 255,},
+ {187, 231, 255, 227, 230, 255, 251, 255, 255, 255,},
+ {189, 232, 255, 228, 231, 255, 251, 255, 255, 255,},
+ {190, 233, 255, 229, 232, 255, 251, 255, 255, 255,},
+ {192, 234, 255, 230, 232, 255, 252, 255, 255, 255,},
+ {194, 235, 255, 231, 233, 255, 252, 255, 255, 255,},
+ {196, 236, 255, 232, 234, 255, 252, 255, 255, 255,},
+ {197, 237, 255, 233, 235, 255, 253, 255, 255, 255,},
+ {199, 238, 255, 234, 236, 255, 253, 255, 255, 255,},
+ {201, 239, 255, 235, 237, 255, 253, 255, 255, 255,},
+ {203, 240, 255, 237, 238, 255, 253, 255, 255, 255,},
+ {205, 241, 255, 238, 239, 255, 254, 255, 255, 255,},
+ {207, 242, 255, 239, 240, 255, 254, 255, 255, 255,},
+ {209, 243, 255, 240, 241, 255, 254, 255, 255, 255,},
+ {211, 244, 255, 241, 242, 255, 254, 255, 255, 255,},
+ {214, 245, 255, 242, 243, 255, 255, 255, 255, 255,},
+ {216, 246, 255, 243, 244, 255, 255, 255, 255, 255,},
+ {218, 247, 255, 244, 245, 255, 255, 255, 255, 255,},
+ {221, 248, 255, 246, 246, 255, 255, 255, 255, 255,},
+ {224, 249, 255, 247, 247, 255, 255, 255, 255, 255,},
+ {226, 250, 255, 248, 248, 255, 255, 255, 255, 255,},
+ {229, 251, 255, 249, 249, 255, 255, 255, 255, 255,},
+ {233, 252, 255, 251, 251, 255, 255, 255, 255, 255,},
+ {236, 253, 255, 252, 252, 255, 255, 255, 255, 255,},
+ {241, 254, 255, 253, 253, 255, 255, 255, 255, 255,},
+ {246, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
+};
+
+const vp9_prob vp9_modelcoefprobs_gg75p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = {
+ // Probs generated with a Generalized Gaussian (with shape parameter 0.625)
+ // source model with varying quantizer step size for a uniform quantizer
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use
+ {1, 1, 3, 86, 129, 6, 87, 21, 90, 26,},
+ {1, 2, 6, 87, 129, 11, 88, 39, 93, 47,},
+ {2, 3, 9, 87, 130, 16, 89, 55, 96, 65,},
+ {2, 4, 11, 88, 130, 21, 89, 69, 98, 81,},
+ {3, 5, 14, 88, 130, 26, 90, 82, 101, 95,},
+ {3, 6, 17, 89, 131, 31, 91, 94, 103, 107,},
+ {4, 7, 20, 89, 131, 35, 92, 105, 105, 119,},
+ {4, 8, 22, 90, 131, 40, 92, 115, 108, 129,},
+ {5, 9, 25, 90, 132, 44, 93, 124, 110, 138,},
+ {5, 10, 27, 91, 132, 48, 94, 133, 112, 147,},
+ {6, 11, 30, 91, 132, 52, 95, 141, 114, 155,},
+ {6, 12, 32, 92, 133, 56, 95, 148, 116, 162,},
+ {7, 13, 35, 92, 133, 60, 96, 155, 118, 168,},
+ {7, 14, 37, 92, 133, 64, 97, 161, 121, 174,},
+ {8, 15, 40, 93, 134, 68, 97, 167, 123, 180,},
+ {9, 16, 42, 93, 134, 71, 98, 173, 125, 185,},
+ {9, 17, 44, 94, 134, 75, 99, 178, 127, 190,},
+ {10, 18, 47, 94, 135, 78, 99, 182, 129, 195,},
+ {10, 19, 49, 94, 135, 82, 100, 187, 131, 199,},
+ {11, 20, 51, 95, 135, 85, 100, 191, 133, 202,},
+ {11, 21, 54, 95, 135, 88, 101, 195, 135, 206,},
+ {12, 22, 56, 96, 136, 92, 102, 199, 137, 209,},
+ {13, 23, 58, 96, 136, 95, 102, 202, 138, 213,},
+ {13, 24, 61, 96, 136, 98, 103, 206, 140, 215,},
+ {14, 25, 63, 97, 137, 101, 104, 209, 142, 218,},
+ {14, 26, 65, 97, 137, 104, 104, 211, 144, 221,},
+ {15, 27, 67, 98, 137, 107, 105, 214, 146, 223,},
+ {15, 28, 69, 98, 138, 110, 106, 217, 148, 225,},
+ {16, 29, 71, 98, 138, 113, 106, 219, 150, 227,},
+ {17, 30, 73, 99, 138, 115, 107, 221, 151, 229,},
+ {17, 31, 76, 99, 138, 118, 107, 223, 153, 231,},
+ {18, 32, 78, 100, 139, 121, 108, 225, 155, 232,},
+ {18, 33, 80, 100, 139, 123, 109, 227, 157, 234,},
+ {19, 34, 82, 100, 139, 126, 109, 229, 158, 235,},
+ {20, 35, 84, 101, 140, 128, 110, 231, 160, 237,},
+ {20, 36, 86, 101, 140, 131, 111, 232, 162, 238,},
+ {21, 37, 88, 102, 140, 133, 111, 234, 164, 239,},
+ {21, 38, 90, 102, 140, 136, 112, 235, 165, 240,},
+ {22, 39, 92, 102, 141, 138, 112, 236, 167, 241,},
+ {23, 40, 94, 103, 141, 140, 113, 237, 169, 242,},
+ {23, 41, 95, 103, 141, 143, 114, 238, 170, 243,},
+ {24, 42, 97, 103, 142, 145, 114, 240, 172, 244,},
+ {25, 43, 99, 104, 142, 147, 115, 241, 173, 245,},
+ {25, 44, 101, 104, 142, 149, 116, 242, 175, 246,},
+ {26, 45, 103, 105, 142, 151, 116, 242, 176, 246,},
+ {26, 46, 105, 105, 143, 153, 117, 243, 178, 247,},
+ {27, 47, 107, 105, 143, 156, 117, 244, 180, 248,},
+ {28, 48, 108, 106, 143, 158, 118, 245, 181, 248,},
+ {28, 49, 110, 106, 144, 159, 119, 245, 182, 249,},
+ {29, 50, 112, 107, 144, 161, 119, 246, 184, 249,},
+ {30, 51, 114, 107, 144, 163, 120, 247, 185, 250,},
+ {30, 52, 115, 108, 144, 165, 121, 247, 187, 250,},
+ {31, 53, 117, 108, 145, 167, 121, 248, 188, 250,},
+ {32, 54, 119, 108, 145, 169, 122, 248, 190, 251,},
+ {32, 55, 121, 109, 145, 171, 123, 249, 191, 251,},
+ {33, 56, 122, 109, 146, 172, 123, 249, 192, 251,},
+ {34, 57, 124, 110, 146, 174, 124, 250, 194, 252,},
+ {34, 58, 126, 110, 146, 176, 125, 250, 195, 252,},
+ {35, 59, 127, 110, 147, 177, 125, 250, 196, 252,},
+ {36, 60, 129, 111, 147, 179, 126, 251, 197, 253,},
+ {36, 61, 130, 111, 147, 181, 127, 251, 199, 253,},
+ {37, 62, 132, 112, 147, 182, 127, 251, 200, 253,},
+ {38, 63, 134, 112, 148, 184, 128, 252, 201, 253,},
+ {38, 64, 135, 112, 148, 185, 128, 252, 202, 253,},
+ {39, 65, 137, 113, 148, 187, 129, 252, 204, 254,},
+ {40, 66, 138, 113, 149, 188, 130, 253, 205, 254,},
+ {40, 67, 140, 114, 149, 190, 130, 253, 206, 254,},
+ {41, 68, 141, 114, 149, 191, 131, 253, 207, 254,},
+ {42, 69, 143, 115, 150, 192, 132, 253, 208, 254,},
+ {42, 70, 144, 115, 150, 194, 132, 253, 209, 254,},
+ {43, 71, 146, 115, 150, 195, 133, 254, 210, 254,},
+ {44, 72, 147, 116, 150, 197, 134, 254, 211, 255,},
+ {44, 73, 149, 116, 151, 198, 134, 254, 212, 255,},
+ {45, 74, 150, 117, 151, 199, 135, 254, 213, 255,},
+ {46, 75, 152, 117, 151, 200, 136, 254, 214, 255,},
+ {46, 76, 153, 118, 152, 202, 136, 254, 215, 255,},
+ {47, 77, 154, 118, 152, 203, 137, 254, 216, 255,},
+ {48, 78, 156, 119, 152, 204, 138, 254, 217, 255,},
+ {49, 79, 157, 119, 153, 205, 139, 255, 218, 255,},
+ {49, 80, 159, 119, 153, 206, 139, 255, 219, 255,},
+ {50, 81, 160, 120, 153, 207, 140, 255, 220, 255,},
+ {51, 82, 161, 120, 154, 208, 141, 255, 221, 255,},
+ {51, 83, 163, 121, 154, 210, 141, 255, 222, 255,},
+ {52, 84, 164, 121, 154, 211, 142, 255, 223, 255,},
+ {53, 85, 165, 122, 154, 212, 143, 255, 223, 255,},
+ {54, 86, 166, 122, 155, 213, 143, 255, 224, 255,},
+ {54, 87, 168, 123, 155, 214, 144, 255, 225, 255,},
+ {55, 88, 169, 123, 155, 215, 145, 255, 226, 255,},
+ {56, 89, 170, 123, 156, 216, 145, 255, 227, 255,},
+ {57, 90, 172, 124, 156, 217, 146, 255, 227, 255,},
+ {57, 91, 173, 124, 156, 218, 147, 255, 228, 255,},
+ {58, 92, 174, 125, 157, 218, 147, 255, 229, 255,},
+ {59, 93, 175, 125, 157, 219, 148, 255, 230, 255,},
+ {60, 94, 176, 126, 157, 220, 149, 255, 230, 255,},
+ {60, 95, 178, 126, 158, 221, 150, 255, 231, 255,},
+ {61, 96, 179, 127, 158, 222, 150, 255, 232, 255,},
+ {62, 97, 180, 127, 158, 223, 151, 255, 232, 255,},
+ {63, 98, 181, 128, 159, 224, 152, 255, 233, 255,},
+ {63, 99, 182, 128, 159, 224, 152, 255, 234, 255,},
+ {64, 100, 183, 129, 159, 225, 153, 255, 234, 255,},
+ {65, 101, 184, 129, 160, 226, 154, 255, 235, 255,},
+ {66, 102, 186, 130, 160, 227, 154, 255, 235, 255,},
+ {66, 103, 187, 130, 160, 227, 155, 255, 236, 255,},
+ {67, 104, 188, 131, 161, 228, 156, 255, 236, 255,},
+ {68, 105, 189, 131, 161, 229, 157, 255, 237, 255,},
+ {69, 106, 190, 132, 161, 230, 157, 255, 238, 255,},
+ {69, 107, 191, 132, 162, 230, 158, 255, 238, 255,},
+ {70, 108, 192, 133, 162, 231, 159, 255, 239, 255,},
+ {71, 109, 193, 133, 163, 232, 159, 255, 239, 255,},
+ {72, 110, 194, 134, 163, 232, 160, 255, 240, 255,},
+ {73, 111, 195, 134, 163, 233, 161, 255, 240, 255,},
+ {73, 112, 196, 135, 164, 233, 162, 255, 241, 255,},
+ {74, 113, 197, 135, 164, 234, 162, 255, 241, 255,},
+ {75, 114, 198, 136, 164, 235, 163, 255, 241, 255,},
+ {76, 115, 199, 136, 165, 235, 164, 255, 242, 255,},
+ {77, 116, 200, 137, 165, 236, 165, 255, 242, 255,},
+ {77, 117, 201, 137, 165, 236, 165, 255, 243, 255,},
+ {78, 118, 202, 138, 166, 237, 166, 255, 243, 255,},
+ {79, 119, 203, 138, 166, 237, 167, 255, 244, 255,},
+ {80, 120, 204, 139, 166, 238, 167, 255, 244, 255,},
+ {81, 121, 205, 139, 167, 238, 168, 255, 244, 255,},
+ {82, 122, 206, 140, 167, 239, 169, 255, 245, 255,},
+ {82, 123, 206, 141, 168, 239, 170, 255, 245, 255,},
+ {83, 124, 207, 141, 168, 240, 170, 255, 245, 255,},
+ {84, 125, 208, 142, 168, 240, 171, 255, 246, 255,},
+ {85, 126, 209, 142, 169, 241, 172, 255, 246, 255,},
+ {86, 127, 210, 143, 169, 241, 173, 255, 246, 255,},
+ {87, 128, 211, 143, 169, 242, 173, 255, 247, 255,},
+ {87, 129, 212, 144, 170, 242, 174, 255, 247, 255,},
+ {88, 130, 212, 144, 170, 242, 175, 255, 247, 255,},
+ {89, 131, 213, 145, 171, 243, 176, 255, 248, 255,},
+ {90, 132, 214, 146, 171, 243, 176, 255, 248, 255,},
+ {91, 133, 215, 146, 171, 244, 177, 255, 248, 255,},
+ {92, 134, 216, 147, 172, 244, 178, 255, 248, 255,},
+ {93, 135, 216, 147, 172, 244, 179, 255, 249, 255,},
+ {93, 136, 217, 148, 173, 245, 179, 255, 249, 255,},
+ {94, 137, 218, 148, 173, 245, 180, 255, 249, 255,},
+ {95, 138, 219, 149, 173, 245, 181, 255, 249, 255,},
+ {96, 139, 220, 150, 174, 246, 181, 255, 250, 255,},
+ {97, 140, 220, 150, 174, 246, 182, 255, 250, 255,},
+ {98, 141, 221, 151, 175, 246, 183, 255, 250, 255,},
+ {99, 142, 222, 151, 175, 247, 184, 255, 250, 255,},
+ {100, 143, 222, 152, 175, 247, 184, 255, 251, 255,},
+ {100, 144, 223, 153, 176, 247, 185, 255, 251, 255,},
+ {101, 145, 224, 153, 176, 248, 186, 255, 251, 255,},
+ {102, 146, 224, 154, 177, 248, 187, 255, 251, 255,},
+ {103, 147, 225, 154, 177, 248, 187, 255, 251, 255,},
+ {104, 148, 226, 155, 178, 248, 188, 255, 252, 255,},
+ {105, 149, 226, 156, 178, 249, 189, 255, 252, 255,},
+ {106, 150, 227, 156, 178, 249, 190, 255, 252, 255,},
+ {107, 151, 228, 157, 179, 249, 190, 255, 252, 255,},
+ {108, 152, 228, 158, 179, 249, 191, 255, 252, 255,},
+ {109, 153, 229, 158, 180, 250, 192, 255, 252, 255,},
+ {110, 154, 230, 159, 180, 250, 193, 255, 253, 255,},
+ {111, 155, 230, 159, 181, 250, 193, 255, 253, 255,},
+ {111, 156, 231, 160, 181, 250, 194, 255, 253, 255,},
+ {112, 157, 231, 161, 181, 251, 195, 255, 253, 255,},
+ {113, 158, 232, 161, 182, 251, 196, 255, 253, 255,},
+ {114, 159, 233, 162, 182, 251, 196, 255, 253, 255,},
+ {115, 160, 233, 163, 183, 251, 197, 255, 253, 255,},
+ {116, 161, 234, 163, 183, 251, 198, 255, 253, 255,},
+ {117, 162, 234, 164, 184, 252, 199, 255, 254, 255,},
+ {118, 163, 235, 165, 184, 252, 199, 255, 254, 255,},
+ {119, 164, 235, 165, 185, 252, 200, 255, 254, 255,},
+ {120, 165, 236, 166, 185, 252, 201, 255, 254, 255,},
+ {121, 166, 236, 167, 186, 252, 202, 255, 254, 255,},
+ {122, 167, 237, 167, 186, 252, 202, 255, 254, 255,},
+ {123, 168, 237, 168, 187, 253, 203, 255, 254, 255,},
+ {124, 169, 238, 169, 187, 253, 204, 255, 254, 255,},
+ {125, 170, 238, 169, 188, 253, 205, 255, 254, 255,},
+ {126, 171, 239, 170, 188, 253, 205, 255, 254, 255,},
+ {127, 172, 239, 171, 189, 253, 206, 255, 254, 255,},
+ {128, 173, 240, 172, 189, 253, 207, 255, 255, 255,},
+ {129, 174, 240, 172, 190, 253, 208, 255, 255, 255,},
+ {130, 175, 241, 173, 190, 253, 208, 255, 255, 255,},
+ {131, 176, 241, 174, 191, 254, 209, 255, 255, 255,},
+ {132, 177, 242, 175, 191, 254, 210, 255, 255, 255,},
+ {133, 178, 242, 175, 192, 254, 210, 255, 255, 255,},
+ {134, 179, 242, 176, 192, 254, 211, 255, 255, 255,},
+ {135, 180, 243, 177, 193, 254, 212, 255, 255, 255,},
+ {137, 181, 243, 177, 193, 254, 213, 255, 255, 255,},
+ {138, 182, 244, 178, 194, 254, 213, 255, 255, 255,},
+ {139, 183, 244, 179, 194, 254, 214, 255, 255, 255,},
+ {140, 184, 244, 180, 195, 254, 215, 255, 255, 255,},
+ {141, 185, 245, 181, 195, 254, 216, 255, 255, 255,},
+ {142, 186, 245, 181, 196, 255, 216, 255, 255, 255,},
+ {143, 187, 245, 182, 196, 255, 217, 255, 255, 255,},
+ {144, 188, 246, 183, 197, 255, 218, 255, 255, 255,},
+ {145, 189, 246, 184, 197, 255, 218, 255, 255, 255,},
+ {146, 190, 247, 184, 198, 255, 219, 255, 255, 255,},
+ {147, 191, 247, 185, 199, 255, 220, 255, 255, 255,},
+ {149, 192, 247, 186, 199, 255, 221, 255, 255, 255,},
+ {150, 193, 247, 187, 200, 255, 221, 255, 255, 255,},
+ {151, 194, 248, 188, 200, 255, 222, 255, 255, 255,},
+ {152, 195, 248, 188, 201, 255, 223, 255, 255, 255,},
+ {153, 196, 248, 189, 201, 255, 223, 255, 255, 255,},
+ {154, 197, 249, 190, 202, 255, 224, 255, 255, 255,},
+ {156, 198, 249, 191, 203, 255, 225, 255, 255, 255,},
+ {157, 199, 249, 192, 203, 255, 225, 255, 255, 255,},
+ {158, 200, 250, 193, 204, 255, 226, 255, 255, 255,},
+ {159, 201, 250, 193, 205, 255, 227, 255, 255, 255,},
+ {160, 202, 250, 194, 205, 255, 227, 255, 255, 255,},
+ {162, 203, 250, 195, 206, 255, 228, 255, 255, 255,},
+ {163, 204, 251, 196, 206, 255, 229, 255, 255, 255,},
+ {164, 205, 251, 197, 207, 255, 229, 255, 255, 255,},
+ {165, 206, 251, 198, 208, 255, 230, 255, 255, 255,},
+ {166, 207, 251, 199, 208, 255, 231, 255, 255, 255,},
+ {168, 208, 251, 200, 209, 255, 231, 255, 255, 255,},
+ {169, 209, 252, 201, 210, 255, 232, 255, 255, 255,},
+ {170, 210, 252, 201, 210, 255, 233, 255, 255, 255,},
+ {172, 211, 252, 202, 211, 255, 233, 255, 255, 255,},
+ {173, 212, 252, 203, 212, 255, 234, 255, 255, 255,},
+ {174, 213, 252, 204, 212, 255, 235, 255, 255, 255,},
+ {175, 214, 253, 205, 213, 255, 235, 255, 255, 255,},
+ {177, 215, 253, 206, 214, 255, 236, 255, 255, 255,},
+ {178, 216, 253, 207, 215, 255, 237, 255, 255, 255,},
+ {179, 217, 253, 208, 215, 255, 237, 255, 255, 255,},
+ {181, 218, 253, 209, 216, 255, 238, 255, 255, 255,},
+ {182, 219, 254, 210, 217, 255, 238, 255, 255, 255,},
+ {184, 220, 254, 211, 217, 255, 239, 255, 255, 255,},
+ {185, 221, 254, 212, 218, 255, 240, 255, 255, 255,},
+ {186, 222, 254, 213, 219, 255, 240, 255, 255, 255,},
+ {188, 223, 254, 214, 220, 255, 241, 255, 255, 255,},
+ {189, 224, 254, 215, 221, 255, 241, 255, 255, 255,},
+ {191, 225, 254, 216, 221, 255, 242, 255, 255, 255,},
+ {192, 226, 254, 217, 222, 255, 243, 255, 255, 255,},
+ {194, 227, 255, 218, 223, 255, 243, 255, 255, 255,},
+ {195, 228, 255, 219, 224, 255, 244, 255, 255, 255,},
+ {197, 229, 255, 220, 225, 255, 244, 255, 255, 255,},
+ {198, 230, 255, 221, 225, 255, 245, 255, 255, 255,},
+ {200, 231, 255, 222, 226, 255, 245, 255, 255, 255,},
+ {201, 232, 255, 223, 227, 255, 246, 255, 255, 255,},
+ {203, 233, 255, 224, 228, 255, 247, 255, 255, 255,},
+ {205, 234, 255, 226, 229, 255, 247, 255, 255, 255,},
+ {206, 235, 255, 227, 230, 255, 248, 255, 255, 255,},
+ {208, 236, 255, 228, 231, 255, 248, 255, 255, 255,},
+ {210, 237, 255, 229, 232, 255, 249, 255, 255, 255,},
+ {211, 238, 255, 230, 233, 255, 249, 255, 255, 255,},
+ {213, 239, 255, 231, 234, 255, 250, 255, 255, 255,},
+ {215, 240, 255, 233, 235, 255, 250, 255, 255, 255,},
+ {217, 241, 255, 234, 236, 255, 251, 255, 255, 255,},
+ {219, 242, 255, 235, 237, 255, 251, 255, 255, 255,},
+ {221, 243, 255, 236, 238, 255, 252, 255, 255, 255,},
+ {223, 244, 255, 237, 239, 255, 252, 255, 255, 255,},
+ {225, 245, 255, 239, 240, 255, 252, 255, 255, 255,},
+ {227, 246, 255, 240, 241, 255, 253, 255, 255, 255,},
+ {229, 247, 255, 241, 242, 255, 253, 255, 255, 255,},
+ {231, 248, 255, 243, 244, 255, 254, 255, 255, 255,},
+ {233, 249, 255, 244, 245, 255, 254, 255, 255, 255,},
+ {236, 250, 255, 246, 246, 255, 254, 255, 255, 255,},
+ {238, 251, 255, 247, 247, 255, 255, 255, 255, 255,},
+ {241, 252, 255, 249, 249, 255, 255, 255, 255, 255,},
+ {244, 253, 255, 250, 250, 255, 255, 255, 255, 255,},
+ {247, 254, 255, 252, 252, 255, 255, 255, 255, 255,},
+ {251, 255, 255, 254, 254, 255, 255, 255, 255, 255,},
+};
+
+const vp9_prob vp9_modelcoefprobs_gg625p1[COEFPROB_MODELS][ENTROPY_NODES - 1] = {
+ // Probs generated with a Generalized Gaussian (with shape parameter 0.625)
+ // source model with varying quantizer step size for a uniform quantizer
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, // do not use
+ {1, 1, 3, 87, 129, 6, 87, 20, 91, 24,},
+ {1, 2, 6, 88, 130, 11, 89, 36, 94, 41,},
+ {2, 3, 8, 88, 130, 15, 90, 50, 97, 56,},
+ {2, 4, 11, 89, 131, 20, 90, 62, 99, 70,},
+ {3, 5, 14, 90, 131, 24, 91, 74, 102, 81,},
+ {3, 6, 16, 90, 132, 29, 92, 84, 104, 92,},
+ {4, 7, 19, 91, 132, 33, 93, 93, 106, 101,},
+ {4, 8, 21, 91, 132, 37, 93, 102, 108, 110,},
+ {5, 9, 24, 92, 133, 40, 94, 110, 110, 118,},
+ {5, 10, 26, 92, 133, 44, 95, 118, 111, 125,},
+ {6, 11, 29, 93, 134, 48, 96, 125, 113, 132,},
+ {7, 12, 31, 93, 134, 51, 96, 132, 115, 139,},
+ {7, 13, 33, 93, 134, 55, 97, 138, 117, 145,},
+ {8, 14, 36, 94, 135, 58, 97, 144, 119, 150,},
+ {8, 15, 38, 94, 135, 62, 98, 149, 120, 155,},
+ {9, 16, 40, 95, 135, 65, 99, 154, 122, 160,},
+ {10, 17, 42, 95, 136, 68, 99, 159, 124, 165,},
+ {10, 18, 45, 96, 136, 71, 100, 164, 125, 169,},
+ {11, 19, 47, 96, 136, 74, 100, 168, 127, 174,},
+ {11, 20, 49, 96, 136, 77, 101, 173, 128, 177,},
+ {12, 21, 51, 97, 137, 80, 102, 176, 130, 181,},
+ {13, 22, 53, 97, 137, 83, 102, 180, 131, 185,},
+ {13, 23, 55, 98, 137, 86, 103, 184, 133, 188,},
+ {14, 24, 57, 98, 138, 89, 103, 187, 135, 191,},
+ {14, 25, 59, 98, 138, 91, 104, 190, 136, 194,},
+ {15, 26, 61, 99, 138, 94, 104, 193, 138, 197,},
+ {16, 27, 64, 99, 139, 97, 105, 196, 139, 200,},
+ {16, 28, 66, 100, 139, 99, 106, 199, 141, 202,},
+ {17, 29, 68, 100, 139, 102, 106, 201, 142, 205,},
+ {18, 30, 69, 100, 139, 104, 107, 204, 143, 207,},
+ {18, 31, 71, 101, 140, 107, 107, 206, 145, 209,},
+ {19, 32, 73, 101, 140, 109, 108, 209, 146, 211,},
+ {20, 33, 75, 102, 140, 112, 108, 211, 148, 213,},
+ {20, 34, 77, 102, 141, 114, 109, 213, 149, 215,},
+ {21, 35, 79, 102, 141, 116, 109, 215, 150, 217,},
+ {22, 36, 81, 103, 141, 119, 110, 217, 152, 219,},
+ {22, 37, 83, 103, 141, 121, 110, 218, 153, 220,},
+ {23, 38, 85, 103, 142, 123, 111, 220, 155, 222,},
+ {24, 39, 87, 104, 142, 125, 112, 222, 156, 224,},
+ {24, 40, 88, 104, 142, 127, 112, 223, 157, 225,},
+ {25, 41, 90, 105, 143, 129, 113, 225, 159, 226,},
+ {26, 42, 92, 105, 143, 131, 113, 226, 160, 228,},
+ {26, 43, 94, 105, 143, 133, 114, 227, 161, 229,},
+ {27, 44, 95, 106, 143, 135, 114, 229, 162, 230,},
+ {28, 45, 97, 106, 144, 137, 115, 230, 164, 231,},
+ {28, 46, 99, 107, 144, 139, 115, 231, 165, 232,},
+ {29, 47, 101, 107, 144, 141, 116, 232, 166, 233,},
+ {30, 48, 102, 107, 145, 143, 116, 233, 168, 234,},
+ {31, 49, 104, 108, 145, 145, 117, 234, 169, 235,},
+ {31, 50, 106, 108, 145, 147, 118, 235, 170, 236,},
+ {32, 51, 107, 108, 145, 149, 118, 236, 171, 237,},
+ {33, 52, 109, 109, 146, 150, 119, 237, 172, 238,},
+ {33, 53, 111, 109, 146, 152, 119, 238, 174, 239,},
+ {34, 54, 112, 110, 146, 154, 120, 239, 175, 240,},
+ {35, 55, 114, 110, 146, 156, 120, 240, 176, 240,},
+ {36, 56, 115, 110, 147, 157, 121, 240, 177, 241,},
+ {36, 57, 117, 111, 147, 159, 121, 241, 178, 242,},
+ {37, 58, 119, 111, 147, 161, 122, 242, 180, 242,},
+ {38, 59, 120, 112, 148, 162, 122, 242, 181, 243,},
+ {38, 60, 122, 112, 148, 164, 123, 243, 182, 244,},
+ {39, 61, 123, 112, 148, 165, 124, 244, 183, 244,},
+ {40, 62, 125, 113, 148, 167, 124, 244, 184, 245,},
+ {41, 63, 126, 113, 149, 168, 125, 245, 185, 245,},
+ {41, 64, 128, 114, 149, 170, 125, 245, 186, 246,},
+ {42, 65, 129, 114, 149, 171, 126, 246, 187, 246,},
+ {43, 66, 131, 114, 150, 173, 126, 246, 188, 247,},
+ {44, 67, 132, 115, 150, 174, 127, 247, 189, 247,},
+ {44, 68, 134, 115, 150, 176, 127, 247, 191, 247,},
+ {45, 69, 135, 116, 150, 177, 128, 248, 192, 248,},
+ {46, 70, 136, 116, 151, 178, 129, 248, 193, 248,},
+ {47, 71, 138, 116, 151, 180, 129, 248, 194, 249,},
+ {48, 72, 139, 117, 151, 181, 130, 249, 195, 249,},
+ {48, 73, 141, 117, 152, 183, 130, 249, 196, 249,},
+ {49, 74, 142, 118, 152, 184, 131, 249, 197, 250,},
+ {50, 75, 143, 118, 152, 185, 131, 250, 198, 250,},
+ {51, 76, 145, 118, 152, 186, 132, 250, 199, 250,},
+ {51, 77, 146, 119, 153, 188, 132, 250, 200, 250,},
+ {52, 78, 148, 119, 153, 189, 133, 251, 201, 251,},
+ {53, 79, 149, 120, 153, 190, 134, 251, 201, 251,},
+ {54, 80, 150, 120, 154, 191, 134, 251, 202, 251,},
+ {55, 81, 151, 120, 154, 192, 135, 251, 203, 251,},
+ {55, 82, 153, 121, 154, 194, 135, 252, 204, 252,},
+ {56, 83, 154, 121, 155, 195, 136, 252, 205, 252,},
+ {57, 84, 155, 122, 155, 196, 136, 252, 206, 252,},
+ {58, 85, 157, 122, 155, 197, 137, 252, 207, 252,},
+ {59, 86, 158, 123, 155, 198, 138, 252, 208, 252,},
+ {59, 87, 159, 123, 156, 199, 138, 253, 209, 253,},
+ {60, 88, 160, 123, 156, 200, 139, 253, 210, 253,},
+ {61, 89, 162, 124, 156, 201, 139, 253, 210, 253,},
+ {62, 90, 163, 124, 157, 202, 140, 253, 211, 253,},
+ {63, 91, 164, 125, 157, 203, 140, 253, 212, 253,},
+ {64, 92, 165, 125, 157, 204, 141, 253, 213, 253,},
+ {64, 93, 166, 126, 158, 205, 142, 254, 214, 253,},
+ {65, 94, 168, 126, 158, 206, 142, 254, 214, 254,},
+ {66, 95, 169, 126, 158, 207, 143, 254, 215, 254,},
+ {67, 96, 170, 127, 158, 208, 143, 254, 216, 254,},
+ {68, 97, 171, 127, 159, 209, 144, 254, 217, 254,},
+ {69, 98, 172, 128, 159, 210, 145, 254, 218, 254,},
+ {69, 99, 173, 128, 159, 211, 145, 254, 218, 254,},
+ {70, 100, 175, 129, 160, 212, 146, 254, 219, 254,},
+ {71, 101, 176, 129, 160, 213, 146, 254, 220, 254,},
+ {72, 102, 177, 130, 160, 214, 147, 254, 220, 254,},
+ {73, 103, 178, 130, 161, 214, 148, 255, 221, 255,},
+ {74, 104, 179, 130, 161, 215, 148, 255, 222, 255,},
+ {75, 105, 180, 131, 161, 216, 149, 255, 223, 255,},
+ {75, 106, 181, 131, 162, 217, 149, 255, 223, 255,},
+ {76, 107, 182, 132, 162, 218, 150, 255, 224, 255,},
+ {77, 108, 183, 132, 162, 219, 151, 255, 225, 255,},
+ {78, 109, 184, 133, 163, 219, 151, 255, 225, 255,},
+ {79, 110, 185, 133, 163, 220, 152, 255, 226, 255,},
+ {80, 111, 186, 134, 163, 221, 152, 255, 226, 255,},
+ {81, 112, 187, 134, 164, 222, 153, 255, 227, 255,},
+ {82, 113, 188, 135, 164, 222, 154, 255, 228, 255,},
+ {83, 114, 189, 135, 164, 223, 154, 255, 228, 255,},
+ {83, 115, 190, 136, 165, 224, 155, 255, 229, 255,},
+ {84, 116, 191, 136, 165, 224, 156, 255, 230, 255,},
+ {85, 117, 192, 137, 165, 225, 156, 255, 230, 255,},
+ {86, 118, 193, 137, 166, 226, 157, 255, 231, 255,},
+ {87, 119, 194, 137, 166, 226, 157, 255, 231, 255,},
+ {88, 120, 195, 138, 166, 227, 158, 255, 232, 255,},
+ {89, 121, 196, 138, 167, 228, 159, 255, 232, 255,},
+ {90, 122, 197, 139, 167, 228, 159, 255, 233, 255,},
+ {91, 123, 198, 139, 167, 229, 160, 255, 233, 255,},
+ {92, 124, 199, 140, 168, 230, 161, 255, 234, 255,},
+ {93, 125, 200, 140, 168, 230, 161, 255, 234, 255,},
+ {93, 126, 201, 141, 168, 231, 162, 255, 235, 255,},
+ {94, 127, 202, 141, 169, 231, 163, 255, 235, 255,},
+ {95, 128, 203, 142, 169, 232, 163, 255, 236, 255,},
+ {96, 129, 203, 142, 169, 233, 164, 255, 236, 255,},
+ {97, 130, 204, 143, 170, 233, 164, 255, 237, 255,},
+ {98, 131, 205, 143, 170, 234, 165, 255, 237, 255,},
+ {99, 132, 206, 144, 170, 234, 166, 255, 238, 255,},
+ {100, 133, 207, 145, 171, 235, 166, 255, 238, 255,},
+ {101, 134, 208, 145, 171, 235, 167, 255, 239, 255,},
+ {102, 135, 209, 146, 171, 236, 168, 255, 239, 255,},
+ {103, 136, 209, 146, 172, 236, 168, 255, 240, 255,},
+ {104, 137, 210, 147, 172, 237, 169, 255, 240, 255,},
+ {105, 138, 211, 147, 173, 237, 170, 255, 240, 255,},
+ {106, 139, 212, 148, 173, 238, 170, 255, 241, 255,},
+ {107, 140, 213, 148, 173, 238, 171, 255, 241, 255,},
+ {108, 141, 213, 149, 174, 239, 172, 255, 242, 255,},
+ {109, 142, 214, 149, 174, 239, 172, 255, 242, 255,},
+ {110, 143, 215, 150, 174, 240, 173, 255, 242, 255,},
+ {111, 144, 216, 150, 175, 240, 174, 255, 243, 255,},
+ {112, 145, 216, 151, 175, 240, 174, 255, 243, 255,},
+ {113, 146, 217, 152, 176, 241, 175, 255, 243, 255,},
+ {114, 147, 218, 152, 176, 241, 176, 255, 244, 255,},
+ {115, 148, 219, 153, 176, 242, 176, 255, 244, 255,},
+ {116, 149, 219, 153, 177, 242, 177, 255, 244, 255,},
+ {117, 150, 220, 154, 177, 242, 178, 255, 245, 255,},
+ {118, 151, 221, 154, 178, 243, 178, 255, 245, 255,},
+ {119, 152, 221, 155, 178, 243, 179, 255, 245, 255,},
+ {120, 153, 222, 156, 178, 244, 180, 255, 246, 255,},
+ {121, 154, 223, 156, 179, 244, 180, 255, 246, 255,},
+ {122, 155, 223, 157, 179, 244, 181, 255, 246, 255,},
+ {123, 156, 224, 157, 180, 245, 182, 255, 247, 255,},
+ {124, 157, 225, 158, 180, 245, 183, 255, 247, 255,},
+ {125, 158, 225, 159, 180, 245, 183, 255, 247, 255,},
+ {126, 159, 226, 159, 181, 246, 184, 255, 247, 255,},
+ {127, 160, 227, 160, 181, 246, 185, 255, 248, 255,},
+ {128, 161, 227, 160, 182, 246, 185, 255, 248, 255,},
+ {129, 162, 228, 161, 182, 246, 186, 255, 248, 255,},
+ {130, 163, 229, 162, 183, 247, 187, 255, 248, 255,},
+ {131, 164, 229, 162, 183, 247, 187, 255, 249, 255,},
+ {132, 165, 230, 163, 183, 247, 188, 255, 249, 255,},
+ {133, 166, 230, 163, 184, 248, 189, 255, 249, 255,},
+ {135, 167, 231, 164, 184, 248, 190, 255, 249, 255,},
+ {136, 168, 232, 165, 185, 248, 190, 255, 250, 255,},
+ {137, 169, 232, 165, 185, 248, 191, 255, 250, 255,},
+ {138, 170, 233, 166, 186, 249, 192, 255, 250, 255,},
+ {139, 171, 233, 167, 186, 249, 192, 255, 250, 255,},
+ {140, 172, 234, 167, 187, 249, 193, 255, 251, 255,},
+ {141, 173, 234, 168, 187, 249, 194, 255, 251, 255,},
+ {142, 174, 235, 169, 187, 250, 195, 255, 251, 255,},
+ {143, 175, 235, 169, 188, 250, 195, 255, 251, 255,},
+ {144, 176, 236, 170, 188, 250, 196, 255, 251, 255,},
+ {146, 177, 236, 171, 189, 250, 197, 255, 251, 255,},
+ {147, 178, 237, 171, 189, 251, 197, 255, 252, 255,},
+ {148, 179, 237, 172, 190, 251, 198, 255, 252, 255,},
+ {149, 180, 238, 173, 190, 251, 199, 255, 252, 255,},
+ {150, 181, 238, 173, 191, 251, 200, 255, 252, 255,},
+ {151, 182, 239, 174, 191, 251, 200, 255, 252, 255,},
+ {152, 183, 239, 175, 192, 251, 201, 255, 252, 255,},
+ {153, 184, 240, 176, 192, 252, 202, 255, 253, 255,},
+ {155, 185, 240, 176, 193, 252, 203, 255, 253, 255,},
+ {156, 186, 241, 177, 193, 252, 203, 255, 253, 255,},
+ {157, 187, 241, 178, 194, 252, 204, 255, 253, 255,},
+ {158, 188, 242, 179, 194, 252, 205, 255, 253, 255,},
+ {159, 189, 242, 179, 195, 252, 206, 255, 253, 255,},
+ {160, 190, 242, 180, 195, 253, 206, 255, 253, 255,},
+ {162, 191, 243, 181, 196, 253, 207, 255, 253, 255,},
+ {163, 192, 243, 182, 196, 253, 208, 255, 254, 255,},
+ {164, 193, 244, 182, 197, 253, 209, 255, 254, 255,},
+ {165, 194, 244, 183, 198, 253, 209, 255, 254, 255,},
+ {166, 195, 244, 184, 198, 253, 210, 255, 254, 255,},
+ {168, 196, 245, 185, 199, 253, 211, 255, 254, 255,},
+ {169, 197, 245, 185, 199, 254, 212, 255, 254, 255,},
+ {170, 198, 246, 186, 200, 254, 212, 255, 254, 255,},
+ {171, 199, 246, 187, 200, 254, 213, 255, 254, 255,},
+ {172, 200, 246, 188, 201, 254, 214, 255, 254, 255,},
+ {174, 201, 247, 189, 201, 254, 215, 255, 254, 255,},
+ {175, 202, 247, 189, 202, 254, 215, 255, 255, 255,},
+ {176, 203, 247, 190, 203, 254, 216, 255, 255, 255,},
+ {177, 204, 248, 191, 203, 254, 217, 255, 255, 255,},
+ {179, 205, 248, 192, 204, 254, 218, 255, 255, 255,},
+ {180, 206, 248, 193, 204, 254, 218, 255, 255, 255,},
+ {181, 207, 249, 194, 205, 255, 219, 255, 255, 255,},
+ {183, 208, 249, 195, 206, 255, 220, 255, 255, 255,},
+ {184, 209, 249, 195, 206, 255, 221, 255, 255, 255,},
+ {185, 210, 250, 196, 207, 255, 221, 255, 255, 255,},
+ {186, 211, 250, 197, 208, 255, 222, 255, 255, 255,},
+ {188, 212, 250, 198, 208, 255, 223, 255, 255, 255,},
+ {189, 213, 250, 199, 209, 255, 224, 255, 255, 255,},
+ {190, 214, 251, 200, 210, 255, 224, 255, 255, 255,},
+ {192, 215, 251, 201, 210, 255, 225, 255, 255, 255,},
+ {193, 216, 251, 202, 211, 255, 226, 255, 255, 255,},
+ {194, 217, 251, 203, 212, 255, 227, 255, 255, 255,},
+ {196, 218, 252, 204, 212, 255, 228, 255, 255, 255,},
+ {197, 219, 252, 205, 213, 255, 228, 255, 255, 255,},
+ {198, 220, 252, 206, 214, 255, 229, 255, 255, 255,},
+ {200, 221, 252, 207, 215, 255, 230, 255, 255, 255,},
+ {201, 222, 252, 208, 215, 255, 231, 255, 255, 255,},
+ {202, 223, 253, 209, 216, 255, 231, 255, 255, 255,},
+ {204, 224, 253, 210, 217, 255, 232, 255, 255, 255,},
+ {205, 225, 253, 211, 218, 255, 233, 255, 255, 255,},
+ {207, 226, 253, 212, 218, 255, 234, 255, 255, 255,},
+ {208, 227, 253, 213, 219, 255, 234, 255, 255, 255,},
+ {209, 228, 254, 214, 220, 255, 235, 255, 255, 255,},
+ {211, 229, 254, 215, 221, 255, 236, 255, 255, 255,},
+ {212, 230, 254, 216, 222, 255, 237, 255, 255, 255,},
+ {214, 231, 254, 217, 223, 255, 238, 255, 255, 255,},
+ {215, 232, 254, 218, 223, 255, 238, 255, 255, 255,},
+ {217, 233, 254, 219, 224, 255, 239, 255, 255, 255,},
+ {218, 234, 255, 221, 225, 255, 240, 255, 255, 255,},
+ {220, 235, 255, 222, 226, 255, 241, 255, 255, 255,},
+ {221, 236, 255, 223, 227, 255, 241, 255, 255, 255,},
+ {223, 237, 255, 224, 228, 255, 242, 255, 255, 255,},
+ {224, 238, 255, 225, 229, 255, 243, 255, 255, 255,},
+ {226, 239, 255, 227, 230, 255, 244, 255, 255, 255,},
+ {227, 240, 255, 228, 231, 255, 244, 255, 255, 255,},
+ {229, 241, 255, 229, 232, 255, 245, 255, 255, 255,},
+ {231, 242, 255, 231, 233, 255, 246, 255, 255, 255,},
+ {232, 243, 255, 232, 234, 255, 247, 255, 255, 255,},
+ {234, 244, 255, 233, 236, 255, 247, 255, 255, 255,},
+ {235, 245, 255, 235, 237, 255, 248, 255, 255, 255,},
+ {237, 246, 255, 236, 238, 255, 249, 255, 255, 255,},
+ {239, 247, 255, 238, 239, 255, 250, 255, 255, 255,},
+ {241, 248, 255, 239, 241, 255, 250, 255, 255, 255,},
+ {242, 249, 255, 241, 242, 255, 251, 255, 255, 255,},
+ {244, 250, 255, 243, 243, 255, 252, 255, 255, 255,},
+ {246, 251, 255, 244, 245, 255, 253, 255, 255, 255,},
+ {248, 252, 255, 246, 247, 255, 253, 255, 255, 255,},
+ {250, 253, 255, 248, 248, 255, 254, 255, 255, 255,},
+ {252, 254, 255, 250, 250, 255, 255, 255, 255, 255,},
+ {254, 255, 255, 253, 253, 255, 255, 255, 255, 255,},
+};
+
+void vp9_get_model_distribution(vp9_prob p, vp9_prob *tree_probs,
+ int b, int r) {
+ const vp9_prob (*model)[ENTROPY_NODES - 1];
+#if UNCONSTRAINED_NODES == 2
+ if (r != INTRA_FRAME && b == PLANE_TYPE_UV)
+ model = vp9_modelcoefprobs_gg75;
+ else if (r == INTRA_FRAME && b == PLANE_TYPE_UV)
+ model = vp9_modelcoefprobs_gg75;
+ else if (r != INTRA_FRAME && b == PLANE_TYPE_Y_WITH_DC)
+ model = vp9_modelcoefprobs_gg75;
+ else
+ model = vp9_modelcoefprobs_gg75;
+#else
+ if (r != INTRA_FRAME && b == PLANE_TYPE_UV)
+ model = vp9_modelcoefprobs_gg75p1;
+ else if (r == INTRA_FRAME && b == PLANE_TYPE_UV)
+ model = vp9_modelcoefprobs_gg75p1;
+ else if (r != INTRA_FRAME && b == PLANE_TYPE_Y_WITH_DC)
+ model = vp9_modelcoefprobs_gg75p1;
+ else
+ model = vp9_modelcoefprobs_gg75p1;
+#endif
+ vpx_memcpy(tree_probs + UNCONSTRAINED_NODES,
+ model[p] + UNCONSTRAINED_NODES - 1,
+ (ENTROPY_NODES - UNCONSTRAINED_NODES) * sizeof(vp9_prob));
+}
+#endif
+
static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[28];
static void init_bit_tree(vp9_tree_index *p, int n) {
@@ -937,8 +2094,188 @@
#include "vp9/common/vp9_default_coef_probs.h"
-#if CONFIG_NEWCOEFCONTEXT
+// This function updates and then returns n AC coefficient context
+// This is currently a placeholder function to allow experimentation
+// using various context models based on the energy earlier tokens
+// within the current block.
+//
+// For now it just returns the previously used context.
+#define MAX_NEIGHBORS 2
+int vp9_get_coef_context(const int *scan, const int *neighbors,
+ int nb_pad, uint8_t *token_cache, int c, int l) {
+ int eob = l;
+ assert(nb_pad == MAX_NEIGHBORS);
+ if (c == eob) {
+ return 0;
+ } else {
+ int ctx;
+ assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0);
+ if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) {
+ ctx = (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
+ token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
+ } else {
+ ctx = token_cache[neighbors[MAX_NEIGHBORS * c + 0]];
+ }
+ return vp9_pt_energy_class[ctx];
+ }
+};
+void vp9_default_coef_probs(VP9_COMMON *pc) {
+#if CONFIG_MODELCOEFPROB
+ int b, r, c, p;
+#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_DEFAULT_COUNTS
+ int h, g;
+ for (h = 0; h < MAX_NZC_CONTEXTS; ++h) {
+ for (g = 0; g < REF_TYPES; ++g) {
+ int i;
+ unsigned int branch_ct4x4[NZC4X4_NODES][2];
+ unsigned int branch_ct8x8[NZC8X8_NODES][2];
+ unsigned int branch_ct16x16[NZC16X16_NODES][2];
+ unsigned int branch_ct32x32[NZC32X32_NODES][2];
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ vp9_tree_probs_from_distribution(
+ vp9_nzc4x4_tree,
+ pc->fc.nzc_probs_4x4[h][g][i], branch_ct4x4,
+ default_nzc_counts_4x4[h][g][i], 0);
+ }
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ vp9_tree_probs_from_distribution(
+ vp9_nzc8x8_tree,
+ pc->fc.nzc_probs_8x8[h][g][i], branch_ct8x8,
+ default_nzc_counts_8x8[h][g][i], 0);
+ }
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ vp9_tree_probs_from_distribution(
+ vp9_nzc16x16_tree,
+ pc->fc.nzc_probs_16x16[h][g][i], branch_ct16x16,
+ default_nzc_counts_16x16[h][g][i], 0);
+ }
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ vp9_tree_probs_from_distribution(
+ vp9_nzc32x32_tree,
+ pc->fc.nzc_probs_32x32[h][g][i], branch_ct32x32,
+ default_nzc_counts_32x32[h][g][i], 0);
+ }
+ }
+ }
+#else
+ vpx_memcpy(pc->fc.nzc_probs_4x4, default_nzc_probs_4x4,
+ sizeof(pc->fc.nzc_probs_4x4));
+ vpx_memcpy(pc->fc.nzc_probs_8x8, default_nzc_probs_8x8,
+ sizeof(pc->fc.nzc_probs_8x8));
+ vpx_memcpy(pc->fc.nzc_probs_16x16, default_nzc_probs_16x16,
+ sizeof(pc->fc.nzc_probs_16x16));
+ vpx_memcpy(pc->fc.nzc_probs_32x32, default_nzc_probs_32x32,
+ sizeof(pc->fc.nzc_probs_32x32));
+#endif
+ vpx_memcpy(pc->fc.nzc_pcat_probs, default_nzc_pcat_probs,
+ sizeof(pc->fc.nzc_pcat_probs));
+#endif // CONFIG_CODE_NONZEROCOUNT
+#if CONFIG_MODELCOEFPROB
+ for (b = 0; b < BLOCK_TYPES; ++b)
+ for (r = 0; r < REF_TYPES; ++r)
+ for (c = 0; c < COEF_BANDS; ++c)
+ for (p = 0; p < PREV_COEF_CONTEXTS; ++p) {
+ int t;
+ for (t = 0; t < UNCONSTRAINED_NODES; t++)
+ pc->fc.coef_probs_4x4[b][r][c][p][t] =
+ default_coef_probs_4x4[b][r][c][p][t];
+ vp9_get_model_distribution(
+ default_coef_probs_4x4[b][r][c][p][UNCONSTRAINED_NODES - 1],
+ pc->fc.coef_probs_4x4[b][r][c][p], b, r);
+ for (t = 0; t < UNCONSTRAINED_NODES; t++)
+ pc->fc.coef_probs_8x8[b][r][c][p][t] =
+ default_coef_probs_8x8[b][r][c][p][t];
+ vp9_get_model_distribution(
+ default_coef_probs_8x8[b][r][c][p][UNCONSTRAINED_NODES - 1],
+ pc->fc.coef_probs_8x8[b][r][c][p], b, r);
+ for (t = 0; t < UNCONSTRAINED_NODES; t++)
+ pc->fc.coef_probs_16x16[b][r][c][p][t] =
+ default_coef_probs_16x16[b][r][c][p][t];
+ vp9_get_model_distribution(
+ default_coef_probs_16x16[b][r][c][p][UNCONSTRAINED_NODES - 1],
+ pc->fc.coef_probs_16x16[b][r][c][p], b, r);
+ for (t = 0; t < UNCONSTRAINED_NODES; t++)
+ pc->fc.coef_probs_32x32[b][r][c][p][t] =
+ default_coef_probs_32x32[b][r][c][p][t];
+ vp9_get_model_distribution(
+ default_coef_probs_32x32[b][r][c][p][UNCONSTRAINED_NODES - 1],
+ pc->fc.coef_probs_32x32[b][r][c][p], b, r);
+ }
+#else
+ vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4,
+ sizeof(pc->fc.coef_probs_4x4));
+ vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8,
+ sizeof(pc->fc.coef_probs_8x8));
+ vpx_memcpy(pc->fc.coef_probs_16x16, default_coef_probs_16x16,
+ sizeof(pc->fc.coef_probs_16x16));
+ vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32,
+ sizeof(pc->fc.coef_probs_32x32));
+#endif
+}
+
+#if CONFIG_MODELCOEFPROB
+// This is a placeholder function that will enable the default coef probs to
+// change for key frames based on the base_qindex. If base_qindex is large,
+// we can expect probabilities of zeros to be bigger, and vice versa. The rest
+// of the probabilities are derived from the nodel.
+void vp9_adjust_default_coef_probs(VP9_COMMON *cm) {
+ static const int factor_bits = 4;
+ static const int factor_rnd = 8; // (1 << (factor_bits - 1))
+ int b, r, c, p;
+ int factor = (1 << factor_bits);
+ /*
+ if (cm->base_qindex < 32)
+ factor -= ((32 - cm->base_qindex) >> 4);
+ */
+ if (cm->base_qindex > 128)
+ factor += ((cm->base_qindex - 128) >> 4);
+ // printf(" Q %d factor %d\n", cm->base_qindex, factor);
+
+ for (b = 0; b < BLOCK_TYPES; ++b)
+ for (r = 0; r < REF_TYPES; ++r)
+ for (c = 0; c < COEF_BANDS; ++c)
+ for (p = 0; p < PREV_COEF_CONTEXTS; ++p) {
+ int t, x;
+ vp9_prob prob;
+ for (t = 0; t < UNCONSTRAINED_NODES; t++) {
+ x = (default_coef_probs_4x4[b][r][c][p][t] * factor + factor_rnd)
+ >> factor_bits;
+ prob = (x > 255 ? 255 : (x < 1 ? 1 : x));
+ cm->fc.coef_probs_4x4[b][r][c][p][t] = prob;
+ }
+ vp9_get_model_distribution(
+ prob, cm->fc.coef_probs_4x4[b][r][c][p], b, r);
+ for (t = 0; t < UNCONSTRAINED_NODES; t++) {
+ x = (default_coef_probs_8x8[b][r][c][p][t] * factor + factor_rnd)
+ >> factor_bits;
+ prob = (x > 255 ? 255 : (x < 1 ? 1 : x));
+ cm->fc.coef_probs_8x8[b][r][c][p][t] = prob;
+ }
+ vp9_get_model_distribution(
+ prob, cm->fc.coef_probs_8x8[b][r][c][p], b, r);
+ for (t = 0; t < UNCONSTRAINED_NODES; t++) {
+ x = (default_coef_probs_16x16[b][r][c][p][t] * factor + factor_rnd)
+ >> factor_bits;
+ prob = (x > 255 ? 255 : (x < 1 ? 1 : x));
+ cm->fc.coef_probs_16x16[b][r][c][p][t] = prob;
+ }
+ vp9_get_model_distribution(
+ prob, cm->fc.coef_probs_16x16[b][r][c][p], b, r);
+ for (t = 0; t < UNCONSTRAINED_NODES; t++) {
+ x = (default_coef_probs_32x32[b][r][c][p][t] * factor + factor_rnd)
+ >> factor_bits;
+ prob = (x > 255 ? 255 : (x < 1 ? 1 : x));
+ cm->fc.coef_probs_32x32[b][r][c][p][t] = prob;
+ }
+ vp9_get_model_distribution(
+ prob, cm->fc.coef_probs_32x32[b][r][c][p], b, r);
+ }
+}
+#endif
+
// Neighborhood 5-tuples for various scans and blocksizes,
// in {top, left, topleft, topright, bottomleft} order
// for each position in raster scan order.
@@ -950,158 +2287,1235 @@
DECLARE_ALIGNED(16, int,
vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
+ vp9_col_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
+DECLARE_ALIGNED(16, int,
+ vp9_row_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
+DECLARE_ALIGNED(16, int,
vp9_default_zig_zag1d_8x8_neighbors[64 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
+ vp9_col_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
+DECLARE_ALIGNED(16, int,
+ vp9_row_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
+DECLARE_ALIGNED(16, int,
vp9_default_zig_zag1d_16x16_neighbors[256 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int,
vp9_default_zig_zag1d_32x32_neighbors[1024 * MAX_NEIGHBORS]);
-static int find_in_scan(const int *scan, int l, int m) {
- int i, l2 = l * l;
- for (i = 0; i < l2; ++i) {
- if (scan[i] == m)
- return i;
+static int find_in_scan(const int *scan, int l, int idx) {
+ int n, l2 = l * l;
+ for (n = 0; n < l2; n++) {
+ int rc = scan[n];
+ if (rc == idx)
+ return n;
}
+ assert(0);
return -1;
}
-
-static void init_scan_neighbors(const int *scan, int l, int *neighbors) {
+static void init_scan_neighbors(const int *scan, int l, int *neighbors,
+ int max_neighbors) {
int l2 = l * l;
- int m, n, i, j, k;
- for (n = 0; n < l2; ++n) {
- int locn = find_in_scan(scan, l, n);
- int z = -1;
- i = n / l;
- j = n % l;
- for (k = 0; k < MAX_NEIGHBORS; ++k)
- neighbors[MAX_NEIGHBORS * n + k] = -1;
- if (i - 1 >= 0) {
- m = (i - 1) * l + j;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n] = m;
- if (m == 0) z = 0;
+ int n, i, j;
+
+ for (n = 0; n < l2; n++) {
+ int rc = scan[n];
+ assert(max_neighbors == MAX_NEIGHBORS);
+ i = rc / l;
+ j = rc % l;
+ if (i > 0 && j > 0) {
+ // col/row scan is used for adst/dct, and generally means that
+ // energy decreases to zero much faster in the dimension in
+ // which ADST is used compared to the direction in which DCT
+ // is used. Likewise, we find much higher correlation between
+ // coefficients within the direction in which DCT is used.
+ // Therefore, if we use ADST/DCT, prefer the DCT neighbor coeff
+ // as a context. If ADST or DCT is used in both directions, we
+ // use the combination of the two as a context.
+ int a = find_in_scan(scan, l, (i - 1) * l + j);
+ int b = find_in_scan(scan, l, i * l + j - 1);
+ if (scan == vp9_col_scan_4x4 || scan == vp9_col_scan_8x8 ||
+ scan == vp9_col_scan_16x16) {
+ neighbors[max_neighbors * n + 0] = a;
+ neighbors[max_neighbors * n + 1] = -1;
+ } else if (scan == vp9_row_scan_4x4 || scan == vp9_row_scan_8x8 ||
+ scan == vp9_row_scan_16x16) {
+ neighbors[max_neighbors * n + 0] = b;
+ neighbors[max_neighbors * n + 1] = -1;
+ } else {
+ neighbors[max_neighbors * n + 0] = a;
+ neighbors[max_neighbors * n + 1] = b;
}
+ } else if (i > 0) {
+ neighbors[max_neighbors * n + 0] = find_in_scan(scan, l, (i - 1) * l + j);
+ neighbors[max_neighbors * n + 1] = -1;
+ } else if (j > 0) {
+ neighbors[max_neighbors * n + 0] =
+ find_in_scan(scan, l, i * l + j - 1);
+ neighbors[max_neighbors * n + 1] = -1;
+ } else {
+ assert(n == 0);
+ // dc predictor doesn't use previous tokens
+ neighbors[max_neighbors * n + 0] = -1;
}
- if (j - 1 >= 0) {
- m = i * l + j - 1;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n + 1] = m;
- if (m == 0) z = 1;
- }
- }
- if (i - 1 >= 0 && j - 1 >= 0) {
- m = (i - 1) * l + j - 1;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n + 2] = m;
- if (m == 0) z = 2;
- }
- }
- if (i - 1 >= 0 && j + 1 < l) {
- m = (i - 1) * l + j + 1;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n + 3] = m;
- if (m == 0) z = 3;
- }
- }
- if (i + 1 < l && j - 1 >= 0) {
- m = (i + 1) * l + j - 1;
- if (find_in_scan(scan, l, m) < locn) {
- neighbors[MAX_NEIGHBORS * n + 4] = m;
- if (m == 0) z = 4;
- }
- }
- if (z != -1) { // zero exists
- int v = 0;
- for (k = 0; k < MAX_NEIGHBORS; ++k)
- v += (neighbors[MAX_NEIGHBORS * n + k] > 0);
- if (v) {
- neighbors[MAX_NEIGHBORS * n + z] = -1;
- }
- }
+ assert(neighbors[max_neighbors * n + 0] < n);
}
}
void vp9_init_neighbors() {
init_scan_neighbors(vp9_default_zig_zag1d_4x4, 4,
- vp9_default_zig_zag1d_4x4_neighbors);
+ vp9_default_zig_zag1d_4x4_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_row_scan_4x4, 4,
- vp9_row_scan_4x4_neighbors);
+ vp9_row_scan_4x4_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_col_scan_4x4, 4,
- vp9_col_scan_4x4_neighbors);
+ vp9_col_scan_4x4_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_default_zig_zag1d_8x8, 8,
- vp9_default_zig_zag1d_8x8_neighbors);
+ vp9_default_zig_zag1d_8x8_neighbors, MAX_NEIGHBORS);
+ init_scan_neighbors(vp9_row_scan_8x8, 8,
+ vp9_row_scan_8x8_neighbors, MAX_NEIGHBORS);
+ init_scan_neighbors(vp9_col_scan_8x8, 8,
+ vp9_col_scan_8x8_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_default_zig_zag1d_16x16, 16,
- vp9_default_zig_zag1d_16x16_neighbors);
+ vp9_default_zig_zag1d_16x16_neighbors, MAX_NEIGHBORS);
+ init_scan_neighbors(vp9_row_scan_16x16, 16,
+ vp9_row_scan_16x16_neighbors, MAX_NEIGHBORS);
+ init_scan_neighbors(vp9_col_scan_16x16, 16,
+ vp9_col_scan_16x16_neighbors, MAX_NEIGHBORS);
init_scan_neighbors(vp9_default_zig_zag1d_32x32, 32,
- vp9_default_zig_zag1d_32x32_neighbors);
+ vp9_default_zig_zag1d_32x32_neighbors, MAX_NEIGHBORS);
}
-const int *vp9_get_coef_neighbors_handle(const int *scan) {
+const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad) {
if (scan == vp9_default_zig_zag1d_4x4) {
+ *pad = MAX_NEIGHBORS;
return vp9_default_zig_zag1d_4x4_neighbors;
} else if (scan == vp9_row_scan_4x4) {
+ *pad = MAX_NEIGHBORS;
return vp9_row_scan_4x4_neighbors;
} else if (scan == vp9_col_scan_4x4) {
+ *pad = MAX_NEIGHBORS;
return vp9_col_scan_4x4_neighbors;
} else if (scan == vp9_default_zig_zag1d_8x8) {
+ *pad = MAX_NEIGHBORS;
return vp9_default_zig_zag1d_8x8_neighbors;
+ } else if (scan == vp9_row_scan_8x8) {
+ *pad = 2;
+ return vp9_row_scan_8x8_neighbors;
+ } else if (scan == vp9_col_scan_8x8) {
+ *pad = 2;
+ return vp9_col_scan_8x8_neighbors;
} else if (scan == vp9_default_zig_zag1d_16x16) {
+ *pad = MAX_NEIGHBORS;
return vp9_default_zig_zag1d_16x16_neighbors;
+ } else if (scan == vp9_row_scan_16x16) {
+ *pad = 2;
+ return vp9_row_scan_16x16_neighbors;
+ } else if (scan == vp9_col_scan_16x16) {
+ *pad = 2;
+ return vp9_col_scan_16x16_neighbors;
} else if (scan == vp9_default_zig_zag1d_32x32) {
+ *pad = MAX_NEIGHBORS;
return vp9_default_zig_zag1d_32x32_neighbors;
+ } else {
+ assert(0);
+ return NULL;
}
- return vp9_default_zig_zag1d_4x4_neighbors;
}
-int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc,
- const int *neigbor_handle, int rc) {
- static int neighbors_used = MAX_NEIGHBORS; // maximum is MAX_NEIGHBORS
- const int *nb = neigbor_handle + rc * MAX_NEIGHBORS;
- int i, v, val = 0, n = 0;
- for (i = 0; i < neighbors_used; ++i) {
- if (nb[i] == -1 || (nb[i] == 0 && nodc)) {
- continue;
- }
- v = abs(qcoeff_ptr[nb[i]]);
- val = (v > val ? v : val);
- n++;
+void vp9_coef_tree_initialize() {
+ vp9_init_neighbors();
+ init_bit_trees();
+ vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_tokens_from_tree(vp9_nzc4x4_encodings, vp9_nzc4x4_tree);
+ vp9_tokens_from_tree(vp9_nzc8x8_encodings, vp9_nzc8x8_tree);
+ vp9_tokens_from_tree(vp9_nzc16x16_encodings, vp9_nzc16x16_tree);
+ vp9_tokens_from_tree(vp9_nzc32x32_encodings, vp9_nzc32x32_tree);
+#endif
+}
+
+#if CONFIG_CODE_NONZEROCOUNT
+
+#define mb_in_cur_tile(cm, mb_row, mb_col) \
+ ((mb_col) >= (cm)->cur_tile_mb_col_start && \
+ (mb_col) <= (cm)->cur_tile_mb_col_end && \
+ (mb_row) >= 0)
+
+#define choose_nzc_context(nzc_exp, t2, t1) \
+ ((nzc_exp) >= (t2) ? 2 : (nzc_exp) >= (t1) ? 1 : 0)
+
+#define NZC_T2_32X32 (16 << 6)
+#define NZC_T1_32X32 (4 << 6)
+
+#define NZC_T2_16X16 (12 << 6)
+#define NZC_T1_16X16 (3 << 6)
+
+#define NZC_T2_8X8 (8 << 6)
+#define NZC_T1_8X8 (2 << 6)
+
+#define NZC_T2_4X4 (4 << 6)
+#define NZC_T1_4X4 (1 << 6)
+
+// Transforms a mb16 block index to a sb64 block index
+static inline int mb16_to_sb64_index(int mb_row, int mb_col, int block) {
+ int r = (mb_row & 3);
+ int c = (mb_col & 3);
+ int b;
+ if (block < 16) { // Y
+ int ib = block >> 2;
+ int jb = block & 3;
+ ib += r * 4;
+ jb += c * 4;
+ b = ib * 16 + jb;
+ assert(b < 256);
+ return b;
+ } else { // UV
+ int base = block - (block & 3);
+ int ib = (block - base) >> 1;
+ int jb = (block - base) & 1;
+ ib += r * 2;
+ jb += c * 2;
+ b = base * 16 + ib * 8 + jb;
+ assert(b >= 256 && b < 384);
+ return b;
}
- if (n == 0)
+}
+
+// Transforms a mb16 block index to a sb32 block index
+static inline int mb16_to_sb32_index(int mb_row, int mb_col, int block) {
+ int r = (mb_row & 1);
+ int c = (mb_col & 1);
+ int b;
+ if (block < 16) { // Y
+ int ib = block >> 2;
+ int jb = block & 3;
+ ib += r * 4;
+ jb += c * 4;
+ b = ib * 8 + jb;
+ assert(b < 64);
+ return b;
+ } else { // UV
+ int base = block - (block & 3);
+ int ib = (block - base) >> 1;
+ int jb = (block - base) & 1;
+ ib += r * 2;
+ jb += c * 2;
+ b = base * 4 + ib * 4 + jb;
+ assert(b >= 64 && b < 96);
+ return b;
+ }
+}
+
+static inline int block_to_txfm_index(int block, TX_SIZE tx_size, int s) {
+ // s is the log of the number of 4x4 blocks in each row/col of larger block
+ int b, ib, jb, nb;
+ ib = block >> s;
+ jb = block - (ib << s);
+ ib >>= tx_size;
+ jb >>= tx_size;
+ nb = 1 << (s - tx_size);
+ b = (ib * nb + jb) << (2 * tx_size);
+ return b;
+}
+
+/* BEGIN - Helper functions to get the y nzcs */
+static unsigned int get_nzc_4x4_y_sb64(MB_MODE_INFO *mi, int block) {
+ int b;
+ assert(block < 256);
+ b = block_to_txfm_index(block, mi->txfm_size, 4);
+ assert(b < 256);
+ return mi->nzcs[b] << (6 - 2 * mi->txfm_size);
+}
+
+static unsigned int get_nzc_4x4_y_sb32(MB_MODE_INFO *mi, int block) {
+ int b;
+ assert(block < 64);
+ b = block_to_txfm_index(block, mi->txfm_size, 3);
+ assert(b < 64);
+ return mi->nzcs[b] << (6 - 2 * mi->txfm_size);
+}
+
+static unsigned int get_nzc_4x4_y_mb16(MB_MODE_INFO *mi, int block) {
+ int b;
+ assert(block < 16);
+ b = block_to_txfm_index(block, mi->txfm_size, 2);
+ assert(b < 16);
+ return mi->nzcs[b] << (6 - 2 * mi->txfm_size);
+}
+/* END - Helper functions to get the y nzcs */
+
+/* Function to get y nzc where block index is in mb16 terms */
+static unsigned int get_nzc_4x4_y(VP9_COMMON *cm, MODE_INFO *m,
+ int mb_row, int mb_col, int block) {
+ // NOTE: All values returned are at 64 times the true value at 4x4 scale
+ MB_MODE_INFO *const mi = &m->mbmi;
+ const int mis = cm->mode_info_stride;
+ if (mi->mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col))
return 0;
- else if (val <= 1)
- return val;
- else if (val < 4)
- return 2;
+ if (mi->sb_type == BLOCK_SIZE_SB64X64) {
+ int r = mb_row & 3;
+ int c = mb_col & 3;
+ m -= c + r * mis;
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
+ return 0;
+ else
+ return get_nzc_4x4_y_sb64(
+ &m->mbmi, mb16_to_sb64_index(mb_row, mb_col, block));
+ } else if (mi->sb_type == BLOCK_SIZE_SB32X32) {
+ int r = mb_row & 1;
+ int c = mb_col & 1;
+ m -= c + r * mis;
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
+ return 0;
+ else
+ return get_nzc_4x4_y_sb32(
+ &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block));
+ } else {
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col))
+ return 0;
+ return get_nzc_4x4_y_mb16(mi, block);
+ }
+}
+
+/* BEGIN - Helper functions to get the uv nzcs */
+static unsigned int get_nzc_4x4_uv_sb64(MB_MODE_INFO *mi, int block) {
+ int b;
+ int base, uvtxfm_size;
+ assert(block >= 256 && block < 384);
+ uvtxfm_size = mi->txfm_size;
+ base = 256 + (block & 64);
+ block -= base;
+ b = base + block_to_txfm_index(block, uvtxfm_size, 3);
+ assert(b >= 256 && b < 384);
+ return mi->nzcs[b] << (6 - 2 * uvtxfm_size);
+}
+
+static unsigned int get_nzc_4x4_uv_sb32(MB_MODE_INFO *mi, int block) {
+ int b;
+ int base, uvtxfm_size;
+ assert(block >= 64 && block < 96);
+ if (mi->txfm_size == TX_32X32)
+ uvtxfm_size = TX_16X16;
else
- return 3;
+ uvtxfm_size = mi->txfm_size;
+ base = 64 + (block & 16);
+ block -= base;
+ b = base + block_to_txfm_index(block, uvtxfm_size, 2);
+ assert(b >= 64 && b < 96);
+ return mi->nzcs[b] << (6 - 2 * uvtxfm_size);
}
-#endif /* CONFIG_NEWCOEFCONTEXT */
-void vp9_default_coef_probs(VP9_COMMON *pc) {
- vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4,
- sizeof(pc->fc.coef_probs_4x4));
- vpx_memcpy(pc->fc.hybrid_coef_probs_4x4, default_hybrid_coef_probs_4x4,
- sizeof(pc->fc.hybrid_coef_probs_4x4));
+static unsigned int get_nzc_4x4_uv_mb16(MB_MODE_INFO *mi, int block) {
+ int b;
+ int base, uvtxfm_size;
+ assert(block >= 16 && block < 24);
+ if (mi->txfm_size == TX_8X8 &&
+ (mi->mode == SPLITMV || mi->mode == I8X8_PRED))
+ uvtxfm_size = TX_4X4;
+ else if (mi->txfm_size == TX_16X16)
+ uvtxfm_size = TX_8X8;
+ else
+ uvtxfm_size = mi->txfm_size;
+ base = 16 + (block & 4);
+ block -= base;
+ b = base + block_to_txfm_index(block, uvtxfm_size, 1);
+ assert(b >= 16 && b < 24);
+ return mi->nzcs[b] << (6 - 2 * uvtxfm_size);
+}
+/* END - Helper functions to get the uv nzcs */
- vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8,
- sizeof(pc->fc.coef_probs_8x8));
- vpx_memcpy(pc->fc.hybrid_coef_probs_8x8, default_hybrid_coef_probs_8x8,
- sizeof(pc->fc.hybrid_coef_probs_8x8));
+/* Function to get uv nzc where block index is in mb16 terms */
+static unsigned int get_nzc_4x4_uv(VP9_COMMON *cm, MODE_INFO *m,
+ int mb_row, int mb_col, int block) {
+ // NOTE: All values returned are at 64 times the true value at 4x4 scale
+ MB_MODE_INFO *const mi = &m->mbmi;
+ const int mis = cm->mode_info_stride;
+ if (mi->mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col))
+ return 0;
+ if (mi->sb_type == BLOCK_SIZE_SB64X64) {
+ int r = mb_row & 3;
+ int c = mb_col & 3;
+ m -= c + r * mis;
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
+ return 0;
+ else
+ return get_nzc_4x4_uv_sb64(
+ &m->mbmi, mb16_to_sb64_index(mb_row, mb_col, block));
+ } else if (mi->sb_type == BLOCK_SIZE_SB32X32) {
+ int r = mb_row & 1;
+ int c = mb_col & 1;
+ m -= c + r * mis;
+ if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c))
+ return 0;
+ else
+ return get_nzc_4x4_uv_sb32(
+ &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block));
+ } else {
+ return get_nzc_4x4_uv_mb16(mi, block);
+ }
+}
- vpx_memcpy(pc->fc.coef_probs_16x16, default_coef_probs_16x16,
- sizeof(pc->fc.coef_probs_16x16));
- vpx_memcpy(pc->fc.hybrid_coef_probs_16x16,
- default_hybrid_coef_probs_16x16,
- sizeof(pc->fc.hybrid_coef_probs_16x16));
- vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32,
- sizeof(pc->fc.coef_probs_32x32));
+int vp9_get_nzc_context_y_sb64(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ assert(block < 256);
+ switch (txfm_size) {
+ case TX_32X32:
+ assert((block & 63) == 0);
+ if (block < 128) {
+ int o = (block >> 6) * 2;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15) +
+ get_nzc_4x4_y(cm, cur - mis + o + 1,
+ mb_row - 1, mb_col + o + 1, 12) +
+ get_nzc_4x4_y(cm, cur - mis + o + 1,
+ mb_row - 1, mb_col + o + 1, 13) +
+ get_nzc_4x4_y(cm, cur - mis + o + 1,
+ mb_row - 1, mb_col + o + 1, 14) +
+ get_nzc_4x4_y(cm, cur - mis + o + 1,
+ mb_row - 1, mb_col + o + 1, 15);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 128] << 3;
+ }
+ if ((block & 127) == 0) {
+ int o = (block >> 7) * 2;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, 15);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 64] << 3;
+ }
+ nzc_exp <<= 2;
+ // Note nzc_exp is 64 times the average value expected at 32x32 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32);
+ break;
+
+ case TX_16X16:
+ assert((block & 15) == 0);
+ if (block < 64) {
+ int o = block >> 4;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 64] << 4;
+ }
+ if ((block & 63) == 0) {
+ int o = block >> 6;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 16] << 4;
+ }
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+ break;
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (block < 32) {
+ int o = block >> 3;
+ int p = ((block >> 2) & 1) ? 14 : 12;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p + 1);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 32] << 5;
+ }
+ if ((block & 31) == 0) {
+ int o = block >> 6;
+ int p = ((block >> 5) & 1) ? 11 : 3;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p + 4);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+ break;
+
+ case TX_4X4:
+ if (block < 16) {
+ int o = block >> 2;
+ int p = block & 3;
+ nzc_exp = get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ 12 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 16] << 6);
+ }
+ if ((block & 15) == 0) {
+ int o = block >> 6;
+ int p = (block >> 4) & 3;
+ nzc_exp += get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ 3 + 4 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+ break;
+
+ default:
+ return 0;
+ }
}
-void vp9_coef_tree_initialize() {
- init_bit_trees();
- vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree);
+int vp9_get_nzc_context_y_sb32(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ assert(block < 64);
+ switch (txfm_size) {
+ case TX_32X32:
+ assert(block == 0);
+ nzc_exp =
+ (get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 12) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 13) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 14) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 15) +
+ get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 12) +
+ get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 13) +
+ get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 14) +
+ get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 15) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 15) +
+ get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 15)) << 2;
+ // Note nzc_exp is 64 times the average value expected at 32x32 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32);
+ break;
+
+ case TX_16X16:
+ assert((block & 15) == 0);
+ if (block < 32) {
+ int o = (block >> 4) & 1;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 32] << 4;
+ }
+ if ((block & 31) == 0) {
+ int o = block >> 5;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 16] << 4;
+ }
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+ break;
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (block < 16) {
+ int o = block >> 3;
+ int p = ((block >> 2) & 1) ? 14 : 12;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p) +
+ get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p + 1);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 16] << 5;
+ }
+ if ((block & 15) == 0) {
+ int o = block >> 5;
+ int p = ((block >> 4) & 1) ? 11 : 3;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p) +
+ get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p + 4);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+ break;
+
+ case TX_4X4:
+ if (block < 8) {
+ int o = block >> 2;
+ int p = block & 3;
+ nzc_exp = get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ 12 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 8] << 6);
+ }
+ if ((block & 7) == 0) {
+ int o = block >> 5;
+ int p = (block >> 3) & 3;
+ nzc_exp += get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ 3 + 4 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+ break;
+
+ default:
+ return 0;
+ break;
+ }
}
+int vp9_get_nzc_context_y_mb16(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ assert(block < 16);
+ switch (txfm_size) {
+ case TX_16X16:
+ assert(block == 0);
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 12) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 13) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 14) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 15) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 3) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 7) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 11) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 15);
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (block < 8) {
+ int p = ((block >> 2) & 1) ? 14 : 12;
+ nzc_exp =
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, p) +
+ get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, p + 1);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 8] << 5;
+ }
+ if ((block & 7) == 0) {
+ int p = ((block >> 3) & 1) ? 11 : 3;
+ nzc_exp +=
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, p) +
+ get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, p + 4);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+
+ case TX_4X4:
+ if (block < 4) {
+ int p = block & 3;
+ nzc_exp = get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col,
+ 12 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 4] << 6);
+ }
+ if ((block & 3) == 0) {
+ int p = (block >> 2) & 3;
+ nzc_exp += get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1,
+ 3 + 4 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+
+ default:
+ return 0;
+ break;
+ }
+}
+
+int vp9_get_nzc_context_uv_sb64(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ const int base = block - (block & 63);
+ const int boff = (block & 63);
+ const int base_mb16 = base >> 4;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ TX_SIZE txfm_size_uv;
+
+ assert(block >= 256 && block < 384);
+ txfm_size_uv = txfm_size;
+
+ switch (txfm_size_uv) {
+ case TX_32X32:
+ assert(block == 256 || block == 320);
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + 2, mb_row - 1, mb_col + 2,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + 2, mb_row - 1, mb_col + 2,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + 3, mb_row - 1, mb_col + 3,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + 3, mb_row - 1, mb_col + 3,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + 2 * mis, mb_row + 2, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + 2 * mis, mb_row + 2, mb_col - 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + 3 * mis, mb_row + 3, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + 3 * mis, mb_row + 3, mb_col - 1,
+ base_mb16 + 3);
+ nzc_exp <<= 2;
+ // Note nzc_exp is 64 times the average value expected at 32x32 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32);
+
+ case TX_16X16:
+ // uv txfm_size 16x16
+ assert((block & 15) == 0);
+ if (boff < 32) {
+ int o = (boff >> 4) & 1;
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + o + 1, mb_row - 1, mb_col + o + 1,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + o + 1, mb_row - 1, mb_col + o + 1,
+ base_mb16 + 3);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 32] << 4;
+ }
+ if ((boff & 31) == 0) {
+ int o = boff >> 5;
+ nzc_exp +=
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis,
+ mb_row + o, mb_col - 1, base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis,
+ mb_row + o, mb_col - 1, base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis + mis,
+ mb_row + o + 1, mb_col - 1, base_mb16 + 3);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 16] << 4;
+ }
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (boff < 16) {
+ int o = boff >> 2;
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 3);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 16] << 5;
+ }
+ if ((boff & 15) == 0) {
+ int o = boff >> 4;
+ nzc_exp +=
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 3);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+
+ case TX_4X4:
+ if (boff < 8) {
+ int o = boff >> 1;
+ int p = boff & 1;
+ nzc_exp = get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 8] << 6);
+ }
+ if ((boff & 7) == 0) {
+ int o = boff >> 4;
+ int p = (boff >> 3) & 1;
+ nzc_exp += get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 1 + 2 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+
+ default:
+ return 0;
+ }
+}
+
+int vp9_get_nzc_context_uv_sb32(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ const int base = block - (block & 15);
+ const int boff = (block & 15);
+ const int base_mb16 = base >> 2;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ TX_SIZE txfm_size_uv;
+
+ assert(block >= 64 && block < 96);
+ if (txfm_size == TX_32X32)
+ txfm_size_uv = TX_16X16;
+ else
+ txfm_size_uv = txfm_size;
+
+ switch (txfm_size_uv) {
+ case TX_16X16:
+ // uv txfm_size 16x16
+ assert(block == 64 || block == 80);
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row, mb_col - 1,
+ base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1,
+ base_mb16 + 3);
+ nzc_exp <<= 1;
+ // Note nzc_exp is 64 times the average value expected at 16x16 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16);
+ break;
+
+ case TX_8X8:
+ assert((block & 3) == 0);
+ if (boff < 8) {
+ int o = boff >> 2;
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 3);
+ } else {
+ nzc_exp = cur->mbmi.nzcs[block - 8] << 5;
+ }
+ if ((boff & 7) == 0) {
+ int o = boff >> 3;
+ nzc_exp +=
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 3);
+ } else {
+ nzc_exp += cur->mbmi.nzcs[block - 4] << 5;
+ }
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+
+ case TX_4X4:
+ if (boff < 4) {
+ int o = boff >> 1;
+ int p = boff & 1;
+ nzc_exp = get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o,
+ base_mb16 + 2 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 4] << 6);
+ }
+ if ((boff & 3) == 0) {
+ int o = boff >> 3;
+ int p = (boff >> 2) & 1;
+ nzc_exp += get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1,
+ base_mb16 + 1 + 2 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+
+ default:
+ return 0;
+ }
+}
+
+int vp9_get_nzc_context_uv_mb16(VP9_COMMON *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block) {
+ // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy
+ // neighboring blocks are
+ int mis = cm->mode_info_stride;
+ int nzc_exp = 0;
+ const int base = block - (block & 3);
+ const int boff = (block & 3);
+ const int base_mb16 = base;
+ TX_SIZE txfm_size = cur->mbmi.txfm_size;
+ TX_SIZE txfm_size_uv;
+
+ assert(block >= 16 && block < 24);
+ if (txfm_size == TX_16X16)
+ txfm_size_uv = TX_8X8;
+ else if (txfm_size == TX_8X8 &&
+ (cur->mbmi.mode == I8X8_PRED || cur->mbmi.mode == SPLITMV))
+ txfm_size_uv = TX_4X4;
+ else
+ txfm_size_uv = txfm_size;
+
+ switch (txfm_size_uv) {
+ case TX_8X8:
+ assert((block & 3) == 0);
+ nzc_exp =
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, base_mb16 + 2) +
+ get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, base_mb16 + 3) +
+ get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, base_mb16 + 1) +
+ get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, base_mb16 + 3);
+ // Note nzc_exp is 64 times the average value expected at 8x8 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8);
+
+ case TX_4X4:
+ if (boff < 2) {
+ int p = boff & 1;
+ nzc_exp = get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col,
+ base_mb16 + 2 + p);
+ } else {
+ nzc_exp = (cur->mbmi.nzcs[block - 2] << 6);
+ }
+ if ((boff & 1) == 0) {
+ int p = (boff >> 1) & 1;
+ nzc_exp += get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1,
+ base_mb16 + 1 + 2 * p);
+ } else {
+ nzc_exp += (cur->mbmi.nzcs[block - 1] << 6);
+ }
+ nzc_exp >>= 1;
+ // Note nzc_exp is 64 times the average value expected at 4x4 scale
+ return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4);
+
+ default:
+ return 0;
+ }
+}
+
+int vp9_get_nzc_context(VP9_COMMON *cm, MACROBLOCKD *xd, int block) {
+ if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ assert(block < 384);
+ if (block < 256)
+ return vp9_get_nzc_context_y_sb64(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ else
+ return vp9_get_nzc_context_uv_sb64(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ assert(block < 96);
+ if (block < 64)
+ return vp9_get_nzc_context_y_sb32(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ else
+ return vp9_get_nzc_context_uv_sb32(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ } else {
+ assert(block < 64);
+ if (block < 16)
+ return vp9_get_nzc_context_y_mb16(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ else
+ return vp9_get_nzc_context_uv_mb16(cm, xd->mode_info_context,
+ get_mb_row(xd), get_mb_col(xd), block);
+ }
+}
+
+static void update_nzc(VP9_COMMON *cm,
+ uint16_t nzc,
+ int nzc_context,
+ TX_SIZE tx_size,
+ int ref,
+ int type) {
+ int e, c;
+ c = codenzc(nzc);
+ if (tx_size == TX_32X32)
+ cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++;
+ else if (tx_size == TX_16X16)
+ cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++;
+ else if (tx_size == TX_8X8)
+ cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++;
+ else if (tx_size == TX_4X4)
+ cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++;
+ else
+ assert(0);
+
+ if ((e = vp9_extranzcbits[c])) {
+ int x = nzc - vp9_basenzcvalue[c];
+ while (e--) {
+ int b = (x >> e) & 1;
+ cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++;
+ }
+ }
+}
+
+static void update_nzcs_sb64(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 256; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0);
+ }
+ for (j = 256; j < 384; j += 64) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 256; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0);
+ }
+ for (j = 256; j < 384; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 256; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0);
+ }
+ for (j = 256; j < 384; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 256; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0);
+ }
+ for (j = 256; j < 384; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void update_nzcs_sb32(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 64; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 64; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 64; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0);
+ }
+ for (j = 64; j < 96; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 64; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0);
+ }
+ for (j = 64; j < 96; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void update_nzcs_mb16(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_16X16:
+ for (j = 0; j < 16; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0);
+ }
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 16; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0);
+ }
+ if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) {
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1);
+ }
+ } else {
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1);
+ }
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 16; ++j) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0);
+ }
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+void vp9_update_nzc_counts(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64)
+ update_nzcs_sb64(cm, xd, mb_row, mb_col);
+ else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32)
+ update_nzcs_sb32(cm, xd, mb_row, mb_col);
+ else
+ update_nzcs_mb16(cm, xd, mb_row, mb_col);
+}
+#endif // CONFIG_CODE_NONZEROCOUNT
+
// #define COEF_COUNT_TESTING
#define COEF_COUNT_SAT 24
@@ -1111,42 +3525,55 @@
#define COEF_COUNT_SAT_AFTER_KEY 24
#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
-static void update_coef_probs(vp9_coeff_probs *dst_coef_probs,
- vp9_coeff_probs *pre_coef_probs,
- int block_types, vp9_coeff_count *coef_counts,
- int count_sat, int update_factor) {
- int t, i, j, k, count;
+static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs,
+ vp9_coeff_probs *pre_coef_probs,
+ int block_types, vp9_coeff_count *coef_counts,
+ unsigned int (*eob_branch_count)[REF_TYPES]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS],
+ int count_sat, int update_factor) {
+ int t, i, j, k, l, count;
unsigned int branch_ct[ENTROPY_NODES][2];
vp9_prob coef_probs[ENTROPY_NODES];
int factor;
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_ADAPT
+ int entropy_nodes_adapt = UNCONSTRAINED_ADAPT_NODES;
+#else
+ int entropy_nodes_adapt = ENTROPY_NODES;
+#endif
for (i = 0; i < block_types; ++i)
- for (j = 0; j < COEF_BANDS; ++j)
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
- continue;
- vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
- vp9_coef_encodings, vp9_coef_tree,
- coef_probs, branch_ct,
- coef_counts[i][j][k]);
- for (t = 0; t < ENTROPY_NODES; ++t) {
- count = branch_ct[t][0] + branch_ct[t][1];
- count = count > count_sat ? count_sat : count;
- factor = (update_factor * count / count_sat);
- dst_coef_probs[i][j][k][t] = weighted_prob(pre_coef_probs[i][j][k][t],
- coef_probs[t], factor);
+ for (j = 0; j < REF_TYPES; ++j)
+ for (k = 0; k < COEF_BANDS; ++k)
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ if (l >= 3 && k == 0)
+ continue;
+ vp9_tree_probs_from_distribution(vp9_coef_tree,
+ coef_probs, branch_ct,
+ coef_counts[i][j][k][l], 0);
+ branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0];
+ coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
+ for (t = 0; t < entropy_nodes_adapt; ++t) {
+ count = branch_ct[t][0] + branch_ct[t][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ dst_coef_probs[i][j][k][l][t] =
+ weighted_prob(pre_coef_probs[i][j][k][l][t],
+ coef_probs[t], factor);
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_ADAPT
+ if (t == UNCONSTRAINED_NODES - 1)
+ vp9_get_model_distribution(
+ dst_coef_probs[i][j][k][l][UNCONSTRAINED_NODES - 1],
+ dst_coef_probs[i][j][k][l], i, j);
+#endif
+ }
}
- }
}
void vp9_adapt_coef_probs(VP9_COMMON *cm) {
-#ifdef COEF_COUNT_TESTING
- int t, i, j, k;
-#endif
int count_sat;
int update_factor; /* denominator 256 */
- // printf("Frame type: %d\n", cm->frame_type);
if (cm->frame_type == KEY_FRAME) {
update_factor = COEF_MAX_UPDATE_FACTOR_KEY;
count_sat = COEF_COUNT_SAT_KEY;
@@ -1158,87 +3585,141 @@
count_sat = COEF_COUNT_SAT;
}
-#ifdef COEF_COUNT_TESTING
- {
- printf("static const unsigned int\ncoef_counts"
- "[BLOCK_TYPES] [COEF_BANDS]"
- "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {\n");
- for (i = 0; i < BLOCK_TYPES; ++i) {
- printf(" {\n");
- for (j = 0; j < COEF_BANDS; ++j) {
- printf(" {\n");
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- printf(" {");
- for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
- printf("%d, ", cm->fc.coef_counts[i][j][k][t]);
- printf("},\n");
+ adapt_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4,
+ BLOCK_TYPES, cm->fc.coef_counts_4x4,
+ cm->fc.eob_branch_counts[TX_4X4],
+ count_sat, update_factor);
+ adapt_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8,
+ BLOCK_TYPES, cm->fc.coef_counts_8x8,
+ cm->fc.eob_branch_counts[TX_8X8],
+ count_sat, update_factor);
+ adapt_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16,
+ BLOCK_TYPES, cm->fc.coef_counts_16x16,
+ cm->fc.eob_branch_counts[TX_16X16],
+ count_sat, update_factor);
+ adapt_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32,
+ BLOCK_TYPES, cm->fc.coef_counts_32x32,
+ cm->fc.eob_branch_counts[TX_32X32],
+ count_sat, update_factor);
+}
+
+#if CONFIG_CODE_NONZEROCOUNT
+static void adapt_nzc_probs(VP9_COMMON *cm,
+ int block_size,
+ int count_sat,
+ int update_factor) {
+ int c, r, b, n;
+ int count, factor;
+ unsigned int nzc_branch_ct[NZC32X32_NODES][2];
+ vp9_prob nzc_probs[NZC32X32_NODES];
+ int tokens, nodes;
+ const vp9_tree_index *nzc_tree;
+ vp9_prob *dst_nzc_probs;
+ vp9_prob *pre_nzc_probs;
+ unsigned int *nzc_counts;
+
+ if (block_size == 32) {
+ tokens = NZC32X32_TOKENS;
+ nzc_tree = vp9_nzc32x32_tree;
+ dst_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
+ pre_nzc_probs = cm->fc.pre_nzc_probs_32x32[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_32x32[0][0][0];
+ } else if (block_size == 16) {
+ tokens = NZC16X16_TOKENS;
+ nzc_tree = vp9_nzc16x16_tree;
+ dst_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
+ pre_nzc_probs = cm->fc.pre_nzc_probs_16x16[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_16x16[0][0][0];
+ } else if (block_size == 8) {
+ tokens = NZC8X8_TOKENS;
+ nzc_tree = vp9_nzc8x8_tree;
+ dst_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
+ pre_nzc_probs = cm->fc.pre_nzc_probs_8x8[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_8x8[0][0][0];
+ } else {
+ nzc_tree = vp9_nzc4x4_tree;
+ tokens = NZC4X4_TOKENS;
+ dst_nzc_probs = cm->fc.nzc_probs_4x4[0][0][0];
+ pre_nzc_probs = cm->fc.pre_nzc_probs_4x4[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_4x4[0][0][0];
+ }
+ nodes = tokens - 1;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c)
+ for (r = 0; r < REF_TYPES; ++r)
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ int offset_tokens = offset * tokens;
+ vp9_tree_probs_from_distribution(nzc_tree,
+ nzc_probs, nzc_branch_ct,
+ nzc_counts + offset_tokens, 0);
+ for (n = 0; n < nodes; ++n) {
+ count = nzc_branch_ct[n][0] + nzc_branch_ct[n][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ dst_nzc_probs[offset_nodes + n] =
+ weighted_prob(pre_nzc_probs[offset_nodes + n],
+ nzc_probs[n], factor);
}
- printf(" },\n");
}
- printf(" },\n");
- }
- printf("};\n");
- printf("static const unsigned int\ncoef_counts_8x8"
- "[BLOCK_TYPES_8X8] [COEF_BANDS]"
- "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {\n");
- for (i = 0; i < BLOCK_TYPES_8X8; ++i) {
- printf(" {\n");
- for (j = 0; j < COEF_BANDS; ++j) {
- printf(" {\n");
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- printf(" {");
- for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
- printf("%d, ", cm->fc.coef_counts_8x8[i][j][k][t]);
- printf("},\n");
- }
- printf(" },\n");
+}
+
+static void adapt_nzc_pcat(VP9_COMMON *cm, int count_sat, int update_factor) {
+ int c, t;
+ int count, factor;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ int b;
+ for (b = 0; b < bits; ++b) {
+ vp9_prob prob = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0],
+ cm->fc.nzc_pcat_counts[c][t][b][1]);
+ count = cm->fc.nzc_pcat_counts[c][t][b][0] +
+ cm->fc.nzc_pcat_counts[c][t][b][1];
+ count = count > count_sat ? count_sat : count;
+ factor = (update_factor * count / count_sat);
+ cm->fc.nzc_pcat_probs[c][t][b] = weighted_prob(
+ cm->fc.pre_nzc_pcat_probs[c][t][b], prob, factor);
}
- printf(" },\n");
}
- printf("};\n");
- printf("static const unsigned int\nhybrid_coef_counts"
- "[BLOCK_TYPES] [COEF_BANDS]"
- "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {\n");
- for (i = 0; i < BLOCK_TYPES; ++i) {
- printf(" {\n");
- for (j = 0; j < COEF_BANDS; ++j) {
- printf(" {\n");
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- printf(" {");
- for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
- printf("%d, ", cm->fc.hybrid_coef_counts[i][j][k][t]);
- printf("},\n");
+ }
+}
+
+// #define NZC_COUNT_TESTING
+void vp9_adapt_nzc_probs(VP9_COMMON *cm) {
+ int count_sat;
+ int update_factor; /* denominator 256 */
+#ifdef NZC_COUNT_TESTING
+ int c, r, b, t;
+ printf("\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c)
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC4X4_TOKENS; ++t) {
+ printf(" %d,", cm->fc.nzc_counts_4x4[c][r][b][t]);
}
- printf(" },\n");
+ printf("}\n");
}
- printf(" },\n");
+ printf("\n");
}
- printf("};\n");
- }
#endif
- update_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4,
- BLOCK_TYPES_4X4, cm->fc.coef_counts_4x4,
- count_sat, update_factor);
- update_coef_probs(cm->fc.hybrid_coef_probs_4x4,
- cm->fc.pre_hybrid_coef_probs_4x4,
- BLOCK_TYPES_4X4, cm->fc.hybrid_coef_counts_4x4,
- count_sat, update_factor);
- update_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8,
- BLOCK_TYPES_8X8, cm->fc.coef_counts_8x8,
- count_sat, update_factor);
- update_coef_probs(cm->fc.hybrid_coef_probs_8x8,
- cm->fc.pre_hybrid_coef_probs_8x8,
- BLOCK_TYPES_8X8, cm->fc.hybrid_coef_counts_8x8,
- count_sat, update_factor);
- update_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16,
- BLOCK_TYPES_16X16, cm->fc.coef_counts_16x16,
- count_sat, update_factor);
- update_coef_probs(cm->fc.hybrid_coef_probs_16x16,
- cm->fc.pre_hybrid_coef_probs_16x16,
- BLOCK_TYPES_16X16, cm->fc.hybrid_coef_counts_16x16,
- count_sat, update_factor);
- update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32,
- BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32,
- count_sat, update_factor);
+ if (cm->frame_type == KEY_FRAME) {
+ update_factor = COEF_MAX_UPDATE_FACTOR_KEY;
+ count_sat = COEF_COUNT_SAT_KEY;
+ } else if (cm->last_frame_type == KEY_FRAME) {
+ update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */
+ count_sat = COEF_COUNT_SAT_AFTER_KEY;
+ } else {
+ update_factor = COEF_MAX_UPDATE_FACTOR;
+ count_sat = COEF_COUNT_SAT;
+ }
+
+ adapt_nzc_probs(cm, 4, count_sat, update_factor);
+ adapt_nzc_probs(cm, 8, count_sat, update_factor);
+ adapt_nzc_probs(cm, 16, count_sat, update_factor);
+ adapt_nzc_probs(cm, 32, count_sat, update_factor);
+ adapt_nzc_pcat(cm, count_sat, update_factor);
}
+#endif // CONFIG_CODE_NONZEROCOUNT
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -15,7 +15,6 @@
#include "vp9/common/vp9_treecoder.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_coefupdateprobs.h"
extern const int vp9_i8x8_block[4];
@@ -31,10 +30,10 @@
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
-#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 13+1 */
+#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 14+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
-#define MAX_ENTROPY_TOKENS 12
-#define ENTROPY_NODES 11
+#define MAX_ENTROPY_TOKENS 12
+#define ENTROPY_NODES 11
#define EOSB_TOKEN 127 /* Not signalled, encoder only */
#define INTER_MODE_CONTEXTS 7
@@ -59,31 +58,20 @@
/* Coefficients are predicted via a 3-dimensional probability table. */
-/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
-#define BLOCK_TYPES_4X4 4
+/* Outside dimension. 0 = Y with DC, 1 = UV */
+#define BLOCK_TYPES 2
+#define REF_TYPES 2 // intra=0, inter=1
-#define BLOCK_TYPES_8X8 4
+/* Middle dimension reflects the coefficient position within the transform. */
+#define COEF_BANDS 6
-#define BLOCK_TYPES_16X16 4
-
-#define BLOCK_TYPES_32X32 4
-
-/* Middle dimension is a coarsening of the coefficient's
- position within the 4x4 DCT. */
-
-#define COEF_BANDS 8
-extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_4x4[16]);
-extern DECLARE_ALIGNED(64, const int, vp9_coef_bands_8x8[64]);
-extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_16x16[256]);
-extern DECLARE_ALIGNED(16, const int, vp9_coef_bands_32x32[1024]);
-
-/* Inside dimension is 3-valued measure of nearby complexity, that is,
- the extent to which nearby coefficients are nonzero. For the first
- coefficient (DC, unless block type is 0), we look at the (already encoded)
- blocks above and to the left of the current block. The context index is
- then the number (0,1,or 2) of these blocks having nonzero coefficients.
- After decoding a coefficient, the measure is roughly the size of the
- most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1).
+/* Inside dimension is measure of nearby complexity, that reflects the energy
+ of nearby coefficients are nonzero. For the first coefficient (DC, unless
+ block type is 0), we look at the (already encoded) blocks above and to the
+ left of the current block. The context index is then the number (0,1,or 2)
+ of these blocks having nonzero coefficients.
+ After decoding a coefficient, the measure is determined by the size of the
+ most recently decoded coefficient.
Note that the intuitive meaning of this measure changes as coefficients
are decoded, e.g., prior to the first token, a zero means that my neighbors
are empty while, after the first token, because of the use of end-of-block,
@@ -94,21 +82,18 @@
distinct bands). */
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
-#define PREV_COEF_CONTEXTS 4
+#define PREV_COEF_CONTEXTS 6
-typedef unsigned int vp9_coeff_count[COEF_BANDS][PREV_COEF_CONTEXTS]
+typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS];
-typedef unsigned int vp9_coeff_stats[COEF_BANDS][PREV_COEF_CONTEXTS]
+typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
[ENTROPY_NODES][2];
-typedef vp9_prob vp9_coeff_probs[COEF_BANDS][PREV_COEF_CONTEXTS]
+typedef vp9_prob vp9_coeff_probs[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
[ENTROPY_NODES];
#define SUBEXP_PARAM 4 /* Subexponential code parameter */
#define MODULUS_PARAM 13 /* Modulus parameter */
-extern DECLARE_ALIGNED(16, const uint8_t,
- vp9_prev_token_class[MAX_ENTROPY_TOKENS]);
-
struct VP9Common;
void vp9_default_coef_probs(struct VP9Common *);
extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_4x4[16]);
@@ -117,38 +102,168 @@
extern DECLARE_ALIGNED(16, const int, vp9_row_scan_4x4[16]);
extern DECLARE_ALIGNED(64, const int, vp9_default_zig_zag1d_8x8[64]);
+
+extern DECLARE_ALIGNED(16, const int, vp9_col_scan_8x8[64]);
+extern DECLARE_ALIGNED(16, const int, vp9_row_scan_8x8[64]);
+
extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_16x16[256]);
+
+extern DECLARE_ALIGNED(16, const int, vp9_col_scan_16x16[256]);
+extern DECLARE_ALIGNED(16, const int, vp9_row_scan_16x16[256]);
+
extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]);
void vp9_coef_tree_initialize(void);
void vp9_adapt_coef_probs(struct VP9Common *);
-static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
+static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
/* Clear entropy contexts */
vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
}
-#if CONFIG_NEWCOEFCONTEXT
+static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd) {
+ /* Clear entropy contexts */
+ vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
+ vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
+}
-#define MAX_NEIGHBORS 5
-#define NEWCOEFCONTEXT_BAND_COND(b) ((b) >= 1)
-void vp9_init_neighbors(void);
+static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) {
+ /* Clear entropy contexts */
+ vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
+ vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
+}
-const int *vp9_get_coef_neighbors_handle(const int *scan);
-int vp9_get_coef_neighbor_context(const short int *qcoeff_ptr, int nodc,
- const int *neigbor_handle, int rc);
-extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_4x4_neighbors[
- 16 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_row_scan_4x4_neighbors[
- 16 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_col_scan_4x4_neighbors[
- 16 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_8x8_neighbors[
- 64 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_16x16_neighbors[
- 256 * MAX_NEIGHBORS]);
-extern DECLARE_ALIGNED(16, int, vp9_default_zig_zag1d_32x32_neighbors[
- 1024 * MAX_NEIGHBORS]);
-#endif // CONFIG_NEWCOEFCONTEXT
+extern const int vp9_coef_bands8x8[64];
+extern const int vp9_coef_bands4x4[16];
+
+static int get_coef_band(const int *scan, TX_SIZE tx_size, int coef_index) {
+ if (tx_size == TX_4X4) {
+ return vp9_coef_bands4x4[scan[coef_index]];
+ } else {
+ const int pos = scan[coef_index];
+ const int sz = 1 << (2 + tx_size);
+ const int x = pos & (sz - 1), y = pos >> (2 + tx_size);
+ if (x >= 8 || y >= 8)
+ return 5;
+ else
+ return vp9_coef_bands8x8[y * 8 + x];
+ }
+}
+extern int vp9_get_coef_context(const int *scan, const int *neighbors,
+ int nb_pad, uint8_t *token_cache, int c, int l);
+const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad);
+
+#if CONFIG_MODELCOEFPROB
+#define COEFPROB_BITS 8
+#define COEFPROB_MODELS (1 << COEFPROB_BITS)
+
+// 2 => EOB and Zero nodes are unconstrained, rest are modeled
+// 3 => EOB, Zero and One nodes are unconstrained, rest are modeled
+#define UNCONSTRAINED_NODES 3 // Choose one of 2 or 3
+
+// whether forward updates are model-based
+#define MODEL_BASED_UPDATE 0
+// if model-based how many nodes are unconstrained
+#define UNCONSTRAINED_UPDATE_NODES 3
+// whether backward updates are model-based
+#define MODEL_BASED_ADAPT 0
+#define UNCONSTRAINED_ADAPT_NODES 3
+
+// whether to adjust the coef probs for key frames based on qindex
+#define ADJUST_KF_COEF_PROBS 0
+
+typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][2];
+extern const vp9_prob vp9_modelcoefprobs[COEFPROB_MODELS][ENTROPY_NODES - 1];
+void vp9_get_model_distribution(vp9_prob model, vp9_prob *tree_probs,
+ int b, int r);
+void vp9_adjust_default_coef_probs(struct VP9Common *cm);
+#endif // CONFIG_MODELCOEFPROB
+
+#if CONFIG_CODE_NONZEROCOUNT
+/* Alphabet for number of non-zero symbols in block */
+#define NZC_0 0 /* Used for all blocks */
+#define NZC_1 1 /* Used for all blocks */
+#define NZC_2 2 /* Used for all blocks */
+#define NZC_3TO4 3 /* Used for all blocks */
+#define NZC_5TO8 4 /* Used for all blocks */
+#define NZC_9TO16 5 /* Used for all blocks */
+#define NZC_17TO32 6 /* Used for 8x8 and larger blocks */
+#define NZC_33TO64 7 /* Used for 8x8 and larger blocks */
+#define NZC_65TO128 8 /* Used for 16x16 and larger blocks */
+#define NZC_129TO256 9 /* Used for 16x16 and larger blocks */
+#define NZC_257TO512 10 /* Used for 32x32 and larger blocks */
+#define NZC_513TO1024 11 /* Used for 32x32 and larger blocks */
+
+/* Number of tokens for each block size */
+#define NZC4X4_TOKENS 6
+#define NZC8X8_TOKENS 8
+#define NZC16X16_TOKENS 10
+#define NZC32X32_TOKENS 12
+
+/* Number of nodes for each block size */
+#define NZC4X4_NODES 5
+#define NZC8X8_NODES 7
+#define NZC16X16_NODES 9
+#define NZC32X32_NODES 11
+
+/* Max number of tokens with extra bits */
+#define NZC_TOKENS_EXTRA 9
+
+/* Max number of extra bits */
+#define NZC_BITS_EXTRA 9
+
+/* Tokens without extra bits */
+#define NZC_TOKENS_NOEXTRA (NZC32X32_TOKENS - NZC_TOKENS_EXTRA)
+
+#define MAX_NZC_CONTEXTS 3
+
+/* whether to update extra bit probabilities */
+#define NZC_PCAT_UPDATE
+
+/* nzc trees */
+extern const vp9_tree_index vp9_nzc4x4_tree[];
+extern const vp9_tree_index vp9_nzc8x8_tree[];
+extern const vp9_tree_index vp9_nzc16x16_tree[];
+extern const vp9_tree_index vp9_nzc32x32_tree[];
+
+/* nzc encodings */
+extern struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS];
+extern struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS];
+extern struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS];
+extern struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS];
+
+#define codenzc(x) (\
+ (x) <= 3 ? (x) : (x) <= 4 ? 3 : (x) <= 8 ? 4 : \
+ (x) <= 16 ? 5 : (x) <= 32 ? 6 : (x) <= 64 ? 7 :\
+ (x) <= 128 ? 8 : (x) <= 256 ? 9 : (x) <= 512 ? 10 : 11)
+
+int vp9_get_nzc_context_y_sb64(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_y_sb32(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_y_mb16(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_uv_sb64(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_uv_sb32(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context_uv_mb16(struct VP9Common *cm, MODE_INFO *cur,
+ int mb_row, int mb_col, int block);
+int vp9_get_nzc_context(struct VP9Common *cm, MACROBLOCKD *xd, int block);
+void vp9_update_nzc_counts(struct VP9Common *cm, MACROBLOCKD *xd,
+ int mb_row, int mb_col);
+void vp9_adapt_nzc_probs(struct VP9Common *cm);
+
+/* Extra bits array */
+extern const int vp9_extranzcbits[NZC32X32_TOKENS];
+
+/* Base nzc values */
+extern const int vp9_basenzcvalue[NZC32X32_TOKENS];
+
+#endif // CONFIG_CODE_NONZEROCOUNT
+
+#include "vp9/common/vp9_coefupdateprobs.h"
+
#endif // VP9_COMMON_VP9_ENTROPY_H_
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -11,9 +11,10 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_modecont.h"
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_alloccommon.h"
#include "vpx_mem/vpx_mem.h"
-
static const unsigned int kf_y_mode_cts[8][VP9_YMODES] = {
/* DC V H D45 135 117 153 D27 D63 TM i8x8 BPRED */
{12, 6, 5, 5, 5, 5, 5, 5, 5, 2, 22, 200},
@@ -114,8 +115,6 @@
return SUBMVREF_NORMAL;
}
-const vp9_prob vp9_sub_mv_ref_prob [VP9_SUBMVREFS - 1] = { 180, 162, 25};
-
const vp9_prob vp9_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP9_SUBMVREFS - 1] = {
{ 147, 136, 18 },
{ 106, 145, 1 },
@@ -301,40 +300,32 @@
void vp9_init_mbmode_probs(VP9_COMMON *x) {
unsigned int bct [VP9_YMODES] [2]; /* num Ymodes > num UV modes */
- vp9_tree_probs_from_distribution(VP9_YMODES, vp9_ymode_encodings,
- vp9_ymode_tree, x->fc.ymode_prob,
- bct, y_mode_cts);
- vp9_tree_probs_from_distribution(VP9_I32X32_MODES, vp9_sb_ymode_encodings,
- vp9_sb_ymode_tree, x->fc.sb_ymode_prob,
- bct, y_mode_cts);
+ vp9_tree_probs_from_distribution(vp9_ymode_tree, x->fc.ymode_prob,
+ bct, y_mode_cts, 0);
+ vp9_tree_probs_from_distribution(vp9_sb_ymode_tree, x->fc.sb_ymode_prob,
+ bct, y_mode_cts, 0);
{
int i;
for (i = 0; i < 8; i++) {
- vp9_tree_probs_from_distribution(VP9_YMODES, vp9_kf_ymode_encodings,
- vp9_kf_ymode_tree, x->kf_ymode_prob[i],
- bct, kf_y_mode_cts[i]);
- vp9_tree_probs_from_distribution(VP9_I32X32_MODES,
- vp9_sb_kf_ymode_encodings,
- vp9_sb_kf_ymode_tree,
+ vp9_tree_probs_from_distribution(vp9_kf_ymode_tree, x->kf_ymode_prob[i],
+ bct, kf_y_mode_cts[i], 0);
+ vp9_tree_probs_from_distribution(vp9_sb_kf_ymode_tree,
x->sb_kf_ymode_prob[i], bct,
- kf_y_mode_cts[i]);
+ kf_y_mode_cts[i], 0);
}
}
{
int i;
for (i = 0; i < VP9_YMODES; i++) {
- vp9_tree_probs_from_distribution(VP9_UV_MODES, vp9_uv_mode_encodings,
- vp9_uv_mode_tree, x->kf_uv_mode_prob[i],
- bct, kf_uv_mode_cts[i]);
- vp9_tree_probs_from_distribution(VP9_UV_MODES, vp9_uv_mode_encodings,
- vp9_uv_mode_tree, x->fc.uv_mode_prob[i],
- bct, uv_mode_cts[i]);
+ vp9_tree_probs_from_distribution(vp9_uv_mode_tree, x->kf_uv_mode_prob[i],
+ bct, kf_uv_mode_cts[i], 0);
+ vp9_tree_probs_from_distribution(vp9_uv_mode_tree, x->fc.uv_mode_prob[i],
+ bct, uv_mode_cts[i], 0);
}
}
- vp9_tree_probs_from_distribution(VP9_I8X8_MODES, vp9_i8x8_mode_encodings,
- vp9_i8x8_mode_tree, x->fc.i8x8_mode_prob,
- bct, i8x8_mode_cts);
+ vp9_tree_probs_from_distribution(vp9_i8x8_mode_tree, x->fc.i8x8_mode_prob,
+ bct, i8x8_mode_cts, 0);
vpx_memcpy(x->fc.sub_mv_ref_prob, vp9_sub_mv_ref_prob2,
sizeof(vp9_sub_mv_ref_prob2));
@@ -344,6 +335,9 @@
#if CONFIG_COMP_INTERINTRA_PRED
x->fc.interintra_prob = VP9_DEF_INTERINTRA_PROB;
#endif
+ x->ref_pred_probs[0] = 120;
+ x->ref_pred_probs[1] = 80;
+ x->ref_pred_probs[2] = 40;
}
@@ -351,8 +345,7 @@
vp9_prob p[VP9_NKF_BINTRAMODES - 1],
unsigned int branch_ct[VP9_NKF_BINTRAMODES - 1][2],
const unsigned int events[VP9_NKF_BINTRAMODES]) {
- vp9_tree_probs_from_distribution(VP9_NKF_BINTRAMODES, vp9_bmode_encodings,
- vp9_bmode_tree, p, branch_ct, events);
+ vp9_tree_probs_from_distribution(vp9_bmode_tree, p, branch_ct, events, 0);
}
void vp9_default_bmode_probs(vp9_prob p[VP9_NKF_BINTRAMODES - 1]) {
@@ -364,8 +357,7 @@
vp9_prob p[VP9_KF_BINTRAMODES - 1],
unsigned int branch_ct[VP9_KF_BINTRAMODES - 1][2],
const unsigned int events[VP9_KF_BINTRAMODES]) {
- vp9_tree_probs_from_distribution(VP9_KF_BINTRAMODES, vp9_kf_bmode_encodings,
- vp9_kf_bmode_tree, p, branch_ct, events);
+ vp9_tree_probs_from_distribution(vp9_kf_bmode_tree, p, branch_ct, events, 0);
}
void vp9_kf_default_bmode_probs(vp9_prob p[VP9_KF_BINTRAMODES]
@@ -419,6 +411,14 @@
#else
const int vp9_switchable_interp_map[SWITCHABLE+1] = {-1, 0, 1, -1, -1};
#endif
+#endif // VP9_SWITCHABLE_FILTERS
+
+// Indicates if the filter is interpolating or non-interpolating
+// Note currently only the EIGHTTAP_SMOOTH is non-interpolating
+#if CONFIG_ENABLE_6TAP
+const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {1, 0, 1, 1, 1, -1};
+#else
+const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {0, 1, 1, 1, -1};
#endif
void vp9_entropy_mode_init() {
@@ -480,7 +480,7 @@
#define MVREF_COUNT_SAT 20
#define MVREF_MAX_UPDATE_FACTOR 128
-void vp9_update_mode_context(VP9_COMMON *pc) {
+void vp9_adapt_mode_context(VP9_COMMON *pc) {
int i, j;
unsigned int (*mv_ref_ct)[4][2];
int (*mode_context)[4];
@@ -526,9 +526,10 @@
#define MODE_COUNT_SAT 20
#define MODE_MAX_UPDATE_FACTOR 144
-static void update_mode_probs(int n_modes, struct vp9_token_struct *encoding,
+static void update_mode_probs(int n_modes,
const vp9_tree_index *tree, unsigned int *cnt,
- vp9_prob *pre_probs, vp9_prob *dst_probs) {
+ vp9_prob *pre_probs, vp9_prob *dst_probs,
+ unsigned int tok0_offset) {
#define MAX_PROBS 32
vp9_prob probs[MAX_PROBS];
unsigned int branch_ct[MAX_PROBS][2];
@@ -535,8 +536,7 @@
int t, count, factor;
assert(n_modes - 1 < MAX_PROBS);
- vp9_tree_probs_from_distribution(n_modes, encoding, tree, probs,
- branch_ct, cnt);
+ vp9_tree_probs_from_distribution(tree, probs, branch_ct, cnt, tok0_offset);
for (t = 0; t < n_modes - 1; ++t) {
count = branch_ct[t][0] + branch_ct[t][1];
count = count > MODE_COUNT_SAT ? MODE_COUNT_SAT : count;
@@ -592,31 +592,32 @@
#endif
#endif
- update_mode_probs(VP9_YMODES, vp9_ymode_encodings, vp9_ymode_tree,
+ update_mode_probs(VP9_YMODES, vp9_ymode_tree,
cm->fc.ymode_counts, cm->fc.pre_ymode_prob,
- cm->fc.ymode_prob);
- update_mode_probs(VP9_I32X32_MODES, vp9_sb_ymode_encodings, vp9_sb_ymode_tree,
+ cm->fc.ymode_prob, 0);
+ update_mode_probs(VP9_I32X32_MODES, vp9_sb_ymode_tree,
cm->fc.sb_ymode_counts, cm->fc.pre_sb_ymode_prob,
- cm->fc.sb_ymode_prob);
+ cm->fc.sb_ymode_prob, 0);
for (i = 0; i < VP9_YMODES; ++i) {
- update_mode_probs(VP9_UV_MODES, vp9_uv_mode_encodings, vp9_uv_mode_tree,
+ update_mode_probs(VP9_UV_MODES, vp9_uv_mode_tree,
cm->fc.uv_mode_counts[i], cm->fc.pre_uv_mode_prob[i],
- cm->fc.uv_mode_prob[i]);
+ cm->fc.uv_mode_prob[i], 0);
}
- update_mode_probs(VP9_NKF_BINTRAMODES, vp9_bmode_encodings, vp9_bmode_tree,
+ update_mode_probs(VP9_NKF_BINTRAMODES, vp9_bmode_tree,
cm->fc.bmode_counts, cm->fc.pre_bmode_prob,
- cm->fc.bmode_prob);
- update_mode_probs(VP9_I8X8_MODES, vp9_i8x8_mode_encodings,
+ cm->fc.bmode_prob, 0);
+ update_mode_probs(VP9_I8X8_MODES,
vp9_i8x8_mode_tree, cm->fc.i8x8_mode_counts,
- cm->fc.pre_i8x8_mode_prob, cm->fc.i8x8_mode_prob);
+ cm->fc.pre_i8x8_mode_prob, cm->fc.i8x8_mode_prob, 0);
for (i = 0; i < SUBMVREF_COUNT; ++i) {
- update_mode_probs(VP9_SUBMVREFS, vp9_sub_mv_ref_encoding_array,
+ update_mode_probs(VP9_SUBMVREFS,
vp9_sub_mv_ref_tree, cm->fc.sub_mv_ref_counts[i],
- cm->fc.pre_sub_mv_ref_prob[i], cm->fc.sub_mv_ref_prob[i]);
+ cm->fc.pre_sub_mv_ref_prob[i], cm->fc.sub_mv_ref_prob[i],
+ LEFT4X4);
}
- update_mode_probs(VP9_NUMMBSPLITS, vp9_mbsplit_encodings, vp9_mbsplit_tree,
+ update_mode_probs(VP9_NUMMBSPLITS, vp9_mbsplit_tree,
cm->fc.mbsplit_counts, cm->fc.pre_mbsplit_prob,
- cm->fc.mbsplit_prob);
+ cm->fc.mbsplit_prob, 0);
#if CONFIG_COMP_INTERINTRA_PRED
if (cm->use_interintra) {
int factor, interintra_prob, count;
@@ -630,4 +631,66 @@
interintra_prob, factor);
}
#endif
+}
+
+static void set_default_lf_deltas(MACROBLOCKD *xd) {
+ xd->mode_ref_lf_delta_enabled = 1;
+ xd->mode_ref_lf_delta_update = 1;
+
+ xd->ref_lf_deltas[INTRA_FRAME] = 2;
+ xd->ref_lf_deltas[LAST_FRAME] = 0;
+ xd->ref_lf_deltas[GOLDEN_FRAME] = -2;
+ xd->ref_lf_deltas[ALTREF_FRAME] = -2;
+
+ xd->mode_lf_deltas[0] = 4; // BPRED
+ xd->mode_lf_deltas[1] = -2; // Zero
+ xd->mode_lf_deltas[2] = 2; // New mv
+ xd->mode_lf_deltas[3] = 4; // Split mv
+}
+
+void vp9_setup_past_independence(VP9_COMMON *cm, MACROBLOCKD *xd) {
+ // Reset the segment feature data to the default stats:
+ // Features disabled, 0, with delta coding (Default state).
+ int i;
+ vp9_clearall_segfeatures(xd);
+ xd->mb_segment_abs_delta = SEGMENT_DELTADATA;
+ if (cm->last_frame_seg_map)
+ vpx_memset(cm->last_frame_seg_map, 0, (cm->mb_rows * cm->mb_cols));
+
+ /* reset the mode ref deltas for loop filter */
+ vpx_memset(xd->last_ref_lf_deltas, 0, sizeof(xd->last_ref_lf_deltas));
+ vpx_memset(xd->last_mode_lf_deltas, 0, sizeof(xd->last_mode_lf_deltas));
+ set_default_lf_deltas(xd);
+
+ vp9_default_coef_probs(cm);
+ vp9_init_mbmode_probs(cm);
+ vp9_default_bmode_probs(cm->fc.bmode_prob);
+ vp9_kf_default_bmode_probs(cm->kf_bmode_prob);
+ vp9_init_mv_probs(cm);
+ // To force update of the sharpness
+ cm->last_sharpness_level = -1;
+
+ vp9_init_mode_contexts(cm);
+
+ for (i = 0; i < NUM_FRAME_CONTEXTS; i++) {
+ vpx_memcpy(&cm->frame_contexts[i], &cm->fc, sizeof(cm->fc));
+ }
+
+ vpx_memset(cm->prev_mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+ vpx_memset(cm->mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
+
+ vp9_update_mode_info_border(cm, cm->mip);
+ vp9_update_mode_info_in_image(cm, cm->mi);
+
+#if CONFIG_NEW_MVREF
+ // Defaults probabilities for encoding the MV ref id signal
+ vpx_memset(xd->mb_mv_ref_probs, VP9_DEFAULT_MV_REF_PROB,
+ sizeof(xd->mb_mv_ref_probs));
+#endif
+ cm->ref_frame_sign_bias[GOLDEN_FRAME] = 0;
+ cm->ref_frame_sign_bias[ALTREF_FRAME] = 0;
+
+ cm->frame_context_idx = 0;
}
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -34,8 +34,6 @@
extern int vp9_mv_cont(const int_mv *l, const int_mv *a);
-extern const vp9_prob vp9_sub_mv_ref_prob[VP9_SUBMVREFS - 1];
-
extern const vp9_prob vp9_sub_mv_ref_prob2[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
extern const unsigned int vp9_kf_default_bmode_counts[VP9_KF_BINTRAMODES]
@@ -76,11 +74,14 @@
struct VP9Common;
+/* sets up common features to forget past dependence */
+void vp9_setup_past_independence(struct VP9Common *cm, MACROBLOCKD *xd);
+
void vp9_init_mbmode_probs(struct VP9Common *x);
extern void vp9_init_mode_contexts(struct VP9Common *pc);
-extern void vp9_update_mode_context(struct VP9Common *pc);
+extern void vp9_adapt_mode_context(struct VP9Common *pc);
extern void vp9_accum_mv_refs(struct VP9Common *pc,
MB_PREDICTION_MODE m,
@@ -100,6 +101,8 @@
[VP9_SWITCHABLE_FILTERS];
extern const int vp9_switchable_interp_map[SWITCHABLE + 1];
+
+extern const int vp9_is_interpolating_filter[SWITCHABLE + 1];
extern const vp9_tree_index vp9_switchable_interp_tree
[2 * (VP9_SWITCHABLE_FILTERS - 1)];
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -42,7 +42,10 @@
-MV_CLASS_2, -MV_CLASS_3,
10, 12,
-MV_CLASS_4, -MV_CLASS_5,
- -MV_CLASS_6, -MV_CLASS_7,
+ -MV_CLASS_6, 14,
+ 16, 18,
+ -MV_CLASS_7, -MV_CLASS_8,
+ -MV_CLASS_9, -MV_CLASS_10,
};
struct vp9_token_struct vp9_mv_class_encodings[MV_CLASSES];
@@ -62,24 +65,24 @@
{32, 64, 96},
{
{ /* vert component */
- 128, /* sign */
- {224, 144, 192, 168, 192, 176, 192}, /* class */
- {216}, /* class0 */
- {136, 140, 148, 160, 176, 192, 224}, /* bits */
- {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
- {64, 96, 64}, /* fp */
- 160, /* class0_hp bit */
- 128, /* hp */
+ 128, /* sign */
+ {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, /* class */
+ {216}, /* class0 */
+ {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */
+ {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
+ {64, 96, 64}, /* fp */
+ 160, /* class0_hp bit */
+ 128, /* hp */
},
{ /* hor component */
- 128, /* sign */
- {216, 128, 176, 160, 176, 176, 192}, /* class */
- {208}, /* class0 */
- {136, 140, 148, 160, 176, 192, 224}, /* bits */
- {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
- {64, 96, 64}, /* fp */
- 160, /* class0_hp bit */
- 128, /* hp */
+ 128, /* sign */
+ {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, /* class */
+ {208}, /* class0 */
+ {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */
+ {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
+ {64, 96, 64}, /* fp */
+ 160, /* class0_hp bit */
+ 128, /* hp */
}
},
};
@@ -103,6 +106,9 @@
else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5;
else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6;
else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7;
+ else if (z < CLASS0_SIZE * 2048) c = MV_CLASS_8;
+ else if (z < CLASS0_SIZE * 4096) c = MV_CLASS_9;
+ else if (z < CLASS0_SIZE * 8192) c = MV_CLASS_10;
else assert(0);
if (offset)
*offset = z - mv_class_base(c);
@@ -110,11 +116,8 @@
}
int vp9_use_nmv_hp(const MV *ref) {
- if ((abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
- (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH)
- return 1;
- else
- return 0;
+ return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
+ (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
}
int vp9_get_mv_mag(MV_CLASS_TYPE c, int offset) {
@@ -134,6 +137,7 @@
int incr,
int usehp) {
int s, z, c, o, d, e, f;
+ if (!incr) return;
assert (v != 0); /* should not be zero */
s = v < 0;
mvcomp->sign[s] += incr;
@@ -211,24 +215,26 @@
}
}
-static void adapt_prob(vp9_prob *dest, vp9_prob prep, vp9_prob newp,
+static void adapt_prob(vp9_prob *dest, vp9_prob prep,
unsigned int ct[2]) {
int count = ct[0] + ct[1];
-
if (count) {
+ vp9_prob newp = get_binary_prob(ct[0], ct[1]);
count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
*dest = weighted_prob(prep, newp,
MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
+ } else {
+ *dest = prep;
}
}
-void vp9_counts_process(nmv_context_counts *NMVcount, int usehp) {
- counts_to_context(&NMVcount->comps[0], usehp);
- counts_to_context(&NMVcount->comps[1], usehp);
+void vp9_counts_process(nmv_context_counts *nmv_count, int usehp) {
+ counts_to_context(&nmv_count->comps[0], usehp);
+ counts_to_context(&nmv_count->comps[1], usehp);
}
void vp9_counts_to_nmv_context(
- nmv_context_counts *NMVcount,
+ nmv_context_counts *nmv_count,
nmv_context *prob,
int usehp,
unsigned int (*branch_ct_joint)[2],
@@ -241,81 +247,90 @@
unsigned int (*branch_ct_class0_hp)[2],
unsigned int (*branch_ct_hp)[2]) {
int i, j, k;
- vp9_counts_process(NMVcount, usehp);
- vp9_tree_probs_from_distribution(MV_JOINTS,
- vp9_mv_joint_encodings,
- vp9_mv_joint_tree,
+ vp9_counts_process(nmv_count, usehp);
+ vp9_tree_probs_from_distribution(vp9_mv_joint_tree,
prob->joints,
branch_ct_joint,
- NMVcount->joints);
+ nmv_count->joints, 0);
for (i = 0; i < 2; ++i) {
- prob->comps[i].sign = get_binary_prob(NMVcount->comps[i].sign[0],
- NMVcount->comps[i].sign[1]);
- branch_ct_sign[i][0] = NMVcount->comps[i].sign[0];
- branch_ct_sign[i][1] = NMVcount->comps[i].sign[1];
- vp9_tree_probs_from_distribution(MV_CLASSES,
- vp9_mv_class_encodings,
- vp9_mv_class_tree,
+ prob->comps[i].sign = get_binary_prob(nmv_count->comps[i].sign[0],
+ nmv_count->comps[i].sign[1]);
+ branch_ct_sign[i][0] = nmv_count->comps[i].sign[0];
+ branch_ct_sign[i][1] = nmv_count->comps[i].sign[1];
+ vp9_tree_probs_from_distribution(vp9_mv_class_tree,
prob->comps[i].classes,
branch_ct_classes[i],
- NMVcount->comps[i].classes);
- vp9_tree_probs_from_distribution(CLASS0_SIZE,
- vp9_mv_class0_encodings,
- vp9_mv_class0_tree,
+ nmv_count->comps[i].classes, 0);
+ vp9_tree_probs_from_distribution(vp9_mv_class0_tree,
prob->comps[i].class0,
branch_ct_class0[i],
- NMVcount->comps[i].class0);
+ nmv_count->comps[i].class0, 0);
for (j = 0; j < MV_OFFSET_BITS; ++j) {
- prob->comps[i].bits[j] = get_binary_prob(NMVcount->comps[i].bits[j][0],
- NMVcount->comps[i].bits[j][1]);
- branch_ct_bits[i][j][0] = NMVcount->comps[i].bits[j][0];
- branch_ct_bits[i][j][1] = NMVcount->comps[i].bits[j][1];
+ prob->comps[i].bits[j] = get_binary_prob(nmv_count->comps[i].bits[j][0],
+ nmv_count->comps[i].bits[j][1]);
+ branch_ct_bits[i][j][0] = nmv_count->comps[i].bits[j][0];
+ branch_ct_bits[i][j][1] = nmv_count->comps[i].bits[j][1];
}
}
for (i = 0; i < 2; ++i) {
for (k = 0; k < CLASS0_SIZE; ++k) {
- vp9_tree_probs_from_distribution(4,
- vp9_mv_fp_encodings,
- vp9_mv_fp_tree,
+ vp9_tree_probs_from_distribution(vp9_mv_fp_tree,
prob->comps[i].class0_fp[k],
branch_ct_class0_fp[i][k],
- NMVcount->comps[i].class0_fp[k]);
+ nmv_count->comps[i].class0_fp[k], 0);
}
- vp9_tree_probs_from_distribution(4,
- vp9_mv_fp_encodings,
- vp9_mv_fp_tree,
+ vp9_tree_probs_from_distribution(vp9_mv_fp_tree,
prob->comps[i].fp,
branch_ct_fp[i],
- NMVcount->comps[i].fp);
+ nmv_count->comps[i].fp, 0);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
prob->comps[i].class0_hp =
- get_binary_prob(NMVcount->comps[i].class0_hp[0],
- NMVcount->comps[i].class0_hp[1]);
- branch_ct_class0_hp[i][0] = NMVcount->comps[i].class0_hp[0];
- branch_ct_class0_hp[i][1] = NMVcount->comps[i].class0_hp[1];
+ get_binary_prob(nmv_count->comps[i].class0_hp[0],
+ nmv_count->comps[i].class0_hp[1]);
+ branch_ct_class0_hp[i][0] = nmv_count->comps[i].class0_hp[0];
+ branch_ct_class0_hp[i][1] = nmv_count->comps[i].class0_hp[1];
- prob->comps[i].hp = get_binary_prob(NMVcount->comps[i].hp[0],
- NMVcount->comps[i].hp[1]);
- branch_ct_hp[i][0] = NMVcount->comps[i].hp[0];
- branch_ct_hp[i][1] = NMVcount->comps[i].hp[1];
+ prob->comps[i].hp = get_binary_prob(nmv_count->comps[i].hp[0],
+ nmv_count->comps[i].hp[1]);
+ branch_ct_hp[i][0] = nmv_count->comps[i].hp[0];
+ branch_ct_hp[i][1] = nmv_count->comps[i].hp[1];
}
}
}
+static unsigned int adapt_probs(unsigned int i,
+ vp9_tree tree,
+ vp9_prob this_probs[],
+ const vp9_prob last_probs[],
+ const unsigned int num_events[]) {
+ vp9_prob this_prob;
+
+ const uint32_t left = tree[i] <= 0
+ ? num_events[-tree[i]]
+ : adapt_probs(tree[i], tree, this_probs, last_probs, num_events);
+
+ const uint32_t right = tree[i + 1] <= 0
+ ? num_events[-tree[i + 1]]
+ : adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events);
+
+ uint32_t weight = left + right;
+ if (weight) {
+ this_prob = get_binary_prob(left, right);
+ weight = weight > MV_COUNT_SAT ? MV_COUNT_SAT : weight;
+ this_prob = weighted_prob(last_probs[i >> 1], this_prob,
+ MV_MAX_UPDATE_FACTOR * weight / MV_COUNT_SAT);
+ } else {
+ this_prob = last_probs[i >> 1];
+ }
+ this_probs[i >> 1] = this_prob;
+ return left + right;
+}
+
+
void vp9_adapt_nmv_probs(VP9_COMMON *cm, int usehp) {
- int i, j, k;
- nmv_context prob;
- unsigned int branch_ct_joint[MV_JOINTS - 1][2];
- unsigned int branch_ct_sign[2][2];
- unsigned int branch_ct_classes[2][MV_CLASSES - 1][2];
- unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2];
- unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2];
- unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2];
- unsigned int branch_ct_fp[2][4 - 1][2];
- unsigned int branch_ct_class0_hp[2][2];
- unsigned int branch_ct_hp[2][2];
+ int i, j;
#ifdef MV_COUNT_TESTING
printf("joints count: ");
for (j = 0; j < MV_JOINTS; ++j) printf("%d ", cm->fc.NMVcount.joints[j]);
@@ -376,75 +391,48 @@
smooth_counts(&cm->fc.NMVcount.comps[0]);
smooth_counts(&cm->fc.NMVcount.comps[1]);
#endif
- vp9_counts_to_nmv_context(&cm->fc.NMVcount,
- &prob,
- usehp,
- branch_ct_joint,
- branch_ct_sign,
- branch_ct_classes,
- branch_ct_class0,
- branch_ct_bits,
- branch_ct_class0_fp,
- branch_ct_fp,
- branch_ct_class0_hp,
- branch_ct_hp);
+ vp9_counts_process(&cm->fc.NMVcount, usehp);
- for (j = 0; j < MV_JOINTS - 1; ++j) {
- adapt_prob(&cm->fc.nmvc.joints[j],
- cm->fc.pre_nmvc.joints[j],
- prob.joints[j],
- branch_ct_joint[j]);
- }
+ adapt_probs(0, vp9_mv_joint_tree,
+ cm->fc.nmvc.joints, cm->fc.pre_nmvc.joints,
+ cm->fc.NMVcount.joints);
+
for (i = 0; i < 2; ++i) {
adapt_prob(&cm->fc.nmvc.comps[i].sign,
cm->fc.pre_nmvc.comps[i].sign,
- prob.comps[i].sign,
- branch_ct_sign[i]);
- for (j = 0; j < MV_CLASSES - 1; ++j) {
- adapt_prob(&cm->fc.nmvc.comps[i].classes[j],
- cm->fc.pre_nmvc.comps[i].classes[j],
- prob.comps[i].classes[j],
- branch_ct_classes[i][j]);
- }
- for (j = 0; j < CLASS0_SIZE - 1; ++j) {
- adapt_prob(&cm->fc.nmvc.comps[i].class0[j],
- cm->fc.pre_nmvc.comps[i].class0[j],
- prob.comps[i].class0[j],
- branch_ct_class0[i][j]);
- }
+ cm->fc.NMVcount.comps[i].sign);
+ adapt_probs(0, vp9_mv_class_tree,
+ cm->fc.nmvc.comps[i].classes, cm->fc.pre_nmvc.comps[i].classes,
+ cm->fc.NMVcount.comps[i].classes);
+ adapt_probs(0, vp9_mv_class0_tree,
+ cm->fc.nmvc.comps[i].class0, cm->fc.pre_nmvc.comps[i].class0,
+ cm->fc.NMVcount.comps[i].class0);
for (j = 0; j < MV_OFFSET_BITS; ++j) {
adapt_prob(&cm->fc.nmvc.comps[i].bits[j],
cm->fc.pre_nmvc.comps[i].bits[j],
- prob.comps[i].bits[j],
- branch_ct_bits[i][j]);
+ cm->fc.NMVcount.comps[i].bits[j]);
}
}
for (i = 0; i < 2; ++i) {
for (j = 0; j < CLASS0_SIZE; ++j) {
- for (k = 0; k < 3; ++k) {
- adapt_prob(&cm->fc.nmvc.comps[i].class0_fp[j][k],
- cm->fc.pre_nmvc.comps[i].class0_fp[j][k],
- prob.comps[i].class0_fp[j][k],
- branch_ct_class0_fp[i][j][k]);
- }
+ adapt_probs(0, vp9_mv_fp_tree,
+ cm->fc.nmvc.comps[i].class0_fp[j],
+ cm->fc.pre_nmvc.comps[i].class0_fp[j],
+ cm->fc.NMVcount.comps[i].class0_fp[j]);
}
- for (j = 0; j < 3; ++j) {
- adapt_prob(&cm->fc.nmvc.comps[i].fp[j],
- cm->fc.pre_nmvc.comps[i].fp[j],
- prob.comps[i].fp[j],
- branch_ct_fp[i][j]);
- }
+ adapt_probs(0, vp9_mv_fp_tree,
+ cm->fc.nmvc.comps[i].fp,
+ cm->fc.pre_nmvc.comps[i].fp,
+ cm->fc.NMVcount.comps[i].fp);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
adapt_prob(&cm->fc.nmvc.comps[i].class0_hp,
cm->fc.pre_nmvc.comps[i].class0_hp,
- prob.comps[i].class0_hp,
- branch_ct_class0_hp[i]);
+ cm->fc.NMVcount.comps[i].class0_hp);
adapt_prob(&cm->fc.nmvc.comps[i].hp,
cm->fc.pre_nmvc.comps[i].hp,
- prob.comps[i].hp,
- branch_ct_hp[i]);
+ cm->fc.NMVcount.comps[i].hp);
}
}
}
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -49,7 +49,7 @@
extern struct vp9_token_struct vp9_mv_joint_encodings [MV_JOINTS];
/* Symbols for coding magnitude class of nonzero components */
-#define MV_CLASSES 8
+#define MV_CLASSES 11
typedef enum {
MV_CLASS_0 = 0, /* (0, 2] integer pel */
MV_CLASS_1 = 1, /* (2, 4] integer pel */
@@ -59,6 +59,9 @@
MV_CLASS_5 = 5, /* (32, 64] integer pel */
MV_CLASS_6 = 6, /* (64, 128] integer pel */
MV_CLASS_7 = 7, /* (128, 256] integer pel */
+ MV_CLASS_8 = 8, /* (256, 512] integer pel */
+ MV_CLASS_9 = 9, /* (512, 1024] integer pel */
+ MV_CLASS_10 = 10, /* (1024,2048] integer pel */
} MV_CLASS_TYPE;
extern const vp9_tree_index vp9_mv_class_tree[2 * MV_CLASSES - 2];
--- a/vp9/common/vp9_extend.c
+++ b/vp9/common/vp9_extend.c
@@ -11,159 +11,137 @@
#include "vp9/common/vp9_extend.h"
#include "vpx_mem/vpx_mem.h"
-static void copy_and_extend_plane(uint8_t *s, /* source */
- int sp, /* source pitch */
- uint8_t *d, /* destination */
- int dp, /* destination pitch */
- int h, /* height */
- int w, /* width */
- int et, /* extend top border */
- int el, /* extend left border */
- int eb, /* extend bottom border */
- int er) { /* extend right border */
- int i;
- uint8_t *src_ptr1, *src_ptr2;
- uint8_t *dest_ptr1, *dest_ptr2;
- int linesize;
+static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
+ uint8_t *dst, int dst_pitch,
+ int w, int h,
+ int extend_top, int extend_left,
+ int extend_bottom, int extend_right) {
+ int i, linesize;
- /* copy the left and right most columns out */
- src_ptr1 = s;
- src_ptr2 = s + w - 1;
- dest_ptr1 = d - el;
- dest_ptr2 = d + w;
+ // copy the left and right most columns out
+ const uint8_t *src_ptr1 = src;
+ const uint8_t *src_ptr2 = src + w - 1;
+ uint8_t *dst_ptr1 = dst - extend_left;
+ uint8_t *dst_ptr2 = dst + w;
for (i = 0; i < h; i++) {
- vpx_memset(dest_ptr1, src_ptr1[0], el);
- vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
- vpx_memset(dest_ptr2, src_ptr2[0], er);
- src_ptr1 += sp;
- src_ptr2 += sp;
- dest_ptr1 += dp;
- dest_ptr2 += dp;
+ vpx_memset(dst_ptr1, src_ptr1[0], extend_left);
+ vpx_memcpy(dst_ptr1 + extend_left, src_ptr1, w);
+ vpx_memset(dst_ptr2, src_ptr2[0], extend_right);
+ src_ptr1 += src_pitch;
+ src_ptr2 += src_pitch;
+ dst_ptr1 += dst_pitch;
+ dst_ptr2 += dst_pitch;
}
- /* Now copy the top and bottom lines into each line of the respective
- * borders
- */
- src_ptr1 = d - el;
- src_ptr2 = d + dp * (h - 1) - el;
- dest_ptr1 = d + dp * (-et) - el;
- dest_ptr2 = d + dp * (h) - el;
- linesize = el + er + w;
+ // Now copy the top and bottom lines into each line of the respective
+ // borders
+ src_ptr1 = dst - extend_left;
+ src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
+ dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
+ dst_ptr2 = dst + dst_pitch * (h) - extend_left;
+ linesize = extend_left + extend_right + w;
- for (i = 0; i < et; i++) {
- vpx_memcpy(dest_ptr1, src_ptr1, linesize);
- dest_ptr1 += dp;
+ for (i = 0; i < extend_top; i++) {
+ vpx_memcpy(dst_ptr1, src_ptr1, linesize);
+ dst_ptr1 += dst_pitch;
}
- for (i = 0; i < eb; i++) {
- vpx_memcpy(dest_ptr2, src_ptr2, linesize);
- dest_ptr2 += dp;
+ for (i = 0; i < extend_bottom; i++) {
+ vpx_memcpy(dst_ptr2, src_ptr2, linesize);
+ dst_ptr2 += dst_pitch;
}
}
-void vp9_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
+void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst) {
- int et = dst->border;
- int el = dst->border;
- int eb = dst->border + dst->y_height - src->y_height;
- int er = dst->border + dst->y_width - src->y_width;
+ const int et_y = dst->border;
+ const int el_y = dst->border;
+ const int eb_y = dst->border + dst->y_height - src->y_height;
+ const int er_y = dst->border + dst->y_width - src->y_width;
+ const int et_uv = dst->border >> 1;
+ const int el_uv = dst->border >> 1;
+ const int eb_uv = (dst->border >> 1) + dst->uv_height - src->uv_height;
+ const int er_uv = (dst->border >> 1) + dst->uv_width - src->uv_width;
+
copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
- src->y_height, src->y_width,
- et, el, eb, er);
+ src->y_width, src->y_height,
+ et_y, el_y, eb_y, er_y);
- et = dst->border >> 1;
- el = dst->border >> 1;
- eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
- er = (dst->border >> 1) + dst->uv_width - src->uv_width;
-
copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
- src->uv_height, src->uv_width,
- et, el, eb, er);
+ src->uv_width, src->uv_height,
+ et_uv, el_uv, eb_uv, er_uv);
copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
- src->uv_height, src->uv_width,
- et, el, eb, er);
+ src->uv_width, src->uv_height,
+ et_y, el_y, eb_uv, er_uv);
}
-void vp9_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
+void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw) {
- int et = dst->border;
- int el = dst->border;
- int eb = dst->border + dst->y_height - src->y_height;
- int er = dst->border + dst->y_width - src->y_width;
- int src_y_offset = srcy * src->y_stride + srcx;
- int dst_y_offset = srcy * dst->y_stride + srcx;
- int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
- int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
-
// If the side is not touching the bounder then don't extend.
- if (srcy)
- et = 0;
- if (srcx)
- el = 0;
- if (srcy + srch != src->y_height)
- eb = 0;
- if (srcx + srcw != src->y_width)
- er = 0;
+ const int et_y = srcy ? 0 : dst->border;
+ const int el_y = srcx ? 0 : dst->border;
+ const int eb_y = srcy + srch != src->y_height ? 0 :
+ dst->border + dst->y_height - src->y_height;
+ const int er_y = srcx + srcw != src->y_width ? 0 :
+ dst->border + dst->y_width - src->y_width;
+ const int src_y_offset = srcy * src->y_stride + srcx;
+ const int dst_y_offset = srcy * dst->y_stride + srcx;
- copy_and_extend_plane(src->y_buffer + src_y_offset,
- src->y_stride,
- dst->y_buffer + dst_y_offset,
- dst->y_stride,
- srch, srcw,
- et, el, eb, er);
+ const int et_uv = (et_y + 1) >> 1;
+ const int el_uv = (el_y + 1) >> 1;
+ const int eb_uv = (eb_y + 1) >> 1;
+ const int er_uv = (er_y + 1) >> 1;
+ const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
+ const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
+ const int srch_uv = (srch + 1) >> 1;
+ const int srcw_uv = (srcw + 1) >> 1;
- et = (et + 1) >> 1;
- el = (el + 1) >> 1;
- eb = (eb + 1) >> 1;
- er = (er + 1) >> 1;
- srch = (srch + 1) >> 1;
- srcw = (srcw + 1) >> 1;
+ copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
+ dst->y_buffer + dst_y_offset, dst->y_stride,
+ srcw, srch,
+ et_y, el_y, eb_y, er_y);
- copy_and_extend_plane(src->u_buffer + src_uv_offset,
- src->uv_stride,
- dst->u_buffer + dst_uv_offset,
- dst->uv_stride,
- srch, srcw,
- et, el, eb, er);
+ copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
+ dst->u_buffer + dst_uv_offset, dst->uv_stride,
+ srcw_uv, srch_uv,
+ et_uv, el_uv, eb_uv, er_uv);
- copy_and_extend_plane(src->v_buffer + src_uv_offset,
- src->uv_stride,
- dst->v_buffer + dst_uv_offset,
- dst->uv_stride,
- srch, srcw,
- et, el, eb, er);
+ copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
+ dst->v_buffer + dst_uv_offset, dst->uv_stride,
+ srcw_uv, srch_uv,
+ et_uv, el_uv, eb_uv, er_uv);
}
-/* note the extension is only for the last row, for intra prediction purpose */
-void vp9_extend_mb_row(YV12_BUFFER_CONFIG *ybf, uint8_t *YPtr,
- uint8_t *UPtr, uint8_t *VPtr) {
+// note the extension is only for the last row, for intra prediction purpose
+void vp9_extend_mb_row(YV12_BUFFER_CONFIG *buf,
+ uint8_t *y, uint8_t *u, uint8_t *v) {
int i;
- YPtr += ybf->y_stride * 14;
- UPtr += ybf->uv_stride * 6;
- VPtr += ybf->uv_stride * 6;
+ y += buf->y_stride * 14;
+ u += buf->uv_stride * 6;
+ v += buf->uv_stride * 6;
for (i = 0; i < 4; i++) {
- YPtr[i] = YPtr[-1];
- UPtr[i] = UPtr[-1];
- VPtr[i] = VPtr[-1];
+ y[i] = y[-1];
+ u[i] = u[-1];
+ v[i] = v[-1];
}
- YPtr += ybf->y_stride;
- UPtr += ybf->uv_stride;
- VPtr += ybf->uv_stride;
+ y += buf->y_stride;
+ u += buf->uv_stride;
+ v += buf->uv_stride;
for (i = 0; i < 4; i++) {
- YPtr[i] = YPtr[-1];
- UPtr[i] = UPtr[-1];
- VPtr[i] = VPtr[-1];
+ y[i] = y[-1];
+ u[i] = u[-1];
+ v[i] = v[-1];
}
}
--- a/vp9/common/vp9_extend.h
+++ b/vp9/common/vp9_extend.h
@@ -14,15 +14,17 @@
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
-void vp9_extend_mb_row(YV12_BUFFER_CONFIG *ybf, uint8_t *YPtr,
- uint8_t *UPtr, uint8_t *VPtr);
-void vp9_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
+void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
-void vp9_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
+void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst,
int srcy, int srcx,
int srch, int srcw);
+
+void vp9_extend_mb_row(YV12_BUFFER_CONFIG *buf,
+ uint8_t *y, uint8_t *u, uint8_t *v);
+
#endif // VP9_COMMON_VP9_EXTEND_H_
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -15,28 +15,30 @@
#include "vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
-DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = {
- { 128, 0 },
- { 120, 8 },
- { 112, 16 },
- { 104, 24 },
- { 96, 32 },
- { 88, 40 },
- { 80, 48 },
- { 72, 56 },
- { 64, 64 },
- { 56, 72 },
- { 48, 80 },
- { 40, 88 },
- { 32, 96 },
- { 24, 104 },
- { 16, 112 },
- { 8, 120 }
+DECLARE_ALIGNED(256, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 },
+ { 0, 0, 0, 120, 8, 0, 0, 0 },
+ { 0, 0, 0, 112, 16, 0, 0, 0 },
+ { 0, 0, 0, 104, 24, 0, 0, 0 },
+ { 0, 0, 0, 96, 32, 0, 0, 0 },
+ { 0, 0, 0, 88, 40, 0, 0, 0 },
+ { 0, 0, 0, 80, 48, 0, 0, 0 },
+ { 0, 0, 0, 72, 56, 0, 0, 0 },
+ { 0, 0, 0, 64, 64, 0, 0, 0 },
+ { 0, 0, 0, 56, 72, 0, 0, 0 },
+ { 0, 0, 0, 48, 80, 0, 0, 0 },
+ { 0, 0, 0, 40, 88, 0, 0, 0 },
+ { 0, 0, 0, 32, 96, 0, 0, 0 },
+ { 0, 0, 0, 24, 104, 0, 0, 0 },
+ { 0, 0, 0, 16, 112, 0, 0, 0 },
+ { 0, 0, 0, 8, 120, 0, 0, 0 }
};
-#define FILTER_ALPHA 0
-#define FILTER_ALPHA_SHARP 1
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
+#define FILTER_ALPHA 0
+#define FILTER_ALPHA_SHARP 0
+#define FILTER_ALPHA_SMOOTH 50
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8])
+ = {
#if FILTER_ALPHA == 0
/* Lagrangian interpolation filter */
{ 0, 0, 0, 128, 0, 0, 0, 0},
@@ -55,6 +57,7 @@
{ -1, 3, -9, 27, 118, -13, 4, -1},
{ 0, 2, -6, 18, 122, -10, 3, -1},
{ 0, 1, -3, 8, 126, -5, 1, 0}
+
#elif FILTER_ALPHA == 50
/* Generated using MATLAB:
* alpha = 0.5;
@@ -79,11 +82,13 @@
{ 0, 3, -9, 27, 118, -13, 3, -1},
{ 0, 2, -6, 18, 122, -10, 2, 0},
{ 0, 1, -3, 8, 126, -5, 1, 0}
+
#endif /* FILTER_ALPHA */
};
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = {
-#if FILTER_ALPHA_SHARP == 1
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8])
+ = {
+#if FILTER_ALPHA_SHARP == 0
/* dct based filter */
{0, 0, 0, 128, 0, 0, 0, 0},
{-1, 3, -7, 127, 8, -3, 1, 0},
@@ -101,31 +106,34 @@
{-2, 5, -10, 27, 121, -17, 7, -3},
{-1, 3, -6, 17, 125, -13, 5, -2},
{0, 1, -3, 8, 127, -7, 3, -1}
-#elif FILTER_ALPHA_SHARP == 75
- /* alpha = 0.75 */
- {0, 0, 0, 128, 0, 0, 0, 0},
- {-1, 2, -6, 126, 9, -3, 2, -1},
- {-1, 4, -11, 123, 18, -7, 3, -1},
- {-2, 6, -16, 119, 28, -10, 5, -2},
- {-2, 7, -19, 113, 38, -13, 6, -2},
- {-3, 8, -21, 106, 49, -16, 7, -2},
- {-3, 9, -22, 99, 59, -19, 8, -3},
- {-3, 9, -23, 90, 70, -21, 9, -3},
- {-3, 9, -22, 80, 80, -22, 9, -3},
- {-3, 9, -21, 70, 90, -23, 9, -3},
- {-3, 8, -19, 59, 99, -22, 9, -3},
- {-2, 7, -16, 49, 106, -21, 8, -3},
- {-2, 6, -13, 38, 113, -19, 7, -2},
- {-2, 5, -10, 28, 119, -16, 6, -2},
- {-1, 3, -7, 18, 123, -11, 4, -1},
- {-1, 2, -3, 9, 126, -6, 2, -1}
+
+#elif FILTER_ALPHA_SHARP == 80
+ /* alpha = 0.80 */
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 2, -6, 127, 9, -4, 2, -1},
+ {-2, 5, -12, 124, 18, -7, 4, -2},
+ {-2, 7, -16, 119, 28, -11, 5, -2},
+ {-3, 8, -19, 114, 38, -14, 7, -3},
+ {-3, 9, -22, 107, 49, -17, 8, -3},
+ {-4, 10, -23, 99, 60, -20, 10, -4},
+ {-4, 11, -23, 90, 70, -22, 10, -4},
+ {-4, 11, -23, 80, 80, -23, 11, -4},
+ {-4, 10, -22, 70, 90, -23, 11, -4},
+ {-4, 10, -20, 60, 99, -23, 10, -4},
+ {-3, 8, -17, 49, 107, -22, 9, -3},
+ {-3, 7, -14, 38, 114, -19, 8, -3},
+ {-2, 5, -11, 28, 119, -16, 7, -2},
+ {-2, 4, -7, 18, 124, -12, 5, -2},
+ {-1, 2, -4, 9, 127, -6, 2, -1}
#endif /* FILTER_ALPHA_SHARP */
};
-DECLARE_ALIGNED(16, const int16_t,
+DECLARE_ALIGNED(256, const int16_t,
vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = {
/* 8-tap lowpass filter */
/* Hamming window */
+ /* freqmultiplier = 0.625 */
+#if FILTER_ALPHA_SMOOTH == 625
{-1, -7, 32, 80, 32, -7, -1, 0},
{-1, -8, 28, 80, 37, -7, -2, 1},
{ 0, -8, 24, 79, 41, -7, -2, 1},
@@ -142,1074 +150,44 @@
{ 1, -3, -5, 45, 78, 20, -8, 0},
{ 1, -2, -7, 41, 79, 24, -8, 0},
{ 1, -2, -7, 37, 80, 28, -8, -1}
-};
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6]) = {
- {0, 0, 128, 0, 0, 0},
- {1, -5, 125, 8, -2, 1},
- {1, -8, 122, 17, -5, 1},
- {2, -11, 116, 27, -8, 2},
- {3, -14, 110, 37, -10, 2},
- {3, -15, 103, 47, -12, 2},
- {3, -16, 95, 57, -14, 3},
- {3, -16, 86, 67, -15, 3},
- {3, -16, 77, 77, -16, 3},
- {3, -15, 67, 86, -16, 3},
- {3, -14, 57, 95, -16, 3},
- {2, -12, 47, 103, -15, 3},
- {2, -10, 37, 110, -14, 3},
- {2, -8, 27, 116, -11, 2},
- {1, -5, 17, 122, -8, 1},
- {1, -2, 8, 125, -5, 1}
+#elif FILTER_ALPHA_SMOOTH == 50
+ /* freqmultiplier = 0.5 */
+ {-3, 0, 35, 64, 35, 0, -3, 0},
+ {-3, -1, 32, 64, 38, 1, -3, 0},
+ {-2, -2, 29, 63, 41, 2, -3, 0},
+ {-2, -2, 26, 63, 43, 4, -4, 0},
+ {-2, -3, 24, 62, 46, 5, -4, 0},
+ {-2, -3, 21, 60, 49, 7, -4, 0},
+ {-1, -4, 18, 59, 51, 9, -4, 0},
+ {-1, -4, 16, 57, 53, 12, -4, -1},
+ {-1, -4, 14, 55, 55, 14, -4, -1},
+ {-1, -4, 12, 53, 57, 16, -4, -1},
+ {0, -4, 9, 51, 59, 18, -4, -1},
+ {0, -4, 7, 49, 60, 21, -3, -2},
+ {0, -4, 5, 46, 62, 24, -3, -2},
+ {0, -4, 4, 43, 63, 26, -2, -2},
+ {0, -3, 2, 41, 63, 29, -2, -2},
+ {0, -3, 1, 38, 64, 32, -1, -3}
+#endif
};
-static void filter_block2d_first_pass_6(uint8_t *src_ptr,
- int *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
- int temp;
-
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +
- ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +
- ((int)src_ptr[0] * vp9_filter[2]) +
- ((int)src_ptr[pixel_step] * vp9_filter[3]) +
- ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +
- ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +
- (VP9_FILTER_WEIGHT >> 1); /* Rounding */
-
- /* Normalize back to 0-255 */
- output_ptr[j] = clip_pixel(temp >> VP9_FILTER_SHIFT);
- src_ptr++;
- }
-
- /* Next row... */
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_width;
- }
-}
-
-static void filter_block2d_second_pass_6(int *src_ptr,
- uint8_t *output_ptr,
- int output_pitch,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
- int temp;
-
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- /* Apply filter */
- temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +
- ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +
- ((int)src_ptr[0] * vp9_filter[2]) +
- ((int)src_ptr[pixel_step] * vp9_filter[3]) +
- ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +
- ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +
- (VP9_FILTER_WEIGHT >> 1); /* Rounding */
-
- /* Normalize back to 0-255 */
- output_ptr[j] = clip_pixel(temp >> VP9_FILTER_SHIFT);
- src_ptr++;
- }
-
- /* Start next row */
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_pitch;
- }
-}
-
-/*
- * The only functional difference between filter_block2d_second_pass()
- * and this function is that filter_block2d_second_pass() does a sixtap
- * filter on the input and stores it in the output. This function
- * (filter_block2d_second_pass_avg()) does a sixtap filter on the input,
- * and then averages that with the content already present in the output
- * ((filter_result + dest + 1) >> 1) and stores that in the output.
- */
-static void filter_block2d_second_pass_avg_6(int *src_ptr,
- uint8_t *output_ptr,
- int output_pitch,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
- int temp;
-
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- /* Apply filter */
- temp = ((int)src_ptr[-2 * (int)pixel_step] * vp9_filter[0]) +
- ((int)src_ptr[-1 * (int)pixel_step] * vp9_filter[1]) +
- ((int)src_ptr[0] * vp9_filter[2]) +
- ((int)src_ptr[pixel_step] * vp9_filter[3]) +
- ((int)src_ptr[2 * pixel_step] * vp9_filter[4]) +
- ((int)src_ptr[3 * pixel_step] * vp9_filter[5]) +
- (VP9_FILTER_WEIGHT >> 1); /* Rounding */
-
- /* Normalize back to 0-255 */
- output_ptr[j] = (clip_pixel(temp >> VP9_FILTER_SHIFT) +
- output_ptr[j] + 1) >> 1;
- src_ptr++;
- }
-
- /* Start next row */
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_pitch;
- }
-}
-
-#define Interp_Extend 3
-static void filter_block2d_6(uint8_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- int output_pitch,
- const int16_t *HFilter,
- const int16_t *VFilter) {
- int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer */
-
- /* First filter 1-D horizontally... */
- filter_block2d_first_pass_6(
- src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,
- src_pixels_per_line, 1, 3 + Interp_Extend * 2, 4, HFilter);
-
- /* then filter vertically... */
- filter_block2d_second_pass_6(FData + 4 * (Interp_Extend - 1), output_ptr,
- output_pitch, 4, 4, 4, 4, VFilter);
-}
-
-
-void vp9_sixtap_predict4x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
- VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
-
- filter_block2d_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
- VFilter);
-}
-
-/*
- * The difference between filter_block2d_6() and filter_block2d_avg_6 is
- * that filter_block2d_6() does a 6-tap filter and stores it in the output
- * buffer, whereas filter_block2d_avg_6() does the same 6-tap filter, and
- * then averages that with the content already present in the output
- * ((filter_result + dest + 1) >> 1) and stores that in the output.
- */
-static void filter_block2d_avg_6(uint8_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- int output_pitch,
- const int16_t *HFilter,
- const int16_t *VFilter) {
- int FData[(3 + Interp_Extend * 2) * 4]; /* Temp data buffer */
-
- /* First filter 1-D horizontally... */
- filter_block2d_first_pass_6(
- src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,
- src_pixels_per_line, 1, 3 + Interp_Extend * 2, 4, HFilter);
-
- /* then filter vertically... */
- filter_block2d_second_pass_avg_6(FData + 4 * (Interp_Extend - 1), output_ptr,
- output_pitch, 4, 4, 4, 4, VFilter);
-}
-
-void vp9_sixtap_predict_avg4x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
- VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
-
- filter_block2d_avg_6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch,
- HFilter, VFilter);
-}
-
-void vp9_sixtap_predict8x8_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
- int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer */
-
- HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
- VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
-
- /* First filter 1-D horizontally... */
- filter_block2d_first_pass_6(
- src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,
- src_pixels_per_line, 1, 7 + Interp_Extend * 2, 8, HFilter);
-
- /* then filter vertically... */
- filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr,
- dst_pitch, 8, 8, 8, 8, VFilter);
-
-}
-
-void vp9_sixtap_predict_avg8x8_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
- int FData[(7 + Interp_Extend * 2) * 8]; /* Temp data buffer */
-
- HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
- VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
-
- /* First filter 1-D horizontally... */
- filter_block2d_first_pass_6(
- src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,
- src_pixels_per_line, 1, 7 + Interp_Extend * 2, 8, HFilter);
-
- /* then filter vertically... */
- filter_block2d_second_pass_avg_6(FData + 8 * (Interp_Extend - 1), dst_ptr,
- dst_pitch, 8, 8, 8, 8, VFilter);
-}
-
-void vp9_sixtap_predict8x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
- int FData[(3 + Interp_Extend * 2) * 8]; /* Temp data buffer */
-
- HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
- VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
-
- /* First filter 1-D horizontally... */
- filter_block2d_first_pass_6(
- src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,
- src_pixels_per_line, 1, 3 + Interp_Extend * 2, 8, HFilter);
-
- /* then filter vertically... */
- filter_block2d_second_pass_6(FData + 8 * (Interp_Extend - 1), dst_ptr,
- dst_pitch, 8, 8, 4, 8, VFilter);
-}
-
-void vp9_sixtap_predict16x16_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
- int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer */
-
- HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
- VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
-
- /* First filter 1-D horizontally... */
- filter_block2d_first_pass_6(
- src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,
- src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter);
-
- /* then filter vertically... */
- filter_block2d_second_pass_6(FData + 16 * (Interp_Extend - 1), dst_ptr,
- dst_pitch, 16, 16, 16, 16, VFilter);
-}
-
-void vp9_sixtap_predict_avg16x16_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
- int FData[(15 + Interp_Extend * 2) * 16]; /* Temp data buffer */
-
- HFilter = vp9_sub_pel_filters_6[xoffset]; /* 6 tap */
- VFilter = vp9_sub_pel_filters_6[yoffset]; /* 6 tap */
-
- /* First filter 1-D horizontally... */
- filter_block2d_first_pass_6(
- src_ptr - ((Interp_Extend - 1) * src_pixels_per_line), FData,
- src_pixels_per_line, 1, 15 + Interp_Extend * 2, 16, HFilter);
-
- /* then filter vertically... */
- filter_block2d_second_pass_avg_6(FData + 16 * (Interp_Extend - 1), dst_ptr,
- dst_pitch, 16, 16, 16, 16, VFilter);
-}
-
-typedef enum {
- VPX_FILTER_4x4 = 0,
- VPX_FILTER_8x8 = 1,
- VPX_FILTER_8x4 = 2,
- VPX_FILTER_16x16 = 3,
-} filter_size_t;
-
-static const unsigned int filter_size_to_wh[][2] = {
- {4, 4},
- {8, 8},
- {8, 4},
- {16,16},
+DECLARE_ALIGNED(256, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8])
+ = {
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {0, 1, -5, 125, 8, -2, 1, 0},
+ {0, 1, -8, 122, 17, -5, 1, 0},
+ {0, 2, -11, 116, 27, -8, 2, 0},
+ {0, 3, -14, 110, 37, -10, 2, 0},
+ {0, 3, -15, 103, 47, -12, 2, 0},
+ {0, 3, -16, 95, 57, -14, 3, 0},
+ {0, 3, -16, 86, 67, -15, 3, 0},
+ {0, 3, -16, 77, 77, -16, 3, 0},
+ {0, 3, -15, 67, 86, -16, 3, 0},
+ {0, 3, -14, 57, 95, -16, 3, 0},
+ {0, 2, -12, 47, 103, -15, 3, 0},
+ {0, 2, -10, 37, 110, -14, 3, 0},
+ {0, 2, -8, 27, 116, -11, 2, 0},
+ {0, 1, -5, 17, 122, -8, 1, 0},
+ {0, 1, -2, 8, 125, -5, 1, 0}
};
-
-static void filter_block2d_8_c(const uint8_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter,
- const int16_t *VFilter,
- const filter_size_t filter_size,
- uint8_t *dst_ptr,
- unsigned int dst_stride) {
- const unsigned int output_width = filter_size_to_wh[filter_size][0];
- const unsigned int output_height = filter_size_to_wh[filter_size][1];
-
- // Between passes, we use an intermediate buffer whose height is extended to
- // have enough horizontally filtered values as input for the vertical pass.
- // This buffer is allocated to be big enough for the largest block type we
- // support.
- const int kInterp_Extend = 4;
- const unsigned int intermediate_height =
- (kInterp_Extend - 1) + output_height + kInterp_Extend;
-
- /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
- * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
- * + kInterp_Extend
- * = 3 + 16 + 4
- * = 23
- * and filter_max_width = 16
- */
- uint8_t intermediate_buffer[23 * 16];
- const int intermediate_next_stride = 1 - intermediate_height * output_width;
-
- // Horizontal pass (src -> transposed intermediate).
- {
- uint8_t *output_ptr = intermediate_buffer;
- const int src_next_row_stride = src_stride - output_width;
- unsigned int i, j;
- src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
- for (i = 0; i < intermediate_height; i++) {
- for (j = 0; j < output_width; j++) {
- // Apply filter...
- int temp = ((int)src_ptr[0] * HFilter[0]) +
- ((int)src_ptr[1] * HFilter[1]) +
- ((int)src_ptr[2] * HFilter[2]) +
- ((int)src_ptr[3] * HFilter[3]) +
- ((int)src_ptr[4] * HFilter[4]) +
- ((int)src_ptr[5] * HFilter[5]) +
- ((int)src_ptr[6] * HFilter[6]) +
- ((int)src_ptr[7] * HFilter[7]) +
- (VP9_FILTER_WEIGHT >> 1); // Rounding
-
- // Normalize back to 0-255...
- *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
- src_ptr++;
- output_ptr += intermediate_height;
- }
- src_ptr += src_next_row_stride;
- output_ptr += intermediate_next_stride;
- }
- }
-
- // Vertical pass (transposed intermediate -> dst).
- {
- uint8_t *src_ptr = intermediate_buffer;
- const int dst_next_row_stride = dst_stride - output_width;
- unsigned int i, j;
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- // Apply filter...
- int temp = ((int)src_ptr[0] * VFilter[0]) +
- ((int)src_ptr[1] * VFilter[1]) +
- ((int)src_ptr[2] * VFilter[2]) +
- ((int)src_ptr[3] * VFilter[3]) +
- ((int)src_ptr[4] * VFilter[4]) +
- ((int)src_ptr[5] * VFilter[5]) +
- ((int)src_ptr[6] * VFilter[6]) +
- ((int)src_ptr[7] * VFilter[7]) +
- (VP9_FILTER_WEIGHT >> 1); // Rounding
-
- // Normalize back to 0-255...
- *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
- src_ptr += intermediate_height;
- }
- src_ptr += intermediate_next_stride;
- dst_ptr += dst_next_row_stride;
- }
- }
-}
-
-void vp9_filter_block2d_4x4_8_c(const uint8_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter_aligned16,
- const int16_t *VFilter_aligned16,
- uint8_t *dst_ptr,
- unsigned int dst_stride) {
- filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16,
- VPX_FILTER_4x4, dst_ptr, dst_stride);
-}
-
-void vp9_filter_block2d_8x4_8_c(const uint8_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter_aligned16,
- const int16_t *VFilter_aligned16,
- uint8_t *dst_ptr,
- unsigned int dst_stride) {
- filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16,
- VPX_FILTER_8x4, dst_ptr, dst_stride);
-}
-
-void vp9_filter_block2d_8x8_8_c(const uint8_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter_aligned16,
- const int16_t *VFilter_aligned16,
- uint8_t *dst_ptr,
- unsigned int dst_stride) {
- filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16,
- VPX_FILTER_8x8, dst_ptr, dst_stride);
-}
-
-void vp9_filter_block2d_16x16_8_c(const uint8_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter_aligned16,
- const int16_t *VFilter_aligned16,
- uint8_t *dst_ptr,
- unsigned int dst_stride) {
- filter_block2d_8_c(src_ptr, src_stride, HFilter_aligned16, VFilter_aligned16,
- VPX_FILTER_16x16, dst_ptr, dst_stride);
-}
-
-static void block2d_average_c(uint8_t *src,
- unsigned int src_stride,
- uint8_t *output_ptr,
- unsigned int output_stride,
- const filter_size_t filter_size) {
- const unsigned int output_width = filter_size_to_wh[filter_size][0];
- const unsigned int output_height = filter_size_to_wh[filter_size][1];
-
- unsigned int i, j;
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
- }
- output_ptr += output_stride;
- }
-}
-
-#define block2d_average block2d_average_c
-
-void vp9_eighttap_predict4x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_sub_pel_filters_8[xoffset];
- VFilter = vp9_sub_pel_filters_8[yoffset];
-
- vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict_avg4x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
- uint8_t tmp[4 * 4];
-
- vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
- 4);
- block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);
-}
-
-void vp9_eighttap_predict4x4_sharp_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_sub_pel_filters_8s[xoffset];
- VFilter = vp9_sub_pel_filters_8s[yoffset];
-
- vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict4x4_smooth_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_sub_pel_filters_8lp[xoffset];
- VFilter = vp9_sub_pel_filters_8lp[yoffset];
-
- vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line,
- HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict_avg4x4_sharp_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
- uint8_t tmp[4 * 4];
-
- vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
- 4);
- block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);
-}
-
-void vp9_eighttap_predict_avg4x4_smooth_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
- uint8_t tmp[4 * 4];
-
- vp9_filter_block2d_4x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
- 4);
- block2d_average(tmp, 4, dst_ptr, dst_pitch, VPX_FILTER_4x4);
-}
-
-
-void vp9_eighttap_predict8x8_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
-
- vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict8x8_sharp_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
-
- vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict8x8_smooth_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
-
- vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict_avg8x8_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- uint8_t tmp[8 * 8];
- const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
-
- vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
- 8);
- block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);
-}
-
-void vp9_eighttap_predict_avg8x8_sharp_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- uint8_t tmp[8 * 8];
- const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
-
- vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
- 8);
- block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);
-}
-
-void vp9_eighttap_predict_avg8x8_smooth_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- uint8_t tmp[8 * 8];
- const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
-
- vp9_filter_block2d_8x8_8(src_ptr, src_pixels_per_line, HFilter, VFilter, tmp,
- 8);
- block2d_average(tmp, 8, dst_ptr, dst_pitch, VPX_FILTER_8x8);
-}
-
-void vp9_eighttap_predict8x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
-
- vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict8x4_sharp_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
-
- vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict8x4_smooth_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
-
- vp9_filter_block2d_8x4_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict16x16_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
-
- vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict16x16_sharp_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
-
- vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict16x16_smooth_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
-
- vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- dst_ptr, dst_pitch);
-}
-
-void vp9_eighttap_predict_avg16x16_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16);
- const int16_t *HFilter = vp9_sub_pel_filters_8[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8[yoffset];
-
- vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- tmp, 16);
- block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);
-}
-
-void vp9_eighttap_predict_avg16x16_sharp_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16);
- const int16_t *HFilter = vp9_sub_pel_filters_8s[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8s[yoffset];
-
- vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- tmp, 16);
- block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);
-}
-
-void vp9_eighttap_predict_avg16x16_smooth_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp, 16 * 16);
- const int16_t *HFilter = vp9_sub_pel_filters_8lp[xoffset];
- const int16_t *VFilter = vp9_sub_pel_filters_8lp[yoffset];
-
- vp9_filter_block2d_16x16_8(src_ptr, src_pixels_per_line, HFilter, VFilter,
- tmp, 16);
- block2d_average(tmp, 16, dst_ptr, dst_pitch, VPX_FILTER_16x16);
-}
-
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil_first_pass
- *
- * INPUTS : uint8_t *src_ptr : Pointer to source block.
- * uint32_t src_stride : Stride of source block.
- * uint32_t height : Block height.
- * uint32_t width : Block width.
- * int32_t *vp9_filter : Array of 2 bi-linear filter taps.
- *
- * OUTPUTS : int32_t *dst_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
- * in the horizontal direction to produce the filtered output
- * block. Used to implement first-pass of 2-D separable filter.
- *
- * SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
- * Two filter taps should sum to VP9_FILTER_WEIGHT.
- *
- ****************************************************************************/
-static void filter_block2d_bil_first_pass(uint8_t *src_ptr,
- uint16_t *dst_ptr,
- unsigned int src_stride,
- unsigned int height,
- unsigned int width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- /* Apply bilinear filter */
- dst_ptr[j] = (((int)src_ptr[0] * vp9_filter[0]) +
- ((int)src_ptr[1] * vp9_filter[1]) +
- (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;
- src_ptr++;
- }
-
- /* Next row... */
- src_ptr += src_stride - width;
- dst_ptr += width;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil_second_pass
- *
- * INPUTS : int32_t *src_ptr : Pointer to source block.
- * uint32_t dst_pitch : Destination block pitch.
- * uint32_t height : Block height.
- * uint32_t width : Block width.
- * int32_t *vp9_filter : Array of 2 bi-linear filter taps.
- *
- * OUTPUTS : uint16_t *dst_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
- * in the vertical direction to produce the filtered output
- * block. Used to implement second-pass of 2-D separable filter.
- *
- * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
- * Two filter taps should sum to VP9_FILTER_WEIGHT.
- *
- ****************************************************************************/
-static void filter_block2d_bil_second_pass(uint16_t *src_ptr,
- uint8_t *dst_ptr,
- int dst_pitch,
- unsigned int height,
- unsigned int width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
- int temp;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- /* Apply filter */
- temp = ((int)src_ptr[0] * vp9_filter[0]) +
- ((int)src_ptr[width] * vp9_filter[1]) +
- (VP9_FILTER_WEIGHT / 2);
- dst_ptr[j] = (unsigned int)(temp >> VP9_FILTER_SHIFT);
- src_ptr++;
- }
-
- /* Next row... */
- dst_ptr += dst_pitch;
- }
-}
-
-/*
- * As before for filter_block2d_second_pass_avg(), the functional difference
- * between filter_block2d_bil_second_pass() and filter_block2d_bil_second_pass_avg()
- * is that filter_block2d_bil_second_pass() does a bilinear filter on input
- * and stores the result in output; filter_block2d_bil_second_pass_avg(),
- * instead, does a bilinear filter on input, averages the resulting value
- * with the values already present in the output and stores the result of
- * that back into the output ((filter_result + dest + 1) >> 1).
- */
-static void filter_block2d_bil_second_pass_avg(uint16_t *src_ptr,
- uint8_t *dst_ptr,
- int dst_pitch,
- unsigned int height,
- unsigned int width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
- int temp;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- /* Apply filter */
- temp = (((int)src_ptr[0] * vp9_filter[0]) +
- ((int)src_ptr[width] * vp9_filter[1]) +
- (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;
- dst_ptr[j] = (unsigned int)((temp + dst_ptr[j] + 1) >> 1);
- src_ptr++;
- }
-
- /* Next row... */
- dst_ptr += dst_pitch;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil
- *
- * INPUTS : uint8_t *src_ptr : Pointer to source block.
- * uint32_t src_pitch : Stride of source block.
- * uint32_t dst_pitch : Stride of destination block.
- * int32_t *HFilter : Array of 2 horizontal filter taps.
- * int32_t *VFilter : Array of 2 vertical filter taps.
- * int32_t Width : Block width
- * int32_t Height : Block height
- *
- * OUTPUTS : uint16_t *dst_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : 2-D filters an input block by applying a 2-tap
- * bi-linear filter horizontally followed by a 2-tap
- * bi-linear filter vertically on the result.
- *
- * SPECIAL NOTES : The largest block size can be handled here is 16x16
- *
- ****************************************************************************/
-static void filter_block2d_bil(uint8_t *src_ptr,
- uint8_t *dst_ptr,
- unsigned int src_pitch,
- unsigned int dst_pitch,
- const int16_t *HFilter,
- const int16_t *VFilter,
- int Width,
- int Height) {
-
- uint16_t FData[17 * 16]; /* Temp data buffer used in filtering */
-
- /* First filter 1-D horizontally... */
- filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
-
- /* then 1-D vertically... */
- filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
-}
-
-static void filter_block2d_bil_avg(uint8_t *src_ptr,
- uint8_t *dst_ptr,
- unsigned int src_pitch,
- unsigned int dst_pitch,
- const int16_t *HFilter,
- const int16_t *VFilter,
- int Width,
- int Height) {
- uint16_t FData[17 * 16]; /* Temp data buffer used in filtering */
-
- /* First filter 1-D horizontally... */
- filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
-
- /* then 1-D vertically... */
- filter_block2d_bil_second_pass_avg(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
-}
-
-void vp9_bilinear_predict4x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
-
- filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
-}
-
-void vp9_bilinear_predict_avg4x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
-
- filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,
- dst_pitch, HFilter, VFilter, 4, 4);
-}
-
-void vp9_bilinear_predict8x8_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
-
- filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
-
-}
-
-void vp9_bilinear_predict_avg8x8_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
-
- filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,
- dst_pitch, HFilter, VFilter, 8, 8);
-}
-
-void vp9_bilinear_predict8x4_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
-
- filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
-
-}
-
-void vp9_bilinear_predict16x16_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
-
- filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
-}
-
-void vp9_bilinear_predict_avg16x16_c(uint8_t *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- uint8_t *dst_ptr,
- int dst_pitch) {
- const int16_t *HFilter;
- const int16_t *VFilter;
-
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
-
- filter_block2d_bil_avg(src_ptr, dst_ptr, src_pixels_per_line,
- dst_pitch, HFilter, VFilter, 16, 16);
-}
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -21,10 +21,17 @@
#define SUBPEL_SHIFTS 16
-extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][2];
-extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][6];
+extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][8];
+extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8];
extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8];
extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8];
extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8];
+
+// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
+// filter kernel as a 2 tap filter.
+#define BF_LENGTH (sizeof(vp9_bilinear_filters[0]) / \
+ sizeof(vp9_bilinear_filters[0][0]))
+#define BF_OFFSET (BF_LENGTH / 2 - 1)
+#define VP9_BILINEAR_FILTERS_2TAP(x) (vp9_bilinear_filters[x] + BF_OFFSET)
#endif // VP9_COMMON_VP9_FILTER_H_
--- a/vp9/common/vp9_findnearmv.c
+++ b/vp9/common/vp9_findnearmv.c
@@ -9,10 +9,11 @@
*/
+#include <limits.h>
+
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_sadmxn.h"
#include "vp9/common/vp9_subpelvar.h"
-#include <limits.h>
const uint8_t vp9_mbsplit_offset[4][16] = {
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
@@ -32,8 +33,7 @@
}
vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
- vp9_prob p[4], const int context
- ) {
+ vp9_prob p[4], const int context) {
p[0] = pc->fc.vp9_mode_contexts[context][0];
p[1] = pc->fc.vp9_mode_contexts[context][1];
p[2] = pc->fc.vp9_mode_contexts[context][2];
@@ -87,8 +87,8 @@
uint8_t temp2[2 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3,
src_pixels_per_line, 1, 3, 16, HFilter);
@@ -108,8 +108,8 @@
uint8_t temp2[2 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3,
src_pixels_per_line, 1, 17, 2, HFilter);
@@ -118,10 +118,12 @@
return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse);
}
+#if CONFIG_USESELECTREFMV
/* check a list of motion vectors by sad score using a number rows of pixels
* above and a number cols of pixels in the left to select the one with best
* score to use as ref motion vector
*/
+
void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
uint8_t *ref_y_buffer,
int ref_y_stride,
@@ -141,130 +143,140 @@
int_mv sorted_mvs[MAX_MV_REF_CANDIDATES];
int zero_seen = FALSE;
- // Default all to 0,0 if nothing else available
- nearest->as_int = near->as_int = 0;
- vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs));
+ if (ref_y_buffer) {
- above_src = xd->dst.y_buffer - xd->dst.y_stride * 2;
- above_ref = ref_y_buffer - ref_y_stride * 2;
+ // Default all to 0,0 if nothing else available
+ nearest->as_int = near->as_int = 0;
+ vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs));
+
+ above_src = xd->dst.y_buffer - xd->dst.y_stride * 2;
+ above_ref = ref_y_buffer - ref_y_stride * 2;
#if CONFIG_ABOVESPREFMV
- above_src -= 4;
- above_ref -= 4;
+ above_src -= 4;
+ above_ref -= 4;
#else
- left_src = xd->dst.y_buffer - 2;
- left_ref = ref_y_buffer - 2;
+ left_src = xd->dst.y_buffer - 2;
+ left_ref = ref_y_buffer - 2;
#endif
- // Limit search to the predicted best few candidates
- for(i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
- int_mv this_mv;
- int offset = 0;
- int row_offset, col_offset;
+ // Limit search to the predicted best few candidates
+ for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
+ int_mv this_mv;
+ int offset = 0;
+ int row_offset, col_offset;
- this_mv.as_int = mvlist[i].as_int;
+ this_mv.as_int = mvlist[i].as_int;
- // If we see a 0,0 vector for a second time we have reached the end of
- // the list of valid candidate vectors.
- if (!this_mv.as_int && zero_seen)
- break;
+ // If we see a 0,0 vector for a second time we have reached the end of
+ // the list of valid candidate vectors.
+ if (!this_mv.as_int && zero_seen)
+ break;
- zero_seen = zero_seen || !this_mv.as_int;
+ zero_seen = zero_seen || !this_mv.as_int;
#if !CONFIG_ABOVESPREFMV
- clamp_mv(&this_mv,
- xd->mb_to_left_edge - LEFT_TOP_MARGIN + 24,
- xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
- xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24,
- xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
+ clamp_mv(&this_mv,
+ xd->mb_to_left_edge - LEFT_TOP_MARGIN + 24,
+ xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
+ xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24,
+ xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
#else
- clamp_mv(&this_mv,
- xd->mb_to_left_edge - LEFT_TOP_MARGIN + 32,
- xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
- xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24,
- xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
+ clamp_mv(&this_mv,
+ xd->mb_to_left_edge - LEFT_TOP_MARGIN + 32,
+ xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
+ xd->mb_to_top_edge - LEFT_TOP_MARGIN + 24,
+ xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
#endif
- row_offset = this_mv.as_mv.row >> 3;
- col_offset = this_mv.as_mv.col >> 3;
- offset = ref_y_stride * row_offset + col_offset;
- score = 0;
- if (xd->up_available) {
- vp9_sub_pixel_variance16x2(above_ref + offset, ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- above_src, xd->dst.y_stride, &sse);
- score += sse;
- if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
- vp9_sub_pixel_variance16x2(above_ref + offset + 16,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- above_src + 16, xd->dst.y_stride, &sse);
- score += sse;
- }
- if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) {
- vp9_sub_pixel_variance16x2(above_ref + offset + 32,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- above_src + 32, xd->dst.y_stride, &sse);
- score += sse;
- vp9_sub_pixel_variance16x2(above_ref + offset + 48,
- ref_y_stride,
- SP(this_mv.as_mv.col),
- SP(this_mv.as_mv.row),
- above_src + 48, xd->dst.y_stride, &sse);
- score += sse;
- }
- }
+ row_offset = this_mv.as_mv.row >> 3;
+ col_offset = this_mv.as_mv.col >> 3;
+ offset = ref_y_stride * row_offset + col_offset;
+ score = 0;
#if !CONFIG_ABOVESPREFMV
- if (xd->left_available) {
- vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,
+ if (xd->up_available) {
+#else
+ if (xd->up_available && xd->left_available) {
+#endif
+ vp9_sub_pixel_variance16x2(above_ref + offset, ref_y_stride,
SP(this_mv.as_mv.col),
SP(this_mv.as_mv.row),
- left_src, xd->dst.y_stride, &sse);
- score += sse;
- if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
- vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16,
+ above_src, xd->dst.y_stride, &sse);
+ score += sse;
+ if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
+ vp9_sub_pixel_variance16x2(above_ref + offset + 16,
ref_y_stride,
SP(this_mv.as_mv.col),
SP(this_mv.as_mv.row),
- left_src + xd->dst.y_stride * 16,
- xd->dst.y_stride, &sse);
- score += sse;
- }
- if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) {
- vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 32,
+ above_src + 16, xd->dst.y_stride, &sse);
+ score += sse;
+ }
+ if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) {
+ vp9_sub_pixel_variance16x2(above_ref + offset + 32,
ref_y_stride,
SP(this_mv.as_mv.col),
SP(this_mv.as_mv.row),
- left_src + xd->dst.y_stride * 32,
- xd->dst.y_stride, &sse);
- score += sse;
- vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 48,
+ above_src + 32, xd->dst.y_stride, &sse);
+ score += sse;
+ vp9_sub_pixel_variance16x2(above_ref + offset + 48,
ref_y_stride,
SP(this_mv.as_mv.col),
SP(this_mv.as_mv.row),
- left_src + xd->dst.y_stride * 48,
- xd->dst.y_stride, &sse);
+ above_src + 48, xd->dst.y_stride, &sse);
+ score += sse;
+ }
+ }
+#if !CONFIG_ABOVESPREFMV
+ if (xd->left_available) {
+ vp9_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,
+ SP(this_mv.as_mv.col),
+ SP(this_mv.as_mv.row),
+ left_src, xd->dst.y_stride, &sse);
score += sse;
+ if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) {
+ vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16,
+ ref_y_stride,
+ SP(this_mv.as_mv.col),
+ SP(this_mv.as_mv.row),
+ left_src + xd->dst.y_stride * 16,
+ xd->dst.y_stride, &sse);
+ score += sse;
+ }
+ if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) {
+ vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 32,
+ ref_y_stride,
+ SP(this_mv.as_mv.col),
+ SP(this_mv.as_mv.row),
+ left_src + xd->dst.y_stride * 32,
+ xd->dst.y_stride, &sse);
+ score += sse;
+ vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 48,
+ ref_y_stride,
+ SP(this_mv.as_mv.col),
+ SP(this_mv.as_mv.row),
+ left_src + xd->dst.y_stride * 48,
+ xd->dst.y_stride, &sse);
+ score += sse;
+ }
}
- }
#endif
- // Add the entry to our list and then resort the list on score.
- ref_scores[i] = score;
- sorted_mvs[i].as_int = this_mv.as_int;
- j = i;
- while (j > 0) {
- if (ref_scores[j] < ref_scores[j-1]) {
- ref_scores[j] = ref_scores[j-1];
- sorted_mvs[j].as_int = sorted_mvs[j-1].as_int;
- ref_scores[j-1] = score;
- sorted_mvs[j-1].as_int = this_mv.as_int;
- j--;
- } else
- break;
+ // Add the entry to our list and then resort the list on score.
+ ref_scores[i] = score;
+ sorted_mvs[i].as_int = this_mv.as_int;
+ j = i;
+ while (j > 0) {
+ if (ref_scores[j] < ref_scores[j-1]) {
+ ref_scores[j] = ref_scores[j-1];
+ sorted_mvs[j].as_int = sorted_mvs[j-1].as_int;
+ ref_scores[j-1] = score;
+ sorted_mvs[j-1].as_int = this_mv.as_int;
+ j--;
+ } else {
+ break;
+ }
+ }
}
+ } else {
+ vpx_memcpy(sorted_mvs, mvlist, sizeof(sorted_mvs));
}
// Make sure all the candidates are properly clamped etc
@@ -273,23 +285,35 @@
clamp_mv2(&sorted_mvs[i], xd);
}
- // Provided that there are non zero vectors available there will not
- // be more than one 0,0 entry in the sorted list.
- // The best ref mv is always set to the first entry (which gave the best
- // results. The nearest is set to the first non zero vector if available and
- // near to the second non zero vector if available.
- // We do not use 0,0 as a nearest or near as 0,0 has its own mode.
- if ( sorted_mvs[0].as_int ) {
- nearest->as_int = sorted_mvs[0].as_int;
- if ( sorted_mvs[1].as_int )
- near->as_int = sorted_mvs[1].as_int;
- else
- near->as_int = sorted_mvs[2].as_int;
+ // Nearest may be a 0,0 or non zero vector and now matches the chosen
+ // "best reference". This has advantages when it is used as part of a
+ // compound predictor as it means a non zero vector can be paired using
+ // this mode with a 0 vector. The Near vector is still forced to be a
+ // non zero candidate if one is avaialble.
+ nearest->as_int = sorted_mvs[0].as_int;
+ if ( sorted_mvs[1].as_int ) {
+ near->as_int = sorted_mvs[1].as_int;
} else {
- nearest->as_int = sorted_mvs[1].as_int;
- near->as_int = sorted_mvs[2].as_int;
+ near->as_int = sorted_mvs[2].as_int;
}
// Copy back the re-ordered mv list
vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs));
}
+#else
+void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
+ uint8_t *ref_y_buffer,
+ int ref_y_stride,
+ int_mv *mvlist,
+ int_mv *nearest,
+ int_mv *near) {
+ int i;
+ // Make sure all the candidates are properly clamped etc
+ for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
+ lower_mv_precision(&mvlist[i], xd->allow_high_precision_mv);
+ clamp_mv2(&mvlist[i], xd);
+ }
+ *nearest = mvlist[0];
+ *near = mvlist[1];
+}
+#endif
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -17,6 +17,9 @@
#include "vp9/common/vp9_treecoder.h"
#include "vp9/common/vp9_onyxc_int.h"
+#define LEFT_TOP_MARGIN (16 << 3)
+#define RIGHT_BOTTOM_MARGIN (16 << 3)
+
/* check a list of motion vectors by sad score using a number rows of pixels
* above and a number cols of pixels in the left to select the one with best
* score to use as ref motion vector
@@ -28,9 +31,9 @@
int_mv *nearest,
int_mv *near);
-static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias) {
- MV xmv;
- xmv = mvp->as_mv;
+static void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
+ int_mv *mvp, const int *ref_frame_sign_bias) {
+ MV xmv = mvp->as_mv;
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) {
xmv.row *= -1;
@@ -40,8 +43,6 @@
mvp->as_mv = xmv;
}
-#define LEFT_TOP_MARGIN (16 << 3)
-#define RIGHT_BOTTOM_MARGIN (16 << 3)
static void clamp_mv(int_mv *mv,
int mb_to_left_edge,
@@ -71,10 +72,10 @@
int mb_to_right_edge,
int mb_to_top_edge,
int mb_to_bottom_edge) {
- return (mv->as_mv.col < mb_to_left_edge) ||
- (mv->as_mv.col > mb_to_right_edge) ||
- (mv->as_mv.row < mb_to_top_edge) ||
- (mv->as_mv.row > mb_to_bottom_edge);
+ return mv->as_mv.col < mb_to_left_edge ||
+ mv->as_mv.col > mb_to_right_edge ||
+ mv->as_mv.row < mb_to_top_edge ||
+ mv->as_mv.row > mb_to_bottom_edge;
}
vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
@@ -83,21 +84,30 @@
extern const uint8_t vp9_mbsplit_offset[4][16];
-static int left_block_mv(const MODE_INFO *cur_mb, int b) {
+static int left_block_mv(const MACROBLOCKD *xd,
+ const MODE_INFO *cur_mb, int b) {
if (!(b & 3)) {
- /* On L edge, get from MB to left of us */
+ if (!xd->left_available)
+ return 0;
+
+ // On L edge, get from MB to left of us
--cur_mb;
if (cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv[0].as_int;
+
b += 4;
}
- return (cur_mb->bmi + b - 1)->as_mv.first.as_int;
+ return (cur_mb->bmi + b - 1)->as_mv[0].as_int;
}
-static int left_block_second_mv(const MODE_INFO *cur_mb, int b) {
+static int left_block_second_mv(const MACROBLOCKD *xd,
+ const MODE_INFO *cur_mb, int b) {
if (!(b & 3)) {
+ if (!xd->left_available)
+ return 0;
+
/* On L edge, get from MB to left of us */
--cur_mb;
@@ -108,8 +118,8 @@
}
return cur_mb->mbmi.second_ref_frame > 0 ?
- (cur_mb->bmi + b - 1)->as_mv.second.as_int :
- (cur_mb->bmi + b - 1)->as_mv.first.as_int;
+ (cur_mb->bmi + b - 1)->as_mv[1].as_int :
+ (cur_mb->bmi + b - 1)->as_mv[0].as_int;
}
static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
@@ -122,7 +132,7 @@
b += 16;
}
- return (cur_mb->bmi + b - 4)->as_mv.first.as_int;
+ return (cur_mb->bmi + b - 4)->as_mv[0].as_int;
}
static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
@@ -137,8 +147,8 @@
}
return cur_mb->mbmi.second_ref_frame > 0 ?
- (cur_mb->bmi + b - 4)->as_mv.second.as_int :
- (cur_mb->bmi + b - 4)->as_mv.first.as_int;
+ (cur_mb->bmi + b - 4)->as_mv[1].as_int :
+ (cur_mb->bmi + b - 4)->as_mv[0].as_int;
}
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
--- /dev/null
+++ b/vp9/common/vp9_idct.c
@@ -1,0 +1,1307 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_systemdependent.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_idct.h"
+
+void vp9_short_iwalsh4x4_c(int16_t *input, int16_t *output, int pitch) {
+ int i;
+ int a1, b1, c1, d1;
+ int16_t *ip = input;
+ int16_t *op = output;
+ const int half_pitch = pitch >> 1;
+
+ for (i = 0; i < 4; i++) {
+ a1 = (ip[0] + ip[3]) >> WHT_UPSCALE_FACTOR;
+ b1 = (ip[1] + ip[2]) >> WHT_UPSCALE_FACTOR;
+ c1 = (ip[1] - ip[2]) >> WHT_UPSCALE_FACTOR;
+ d1 = (ip[0] - ip[3]) >> WHT_UPSCALE_FACTOR;
+
+ op[0] = (a1 + b1 + 1) >> 1;
+ op[1] = (c1 + d1) >> 1;
+ op[2] = (a1 - b1) >> 1;
+ op[3] = (d1 - c1) >> 1;
+
+ ip += 4;
+ op += half_pitch;
+ }
+
+ ip = output;
+ op = output;
+ for (i = 0; i < 4; i++) {
+ a1 = ip[half_pitch * 0] + ip[half_pitch * 3];
+ b1 = ip[half_pitch * 1] + ip[half_pitch * 2];
+ c1 = ip[half_pitch * 1] - ip[half_pitch * 2];
+ d1 = ip[half_pitch * 0] - ip[half_pitch * 3];
+
+
+ op[half_pitch * 0] = (a1 + b1 + 1) >> 1;
+ op[half_pitch * 1] = (c1 + d1) >> 1;
+ op[half_pitch * 2] = (a1 - b1) >> 1;
+ op[half_pitch * 3] = (d1 - c1) >> 1;
+
+ ip++;
+ op++;
+ }
+}
+
+void vp9_short_iwalsh4x4_1_c(int16_t *in, int16_t *out, int pitch) {
+ int i;
+ int16_t tmp[4];
+ int16_t *ip = in;
+ int16_t *op = tmp;
+ const int half_pitch = pitch >> 1;
+
+ op[0] = ((ip[0] >> WHT_UPSCALE_FACTOR) + 1) >> 1;
+ op[1] = op[2] = op[3] = (ip[0] >> WHT_UPSCALE_FACTOR) >> 1;
+
+ ip = tmp;
+ op = out;
+ for (i = 0; i < 4; i++) {
+ op[half_pitch * 0] = (ip[0] + 1) >> 1;
+ op[half_pitch * 1] = op[half_pitch * 2] = op[half_pitch * 3] = ip[0] >> 1;
+ ip++;
+ op++;
+ }
+}
+
+void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr,
+ uint8_t *dst_ptr,
+ int pitch, int stride) {
+ int r, c;
+ int16_t dc = input_dc;
+ int16_t tmp[4 * 4];
+ vp9_short_iwalsh4x4_1_c(&dc, tmp, 4 << 1);
+
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++)
+ dst_ptr[c] = clip_pixel(tmp[r * 4 + c] + pred_ptr[c]);
+
+ dst_ptr += stride;
+ pred_ptr += pitch;
+ }
+}
+
+void vp9_idct4_1d_c(int16_t *input, int16_t *output) {
+ int16_t step[4];
+ int temp1, temp2;
+ // stage 1
+ temp1 = (input[0] + input[2]) * cospi_16_64;
+ temp2 = (input[0] - input[2]) * cospi_16_64;
+ step[0] = dct_const_round_shift(temp1);
+ step[1] = dct_const_round_shift(temp2);
+ temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
+ temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
+ step[2] = dct_const_round_shift(temp1);
+ step[3] = dct_const_round_shift(temp2);
+
+ // stage 2
+ output[0] = step[0] + step[3];
+ output[1] = step[1] + step[2];
+ output[2] = step[1] - step[2];
+ output[3] = step[0] - step[3];
+}
+
+void vp9_short_idct4x4_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[4 * 4];
+ int16_t *outptr = out;
+ const int half_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
+
+ // Rows
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j];
+ vp9_idct4_1d(temp_in, outptr);
+ input += 4;
+ outptr += 4;
+ }
+
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j * 4 + i];
+ vp9_idct4_1d(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
+ }
+}
+
+void vp9_short_idct4x4_1_c(int16_t *input, int16_t *output, int pitch) {
+ int i;
+ int a1;
+ int16_t *op = output;
+ const int half_pitch = pitch >> 1;
+ int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
+ a1 = ROUND_POWER_OF_TWO(out, 4);
+
+ for (i = 0; i < 4; i++) {
+ op[0] = op[1] = op[2] = op[3] = a1;
+ op += half_pitch;
+ }
+}
+
+void vp9_dc_only_idct_add_c(int input_dc, uint8_t *pred_ptr,
+ uint8_t *dst_ptr, int pitch, int stride) {
+ int a1;
+ int r, c;
+ int16_t out = dct_const_round_shift(input_dc * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
+ a1 = ROUND_POWER_OF_TWO(out, 4);
+
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++)
+ dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]);
+
+ dst_ptr += stride;
+ pred_ptr += pitch;
+ }
+}
+
+static void idct8_1d(int16_t *input, int16_t *output) {
+ int16_t step1[8], step2[8];
+ int temp1, temp2;
+ // stage 1
+ step1[0] = input[0];
+ step1[2] = input[4];
+ step1[1] = input[2];
+ step1[3] = input[6];
+ temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
+ temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
+ step1[4] = dct_const_round_shift(temp1);
+ step1[7] = dct_const_round_shift(temp2);
+ temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
+ temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
+ step1[5] = dct_const_round_shift(temp1);
+ step1[6] = dct_const_round_shift(temp2);
+
+ // stage 2 & stage 3 - even half
+ vp9_idct4_1d(step1, step1);
+
+ // stage 2 - odd half
+ step2[4] = step1[4] + step1[5];
+ step2[5] = step1[4] - step1[5];
+ step2[6] = -step1[6] + step1[7];
+ step2[7] = step1[6] + step1[7];
+
+ // stage 3 -odd half
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = dct_const_round_shift(temp1);
+ step1[6] = dct_const_round_shift(temp2);
+ step1[7] = step2[7];
+
+ // stage 4
+ output[0] = step1[0] + step1[7];
+ output[1] = step1[1] + step1[6];
+ output[2] = step1[2] + step1[5];
+ output[3] = step1[3] + step1[4];
+ output[4] = step1[3] - step1[4];
+ output[5] = step1[2] - step1[5];
+ output[6] = step1[1] - step1[6];
+ output[7] = step1[0] - step1[7];
+}
+
+void vp9_short_idct8x8_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[8 * 8];
+ int16_t *outptr = out;
+ const int half_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[8], temp_out[8];
+
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ idct8_1d(input, outptr);
+ input += 8;
+ outptr += 8;
+ }
+
+ // Columns
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j * 8 + i];
+ idct8_1d(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);
+ }
+}
+
+static void iadst4_1d(int16_t *input, int16_t *output) {
+ int s0, s1, s2, s3, s4, s5, s6, s7;
+
+ int x0 = input[0];
+ int x1 = input[1];
+ int x2 = input[2];
+ int x3 = input[3];
+
+ if (!(x0 | x1 | x2 | x3)) {
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
+ }
+
+ s0 = sinpi_1_9 * x0;
+ s1 = sinpi_2_9 * x0;
+ s2 = sinpi_3_9 * x1;
+ s3 = sinpi_4_9 * x2;
+ s4 = sinpi_1_9 * x2;
+ s5 = sinpi_2_9 * x3;
+ s6 = sinpi_4_9 * x3;
+ s7 = x0 - x2 + x3;
+
+ x0 = s0 + s3 + s5;
+ x1 = s1 - s4 - s6;
+ x2 = sinpi_3_9 * s7;
+ x3 = s2;
+
+ s0 = x0 + x3;
+ s1 = x1 + x3;
+ s2 = x2;
+ s3 = x0 + x1 - x3;
+
+ // 1-D transform scaling factor is sqrt(2).
+ // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
+ // + 1b (addition) = 29b.
+ // Hence the output bit depth is 15b.
+ output[0] = dct_const_round_shift(s0);
+ output[1] = dct_const_round_shift(s1);
+ output[2] = dct_const_round_shift(s2);
+ output[3] = dct_const_round_shift(s3);
+}
+
+void vp9_short_iht4x4_c(int16_t *input, int16_t *output,
+ int pitch, int tx_type) {
+ const transform_2d IHT_4[] = {
+ { vp9_idct4_1d, vp9_idct4_1d }, // DCT_DCT = 0
+ { iadst4_1d, vp9_idct4_1d }, // ADST_DCT = 1
+ { vp9_idct4_1d, iadst4_1d }, // DCT_ADST = 2
+ { iadst4_1d, iadst4_1d } // ADST_ADST = 3
+ };
+
+ int i, j;
+ int16_t out[4 * 4];
+ int16_t *outptr = out;
+ int16_t temp_in[4], temp_out[4];
+
+ // inverse transform row vectors
+ for (i = 0; i < 4; ++i) {
+ IHT_4[tx_type].rows(input, outptr);
+ input += 4;
+ outptr += 4;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j * 4 + i];
+ IHT_4[tx_type].cols(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 4);
+ }
+}
+
+static void iadst8_1d(int16_t *input, int16_t *output) {
+ int s0, s1, s2, s3, s4, s5, s6, s7;
+
+ int x0 = input[7];
+ int x1 = input[0];
+ int x2 = input[5];
+ int x3 = input[2];
+ int x4 = input[3];
+ int x5 = input[4];
+ int x6 = input[1];
+ int x7 = input[6];
+
+ if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
+ output[0] = output[1] = output[2] = output[3] = output[4]
+ = output[5] = output[6] = output[7] = 0;
+ return;
+ }
+
+ // stage 1
+ s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
+ s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
+ s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+ s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+ s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+ s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+ s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
+ s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
+
+ x0 = dct_const_round_shift(s0 + s4);
+ x1 = dct_const_round_shift(s1 + s5);
+ x2 = dct_const_round_shift(s2 + s6);
+ x3 = dct_const_round_shift(s3 + s7);
+ x4 = dct_const_round_shift(s0 - s4);
+ x5 = dct_const_round_shift(s1 - s5);
+ x6 = dct_const_round_shift(s2 - s6);
+ x7 = dct_const_round_shift(s3 - s7);
+
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
+ s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
+ s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
+ s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
+
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = dct_const_round_shift(s4 + s6);
+ x5 = dct_const_round_shift(s5 + s7);
+ x6 = dct_const_round_shift(s4 - s6);
+ x7 = dct_const_round_shift(s5 - s7);
+
+ // stage 3
+ s2 = cospi_16_64 * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (x6 - x7);
+
+ x2 = dct_const_round_shift(s2);
+ x3 = dct_const_round_shift(s3);
+ x6 = dct_const_round_shift(s6);
+ x7 = dct_const_round_shift(s7);
+
+ output[0] = x0;
+ output[1] = -x4;
+ output[2] = x6;
+ output[3] = -x2;
+ output[4] = x3;
+ output[5] = -x7;
+ output[6] = x5;
+ output[7] = -x1;
+}
+
+static const transform_2d IHT_8[] = {
+ { idct8_1d, idct8_1d }, // DCT_DCT = 0
+ { iadst8_1d, idct8_1d }, // ADST_DCT = 1
+ { idct8_1d, iadst8_1d }, // DCT_ADST = 2
+ { iadst8_1d, iadst8_1d } // ADST_ADST = 3
+};
+
+void vp9_short_iht8x8_c(int16_t *input, int16_t *output,
+ int pitch, int tx_type) {
+ int i, j;
+ int16_t out[8 * 8];
+ int16_t *outptr = out;
+ int16_t temp_in[8], temp_out[8];
+ const transform_2d ht = IHT_8[tx_type];
+
+ // inverse transform row vectors
+ for (i = 0; i < 8; ++i) {
+ ht.rows(input, outptr);
+ input += 8;
+ outptr += 8;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j * 8 + i];
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);
+ }
+}
+
+void vp9_short_idct10_8x8_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[8 * 8];
+ int16_t *outptr = out;
+ const int half_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[8], temp_out[8];
+
+ vpx_memset(out, 0, sizeof(out));
+ // First transform rows
+ // only first 4 row has non-zero coefs
+ for (i = 0; i < 4; ++i) {
+ idct8_1d(input, outptr);
+ input += 8;
+ outptr += 8;
+ }
+
+ // Then transform columns
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j * 8 + i];
+ idct8_1d(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 5);
+ }
+}
+
+void vp9_short_idct1_8x8_c(int16_t *input, int16_t *output) {
+ int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
+ output[0] = ROUND_POWER_OF_TWO(out, 5);
+}
+
+static void idct16_1d(int16_t *input, int16_t *output) {
+ int16_t step1[16], step2[16];
+ int temp1, temp2;
+
+ // stage 1
+ step1[0] = input[0/2];
+ step1[1] = input[16/2];
+ step1[2] = input[8/2];
+ step1[3] = input[24/2];
+ step1[4] = input[4/2];
+ step1[5] = input[20/2];
+ step1[6] = input[12/2];
+ step1[7] = input[28/2];
+ step1[8] = input[2/2];
+ step1[9] = input[18/2];
+ step1[10] = input[10/2];
+ step1[11] = input[26/2];
+ step1[12] = input[6/2];
+ step1[13] = input[22/2];
+ step1[14] = input[14/2];
+ step1[15] = input[30/2];
+
+ // stage 2
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = dct_const_round_shift(temp1);
+ step2[15] = dct_const_round_shift(temp2);
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = dct_const_round_shift(temp1);
+ step2[14] = dct_const_round_shift(temp2);
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = dct_const_round_shift(temp1);
+ step2[13] = dct_const_round_shift(temp2);
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = dct_const_round_shift(temp1);
+ step2[12] = dct_const_round_shift(temp2);
+
+ // stage 3
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = dct_const_round_shift(temp1);
+ step1[7] = dct_const_round_shift(temp2);
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = dct_const_round_shift(temp1);
+ step1[6] = dct_const_round_shift(temp2);
+
+ step1[8] = step2[8] + step2[9];
+ step1[9] = step2[8] - step2[9];
+ step1[10] = -step2[10] + step2[11];
+ step1[11] = step2[10] + step2[11];
+ step1[12] = step2[12] + step2[13];
+ step1[13] = step2[12] - step2[13];
+ step1[14] = -step2[14] + step2[15];
+ step1[15] = step2[14] + step2[15];
+
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = dct_const_round_shift(temp1);
+ step2[1] = dct_const_round_shift(temp2);
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = dct_const_round_shift(temp1);
+ step2[3] = dct_const_round_shift(temp2);
+ step2[4] = step1[4] + step1[5];
+ step2[5] = step1[4] - step1[5];
+ step2[6] = -step1[6] + step1[7];
+ step2[7] = step1[6] + step1[7];
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = dct_const_round_shift(temp1);
+ step2[14] = dct_const_round_shift(temp2);
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = dct_const_round_shift(temp1);
+ step2[13] = dct_const_round_shift(temp2);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ // stage 5
+ step1[0] = step2[0] + step2[3];
+ step1[1] = step2[1] + step2[2];
+ step1[2] = step2[1] - step2[2];
+ step1[3] = step2[0] - step2[3];
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = dct_const_round_shift(temp1);
+ step1[6] = dct_const_round_shift(temp2);
+ step1[7] = step2[7];
+
+ step1[8] = step2[8] + step2[11];
+ step1[9] = step2[9] + step2[10];
+ step1[10] = step2[9] - step2[10];
+ step1[11] = step2[8] - step2[11];
+ step1[12] = -step2[12] + step2[15];
+ step1[13] = -step2[13] + step2[14];
+ step1[14] = step2[13] + step2[14];
+ step1[15] = step2[12] + step2[15];
+
+ // stage 6
+ step2[0] = step1[0] + step1[7];
+ step2[1] = step1[1] + step1[6];
+ step2[2] = step1[2] + step1[5];
+ step2[3] = step1[3] + step1[4];
+ step2[4] = step1[3] - step1[4];
+ step2[5] = step1[2] - step1[5];
+ step2[6] = step1[1] - step1[6];
+ step2[7] = step1[0] - step1[7];
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = dct_const_round_shift(temp1);
+ step2[13] = dct_const_round_shift(temp2);
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = dct_const_round_shift(temp1);
+ step2[12] = dct_const_round_shift(temp2);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ // stage 7
+ output[0] = step2[0] + step2[15];
+ output[1] = step2[1] + step2[14];
+ output[2] = step2[2] + step2[13];
+ output[3] = step2[3] + step2[12];
+ output[4] = step2[4] + step2[11];
+ output[5] = step2[5] + step2[10];
+ output[6] = step2[6] + step2[9];
+ output[7] = step2[7] + step2[8];
+ output[8] = step2[7] - step2[8];
+ output[9] = step2[6] - step2[9];
+ output[10] = step2[5] - step2[10];
+ output[11] = step2[4] - step2[11];
+ output[12] = step2[3] - step2[12];
+ output[13] = step2[2] - step2[13];
+ output[14] = step2[1] - step2[14];
+ output[15] = step2[0] - step2[15];
+}
+
+void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[16 * 16];
+ int16_t *outptr = out;
+ const int half_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+
+ // First transform rows
+ for (i = 0; i < 16; ++i) {
+ idct16_1d(input, outptr);
+ input += 16;
+ outptr += 16;
+ }
+
+ // Then transform columns
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j * 16 + i];
+ idct16_1d(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ }
+}
+
+void iadst16_1d(int16_t *input, int16_t *output) {
+ int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
+
+ int x0 = input[15];
+ int x1 = input[0];
+ int x2 = input[13];
+ int x3 = input[2];
+ int x4 = input[11];
+ int x5 = input[4];
+ int x6 = input[9];
+ int x7 = input[6];
+ int x8 = input[7];
+ int x9 = input[8];
+ int x10 = input[5];
+ int x11 = input[10];
+ int x12 = input[3];
+ int x13 = input[12];
+ int x14 = input[1];
+ int x15 = input[14];
+
+ if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8
+ | x9 | x10 | x11 | x12 | x13 | x14 | x15)) {
+ output[0] = output[1] = output[2] = output[3] = output[4]
+ = output[5] = output[6] = output[7] = output[8]
+ = output[9] = output[10] = output[11] = output[12]
+ = output[13] = output[14] = output[15] = 0;
+ return;
+ }
+
+ // stage 1
+ s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+ s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+ s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+ s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+ s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+ s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+ s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+ s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+ s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+ s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+ s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+ s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+ s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+ s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+ s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+ s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
+
+ x0 = dct_const_round_shift(s0 + s8);
+ x1 = dct_const_round_shift(s1 + s9);
+ x2 = dct_const_round_shift(s2 + s10);
+ x3 = dct_const_round_shift(s3 + s11);
+ x4 = dct_const_round_shift(s4 + s12);
+ x5 = dct_const_round_shift(s5 + s13);
+ x6 = dct_const_round_shift(s6 + s14);
+ x7 = dct_const_round_shift(s7 + s15);
+ x8 = dct_const_round_shift(s0 - s8);
+ x9 = dct_const_round_shift(s1 - s9);
+ x10 = dct_const_round_shift(s2 - s10);
+ x11 = dct_const_round_shift(s3 - s11);
+ x12 = dct_const_round_shift(s4 - s12);
+ x13 = dct_const_round_shift(s5 - s13);
+ x14 = dct_const_round_shift(s6 - s14);
+ x15 = dct_const_round_shift(s7 - s15);
+
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+ s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+ s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+ s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+ s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
+ s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+ s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
+ s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
+
+ x0 = s0 + s4;
+ x1 = s1 + s5;
+ x2 = s2 + s6;
+ x3 = s3 + s7;
+ x4 = s0 - s4;
+ x5 = s1 - s5;
+ x6 = s2 - s6;
+ x7 = s3 - s7;
+ x8 = dct_const_round_shift(s8 + s12);
+ x9 = dct_const_round_shift(s9 + s13);
+ x10 = dct_const_round_shift(s10 + s14);
+ x11 = dct_const_round_shift(s11 + s15);
+ x12 = dct_const_round_shift(s8 - s12);
+ x13 = dct_const_round_shift(s9 - s13);
+ x14 = dct_const_round_shift(s10 - s14);
+ x15 = dct_const_round_shift(s11 - s15);
+
+ // stage 3
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+ s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+ s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
+ s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+ s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+ s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
+ s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
+
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = dct_const_round_shift(s4 + s6);
+ x5 = dct_const_round_shift(s5 + s7);
+ x6 = dct_const_round_shift(s4 - s6);
+ x7 = dct_const_round_shift(s5 - s7);
+ x8 = s8 + s10;
+ x9 = s9 + s11;
+ x10 = s8 - s10;
+ x11 = s9 - s11;
+ x12 = dct_const_round_shift(s12 + s14);
+ x13 = dct_const_round_shift(s13 + s15);
+ x14 = dct_const_round_shift(s12 - s14);
+ x15 = dct_const_round_shift(s13 - s15);
+
+ // stage 4
+ s2 = (- cospi_16_64) * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (- x6 + x7);
+ s10 = cospi_16_64 * (x10 + x11);
+ s11 = cospi_16_64 * (- x10 + x11);
+ s14 = (- cospi_16_64) * (x14 + x15);
+ s15 = cospi_16_64 * (x14 - x15);
+
+ x2 = dct_const_round_shift(s2);
+ x3 = dct_const_round_shift(s3);
+ x6 = dct_const_round_shift(s6);
+ x7 = dct_const_round_shift(s7);
+ x10 = dct_const_round_shift(s10);
+ x11 = dct_const_round_shift(s11);
+ x14 = dct_const_round_shift(s14);
+ x15 = dct_const_round_shift(s15);
+
+ output[0] = x0;
+ output[1] = -x8;
+ output[2] = x12;
+ output[3] = -x4;
+ output[4] = x6;
+ output[5] = x14;
+ output[6] = x10;
+ output[7] = x2;
+ output[8] = x3;
+ output[9] = x11;
+ output[10] = x15;
+ output[11] = x7;
+ output[12] = x5;
+ output[13] = -x13;
+ output[14] = x9;
+ output[15] = -x1;
+}
+
+static const transform_2d IHT_16[] = {
+ { idct16_1d, idct16_1d }, // DCT_DCT = 0
+ { iadst16_1d, idct16_1d }, // ADST_DCT = 1
+ { idct16_1d, iadst16_1d }, // DCT_ADST = 2
+ { iadst16_1d, iadst16_1d } // ADST_ADST = 3
+};
+
+void vp9_short_iht16x16_c(int16_t *input, int16_t *output,
+ int pitch, int tx_type) {
+ int i, j;
+ int16_t out[16 * 16];
+ int16_t *outptr = out;
+ int16_t temp_in[16], temp_out[16];
+ const transform_2d ht = IHT_16[tx_type];
+
+ // Rows
+ for (i = 0; i < 16; ++i) {
+ ht.rows(input, outptr);
+ input += 16;
+ outptr += 16;
+ }
+
+ // Columns
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j * 16 + i];
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ }
+}
+
+void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[16 * 16];
+ int16_t *outptr = out;
+ const int half_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+
+ /* First transform rows. Since all non-zero dct coefficients are in
+ * upper-left 4x4 area, we only need to calculate first 4 rows here.
+ */
+ vpx_memset(out, 0, sizeof(out));
+ for (i = 0; i < 4; ++i) {
+ idct16_1d(input, outptr);
+ input += 16;
+ outptr += 16;
+ }
+
+ // Then transform columns
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j*16 + i];
+ idct16_1d(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ }
+}
+
+
+void vp9_short_idct1_16x16_c(int16_t *input, int16_t *output) {
+ int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
+ output[0] = ROUND_POWER_OF_TWO(out, 6);
+}
+
+static void idct32_1d(int16_t *input, int16_t *output) {
+ int16_t step1[32], step2[32];
+ int temp1, temp2;
+
+ // stage 1
+ step1[0] = input[0];
+ step1[1] = input[16];
+ step1[2] = input[8];
+ step1[3] = input[24];
+ step1[4] = input[4];
+ step1[5] = input[20];
+ step1[6] = input[12];
+ step1[7] = input[28];
+ step1[8] = input[2];
+ step1[9] = input[18];
+ step1[10] = input[10];
+ step1[11] = input[26];
+ step1[12] = input[6];
+ step1[13] = input[22];
+ step1[14] = input[14];
+ step1[15] = input[30];
+
+ temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
+ temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
+ step1[16] = dct_const_round_shift(temp1);
+ step1[31] = dct_const_round_shift(temp2);
+
+ temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
+ temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
+ step1[17] = dct_const_round_shift(temp1);
+ step1[30] = dct_const_round_shift(temp2);
+
+ temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
+ temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
+ step1[18] = dct_const_round_shift(temp1);
+ step1[29] = dct_const_round_shift(temp2);
+
+ temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
+ temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
+ step1[19] = dct_const_round_shift(temp1);
+ step1[28] = dct_const_round_shift(temp2);
+
+ temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
+ temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
+ step1[20] = dct_const_round_shift(temp1);
+ step1[27] = dct_const_round_shift(temp2);
+
+ temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
+ temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
+ step1[21] = dct_const_round_shift(temp1);
+ step1[26] = dct_const_round_shift(temp2);
+
+ temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
+ temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
+ step1[22] = dct_const_round_shift(temp1);
+ step1[25] = dct_const_round_shift(temp2);
+
+ temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
+ temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
+ step1[23] = dct_const_round_shift(temp1);
+ step1[24] = dct_const_round_shift(temp2);
+
+ // stage 2
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = dct_const_round_shift(temp1);
+ step2[15] = dct_const_round_shift(temp2);
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = dct_const_round_shift(temp1);
+ step2[14] = dct_const_round_shift(temp2);
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = dct_const_round_shift(temp1);
+ step2[13] = dct_const_round_shift(temp2);
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = dct_const_round_shift(temp1);
+ step2[12] = dct_const_round_shift(temp2);
+
+ step2[16] = step1[16] + step1[17];
+ step2[17] = step1[16] - step1[17];
+ step2[18] = -step1[18] + step1[19];
+ step2[19] = step1[18] + step1[19];
+ step2[20] = step1[20] + step1[21];
+ step2[21] = step1[20] - step1[21];
+ step2[22] = -step1[22] + step1[23];
+ step2[23] = step1[22] + step1[23];
+ step2[24] = step1[24] + step1[25];
+ step2[25] = step1[24] - step1[25];
+ step2[26] = -step1[26] + step1[27];
+ step2[27] = step1[26] + step1[27];
+ step2[28] = step1[28] + step1[29];
+ step2[29] = step1[28] - step1[29];
+ step2[30] = -step1[30] + step1[31];
+ step2[31] = step1[30] + step1[31];
+
+ // stage 3
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = dct_const_round_shift(temp1);
+ step1[7] = dct_const_round_shift(temp2);
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = dct_const_round_shift(temp1);
+ step1[6] = dct_const_round_shift(temp2);
+
+ step1[8] = step2[8] + step2[9];
+ step1[9] = step2[8] - step2[9];
+ step1[10] = -step2[10] + step2[11];
+ step1[11] = step2[10] + step2[11];
+ step1[12] = step2[12] + step2[13];
+ step1[13] = step2[12] - step2[13];
+ step1[14] = -step2[14] + step2[15];
+ step1[15] = step2[14] + step2[15];
+
+ step1[16] = step2[16];
+ step1[31] = step2[31];
+ temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
+ temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
+ step1[17] = dct_const_round_shift(temp1);
+ step1[30] = dct_const_round_shift(temp2);
+ temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
+ temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
+ step1[18] = dct_const_round_shift(temp1);
+ step1[29] = dct_const_round_shift(temp2);
+ step1[19] = step2[19];
+ step1[20] = step2[20];
+ temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
+ temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
+ step1[21] = dct_const_round_shift(temp1);
+ step1[26] = dct_const_round_shift(temp2);
+ temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
+ temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
+ step1[22] = dct_const_round_shift(temp1);
+ step1[25] = dct_const_round_shift(temp2);
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[27] = step2[27];
+ step1[28] = step2[28];
+
+ // stage 4
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = dct_const_round_shift(temp1);
+ step2[1] = dct_const_round_shift(temp2);
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = dct_const_round_shift(temp1);
+ step2[3] = dct_const_round_shift(temp2);
+ step2[4] = step1[4] + step1[5];
+ step2[5] = step1[4] - step1[5];
+ step2[6] = -step1[6] + step1[7];
+ step2[7] = step1[6] + step1[7];
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = dct_const_round_shift(temp1);
+ step2[14] = dct_const_round_shift(temp2);
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = dct_const_round_shift(temp1);
+ step2[13] = dct_const_round_shift(temp2);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ step2[16] = step1[16] + step1[19];
+ step2[17] = step1[17] + step1[18];
+ step2[18] = step1[17] - step1[18];
+ step2[19] = step1[16] - step1[19];
+ step2[20] = -step1[20] + step1[23];
+ step2[21] = -step1[21] + step1[22];
+ step2[22] = step1[21] + step1[22];
+ step2[23] = step1[20] + step1[23];
+
+ step2[24] = step1[24] + step1[27];
+ step2[25] = step1[25] + step1[26];
+ step2[26] = step1[25] - step1[26];
+ step2[27] = step1[24] - step1[27];
+ step2[28] = -step1[28] + step1[31];
+ step2[29] = -step1[29] + step1[30];
+ step2[30] = step1[29] + step1[30];
+ step2[31] = step1[28] + step1[31];
+
+ // stage 5
+ step1[0] = step2[0] + step2[3];
+ step1[1] = step2[1] + step2[2];
+ step1[2] = step2[1] - step2[2];
+ step1[3] = step2[0] - step2[3];
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = dct_const_round_shift(temp1);
+ step1[6] = dct_const_round_shift(temp2);
+ step1[7] = step2[7];
+
+ step1[8] = step2[8] + step2[11];
+ step1[9] = step2[9] + step2[10];
+ step1[10] = step2[9] - step2[10];
+ step1[11] = step2[8] - step2[11];
+ step1[12] = -step2[12] + step2[15];
+ step1[13] = -step2[13] + step2[14];
+ step1[14] = step2[13] + step2[14];
+ step1[15] = step2[12] + step2[15];
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
+ temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
+ step1[18] = dct_const_round_shift(temp1);
+ step1[29] = dct_const_round_shift(temp2);
+ temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
+ temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
+ step1[19] = dct_const_round_shift(temp1);
+ step1[28] = dct_const_round_shift(temp2);
+ temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
+ temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
+ step1[20] = dct_const_round_shift(temp1);
+ step1[27] = dct_const_round_shift(temp2);
+ temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
+ temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
+ step1[21] = dct_const_round_shift(temp1);
+ step1[26] = dct_const_round_shift(temp2);
+ step1[22] = step2[22];
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[25] = step2[25];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // stage 6
+ step2[0] = step1[0] + step1[7];
+ step2[1] = step1[1] + step1[6];
+ step2[2] = step1[2] + step1[5];
+ step2[3] = step1[3] + step1[4];
+ step2[4] = step1[3] - step1[4];
+ step2[5] = step1[2] - step1[5];
+ step2[6] = step1[1] - step1[6];
+ step2[7] = step1[0] - step1[7];
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = dct_const_round_shift(temp1);
+ step2[13] = dct_const_round_shift(temp2);
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = dct_const_round_shift(temp1);
+ step2[12] = dct_const_round_shift(temp2);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ step2[16] = step1[16] + step1[23];
+ step2[17] = step1[17] + step1[22];
+ step2[18] = step1[18] + step1[21];
+ step2[19] = step1[19] + step1[20];
+ step2[20] = step1[19] - step1[20];
+ step2[21] = step1[18] - step1[21];
+ step2[22] = step1[17] - step1[22];
+ step2[23] = step1[16] - step1[23];
+
+ step2[24] = -step1[24] + step1[31];
+ step2[25] = -step1[25] + step1[30];
+ step2[26] = -step1[26] + step1[29];
+ step2[27] = -step1[27] + step1[28];
+ step2[28] = step1[27] + step1[28];
+ step2[29] = step1[26] + step1[29];
+ step2[30] = step1[25] + step1[30];
+ step2[31] = step1[24] + step1[31];
+
+ // stage 7
+ step1[0] = step2[0] + step2[15];
+ step1[1] = step2[1] + step2[14];
+ step1[2] = step2[2] + step2[13];
+ step1[3] = step2[3] + step2[12];
+ step1[4] = step2[4] + step2[11];
+ step1[5] = step2[5] + step2[10];
+ step1[6] = step2[6] + step2[9];
+ step1[7] = step2[7] + step2[8];
+ step1[8] = step2[7] - step2[8];
+ step1[9] = step2[6] - step2[9];
+ step1[10] = step2[5] - step2[10];
+ step1[11] = step2[4] - step2[11];
+ step1[12] = step2[3] - step2[12];
+ step1[13] = step2[2] - step2[13];
+ step1[14] = step2[1] - step2[14];
+ step1[15] = step2[0] - step2[15];
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ step1[18] = step2[18];
+ step1[19] = step2[19];
+ temp1 = (-step2[20] + step2[27]) * cospi_16_64;
+ temp2 = (step2[20] + step2[27]) * cospi_16_64;
+ step1[20] = dct_const_round_shift(temp1);
+ step1[27] = dct_const_round_shift(temp2);
+ temp1 = (-step2[21] + step2[26]) * cospi_16_64;
+ temp2 = (step2[21] + step2[26]) * cospi_16_64;
+ step1[21] = dct_const_round_shift(temp1);
+ step1[26] = dct_const_round_shift(temp2);
+ temp1 = (-step2[22] + step2[25]) * cospi_16_64;
+ temp2 = (step2[22] + step2[25]) * cospi_16_64;
+ step1[22] = dct_const_round_shift(temp1);
+ step1[25] = dct_const_round_shift(temp2);
+ temp1 = (-step2[23] + step2[24]) * cospi_16_64;
+ temp2 = (step2[23] + step2[24]) * cospi_16_64;
+ step1[23] = dct_const_round_shift(temp1);
+ step1[24] = dct_const_round_shift(temp2);
+ step1[28] = step2[28];
+ step1[29] = step2[29];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // final stage
+ output[0] = step1[0] + step1[31];
+ output[1] = step1[1] + step1[30];
+ output[2] = step1[2] + step1[29];
+ output[3] = step1[3] + step1[28];
+ output[4] = step1[4] + step1[27];
+ output[5] = step1[5] + step1[26];
+ output[6] = step1[6] + step1[25];
+ output[7] = step1[7] + step1[24];
+ output[8] = step1[8] + step1[23];
+ output[9] = step1[9] + step1[22];
+ output[10] = step1[10] + step1[21];
+ output[11] = step1[11] + step1[20];
+ output[12] = step1[12] + step1[19];
+ output[13] = step1[13] + step1[18];
+ output[14] = step1[14] + step1[17];
+ output[15] = step1[15] + step1[16];
+ output[16] = step1[15] - step1[16];
+ output[17] = step1[14] - step1[17];
+ output[18] = step1[13] - step1[18];
+ output[19] = step1[12] - step1[19];
+ output[20] = step1[11] - step1[20];
+ output[21] = step1[10] - step1[21];
+ output[22] = step1[9] - step1[22];
+ output[23] = step1[8] - step1[23];
+ output[24] = step1[7] - step1[24];
+ output[25] = step1[6] - step1[25];
+ output[26] = step1[5] - step1[26];
+ output[27] = step1[4] - step1[27];
+ output[28] = step1[3] - step1[28];
+ output[29] = step1[2] - step1[29];
+ output[30] = step1[1] - step1[30];
+ output[31] = step1[0] - step1[31];
+}
+
+void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[32 * 32];
+ int16_t *outptr = out;
+ const int half_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[32], temp_out[32];
+
+ // Rows
+ for (i = 0; i < 32; ++i) {
+ idct32_1d(input, outptr);
+ input += 32;
+ outptr += 32;
+ }
+
+ // Columns
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j)
+ temp_in[j] = out[j * 32 + i];
+ idct32_1d(temp_in, temp_out);
+ for (j = 0; j < 32; ++j)
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ }
+}
+
+void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
+ int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
+ output[0] = ROUND_POWER_OF_TWO(out, 6);
+}
+
+void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[32 * 32];
+ int16_t *outptr = out;
+ const int half_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[32], temp_out[32];
+
+ /* First transform rows. Since all non-zero dct coefficients are in
+ * upper-left 4x4 area, we only need to calculate first 4 rows here.
+ */
+ vpx_memset(out, 0, sizeof(out));
+ for (i = 0; i < 4; ++i) {
+ idct32_1d(input, outptr);
+ input += 32;
+ outptr += 32;
+ }
+
+ // Columns
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j)
+ temp_in[j] = out[j * 32 + i];
+ idct32_1d(temp_in, temp_out);
+ for (j = 0; j < 32; ++j)
+ output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6);
+ }
+}
--- /dev/null
+++ b/vp9/common/vp9_idct.h
@@ -1,0 +1,85 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_IDCT_H_
+#define VP9_COMMON_VP9_IDCT_H_
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
+
+// Constants and Macros used by all idct/dct functions
+#define DCT_CONST_BITS 14
+#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
+
+#define pair_set_epi16(a, b) \
+ _mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16))
+
+// Constants are round(16384 * cos(k*Pi/64)) where k = 1 to 31.
+// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
+static const int cospi_1_64 = 16364;
+static const int cospi_2_64 = 16305;
+static const int cospi_3_64 = 16207;
+static const int cospi_4_64 = 16069;
+static const int cospi_5_64 = 15893;
+static const int cospi_6_64 = 15679;
+static const int cospi_7_64 = 15426;
+static const int cospi_8_64 = 15137;
+static const int cospi_9_64 = 14811;
+static const int cospi_10_64 = 14449;
+static const int cospi_11_64 = 14053;
+static const int cospi_12_64 = 13623;
+static const int cospi_13_64 = 13160;
+static const int cospi_14_64 = 12665;
+static const int cospi_15_64 = 12140;
+static const int cospi_16_64 = 11585;
+static const int cospi_17_64 = 11003;
+static const int cospi_18_64 = 10394;
+static const int cospi_19_64 = 9760;
+static const int cospi_20_64 = 9102;
+static const int cospi_21_64 = 8423;
+static const int cospi_22_64 = 7723;
+static const int cospi_23_64 = 7005;
+static const int cospi_24_64 = 6270;
+static const int cospi_25_64 = 5520;
+static const int cospi_26_64 = 4756;
+static const int cospi_27_64 = 3981;
+static const int cospi_28_64 = 3196;
+static const int cospi_29_64 = 2404;
+static const int cospi_30_64 = 1606;
+static const int cospi_31_64 = 804;
+
+// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
+static const int sinpi_1_9 = 5283;
+static const int sinpi_2_9 = 9929;
+static const int sinpi_3_9 = 13377;
+static const int sinpi_4_9 = 15212;
+
+static INLINE int dct_const_round_shift(int input) {
+ int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+ assert(INT16_MIN <= rv && rv <= INT16_MAX);
+ return rv;
+}
+
+static INLINE int dct_32_round(int input) {
+ int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+ assert(-131072 <= rv && rv <= 131071);
+ return rv;
+}
+
+typedef void (*transform_1d)(int16_t*, int16_t*);
+
+typedef struct {
+ transform_1d cols, rows; // vertical and horizontal
+} transform_2d;
+
+#endif // VP9_COMMON_VP9_IDCT_H_
--- a/vp9/common/vp9_idctllm.c
+++ /dev/null
@@ -1,2670 +1,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
- * Notes:
- *
- * This implementation makes use of 16 bit fixed point verio of two multiply
- * constants:
- * 1. sqrt(2) * cos (pi/8)
- * 2. sqrt(2) * sin (pi/8)
- * Becuase the first constant is bigger than 1, to maintain the same 16 bit
- * fixed point precision as the second one, we use a trick of
- * x * a = x + x*(a-1)
- * so
- * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
- **************************************************************************/
-#include <assert.h>
-#include <math.h>
-#include "./vpx_config.h"
-#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/common/vp9_blockd.h"
-#include "vp9/common/vp9_common.h"
-
-static const int cospi8sqrt2minus1 = 20091;
-static const int sinpi8sqrt2 = 35468;
-static const int rounding = 0;
-
-static const int16_t idct_i4[16] = {
- 8192, 10703, 8192, 4433,
- 8192, 4433, -8192, -10703,
- 8192, -4433, -8192, 10703,
- 8192, -10703, 8192, -4433
-};
-
-static const int16_t iadst_i4[16] = {
- 3736, 9459, 10757, 7021,
- 7021, 9459, -3736, -10757,
- 9459, 0, -9459, 9459,
- 10757, -9459, 7021, -3736
-};
-
-static const int16_t idct_i8[64] = {
- 5793, 8035, 7568, 6811,
- 5793, 4551, 3135, 1598,
- 5793, 6811, 3135, -1598,
- -5793, -8035, -7568, -4551,
- 5793, 4551, -3135, -8035,
- -5793, 1598, 7568, 6811,
- 5793, 1598, -7568, -4551,
- 5793, 6811, -3135, -8035,
- 5793, -1598, -7568, 4551,
- 5793, -6811, -3135, 8035,
- 5793, -4551, -3135, 8035,
- -5793, -1598, 7568, -6811,
- 5793, -6811, 3135, 1598,
- -5793, 8035, -7568, 4551,
- 5793, -8035, 7568, -6811,
- 5793, -4551, 3135, -1598
-};
-
-static const int16_t iadst_i8[64] = {
- 1460, 4184, 6342, 7644,
- 7914, 7114, 5354, 2871,
- 2871, 7114, 7644, 4184,
- -1460, -6342, -7914, -5354,
- 4184, 7914, 2871, -5354,
- -7644, -1460, 6342, 7114,
- 5354, 6342, -4184, -7114,
- 2871, 7644, -1460, -7914,
- 6342, 2871, -7914, 1460,
- 7114, -5354, -4184, 7644,
- 7114, -1460, -5354, 7914,
- -4184, -2871, 7644, -6342,
- 7644, -5354, 1460, 2871,
- -6342, 7914, -7114, 4184,
- 7914, -7644, 7114, -6342,
- 5354, -4184, 2871, -1460
-};
-
-
-
-static const int16_t idct_i16[256] = {
- 4096, 5765, 5681, 5543, 5352, 5109, 4816, 4478,
- 4096, 3675, 3218, 2731, 2217, 1682, 1130, 568,
- 4096, 5543, 4816, 3675, 2217, 568, -1130, -2731,
- -4096, -5109, -5681, -5765, -5352, -4478, -3218, -1682,
- 4096, 5109, 3218, 568, -2217, -4478, -5681, -5543,
- -4096, -1682, 1130, 3675, 5352, 5765, 4816, 2731,
- 4096, 4478, 1130, -2731, -5352, -5543, -3218, 568,
- 4096, 5765, 4816, 1682, -2217, -5109, -5681, -3675,
- 4096, 3675, -1130, -5109, -5352, -1682, 3218, 5765,
- 4096, -568, -4816, -5543, -2217, 2731, 5681, 4478,
- 4096, 2731, -3218, -5765, -2217, 3675, 5681, 1682,
- -4096, -5543, -1130, 4478, 5352, 568, -4816, -5109,
- 4096, 1682, -4816, -4478, 2217, 5765, 1130, -5109,
- -4096, 2731, 5681, 568, -5352, -3675, 3218, 5543,
- 4096, 568, -5681, -1682, 5352, 2731, -4816, -3675,
- 4096, 4478, -3218, -5109, 2217, 5543, -1130, -5765,
- 4096, -568, -5681, 1682, 5352, -2731, -4816, 3675,
- 4096, -4478, -3218, 5109, 2217, -5543, -1130, 5765,
- 4096, -1682, -4816, 4478, 2217, -5765, 1130, 5109,
- -4096, -2731, 5681, -568, -5352, 3675, 3218, -5543,
- 4096, -2731, -3218, 5765, -2217, -3675, 5681, -1682,
- -4096, 5543, -1130, -4478, 5352, -568, -4816, 5109,
- 4096, -3675, -1130, 5109, -5352, 1682, 3218, -5765,
- 4096, 568, -4816, 5543, -2217, -2731, 5681, -4478,
- 4096, -4478, 1130, 2731, -5352, 5543, -3218, -568,
- 4096, -5765, 4816, -1682, -2217, 5109, -5681, 3675,
- 4096, -5109, 3218, -568, -2217, 4478, -5681, 5543,
- -4096, 1682, 1130, -3675, 5352, -5765, 4816, -2731,
- 4096, -5543, 4816, -3675, 2217, -568, -1130, 2731,
- -4096, 5109, -5681, 5765, -5352, 4478, -3218, 1682,
- 4096, -5765, 5681, -5543, 5352, -5109, 4816, -4478,
- 4096, -3675, 3218, -2731, 2217, -1682, 1130, -568
-};
-
-static const int16_t iadst_i16[256] = {
- 542, 1607, 2614, 3526, 4311, 4940, 5390, 5646,
- 5698, 5543, 5189, 4646, 3936, 3084, 2120, 1080,
- 1080, 3084, 4646, 5543, 5646, 4940, 3526, 1607,
- -542, -2614, -4311, -5390, -5698, -5189, -3936, -2120,
- 1607, 4311, 5646, 5189, 3084, 0, -3084, -5189,
- -5646, -4311, -1607, 1607, 4311, 5646, 5189, 3084,
- 2120, 5189, 5390, 2614, -1607, -4940, -5543, -3084,
- 1080, 4646, 5646, 3526, -542, -4311, -5698, -3936,
- 2614, 5646, 3936, -1080, -5189, -4940, -542, 4311,
- 5543, 2120, -3084, -5698, -3526, 1607, 5390, 4646,
- 3084, 5646, 1607, -4311, -5189, 0, 5189, 4311,
- -1607, -5646, -3084, 3084, 5646, 1607, -4311, -5189,
- 3526, 5189, -1080, -5698, -1607, 4940, 3936, -3084,
- -5390, 542, 5646, 2120, -4646, -4311, 2614, 5543,
- 3936, 4311, -3526, -4646, 3084, 4940, -2614, -5189,
- 2120, 5390, -1607, -5543, 1080, 5646, -542, -5698,
- 4311, 3084, -5189, -1607, 5646, 0, -5646, 1607,
- 5189, -3084, -4311, 4311, 3084, -5189, -1607, 5646,
- 4646, 1607, -5698, 2120, 4311, -4940, -1080, 5646,
- -2614, -3936, 5189, 542, -5543, 3084, 3526, -5390,
- 4940, 0, -4940, 4940, 0, -4940, 4940, 0,
- -4940, 4940, 0, -4940, 4940, 0, -4940, 4940,
- 5189, -1607, -3084, 5646, -4311, 0, 4311, -5646,
- 3084, 1607, -5189, 5189, -1607, -3084, 5646, -4311,
- 5390, -3084, -542, 3936, -5646, 4940, -2120, -1607,
- 4646, -5698, 4311, -1080, -2614, 5189, -5543, 3526,
- 5543, -4311, 2120, 542, -3084, 4940, -5698, 5189,
- -3526, 1080, 1607, -3936, 5390, -5646, 4646, -2614,
- 5646, -5189, 4311, -3084, 1607, 0, -1607, 3084,
- -4311, 5189, -5646, 5646, -5189, 4311, -3084, 1607,
- 5698, -5646, 5543, -5390, 5189, -4940, 4646, -4311,
- 3936, -3526, 3084, -2614, 2120, -1607, 1080, -542
-};
-
-
-/* Converted the transforms to integer form. */
-#define HORIZONTAL_SHIFT 14 // 16
-#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
-#define VERTICAL_SHIFT 17 // 15
-#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
-void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
- TX_TYPE tx_type, int tx_dim, uint16_t eobs) {
- int i, j, k;
- int nz_dim;
- int16_t imbuf[256];
-
- const int16_t *ip = input;
- int16_t *op = output;
- int16_t *im = &imbuf[0];
-
- /* pointers to vertical and horizontal transforms. */
- const int16_t *ptv = NULL, *pth = NULL;
- int shortpitch = pitch >> 1;
-
- switch (tx_type) {
- case ADST_ADST :
- ptv = pth = (tx_dim == 4) ? &iadst_i4[0]
- : ((tx_dim == 8) ? &iadst_i8[0]
- : &iadst_i16[0]);
- break;
- case ADST_DCT :
- ptv = (tx_dim == 4) ? &iadst_i4[0]
- : ((tx_dim == 8) ? &iadst_i8[0] : &iadst_i16[0]);
- pth = (tx_dim == 4) ? &idct_i4[0]
- : ((tx_dim == 8) ? &idct_i8[0] : &idct_i16[0]);
- break;
- case DCT_ADST :
- ptv = (tx_dim == 4) ? &idct_i4[0]
- : ((tx_dim == 8) ? &idct_i8[0] : &idct_i16[0]);
- pth = (tx_dim == 4) ? &iadst_i4[0]
- : ((tx_dim == 8) ? &iadst_i8[0] : &iadst_i16[0]);
- break;
- case DCT_DCT :
- ptv = pth = (tx_dim == 4) ? &idct_i4[0]
- : ((tx_dim == 8) ? &idct_i8[0]
- : &idct_i16[0]);
- break;
- default:
- assert(0);
- break;
- }
-
- nz_dim = tx_dim;
- if(tx_dim > 4) {
- if(eobs < 36) {
- vpx_memset(im, 0, 512);
- nz_dim = 8;
- if(eobs < 3) {
- nz_dim = 2;
- } else if(eobs < 10) {
- nz_dim = 4;
- }
- }
- }
-
- /* 2-D inverse transform X = M1*Z*Transposed_M2 is calculated in 2 steps
- * from right to left:
- * 1. horizontal transform: Y= Z*Transposed_M2
- * 2. vertical transform: X = M1*Y
- * In SIMD, doing this way could eliminate the transpose needed if it is
- * calculated from left to right.
- */
- /* Horizontal transformation */
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < nz_dim; i++) {
- int temp = 0;
-
- for (k = 0; k < nz_dim; k++) {
- temp += ip[k] * pth[k];
- }
-
- /* Calculate im and store it in its transposed position. */
- im[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
- ip += tx_dim;
- }
- im += tx_dim;
- pth += tx_dim;
- ip = input;
- }
-
- /* Vertical transformation */
- im = &imbuf[0];
-
- for (i = 0; i < tx_dim; i++) {
- for (j = 0; j < tx_dim; j++) {
- int temp = 0;
-
- for (k = 0; k < nz_dim; k++) {
- temp += ptv[k] * im[k];
- }
-
- op[j] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
- im += tx_dim;
- }
- im = &imbuf[0];
- ptv += tx_dim;
- op += shortpitch;
- }
-}
-
-void vp9_short_idct4x4llm_c(int16_t *input, int16_t *output, int pitch) {
- int i;
- int a1, b1, c1, d1;
-
- int16_t *ip = input;
- int16_t *op = output;
- int temp1, temp2;
- int shortpitch = pitch >> 1;
-
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[8];
- b1 = ip[0] - ip[8];
-
- temp1 = (ip[4] * sinpi8sqrt2 + rounding) >> 16;
- temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1 + rounding) >> 16);
- c1 = temp1 - temp2;
-
- temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1 + rounding) >> 16);
- temp2 = (ip[12] * sinpi8sqrt2 + rounding) >> 16;
- d1 = temp1 + temp2;
-
- op[shortpitch * 0] = a1 + d1;
- op[shortpitch * 3] = a1 - d1;
-
- op[shortpitch * 1] = b1 + c1;
- op[shortpitch * 2] = b1 - c1;
-
- ip++;
- op++;
- }
-
- ip = output;
- op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[2];
- b1 = ip[0] - ip[2];
-
- temp1 = (ip[1] * sinpi8sqrt2 + rounding) >> 16;
- temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1 + rounding) >> 16);
- c1 = temp1 - temp2;
-
- temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1 + rounding) >> 16);
- temp2 = (ip[3] * sinpi8sqrt2 + rounding) >> 16;
- d1 = temp1 + temp2;
-
- op[0] = (a1 + d1 + 16) >> 5;
- op[3] = (a1 - d1 + 16) >> 5;
-
- op[1] = (b1 + c1 + 16) >> 5;
- op[2] = (b1 - c1 + 16) >> 5;
-
- ip += shortpitch;
- op += shortpitch;
- }
-}
-
-void vp9_short_idct4x4llm_1_c(int16_t *input, int16_t *output, int pitch) {
- int i;
- int a1;
- int16_t *op = output;
- int shortpitch = pitch >> 1;
- a1 = ((input[0] + 16) >> 5);
- for (i = 0; i < 4; i++) {
- op[0] = a1;
- op[1] = a1;
- op[2] = a1;
- op[3] = a1;
- op += shortpitch;
- }
-}
-
-void vp9_dc_only_idct_add_c(int input_dc, uint8_t *pred_ptr,
- uint8_t *dst_ptr, int pitch, int stride) {
- int a1 = ((input_dc + 16) >> 5);
- int r, c;
-
- for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
- dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]);
- }
-
- dst_ptr += stride;
- pred_ptr += pitch;
- }
-}
-
-void vp9_short_inv_walsh4x4_c(int16_t *input, int16_t *output) {
- int i;
- int a1, b1, c1, d1;
- int16_t *ip = input;
- int16_t *op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ((ip[0] + ip[3]));
- b1 = ((ip[1] + ip[2]));
- c1 = ((ip[1] - ip[2]));
- d1 = ((ip[0] - ip[3]));
-
- op[0] = (a1 + b1 + 1) >> 1;
- op[1] = (c1 + d1) >> 1;
- op[2] = (a1 - b1) >> 1;
- op[3] = (d1 - c1) >> 1;
-
- ip += 4;
- op += 4;
- }
-
- ip = output;
- op = output;
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[12];
- b1 = ip[4] + ip[8];
- c1 = ip[4] - ip[8];
- d1 = ip[0] - ip[12];
- op[0] = (a1 + b1 + 1) >> 1;
- op[4] = (c1 + d1) >> 1;
- op[8] = (a1 - b1) >> 1;
- op[12] = (d1 - c1) >> 1;
- ip++;
- op++;
- }
-}
-
-void vp9_short_inv_walsh4x4_1_c(int16_t *in, int16_t *out) {
- int i;
- int16_t tmp[4];
- int16_t *ip = in;
- int16_t *op = tmp;
-
- op[0] = (ip[0] + 1) >> 1;
- op[1] = op[2] = op[3] = (ip[0] >> 1);
-
- ip = tmp;
- op = out;
- for (i = 0; i < 4; i++) {
- op[0] = (ip[0] + 1) >> 1;
- op[4] = op[8] = op[12] = (ip[0] >> 1);
- ip++;
- op++;
- }
-}
-
-#if CONFIG_LOSSLESS
-void vp9_short_inv_walsh4x4_lossless_c(int16_t *input, int16_t *output) {
- int i;
- int a1, b1, c1, d1;
- int16_t *ip = input;
- int16_t *op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ((ip[0] + ip[3])) >> Y2_WHT_UPSCALE_FACTOR;
- b1 = ((ip[1] + ip[2])) >> Y2_WHT_UPSCALE_FACTOR;
- c1 = ((ip[1] - ip[2])) >> Y2_WHT_UPSCALE_FACTOR;
- d1 = ((ip[0] - ip[3])) >> Y2_WHT_UPSCALE_FACTOR;
-
- op[0] = (a1 + b1 + 1) >> 1;
- op[1] = (c1 + d1) >> 1;
- op[2] = (a1 - b1) >> 1;
- op[3] = (d1 - c1) >> 1;
-
- ip += 4;
- op += 4;
- }
-
- ip = output;
- op = output;
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[12];
- b1 = ip[4] + ip[8];
- c1 = ip[4] - ip[8];
- d1 = ip[0] - ip[12];
-
-
- op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[4] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[8] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[12] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
-
- ip++;
- op++;
- }
-}
-
-void vp9_short_inv_walsh4x4_1_lossless_c(int16_t *in, int16_t *out) {
- int i;
- int16_t tmp[4];
- int16_t *ip = in;
- int16_t *op = tmp;
-
- op[0] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) + 1) >> 1;
- op[1] = op[2] = op[3] = ((ip[0] >> Y2_WHT_UPSCALE_FACTOR) >> 1);
-
- ip = tmp;
- op = out;
- for (i = 0; i < 4; i++) {
- op[0] = ((ip[0] + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[4] = op[8] = op[12] = ((ip[0] >> 1)) << Y2_WHT_UPSCALE_FACTOR;
- ip++;
- op++;
- }
-}
-
-void vp9_short_inv_walsh4x4_x8_c(int16_t *input, int16_t *output, int pitch) {
- int i;
- int a1, b1, c1, d1;
- int16_t *ip = input;
- int16_t *op = output;
- int shortpitch = pitch >> 1;
-
- for (i = 0; i < 4; i++) {
- a1 = ((ip[0] + ip[3])) >> WHT_UPSCALE_FACTOR;
- b1 = ((ip[1] + ip[2])) >> WHT_UPSCALE_FACTOR;
- c1 = ((ip[1] - ip[2])) >> WHT_UPSCALE_FACTOR;
- d1 = ((ip[0] - ip[3])) >> WHT_UPSCALE_FACTOR;
-
- op[0] = (a1 + b1 + 1) >> 1;
- op[1] = (c1 + d1) >> 1;
- op[2] = (a1 - b1) >> 1;
- op[3] = (d1 - c1) >> 1;
-
- ip += 4;
- op += shortpitch;
- }
-
- ip = output;
- op = output;
- for (i = 0; i < 4; i++) {
- a1 = ip[shortpitch * 0] + ip[shortpitch * 3];
- b1 = ip[shortpitch * 1] + ip[shortpitch * 2];
- c1 = ip[shortpitch * 1] - ip[shortpitch * 2];
- d1 = ip[shortpitch * 0] - ip[shortpitch * 3];
-
-
- op[shortpitch * 0] = (a1 + b1 + 1) >> 1;
- op[shortpitch * 1] = (c1 + d1) >> 1;
- op[shortpitch * 2] = (a1 - b1) >> 1;
- op[shortpitch * 3] = (d1 - c1) >> 1;
-
- ip++;
- op++;
- }
-}
-
-void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) {
- int i;
- int16_t tmp[4];
- int16_t *ip = in;
- int16_t *op = tmp;
- int shortpitch = pitch >> 1;
-
- op[0] = ((ip[0] >> WHT_UPSCALE_FACTOR) + 1) >> 1;
- op[1] = op[2] = op[3] = ((ip[0] >> WHT_UPSCALE_FACTOR) >> 1);
-
-
- ip = tmp;
- op = out;
- for (i = 0; i < 4; i++) {
- op[shortpitch * 0] = (ip[0] + 1) >> 1;
- op[shortpitch * 1] = op[shortpitch * 2] = op[shortpitch * 3] = ip[0] >> 1;
- ip++;
- op++;
- }
-}
-
-void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,
- uint8_t *dst_ptr,
- int pitch, int stride) {
- int r, c;
- short tmp[16];
- vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1);
-
- for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
- dst_ptr[c] = clip_pixel(tmp[r * 4 + c] + pred_ptr[c]);
- }
-
- dst_ptr += stride;
- pred_ptr += pitch;
- }
-}
-#endif
-
-void vp9_dc_only_idct_add_8x8_c(short input_dc,
- uint8_t *pred_ptr,
- uint8_t *dst_ptr,
- int pitch, int stride) {
- int a1 = ((input_dc + 16) >> 5);
- int r, c, b;
- uint8_t *orig_pred = pred_ptr;
- uint8_t *orig_dst = dst_ptr;
- for (b = 0; b < 4; b++) {
- for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
- dst_ptr[c] = clip_pixel(a1 + pred_ptr[c]);
- }
-
- dst_ptr += stride;
- pred_ptr += pitch;
- }
- dst_ptr = orig_dst + (b + 1) % 2 * 4 + (b + 1) / 2 * 4 * stride;
- pred_ptr = orig_pred + (b + 1) % 2 * 4 + (b + 1) / 2 * 4 * pitch;
- }
-}
-
-#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
-#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
-#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
-#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */
-#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
-#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */
-
-/* row (horizontal) IDCT
- *
- * 7 pi 1 dst[k] = sum c[l] * src[l] * cos( -- *
- * ( k + - ) * l ) l=0 8 2
- *
- * where: c[0] = 128 c[1..7] = 128*sqrt(2) */
-
-static void idctrow(int *blk) {
- int x0, x1, x2, x3, x4, x5, x6, x7, x8;
- /* shortcut */
- if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) |
- (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) {
- blk[0] = blk[1] = blk[2] = blk[3] = blk[4]
- = blk[5] = blk[6] = blk[7] = blk[0] << 3;
- return;
- }
-
- x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */
- /* first stage */
- x8 = W7 * (x4 + x5);
- x4 = x8 + (W1 - W7) * x4;
- x5 = x8 - (W1 + W7) * x5;
- x8 = W3 * (x6 + x7);
- x6 = x8 - (W3 - W5) * x6;
- x7 = x8 - (W3 + W5) * x7;
-
- /* second stage */
- x8 = x0 + x1;
- x0 -= x1;
- x1 = W6 * (x3 + x2);
- x2 = x1 - (W2 + W6) * x2;
- x3 = x1 + (W2 - W6) * x3;
- x1 = x4 + x6;
- x4 -= x6;
- x6 = x5 + x7;
- x5 -= x7;
-
- /* third stage */
- x7 = x8 + x3;
- x8 -= x3;
- x3 = x0 + x2;
- x0 -= x2;
- x2 = (181 * (x4 + x5) + 128) >> 8;
- x4 = (181 * (x4 - x5) + 128) >> 8;
-
- /* fourth stage */
- blk[0] = (x7 + x1) >> 8;
- blk[1] = (x3 + x2) >> 8;
- blk[2] = (x0 + x4) >> 8;
- blk[3] = (x8 + x6) >> 8;
- blk[4] = (x8 - x6) >> 8;
- blk[5] = (x0 - x4) >> 8;
- blk[6] = (x3 - x2) >> 8;
- blk[7] = (x7 - x1) >> 8;
-}
-
-/* column (vertical) IDCT
- *
- * 7 pi 1 dst[8*k] = sum c[l] * src[8*l] *
- * cos( -- * ( k + - ) * l ) l=0 8 2
- *
- * where: c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */
-static void idctcol(int *blk) {
- int x0, x1, x2, x3, x4, x5, x6, x7, x8;
-
- /* shortcut */
- if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |
- (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) |
- (x7 = blk[8 * 3]))) {
- blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3]
- = blk[8 * 4] = blk[8 * 5] = blk[8 * 6]
- = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6);
- return;
- }
-
- x0 = (blk[8 * 0] << 8) + 16384;
-
- /* first stage */
- x8 = W7 * (x4 + x5) + 4;
- x4 = (x8 + (W1 - W7) * x4) >> 3;
- x5 = (x8 - (W1 + W7) * x5) >> 3;
- x8 = W3 * (x6 + x7) + 4;
- x6 = (x8 - (W3 - W5) * x6) >> 3;
- x7 = (x8 - (W3 + W5) * x7) >> 3;
-
- /* second stage */
- x8 = x0 + x1;
- x0 -= x1;
- x1 = W6 * (x3 + x2) + 4;
- x2 = (x1 - (W2 + W6) * x2) >> 3;
- x3 = (x1 + (W2 - W6) * x3) >> 3;
- x1 = x4 + x6;
- x4 -= x6;
- x6 = x5 + x7;
- x5 -= x7;
-
- /* third stage */
- x7 = x8 + x3;
- x8 -= x3;
- x3 = x0 + x2;
- x0 -= x2;
- x2 = (181 * (x4 + x5) + 128) >> 8;
- x4 = (181 * (x4 - x5) + 128) >> 8;
-
- /* fourth stage */
- blk[8 * 0] = (x7 + x1) >> 14;
- blk[8 * 1] = (x3 + x2) >> 14;
- blk[8 * 2] = (x0 + x4) >> 14;
- blk[8 * 3] = (x8 + x6) >> 14;
- blk[8 * 4] = (x8 - x6) >> 14;
- blk[8 * 5] = (x0 - x4) >> 14;
- blk[8 * 6] = (x3 - x2) >> 14;
- blk[8 * 7] = (x7 - x1) >> 14;
-}
-
-#define TX_DIM 8
-void vp9_short_idct8x8_c(int16_t *coefs, int16_t *block, int pitch) {
- int X[TX_DIM * TX_DIM];
- int i, j;
- int shortpitch = pitch >> 1;
-
- for (i = 0; i < TX_DIM; i++) {
- for (j = 0; j < TX_DIM; j++) {
- X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1
- + (coefs[i * TX_DIM + j] < 0)) >> 2;
- }
- }
- for (i = 0; i < 8; i++)
- idctrow(X + 8 * i);
-
- for (i = 0; i < 8; i++)
- idctcol(X + i);
-
- for (i = 0; i < TX_DIM; i++) {
- for (j = 0; j < TX_DIM; j++) {
- block[i * shortpitch + j] = X[i * TX_DIM + j] >> 1;
- }
- }
-}
-
-/* Row IDCT when only first 4 coefficients are non-zero. */
-static void idctrow10(int *blk) {
- int x0, x1, x2, x3, x4, x5, x6, x7, x8;
-
- /* shortcut */
- if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) |
- (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) {
- blk[0] = blk[1] = blk[2] = blk[3] = blk[4]
- = blk[5] = blk[6] = blk[7] = blk[0] << 3;
- return;
- }
-
- x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */
- /* first stage */
- x5 = W7 * x4;
- x4 = W1 * x4;
- x6 = W3 * x7;
- x7 = -W5 * x7;
-
- /* second stage */
- x2 = W6 * x3;
- x3 = W2 * x3;
- x1 = x4 + x6;
- x4 -= x6;
- x6 = x5 + x7;
- x5 -= x7;
-
- /* third stage */
- x7 = x0 + x3;
- x8 = x0 - x3;
- x3 = x0 + x2;
- x0 -= x2;
- x2 = (181 * (x4 + x5) + 128) >> 8;
- x4 = (181 * (x4 - x5) + 128) >> 8;
-
- /* fourth stage */
- blk[0] = (x7 + x1) >> 8;
- blk[1] = (x3 + x2) >> 8;
- blk[2] = (x0 + x4) >> 8;
- blk[3] = (x8 + x6) >> 8;
- blk[4] = (x8 - x6) >> 8;
- blk[5] = (x0 - x4) >> 8;
- blk[6] = (x3 - x2) >> 8;
- blk[7] = (x7 - x1) >> 8;
-}
-
-/* Column (vertical) IDCT when only first 4 coefficients are non-zero. */
-static void idctcol10(int *blk) {
- int x0, x1, x2, x3, x4, x5, x6, x7, x8;
-
- /* shortcut */
- if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) |
- (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) |
- (x7 = blk[8 * 3]))) {
- blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3]
- = blk[8 * 4] = blk[8 * 5] = blk[8 * 6]
- = blk[8 * 7] = ((blk[8 * 0] + 32) >> 6);
- return;
- }
-
- x0 = (blk[8 * 0] << 8) + 16384;
-
- /* first stage */
- x5 = (W7 * x4 + 4) >> 3;
- x4 = (W1 * x4 + 4) >> 3;
- x6 = (W3 * x7 + 4) >> 3;
- x7 = (-W5 * x7 + 4) >> 3;
-
- /* second stage */
- x2 = (W6 * x3 + 4) >> 3;
- x3 = (W2 * x3 + 4) >> 3;
- x1 = x4 + x6;
- x4 -= x6;
- x6 = x5 + x7;
- x5 -= x7;
-
- /* third stage */
- x7 = x0 + x3;
- x8 = x0 - x3;
- x3 = x0 + x2;
- x0 -= x2;
- x2 = (181 * (x4 + x5) + 128) >> 8;
- x4 = (181 * (x4 - x5) + 128) >> 8;
-
- /* fourth stage */
- blk[8 * 0] = (x7 + x1) >> 14;
- blk[8 * 1] = (x3 + x2) >> 14;
- blk[8 * 2] = (x0 + x4) >> 14;
- blk[8 * 3] = (x8 + x6) >> 14;
- blk[8 * 4] = (x8 - x6) >> 14;
- blk[8 * 5] = (x0 - x4) >> 14;
- blk[8 * 6] = (x3 - x2) >> 14;
- blk[8 * 7] = (x7 - x1) >> 14;
-}
-
-void vp9_short_idct10_8x8_c(int16_t *coefs, int16_t *block, int pitch) {
- int X[TX_DIM * TX_DIM];
- int i, j;
- int shortpitch = pitch >> 1;
-
- for (i = 0; i < TX_DIM; i++) {
- for (j = 0; j < TX_DIM; j++) {
- X[i * TX_DIM + j] = (int)(coefs[i * TX_DIM + j] + 1
- + (coefs[i * TX_DIM + j] < 0)) >> 2;
- }
- }
-
- /* Do first 4 row idct only since non-zero dct coefficients are all in
- * upper-left 4x4 area. */
- for (i = 0; i < 4; i++)
- idctrow10(X + 8 * i);
-
- for (i = 0; i < 8; i++)
- idctcol10(X + i);
-
- for (i = 0; i < TX_DIM; i++) {
- for (j = 0; j < TX_DIM; j++) {
- block[i * shortpitch + j] = X[i * TX_DIM + j] >> 1;
- }
- }
-}
-
-void vp9_short_ihaar2x2_c(int16_t *input, int16_t *output, int pitch) {
- int i;
- int16_t *ip = input; // 0, 1, 4, 8
- int16_t *op = output;
- for (i = 0; i < 16; i++) {
- op[i] = 0;
- }
-
- op[0] = (ip[0] + ip[1] + ip[4] + ip[8] + 1) >> 1;
- op[1] = (ip[0] - ip[1] + ip[4] - ip[8]) >> 1;
- op[4] = (ip[0] + ip[1] - ip[4] - ip[8]) >> 1;
- op[8] = (ip[0] - ip[1] - ip[4] + ip[8]) >> 1;
-}
-
-
-#if 0
-// Keep a really bad float version as reference for now.
-void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double x;
- const int short_pitch = pitch >> 1;
- int i, j, k, l;
- for (l = 0; l < 16; ++l) {
- for (k = 0; k < 16; ++k) {
- double s = 0;
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j) {
- x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/32;
- if (i != 0)
- x *= sqrt(2.0);
- if (j != 0)
- x *= sqrt(2.0);
- s += x;
- }
- }
- output[k*short_pitch+l] = (short)round(s);
- }
- }
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-#endif
-
-#define TEST_INT_16x16_IDCT 1
-#if !TEST_INT_16x16_IDCT
-
-static void butterfly_16x16_idct_1d(double input[16], double output[16]) {
-
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
-
- // step 1 and 2
- step[ 0] = input[0] + input[8];
- step[ 1] = input[0] - input[8];
-
- temp1 = input[4]*C12;
- temp2 = input[12]*C4;
-
- temp1 -= temp2;
- temp1 *= C8;
-
- step[ 2] = 2*(temp1);
-
- temp1 = input[4]*C4;
- temp2 = input[12]*C12;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- step[ 3] = 2*(temp1);
-
- temp1 = input[2]*C8;
- temp1 = 2*(temp1);
- temp2 = input[6] + input[10];
-
- step[ 4] = temp1 + temp2;
- step[ 5] = temp1 - temp2;
-
- temp1 = input[14]*C8;
- temp1 = 2*(temp1);
- temp2 = input[6] - input[10];
-
- step[ 6] = temp2 - temp1;
- step[ 7] = temp2 + temp1;
-
- // for odd input
- temp1 = input[3]*C12;
- temp2 = input[13]*C4;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- intermediate[ 8] = 2*(temp1);
-
- temp1 = input[3]*C4;
- temp2 = input[13]*C12;
- temp2 -= temp1;
- temp2 = (temp2);
- temp2 *= C8;
- intermediate[ 9] = 2*(temp2);
-
- intermediate[10] = 2*(input[9]*C8);
- intermediate[11] = input[15] - input[1];
- intermediate[12] = input[15] + input[1];
- intermediate[13] = 2*((input[7]*C8));
-
- temp1 = input[11]*C12;
- temp2 = input[5]*C4;
- temp2 -= temp1;
- temp2 = (temp2);
- temp2 *= C8;
- intermediate[14] = 2*(temp2);
-
- temp1 = input[11]*C4;
- temp2 = input[5]*C12;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- intermediate[15] = 2*(temp1);
-
- step[ 8] = intermediate[ 8] + intermediate[14];
- step[ 9] = intermediate[ 9] + intermediate[15];
- step[10] = intermediate[10] + intermediate[11];
- step[11] = intermediate[10] - intermediate[11];
- step[12] = intermediate[12] + intermediate[13];
- step[13] = intermediate[12] - intermediate[13];
- step[14] = intermediate[ 8] - intermediate[14];
- step[15] = intermediate[ 9] - intermediate[15];
-
- // step 3
- output[0] = step[ 0] + step[ 3];
- output[1] = step[ 1] + step[ 2];
- output[2] = step[ 1] - step[ 2];
- output[3] = step[ 0] - step[ 3];
-
- temp1 = step[ 4]*C14;
- temp2 = step[ 7]*C2;
- temp1 -= temp2;
- output[4] = (temp1);
-
- temp1 = step[ 4]*C2;
- temp2 = step[ 7]*C14;
- temp1 += temp2;
- output[7] = (temp1);
-
- temp1 = step[ 5]*C10;
- temp2 = step[ 6]*C6;
- temp1 -= temp2;
- output[5] = (temp1);
-
- temp1 = step[ 5]*C6;
- temp2 = step[ 6]*C10;
- temp1 += temp2;
- output[6] = (temp1);
-
- output[8] = step[ 8] + step[11];
- output[9] = step[ 9] + step[10];
- output[10] = step[ 9] - step[10];
- output[11] = step[ 8] - step[11];
- output[12] = step[12] + step[15];
- output[13] = step[13] + step[14];
- output[14] = step[13] - step[14];
- output[15] = step[12] - step[15];
-
- // output 4
- step[ 0] = output[0] + output[7];
- step[ 1] = output[1] + output[6];
- step[ 2] = output[2] + output[5];
- step[ 3] = output[3] + output[4];
- step[ 4] = output[3] - output[4];
- step[ 5] = output[2] - output[5];
- step[ 6] = output[1] - output[6];
- step[ 7] = output[0] - output[7];
-
- temp1 = output[8]*C7;
- temp2 = output[15]*C9;
- temp1 -= temp2;
- step[ 8] = (temp1);
-
- temp1 = output[9]*C11;
- temp2 = output[14]*C5;
- temp1 += temp2;
- step[ 9] = (temp1);
-
- temp1 = output[10]*C3;
- temp2 = output[13]*C13;
- temp1 -= temp2;
- step[10] = (temp1);
-
- temp1 = output[11]*C15;
- temp2 = output[12]*C1;
- temp1 += temp2;
- step[11] = (temp1);
-
- temp1 = output[11]*C1;
- temp2 = output[12]*C15;
- temp2 -= temp1;
- step[12] = (temp2);
-
- temp1 = output[10]*C13;
- temp2 = output[13]*C3;
- temp1 += temp2;
- step[13] = (temp1);
-
- temp1 = output[9]*C5;
- temp2 = output[14]*C11;
- temp2 -= temp1;
- step[14] = (temp2);
-
- temp1 = output[8]*C9;
- temp2 = output[15]*C7;
- temp1 += temp2;
- step[15] = (temp1);
-
- // step 5
- output[0] = (step[0] + step[15]);
- output[1] = (step[1] + step[14]);
- output[2] = (step[2] + step[13]);
- output[3] = (step[3] + step[12]);
- output[4] = (step[4] + step[11]);
- output[5] = (step[5] + step[10]);
- output[6] = (step[6] + step[ 9]);
- output[7] = (step[7] + step[ 8]);
-
- output[15] = (step[0] - step[15]);
- output[14] = (step[1] - step[14]);
- output[13] = (step[2] - step[13]);
- output[12] = (step[3] - step[12]);
- output[11] = (step[4] - step[11]);
- output[10] = (step[5] - step[10]);
- output[9] = (step[6] - step[ 9]);
- output[8] = (step[7] - step[ 8]);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-// Remove once an int version of iDCT is written
-#if 0
-void reference_16x16_idct_1d(double input[16], double output[16]) {
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- const double kPi = 3.141592653589793238462643383279502884;
- const double kSqrt2 = 1.414213562373095048801688724209698;
- for (int k = 0; k < 16; k++) {
- output[k] = 0.0;
- for (int n = 0; n < 16; n++) {
- output[k] += input[n]*cos(kPi*(2*k+1)*n/32.0);
- if (n == 0)
- output[k] = output[k]/kSqrt2;
- }
- }
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-#endif
-
-void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double out[16*16], out2[16*16];
- const int short_pitch = pitch >> 1;
- int i, j;
- // First transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = input[j + i*short_pitch];
- butterfly_16x16_idct_1d(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- out[j + i*16] = temp_out[j];
- }
- // Then transform columns
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j*16 + i];
- butterfly_16x16_idct_1d(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- out2[j*16 + i] = temp_out[j];
- }
- for (i = 0; i < 16*16; ++i)
- output[i] = round(out2[i]/128);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-#else
-
-#define INITIAL_SHIFT 2
-#define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1))
-#define RIGHT_SHIFT 14
-#define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1))
-
-static const int16_t C1 = 16305;
-static const int16_t C2 = 16069;
-static const int16_t C3 = 15679;
-static const int16_t C4 = 15137;
-static const int16_t C5 = 14449;
-static const int16_t C6 = 13623;
-static const int16_t C7 = 12665;
-static const int16_t C8 = 11585;
-static const int16_t C9 = 10394;
-static const int16_t C10 = 9102;
-static const int16_t C11 = 7723;
-static const int16_t C12 = 6270;
-static const int16_t C13 = 4756;
-static const int16_t C14 = 3196;
-static const int16_t C15 = 1606;
-
-static void butterfly_16x16_idct_1d(int16_t input[16], int16_t output[16],
- int last_shift_bits) {
- int16_t step[16];
- int intermediate[16];
- int temp1, temp2;
-
- int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT;
- int step1_rounding = 1 << (step1_shift - 1);
- int last_rounding = 0;
-
- if (last_shift_bits > 0)
- last_rounding = 1 << (last_shift_bits - 1);
-
- // step 1 and 2
- step[ 0] = (input[0] + input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[ 1] = (input[0] - input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
-
- temp1 = input[4] * C12;
- temp2 = input[12] * C4;
- temp1 = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp1 *= C8;
- step[ 2] = (2 * (temp1) + step1_rounding) >> step1_shift;
-
- temp1 = input[4] * C4;
- temp2 = input[12] * C12;
- temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp1 *= C8;
- step[ 3] = (2 * (temp1) + step1_rounding) >> step1_shift;
-
- temp1 = input[2] * C8;
- temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp2 = input[6] + input[10];
- step[ 4] = (temp1 + temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[ 5] = (temp1 - temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
-
- temp1 = input[14] * C8;
- temp1 = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp2 = input[6] - input[10];
- step[ 6] = (temp2 - temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[ 7] = (temp2 + temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
-
- // for odd input
- temp1 = input[3] * C12;
- temp2 = input[13] * C4;
- temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp1 *= C8;
- intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = input[3] * C4;
- temp2 = input[13] * C12;
- temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp2 *= C8;
- intermediate[ 9] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- intermediate[10] = (2 * (input[9] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- intermediate[11] = input[15] - input[1];
- intermediate[12] = input[15] + input[1];
- intermediate[13] = (2 * (input[7] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = input[11] * C12;
- temp2 = input[5] * C4;
- temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp2 *= C8;
- intermediate[14] = (2 * (temp2) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = input[11] * C4;
- temp2 = input[5] * C12;
- temp1 = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp1 *= C8;
- intermediate[15] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- step[ 8] = (intermediate[ 8] + intermediate[14] + INITIAL_ROUNDING)
- >> INITIAL_SHIFT;
- step[ 9] = (intermediate[ 9] + intermediate[15] + INITIAL_ROUNDING)
- >> INITIAL_SHIFT;
- step[10] = (intermediate[10] + intermediate[11] + INITIAL_ROUNDING)
- >> INITIAL_SHIFT;
- step[11] = (intermediate[10] - intermediate[11] + INITIAL_ROUNDING)
- >> INITIAL_SHIFT;
- step[12] = (intermediate[12] + intermediate[13] + INITIAL_ROUNDING)
- >> INITIAL_SHIFT;
- step[13] = (intermediate[12] - intermediate[13] + INITIAL_ROUNDING)
- >> INITIAL_SHIFT;
- step[14] = (intermediate[ 8] - intermediate[14] + INITIAL_ROUNDING)
- >> INITIAL_SHIFT;
- step[15] = (intermediate[ 9] - intermediate[15] + INITIAL_ROUNDING)
- >> INITIAL_SHIFT;
-
- // step 3
- output[0] = step[ 0] + step[ 3];
- output[1] = step[ 1] + step[ 2];
- output[2] = step[ 1] - step[ 2];
- output[3] = step[ 0] - step[ 3];
-
- temp1 = step[ 4] * C14;
- temp2 = step[ 7] * C2;
- output[4] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 4] * C2;
- temp2 = step[ 7] * C14;
- output[7] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 5] * C10;
- temp2 = step[ 6] * C6;
- output[5] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 5] * C6;
- temp2 = step[ 6] * C10;
- output[6] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- output[8] = step[ 8] + step[11];
- output[9] = step[ 9] + step[10];
- output[10] = step[ 9] - step[10];
- output[11] = step[ 8] - step[11];
- output[12] = step[12] + step[15];
- output[13] = step[13] + step[14];
- output[14] = step[13] - step[14];
- output[15] = step[12] - step[15];
-
- // output 4
- step[ 0] = output[0] + output[7];
- step[ 1] = output[1] + output[6];
- step[ 2] = output[2] + output[5];
- step[ 3] = output[3] + output[4];
- step[ 4] = output[3] - output[4];
- step[ 5] = output[2] - output[5];
- step[ 6] = output[1] - output[6];
- step[ 7] = output[0] - output[7];
-
- temp1 = output[8] * C7;
- temp2 = output[15] * C9;
- step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[9] * C11;
- temp2 = output[14] * C5;
- step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[10] * C3;
- temp2 = output[13] * C13;
- step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[11] * C15;
- temp2 = output[12] * C1;
- step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[11] * C1;
- temp2 = output[12] * C15;
- step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[10] * C13;
- temp2 = output[13] * C3;
- step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[9] * C5;
- temp2 = output[14] * C11;
- step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[8] * C9;
- temp2 = output[15] * C7;
- step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- // step 5
- output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits;
- output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits;
- output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits;
- output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits;
- output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits;
- output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits;
- output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits;
- output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits;
-
- output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits;
- output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits;
- output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits;
- output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits;
- output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits;
- output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits;
- output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits;
- output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits;
-}
-
-void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {
- int16_t out[16 * 16];
- int16_t *outptr = &out[0];
- const int short_pitch = pitch >> 1;
- int i, j;
- int16_t temp_in[16], temp_out[16];
-
- // First transform rows
- for (i = 0; i < 16; ++i) {
- butterfly_16x16_idct_1d(input, outptr, 0);
- input += short_pitch;
- outptr += 16;
- }
-
- // Then transform columns
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j * 16 + i];
- butterfly_16x16_idct_1d(temp_in, temp_out, 3);
- for (j = 0; j < 16; ++j)
- output[j * 16 + i] = temp_out[j];
- }
-}
-
-/* The following function is called when we know the maximum number of non-zero
- * dct coefficients is less or equal 10.
- */
-static void butterfly_16x16_idct10_1d(int16_t input[16], int16_t output[16],
- int last_shift_bits) {
- int16_t step[16] = {0};
- int intermediate[16] = {0};
- int temp1, temp2;
- int last_rounding = 0;
-
- if (last_shift_bits > 0)
- last_rounding = 1 << (last_shift_bits - 1);
-
- // step 1 and 2
- step[ 0] = (input[0] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[ 1] = (input[0] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
-
- temp1 = (2 * (input[2] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- step[ 4] = (temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[ 5] = (temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;
-
- // for odd input
- temp1 = (input[3] * C12 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp1 *= C8;
- intermediate[ 8] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = (-input[3] * C4 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
- temp1 *= C8;
- intermediate[ 9] = (2 * (temp1) + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- step[ 8] = (intermediate[ 8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[ 9] = (intermediate[ 9] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[10] = (-input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[11] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[12] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[13] = (input[1] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[14] = (intermediate[ 8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
- step[15] = (intermediate[ 9] + INITIAL_ROUNDING) >> INITIAL_SHIFT;
-
- // step 3
- output[0] = step[ 0];
- output[1] = step[ 1];
- output[2] = step[ 1];
- output[3] = step[ 0];
-
- temp1 = step[ 4] * C14;
- output[4] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 4] * C2;
- output[7] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 5] * C10;
- output[5] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 5] * C6;
- output[6] = (temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- output[8] = step[ 8] + step[11];
- output[9] = step[ 9] + step[10];
- output[10] = step[ 9] - step[10];
- output[11] = step[ 8] - step[11];
- output[12] = step[12] + step[15];
- output[13] = step[13] + step[14];
- output[14] = step[13] - step[14];
- output[15] = step[12] - step[15];
-
- // output 4
- step[ 0] = output[0] + output[7];
- step[ 1] = output[1] + output[6];
- step[ 2] = output[2] + output[5];
- step[ 3] = output[3] + output[4];
- step[ 4] = output[3] - output[4];
- step[ 5] = output[2] - output[5];
- step[ 6] = output[1] - output[6];
- step[ 7] = output[0] - output[7];
-
- temp1 = output[8] * C7;
- temp2 = output[15] * C9;
- step[ 8] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[9] * C11;
- temp2 = output[14] * C5;
- step[ 9] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[10] * C3;
- temp2 = output[13] * C13;
- step[10] = (temp1 - temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[11] * C15;
- temp2 = output[12] * C1;
- step[11] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[11] * C1;
- temp2 = output[12] * C15;
- step[12] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[10] * C13;
- temp2 = output[13] * C3;
- step[13] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[9] * C5;
- temp2 = output[14] * C11;
- step[14] = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[8] * C9;
- temp2 = output[15] * C7;
- step[15] = (temp1 + temp2 + RIGHT_ROUNDING) >> RIGHT_SHIFT;
-
- // step 5
- output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits;
- output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits;
- output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits;
- output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits;
- output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits;
- output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits;
- output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits;
- output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits;
-
- output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits;
- output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits;
- output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits;
- output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits;
- output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits;
- output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits;
- output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits;
- output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits;
-}
-
-void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) {
- int16_t out[16 * 16];
- int16_t *outptr = &out[0];
- const int short_pitch = pitch >> 1;
- int i, j;
- int16_t temp_in[16], temp_out[16];
-
- /* First transform rows. Since all non-zero dct coefficients are in
- * upper-left 4x4 area, we only need to calculate first 4 rows here.
- */
- vpx_memset(out, 0, sizeof(out));
- for (i = 0; i < 4; ++i) {
- butterfly_16x16_idct10_1d(input, outptr, 0);
- input += short_pitch;
- outptr += 16;
- }
-
- // Then transform columns
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j*16 + i];
- butterfly_16x16_idct10_1d(temp_in, temp_out, 3);
- for (j = 0; j < 16; ++j)
- output[j*16 + i] = temp_out[j];
- }
-}
-#undef INITIAL_SHIFT
-#undef INITIAL_ROUNDING
-#undef RIGHT_SHIFT
-#undef RIGHT_ROUNDING
-#endif
-
-#if !CONFIG_DWTDCTHYBRID
-#define DownshiftMultiplyBy2(x) x * 2
-#define DownshiftMultiply(x) x
-
-static void idct16(double *input, double *output, int stride) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
- // step 1 and 2
- step[ 0] = input[stride*0] + input[stride*8];
- step[ 1] = input[stride*0] - input[stride*8];
-
- temp1 = input[stride*4]*C12;
- temp2 = input[stride*12]*C4;
-
- temp1 -= temp2;
- temp1 = DownshiftMultiply(temp1);
- temp1 *= C8;
-
- step[ 2] = DownshiftMultiplyBy2(temp1);
-
- temp1 = input[stride*4]*C4;
- temp2 = input[stride*12]*C12;
- temp1 += temp2;
- temp1 = DownshiftMultiply(temp1);
- temp1 *= C8;
- step[ 3] = DownshiftMultiplyBy2(temp1);
-
- temp1 = input[stride*2]*C8;
- temp1 = DownshiftMultiplyBy2(temp1);
- temp2 = input[stride*6] + input[stride*10];
-
- step[ 4] = temp1 + temp2;
- step[ 5] = temp1 - temp2;
-
- temp1 = input[stride*14]*C8;
- temp1 = DownshiftMultiplyBy2(temp1);
- temp2 = input[stride*6] - input[stride*10];
-
- step[ 6] = temp2 - temp1;
- step[ 7] = temp2 + temp1;
-
- // for odd input
- temp1 = input[stride*3]*C12;
- temp2 = input[stride*13]*C4;
- temp1 += temp2;
- temp1 = DownshiftMultiply(temp1);
- temp1 *= C8;
- intermediate[ 8] = DownshiftMultiplyBy2(temp1);
-
- temp1 = input[stride*3]*C4;
- temp2 = input[stride*13]*C12;
- temp2 -= temp1;
- temp2 = DownshiftMultiply(temp2);
- temp2 *= C8;
- intermediate[ 9] = DownshiftMultiplyBy2(temp2);
-
- intermediate[10] = DownshiftMultiplyBy2(input[stride*9]*C8);
- intermediate[11] = input[stride*15] - input[stride*1];
- intermediate[12] = input[stride*15] + input[stride*1];
- intermediate[13] = DownshiftMultiplyBy2((input[stride*7]*C8));
-
- temp1 = input[stride*11]*C12;
- temp2 = input[stride*5]*C4;
- temp2 -= temp1;
- temp2 = DownshiftMultiply(temp2);
- temp2 *= C8;
- intermediate[14] = DownshiftMultiplyBy2(temp2);
-
- temp1 = input[stride*11]*C4;
- temp2 = input[stride*5]*C12;
- temp1 += temp2;
- temp1 = DownshiftMultiply(temp1);
- temp1 *= C8;
- intermediate[15] = DownshiftMultiplyBy2(temp1);
-
- step[ 8] = intermediate[ 8] + intermediate[14];
- step[ 9] = intermediate[ 9] + intermediate[15];
- step[10] = intermediate[10] + intermediate[11];
- step[11] = intermediate[10] - intermediate[11];
- step[12] = intermediate[12] + intermediate[13];
- step[13] = intermediate[12] - intermediate[13];
- step[14] = intermediate[ 8] - intermediate[14];
- step[15] = intermediate[ 9] - intermediate[15];
-
- // step 3
- output[stride*0] = step[ 0] + step[ 3];
- output[stride*1] = step[ 1] + step[ 2];
- output[stride*2] = step[ 1] - step[ 2];
- output[stride*3] = step[ 0] - step[ 3];
-
- temp1 = step[ 4]*C14;
- temp2 = step[ 7]*C2;
- temp1 -= temp2;
- output[stride*4] = DownshiftMultiply(temp1);
-
- temp1 = step[ 4]*C2;
- temp2 = step[ 7]*C14;
- temp1 += temp2;
- output[stride*7] = DownshiftMultiply(temp1);
-
- temp1 = step[ 5]*C10;
- temp2 = step[ 6]*C6;
- temp1 -= temp2;
- output[stride*5] = DownshiftMultiply(temp1);
-
- temp1 = step[ 5]*C6;
- temp2 = step[ 6]*C10;
- temp1 += temp2;
- output[stride*6] = DownshiftMultiply(temp1);
-
- output[stride*8] = step[ 8] + step[11];
- output[stride*9] = step[ 9] + step[10];
- output[stride*10] = step[ 9] - step[10];
- output[stride*11] = step[ 8] - step[11];
- output[stride*12] = step[12] + step[15];
- output[stride*13] = step[13] + step[14];
- output[stride*14] = step[13] - step[14];
- output[stride*15] = step[12] - step[15];
-
- // output 4
- step[ 0] = output[stride*0] + output[stride*7];
- step[ 1] = output[stride*1] + output[stride*6];
- step[ 2] = output[stride*2] + output[stride*5];
- step[ 3] = output[stride*3] + output[stride*4];
- step[ 4] = output[stride*3] - output[stride*4];
- step[ 5] = output[stride*2] - output[stride*5];
- step[ 6] = output[stride*1] - output[stride*6];
- step[ 7] = output[stride*0] - output[stride*7];
-
- temp1 = output[stride*8]*C7;
- temp2 = output[stride*15]*C9;
- temp1 -= temp2;
- step[ 8] = DownshiftMultiply(temp1);
-
- temp1 = output[stride*9]*C11;
- temp2 = output[stride*14]*C5;
- temp1 += temp2;
- step[ 9] = DownshiftMultiply(temp1);
-
- temp1 = output[stride*10]*C3;
- temp2 = output[stride*13]*C13;
- temp1 -= temp2;
- step[10] = DownshiftMultiply(temp1);
-
- temp1 = output[stride*11]*C15;
- temp2 = output[stride*12]*C1;
- temp1 += temp2;
- step[11] = DownshiftMultiply(temp1);
-
- temp1 = output[stride*11]*C1;
- temp2 = output[stride*12]*C15;
- temp2 -= temp1;
- step[12] = DownshiftMultiply(temp2);
-
- temp1 = output[stride*10]*C13;
- temp2 = output[stride*13]*C3;
- temp1 += temp2;
- step[13] = DownshiftMultiply(temp1);
-
- temp1 = output[stride*9]*C5;
- temp2 = output[stride*14]*C11;
- temp2 -= temp1;
- step[14] = DownshiftMultiply(temp2);
-
- temp1 = output[stride*8]*C9;
- temp2 = output[stride*15]*C7;
- temp1 += temp2;
- step[15] = DownshiftMultiply(temp1);
-
- // step 5
- output[stride*0] = step[0] + step[15];
- output[stride*1] = step[1] + step[14];
- output[stride*2] = step[2] + step[13];
- output[stride*3] = step[3] + step[12];
- output[stride*4] = step[4] + step[11];
- output[stride*5] = step[5] + step[10];
- output[stride*6] = step[6] + step[ 9];
- output[stride*7] = step[7] + step[ 8];
-
- output[stride*15] = step[0] - step[15];
- output[stride*14] = step[1] - step[14];
- output[stride*13] = step[2] - step[13];
- output[stride*12] = step[3] - step[12];
- output[stride*11] = step[4] - step[11];
- output[stride*10] = step[5] - step[10];
- output[stride*9] = step[6] - step[ 9];
- output[stride*8] = step[7] - step[ 8];
-}
-
-static void butterfly_32_idct_1d(double *input, double *output, int stride) {
- static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
- static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
- static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
- static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
- static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
- static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
- static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
- static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
- static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
- static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
- static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
- static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
- static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
- static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
- static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
- static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
- static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
-
- double step1[32];
- double step2[32];
-
- step1[ 0] = input[stride*0];
- step1[ 1] = input[stride*2];
- step1[ 2] = input[stride*4];
- step1[ 3] = input[stride*6];
- step1[ 4] = input[stride*8];
- step1[ 5] = input[stride*10];
- step1[ 6] = input[stride*12];
- step1[ 7] = input[stride*14];
- step1[ 8] = input[stride*16];
- step1[ 9] = input[stride*18];
- step1[10] = input[stride*20];
- step1[11] = input[stride*22];
- step1[12] = input[stride*24];
- step1[13] = input[stride*26];
- step1[14] = input[stride*28];
- step1[15] = input[stride*30];
-
- step1[16] = DownshiftMultiplyBy2(input[stride*1]*C16);
- step1[17] = (input[stride*3] + input[stride*1]);
- step1[18] = (input[stride*5] + input[stride*3]);
- step1[19] = (input[stride*7] + input[stride*5]);
- step1[20] = (input[stride*9] + input[stride*7]);
- step1[21] = (input[stride*11] + input[stride*9]);
- step1[22] = (input[stride*13] + input[stride*11]);
- step1[23] = (input[stride*15] + input[stride*13]);
- step1[24] = (input[stride*17] + input[stride*15]);
- step1[25] = (input[stride*19] + input[stride*17]);
- step1[26] = (input[stride*21] + input[stride*19]);
- step1[27] = (input[stride*23] + input[stride*21]);
- step1[28] = (input[stride*25] + input[stride*23]);
- step1[29] = (input[stride*27] + input[stride*25]);
- step1[30] = (input[stride*29] + input[stride*27]);
- step1[31] = (input[stride*31] + input[stride*29]);
-
- idct16(step1, step2, 1);
- idct16(step1 + 16, step2 + 16, 1);
-
- step2[16] = DownshiftMultiply(step2[16] / (2*C1));
- step2[17] = DownshiftMultiply(step2[17] / (2*C3));
- step2[18] = DownshiftMultiply(step2[18] / (2*C5));
- step2[19] = DownshiftMultiply(step2[19] / (2*C7));
- step2[20] = DownshiftMultiply(step2[20] / (2*C9));
- step2[21] = DownshiftMultiply(step2[21] / (2*C11));
- step2[22] = DownshiftMultiply(step2[22] / (2*C13));
- step2[23] = DownshiftMultiply(step2[23] / (2*C15));
- step2[24] = DownshiftMultiply(step2[24] / (2*C17));
- step2[25] = DownshiftMultiply(step2[25] / (2*C19));
- step2[26] = DownshiftMultiply(step2[26] / (2*C21));
- step2[27] = DownshiftMultiply(step2[27] / (2*C23));
- step2[28] = DownshiftMultiply(step2[28] / (2*C25));
- step2[29] = DownshiftMultiply(step2[29] / (2*C27));
- step2[30] = DownshiftMultiply(step2[30] / (2*C29));
- step2[31] = DownshiftMultiply(step2[31] / (2*C31));
-
- output[stride* 0] = step2[ 0] + step2[16];
- output[stride* 1] = step2[ 1] + step2[17];
- output[stride* 2] = step2[ 2] + step2[18];
- output[stride* 3] = step2[ 3] + step2[19];
- output[stride* 4] = step2[ 4] + step2[20];
- output[stride* 5] = step2[ 5] + step2[21];
- output[stride* 6] = step2[ 6] + step2[22];
- output[stride* 7] = step2[ 7] + step2[23];
- output[stride* 8] = step2[ 8] + step2[24];
- output[stride* 9] = step2[ 9] + step2[25];
- output[stride*10] = step2[10] + step2[26];
- output[stride*11] = step2[11] + step2[27];
- output[stride*12] = step2[12] + step2[28];
- output[stride*13] = step2[13] + step2[29];
- output[stride*14] = step2[14] + step2[30];
- output[stride*15] = step2[15] + step2[31];
- output[stride*16] = step2[15] - step2[(31 - 0)];
- output[stride*17] = step2[14] - step2[(31 - 1)];
- output[stride*18] = step2[13] - step2[(31 - 2)];
- output[stride*19] = step2[12] - step2[(31 - 3)];
- output[stride*20] = step2[11] - step2[(31 - 4)];
- output[stride*21] = step2[10] - step2[(31 - 5)];
- output[stride*22] = step2[ 9] - step2[(31 - 6)];
- output[stride*23] = step2[ 8] - step2[(31 - 7)];
- output[stride*24] = step2[ 7] - step2[(31 - 8)];
- output[stride*25] = step2[ 6] - step2[(31 - 9)];
- output[stride*26] = step2[ 5] - step2[(31 - 10)];
- output[stride*27] = step2[ 4] - step2[(31 - 11)];
- output[stride*28] = step2[ 3] - step2[(31 - 12)];
- output[stride*29] = step2[ 2] - step2[(31 - 13)];
- output[stride*30] = step2[ 1] - step2[(31 - 14)];
- output[stride*31] = step2[ 0] - step2[(31 - 15)];
-}
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double out[32*32], out2[32*32];
- const int short_pitch = pitch >> 1;
- int i, j;
- // First transform rows
- for (i = 0; i < 32; ++i) {
- double temp_in[32], temp_out[32];
- for (j = 0; j < 32; ++j)
- temp_in[j] = input[j + i*short_pitch];
- butterfly_32_idct_1d(temp_in, temp_out, 1);
- for (j = 0; j < 32; ++j)
- out[j + i*32] = temp_out[j];
- }
- // Then transform columns
- for (i = 0; i < 32; ++i) {
- double temp_in[32], temp_out[32];
- for (j = 0; j < 32; ++j)
- temp_in[j] = out[j*32 + i];
- butterfly_32_idct_1d(temp_in, temp_out, 1);
- for (j = 0; j < 32; ++j)
- out2[j*32 + i] = temp_out[j];
- }
- for (i = 0; i < 32*32; ++i)
- output[i] = round(out2[i]/128);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-#else // !CONFIG_DWTDCTHYBRID
-
-#if DWT_TYPE == 53
-
-// Note: block length must be even for this implementation
-static void synthesis_53_row(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, *a, *b;
- int n;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ -= (r + (*b) + 1) >> 1;
- r = *b++;
- }
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *x++ = ((r = *a++) + 1) >> 1;
- *x++ = *b++ + ((r + (*a) + 2) >> 2);
- }
- *x++ = ((r = *a) + 1) >> 1;
- *x++ = *b + ((r + 1) >> 1);
-}
-
-static void synthesis_53_col(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, *a, *b;
- int n;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ -= (r + (*b) + 1) >> 1;
- r = *b++;
- }
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- r = *a++;
- *x++ = r;
- *x++ = ((*b++) << 1) + ((r + (*a) + 1) >> 1);
- }
- *x++ = *a;
- *x++ = ((*b) << 1) + *a;
-}
-
-static void dyadic_synthesize_53(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_53_col(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
- synthesis_53_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
- }
- }
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
- ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) :
- -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS);
- }
- }
-}
-
-#elif DWT_TYPE == 26
-
-// Note: block length must be even for this implementation
-static void synthesis_26_row(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, s, *a, *b;
- int i, n = length >> 1;
-
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ += (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b += (r - *a + 4) >> 3;
- }
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- s = *b++;
- r = *a++;
- *x++ = (r + s + 1) >> 1;
- *x++ = (r - s + 1) >> 1;
- }
-}
-
-static void synthesis_26_col(int length, int16_t *lowpass, int16_t *highpass,
- int16_t *x) {
- int16_t r, s, *a, *b;
- int i, n = length >> 1;
-
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ += (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b += (r - *a + 4) >> 3;
- }
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- s = *b++;
- r = *a++;
- *x++ = r + s;
- *x++ = r - s;
- }
-}
-
-static void dyadic_synthesize_26(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- int16_t buffer[2 * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_26_col(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(*buffer));
- synthesis_26_row(nw, buffer, buffer + hw, &c[i * pitch_c]);
- }
- }
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- x[i * pitch_x + j] = c[i * pitch_c + j] >= 0 ?
- ((c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS) :
- -((-c[i * pitch_c + j] + DWT_PRECISION_RND) >> DWT_PRECISION_BITS);
- }
- }
-}
-
-#elif DWT_TYPE == 97
-
-static void synthesis_97(int length, double *lowpass, double *highpass,
- double *x) {
- static const double a_predict1 = -1.586134342;
- static const double a_update1 = -0.05298011854;
- static const double a_predict2 = 0.8829110762;
- static const double a_update2 = 0.4435068522;
- static const double s_low = 1.149604398;
- static const double s_high = 1/1.149604398;
- static const double inv_s_low = 1 / s_low;
- static const double inv_s_high = 1 / s_high;
- int i;
- double y[DWT_MAX_LENGTH];
- // Undo pack and scale
- for (i = 0; i < length / 2; i++) {
- y[i * 2] = lowpass[i] * inv_s_low;
- y[i * 2 + 1] = highpass[i] * inv_s_high;
- }
- memcpy(x, y, sizeof(*y) * length);
- // Undo update 2
- for (i = 2; i < length; i += 2) {
- x[i] -= a_update2 * (x[i-1] + x[i+1]);
- }
- x[0] -= 2 * a_update2 * x[1];
- // Undo predict 2
- for (i = 1; i < length - 2; i += 2) {
- x[i] -= a_predict2 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] -= 2 * a_predict2 * x[length - 2];
- // Undo update 1
- for (i = 2; i < length; i += 2) {
- x[i] -= a_update1 * (x[i - 1] + x[i + 1]);
- }
- x[0] -= 2 * a_update1 * x[1];
- // Undo predict 1
- for (i = 1; i < length - 2; i += 2) {
- x[i] -= a_predict1 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] -= 2 * a_predict1 * x[length - 2];
-}
-
-static void dyadic_synthesize_97(int levels, int width, int height, int16_t *c,
- int pitch_c, int16_t *x, int pitch_x) {
- int th[16], tw[16], lv, i, j, nh, nw, hh = height, hw = width;
- double buffer[2 * DWT_MAX_LENGTH];
- double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
-
- th[0] = hh;
- tw[0] = hw;
- for (i = 1; i <= levels; i++) {
- th[i] = (th[i - 1] + 1) >> 1;
- tw[i] = (tw[i - 1] + 1) >> 1;
- }
- for (lv = levels - 1; lv >= 0; lv--) {
- nh = th[lv];
- nw = tw[lv];
- hh = th[lv + 1];
- hw = tw[lv + 1];
- if ((nh < 2) || (nw < 2)) continue;
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i] = c[i * pitch_c + j];
- synthesis_97(nh, buffer, buffer + hh, buffer + nh);
- for (i = 0; i < nh; i++)
- y[i * DWT_MAX_LENGTH + j] = buffer[i + nh];
- }
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
- synthesis_97(nw, buffer, buffer + hw, &y[i * DWT_MAX_LENGTH]);
- }
- }
- for (i = 0; i < height; i++)
- for (j = 0; j < width; j++)
- x[i * pitch_x + j] = round(y[i * DWT_MAX_LENGTH + j] /
- (1 << DWT_PRECISION_BITS));
-}
-
-#endif // DWT_TYPE
-
-// TODO(debargha): Implement scaling differently so as not to have to use the
-// floating point 16x16 dct
-static void butterfly_16x16_idct_1d_f(double input[16], double output[16]) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
-
- // step 1 and 2
- step[ 0] = input[0] + input[8];
- step[ 1] = input[0] - input[8];
-
- temp1 = input[4]*C12;
- temp2 = input[12]*C4;
-
- temp1 -= temp2;
- temp1 *= C8;
-
- step[ 2] = 2*(temp1);
-
- temp1 = input[4]*C4;
- temp2 = input[12]*C12;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- step[ 3] = 2*(temp1);
-
- temp1 = input[2]*C8;
- temp1 = 2*(temp1);
- temp2 = input[6] + input[10];
-
- step[ 4] = temp1 + temp2;
- step[ 5] = temp1 - temp2;
-
- temp1 = input[14]*C8;
- temp1 = 2*(temp1);
- temp2 = input[6] - input[10];
-
- step[ 6] = temp2 - temp1;
- step[ 7] = temp2 + temp1;
-
- // for odd input
- temp1 = input[3]*C12;
- temp2 = input[13]*C4;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- intermediate[ 8] = 2*(temp1);
-
- temp1 = input[3]*C4;
- temp2 = input[13]*C12;
- temp2 -= temp1;
- temp2 = (temp2);
- temp2 *= C8;
- intermediate[ 9] = 2*(temp2);
-
- intermediate[10] = 2*(input[9]*C8);
- intermediate[11] = input[15] - input[1];
- intermediate[12] = input[15] + input[1];
- intermediate[13] = 2*((input[7]*C8));
-
- temp1 = input[11]*C12;
- temp2 = input[5]*C4;
- temp2 -= temp1;
- temp2 = (temp2);
- temp2 *= C8;
- intermediate[14] = 2*(temp2);
-
- temp1 = input[11]*C4;
- temp2 = input[5]*C12;
- temp1 += temp2;
- temp1 = (temp1);
- temp1 *= C8;
- intermediate[15] = 2*(temp1);
-
- step[ 8] = intermediate[ 8] + intermediate[14];
- step[ 9] = intermediate[ 9] + intermediate[15];
- step[10] = intermediate[10] + intermediate[11];
- step[11] = intermediate[10] - intermediate[11];
- step[12] = intermediate[12] + intermediate[13];
- step[13] = intermediate[12] - intermediate[13];
- step[14] = intermediate[ 8] - intermediate[14];
- step[15] = intermediate[ 9] - intermediate[15];
-
- // step 3
- output[0] = step[ 0] + step[ 3];
- output[1] = step[ 1] + step[ 2];
- output[2] = step[ 1] - step[ 2];
- output[3] = step[ 0] - step[ 3];
-
- temp1 = step[ 4]*C14;
- temp2 = step[ 7]*C2;
- temp1 -= temp2;
- output[4] = (temp1);
-
- temp1 = step[ 4]*C2;
- temp2 = step[ 7]*C14;
- temp1 += temp2;
- output[7] = (temp1);
-
- temp1 = step[ 5]*C10;
- temp2 = step[ 6]*C6;
- temp1 -= temp2;
- output[5] = (temp1);
-
- temp1 = step[ 5]*C6;
- temp2 = step[ 6]*C10;
- temp1 += temp2;
- output[6] = (temp1);
-
- output[8] = step[ 8] + step[11];
- output[9] = step[ 9] + step[10];
- output[10] = step[ 9] - step[10];
- output[11] = step[ 8] - step[11];
- output[12] = step[12] + step[15];
- output[13] = step[13] + step[14];
- output[14] = step[13] - step[14];
- output[15] = step[12] - step[15];
-
- // output 4
- step[ 0] = output[0] + output[7];
- step[ 1] = output[1] + output[6];
- step[ 2] = output[2] + output[5];
- step[ 3] = output[3] + output[4];
- step[ 4] = output[3] - output[4];
- step[ 5] = output[2] - output[5];
- step[ 6] = output[1] - output[6];
- step[ 7] = output[0] - output[7];
-
- temp1 = output[8]*C7;
- temp2 = output[15]*C9;
- temp1 -= temp2;
- step[ 8] = (temp1);
-
- temp1 = output[9]*C11;
- temp2 = output[14]*C5;
- temp1 += temp2;
- step[ 9] = (temp1);
-
- temp1 = output[10]*C3;
- temp2 = output[13]*C13;
- temp1 -= temp2;
- step[10] = (temp1);
-
- temp1 = output[11]*C15;
- temp2 = output[12]*C1;
- temp1 += temp2;
- step[11] = (temp1);
-
- temp1 = output[11]*C1;
- temp2 = output[12]*C15;
- temp2 -= temp1;
- step[12] = (temp2);
-
- temp1 = output[10]*C13;
- temp2 = output[13]*C3;
- temp1 += temp2;
- step[13] = (temp1);
-
- temp1 = output[9]*C5;
- temp2 = output[14]*C11;
- temp2 -= temp1;
- step[14] = (temp2);
-
- temp1 = output[8]*C9;
- temp2 = output[15]*C7;
- temp1 += temp2;
- step[15] = (temp1);
-
- // step 5
- output[0] = (step[0] + step[15]);
- output[1] = (step[1] + step[14]);
- output[2] = (step[2] + step[13]);
- output[3] = (step[3] + step[12]);
- output[4] = (step[4] + step[11]);
- output[5] = (step[5] + step[10]);
- output[6] = (step[6] + step[ 9]);
- output[7] = (step[7] + step[ 8]);
-
- output[15] = (step[0] - step[15]);
- output[14] = (step[1] - step[14]);
- output[13] = (step[2] - step[13]);
- output[12] = (step[3] - step[12]);
- output[11] = (step[4] - step[11]);
- output[10] = (step[5] - step[10]);
- output[9] = (step[6] - step[ 9]);
- output[8] = (step[7] - step[ 8]);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-static void vp9_short_idct16x16_c_f(int16_t *input, int16_t *output, int pitch,
- int scale) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double out[16*16], out2[16*16];
- const int short_pitch = pitch >> 1;
- int i, j;
- // First transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = input[j + i*short_pitch];
- butterfly_16x16_idct_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- out[j + i*16] = temp_out[j];
- }
- // Then transform columns
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j*16 + i];
- butterfly_16x16_idct_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- out2[j*16 + i] = temp_out[j];
- }
- for (i = 0; i < 16*16; ++i)
- output[i] = round(out2[i] / (128 >> scale));
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-static void idct8_1d(double *x) {
- int i, j;
- double t[8];
- static const double idctmat[64] = {
- 0.35355339059327, 0.49039264020162, 0.46193976625564, 0.41573480615127,
- 0.35355339059327, 0.2777851165098, 0.19134171618254, 0.097545161008064,
- 0.35355339059327, 0.41573480615127, 0.19134171618254, -0.097545161008064,
- -0.35355339059327, -0.49039264020161, -0.46193976625564, -0.2777851165098,
- 0.35355339059327, 0.2777851165098, -0.19134171618254, -0.49039264020162,
- -0.35355339059327, 0.097545161008064, 0.46193976625564, 0.41573480615127,
- 0.35355339059327, 0.097545161008063, -0.46193976625564, -0.2777851165098,
- 0.35355339059327, 0.41573480615127, -0.19134171618254, -0.49039264020162,
- 0.35355339059327, -0.097545161008063, -0.46193976625564, 0.2777851165098,
- 0.35355339059327, -0.41573480615127, -0.19134171618255, 0.49039264020162,
- 0.35355339059327, -0.2777851165098, -0.19134171618254, 0.49039264020161,
- -0.35355339059327, -0.097545161008064, 0.46193976625564, -0.41573480615127,
- 0.35355339059327, -0.41573480615127, 0.19134171618254, 0.097545161008065,
- -0.35355339059327, 0.49039264020162, -0.46193976625564, 0.2777851165098,
- 0.35355339059327, -0.49039264020162, 0.46193976625564, -0.41573480615127,
- 0.35355339059327, -0.2777851165098, 0.19134171618255, -0.097545161008064
- };
- for (i = 0; i < 8; ++i) {
- t[i] = 0;
- for (j = 0; j < 8; ++j)
- t[i] += idctmat[i * 8 + j] * x[j];
- }
- for (i = 0; i < 8; ++i) {
- x[i] = t[i];
- }
-}
-
-static void vp9_short_idct8x8_c_f(int16_t *coefs, int16_t *block, int pitch,
- int scale) {
- double X[8 * 8], Y[8];
- int i, j;
- int shortpitch = pitch >> 1;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- X[i * 8 + j] = (double)coefs[i * shortpitch + j];
- }
- }
- for (i = 0; i < 8; i++)
- idct8_1d(X + 8 * i);
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; ++j)
- Y[j] = X[i + 8 * j];
- idct8_1d(Y);
- for (j = 0; j < 8; ++j)
- X[i + 8 * j] = Y[j];
- }
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- block[i * 8 + j] = (int16_t)round(X[i * 8 + j] / (8 >> scale));
- }
- }
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-#define multiply_bits(d, n) ((n) < 0 ? (d) >> (n) : (d) << (n))
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#elif DWTDCT_TYPE == DWTDCT16X16
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16 * 32, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 16 * 33, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(1, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(1, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 32x32 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[8 * 8];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[32 * 32];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct8x8_c_f(input, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32, buffer + i * 8, sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8, buffer + i * 8, sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8 * short_pitch, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8 * 32, buffer + i * 8,
- sizeof(*buffer2) * 8);
- }
- vp9_short_idct8x8_c_f(input + 8 * short_pitch + 8, buffer, pitch,
- 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i) {
- vpx_memcpy(buffer2 + i * 32 + 8 * 33, buffer + i * 8,
- sizeof(*buffer2) * 8);
- }
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- buffer2[i * 32 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 2);
- }
- }
-#if DWT_TYPE == 26
- dyadic_synthesize_26(2, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(2, 32, 32, buffer2, 32, output, 32);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(2, 32, 32, buffer2, 32, output, 32);
-#endif
-}
-
-#endif
-
-#if CONFIG_TX64X64
-void vp9_short_idct64x64_c(int16_t *input, int16_t *output, int pitch) {
- // assume output is a 64x64 buffer
- // Temporary buffer to hold a 16x16 block for 16x16 inverse dct
- int16_t buffer[16 * 16];
- // Temporary buffer to hold a 32x32 block for inverse 32x32 dwt
- int16_t buffer2[64 * 64];
- // Note: pitch is in bytes, short_pitch is in short units
- const int short_pitch = pitch >> 1;
- int i, j;
-
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the idct16x16 function
- vp9_short_idct16x16_c_f(input, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64, buffer + i * 16, sizeof(*buffer2) * 16);
- }
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 16; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
-#elif DWTDCT_TYPE == DWTDCT16X16
- vp9_short_idct16x16_c_f(input + 16, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16, buffer + i * 16, sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16 * 64, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
- vp9_short_idct16x16_c_f(input + 16 * short_pitch + 16, buffer, pitch,
- 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i) {
- vpx_memcpy(buffer2 + i * 64 + 16 * 65, buffer + i * 16,
- sizeof(*buffer2) * 16);
- }
-
- // Copying and scaling highest bands into buffer2
- for (i = 0; i < 32; ++i) {
- for (j = 32; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 32; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- buffer2[i * 64 + j] =
- multiply_bits(input[i * short_pitch + j], DWT_PRECISION_BITS - 1);
- }
- }
-#endif // DWTDCT_TYPE
-
-#if DWT_TYPE == 26
- dyadic_synthesize_26(2, 64, 64, buffer2, 64, output, 64);
-#elif DWT_TYPE == 97
- dyadic_synthesize_97(2, 64, 64, buffer2, 64, output, 64);
-#elif DWT_TYPE == 53
- dyadic_synthesize_53(2, 64, 64, buffer2, 64, output, 64);
-#endif
-}
-#endif // CONFIG_TX64X64
-#endif // !CONFIG_DWTDCTHYBRID
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -11,50 +11,25 @@
#include "vp9/common/vp9_invtrans.h"
#include "./vp9_rtcd.h"
-static void recon_dcblock(MACROBLOCKD *xd) {
- BLOCKD *b = &xd->block[24];
- int i;
-
- for (i = 0; i < 16; i++) {
- xd->block[i].dqcoeff[0] = b->diff[i];
- }
-}
-
-static void recon_dcblock_8x8(MACROBLOCKD *xd) {
- BLOCKD *b = &xd->block[24]; // for coeff 0, 2, 8, 10
-
- xd->block[0].dqcoeff[0] = b->diff[0];
- xd->block[4].dqcoeff[0] = b->diff[1];
- xd->block[8].dqcoeff[0] = b->diff[4];
- xd->block[12].dqcoeff[0] = b->diff[8];
-}
-
-void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
- BLOCKD *b = &xd->block[block];
- if (b->eob <= 1)
- xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch);
+void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob,
+ int16_t *dqcoeff, int16_t *diff,
+ int pitch) {
+ if (eob <= 1)
+ xd->inv_txm4x4_1(dqcoeff, diff, pitch);
else
- xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch);
+ xd->inv_txm4x4(dqcoeff, diff, pitch);
}
void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
int i;
- BLOCKD *blockd = xd->block;
- int has_2nd_order = get_2nd_order_usage(xd);
- if (has_2nd_order) {
- /* do 2nd order transform on the dc block */
- vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff);
- recon_dcblock(xd);
- }
-
for (i = 0; i < 16; i++) {
- TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
+ TX_TYPE tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32,
- tx_type, 4, xd->block[i].eob);
+ vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
} else {
- vp9_inverse_transform_b_4x4(xd, i, 32);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
+ xd->block[i].diff, 32);
}
}
}
@@ -63,7 +38,8 @@
int i;
for (i = 16; i < 24; i++) {
- vp9_inverse_transform_b_4x4(xd, i, 16);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
+ xd->block[i].diff, 16);
}
}
@@ -80,19 +56,11 @@
void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
int i;
BLOCKD *blockd = xd->block;
- int has_2nd_order = get_2nd_order_usage(xd);
- if (has_2nd_order) {
- // do 2nd order transform on the dc block
- vp9_short_ihaar2x2(blockd[24].dqcoeff, blockd[24].diff, 8);
- recon_dcblock_8x8(xd); // need to change for 8x8
- }
-
for (i = 0; i < 9; i += 8) {
- TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ TX_TYPE tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
- xd->block[i].eob);
+ vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
} else {
vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
&blockd[i].diff[0], 32);
@@ -99,10 +67,10 @@
}
}
for (i = 2; i < 11; i += 8) {
- TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ TX_TYPE tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
- xd->block[i + 2].eob);
+ vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
+ 16, tx_type);
} else {
vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
&blockd[i].diff[0], 32);
@@ -132,9 +100,9 @@
void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
BLOCKD *bd = &xd->block[0];
- TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
+ TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16, bd->eob);
+ vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type);
} else {
vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
&xd->block[0].diff[0], 32);
@@ -146,13 +114,208 @@
vp9_inverse_transform_mbuv_8x8(xd);
}
-void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) {
- vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64);
+void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) {
+ vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64);
}
-void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb) {
- vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1024,
- xd_sb->diff + 1024, 32);
- vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1280,
- xd_sb->diff + 1280, 32);
+void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
+
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 32 * 16,
+ 64);
+ } else {
+ vp9_short_iht16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type);
+ }
+ }
+}
+
+void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
+
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
+ } else {
+ vp9_short_iht8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type);
+ }
+ }
+}
+
+void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
+
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
+ } else {
+ vp9_short_iht4x4(xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type);
+ }
+ }
+}
+
+void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) {
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024,
+ xd->diff + 1024, 32);
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280,
+ xd->diff + 1280, 32);
+}
+
+void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64,
+ xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8,
+ 32);
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64,
+ xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8,
+ 32);
+ }
+}
+
+void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n],
+ xd->dqcoeff + 1024 + n * 16,
+ xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4,
+ 32);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n],
+ xd->dqcoeff + 1280 + n * 16,
+ xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4,
+ 32);
+ }
+}
+
+void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ vp9_short_idct32x32(xd->dqcoeff + n * 1024,
+ xd->diff + x_idx * 32 + y_idx * 32 * 64, 128);
+ }
+}
+
+void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
+
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 64 * 16,
+ 128);
+ } else {
+ vp9_short_iht16x16(xd->dqcoeff + n * 256,
+ xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type);
+ }
+ }
+}
+
+void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
+
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
+ } else {
+ vp9_short_iht8x8(xd->dqcoeff + n * 64,
+ xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type);
+ }
+ }
+}
+
+void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 256; n++) {
+ const int x_idx = n & 15, y_idx = n >> 4;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
+
+ if (tx_type == DCT_DCT) {
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
+ } else {
+ vp9_short_iht4x4(xd->dqcoeff + n * 16,
+ xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type);
+ }
+ }
+}
+
+void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) {
+ vp9_short_idct32x32(xd->dqcoeff + 4096,
+ xd->diff + 4096, 64);
+ vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024,
+ xd->diff + 4096 + 1024, 64);
+}
+
+void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16;
+
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256,
+ xd->diff + 4096 + off, 64);
+ vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256,
+ xd->diff + 4096 + 1024 + off, 64);
+ }
+}
+
+void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8;
+
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64,
+ xd->diff + 4096 + off, 64);
+ vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64,
+ xd->diff + 4096 + 1024 + off, 64);
+ }
+}
+
+void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4;
+
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n],
+ xd->dqcoeff + 4096 + n * 16,
+ xd->diff + 4096 + off, 64);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n],
+ xd->dqcoeff + 4096 + 1024 + n * 16,
+ xd->diff + 4096 + 1024 + off, 64);
+ }
}
--- a/vp9/common/vp9_invtrans.h
+++ b/vp9/common/vp9_invtrans.h
@@ -15,31 +15,47 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
-extern void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch);
+void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob,
+ int16_t *dqcoeff, int16_t *diff,
+ int pitch);
-extern void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd);
+void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd);
-extern void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd);
+void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd);
-extern void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd);
+void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd);
-extern void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff,
+void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff,
int16_t *output_coeff, int pitch);
-extern void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd);
-extern void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd);
-extern void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd);
-extern void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff,
+void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff,
int16_t *output_coeff, int pitch);
-extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd);
-extern void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
-extern void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb);
-extern void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb);
+void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd);
+void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd);
+void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd);
+
+void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd);
+void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd);
#endif // VP9_COMMON_VP9_INVTRANS_H_
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -109,6 +109,9 @@
loop_filter_info_n *lfi = &cm->lf_info;
/* update limits if sharpness has changed */
+ // printf("vp9_loop_filter_frame_init %d\n", default_filt_lvl);
+ // printf("sharpness level: %d [%d]\n",
+ // cm->sharpness_level, cm->last_sharpness_level);
if (cm->last_sharpness_level != cm->sharpness_level) {
vp9_loop_filter_update_sharpness(lfi, cm->sharpness_level);
cm->last_sharpness_level = cm->sharpness_level;
@@ -126,7 +129,7 @@
lvl_seg = vp9_get_segdata(xd, seg, SEG_LVL_ALT_LF);
} else { /* Delta Value */
lvl_seg += vp9_get_segdata(xd, seg, SEG_LVL_ALT_LF);
- lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63 : lvl_seg) : 0;
+ lvl_seg = clamp(lvl_seg, 0, 63);
}
}
@@ -149,13 +152,12 @@
/* Apply delta for Intra modes */
mode = 0; /* B_PRED */
/* Only the split mode BPRED has a further special case */
- lvl_mode = lvl_ref + xd->mode_lf_deltas[mode];
- lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
+ lvl_mode = clamp(lvl_ref + xd->mode_lf_deltas[mode], 0, 63);
lfi->lvl[seg][ref][mode] = lvl_mode;
mode = 1; /* all the rest of Intra modes */
- lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; /* clamp */
+ lvl_mode = clamp(lvl_ref, 0, 63);
lfi->lvl[seg][ref][mode] = lvl_mode;
/* LAST, GOLDEN, ALT */
@@ -167,9 +169,7 @@
/* Apply delta for Inter modes */
for (mode = 1; mode < 4; mode++) {
- lvl_mode = lvl_ref + xd->mode_lf_deltas[mode];
- lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
-
+ lvl_mode = clamp(lvl_ref + xd->mode_lf_deltas[mode], 0, 63);
lfi->lvl[seg][ref][mode] = lvl_mode;
}
}
@@ -202,10 +202,12 @@
mbmi1->mv[mbmi1->ref_frame].as_int) &&
mbmi0->ref_frame != INTRA_FRAME;
}
+
void vp9_loop_filter_frame(VP9_COMMON *cm,
MACROBLOCKD *xd,
int frame_filter_level,
- int y_only) {
+ int y_only,
+ int dering) {
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
loop_filter_info_n *lfi_n = &cm->lf_info;
struct loop_filter_info lfi;
@@ -271,7 +273,6 @@
vp9_loop_filter_bv(y_ptr, u_ptr, v_ptr, post->y_stride,
post->uv_stride, &lfi);
}
-
}
/* don't apply across umv border */
if (mb_row > 0 &&
@@ -299,6 +300,62 @@
post->uv_stride, &lfi);
}
}
+#if CONFIG_LOOP_DERING
+ if (dering) {
+ if (mb_row && mb_row < cm->mb_rows - 1 &&
+ mb_col && mb_col < cm->mb_cols - 1) {
+ vp9_post_proc_down_and_across(y_ptr, y_ptr,
+ post->y_stride, post->y_stride,
+ 16, 16, dering);
+ if (!y_only) {
+ vp9_post_proc_down_and_across(u_ptr, u_ptr,
+ post->uv_stride, post->uv_stride,
+ 8, 8, dering);
+ vp9_post_proc_down_and_across(v_ptr, v_ptr,
+ post->uv_stride, post->uv_stride,
+ 8, 8, dering);
+ }
+ } else {
+ // Adjust the filter so that no out-of-frame data is used.
+ uint8_t *dr_y = y_ptr, *dr_u = u_ptr, *dr_v = v_ptr;
+ int w_adjust = 0;
+ int h_adjust = 0;
+
+ if (mb_col == 0) {
+ dr_y += 2;
+ dr_u += 2;
+ dr_v += 2;
+ w_adjust += 2;
+ }
+ if (mb_col == cm->mb_cols - 1)
+ w_adjust += 2;
+ if (mb_row == 0) {
+ dr_y += 2 * post->y_stride;
+ dr_u += 2 * post->uv_stride;
+ dr_v += 2 * post->uv_stride;
+ h_adjust += 2;
+ }
+ if (mb_row == cm->mb_rows - 1)
+ h_adjust += 2;
+ vp9_post_proc_down_and_across_c(dr_y, dr_y,
+ post->y_stride, post->y_stride,
+ 16 - w_adjust, 16 - h_adjust,
+ dering);
+ if (!y_only) {
+ vp9_post_proc_down_and_across_c(dr_u, dr_u,
+ post->uv_stride,
+ post->uv_stride,
+ 8 - w_adjust, 8 - h_adjust,
+ dering);
+ vp9_post_proc_down_and_across_c(dr_v, dr_v,
+ post->uv_stride,
+ post->uv_stride,
+ 8 - w_adjust, 8 - h_adjust,
+ dering);
+ }
+ }
+ }
+#endif
} else {
// FIXME: Not 8x8 aware
if (mb_col > 0 &&
@@ -376,16 +433,13 @@
*/
if (alt_flt_enabled) {
for (i = 0; i < MAX_MB_SEGMENTS; i++) {
- /* Abs value */
if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
+ // Abs value
lvl_seg[i] = vp9_get_segdata(xd, i, SEG_LVL_ALT_LF);
- }
- /* Delta Value */
- else {
- lvl_seg[i] = default_filt_lvl +
- vp9_get_segdata(xd, i, SEG_LVL_ALT_LF);
- lvl_seg[i] = (lvl_seg[i] > 0) ?
- ((lvl_seg[i] > 63) ? 63 : lvl_seg[i]) : 0;
+ } else {
+ // Delta Value
+ lvl_seg[i] = default_filt_lvl + vp9_get_segdata(xd, i, SEG_LVL_ALT_LF);
+ lvl_seg[i] = clamp(lvl_seg[i], 0, 63);
}
}
}
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -83,7 +83,8 @@
void vp9_loop_filter_frame(struct VP9Common *cm,
struct macroblockd *mbd,
int filter_level,
- int y_only);
+ int y_only,
+ int dering);
void vp9_loop_filter_partial_frame(struct VP9Common *cm,
struct macroblockd *mbd,
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -13,7 +13,7 @@
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
-static __inline int8_t signed_char_clamp(int t) {
+static INLINE int8_t signed_char_clamp(int t) {
t = (t < -128 ? -128 : t);
t = (t > 127 ? 127 : t);
return (int8_t) t;
@@ -21,11 +21,11 @@
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
-static __inline int8_t filter_mask(uint8_t limit, uint8_t blimit,
- uint8_t p3, uint8_t p2,
- uint8_t p1, uint8_t p0,
- uint8_t q0, uint8_t q1,
- uint8_t q2, uint8_t q3) {
+static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit,
+ uint8_t p3, uint8_t p2,
+ uint8_t p1, uint8_t p0,
+ uint8_t q0, uint8_t q1,
+ uint8_t q2, uint8_t q3) {
int8_t mask = 0;
mask |= (abs(p3 - p2) > limit) * -1;
mask |= (abs(p2 - p1) > limit) * -1;
@@ -39,8 +39,8 @@
}
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
-static __inline int8_t hevmask(uint8_t thresh, uint8_t p1, uint8_t p0,
- uint8_t q0, uint8_t q1) {
+static INLINE int8_t hevmask(uint8_t thresh, uint8_t p1, uint8_t p0,
+ uint8_t q0, uint8_t q1) {
int8_t hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
hev |= (abs(q1 - q0) > thresh) * -1;
@@ -47,49 +47,38 @@
return hev;
}
-static __inline void filter(int8_t mask, uint8_t hev, uint8_t *op1,
- uint8_t *op0, uint8_t *oq0, uint8_t *oq1) {
- int8_t ps0, qs0;
- int8_t ps1, qs1;
- int8_t filter, Filter1, Filter2;
- int8_t u;
+static INLINE void filter(int8_t mask, uint8_t hev, uint8_t *op1,
+ uint8_t *op0, uint8_t *oq0, uint8_t *oq1) {
+ int8_t filter1, filter2;
- ps1 = (int8_t) *op1 ^ 0x80;
- ps0 = (int8_t) *op0 ^ 0x80;
- qs0 = (int8_t) *oq0 ^ 0x80;
- qs1 = (int8_t) *oq1 ^ 0x80;
+ const int8_t ps1 = (int8_t) *op1 ^ 0x80;
+ const int8_t ps0 = (int8_t) *op0 ^ 0x80;
+ const int8_t qs0 = (int8_t) *oq0 ^ 0x80;
+ const int8_t qs1 = (int8_t) *oq1 ^ 0x80;
- /* add outer taps if we have high edge variance */
- filter = signed_char_clamp(ps1 - qs1);
- filter &= hev;
+ // add outer taps if we have high edge variance
+ int8_t filter = signed_char_clamp(ps1 - qs1) & hev;
- /* inner taps */
- filter = signed_char_clamp(filter + 3 * (qs0 - ps0));
- filter &= mask;
+ // inner taps
+ filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
- /* save bottom 3 bits so that we round one side +4 and the other +3
- * if it equals 4 we'll set to adjust by -1 to account for the fact
- * we'd round 3 the other way
- */
- Filter1 = signed_char_clamp(filter + 4);
- Filter2 = signed_char_clamp(filter + 3);
- Filter1 >>= 3;
- Filter2 >>= 3;
- u = signed_char_clamp(qs0 - Filter1);
- *oq0 = u ^ 0x80;
- u = signed_char_clamp(ps0 + Filter2);
- *op0 = u ^ 0x80;
- filter = Filter1;
+ // save bottom 3 bits so that we round one side +4 and the other +3
+ // if it equals 4 we'll set to adjust by -1 to account for the fact
+ // we'd round 3 the other way
+ filter1 = signed_char_clamp(filter + 4) >> 3;
+ filter2 = signed_char_clamp(filter + 3) >> 3;
- /* outer tap adjustments */
+ *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
+ *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
+ filter = filter1;
+
+ // outer tap adjustments
filter += 1;
filter >>= 1;
filter &= ~hev;
- u = signed_char_clamp(qs1 - filter);
- *oq1 = u ^ 0x80;
- u = signed_char_clamp(ps1 + filter);
- *op1 = u ^ 0x80;
+ *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
+ *op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
void vp9_loop_filter_horizontal_edge_c(uint8_t *s,
@@ -143,11 +132,11 @@
s += p;
} while (++i < count * 8);
}
-static __inline signed char flatmask(uint8_t thresh,
- uint8_t p4, uint8_t p3, uint8_t p2,
- uint8_t p1, uint8_t p0,
- uint8_t q0, uint8_t q1, uint8_t q2,
- uint8_t q3, uint8_t q4) {
+static INLINE signed char flatmask4(uint8_t thresh,
+ uint8_t p3, uint8_t p2,
+ uint8_t p1, uint8_t p0,
+ uint8_t q0, uint8_t q1,
+ uint8_t q2, uint8_t q3) {
int8_t flat = 0;
flat |= (abs(p1 - p0) > thresh) * -1;
flat |= (abs(q1 - q0) > thresh) * -1;
@@ -155,81 +144,72 @@
flat |= (abs(q0 - q2) > thresh) * -1;
flat |= (abs(p3 - p0) > thresh) * -1;
flat |= (abs(q3 - q0) > thresh) * -1;
+ flat = ~flat;
+ return flat;
+}
+static INLINE signed char flatmask5(uint8_t thresh,
+ uint8_t p4, uint8_t p3, uint8_t p2,
+ uint8_t p1, uint8_t p0,
+ uint8_t q0, uint8_t q1, uint8_t q2,
+ uint8_t q3, uint8_t q4) {
+ int8_t flat = 0;
flat |= (abs(p4 - p0) > thresh) * -1;
flat |= (abs(q4 - q0) > thresh) * -1;
flat = ~flat;
- return flat;
+ return flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3);
}
-static __inline void mbfilter(int8_t mask, uint8_t hev, uint8_t flat,
- uint8_t *op4, uint8_t *op3, uint8_t *op2,
- uint8_t *op1, uint8_t *op0,
- uint8_t *oq0, uint8_t *oq1, uint8_t *oq2,
- uint8_t *oq3, uint8_t *oq4) {
- /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
+
+static INLINE void mbfilter(int8_t mask, uint8_t hev, uint8_t flat,
+ uint8_t *op3, uint8_t *op2,
+ uint8_t *op1, uint8_t *op0,
+ uint8_t *oq0, uint8_t *oq1,
+ uint8_t *oq2, uint8_t *oq3) {
+ // use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line
if (flat && mask) {
- uint8_t p0, q0;
- uint8_t p1, q1;
- uint8_t p2, q2;
- uint8_t p3, q3;
- uint8_t p4, q4;
+ const uint8_t p3 = *op3;
+ const uint8_t p2 = *op2;
+ const uint8_t p1 = *op1;
+ const uint8_t p0 = *op0;
+ const uint8_t q0 = *oq0;
+ const uint8_t q1 = *oq1;
+ const uint8_t q2 = *oq2;
+ const uint8_t q3 = *oq3;
- p4 = *op4;
- p3 = *op3;
- p2 = *op2;
- p1 = *op1;
- p0 = *op0;
- q0 = *oq0;
- q1 = *oq1;
- q2 = *oq2;
- q3 = *oq3;
- q4 = *oq4;
-
- *op2 = (p4 + p4 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3;
- *op1 = (p4 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3;
+ *op2 = (p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3;
+ *op1 = (p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3;
*op0 = (p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2 + 4) >> 3;
*oq0 = (p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3 + 4) >> 3;
- *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q4 + 4) >> 3;
- *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q4 + q4 + 4) >> 3;
+ *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3 + 4) >> 3;
+ *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3 + 4) >> 3;
} else {
- int8_t ps0, qs0;
- int8_t ps1, qs1;
- int8_t filter, Filter1, Filter2;
- int8_t u;
+ int8_t filter1, filter2;
- ps1 = (int8_t) *op1 ^ 0x80;
- ps0 = (int8_t) *op0 ^ 0x80;
- qs0 = (int8_t) *oq0 ^ 0x80;
- qs1 = (int8_t) *oq1 ^ 0x80;
+ const int8_t ps1 = (int8_t) *op1 ^ 0x80;
+ const int8_t ps0 = (int8_t) *op0 ^ 0x80;
+ const int8_t qs0 = (int8_t) *oq0 ^ 0x80;
+ const int8_t qs1 = (int8_t) *oq1 ^ 0x80;
- /* add outer taps if we have high edge variance */
- filter = signed_char_clamp(ps1 - qs1);
- filter &= hev;
+ // add outer taps if we have high edge variance
+ int8_t filter = signed_char_clamp(ps1 - qs1) & hev;
- /* inner taps */
- filter = signed_char_clamp(filter + 3 * (qs0 - ps0));
- filter &= mask;
+ // inner taps
+ filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
- Filter1 = signed_char_clamp(filter + 4);
- Filter2 = signed_char_clamp(filter + 3);
- Filter1 >>= 3;
- Filter2 >>= 3;
+ filter1 = signed_char_clamp(filter + 4) >> 3;
+ filter2 = signed_char_clamp(filter + 3) >> 3;
- u = signed_char_clamp(qs0 - Filter1);
- *oq0 = u ^ 0x80;
- u = signed_char_clamp(ps0 + Filter2);
- *op0 = u ^ 0x80;
- filter = Filter1;
+ *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
+ *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
+ filter = filter1;
- /* outer tap adjustments */
+ // outer tap adjustments
filter += 1;
filter >>= 1;
filter &= ~hev;
- u = signed_char_clamp(qs1 - filter);
- *oq1 = u ^ 0x80;
- u = signed_char_clamp(ps1 + filter);
- *op1 = u ^ 0x80;
+ *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
+ *op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
}
@@ -254,12 +234,11 @@
hev = hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
- flat = flatmask(1,
- s[-5 * p], s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
- s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p], s[ 4 * p]);
+ flat = flatmask4(1, s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
+ s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]);
mbfilter(mask, hev, flat,
- s - 5 * p, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
- s, s + 1 * p, s + 2 * p, s + 3 * p, s + 4 * p);
+ s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
+ s, s + 1 * p, s + 2 * p, s + 3 * p);
++s;
} while (++i < count * 8);
@@ -283,12 +262,12 @@
s[0], s[1], s[2], s[3]);
hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
- flat = flatmask(1,
- s[-5], s[-4], s[-3], s[-2], s[-1],
- s[ 0], s[ 1], s[ 2], s[ 3], s[ 4]);
+ flat = flatmask4(1,
+ s[-4], s[-3], s[-2], s[-1],
+ s[ 0], s[ 1], s[ 2], s[ 3]);
mbfilter(mask, hev, flat,
- s - 5, s - 4, s - 3, s - 2, s - 1,
- s, s + 1, s + 2, s + 3, s + 4);
+ s - 4, s - 3, s - 2, s - 1,
+ s, s + 1, s + 2, s + 3);
s += p;
} while (++i < count * 8);
@@ -295,41 +274,31 @@
}
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
-static __inline int8_t simple_filter_mask(uint8_t blimit,
- uint8_t p1, uint8_t p0,
- uint8_t q0, uint8_t q1) {
- /* Why does this cause problems for win32?
- * error C2143: syntax error : missing ';' before 'type'
- * (void) limit;
- */
- int8_t mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
- return mask;
+static INLINE int8_t simple_filter_mask(uint8_t blimit,
+ uint8_t p1, uint8_t p0,
+ uint8_t q0, uint8_t q1) {
+ return (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
}
-static __inline void simple_filter(int8_t mask,
- uint8_t *op1, uint8_t *op0,
- uint8_t *oq0, uint8_t *oq1) {
- int8_t filter, Filter1, Filter2;
- int8_t p1 = (int8_t) *op1 ^ 0x80;
- int8_t p0 = (int8_t) *op0 ^ 0x80;
- int8_t q0 = (int8_t) *oq0 ^ 0x80;
- int8_t q1 = (int8_t) *oq1 ^ 0x80;
- int8_t u;
+static INLINE void simple_filter(int8_t mask,
+ uint8_t *op1, uint8_t *op0,
+ uint8_t *oq0, uint8_t *oq1) {
+ int8_t filter1, filter2;
+ const int8_t p1 = (int8_t) *op1 ^ 0x80;
+ const int8_t p0 = (int8_t) *op0 ^ 0x80;
+ const int8_t q0 = (int8_t) *oq0 ^ 0x80;
+ const int8_t q1 = (int8_t) *oq1 ^ 0x80;
- filter = signed_char_clamp(p1 - q1);
+ int8_t filter = signed_char_clamp(p1 - q1);
filter = signed_char_clamp(filter + 3 * (q0 - p0));
filter &= mask;
- /* save bottom 3 bits so that we round one side +4 and the other +3 */
- Filter1 = signed_char_clamp(filter + 4);
- Filter1 >>= 3;
- u = signed_char_clamp(q0 - Filter1);
- *oq0 = u ^ 0x80;
+ // save bottom 3 bits so that we round one side +4 and the other +3
+ filter1 = signed_char_clamp(filter + 4) >> 3;
+ *oq0 = signed_char_clamp(q0 - filter1) ^ 0x80;
- Filter2 = signed_char_clamp(filter + 3);
- Filter2 >>= 3;
- u = signed_char_clamp(p0 + Filter2);
- *op0 = u ^ 0x80;
+ filter2 = signed_char_clamp(filter + 3) >> 3;
+ *op0 = signed_char_clamp(p0 + filter2) ^ 0x80;
}
void vp9_loop_filter_simple_horizontal_edge_c(uint8_t *s,
@@ -481,42 +450,33 @@
vp9_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
}
-static __inline void wide_mbfilter(int8_t mask, uint8_t hev,
- uint8_t flat, uint8_t flat2,
- uint8_t *op7, uint8_t *op6, uint8_t *op5,
- uint8_t *op4, uint8_t *op3, uint8_t *op2,
- uint8_t *op1, uint8_t *op0, uint8_t *oq0,
- uint8_t *oq1, uint8_t *oq2, uint8_t *oq3,
- uint8_t *oq4, uint8_t *oq5, uint8_t *oq6,
- uint8_t *oq7) {
- /* use a 15 tap filter [1,1,1,1,1,1,1,2,1,1,1,1,1,1,1] for flat line */
+static INLINE void wide_mbfilter(int8_t mask, uint8_t hev,
+ uint8_t flat, uint8_t flat2,
+ uint8_t *op7, uint8_t *op6, uint8_t *op5,
+ uint8_t *op4, uint8_t *op3, uint8_t *op2,
+ uint8_t *op1, uint8_t *op0, uint8_t *oq0,
+ uint8_t *oq1, uint8_t *oq2, uint8_t *oq3,
+ uint8_t *oq4, uint8_t *oq5, uint8_t *oq6,
+ uint8_t *oq7) {
+ // use a 15 tap filter [1,1,1,1,1,1,1,2,1,1,1,1,1,1,1] for flat line
if (flat2 && flat && mask) {
- uint8_t p0, q0;
- uint8_t p1, q1;
- uint8_t p2, q2;
- uint8_t p3, q3;
- uint8_t p4, q4;
- uint8_t p5, q5;
- uint8_t p6, q6;
- uint8_t p7, q7;
+ const uint8_t p7 = *op7;
+ const uint8_t p6 = *op6;
+ const uint8_t p5 = *op5;
+ const uint8_t p4 = *op4;
+ const uint8_t p3 = *op3;
+ const uint8_t p2 = *op2;
+ const uint8_t p1 = *op1;
+ const uint8_t p0 = *op0;
+ const uint8_t q0 = *oq0;
+ const uint8_t q1 = *oq1;
+ const uint8_t q2 = *oq2;
+ const uint8_t q3 = *oq3;
+ const uint8_t q4 = *oq4;
+ const uint8_t q5 = *oq5;
+ const uint8_t q6 = *oq6;
+ const uint8_t q7 = *oq7;
- p7 = *op7;
- p6 = *op6;
- p5 = *op5;
- p4 = *op4;
- p3 = *op3;
- p2 = *op2;
- p1 = *op1;
- p0 = *op0;
- q0 = *oq0;
- q1 = *oq1;
- q2 = *oq2;
- q3 = *oq3;
- q4 = *oq4;
- q5 = *oq5;
- q6 = *oq6;
- q7 = *oq7;
-
*op6 = (p7 * 7 + p6 * 2 +
p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4;
*op5 = (p7 * 6 + p6 + p5 * 2 +
@@ -546,68 +506,48 @@
*oq6 = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 +
q7 * 7 + 8) >> 4;
} else if (flat && mask) {
- unsigned char p0, q0;
- unsigned char p1, q1;
- unsigned char p2, q2;
- unsigned char p3, q3;
- unsigned char p4, q4;
+ const uint8_t p3 = *op3;
+ const uint8_t p2 = *op2;
+ const uint8_t p1 = *op1;
+ const uint8_t p0 = *op0;
+ const uint8_t q0 = *oq0;
+ const uint8_t q1 = *oq1;
+ const uint8_t q2 = *oq2;
+ const uint8_t q3 = *oq3;
- p4 = *op4;
- p3 = *op3;
- p2 = *op2;
- p1 = *op1;
- p0 = *op0;
- q0 = *oq0;
- q1 = *oq1;
- q2 = *oq2;
- q3 = *oq3;
- q4 = *oq4;
-
- *op2 = (p4 + p4 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3;
- *op1 = (p4 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3;
+ *op2 = (p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0 + 4) >> 3;
+ *op1 = (p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1 + 4) >> 3;
*op0 = (p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2 + 4) >> 3;
*oq0 = (p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3 + 4) >> 3;
- *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q4 + 4) >> 3;
- *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q4 + q4 + 4) >> 3;
+ *oq1 = (p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3 + 4) >> 3;
+ *oq2 = (p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3 + 4) >> 3;
} else {
- signed char ps0, qs0;
- signed char ps1, qs1;
- signed char filter, Filter1, Filter2;
- signed char u;
+ int8_t filter1, filter2;
- ps1 = (signed char) * op1 ^ 0x80;
- ps0 = (signed char) * op0 ^ 0x80;
- qs0 = (signed char) * oq0 ^ 0x80;
- qs1 = (signed char) * oq1 ^ 0x80;
+ const int8_t ps1 = (int8_t) * op1 ^ 0x80;
+ const int8_t ps0 = (int8_t) * op0 ^ 0x80;
+ const int8_t qs0 = (int8_t) * oq0 ^ 0x80;
+ const int8_t qs1 = (int8_t) * oq1 ^ 0x80;
- /* add outer taps if we have high edge variance */
- filter = signed_char_clamp(ps1 - qs1);
- filter &= hev;
+ // add outer taps if we have high edge variance
+ int8_t filter = signed_char_clamp(ps1 - qs1) & hev;
- /* inner taps */
- filter = signed_char_clamp(filter + 3 * (qs0 - ps0));
- filter &= mask;
+ // inner taps
+ filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
+ filter1 = signed_char_clamp(filter + 4) >> 3;
+ filter2 = signed_char_clamp(filter + 3) >> 3;
- Filter1 = signed_char_clamp(filter + 4);
- Filter2 = signed_char_clamp(filter + 3);
- Filter1 >>= 3;
- Filter2 >>= 3;
+ *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
+ *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
+ filter = filter1;
- u = signed_char_clamp(qs0 - Filter1);
- *oq0 = u ^ 0x80;
- u = signed_char_clamp(ps0 + Filter2);
- *op0 = u ^ 0x80;
- filter = Filter1;
-
- /* outer tap adjustments */
+ // outer tap adjustments
filter += 1;
filter >>= 1;
filter &= ~hev;
- u = signed_char_clamp(qs1 - filter);
- *oq1 = u ^ 0x80;
- u = signed_char_clamp(ps1 + filter);
- *op1 = u ^ 0x80;
+ *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
+ *op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
}
@@ -636,19 +576,19 @@
hev = hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
- flat = flatmask(1,
- s[-5 * p], s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
- s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p], s[ 4 * p]);
+ flat = flatmask4(1,
+ s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
+ s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]);
- flat2 = flatmask(1,
- s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], s[-1 * p],
- s[ 0 * p], s[ 4 * p], s[ 5 * p], s[ 6 * p], s[ 7 * p]);
+ flat2 = flatmask5(1,
+ s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], s[-1 * p],
+ s[ 0 * p], s[ 4 * p], s[ 5 * p], s[ 6 * p], s[ 7 * p]);
wide_mbfilter(mask, hev, flat, flat2,
- s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p,
- s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
- s, s + 1 * p, s + 2 * p, s + 3 * p,
- s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p);
+ s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p,
+ s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
+ s, s + 1 * p, s + 2 * p, s + 3 * p,
+ s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p);
++s;
} while (++i < count * 8);
@@ -674,18 +614,18 @@
s[0], s[1], s[2], s[3]);
hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
- flat = flatmask(1,
- s[-5], s[-4], s[-3], s[-2], s[-1],
- s[ 0], s[ 1], s[ 2], s[ 3], s[ 4]);
- flat2 = flatmask(1,
- s[-8], s[-7], s[-6], s[-5], s[-1],
- s[ 0], s[ 4], s[ 5], s[ 6], s[ 7]);
+ flat = flatmask4(1,
+ s[-4], s[-3], s[-2], s[-1],
+ s[ 0], s[ 1], s[ 2], s[ 3]);
+ flat2 = flatmask5(1,
+ s[-8], s[-7], s[-6], s[-5], s[-1],
+ s[ 0], s[ 4], s[ 5], s[ 6], s[ 7]);
wide_mbfilter(mask, hev, flat, flat2,
- s - 8, s - 7, s - 6, s - 5,
- s - 4, s - 3, s - 2, s - 1,
- s, s + 1, s + 2, s + 3,
- s + 4, s + 5, s + 6, s + 7);
+ s - 8, s - 7, s - 6, s - 5,
+ s - 4, s - 3, s - 2, s - 1,
+ s, s + 1, s + 2, s + 3,
+ s + 4, s + 5, s + 6, s + 7);
s += p;
} while (++i < count * 8);
}
--- a/vp9/common/vp9_maskingmv.c
+++ b/vp9/common/vp9_maskingmv.c
@@ -11,7 +11,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-extern unsigned int vp9_sad16x16_sse3(
+
+unsigned int vp9_sad16x16_sse3(
unsigned char *src_ptr,
int src_stride,
unsigned char *ref_ptr,
@@ -18,18 +19,11 @@
int ref_stride,
int max_err);
-extern void vp9_sad16x16x3_sse3(
- unsigned char *src_ptr,
- int src_stride,
- unsigned char *ref_ptr,
- int ref_stride,
- int *results);
-
-extern int vp8_growmaskmb_sse3(
+int vp8_growmaskmb_sse3(
unsigned char *om,
unsigned char *nm);
-extern void vp8_makemask_sse3(
+void vp8_makemask_sse3(
unsigned char *y,
unsigned char *u,
unsigned char *v,
@@ -238,6 +232,7 @@
for (i = 0; i < 256; i++)
ym[i] = nym[i];
}
+
void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v,
unsigned char *ym, unsigned char *uvm,
int yp, int uvp,
@@ -283,6 +278,7 @@
return sad;
}
+
int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
unsigned char *ym) {
int i, j;
@@ -294,6 +290,7 @@
return sad;
}
+
int masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v,
int yp, int uvp,
unsigned char *dy, unsigned char *du, unsigned char *dv,
@@ -802,5 +799,5 @@
}
fclose(f);
fclose(g);
- return;
+ return 0;
}
--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@@ -20,15 +20,15 @@
int mv_stride,
uint8_t **base,
uint8_t **base2,
- int Stride,
+ int stride,
int offset,
BLOCKSET bs) {
if (bs == DEST) {
- b->dst_stride = Stride;
+ b->dst_stride = stride;
b->dst = offset;
b->base_dst = base;
} else {
- b->pre_stride = Stride;
+ b->pre_stride = stride;
b->pre = offset;
b->base_pre = base;
b->base_second_pre = base2;
@@ -102,9 +102,7 @@
}
}
- blockd[24].diff = &xd->diff[384];
-
- for (r = 0; r < 25; r++) {
+ for (r = 0; r < 24; r++) {
blockd[r].qcoeff = xd->qcoeff + r * 16;
blockd[r].dqcoeff = xd->dqcoeff + r * 16;
}
--- a/vp9/common/vp9_modecont.c
+++ b/vp9/common/vp9_modecont.c
@@ -12,7 +12,7 @@
#include "vp9/common/vp9_entropy.h"
const int vp9_default_mode_contexts[INTER_MODE_CONTEXTS][4] = {
- {223, 1, 1, 237}, // 0,0 best: Only candidate
+ {1, 223, 1, 237}, // 0,0 best: Only candidate
{87, 166, 26, 219}, // 0,0 best: non zero candidates
{89, 67, 18, 125}, // 0,0 best: non zero candidates, split
{16, 141, 69, 226}, // strong nz candidate(s), no split
--- a/vp9/common/vp9_mv.h
+++ b/vp9/common/vp9_mv.h
@@ -23,4 +23,14 @@
MV as_mv;
} int_mv; /* facilitates faster equality tests and copies */
+struct mv32 {
+ int32_t row;
+ int32_t col;
+};
+
+typedef union int_mv32 {
+ uint64_t as_int;
+ struct mv32 as_mv;
+} int_mv32; /* facilitates faster equality tests and copies */
+
#endif // VP9_COMMON_VP9_MV_H_
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -11,64 +11,69 @@
#include "vp9/common/vp9_mvref_common.h"
#define MVREF_NEIGHBOURS 8
+
static int mb_mv_ref_search[MVREF_NEIGHBOURS][2] = {
{0, -1}, {-1, 0}, {-1, -1}, {0, -2},
{-2, 0}, {-1, -2}, {-2, -1}, {-2, -2}
};
+
static int mb_ref_distance_weight[MVREF_NEIGHBOURS] =
{ 3, 3, 2, 1, 1, 1, 1, 1 };
+
static int sb_mv_ref_search[MVREF_NEIGHBOURS][2] = {
{0, -1}, {-1, 0}, {1, -1}, {-1, 1},
{-1, -1}, {0, -2}, {-2, 0}, {-1, -2}
};
+
static int sb_ref_distance_weight[MVREF_NEIGHBOURS] =
{ 3, 3, 2, 2, 2, 1, 1, 1 };
-// clamp_mv
-#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
-static void clamp_mv(const MACROBLOCKD *xd, int_mv *mv) {
- if (mv->as_mv.col < (xd->mb_to_left_edge - MV_BORDER))
- mv->as_mv.col = xd->mb_to_left_edge - MV_BORDER;
- else if (mv->as_mv.col > xd->mb_to_right_edge + MV_BORDER)
- mv->as_mv.col = xd->mb_to_right_edge + MV_BORDER;
- if (mv->as_mv.row < (xd->mb_to_top_edge - MV_BORDER))
- mv->as_mv.row = xd->mb_to_top_edge - MV_BORDER;
- else if (mv->as_mv.row > xd->mb_to_bottom_edge + MV_BORDER)
- mv->as_mv.row = xd->mb_to_bottom_edge + MV_BORDER;
+static int sb64_mv_ref_search[MVREF_NEIGHBOURS][2] = {
+ {0, -1}, {-1, 0}, {1, -1}, {-1, 1},
+ {2, -1}, {-1, 2}, {3, -1}, {-1,-1}
+};
+
+static int sb64_ref_distance_weight[MVREF_NEIGHBOURS] =
+ { 1, 1, 1, 1, 1, 1, 1, 1 };
+
+
+
+// clamp_mv_ref
+#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
+
+static void clamp_mv_ref(const MACROBLOCKD *xd, int_mv *mv) {
+ mv->as_mv.col = clamp(mv->as_mv.col, xd->mb_to_left_edge - MV_BORDER,
+ xd->mb_to_right_edge + MV_BORDER);
+ mv->as_mv.row = clamp(mv->as_mv.row, xd->mb_to_top_edge - MV_BORDER,
+ xd->mb_to_bottom_edge + MV_BORDER);
}
// Gets a candidate refenence motion vector from the given mode info
// structure if one exists that matches the given reference frame.
-static int get_matching_candidate(
- const MODE_INFO *candidate_mi,
- MV_REFERENCE_FRAME ref_frame,
- int_mv *c_mv
-) {
- int ret_val = TRUE;
-
+static int get_matching_candidate(const MODE_INFO *candidate_mi,
+ MV_REFERENCE_FRAME ref_frame,
+ int_mv *c_mv) {
if (ref_frame == candidate_mi->mbmi.ref_frame) {
c_mv->as_int = candidate_mi->mbmi.mv[0].as_int;
} else if (ref_frame == candidate_mi->mbmi.second_ref_frame) {
c_mv->as_int = candidate_mi->mbmi.mv[1].as_int;
} else {
- ret_val = FALSE;
+ return 0;
}
- return ret_val;
+ return 1;
}
// Gets candidate refenence motion vector(s) from the given mode info
// structure if they exists and do NOT match the given reference frame.
-static void get_non_matching_candidates(
- const MODE_INFO *candidate_mi,
- MV_REFERENCE_FRAME ref_frame,
- MV_REFERENCE_FRAME *c_ref_frame,
- int_mv *c_mv,
- MV_REFERENCE_FRAME *c2_ref_frame,
- int_mv *c2_mv
-) {
+static void get_non_matching_candidates(const MODE_INFO *candidate_mi,
+ MV_REFERENCE_FRAME ref_frame,
+ MV_REFERENCE_FRAME *c_ref_frame,
+ int_mv *c_mv,
+ MV_REFERENCE_FRAME *c2_ref_frame,
+ int_mv *c2_mv) {
c_mv->as_int = 0;
c2_mv->as_int = 0;
@@ -85,10 +90,8 @@
// Second candidate
if ((candidate_mi->mbmi.second_ref_frame > INTRA_FRAME) &&
- (candidate_mi->mbmi.second_ref_frame != ref_frame)) { // &&
- // (candidate_mi->mbmi.mv[1].as_int != 0) &&
- // (candidate_mi->mbmi.mv[1].as_int !=
- // candidate_mi->mbmi.mv[0].as_int)) {
+ (candidate_mi->mbmi.second_ref_frame != ref_frame) &&
+ (candidate_mi->mbmi.mv[1].as_int != candidate_mi->mbmi.mv[0].as_int)) {
*c2_ref_frame = candidate_mi->mbmi.second_ref_frame;
c2_mv->as_int = candidate_mi->mbmi.mv[1].as_int;
}
@@ -95,63 +98,60 @@
}
}
-// Performs mv adjustment based on reference frame and clamps the MV
-// if it goes off the edge of the buffer.
-static void scale_mv(
- MACROBLOCKD *xd,
- MV_REFERENCE_FRAME this_ref_frame,
- MV_REFERENCE_FRAME candidate_ref_frame,
- int_mv *candidate_mv,
- int *ref_sign_bias
-) {
- if (candidate_ref_frame != this_ref_frame) {
+// Performs mv sign inversion if indicated by the reference frame combination.
+static void scale_mv(MACROBLOCKD *xd, MV_REFERENCE_FRAME this_ref_frame,
+ MV_REFERENCE_FRAME candidate_ref_frame,
+ int_mv *candidate_mv, int *ref_sign_bias) {
+ // int frame_distances[MAX_REF_FRAMES];
+ // int last_distance = 1;
+ // int gf_distance = xd->frames_since_golden;
+ // int arf_distance = xd->frames_till_alt_ref_frame;
- //int frame_distances[MAX_REF_FRAMES];
- //int last_distance = 1;
- //int gf_distance = xd->frames_since_golden;
- //int arf_distance = xd->frames_till_alt_ref_frame;
+ // Sign inversion where appropriate.
+ if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) {
+ candidate_mv->as_mv.row = -candidate_mv->as_mv.row;
+ candidate_mv->as_mv.col = -candidate_mv->as_mv.col;
+ }
- // Sign inversion where appropriate.
- if (ref_sign_bias[candidate_ref_frame] != ref_sign_bias[this_ref_frame]) {
- candidate_mv->as_mv.row = -candidate_mv->as_mv.row;
- candidate_mv->as_mv.col = -candidate_mv->as_mv.col;
- }
+ /*
+ // Scale based on frame distance if the reference frames not the same.
+ frame_distances[INTRA_FRAME] = 1; // should never be used
+ frame_distances[LAST_FRAME] = 1;
+ frame_distances[GOLDEN_FRAME] =
+ (xd->frames_since_golden) ? xd->frames_si nce_golden : 1;
+ frame_distances[ALTREF_FRAME] =
+ (xd->frames_till_alt_ref_frame) ? xd->frames_till_alt_ref_frame : 1;
- // Scale based on frame distance if the reference frames not the same.
- /*frame_distances[INTRA_FRAME] = 1; // should never be used
- frame_distances[LAST_FRAME] = 1;
- frame_distances[GOLDEN_FRAME] =
- (xd->frames_since_golden) ? xd->frames_since_golden : 1;
- frame_distances[ALTREF_FRAME] =
- (xd->frames_till_alt_ref_frame) ? xd->frames_till_alt_ref_frame : 1;
+ if (frame_distances[this_ref_frame] &&
+ frame_distances[candidate_ref_frame]) {
+ candidate_mv->as_mv.row =
+ (short)(((int)(candidate_mv->as_mv.row) *
+ frame_distances[this_ref_frame]) /
+ frame_distances[candidate_ref_frame]);
- if (frame_distances[this_ref_frame] &&
- frame_distances[candidate_ref_frame]) {
- candidate_mv->as_mv.row =
- (short)(((int)(candidate_mv->as_mv.row) *
- frame_distances[this_ref_frame]) /
- frame_distances[candidate_ref_frame]);
-
- candidate_mv->as_mv.col =
- (short)(((int)(candidate_mv->as_mv.col) *
- frame_distances[this_ref_frame]) /
- frame_distances[candidate_ref_frame]);
- }
- */
+ candidate_mv->as_mv.col =
+ (short)(((int)(candidate_mv->as_mv.col) *
+ frame_distances[this_ref_frame]) /
+ frame_distances[candidate_ref_frame]);
}
-
- // Clamp the MV so it does not point out of the frame buffer
- clamp_mv(xd, candidate_mv);
+ */
}
-// Adds a new candidate reference vector to the list if indeed it is new.
-// If it is not new then the score of the existing candidate that it matches
-// is increased and the list is resorted.
+/*
+// Adds a new candidate reference vector to the sorted list.
+// If it is a repeat the weight of the existing entry is increased
+// and the order of the list is resorted.
+// This method of add plus sort has been deprecated for now as there is a
+// further sort of the best candidates in vp9_find_best_ref_mvs() and the
+// incremental benefit of both is small. If the decision is made to remove
+// the sort in vp9_find_best_ref_mvs() for performance reasons then it may be
+// worth re-instating some sort of list reordering by weight here.
+//
static void addmv_and_shuffle(
int_mv *mv_list,
int *mv_scores,
- int *index,
+ int *refmv_count,
int_mv candidate_mv,
int weight
) {
@@ -162,11 +162,11 @@
// Check for duplicates. If there is one increase its score.
// We only compare vs the current top candidates.
- insert_point = (*index < (MAX_MV_REF_CANDIDATES - 1))
- ? *index : (MAX_MV_REF_CANDIDATES - 1);
+ insert_point = (*refmv_count < (MAX_MV_REF_CANDIDATES - 1))
+ ? *refmv_count : (MAX_MV_REF_CANDIDATES - 1);
i = insert_point;
- if (*index > i)
+ if (*refmv_count > i)
i++;
while (i > 0) {
i--;
@@ -184,7 +184,7 @@
mv_scores[insert_point] = weight;
i = insert_point;
}
- (*index)++;
+ (*refmv_count)++;
}
// Reshuffle the list so that highest scoring mvs at the top.
@@ -202,19 +202,42 @@
break;
}
}
+*/
+// Adds a new candidate reference vector to the list.
+// The mv is thrown out if it is already in the list.
+// Unlike the addmv_and_shuffle() this does not reorder the list
+// but assumes that candidates are added in the order most likely to
+// match distance and reference frame bias.
+static void add_candidate_mv(int_mv *mv_list, int *mv_scores,
+ int *candidate_count, int_mv candidate_mv,
+ int weight) {
+ int i;
+
+ // Make sure we dont insert off the end of the list
+ const int insert_point = MIN(*candidate_count, MAX_MV_REF_CANDIDATES - 1);
+
+ // Look for duplicates
+ for (i = 0; i <= insert_point; ++i) {
+ if (candidate_mv.as_int == mv_list[i].as_int)
+ break;
+ }
+
+ // Add the candidate. If the list is already full it is only desirable that
+ // it should overwrite if it has a higher weight than the last entry.
+ if (i >= insert_point && weight > mv_scores[insert_point]) {
+ mv_list[insert_point].as_int = candidate_mv.as_int;
+ mv_scores[insert_point] = weight;
+ *candidate_count += (*candidate_count < MAX_MV_REF_CANDIDATES);
+ }
+}
+
// This function searches the neighbourhood of a given MB/SB and populates a
// list of candidate reference vectors.
//
-void vp9_find_mv_refs(
- MACROBLOCKD *xd,
- MODE_INFO *here,
- MODE_INFO *lf_here,
- MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list,
- int *ref_sign_bias
-) {
-
+void vp9_find_mv_refs(VP9_COMMON *cm, MACROBLOCKD *xd, MODE_INFO *here,
+ MODE_INFO *lf_here, MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list, int *ref_sign_bias) {
int i;
MODE_INFO *candidate_mi;
MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
@@ -224,10 +247,12 @@
MV_REFERENCE_FRAME c_ref_frame;
MV_REFERENCE_FRAME c2_ref_frame;
int candidate_scores[MAX_MV_REF_CANDIDATES];
- int index = 0;
+ int refmv_count = 0;
int split_count = 0;
int (*mv_ref_search)[2];
int *ref_distance_weight;
+ int zero_seen = FALSE;
+ const int mb_col = (-xd->mb_to_left_edge) >> 7;
// Blank the reference vector lists and other local structures.
vpx_memset(mv_ref_list, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES);
@@ -234,7 +259,10 @@
vpx_memset(candidate_mvs, 0, sizeof(int_mv) * MAX_MV_REF_CANDIDATES);
vpx_memset(candidate_scores, 0, sizeof(candidate_scores));
- if (mbmi->sb_type) {
+ if (mbmi->sb_type == BLOCK_SIZE_SB64X64) {
+ mv_ref_search = sb64_mv_ref_search;
+ ref_distance_weight = sb64_ref_distance_weight;
+ } else if (mbmi->sb_type == BLOCK_SIZE_SB32X32) {
mv_ref_search = sb_mv_ref_search;
ref_distance_weight = sb_ref_distance_weight;
} else {
@@ -245,7 +273,10 @@
// We first scan for candidate vectors that match the current reference frame
// Look at nearest neigbours
for (i = 0; i < 2; ++i) {
- if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) &&
+ const int mb_search_col = mb_col + mv_ref_search[i][0];
+
+ if ((mb_search_col >= cm->cur_tile_mb_col_start) &&
+ (mb_search_col < cm->cur_tile_mb_col_end) &&
((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) {
candidate_mi = here + mv_ref_search[i][0] +
@@ -252,32 +283,34 @@
(mv_ref_search[i][1] * xd->mode_info_stride);
if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) {
- clamp_mv(xd, &c_refmv);
- addmv_and_shuffle(candidate_mvs, candidate_scores,
- &index, c_refmv, ref_distance_weight[i] + 16);
+ add_candidate_mv(candidate_mvs, candidate_scores,
+ &refmv_count, c_refmv, ref_distance_weight[i] + 16);
}
split_count += (candidate_mi->mbmi.mode == SPLITMV);
}
}
- // Look in the last frame
- candidate_mi = lf_here;
- if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) {
- clamp_mv(xd, &c_refmv);
- addmv_and_shuffle(candidate_mvs, candidate_scores,
- &index, c_refmv, 18);
+ // Look in the last frame if it exists
+ if (lf_here) {
+ candidate_mi = lf_here;
+ if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) {
+ add_candidate_mv(candidate_mvs, candidate_scores,
+ &refmv_count, c_refmv, 18);
+ }
}
// More distant neigbours
for (i = 2; (i < MVREF_NEIGHBOURS) &&
- (index < (MAX_MV_REF_CANDIDATES - 1)); ++i) {
- if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) &&
+ (refmv_count < (MAX_MV_REF_CANDIDATES - 1)); ++i) {
+ const int mb_search_col = mb_col + mv_ref_search[i][0];
+
+ if ((mb_search_col >= cm->cur_tile_mb_col_start) &&
+ (mb_search_col < cm->cur_tile_mb_col_end) &&
((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) {
candidate_mi = here + mv_ref_search[i][0] +
(mv_ref_search[i][1] * xd->mode_info_stride);
if (get_matching_candidate(candidate_mi, ref_frame, &c_refmv)) {
- clamp_mv(xd, &c_refmv);
- addmv_and_shuffle(candidate_mvs, candidate_scores,
- &index, c_refmv, ref_distance_weight[i] + 16);
+ add_candidate_mv(candidate_mvs, candidate_scores,
+ &refmv_count, c_refmv, ref_distance_weight[i] + 16);
}
}
}
@@ -286,9 +319,12 @@
// reference frame does not match. Break out when we have
// MAX_MV_REF_CANDIDATES candidates.
// Look first at spatial neighbours
- if (index < (MAX_MV_REF_CANDIDATES - 1)) {
+ if (refmv_count < (MAX_MV_REF_CANDIDATES - 1)) {
for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
- if (((mv_ref_search[i][0] << 7) >= xd->mb_to_left_edge) &&
+ const int mb_search_col = mb_col + mv_ref_search[i][0];
+
+ if ((mb_search_col >= cm->cur_tile_mb_col_start) &&
+ (mb_search_col < cm->cur_tile_mb_col_end) &&
((mv_ref_search[i][1] << 7) >= xd->mb_to_top_edge)) {
candidate_mi = here + mv_ref_search[i][0] +
@@ -300,24 +336,24 @@
if (c_ref_frame != INTRA_FRAME) {
scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
- addmv_and_shuffle(candidate_mvs, candidate_scores,
- &index, c_refmv, ref_distance_weight[i]);
+ add_candidate_mv(candidate_mvs, candidate_scores,
+ &refmv_count, c_refmv, ref_distance_weight[i]);
}
if (c2_ref_frame != INTRA_FRAME) {
scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
- addmv_and_shuffle(candidate_mvs, candidate_scores,
- &index, c2_refmv, ref_distance_weight[i]);
+ add_candidate_mv(candidate_mvs, candidate_scores,
+ &refmv_count, c2_refmv, ref_distance_weight[i]);
}
}
- if (index >= (MAX_MV_REF_CANDIDATES - 1)) {
+ if (refmv_count >= (MAX_MV_REF_CANDIDATES - 1)) {
break;
}
}
}
- // Look at the last frame
- if (index < (MAX_MV_REF_CANDIDATES - 1)) {
+ // Look at the last frame if it exists
+ if (refmv_count < (MAX_MV_REF_CANDIDATES - 1) && lf_here) {
candidate_mi = lf_here;
get_non_matching_candidates(candidate_mi, ref_frame,
&c_ref_frame, &c_refmv,
@@ -325,14 +361,14 @@
if (c_ref_frame != INTRA_FRAME) {
scale_mv(xd, ref_frame, c_ref_frame, &c_refmv, ref_sign_bias);
- addmv_and_shuffle(candidate_mvs, candidate_scores,
- &index, c_refmv, 2);
+ add_candidate_mv(candidate_mvs, candidate_scores,
+ &refmv_count, c_refmv, 2);
}
if (c2_ref_frame != INTRA_FRAME) {
scale_mv(xd, ref_frame, c2_ref_frame, &c2_refmv, ref_sign_bias);
- addmv_and_shuffle(candidate_mvs, candidate_scores,
- &index, c2_refmv, 2);
+ add_candidate_mv(candidate_mvs, candidate_scores,
+ &refmv_count, c2_refmv, 2);
}
}
@@ -340,7 +376,7 @@
// 0,0 was best
if (candidate_mvs[0].as_int == 0) {
// 0,0 is only candidate
- if (index <= 1) {
+ if (refmv_count <= 1) {
mbmi->mb_mode_context[ref_frame] = 0;
// non zero candidates candidates available
} else if (split_count == 0) {
@@ -348,30 +384,25 @@
} else {
mbmi->mb_mode_context[ref_frame] = 2;
}
- // Non zero best, No Split MV cases
} else if (split_count == 0) {
- if (candidate_scores[0] >= 32) {
- mbmi->mb_mode_context[ref_frame] = 3;
- } else {
- mbmi->mb_mode_context[ref_frame] = 4;
- }
- // Non zero best, some split mv
+ // Non zero best, No Split MV cases
+ mbmi->mb_mode_context[ref_frame] = candidate_scores[0] >= 16 ? 3 : 4;
} else {
- if (candidate_scores[0] >= 32) {
- mbmi->mb_mode_context[ref_frame] = 5;
- } else {
- mbmi->mb_mode_context[ref_frame] = 6;
- }
+ // Non zero best, some split mv
+ mbmi->mb_mode_context[ref_frame] = candidate_scores[0] >= 16 ? 5 : 6;
}
- // 0,0 is always a valid reference.
+ // Scan for 0,0 case and clamp non zero choices
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
- if (candidate_mvs[i].as_int == 0)
- break;
+ if (candidate_mvs[i].as_int == 0) {
+ zero_seen = TRUE;
+ } else {
+ clamp_mv_ref(xd, &candidate_mvs[i]);
+ }
}
- if (i == MAX_MV_REF_CANDIDATES) {
+ // 0,0 is always a valid reference. Add it if not already seen.
+ if (!zero_seen)
candidate_mvs[MAX_MV_REF_CANDIDATES-1].as_int = 0;
- }
// Copy over the candidate list.
vpx_memcpy(mv_ref_list, candidate_mvs, sizeof(candidate_mvs));
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -14,7 +14,8 @@
#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
#define VP9_COMMON_VP9_MVREF_COMMON_H_
-void vp9_find_mv_refs(MACROBLOCKD *xd,
+void vp9_find_mv_refs(VP9_COMMON *cm,
+ MACROBLOCKD *xd,
MODE_INFO *here,
MODE_INFO *lf_here,
MV_REFERENCE_FRAME ref_frame,
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -16,6 +16,7 @@
{
#endif
+#include "./vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8cx.h"
#include "vpx_scale/yv12config.h"
@@ -62,7 +63,7 @@
#include <assert.h>
- static __inline void Scale2Ratio(int mode, int *hr, int *hs) {
+ static INLINE void Scale2Ratio(int mode, int *hr, int *hs) {
switch (mode) {
case NORMAL:
*hr = 1;
@@ -89,11 +90,13 @@
}
typedef struct {
- int Version; // 4 versions of bitstream defined 0 best quality/slowest decode, 3 lowest quality/fastest decode
- int Width; // width of data passed to the compressor
- int Height; // height of data passed to the compressor
+ int version; // 4 versions of bitstream defined:
+ // 0 - best quality/slowest decode,
+ // 3 - lowest quality/fastest decode
+ int width; // width of data passed to the compressor
+ int height; // height of data passed to the compressor
double frame_rate; // set to passed in framerate
- int target_bandwidth; // bandwidth to be used in kilobits per second
+ int64_t target_bandwidth; // bandwidth to be used in kilobits per second
int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
int Sharpness; // parameter used for sharpening output: recommendation 0:
@@ -134,9 +137,9 @@
int over_shoot_pct;
// buffering parameters
- int starting_buffer_level; // in seconds
- int optimal_buffer_level;
- int maximum_buffer_size;
+ int64_t starting_buffer_level; // in seconds
+ int64_t optimal_buffer_level;
+ int64_t maximum_buffer_size;
// controlling quality
int fixed_q;
@@ -159,10 +162,25 @@
int encode_breakout; // early breakout encode threshold : for video conf recommend 800
+ /* Bitfield defining the error resiliency features to enable.
+ * Can provide decodable frames after losses in previous
+ * frames and decodable partitions after losses in the same frame.
+ */
+ unsigned int error_resilient_mode;
+
+ /* Bitfield defining the parallel decoding mode where the
+ * decoding in successive frames may be conducted in parallel
+ * just by decoding the frame headers.
+ */
+ unsigned int frame_parallel_decoding_mode;
+
int arnr_max_frames;
int arnr_strength;
int arnr_type;
+ int tile_columns;
+ int tile_rows;
+
struct vpx_fixed_buf two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
@@ -195,8 +213,10 @@
int vp9_update_reference(VP9_PTR comp, int ref_frame_flags);
- int vp9_get_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd);
+ int vp9_copy_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+ int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb);
int vp9_set_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -37,8 +37,17 @@
#define QINDEX_RANGE (MAXQ + 1)
-#define NUM_YV12_BUFFERS 4
+#define NUM_REF_FRAMES 3
+#define NUM_REF_FRAMES_LG2 2
+// 1 scratch frame for the new frame, 3 for scaled references on the encoder
+// TODO(jkoleszar): These 3 extra references could probably come from the
+// normal reference pool.
+#define NUM_YV12_BUFFERS (NUM_REF_FRAMES + 4)
+
+#define NUM_FRAME_CONTEXTS_LG2 2
+#define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LG2)
+
#define COMP_PRED_CONTEXTS 2
typedef struct frame_contexts {
@@ -49,14 +58,24 @@
vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
- vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
+ vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
+ vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES];
+ vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_prob nzc_probs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_NODES];
+ vp9_prob nzc_probs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_NODES];
+ vp9_prob nzc_probs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_NODES];
+ vp9_prob nzc_probs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_NODES];
+ vp9_prob nzc_pcat_probs[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA];
+#endif
+
nmv_context nmvc;
nmv_context pre_nmvc;
vp9_prob pre_bmode_prob[VP9_NKF_BINTRAMODES - 1];
@@ -74,22 +93,43 @@
unsigned int sub_mv_ref_counts[SUBMVREF_COUNT][VP9_SUBMVREFS];
unsigned int mbsplit_counts[VP9_NUMMBSPLITS];
- vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs pre_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs pre_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs pre_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES];
+ vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES];
+ vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES];
+ vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_prob pre_nzc_probs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_NODES];
+ vp9_prob pre_nzc_probs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_NODES];
+ vp9_prob pre_nzc_probs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_NODES];
+ vp9_prob pre_nzc_probs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_NODES];
+ vp9_prob pre_nzc_pcat_probs[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA];
+#endif
- vp9_coeff_count coef_counts_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_count coef_counts_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_count coef_counts_4x4[BLOCK_TYPES];
+ vp9_coeff_count coef_counts_8x8[BLOCK_TYPES];
+ vp9_coeff_count coef_counts_16x16[BLOCK_TYPES];
+ vp9_coeff_count coef_counts_32x32[BLOCK_TYPES];
+ unsigned int eob_branch_counts[TX_SIZE_MAX_SB][BLOCK_TYPES][REF_TYPES]
+ [COEF_BANDS][PREV_COEF_CONTEXTS];
+#if CONFIG_CODE_NONZEROCOUNT
+ unsigned int nzc_counts_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_TOKENS];
+ unsigned int nzc_counts_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_TOKENS];
+ unsigned int nzc_counts_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_TOKENS];
+ unsigned int nzc_counts_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_TOKENS];
+ unsigned int nzc_pcat_counts[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA][2];
+#endif
+
nmv_context_counts NMVcount;
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS - 1];
@@ -128,13 +168,14 @@
struct vpx_internal_error_info error;
DECLARE_ALIGNED(16, int16_t, Y1dequant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, int16_t, Y2dequant[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, int16_t, UVdequant[QINDEX_RANGE][16]);
- int Width;
- int Height;
- int horiz_scale;
- int vert_scale;
+ int width;
+ int height;
+ int display_width;
+ int display_height;
+ int last_width;
+ int last_height;
YUV_TYPE clr_type;
CLAMP_TYPE clamp_type;
@@ -142,9 +183,16 @@
YV12_BUFFER_CONFIG *frame_to_show;
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
- int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
- int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
+ int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; /* reference counts */
+ int ref_frame_map[NUM_REF_FRAMES]; /* maps fb_idx to reference slot */
+ /* TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
+ * roll new_fb_idx into it.
+ */
+ int active_ref_idx[3]; /* each frame can reference 3 buffers */
+ int new_fb_idx;
+ struct scale_factors active_ref_scale[3];
+
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
@@ -173,8 +221,6 @@
int last_kf_gf_q; /* Q used on the last GF or KF */
int y1dc_delta_q;
- int y2dc_delta_q;
- int y2ac_delta_q;
int uvdc_delta_q;
int uvac_delta_q;
@@ -201,19 +247,13 @@
int filter_level;
int last_sharpness_level;
int sharpness_level;
+ int dering_enabled;
- int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
- int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
- int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
-
- int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
- int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
-
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
- /* Y,U,V,Y2 */
+ /* Y,U,V */
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
ENTROPY_CONTEXT_PLANES left_context[4]; /* (up to) 4 contexts "" */
@@ -250,9 +290,9 @@
vp9_prob mbskip_pred_probs[MBSKIP_CONTEXTS];
- FRAME_CONTEXT lfc_a; /* last alt ref entropy */
- FRAME_CONTEXT lfc; /* last frame entropy */
FRAME_CONTEXT fc; /* this frame entropy */
+ FRAME_CONTEXT frame_contexts[NUM_FRAME_CONTEXTS];
+ unsigned int frame_context_idx; /* Context to use/update */
unsigned int current_video_frame;
int near_boffset[3];
@@ -272,6 +312,60 @@
int use_interintra;
#endif
+ int error_resilient_mode;
+ int frame_parallel_decoding_mode;
+
+ int tile_columns, log2_tile_columns;
+ int cur_tile_mb_col_start, cur_tile_mb_col_end, cur_tile_col_idx;
+ int tile_rows, log2_tile_rows;
+ int cur_tile_mb_row_start, cur_tile_mb_row_end, cur_tile_row_idx;
} VP9_COMMON;
+static int get_free_fb(VP9_COMMON *cm) {
+ int i;
+ for (i = 0; i < NUM_YV12_BUFFERS; i++)
+ if (cm->fb_idx_ref_cnt[i] == 0)
+ break;
+
+ assert(i < NUM_YV12_BUFFERS);
+ cm->fb_idx_ref_cnt[i] = 1;
+ return i;
+}
+
+static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
+ if (buf[*idx] > 0)
+ buf[*idx]--;
+
+ *idx = new_idx;
+
+ buf[new_idx]++;
+}
+
+// TODO(debargha): merge the two functions
+static void set_mb_row(VP9_COMMON *cm, MACROBLOCKD *xd,
+ int mb_row, int block_size) {
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
+ xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
+
+ // Are edges available for intra prediction?
+ xd->up_available = (mb_row != 0);
+}
+
+static void set_mb_col(VP9_COMMON *cm, MACROBLOCKD *xd,
+ int mb_col, int block_size) {
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+ xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
+
+ // Are edges available for intra prediction?
+ xd->left_available = (mb_col > cm->cur_tile_mb_col_start);
+ xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end);
+}
+
+static int get_mb_row(const MACROBLOCKD *xd) {
+ return ((-xd->mb_to_top_edge) >> 7);
+}
+
+static int get_mb_col(const MACROBLOCKD *xd) {
+ return ((-xd->mb_to_left_edge) >> 7);
+}
#endif // VP9_COMMON_VP9_ONYXC_INT_H_
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -336,11 +336,8 @@
source->uv_height, source->uv_width, ppl);
}
-void vp9_de_noise(YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag) {
+void vp9_denoise(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *post,
+ int q, int low_var_thresh, int flag) {
double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
int ppl = (int)(level + .5);
(void) post;
@@ -424,9 +421,9 @@
*
* INPUTS : unsigned char *Start starting address of buffer to
* add gaussian noise to
- * unsigned int Width width of plane
- * unsigned int Height height of plane
- * int Pitch distance between subsequent lines of frame
+ * unsigned int width width of plane
+ * unsigned int height height of plane
+ * int pitch distance between subsequent lines of frame
* int q quantizer used to determine amount of noise
* to add
*
@@ -439,25 +436,25 @@
* SPECIAL NOTES : None.
*
****************************************************************************/
-void vp9_plane_add_noise_c(uint8_t *Start, char *noise,
+void vp9_plane_add_noise_c(uint8_t *start, char *noise,
char blackclamp[16],
char whiteclamp[16],
char bothclamp[16],
- unsigned int Width, unsigned int Height, int Pitch) {
+ unsigned int width, unsigned int height, int pitch) {
unsigned int i, j;
- for (i = 0; i < Height; i++) {
- uint8_t *Pos = Start + i * Pitch;
- char *Ref = (char *)(noise + (rand() & 0xff));
+ for (i = 0; i < height; i++) {
+ uint8_t *pos = start + i * pitch;
+ char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
- for (j = 0; j < Width; j++) {
- if (Pos[j] < blackclamp[0])
- Pos[j] = blackclamp[0];
+ for (j = 0; j < width; j++) {
+ if (pos[j] < blackclamp[0])
+ pos[j] = blackclamp[0];
- if (Pos[j] > 255 + whiteclamp[0])
- Pos[j] = 255 + whiteclamp[0];
+ if (pos[j] > 255 + whiteclamp[0])
+ pos[j] = 255 + whiteclamp[0];
- Pos[j] += Ref[j];
+ pos[j] += ref[j];
}
}
}
@@ -636,8 +633,8 @@
*dest = *oci->frame_to_show;
/* handle problem with extending borders */
- dest->y_width = oci->Width;
- dest->y_height = oci->Height;
+ dest->y_width = oci->width;
+ dest->y_height = oci->height;
dest->uv_height = dest->y_height / 2;
return 0;
@@ -1004,8 +1001,8 @@
*dest = oci->post_proc_buffer;
/* handle problem with extending borders */
- dest->y_width = oci->Width;
- dest->y_height = oci->Height;
+ dest->y_width = oci->width;
+ dest->y_height = oci->height;
dest->uv_height = dest->y_height / 2;
return 0;
--- a/vp9/common/vp9_postproc.h
+++ b/vp9/common/vp9_postproc.h
@@ -13,30 +13,26 @@
#define VP9_COMMON_VP9_POSTPROC_H_
#include "vpx_ports/mem.h"
+
struct postproc_state {
- int last_q;
- int last_noise;
- char noise[3072];
+ int last_q;
+ int last_noise;
+ char noise[3072];
DECLARE_ALIGNED(16, char, blackclamp[16]);
DECLARE_ALIGNED(16, char, whiteclamp[16]);
DECLARE_ALIGNED(16, char, bothclamp[16]);
};
+
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_ppflags.h"
+
int vp9_post_proc_frame(struct VP9Common *oci, YV12_BUFFER_CONFIG *dest,
vp9_ppflags_t *flags);
+void vp9_denoise(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post,
+ int q, int low_var_thresh, int flag);
-void vp9_de_noise(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag);
-
-void vp9_deblock(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag);
+void vp9_deblock(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post,
+ int q, int low_var_thresh, int flag);
#endif // VP9_COMMON_VP9_POSTPROC_H_
--- a/vp9/common/vp9_pragmas.h
+++ b/vp9/common/vp9_pragmas.h
@@ -14,6 +14,7 @@
#ifdef __INTEL_COMPILER
#pragma warning(disable:997 1011 170)
#endif
+
#ifdef _MSC_VER
#pragma warning(disable:4799)
#endif
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -29,14 +29,15 @@
// The prediction flags in these dummy entries are initialised to 0.
switch (pred_id) {
case PRED_SEG_ID:
- pred_context = (m - 1)->mbmi.seg_id_predicted +
- (m - cm->mode_info_stride)->mbmi.seg_id_predicted;
+ pred_context = (m - cm->mode_info_stride)->mbmi.seg_id_predicted;
+ if (xd->left_available)
+ pred_context += (m - 1)->mbmi.seg_id_predicted;
break;
-
case PRED_REF:
- pred_context = (m - 1)->mbmi.ref_predicted +
- (m - cm->mode_info_stride)->mbmi.ref_predicted;
+ pred_context = (m - cm->mode_info_stride)->mbmi.ref_predicted;
+ if (xd->left_available)
+ pred_context += (m - 1)->mbmi.ref_predicted;
break;
case PRED_COMP:
@@ -61,13 +62,14 @@
break;
case PRED_MBSKIP:
- pred_context = (m - 1)->mbmi.mb_skip_coeff +
- (m - cm->mode_info_stride)->mbmi.mb_skip_coeff;
+ pred_context = (m - cm->mode_info_stride)->mbmi.mb_skip_coeff;
+ if (xd->left_available)
+ pred_context += (m - 1)->mbmi.mb_skip_coeff;
break;
case PRED_SWITCHABLE_INTERP:
{
- int left_in_image = (m - 1)->mbmi.mb_in_image;
+ int left_in_image = xd->left_available && (m - 1)->mbmi.mb_in_image;
int above_in_image = (m - cm->mode_info_stride)->mbmi.mb_in_image;
int left_mode = (m - 1)->mbmi.mode;
int above_mode = (m - cm->mode_info_stride)->mbmi.mode;
@@ -98,8 +100,7 @@
break;
default:
- // TODO *** add error trap code.
- pred_context = 0;
+ pred_context = 0; // *** add error trap code.
break;
}
@@ -111,39 +112,23 @@
vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm,
const MACROBLOCKD *const xd,
PRED_ID pred_id) {
- vp9_prob pred_probability;
- int pred_context;
+ const int pred_context = vp9_get_pred_context(cm, xd, pred_id);
- // Get the appropriate prediction context
- pred_context = vp9_get_pred_context(cm, xd, pred_id);
-
switch (pred_id) {
case PRED_SEG_ID:
- pred_probability = cm->segment_pred_probs[pred_context];
- break;
-
+ return cm->segment_pred_probs[pred_context];
case PRED_REF:
- pred_probability = cm->ref_pred_probs[pred_context];
- break;
-
+ return cm->ref_pred_probs[pred_context];
case PRED_COMP:
// In keeping with convention elsewhre the probability returned is
// the probability of a "0" outcome which in this case means the
// probability of comp pred off.
- pred_probability = cm->prob_comppred[pred_context];
- break;
-
+ return cm->prob_comppred[pred_context];
case PRED_MBSKIP:
- pred_probability = cm->mbskip_pred_probs[pred_context];
- break;
-
+ return cm->mbskip_pred_probs[pred_context];
default:
- // TODO *** add error trap code.
- pred_probability = 128;
- break;
+ return 128; // *** add error trap code.
}
-
- return pred_probability;
}
// This function returns a context probability ptr for coding a given
@@ -151,43 +136,25 @@
const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
const MACROBLOCKD *const xd,
PRED_ID pred_id) {
- const vp9_prob *pred_probability;
- int pred_context;
+ const int pred_context = vp9_get_pred_context(cm, xd, pred_id);
- // Get the appropriate prediction context
- pred_context = vp9_get_pred_context(cm, xd, pred_id);
-
switch (pred_id) {
case PRED_SEG_ID:
- pred_probability = &cm->segment_pred_probs[pred_context];
- break;
-
+ return &cm->segment_pred_probs[pred_context];
case PRED_REF:
- pred_probability = &cm->ref_pred_probs[pred_context];
- break;
-
+ return &cm->ref_pred_probs[pred_context];
case PRED_COMP:
// In keeping with convention elsewhre the probability returned is
// the probability of a "0" outcome which in this case means the
// probability of comp pred off.
- pred_probability = &cm->prob_comppred[pred_context];
- break;
-
+ return &cm->prob_comppred[pred_context];
case PRED_MBSKIP:
- pred_probability = &cm->mbskip_pred_probs[pred_context];
- break;
-
+ return &cm->mbskip_pred_probs[pred_context];
case PRED_SWITCHABLE_INTERP:
- pred_probability = &cm->fc.switchable_interp_prob[pred_context][0];
- break;
-
+ return &cm->fc.switchable_interp_prob[pred_context][0];
default:
- // TODO *** add error trap code.
- pred_probability = NULL;
- break;
+ return NULL; // *** add error trap code.
}
-
- return pred_probability;
}
// This function returns the status of the given prediction signal.
@@ -194,28 +161,16 @@
// I.e. is the predicted value for the given signal correct.
unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd,
PRED_ID pred_id) {
- unsigned char pred_flag = 0;
-
switch (pred_id) {
case PRED_SEG_ID:
- pred_flag = xd->mode_info_context->mbmi.seg_id_predicted;
- break;
-
+ return xd->mode_info_context->mbmi.seg_id_predicted;
case PRED_REF:
- pred_flag = xd->mode_info_context->mbmi.ref_predicted;
- break;
-
+ return xd->mode_info_context->mbmi.ref_predicted;
case PRED_MBSKIP:
- pred_flag = xd->mode_info_context->mbmi.mb_skip_coeff;
- break;
-
+ return xd->mode_info_context->mbmi.mb_skip_coeff;
default:
- // TODO *** add error trap code.
- pred_flag = 0;
- break;
+ return 0; // *** add error trap code.
}
-
- return pred_flag;
}
// This function sets the status of the given prediction signal.
@@ -277,7 +232,7 @@
break;
default:
- // TODO *** add error trap code.
+ // *** add error trap code.
break;
}
}
@@ -322,7 +277,6 @@
MV_REFERENCE_FRAME pred_ref = LAST_FRAME;
int segment_id = xd->mode_info_context->mbmi.segment_id;
- int seg_ref_active;
int i;
unsigned char frame_allowed[MAX_REF_FRAMES] = {1, 1, 1, 1};
@@ -333,7 +287,7 @@
unsigned char above_left_in_image;
// Is segment coding ennabled
- seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
+ int seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
// Special case treatment if segment coding is enabled.
// Dont allow prediction of a reference frame that the segment
@@ -355,9 +309,10 @@
above_left = (m - 1 - cm->mode_info_stride)->mbmi.ref_frame;
// Are neighbours in image
- left_in_image = (m - 1)->mbmi.mb_in_image;
+ left_in_image = (m - 1)->mbmi.mb_in_image && xd->left_available;
above_in_image = (m - cm->mode_info_stride)->mbmi.mb_in_image;
- above_left_in_image = (m - 1 - cm->mode_info_stride)->mbmi.mb_in_image;
+ above_left_in_image = (m - 1 - cm->mode_info_stride)->mbmi.mb_in_image &&
+ xd->left_available;
// Adjust scores for candidate reference frames based on neigbours
if (frame_allowed[left] && left_in_image) {
@@ -385,9 +340,7 @@
// Functions to computes a set of modified reference frame probabilities
// to use when the prediction of the reference frame value fails
void vp9_calc_ref_probs(int *count, vp9_prob *probs) {
- int tot_count;
-
- tot_count = count[0] + count[1] + count[2] + count[3];
+ int tot_count = count[0] + count[1] + count[2] + count[3];
probs[0] = get_prob(count[0], tot_count);
tot_count -= count[0];
@@ -403,19 +356,12 @@
// they are not allowed for a given segment.
void vp9_compute_mod_refprobs(VP9_COMMON *const cm) {
int norm_cnt[MAX_REF_FRAMES];
- int intra_count;
- int inter_count;
- int last_count;
- int gfarf_count;
- int gf_count;
- int arf_count;
-
- intra_count = cm->prob_intra_coded;
- inter_count = (255 - intra_count);
- last_count = (inter_count * cm->prob_last_coded) / 255;
- gfarf_count = inter_count - last_count;
- gf_count = (gfarf_count * cm->prob_gf_coded) / 255;
- arf_count = gfarf_count - gf_count;
+ const int intra_count = cm->prob_intra_coded;
+ const int inter_count = (255 - intra_count);
+ const int last_count = (inter_count * cm->prob_last_coded) / 255;
+ const int gfarf_count = inter_count - last_count;
+ const int gf_count = (gfarf_count * cm->prob_gf_coded) / 255;
+ const int arf_count = gfarf_count - gf_count;
// Work out modified reference frame probabilities to use where prediction
// of the reference frame fails
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -8,16 +8,15 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/vp9_blockd.h"
-
#ifndef VP9_COMMON_VP9_PRED_COMMON_H_
#define VP9_COMMON_VP9_PRED_COMMON_H_
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_onyxc_int.h"
// Predicted items
typedef enum {
- PRED_SEG_ID = 0, // Segment identifier
+ PRED_SEG_ID = 0, // Segment identifier
PRED_REF = 1,
PRED_COMP = 2,
PRED_MBSKIP = 3,
@@ -24,32 +23,33 @@
PRED_SWITCHABLE_INTERP = 4
} PRED_ID;
-extern unsigned char vp9_get_pred_context(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd,
- PRED_ID pred_id);
+unsigned char vp9_get_pred_context(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
-extern vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd,
- PRED_ID pred_id);
+vp9_prob vp9_get_pred_prob(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
-extern const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd,
- PRED_ID pred_id);
+const vp9_prob *vp9_get_pred_probs(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
-extern unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd,
- PRED_ID pred_id);
+unsigned char vp9_get_pred_flag(const MACROBLOCKD *const xd,
+ PRED_ID pred_id);
-extern void vp9_set_pred_flag(MACROBLOCKD *const xd,
- PRED_ID pred_id,
- unsigned char pred_flag);
+void vp9_set_pred_flag(MACROBLOCKD *const xd,
+ PRED_ID pred_id,
+ unsigned char pred_flag);
-extern unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd,
- int MbIndex);
+unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ int MbIndex);
-extern MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm,
- const MACROBLOCKD *const xd);
-extern void vp9_compute_mod_refprobs(VP9_COMMON *const cm);
+MV_REFERENCE_FRAME vp9_get_pred_ref(const VP9_COMMON *const cm,
+ const MACROBLOCKD *const xd);
+
+void vp9_compute_mod_refprobs(VP9_COMMON *const cm);
#endif // VP9_COMMON_VP9_PRED_COMMON_H_
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_quant_common.h"
static int dc_qlookup[QINDEX_RANGE];
@@ -24,7 +24,7 @@
for (i = 0; i < QINDEX_RANGE; i++) {
ac_qlookup[i] = current_val;
- current_val = (int)((double)current_val * 1.02);
+ current_val = (int)(current_val * 1.02);
if (current_val == last_val)
current_val++;
last_val = current_val;
@@ -38,88 +38,18 @@
}
}
-int vp9_dc_quant(int QIndex, int Delta) {
- int retval;
-
- QIndex = QIndex + Delta;
-
- if (QIndex > MAXQ)
- QIndex = MAXQ;
- else if (QIndex < 0)
- QIndex = 0;
-
- retval = dc_qlookup[ QIndex ];
- return retval;
+int vp9_dc_quant(int qindex, int delta) {
+ return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
}
-int vp9_dc2quant(int QIndex, int Delta) {
- int retval;
-
- QIndex = QIndex + Delta;
-
- if (QIndex > MAXQ)
- QIndex = MAXQ;
- else if (QIndex < 0)
- QIndex = 0;
-
- retval = dc_qlookup[ QIndex ];
-
- return retval;
-
+int vp9_dc_uv_quant(int qindex, int delta) {
+ return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
}
-int vp9_dc_uv_quant(int QIndex, int Delta) {
- int retval;
- QIndex = QIndex + Delta;
-
- if (QIndex > MAXQ)
- QIndex = MAXQ;
- else if (QIndex < 0)
- QIndex = 0;
-
- retval = dc_qlookup[ QIndex ];
-
- return retval;
+int vp9_ac_yquant(int qindex) {
+ return ac_qlookup[clamp(qindex, 0, MAXQ)];
}
-int vp9_ac_yquant(int QIndex) {
- int retval;
-
- if (QIndex > MAXQ)
- QIndex = MAXQ;
- else if (QIndex < 0)
- QIndex = 0;
-
- retval = ac_qlookup[ QIndex ];
- return retval;
-}
-
-int vp9_ac2quant(int QIndex, int Delta) {
- int retval;
-
- QIndex = QIndex + Delta;
-
- if (QIndex > MAXQ)
- QIndex = MAXQ;
- else if (QIndex < 0)
- QIndex = 0;
-
- retval = (ac_qlookup[ QIndex ] * 775) / 1000;
- if (retval < 4)
- retval = 4;
-
- return retval;
-}
-int vp9_ac_uv_quant(int QIndex, int Delta) {
- int retval;
-
- QIndex = QIndex + Delta;
-
- if (QIndex > MAXQ)
- QIndex = MAXQ;
- else if (QIndex < 0)
- QIndex = 0;
-
- retval = ac_qlookup[ QIndex ];
- return retval;
+int vp9_ac_uv_quant(int qindex, int delta) {
+ return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
}
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -11,16 +11,15 @@
#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_
#define VP9_COMMON_VP9_QUANT_COMMON_H_
-#include "string.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_onyxc_int.h"
-extern void vp9_init_quant_tables(void);
-extern int vp9_ac_yquant(int QIndex);
-extern int vp9_dc_quant(int QIndex, int Delta);
-extern int vp9_dc2quant(int QIndex, int Delta);
-extern int vp9_ac2quant(int QIndex, int Delta);
-extern int vp9_dc_uv_quant(int QIndex, int Delta);
-extern int vp9_ac_uv_quant(int QIndex, int Delta);
+void vp9_init_quant_tables();
+int vp9_ac_yquant(int qindex);
+int vp9_dc_quant(int qindex, int delta);
+int vp9_dc2quant(int qindex, int delta);
+int vp9_ac2quant(int qindex, int delta);
+int vp9_dc_uv_quant(int qindex, int delta);
+int vp9_ac_uv_quant(int qindex, int delta);
#endif // VP9_COMMON_VP9_QUANT_COMMON_H_
--- a/vp9/common/vp9_recon.c
+++ b/vp9/common/vp9_recon.c
@@ -117,7 +117,7 @@
void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) {
int x, y, stride = xd->block[0].dst_stride;
- int16_t *diff = xd->sb_coeff_data.diff;
+ int16_t *diff = xd->diff;
for (y = 0; y < 32; y++) {
for (x = 0; x < 32; x++) {
@@ -130,8 +130,8 @@
void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
int x, y, stride = xd->block[16].dst_stride;
- int16_t *udiff = xd->sb_coeff_data.diff + 1024;
- int16_t *vdiff = xd->sb_coeff_data.diff + 1280;
+ int16_t *udiff = xd->diff + 1024;
+ int16_t *vdiff = xd->diff + 1280;
for (y = 0; y < 16; y++) {
for (x = 0; x < 16; x++) {
@@ -142,6 +142,36 @@
vdst += stride;
udiff += 16;
vdiff += 16;
+ }
+}
+
+void vp9_recon_sb64y_s_c(MACROBLOCKD *xd, uint8_t *dst) {
+ int x, y, stride = xd->block[0].dst_stride;
+ int16_t *diff = xd->diff;
+
+ for (y = 0; y < 64; y++) {
+ for (x = 0; x < 64; x++) {
+ dst[x] = clip_pixel(dst[x] + diff[x]);
+ }
+ dst += stride;
+ diff += 64;
+ }
+}
+
+void vp9_recon_sb64uv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
+ int x, y, stride = xd->block[16].dst_stride;
+ int16_t *udiff = xd->diff + 4096;
+ int16_t *vdiff = xd->diff + 4096 + 1024;
+
+ for (y = 0; y < 32; y++) {
+ for (x = 0; x < 32; x++) {
+ udst[x] = clip_pixel(udst[x] + udiff[x]);
+ vdst[x] = clip_pixel(vdst[x] + vdiff[x]);
+ }
+ udst += stride;
+ vdst += stride;
+ udiff += 32;
+ vdiff += 32;
}
}
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -8,66 +8,252 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
+void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
+ YV12_BUFFER_CONFIG *other,
+ int this_w, int this_h) {
+ int other_h = other->y_crop_height;
+ int other_w = other->y_crop_width;
+
+ scale->x_num = other_w;
+ scale->x_den = this_w;
+ scale->x_offset_q4 = 0; // calculated per-mb
+ scale->x_step_q4 = 16 * other_w / this_w;
+
+ scale->y_num = other_h;
+ scale->y_den = this_h;
+ scale->y_offset_q4 = 0; // calculated per-mb
+ scale->y_step_q4 = 16 * other_h / this_h;
+
+ // TODO(agrange): Investigate the best choice of functions to use here
+ // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what
+ // to do at full-pel offsets. The current selection, where the filter is
+ // applied in one direction only, and not at all for 0,0, seems to give the
+ // best quality, but it may be worth trying an additional mode that does
+ // do the filtering on full-pel.
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+ if (scale->x_step_q4 == 16) {
+ if (scale->y_step_q4 == 16) {
+ // No scaling in either direction.
+ scale->predict[0][0][0] = vp9_convolve_copy;
+ scale->predict[0][0][1] = vp9_convolve_1by8;
+ scale->predict[0][0][2] = vp9_convolve_qtr;
+ scale->predict[0][0][3] = vp9_convolve_3by8;
+ scale->predict[0][0][4] = vp9_convolve_avg;
+ scale->predict[0][0][5] = vp9_convolve_5by8;
+ scale->predict[0][0][6] = vp9_convolve_3qtr;
+ scale->predict[0][0][7] = vp9_convolve_7by8;
+ scale->predict[0][1][0] = vp9_convolve8_vert;
+ scale->predict[0][1][1] = vp9_convolve8_1by8_vert;
+ scale->predict[0][1][2] = vp9_convolve8_qtr_vert;
+ scale->predict[0][1][3] = vp9_convolve8_3by8_vert;
+ scale->predict[0][1][4] = vp9_convolve8_avg_vert;
+ scale->predict[0][1][5] = vp9_convolve8_5by8_vert;
+ scale->predict[0][1][6] = vp9_convolve8_3qtr_vert;
+ scale->predict[0][1][7] = vp9_convolve8_7by8_vert;
+ scale->predict[1][0][0] = vp9_convolve8_horiz;
+ scale->predict[1][0][1] = vp9_convolve8_1by8_horiz;
+ scale->predict[1][0][2] = vp9_convolve8_qtr_horiz;
+ scale->predict[1][0][3] = vp9_convolve8_3by8_horiz;
+ scale->predict[1][0][4] = vp9_convolve8_avg_horiz;
+ scale->predict[1][0][5] = vp9_convolve8_5by8_horiz;
+ scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz;
+ scale->predict[1][0][7] = vp9_convolve8_7by8_horiz;
+ } else {
+ // No scaling in x direction. Must always scale in the y direction.
+ scale->predict[0][0][0] = vp9_convolve8_vert;
+ scale->predict[0][0][1] = vp9_convolve8_1by8_vert;
+ scale->predict[0][0][2] = vp9_convolve8_qtr_vert;
+ scale->predict[0][0][3] = vp9_convolve8_3by8_vert;
+ scale->predict[0][0][4] = vp9_convolve8_avg_vert;
+ scale->predict[0][0][5] = vp9_convolve8_5by8_vert;
+ scale->predict[0][0][6] = vp9_convolve8_3qtr_vert;
+ scale->predict[0][0][7] = vp9_convolve8_7by8_vert;
+ scale->predict[0][1][0] = vp9_convolve8_vert;
+ scale->predict[0][1][1] = vp9_convolve8_1by8_vert;
+ scale->predict[0][1][2] = vp9_convolve8_qtr_vert;
+ scale->predict[0][1][3] = vp9_convolve8_3by8_vert;
+ scale->predict[0][1][4] = vp9_convolve8_avg_vert;
+ scale->predict[0][1][5] = vp9_convolve8_5by8_vert;
+ scale->predict[0][1][6] = vp9_convolve8_3qtr_vert;
+ scale->predict[0][1][7] = vp9_convolve8_7by8_vert;
+ scale->predict[1][0][0] = vp9_convolve8;
+ scale->predict[1][0][1] = vp9_convolve8_1by8;
+ scale->predict[1][0][2] = vp9_convolve8_qtr;
+ scale->predict[1][0][3] = vp9_convolve8_3by8;
+ scale->predict[1][0][4] = vp9_convolve8_avg;
+ scale->predict[1][0][5] = vp9_convolve8_5by8;
+ scale->predict[1][0][6] = vp9_convolve8_3qtr;
+ scale->predict[1][0][7] = vp9_convolve8_7by8;
+ }
+ } else {
+ if (scale->y_step_q4 == 16) {
+ // No scaling in the y direction. Must always scale in the x direction.
+ scale->predict[0][0][0] = vp9_convolve8_horiz;
+ scale->predict[0][0][1] = vp9_convolve8_1by8_horiz;
+ scale->predict[0][0][2] = vp9_convolve8_qtr_horiz;
+ scale->predict[0][0][3] = vp9_convolve8_3by8_horiz;
+ scale->predict[0][0][4] = vp9_convolve8_avg_horiz;
+ scale->predict[0][0][5] = vp9_convolve8_5by8_horiz;
+ scale->predict[0][0][6] = vp9_convolve8_3qtr_horiz;
+ scale->predict[0][0][7] = vp9_convolve8_7by8_horiz;
+ scale->predict[0][1][0] = vp9_convolve8;
+ scale->predict[0][1][1] = vp9_convolve8_1by8;
+ scale->predict[0][1][2] = vp9_convolve8_qtr;
+ scale->predict[0][1][3] = vp9_convolve8_3by8;
+ scale->predict[0][1][4] = vp9_convolve8_avg;
+ scale->predict[0][1][5] = vp9_convolve8_5by8;
+ scale->predict[0][1][6] = vp9_convolve8_3qtr;
+ scale->predict[0][1][7] = vp9_convolve8_7by8;
+ scale->predict[1][0][0] = vp9_convolve8_horiz;
+ scale->predict[1][0][1] = vp9_convolve8_1by8_horiz;
+ scale->predict[1][0][2] = vp9_convolve8_qtr_horiz;
+ scale->predict[1][0][3] = vp9_convolve8_3by8_horiz;
+ scale->predict[1][0][4] = vp9_convolve8_avg_horiz;
+ scale->predict[1][0][5] = vp9_convolve8_5by8_horiz;
+ scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz;
+ scale->predict[1][0][7] = vp9_convolve8_7by8_horiz;
+ } else {
+ // Must always scale in both directions.
+ scale->predict[0][0][0] = vp9_convolve8;
+ scale->predict[0][0][1] = vp9_convolve8_1by8;
+ scale->predict[0][0][2] = vp9_convolve8_qtr;
+ scale->predict[0][0][3] = vp9_convolve8_3by8;
+ scale->predict[0][0][4] = vp9_convolve8_avg;
+ scale->predict[0][0][5] = vp9_convolve8_5by8;
+ scale->predict[0][0][6] = vp9_convolve8_3qtr;
+ scale->predict[0][0][7] = vp9_convolve8_7by8;
+ scale->predict[0][1][0] = vp9_convolve8;
+ scale->predict[0][1][1] = vp9_convolve8_1by8;
+ scale->predict[0][1][2] = vp9_convolve8_qtr;
+ scale->predict[0][1][3] = vp9_convolve8_3by8;
+ scale->predict[0][1][4] = vp9_convolve8_avg;
+ scale->predict[0][1][5] = vp9_convolve8_5by8;
+ scale->predict[0][1][6] = vp9_convolve8_3qtr;
+ scale->predict[0][1][7] = vp9_convolve8_7by8;
+ scale->predict[1][0][0] = vp9_convolve8;
+ scale->predict[1][0][1] = vp9_convolve8_1by8;
+ scale->predict[1][0][2] = vp9_convolve8_qtr;
+ scale->predict[1][0][3] = vp9_convolve8_3by8;
+ scale->predict[1][0][4] = vp9_convolve8_avg;
+ scale->predict[1][0][5] = vp9_convolve8_5by8;
+ scale->predict[1][0][6] = vp9_convolve8_3qtr;
+ scale->predict[1][0][7] = vp9_convolve8_7by8;
+ }
+ }
+ // 2D subpel motion always gets filtered in both directions
+ scale->predict[1][1][0] = vp9_convolve8;
+ scale->predict[1][1][1] = vp9_convolve8_1by8;
+ scale->predict[1][1][2] = vp9_convolve8_qtr;
+ scale->predict[1][1][3] = vp9_convolve8_3by8;
+ scale->predict[1][1][4] = vp9_convolve8_avg;
+ scale->predict[1][1][5] = vp9_convolve8_5by8;
+ scale->predict[1][1][6] = vp9_convolve8_3qtr;
+ scale->predict[1][1][7] = vp9_convolve8_7by8;
+}
+#else
+ if (scale->x_step_q4 == 16) {
+ if (scale->y_step_q4 == 16) {
+ // No scaling in either direction.
+ scale->predict[0][0][0] = vp9_convolve_copy;
+ scale->predict[0][0][1] = vp9_convolve_avg;
+ scale->predict[0][1][0] = vp9_convolve8_vert;
+ scale->predict[0][1][1] = vp9_convolve8_avg_vert;
+ scale->predict[1][0][0] = vp9_convolve8_horiz;
+ scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
+ } else {
+ // No scaling in x direction. Must always scale in the y direction.
+ scale->predict[0][0][0] = vp9_convolve8_vert;
+ scale->predict[0][0][1] = vp9_convolve8_avg_vert;
+ scale->predict[0][1][0] = vp9_convolve8_vert;
+ scale->predict[0][1][1] = vp9_convolve8_avg_vert;
+ scale->predict[1][0][0] = vp9_convolve8;
+ scale->predict[1][0][1] = vp9_convolve8_avg;
+ }
+ } else {
+ if (scale->y_step_q4 == 16) {
+ // No scaling in the y direction. Must always scale in the x direction.
+ scale->predict[0][0][0] = vp9_convolve8_horiz;
+ scale->predict[0][0][1] = vp9_convolve8_avg_horiz;
+ scale->predict[0][1][0] = vp9_convolve8;
+ scale->predict[0][1][1] = vp9_convolve8_avg;
+ scale->predict[1][0][0] = vp9_convolve8_horiz;
+ scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
+ } else {
+ // Must always scale in both directions.
+ scale->predict[0][0][0] = vp9_convolve8;
+ scale->predict[0][0][1] = vp9_convolve8_avg;
+ scale->predict[0][1][0] = vp9_convolve8;
+ scale->predict[0][1][1] = vp9_convolve8_avg;
+ scale->predict[1][0][0] = vp9_convolve8;
+ scale->predict[1][0][1] = vp9_convolve8_avg;
+ }
+ }
+ // 2D subpel motion always gets filtered in both directions
+ scale->predict[1][1][0] = vp9_convolve8;
+ scale->predict[1][1][1] = vp9_convolve8_avg;
+}
+#endif
+
void vp9_setup_interp_filters(MACROBLOCKD *xd,
INTERPOLATIONFILTERTYPE mcomp_filter_type,
VP9_COMMON *cm) {
-#if CONFIG_ENABLE_6TAP
- if (mcomp_filter_type == SIXTAP) {
- xd->subpixel_predict4x4 = vp9_sixtap_predict4x4;
- xd->subpixel_predict8x4 = vp9_sixtap_predict8x4;
- xd->subpixel_predict8x8 = vp9_sixtap_predict8x8;
- xd->subpixel_predict16x16 = vp9_sixtap_predict16x16;
- xd->subpixel_predict_avg4x4 = vp9_sixtap_predict_avg4x4;
- xd->subpixel_predict_avg8x8 = vp9_sixtap_predict_avg8x8;
- xd->subpixel_predict_avg16x16 = vp9_sixtap_predict_avg16x16;
- } else {
-#endif
- if (mcomp_filter_type == EIGHTTAP || mcomp_filter_type == SWITCHABLE) {
- xd->subpixel_predict4x4 = vp9_eighttap_predict4x4;
- xd->subpixel_predict8x4 = vp9_eighttap_predict8x4;
- xd->subpixel_predict8x8 = vp9_eighttap_predict8x8;
- xd->subpixel_predict16x16 = vp9_eighttap_predict16x16;
- xd->subpixel_predict_avg4x4 = vp9_eighttap_predict_avg4x4;
- xd->subpixel_predict_avg8x8 = vp9_eighttap_predict_avg8x8;
- xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16;
- } else if (mcomp_filter_type == EIGHTTAP_SMOOTH) {
- xd->subpixel_predict4x4 = vp9_eighttap_predict4x4_smooth;
- xd->subpixel_predict8x4 = vp9_eighttap_predict8x4_smooth;
- xd->subpixel_predict8x8 = vp9_eighttap_predict8x8_smooth;
- xd->subpixel_predict16x16 = vp9_eighttap_predict16x16_smooth;
- xd->subpixel_predict_avg4x4 = vp9_eighttap_predict_avg4x4_smooth;
- xd->subpixel_predict_avg8x8 = vp9_eighttap_predict_avg8x8_smooth;
- xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16_smooth;
- } else if (mcomp_filter_type == EIGHTTAP_SHARP) {
- xd->subpixel_predict4x4 = vp9_eighttap_predict4x4_sharp;
- xd->subpixel_predict8x4 = vp9_eighttap_predict8x4_sharp;
- xd->subpixel_predict8x8 = vp9_eighttap_predict8x8_sharp;
- xd->subpixel_predict16x16 = vp9_eighttap_predict16x16_sharp;
- xd->subpixel_predict_avg4x4 = vp9_eighttap_predict_avg4x4_sharp;
- xd->subpixel_predict_avg8x8 = vp9_eighttap_predict_avg8x8_sharp;
- xd->subpixel_predict_avg16x16 = vp9_eighttap_predict_avg16x16_sharp_c;
- } else {
- xd->subpixel_predict4x4 = vp9_bilinear_predict4x4;
- xd->subpixel_predict8x4 = vp9_bilinear_predict8x4;
- xd->subpixel_predict8x8 = vp9_bilinear_predict8x8;
- xd->subpixel_predict16x16 = vp9_bilinear_predict16x16;
- xd->subpixel_predict_avg4x4 = vp9_bilinear_predict_avg4x4;
- xd->subpixel_predict_avg8x8 = vp9_bilinear_predict_avg8x8;
- xd->subpixel_predict_avg16x16 = vp9_bilinear_predict_avg16x16;
+ int i;
+
+ /* Calculate scaling factors for each of the 3 available references */
+ for (i = 0; i < 3; ++i) {
+ if (cm->active_ref_idx[i] >= NUM_YV12_BUFFERS) {
+ memset(&cm->active_ref_scale[i], 0, sizeof(cm->active_ref_scale[i]));
+ continue;
+ }
+
+ vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i],
+ &cm->yv12_fb[cm->active_ref_idx[i]],
+ cm->width, cm->height);
}
-#if CONFIG_ENABLE_6TAP
+
+ if (xd->mode_info_context) {
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+
+ set_scale_factors(xd,
+ mbmi->ref_frame - 1,
+ mbmi->second_ref_frame - 1,
+ cm->active_ref_scale);
}
+
+
+ switch (mcomp_filter_type) {
+ case EIGHTTAP:
+ case SWITCHABLE:
+ xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_8;
+ break;
+ case EIGHTTAP_SMOOTH:
+ xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_8lp;
+ break;
+ case EIGHTTAP_SHARP:
+ xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_8s;
+ break;
+ case BILINEAR:
+ xd->subpix.filter_x = xd->subpix.filter_y = vp9_bilinear_filters;
+ break;
+#if CONFIG_ENABLE_6TAP
+ case SIXTAP:
+ xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_6;
+ break;
#endif
+ }
+ assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0);
}
-void vp9_copy_mem16x16_c(uint8_t *src,
+void vp9_copy_mem16x16_c(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride) {
@@ -93,10 +279,10 @@
dst[15] = src[15];
#else
- ((uint32_t *)dst)[0] = ((uint32_t *)src)[0];
- ((uint32_t *)dst)[1] = ((uint32_t *)src)[1];
- ((uint32_t *)dst)[2] = ((uint32_t *)src)[2];
- ((uint32_t *)dst)[3] = ((uint32_t *)src)[3];
+ ((uint32_t *)dst)[0] = ((const uint32_t *)src)[0];
+ ((uint32_t *)dst)[1] = ((const uint32_t *)src)[1];
+ ((uint32_t *)dst)[2] = ((const uint32_t *)src)[2];
+ ((uint32_t *)dst)[3] = ((const uint32_t *)src)[3];
#endif
src += src_stride;
@@ -104,25 +290,7 @@
}
}
-void vp9_avg_mem16x16_c(uint8_t *src,
- int src_stride,
- uint8_t *dst,
- int dst_stride) {
- int r;
-
- for (r = 0; r < 16; r++) {
- int n;
-
- for (n = 0; n < 16; n++) {
- dst[n] = (dst[n] + src[n] + 1) >> 1;
- }
-
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void vp9_copy_mem8x8_c(uint8_t *src,
+void vp9_copy_mem8x8_c(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride) {
@@ -139,8 +307,8 @@
dst[6] = src[6];
dst[7] = src[7];
#else
- ((uint32_t *)dst)[0] = ((uint32_t *)src)[0];
- ((uint32_t *)dst)[1] = ((uint32_t *)src)[1];
+ ((uint32_t *)dst)[0] = ((const uint32_t *)src)[0];
+ ((uint32_t *)dst)[1] = ((const uint32_t *)src)[1];
#endif
src += src_stride;
dst += dst_stride;
@@ -147,25 +315,7 @@
}
}
-void vp9_avg_mem8x8_c(uint8_t *src,
- int src_stride,
- uint8_t *dst,
- int dst_stride) {
- int r;
-
- for (r = 0; r < 8; r++) {
- int n;
-
- for (n = 0; n < 8; n++) {
- dst[n] = (dst[n] + src[n] + 1) >> 1;
- }
-
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void vp9_copy_mem8x4_c(uint8_t *src,
+void vp9_copy_mem8x4_c(const uint8_t *src,
int src_stride,
uint8_t *dst,
int dst_stride) {
@@ -182,8 +332,8 @@
dst[6] = src[6];
dst[7] = src[7];
#else
- ((uint32_t *)dst)[0] = ((uint32_t *)src)[0];
- ((uint32_t *)dst)[1] = ((uint32_t *)src)[1];
+ ((uint32_t *)dst)[0] = ((const uint32_t *)src)[0];
+ ((uint32_t *)dst)[1] = ((const uint32_t *)src)[1];
#endif
src += src_stride;
dst += dst_stride;
@@ -190,236 +340,193 @@
}
}
-void vp9_build_inter_predictors_b(BLOCKD *d, int pitch, vp9_subpix_fn_t sppf) {
- int r;
- uint8_t *ptr_base;
- uint8_t *ptr;
- uint8_t *pred_ptr = d->predictor;
- int_mv mv;
+static void set_scaled_offsets(struct scale_factors *scale,
+ int row, int col) {
+ const int x_q4 = 16 * col;
+ const int y_q4 = 16 * row;
- ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
+ scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf;
+ scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf;
+}
- if (mv.as_mv.row & 7 || mv.as_mv.col & 7) {
- ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
- (mv.as_mv.col >> 3);
- sppf(ptr, d->pre_stride, (mv.as_mv.col & 7) << 1, (mv.as_mv.row & 7) << 1,
- pred_ptr, pitch);
- } else {
- ptr_base += d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
- (mv.as_mv.col >> 3);
- ptr = ptr_base;
+static int32_t scale_motion_vector_component_q3(int mv_q3,
+ int num,
+ int den,
+ int offset_q4) {
+ // returns the scaled and offset value of the mv component.
+ const int32_t mv_q4 = mv_q3 << 1;
- for (r = 0; r < 4; r++) {
-#if !(CONFIG_FAST_UNALIGNED)
- pred_ptr[0] = ptr[0];
- pred_ptr[1] = ptr[1];
- pred_ptr[2] = ptr[2];
- pred_ptr[3] = ptr[3];
-#else
- *(uint32_t *)pred_ptr = *(uint32_t *)ptr;
-#endif
- pred_ptr += pitch;
- ptr += d->pre_stride;
- }
- }
+ /* TODO(jkoleszar): make fixed point, or as a second multiply? */
+ return mv_q4 * num / den + offset_q4;
}
-/*
- * Similar to vp9_build_inter_predictors_b(), but instead of storing the
- * results in d->predictor, we average the contents of d->predictor (which
- * come from an earlier call to vp9_build_inter_predictors_b()) with the
- * predictor of the second reference frame / motion vector.
- */
-void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch,
- vp9_subpix_fn_t sppf) {
- int r;
- uint8_t *ptr_base;
- uint8_t *ptr;
- uint8_t *pred_ptr = d->predictor;
- int_mv mv;
+static int32_t scale_motion_vector_component_q4(int mv_q4,
+ int num,
+ int den,
+ int offset_q4) {
+ // returns the scaled and offset value of the mv component.
- ptr_base = *(d->base_second_pre);
- mv.as_int = d->bmi.as_mv.second.as_int;
+ /* TODO(jkoleszar): make fixed point, or as a second multiply? */
+ return mv_q4 * num / den + offset_q4;
+}
- if (mv.as_mv.row & 7 || mv.as_mv.col & 7) {
- ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
- (mv.as_mv.col >> 3);
- sppf(ptr, d->pre_stride, (mv.as_mv.col & 7) << 1, (mv.as_mv.row & 7) << 1,
- pred_ptr, pitch);
- } else {
- ptr_base += d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
- (mv.as_mv.col >> 3);
- ptr = ptr_base;
+static int_mv32 scale_motion_vector_q3_to_q4(
+ const int_mv *src_mv,
+ const struct scale_factors *scale) {
+ // returns mv * scale + offset
+ int_mv32 result;
- for (r = 0; r < 4; r++) {
- pred_ptr[0] = (pred_ptr[0] + ptr[0] + 1) >> 1;
- pred_ptr[1] = (pred_ptr[1] + ptr[1] + 1) >> 1;
- pred_ptr[2] = (pred_ptr[2] + ptr[2] + 1) >> 1;
- pred_ptr[3] = (pred_ptr[3] + ptr[3] + 1) >> 1;
- pred_ptr += pitch;
- ptr += d->pre_stride;
- }
- }
+ result.as_mv.row = scale_motion_vector_component_q3(src_mv->as_mv.row,
+ scale->y_num,
+ scale->y_den,
+ scale->y_offset_q4);
+ result.as_mv.col = scale_motion_vector_component_q3(src_mv->as_mv.col,
+ scale->x_num,
+ scale->x_den,
+ scale->x_offset_q4);
+ return result;
}
-void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
- uint8_t *ptr_base;
- uint8_t *ptr;
- uint8_t *pred_ptr = d->predictor;
- int_mv mv;
-
- ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
- ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
- (mv.as_mv.col >> 3);
-
- if (mv.as_mv.row & 7 || mv.as_mv.col & 7) {
- xd->subpixel_predict8x8(ptr, d->pre_stride, (mv.as_mv.col & 7) << 1,
- (mv.as_mv.row & 7) << 1, pred_ptr, pitch);
- } else {
- vp9_copy_mem8x8(ptr, d->pre_stride, pred_ptr, pitch);
- }
+void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int_mv *mv_q3,
+ const struct scale_factors *scale,
+ int w, int h, int weight,
+ const struct subpix_fn_table *subpix) {
+ int_mv32 mv = scale_motion_vector_q3_to_q4(mv_q3, scale);
+ src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4);
+ scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][weight](
+ src, src_stride, dst, dst_stride,
+ subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4,
+ subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4,
+ w, h);
}
-/*
- * Similar to build_inter_predictors_4b(), but instead of storing the
- * results in d->predictor, we average the contents of d->predictor (which
- * come from an earlier call to build_inter_predictors_4b()) with the
- * predictor of the second reference frame / motion vector.
+/* Like vp9_build_inter_predictor, but takes the full-pel part of the
+ * mv separately, and the fractional part as a q4.
*/
-void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd,
- BLOCKD *d, int pitch) {
- uint8_t *ptr_base;
- uint8_t *ptr;
- uint8_t *pred_ptr = d->predictor;
- int_mv mv;
+void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int_mv *fullpel_mv_q3,
+ const int_mv *frac_mv_q4,
+ const struct scale_factors *scale,
+ int w, int h, int weight,
+ const struct subpix_fn_table *subpix) {
+ const int mv_row_q4 = ((fullpel_mv_q3->as_mv.row >> 3) << 4)
+ + (frac_mv_q4->as_mv.row & 0xf);
+ const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4)
+ + (frac_mv_q4->as_mv.col & 0xf);
+ const int scaled_mv_row_q4 =
+ scale_motion_vector_component_q4(mv_row_q4, scale->y_num, scale->y_den,
+ scale->y_offset_q4);
+ const int scaled_mv_col_q4 =
+ scale_motion_vector_component_q4(mv_col_q4, scale->x_num, scale->x_den,
+ scale->x_offset_q4);
+ const int subpel_x = scaled_mv_col_q4 & 15;
+ const int subpel_y = scaled_mv_row_q4 & 15;
- ptr_base = *(d->base_second_pre);
- mv.as_int = d->bmi.as_mv.second.as_int;
- ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
- (mv.as_mv.col >> 3);
-
- if (mv.as_mv.row & 7 || mv.as_mv.col & 7) {
- xd->subpixel_predict_avg8x8(ptr, d->pre_stride, (mv.as_mv.col & 7) << 1,
- (mv.as_mv.row & 7) << 1, pred_ptr, pitch);
- } else {
- vp9_avg_mem8x8(ptr, d->pre_stride, pred_ptr, pitch);
- }
+ src += (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4);
+ scale->predict[!!subpel_x][!!subpel_y][weight](
+ src, src_stride, dst, dst_stride,
+ subpix->filter_x[subpel_x], scale->x_step_q4,
+ subpix->filter_y[subpel_y], scale->y_step_q4,
+ w, h);
}
-static void build_inter_predictors2b(MACROBLOCKD *xd, BLOCKD *d, int pitch) {
- uint8_t *ptr_base;
- uint8_t *ptr;
- uint8_t *pred_ptr = d->predictor;
- int_mv mv;
+static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1,
+ struct scale_factors *scale,
+ uint8_t *predictor,
+ int block_size, int stride,
+ int which_mv, int weight,
+ int width, int height,
+ const struct subpix_fn_table *subpix,
+ int row, int col) {
+ assert(d1->predictor - d0->predictor == block_size);
+ assert(d1->pre == d0->pre + block_size);
- ptr_base = *(d->base_pre);
- mv.as_int = d->bmi.as_mv.first.as_int;
- ptr = ptr_base + d->pre + (mv.as_mv.row >> 3) * d->pre_stride +
- (mv.as_mv.col >> 3);
+ set_scaled_offsets(&scale[which_mv], row, col);
- if (mv.as_mv.row & 7 || mv.as_mv.col & 7) {
- xd->subpixel_predict8x4(ptr, d->pre_stride, (mv.as_mv.col & 7) << 1,
- (mv.as_mv.row & 7) << 1, pred_ptr, pitch);
- } else {
- vp9_copy_mem8x4(ptr, d->pre_stride, pred_ptr, pitch);
- }
-}
+ if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
+ uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;
-/*encoder only*/
-void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
- int i, j;
- BLOCKD *blockd = xd->block;
+ vp9_build_inter_predictor(*base_pre + d0->pre,
+ d0->pre_stride,
+ predictor, stride,
+ &d0->bmi.as_mv[which_mv],
+ &scale[which_mv],
+ width, height,
+ weight, subpix);
- /* build uv mvs */
- for (i = 0; i < 2; i++) {
- for (j = 0; j < 2; j++) {
- int yoffset = i * 8 + j * 2;
- int uoffset = 16 + i * 2 + j;
- int voffset = 20 + i * 2 + j;
- int temp;
+ } else {
+ uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
+ uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre;
- temp = blockd[yoffset ].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 1].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 4].bmi.as_mv.first.as_mv.row
- + blockd[yoffset + 5].bmi.as_mv.first.as_mv.row;
+ vp9_build_inter_predictor(*base_pre0 + d0->pre,
+ d0->pre_stride,
+ predictor, stride,
+ &d0->bmi.as_mv[which_mv],
+ &scale[which_mv],
+ width > block_size ? block_size : width, height,
+ weight, subpix);
- if (temp < 0) temp -= 4;
- else temp += 4;
+ if (width <= block_size) return;
- xd->block[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) &
- xd->fullpixel_mask;
+ set_scaled_offsets(&scale[which_mv], row, col + block_size);
- temp = blockd[yoffset ].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 1].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 4].bmi.as_mv.first.as_mv.col
- + blockd[yoffset + 5].bmi.as_mv.first.as_mv.col;
+ vp9_build_inter_predictor(*base_pre1 + d1->pre,
+ d1->pre_stride,
+ predictor + block_size, stride,
+ &d1->bmi.as_mv[which_mv],
+ &scale[which_mv],
+ width - block_size, height,
+ weight, subpix);
+ }
+}
- if (temp < 0) temp -= 4;
- else temp += 4;
+static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
+ struct scale_factors *scale,
+ int block_size, int stride,
+ int which_mv, int weight,
+ const struct subpix_fn_table *subpix,
+ int row, int col) {
+ assert(d1->predictor - d0->predictor == block_size);
+ assert(d1->pre == d0->pre + block_size);
- blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) &
- xd->fullpixel_mask;
+ set_scaled_offsets(&scale[which_mv], row, col);
- blockd[voffset].bmi.as_mv.first.as_mv.row =
- blockd[uoffset].bmi.as_mv.first.as_mv.row;
- blockd[voffset].bmi.as_mv.first.as_mv.col =
- blockd[uoffset].bmi.as_mv.first.as_mv.col;
+ if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
+ uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- temp = blockd[yoffset ].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 1].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 4].bmi.as_mv.second.as_mv.row
- + blockd[yoffset + 5].bmi.as_mv.second.as_mv.row;
+ vp9_build_inter_predictor(*base_pre + d0->pre,
+ d0->pre_stride,
+ d0->predictor, stride,
+ &d0->bmi.as_mv[which_mv],
+ &scale[which_mv],
+ 2 * block_size, block_size,
+ weight, subpix);
- if (temp < 0) {
- temp -= 4;
- } else {
- temp += 4;
- }
+ } else {
+ uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
+ uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre;
- blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) &
- xd->fullpixel_mask;
+ vp9_build_inter_predictor(*base_pre0 + d0->pre,
+ d0->pre_stride,
+ d0->predictor, stride,
+ &d0->bmi.as_mv[which_mv],
+ &scale[which_mv],
+ block_size, block_size,
+ weight, subpix);
- temp = blockd[yoffset ].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 1].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 4].bmi.as_mv.second.as_mv.col
- + blockd[yoffset + 5].bmi.as_mv.second.as_mv.col;
+ set_scaled_offsets(&scale[which_mv], row, col + block_size);
- if (temp < 0) {
- temp -= 4;
- } else {
- temp += 4;
- }
-
- blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) &
- xd->fullpixel_mask;
-
- blockd[voffset].bmi.as_mv.second.as_mv.row =
- blockd[uoffset].bmi.as_mv.second.as_mv.row;
- blockd[voffset].bmi.as_mv.second.as_mv.col =
- blockd[uoffset].bmi.as_mv.second.as_mv.col;
- }
- }
+ vp9_build_inter_predictor(*base_pre1 + d1->pre,
+ d1->pre_stride,
+ d1->predictor, stride,
+ &d1->bmi.as_mv[which_mv],
+ &scale[which_mv],
+ block_size, block_size,
+ weight, subpix);
}
-
- for (i = 16; i < 24; i += 2) {
- BLOCKD *d0 = &blockd[i];
- BLOCKD *d1 = &blockd[i + 1];
-
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
- build_inter_predictors2b(xd, d0, 8);
- else {
- vp9_build_inter_predictors_b(d0, 8, xd->subpixel_predict4x4);
- vp9_build_inter_predictors_b(d1, 8, xd->subpixel_predict4x4);
- }
-
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- vp9_build_2nd_inter_predictors_b(d0, 8, xd->subpixel_predict_avg4x4);
- vp9_build_2nd_inter_predictors_b(d1, 8, xd->subpixel_predict_avg4x4);
- }
- }
}
static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
@@ -458,102 +565,653 @@
(xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
}
-/*encoder only*/
-void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd,
- uint8_t *dst_y,
- int dst_ystride,
- int clamp_mvs) {
- uint8_t *ptr_base = xd->pre.y_buffer;
- uint8_t *ptr;
- int pre_stride = xd->block[0].pre_stride;
+#define AVERAGE_WEIGHT (1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT))
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+
+// Whether to use implicit weighting for UV
+#define USE_IMPLICIT_WEIGHT_UV
+
+// Whether to use implicit weighting for SplitMV
+// #define USE_IMPLICIT_WEIGHT_SPLITMV
+
+// #define SEARCH_MIN3
+static int64_t get_consistency_metric(MACROBLOCKD *xd,
+ uint8_t *tmp_y, int tmp_ystride) {
+ int block_size = 16 << xd->mode_info_context->mbmi.sb_type;
+ uint8_t *rec_y = xd->dst.y_buffer;
+ int rec_ystride = xd->dst.y_stride;
+ int64_t metric = 0;
+ int i;
+ if (xd->up_available) {
+ for (i = 0; i < block_size; ++i) {
+ int diff = abs(*(rec_y - rec_ystride + i) -
+ *(tmp_y + i));
+#ifdef SEARCH_MIN3
+ // Searches for the min abs diff among 3 pixel neighbors in the border
+ int diff1 = xd->left_available ?
+ abs(*(rec_y - rec_ystride + i - 1) - *(tmp_y + i)) : diff;
+ int diff2 = i < block_size - 1 ?
+ abs(*(rec_y - rec_ystride + i + 1) - *(tmp_y + i)) : diff;
+ diff = diff <= diff1 ? diff : diff1;
+ diff = diff <= diff2 ? diff : diff2;
+#endif
+ metric += diff;
+ }
+ }
+ if (xd->left_available) {
+ for (i = 0; i < block_size; ++i) {
+ int diff = abs(*(rec_y - 1 + i * rec_ystride) -
+ *(tmp_y + i * tmp_ystride));
+#ifdef SEARCH_MIN3
+ // Searches for the min abs diff among 3 pixel neighbors in the border
+ int diff1 = xd->up_available ?
+ abs(*(rec_y - 1 + (i - 1) * rec_ystride) -
+ *(tmp_y + i * tmp_ystride)) : diff;
+ int diff2 = i < block_size - 1 ?
+ abs(*(rec_y - 1 + (i + 1) * rec_ystride) -
+ *(tmp_y + i * tmp_ystride)) : diff;
+ diff = diff <= diff1 ? diff : diff1;
+ diff = diff <= diff2 ? diff : diff2;
+#endif
+ metric += diff;
+ }
+ }
+ return metric;
+}
+
+static int get_weight(MACROBLOCKD *xd, int64_t metric_1, int64_t metric_2) {
+ int weight = AVERAGE_WEIGHT;
+ if (2 * metric_1 < metric_2)
+ weight = 6;
+ else if (4 * metric_1 < 3 * metric_2)
+ weight = 5;
+ else if (2 * metric_2 < metric_1)
+ weight = 2;
+ else if (4 * metric_2 < 3 * metric_1)
+ weight = 3;
+ return weight;
+}
+
+#ifdef USE_IMPLICIT_WEIGHT_SPLITMV
+static int get_implicit_compoundinter_weight_splitmv(
+ MACROBLOCKD *xd, int mb_row, int mb_col) {
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ BLOCKD *blockd = xd->block;
+ const int use_second_ref = mbmi->second_ref_frame > 0;
+ int64_t metric_2 = 0, metric_1 = 0;
+ int i, which_mv, weight;
+ uint8_t tmp_y[256];
+ const int tmp_ystride = 16;
+
+ if (!use_second_ref) return 0;
+ if (!(xd->up_available || xd->left_available))
+ return AVERAGE_WEIGHT;
+
+ assert(xd->mode_info_context->mbmi.mode == SPLITMV);
+
+ which_mv = 1; // second predictor
+ if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
+ for (i = 0; i < 16; i += 8) {
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 2];
+ const int y = i & 8;
+
+ blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+ blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];
+
+ if (mbmi->need_to_clamp_mvs) {
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
+ }
+ if (i == 0) {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
+ which_mv, 0, 16, 1,
+ &xd->subpix, mb_row * 16 + y, mb_col * 16);
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
+ which_mv, 0, 1, 8,
+ &xd->subpix, mb_row * 16 + y, mb_col * 16);
+ } else {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16,
+ 8, 16, which_mv, 0, 1, 8,
+ &xd->subpix, mb_row * 16 + y, mb_col * 16);
+ }
+ }
+ } else {
+ for (i = 0; i < 16; i += 2) {
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 1];
+ const int x = (i & 3) * 4;
+ const int y = (i >> 2) * 4;
+
+ blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+ blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
+
+ if (i >= 4 && (i & 3) != 0) continue;
+
+ if (i == 0) {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
+ which_mv, 0, 8, 1, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
+ which_mv, 0, 1, 4, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
+ } else if (i < 4) {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16,
+ which_mv, 0, 8, 1, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
+ } else {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16,
+ 4, 16, which_mv, 0, 1, 4, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
+ }
+ }
+ }
+ metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride);
+
+ which_mv = 0; // first predictor
+ if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
+ for (i = 0; i < 16; i += 8) {
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 2];
+ const int y = i & 8;
+
+ blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+ blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];
+
+ if (mbmi->need_to_clamp_mvs) {
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
+ }
+ if (i == 0) {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
+ which_mv, 0, 16, 1,
+ &xd->subpix, mb_row * 16 + y, mb_col * 16);
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
+ which_mv, 0, 1, 8,
+ &xd->subpix, mb_row * 16 + y, mb_col * 16);
+ } else {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16,
+ 8, 16, which_mv, 0, 1, 8,
+ &xd->subpix, mb_row * 16 + y, mb_col * 16);
+ }
+ }
+ } else {
+ for (i = 0; i < 16; i += 2) {
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 1];
+ const int x = (i & 3) * 4;
+ const int y = (i >> 2) * 4;
+
+ blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+ blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
+
+ if (i >= 4 && (i & 3) != 0) continue;
+
+ if (i == 0) {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
+ which_mv, 0, 8, 1, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
+ which_mv, 0, 1, 4, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
+ } else if (i < 4) {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16,
+ which_mv, 0, 8, 1, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
+ } else {
+ build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16,
+ 4, 16, which_mv, 0, 1, 4, &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
+ }
+ }
+ }
+ metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride);
+
+ // Choose final weight for averaging
+ weight = get_weight(xd, metric_1, metric_2);
+ return weight;
+}
+#endif
+
+static int get_implicit_compoundinter_weight(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+ int64_t metric_2 = 0, metric_1 = 0;
+ int n, clamp_mvs, pre_stride;
+ uint8_t *base_pre;
int_mv ymv;
+ uint8_t tmp_y[4096];
+ const int tmp_ystride = 64;
+ int weight;
+ int edge[4];
+ int block_size = 16 << xd->mode_info_context->mbmi.sb_type;
+ if (!use_second_ref) return 0;
+ if (!(xd->up_available || xd->left_available))
+ return AVERAGE_WEIGHT;
+
+ edge[0] = xd->mb_to_top_edge;
+ edge[1] = xd->mb_to_bottom_edge;
+ edge[2] = xd->mb_to_left_edge;
+ edge[3] = xd->mb_to_right_edge;
+
+ clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_secondmv;
+ base_pre = xd->second_pre.y_buffer;
+ pre_stride = xd->second_pre.y_stride;
+ ymv.as_int = xd->mode_info_context->mbmi.mv[1].as_int;
+ // First generate the second predictor
+ for (n = 0; n < block_size; n += 16) {
+ xd->mb_to_left_edge = edge[2] - (n << 3);
+ xd->mb_to_right_edge = edge[3] + ((16 - n) << 3);
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&ymv.as_mv, xd);
+ set_scaled_offsets(&xd->scale_factor[1], mb_row * 16, mb_col * 16 + n);
+ // predict a single row of pixels
+ vp9_build_inter_predictor(
+ base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[1]),
+ pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[1],
+ 16, 1, 0, &xd->subpix);
+ }
+ xd->mb_to_left_edge = edge[2];
+ xd->mb_to_right_edge = edge[3];
+ for (n = 0; n < block_size; n += 16) {
+ xd->mb_to_top_edge = edge[0] - (n << 3);
+ xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3);
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&ymv.as_mv, xd);
+ set_scaled_offsets(&xd->scale_factor[1], mb_row * 16 + n, mb_col * 16);
+ // predict a single col of pixels
+ vp9_build_inter_predictor(
+ base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[1]),
+ pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv,
+ &xd->scale_factor[1], 1, 16, 0, &xd->subpix);
+ }
+ xd->mb_to_top_edge = edge[0];
+ xd->mb_to_bottom_edge = edge[1];
+ // Compute consistency metric
+ metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride);
+
+ clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+ base_pre = xd->pre.y_buffer;
+ pre_stride = xd->pre.y_stride;
ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int;
+ // Now generate the first predictor
+ for (n = 0; n < block_size; n += 16) {
+ xd->mb_to_left_edge = edge[2] - (n << 3);
+ xd->mb_to_right_edge = edge[3] + ((16 - n) << 3);
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&ymv.as_mv, xd);
+ set_scaled_offsets(&xd->scale_factor[0], mb_row * 16, mb_col * 16 + n);
+ // predict a single row of pixels
+ vp9_build_inter_predictor(
+ base_pre + scaled_buffer_offset(n, 0, pre_stride, &xd->scale_factor[0]),
+ pre_stride, tmp_y + n, tmp_ystride, &ymv, &xd->scale_factor[0],
+ 16, 1, 0, &xd->subpix);
+ }
+ xd->mb_to_left_edge = edge[2];
+ xd->mb_to_right_edge = edge[3];
+ for (n = 0; n < block_size; n += 16) {
+ xd->mb_to_top_edge = edge[0] - (n << 3);
+ xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3);
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&ymv.as_mv, xd);
+ set_scaled_offsets(&xd->scale_factor[0], mb_row * 16 + n, mb_col * 16);
+ // predict a single col of pixels
+ vp9_build_inter_predictor(
+ base_pre + scaled_buffer_offset(0, n, pre_stride, &xd->scale_factor[0]),
+ pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv,
+ &xd->scale_factor[0], 1, 16, 0, &xd->subpix);
+ }
+ xd->mb_to_top_edge = edge[0];
+ xd->mb_to_bottom_edge = edge[1];
+ metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride);
- if (clamp_mvs)
- clamp_mv_to_umv_border(&ymv.as_mv, xd);
+ // Choose final weight for averaging
+ weight = get_weight(xd, metric_1, metric_2);
+ return weight;
+}
- ptr = ptr_base + (ymv.as_mv.row >> 3) * pre_stride + (ymv.as_mv.col >> 3);
+static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int weight,
+ int mb_row,
+ int mb_col) {
+ const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+ int which_mv;
- if ((ymv.as_mv.row | ymv.as_mv.col) & 7) {
- xd->subpixel_predict16x16(ptr, pre_stride,
- (ymv.as_mv.col & 7) << 1,
- (ymv.as_mv.row & 7) << 1,
- dst_y, dst_ystride);
- } else {
- vp9_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
- }
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ const int clamp_mvs = which_mv ?
+ xd->mode_info_context->mbmi.need_to_clamp_secondmv :
+ xd->mode_info_context->mbmi.need_to_clamp_mvs;
+
+ uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer;
+ int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride;
+ int_mv ymv;
+ ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
+
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&ymv.as_mv, xd);
+
+ set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16);
+
+ vp9_build_inter_predictor(base_pre, pre_stride,
+ dst_y, dst_ystride,
+ &ymv, &xd->scale_factor[which_mv],
+ 16, 16, which_mv ? weight : 0, &xd->subpix);
+ }
}
-void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride) {
- int offset;
- uint8_t *uptr, *vptr;
- int pre_stride = xd->block[0].pre_stride;
- int_mv _o16x16mv;
- int_mv _16x16mv;
+void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int mb_row,
+ int mb_col) {
+ int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);
- _16x16mv.as_int = xd->mode_info_context->mbmi.mv[0].as_int;
+ build_inter16x16_predictors_mby_w(xd, dst_y, dst_ystride, weight,
+ mb_row, mb_col);
+}
- if (xd->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+#else
- _o16x16mv = _16x16mv;
- /* calc uv motion vectors */
- if (_16x16mv.as_mv.row < 0)
- _16x16mv.as_mv.row -= 1;
- else
- _16x16mv.as_mv.row += 1;
+void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int mb_row,
+ int mb_col) {
+ const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+ int which_mv;
- if (_16x16mv.as_mv.col < 0)
- _16x16mv.as_mv.col -= 1;
- else
- _16x16mv.as_mv.col += 1;
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ const int clamp_mvs = which_mv ?
+ xd->mode_info_context->mbmi.need_to_clamp_secondmv :
+ xd->mode_info_context->mbmi.need_to_clamp_mvs;
- _16x16mv.as_mv.row /= 2;
- _16x16mv.as_mv.col /= 2;
+ uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer;
+ int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride;
+ int_mv ymv;
+ ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
- _16x16mv.as_mv.row &= xd->fullpixel_mask;
- _16x16mv.as_mv.col &= xd->fullpixel_mask;
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&ymv.as_mv, xd);
- pre_stride >>= 1;
- offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
- uptr = xd->pre.u_buffer + offset;
- vptr = xd->pre.v_buffer + offset;
+ set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16);
- if (_o16x16mv.as_int & 0x000f000f) {
- xd->subpixel_predict8x8(uptr, pre_stride, _o16x16mv.as_mv.col & 15,
- _o16x16mv.as_mv.row & 15, dst_u, dst_uvstride);
- xd->subpixel_predict8x8(vptr, pre_stride, _o16x16mv.as_mv.col & 15,
- _o16x16mv.as_mv.row & 15, dst_v, dst_uvstride);
- } else {
- vp9_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
- vp9_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
+ vp9_build_inter_predictor(base_pre, pre_stride,
+ dst_y, dst_ystride,
+ &ymv, &xd->scale_factor[which_mv],
+ 16, 16, which_mv, &xd->subpix);
+ }
+}
+#endif
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int weight,
+ int mb_row,
+ int mb_col) {
+ const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+ int which_mv;
+
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ const int clamp_mvs =
+ which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
+ : xd->mode_info_context->mbmi.need_to_clamp_mvs;
+ uint8_t *uptr, *vptr;
+ int pre_stride = which_mv ? xd->second_pre.uv_stride
+ : xd->pre.uv_stride;
+ int_mv _o16x16mv;
+ int_mv _16x16mv;
+
+ _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
+
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+
+ _o16x16mv = _16x16mv;
+ /* calc uv motion vectors */
+ if (_16x16mv.as_mv.row < 0)
+ _16x16mv.as_mv.row -= 1;
+ else
+ _16x16mv.as_mv.row += 1;
+
+ if (_16x16mv.as_mv.col < 0)
+ _16x16mv.as_mv.col -= 1;
+ else
+ _16x16mv.as_mv.col += 1;
+
+ _16x16mv.as_mv.row /= 2;
+ _16x16mv.as_mv.col /= 2;
+
+ _16x16mv.as_mv.row &= xd->fullpixel_mask;
+ _16x16mv.as_mv.col &= xd->fullpixel_mask;
+
+ uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
+ vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
+
+ set_scaled_offsets(&xd->scale_factor_uv[which_mv],
+ mb_row * 16, mb_col * 16);
+
+ vp9_build_inter_predictor_q4(
+ uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
+ &xd->scale_factor_uv[which_mv], 8, 8,
+ which_mv ? weight : 0, &xd->subpix);
+
+ vp9_build_inter_predictor_q4(
+ vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
+ &xd->scale_factor_uv[which_mv], 8, 8,
+ which_mv ? weight : 0, &xd->subpix);
+ }
+}
+
+void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+#ifdef USE_IMPLICIT_WEIGHT_UV
+ int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);
+#else
+ int weight = AVERAGE_WEIGHT;
+#endif
+ build_inter16x16_predictors_mbuv_w(xd, dst_u, dst_v, dst_uvstride,
+ weight, mb_row, mb_col);
+}
+
+#else
+
+void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+ const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+ int which_mv;
+
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ const int clamp_mvs =
+ which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
+ : xd->mode_info_context->mbmi.need_to_clamp_mvs;
+ uint8_t *uptr, *vptr;
+ int pre_stride = which_mv ? xd->second_pre.uv_stride
+ : xd->pre.uv_stride;
+ int_mv _o16x16mv;
+ int_mv _16x16mv;
+
+ _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
+
+ if (clamp_mvs)
+ clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+
+ _o16x16mv = _16x16mv;
+ /* calc uv motion vectors */
+ if (_16x16mv.as_mv.row < 0)
+ _16x16mv.as_mv.row -= 1;
+ else
+ _16x16mv.as_mv.row += 1;
+
+ if (_16x16mv.as_mv.col < 0)
+ _16x16mv.as_mv.col -= 1;
+ else
+ _16x16mv.as_mv.col += 1;
+
+ _16x16mv.as_mv.row /= 2;
+ _16x16mv.as_mv.col /= 2;
+
+ _16x16mv.as_mv.row &= xd->fullpixel_mask;
+ _16x16mv.as_mv.col &= xd->fullpixel_mask;
+
+ uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
+ vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
+
+ set_scaled_offsets(&xd->scale_factor_uv[which_mv],
+ mb_row * 16, mb_col * 16);
+
+ vp9_build_inter_predictor_q4(
+ uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
+ &xd->scale_factor_uv[which_mv], 8, 8,
+ which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
+
+ vp9_build_inter_predictor_q4(
+ vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
+ &xd->scale_factor_uv[which_mv], 8, 8,
+ which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
+ }
+}
+#endif
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void build_inter32x32_predictors_sby_w(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int weight,
+ int mb_row,
+ int mb_col) {
+ uint8_t *y1 = x->pre.y_buffer;
+ uint8_t *y2 = x->second_pre.y_buffer;
+ int edge[4], n;
+
+ edge[0] = x->mb_to_top_edge;
+ edge[1] = x->mb_to_bottom_edge;
+ edge[2] = x->mb_to_left_edge;
+ edge[3] = x->mb_to_right_edge;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3);
+ x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
+ x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3);
+ x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3);
+
+ x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->pre.y_stride,
+ &x->scale_factor[0]);
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.y_buffer = y2 +
+ scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->second_pre.y_stride,
+ &x->scale_factor[1]);
}
+ build_inter16x16_predictors_mby_w(x,
+ dst_y + y_idx * 16 * dst_ystride + x_idx * 16,
+ dst_ystride, weight, mb_row + y_idx, mb_col + x_idx);
+ }
+ x->mb_to_top_edge = edge[0];
+ x->mb_to_bottom_edge = edge[1];
+ x->mb_to_left_edge = edge[2];
+ x->mb_to_right_edge = edge[3];
+
+ x->pre.y_buffer = y1;
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.y_buffer = y2;
+ }
}
+void vp9_build_inter32x32_predictors_sby(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int mb_row,
+ int mb_col) {
+ int weight = get_implicit_compoundinter_weight(x, mb_row, mb_col);
+ build_inter32x32_predictors_sby_w(x, dst_y, dst_ystride, weight,
+ mb_row, mb_col);
+}
-void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride, int dst_uvstride) {
- vp9_build_1st_inter16x16_predictors_mby(xd, dst_y, dst_ystride,
- xd->mode_info_context->mbmi.need_to_clamp_mvs);
- vp9_build_1st_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride);
+#else
+
+// TODO(all): Can we use 32x32 specific implementations of this rather than
+// using 16x16 implementations ?
+void vp9_build_inter32x32_predictors_sby(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int mb_row,
+ int mb_col) {
+ uint8_t *y1 = x->pre.y_buffer;
+ uint8_t *y2 = x->second_pre.y_buffer;
+ int edge[4], n;
+
+ edge[0] = x->mb_to_top_edge;
+ edge[1] = x->mb_to_bottom_edge;
+ edge[2] = x->mb_to_left_edge;
+ edge[3] = x->mb_to_right_edge;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3);
+ x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
+ x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3);
+ x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3);
+
+ x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->pre.y_stride,
+ &x->scale_factor[0]);
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.y_buffer = y2 +
+ scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->second_pre.y_stride,
+ &x->scale_factor[1]);
+ }
+ vp9_build_inter16x16_predictors_mby(x,
+ dst_y + y_idx * 16 * dst_ystride + x_idx * 16,
+ dst_ystride, mb_row + y_idx, mb_col + x_idx);
+ }
+ x->mb_to_top_edge = edge[0];
+ x->mb_to_bottom_edge = edge[1];
+ x->mb_to_left_edge = edge[2];
+ x->mb_to_right_edge = edge[3];
+
+ x->pre.y_buffer = y1;
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.y_buffer = y2;
+ }
}
-void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride) {
- uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
- uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
- *v2 = x->second_pre.v_buffer;
+#endif
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void build_inter32x32_predictors_sbuv_w(MACROBLOCKD *x,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int weight,
+ int mb_row,
+ int mb_col) {
+ uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
+ uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer;
int edge[4], n;
edge[0] = x->mb_to_top_edge;
@@ -562,6 +1220,7 @@
edge[3] = x->mb_to_right_edge;
for (n = 0; n < 4; n++) {
+ int scaled_uv_offset;
const int x_idx = n & 1, y_idx = n >> 1;
x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3);
@@ -569,43 +1228,130 @@
x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3);
x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3);
- x->pre.y_buffer = y1 + y_idx * 16 * x->pre.y_stride + x_idx * 16;
- x->pre.u_buffer = u1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
- x->pre.v_buffer = v1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+ y_idx * 8,
+ x->pre.uv_stride,
+ &x->scale_factor_uv[0]);
+ x->pre.u_buffer = u1 + scaled_uv_offset;
+ x->pre.v_buffer = v1 + scaled_uv_offset;
- vp9_build_1st_inter16x16_predictors_mb(x,
- dst_y + y_idx * 16 * dst_ystride + x_idx * 16,
- dst_u + y_idx * 8 * dst_uvstride + x_idx * 8,
- dst_v + y_idx * 8 * dst_uvstride + x_idx * 8,
- dst_ystride, dst_uvstride);
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- x->second_pre.y_buffer = y2 + y_idx * 16 * x->pre.y_stride + x_idx * 16;
- x->second_pre.u_buffer = u2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
- x->second_pre.v_buffer = v2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+ y_idx * 8,
+ x->second_pre.uv_stride,
+ &x->scale_factor_uv[1]);
+ x->second_pre.u_buffer = u2 + scaled_uv_offset;
+ x->second_pre.v_buffer = v2 + scaled_uv_offset;
+ }
- vp9_build_2nd_inter16x16_predictors_mb(x,
- dst_y + y_idx * 16 * dst_ystride + x_idx * 16,
+ build_inter16x16_predictors_mbuv_w(x,
dst_u + y_idx * 8 * dst_uvstride + x_idx * 8,
dst_v + y_idx * 8 * dst_uvstride + x_idx * 8,
- dst_ystride, dst_uvstride);
- }
+ dst_uvstride, weight, mb_row + y_idx, mb_col + x_idx);
}
+ x->mb_to_top_edge = edge[0];
+ x->mb_to_bottom_edge = edge[1];
+ x->mb_to_left_edge = edge[2];
+ x->mb_to_right_edge = edge[3];
+ x->pre.u_buffer = u1;
+ x->pre.v_buffer = v1;
+
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.u_buffer = u2;
+ x->second_pre.v_buffer = v2;
+ }
+}
+
+void vp9_build_inter32x32_predictors_sbuv(MACROBLOCKD *xd,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+#ifdef USE_IMPLICIT_WEIGHT_UV
+ int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);
+#else
+ int weight = AVERAGE_WEIGHT;
+#endif
+ build_inter32x32_predictors_sbuv_w(xd, dst_u, dst_v, dst_uvstride,
+ weight, mb_row, mb_col);
+}
+
+#else
+
+void vp9_build_inter32x32_predictors_sbuv(MACROBLOCKD *x,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+ uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
+ uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer;
+ int edge[4], n;
+
+ edge[0] = x->mb_to_top_edge;
+ edge[1] = x->mb_to_bottom_edge;
+ edge[2] = x->mb_to_left_edge;
+ edge[3] = x->mb_to_right_edge;
+
+ for (n = 0; n < 4; n++) {
+ int scaled_uv_offset;
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3);
+ x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
+ x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3);
+ x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3);
+
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+ y_idx * 8,
+ x->pre.uv_stride,
+ &x->scale_factor_uv[0]);
+ x->pre.u_buffer = u1 + scaled_uv_offset;
+ x->pre.v_buffer = v1 + scaled_uv_offset;
+
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+ y_idx * 8,
+ x->second_pre.uv_stride,
+ &x->scale_factor_uv[1]);
+ x->second_pre.u_buffer = u2 + scaled_uv_offset;
+ x->second_pre.v_buffer = v2 + scaled_uv_offset;
+ }
+
+ vp9_build_inter16x16_predictors_mbuv(x,
+ dst_u + y_idx * 8 * dst_uvstride + x_idx * 8,
+ dst_v + y_idx * 8 * dst_uvstride + x_idx * 8,
+ dst_uvstride, mb_row + y_idx, mb_col + x_idx);
+ }
x->mb_to_top_edge = edge[0];
x->mb_to_bottom_edge = edge[1];
x->mb_to_left_edge = edge[2];
x->mb_to_right_edge = edge[3];
- x->pre.y_buffer = y1;
x->pre.u_buffer = u1;
x->pre.v_buffer = v1;
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- x->second_pre.y_buffer = y2;
x->second_pre.u_buffer = u2;
x->second_pre.v_buffer = v2;
}
+}
+#endif
+void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+ vp9_build_inter32x32_predictors_sby(x, dst_y, dst_ystride,
+ mb_row, mb_col);
+ vp9_build_inter32x32_predictors_sbuv(x, dst_u, dst_v, dst_uvstride,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_32x32_predictors_sb(
@@ -614,15 +1360,15 @@
#endif
}
-void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride) {
- uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
- uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
- *v2 = x->second_pre.v_buffer;
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+static void build_inter64x64_predictors_sby_w(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int weight,
+ int mb_row,
+ int mb_col) {
+ uint8_t *y1 = x->pre.y_buffer;
+ uint8_t *y2 = x->second_pre.y_buffer;
int edge[4], n;
edge[0] = x->mb_to_top_edge;
@@ -638,21 +1384,22 @@
x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3);
x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3);
- x->pre.y_buffer = y1 + y_idx * 32 * x->pre.y_stride + x_idx * 32;
- x->pre.u_buffer = u1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
- x->pre.v_buffer = v1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
+ x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32,
+ y_idx * 32,
+ x->pre.y_stride,
+ &x->scale_factor[0]);
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- x->second_pre.y_buffer = y2 + y_idx * 32 * x->pre.y_stride + x_idx * 32;
- x->second_pre.u_buffer = u2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
- x->second_pre.v_buffer = v2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
+ x->second_pre.y_buffer = y2 +
+ scaled_buffer_offset(x_idx * 32,
+ y_idx * 32,
+ x->second_pre.y_stride,
+ &x->scale_factor[1]);
}
- vp9_build_inter32x32_predictors_sb(x,
+ build_inter32x32_predictors_sby_w(x,
dst_y + y_idx * 32 * dst_ystride + x_idx * 32,
- dst_u + y_idx * 16 * dst_uvstride + x_idx * 16,
- dst_v + y_idx * 16 * dst_uvstride + x_idx * 16,
- dst_ystride, dst_uvstride);
+ dst_ystride, weight, mb_row + y_idx * 2, mb_col + x_idx * 2);
}
x->mb_to_top_edge = edge[0];
@@ -661,324 +1408,392 @@
x->mb_to_right_edge = edge[3];
x->pre.y_buffer = y1;
- x->pre.u_buffer = u1;
- x->pre.v_buffer = v1;
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
x->second_pre.y_buffer = y2;
- x->second_pre.u_buffer = u2;
- x->second_pre.v_buffer = v2;
}
+}
-#if CONFIG_COMP_INTERINTRA_PRED
- if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
- vp9_build_interintra_64x64_predictors_sb(x, dst_y, dst_u, dst_v,
- dst_ystride, dst_uvstride);
- }
-#endif
+void vp9_build_inter64x64_predictors_sby(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int mb_row,
+ int mb_col) {
+ int weight = get_implicit_compoundinter_weight(x, mb_row, mb_col);
+ build_inter64x64_predictors_sby_w(x, dst_y, dst_ystride, weight,
+ mb_row, mb_col);
}
-/*
- * The following functions should be called after an initial
- * call to vp9_build_1st_inter16x16_predictors_mb() or _mby()/_mbuv().
- * It will run a second filter on a (different) ref
- * frame and average the result with the output of the
- * first filter. The second reference frame is stored
- * in x->second_pre (the reference frame index is in
- * x->mode_info_context->mbmi.second_ref_frame). The second
- * motion vector is x->mode_info_context->mbmi.second_mv.
- *
- * This allows blending prediction from two reference frames
- * which sometimes leads to better prediction than from a
- * single reference framer.
- */
-void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd,
- uint8_t *dst_y,
- int dst_ystride) {
- uint8_t *ptr;
+#else
- int_mv _16x16mv;
- int mv_row;
- int mv_col;
+void vp9_build_inter64x64_predictors_sby(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int mb_row,
+ int mb_col) {
+ uint8_t *y1 = x->pre.y_buffer;
+ uint8_t *y2 = x->second_pre.y_buffer;
+ int edge[4], n;
- uint8_t *ptr_base = xd->second_pre.y_buffer;
- int pre_stride = xd->block[0].pre_stride;
+ edge[0] = x->mb_to_top_edge;
+ edge[1] = x->mb_to_bottom_edge;
+ edge[2] = x->mb_to_left_edge;
+ edge[3] = x->mb_to_right_edge;
- _16x16mv.as_int = xd->mode_info_context->mbmi.mv[1].as_int;
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
- if (xd->mode_info_context->mbmi.need_to_clamp_secondmv)
- clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+ x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3);
+ x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3);
+ x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3);
+ x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3);
- mv_row = _16x16mv.as_mv.row;
- mv_col = _16x16mv.as_mv.col;
+ x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32,
+ y_idx * 32,
+ x->pre.y_stride,
+ &x->scale_factor[0]);
- ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.y_buffer = y2 +
+ scaled_buffer_offset(x_idx * 32,
+ y_idx * 32,
+ x->second_pre.y_stride,
+ &x->scale_factor[1]);
+ }
- if ((mv_row | mv_col) & 7) {
- xd->subpixel_predict_avg16x16(ptr, pre_stride, (mv_col & 7) << 1,
- (mv_row & 7) << 1, dst_y, dst_ystride);
- } else {
- vp9_avg_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
+ vp9_build_inter32x32_predictors_sby(x,
+ dst_y + y_idx * 32 * dst_ystride + x_idx * 32,
+ dst_ystride, mb_row + y_idx * 2, mb_col + x_idx * 2);
}
+
+ x->mb_to_top_edge = edge[0];
+ x->mb_to_bottom_edge = edge[1];
+ x->mb_to_left_edge = edge[2];
+ x->mb_to_right_edge = edge[3];
+
+ x->pre.y_buffer = y1;
+
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.y_buffer = y2;
+ }
}
+#endif
-void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride) {
- int offset;
- uint8_t *uptr, *vptr;
+void vp9_build_inter64x64_predictors_sbuv(MACROBLOCKD *x,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+ uint8_t *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
+ uint8_t *u2 = x->second_pre.u_buffer, *v2 = x->second_pre.v_buffer;
+ int edge[4], n;
- int_mv _16x16mv;
- int mv_row;
- int mv_col;
- int omv_row, omv_col;
+ edge[0] = x->mb_to_top_edge;
+ edge[1] = x->mb_to_bottom_edge;
+ edge[2] = x->mb_to_left_edge;
+ edge[3] = x->mb_to_right_edge;
- int pre_stride = xd->block[0].pre_stride;
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ int scaled_uv_offset;
- _16x16mv.as_int = xd->mode_info_context->mbmi.mv[1].as_int;
+ x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3);
+ x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3);
+ x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3);
+ x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3);
- if (xd->mode_info_context->mbmi.need_to_clamp_secondmv)
- clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->pre.uv_stride,
+ &x->scale_factor_uv[0]);
+ x->pre.u_buffer = u1 + scaled_uv_offset;
+ x->pre.v_buffer = v1 + scaled_uv_offset;
- mv_row = _16x16mv.as_mv.row;
- mv_col = _16x16mv.as_mv.col;
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->second_pre.uv_stride,
+ &x->scale_factor_uv[1]);
+ x->second_pre.u_buffer = u2 + scaled_uv_offset;
+ x->second_pre.v_buffer = v2 + scaled_uv_offset;
+ }
- /* calc uv motion vectors */
- omv_row = mv_row;
- omv_col = mv_col;
- mv_row = (mv_row + (mv_row > 0)) >> 1;
- mv_col = (mv_col + (mv_col > 0)) >> 1;
+ vp9_build_inter32x32_predictors_sbuv(x,
+ dst_u + y_idx * 16 * dst_uvstride + x_idx * 16,
+ dst_v + y_idx * 16 * dst_uvstride + x_idx * 16,
+ dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2);
+ }
- mv_row &= xd->fullpixel_mask;
- mv_col &= xd->fullpixel_mask;
+ x->mb_to_top_edge = edge[0];
+ x->mb_to_bottom_edge = edge[1];
+ x->mb_to_left_edge = edge[2];
+ x->mb_to_right_edge = edge[3];
- pre_stride >>= 1;
- offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
- uptr = xd->second_pre.u_buffer + offset;
- vptr = xd->second_pre.v_buffer + offset;
+ x->pre.u_buffer = u1;
+ x->pre.v_buffer = v1;
- if ((omv_row | omv_col) & 15) {
- xd->subpixel_predict_avg8x8(uptr, pre_stride, omv_col & 15,
- omv_row & 15, dst_u, dst_uvstride);
- xd->subpixel_predict_avg8x8(vptr, pre_stride, omv_col & 15,
- omv_row & 15, dst_v, dst_uvstride);
- } else {
- vp9_avg_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
- vp9_avg_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
- }
+ if (x->mode_info_context->mbmi.second_ref_frame > 0) {
+ x->second_pre.u_buffer = u2;
+ x->second_pre.v_buffer = v2;
+ }
}
-void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride) {
- vp9_build_2nd_inter16x16_predictors_mby(xd, dst_y, dst_ystride);
- vp9_build_2nd_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride);
+void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+ vp9_build_inter64x64_predictors_sby(x, dst_y, dst_ystride,
+ mb_row, mb_col);
+ vp9_build_inter64x64_predictors_sbuv(x, dst_u, dst_v, dst_uvstride,
+ mb_row, mb_col);
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+ vp9_build_interintra_64x64_predictors_sb(x, dst_y, dst_u, dst_v,
+ dst_ystride, dst_uvstride);
+ }
+#endif
}
-static void build_inter4x4_predictors_mb(MACROBLOCKD *xd) {
+static void build_inter4x4_predictors_mb(MACROBLOCKD *xd,
+ int mb_row, int mb_col) {
int i;
MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
BLOCKD *blockd = xd->block;
+ int which_mv = 0;
+ const int use_second_ref = mbmi->second_ref_frame > 0;
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && defined(USE_IMPLICIT_WEIGHT_SPLITMV)
+ int weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col);
+#else
+ int weight = AVERAGE_WEIGHT;
+#endif
if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
- blockd[ 0].bmi = xd->mode_info_context->bmi[ 0];
- blockd[ 2].bmi = xd->mode_info_context->bmi[ 2];
- blockd[ 8].bmi = xd->mode_info_context->bmi[ 8];
- blockd[10].bmi = xd->mode_info_context->bmi[10];
+ for (i = 0; i < 16; i += 8) {
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 2];
+ const int y = i & 8;
- if (mbmi->need_to_clamp_mvs) {
- clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.first.as_mv, xd);
- if (mbmi->second_ref_frame > 0) {
- clamp_mv_to_umv_border(&blockd[ 0].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 2].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[ 8].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[10].bmi.as_mv.second.as_mv, xd);
- }
- }
+ blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
+ blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ if (mbmi->need_to_clamp_mvs) {
+ clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd);
+ clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
+ }
- vp9_build_inter_predictors4b(xd, &blockd[ 0], 16);
- vp9_build_inter_predictors4b(xd, &blockd[ 2], 16);
- vp9_build_inter_predictors4b(xd, &blockd[ 8], 16);
- vp9_build_inter_predictors4b(xd, &blockd[10], 16);
-
- if (mbmi->second_ref_frame > 0) {
- vp9_build_2nd_inter_predictors4b(xd, &blockd[ 0], 16);
- vp9_build_2nd_inter_predictors4b(xd, &blockd[ 2], 16);
- vp9_build_2nd_inter_predictors4b(xd, &blockd[ 8], 16);
- vp9_build_2nd_inter_predictors4b(xd, &blockd[10], 16);
+ build_2x1_inter_predictor(d0, d1, xd->scale_factor, 8, 16, which_mv,
+ which_mv ? weight : 0,
+ &xd->subpix, mb_row * 16 + y, mb_col * 16);
+ }
}
} else {
for (i = 0; i < 16; i += 2) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
+ const int x = (i & 3) * 4;
+ const int y = (i >> 2) * 4;
blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];
- if (mbmi->need_to_clamp_mvs) {
- clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.first.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.first.as_mv, xd);
- if (mbmi->second_ref_frame > 0) {
- clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv.second.as_mv, xd);
- clamp_mv_to_umv_border(&blockd[i + 1].bmi.as_mv.second.as_mv, xd);
- }
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ build_2x1_inter_predictor(d0, d1, xd->scale_factor, 4, 16, which_mv,
+ which_mv ? weight : 0,
+ &xd->subpix,
+ mb_row * 16 + y, mb_col * 16 + x);
}
-
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
- build_inter_predictors2b(xd, d0, 16);
- else {
- vp9_build_inter_predictors_b(d0, 16, xd->subpixel_predict4x4);
- vp9_build_inter_predictors_b(d1, 16, xd->subpixel_predict4x4);
- }
-
- if (mbmi->second_ref_frame > 0) {
- vp9_build_2nd_inter_predictors_b(d0, 16, xd->subpixel_predict_avg4x4);
- vp9_build_2nd_inter_predictors_b(d1, 16, xd->subpixel_predict_avg4x4);
- }
}
}
-
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+#if !defined(USE_IMPLICIT_WEIGHT_UV)
+ weight = AVERAGE_WEIGHT;
+#endif
+#endif
for (i = 16; i < 24; i += 2) {
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
+ const int x = 4 * (i & 1);
+ const int y = ((i - 16) >> 1) * 4;
- if (d0->bmi.as_mv.first.as_int == d1->bmi.as_mv.first.as_int)
- build_inter_predictors2b(xd, d0, 8);
- else {
- vp9_build_inter_predictors_b(d0, 8, xd->subpixel_predict4x4);
- vp9_build_inter_predictors_b(d1, 8, xd->subpixel_predict4x4);
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
+ which_mv ? weight : 0, &xd->subpix,
+ mb_row * 8 + y, mb_col * 8 + x);
}
-
- if (mbmi->second_ref_frame > 0) {
- vp9_build_2nd_inter_predictors_b(d0, 8, xd->subpixel_predict_avg4x4);
- vp9_build_2nd_inter_predictors_b(d1, 8, xd->subpixel_predict_avg4x4);
- }
}
}
-static
-void build_4x4uvmvs(MACROBLOCKD *xd) {
- int i, j;
- BLOCKD *blockd = xd->block;
+static INLINE int round_mv_comp(int value) {
+ return (value < 0 ? value - 4 : value + 4) / 8;
+}
- for (i = 0; i < 2; i++) {
- for (j = 0; j < 2; j++) {
- int yoffset = i * 8 + j * 2;
- int uoffset = 16 + i * 2 + j;
- int voffset = 20 + i * 2 + j;
+static int mi_mv_pred_row(MACROBLOCKD *mb, int off, int idx) {
+ const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.row +
+ mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.row +
+ mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.row +
+ mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.row;
+ return round_mv_comp(temp) & mb->fullpixel_mask;
+}
- int temp;
+static int mi_mv_pred_col(MACROBLOCKD *mb, int off, int idx) {
+ const int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.col +
+ mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.col +
+ mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.col +
+ mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.col;
+ return round_mv_comp(temp) & mb->fullpixel_mask;
+}
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.row;
+static int b_mv_pred_row(MACROBLOCKD *mb, int off, int idx) {
+ BLOCKD *const blockd = mb->block;
+ const int temp = blockd[off + 0].bmi.as_mv[idx].as_mv.row +
+ blockd[off + 1].bmi.as_mv[idx].as_mv.row +
+ blockd[off + 4].bmi.as_mv[idx].as_mv.row +
+ blockd[off + 5].bmi.as_mv[idx].as_mv.row;
+ return round_mv_comp(temp) & mb->fullpixel_mask;
+}
- if (temp < 0) temp -= 4;
- else temp += 4;
+static int b_mv_pred_col(MACROBLOCKD *mb, int off, int idx) {
+ BLOCKD *const blockd = mb->block;
+ const int temp = blockd[off + 0].bmi.as_mv[idx].as_mv.col +
+ blockd[off + 1].bmi.as_mv[idx].as_mv.col +
+ blockd[off + 4].bmi.as_mv[idx].as_mv.col +
+ blockd[off + 5].bmi.as_mv[idx].as_mv.col;
+ return round_mv_comp(temp) & mb->fullpixel_mask;
+}
- blockd[uoffset].bmi.as_mv.first.as_mv.row = (temp / 8) &
- xd->fullpixel_mask;
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.first.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.first.as_mv.col;
+static void build_4x4uvmvs(MACROBLOCKD *xd) {
+ int i, j;
+ BLOCKD *blockd = xd->block;
- if (temp < 0) temp -= 4;
- else temp += 4;
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ const int yoffset = i * 8 + j * 2;
+ const int uoffset = 16 + i * 2 + j;
+ const int voffset = 20 + i * 2 + j;
- blockd[uoffset].bmi.as_mv.first.as_mv.col = (temp / 8) &
- xd->fullpixel_mask;
+ MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv;
+ MV *v = &blockd[voffset].bmi.as_mv[0].as_mv;
+ u->row = mi_mv_pred_row(xd, yoffset, 0);
+ u->col = mi_mv_pred_col(xd, yoffset, 0);
// if (x->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd);
+ clamp_uvmv_to_umv_border(u, xd);
// if (x->mode_info_context->mbmi.need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv.first.as_mv, xd);
+ clamp_uvmv_to_umv_border(u, xd);
- blockd[voffset].bmi.as_mv.first.as_mv.row =
- blockd[uoffset].bmi.as_mv.first.as_mv.row;
- blockd[voffset].bmi.as_mv.first.as_mv.col =
- blockd[uoffset].bmi.as_mv.first.as_mv.col;
+ v->row = u->row;
+ v->col = u->col;
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.row
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.row;
+ u = &blockd[uoffset].bmi.as_mv[1].as_mv;
+ v = &blockd[voffset].bmi.as_mv[1].as_mv;
+ u->row = mi_mv_pred_row(xd, yoffset, 1);
+ u->col = mi_mv_pred_col(xd, yoffset, 1);
- if (temp < 0) {
- temp -= 4;
- } else {
- temp += 4;
- }
-
- blockd[uoffset].bmi.as_mv.second.as_mv.row = (temp / 8) &
- xd->fullpixel_mask;
-
- temp = xd->mode_info_context->bmi[yoffset + 0].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 1].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 4].as_mv.second.as_mv.col
- + xd->mode_info_context->bmi[yoffset + 5].as_mv.second.as_mv.col;
-
- if (temp < 0) {
- temp -= 4;
- } else {
- temp += 4;
- }
-
- blockd[uoffset].bmi.as_mv.second.as_mv.col = (temp / 8) &
- xd->fullpixel_mask;
-
// if (mbmi->need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(
- &blockd[uoffset].bmi.as_mv.second.as_mv, xd);
+ clamp_uvmv_to_umv_border(u, xd);
// if (mbmi->need_to_clamp_mvs)
- clamp_uvmv_to_umv_border(
- &blockd[uoffset].bmi.as_mv.second.as_mv, xd);
+ clamp_uvmv_to_umv_border(u, xd);
- blockd[voffset].bmi.as_mv.second.as_mv.row =
- blockd[uoffset].bmi.as_mv.second.as_mv.row;
- blockd[voffset].bmi.as_mv.second.as_mv.col =
- blockd[uoffset].bmi.as_mv.second.as_mv.col;
+ v->row = u->row;
+ v->col = u->col;
}
}
}
}
-void vp9_build_inter_predictors_mb(MACROBLOCKD *xd) {
- if (xd->mode_info_context->mbmi.mode != SPLITMV) {
- vp9_build_1st_inter16x16_predictors_mb(xd, xd->predictor,
- &xd->predictor[256],
- &xd->predictor[320], 16, 8);
-
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- /* 256 = offset of U plane in Y+U+V buffer;
- * 320 = offset of V plane in Y+U+V buffer.
- * (256=16x16, 320=16x16+8x8). */
- vp9_build_2nd_inter16x16_predictors_mb(xd, xd->predictor,
- &xd->predictor[256],
- &xd->predictor[320], 16, 8);
- }
+void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+ vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col);
+ vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
- else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
- vp9_build_interintra_16x16_predictors_mb(xd, xd->predictor,
- &xd->predictor[256],
- &xd->predictor[320], 16, 8);
- }
+ if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+ vp9_build_interintra_16x16_predictors_mb(xd, dst_y, dst_u, dst_v,
+ dst_ystride, dst_uvstride);
+ }
#endif
+}
+
+void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
+ if (xd->mode_info_context->mbmi.mode != SPLITMV) {
+ vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
+ &xd->predictor[256],
+ &xd->predictor[320], 16, 8,
+ mb_row, mb_col);
+
} else {
build_4x4uvmvs(xd);
- build_inter4x4_predictors_mb(xd);
+ build_inter4x4_predictors_mb(xd, mb_row, mb_col);
+ }
+}
+
+/*encoder only*/
+void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
+ int mb_row, int mb_col) {
+ int i, j, weight;
+ BLOCKD *const blockd = xd->block;
+
+ /* build uv mvs */
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++) {
+ const int yoffset = i * 8 + j * 2;
+ const int uoffset = 16 + i * 2 + j;
+ const int voffset = 20 + i * 2 + j;
+
+ MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv;
+ MV *v = &blockd[voffset].bmi.as_mv[0].as_mv;
+
+ v->row = u->row = b_mv_pred_row(xd, yoffset, 0);
+ v->col = u->col = b_mv_pred_col(xd, yoffset, 0);
+
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+ u = &blockd[uoffset].bmi.as_mv[1].as_mv;
+ v = &blockd[voffset].bmi.as_mv[1].as_mv;
+
+ v->row = u->row = b_mv_pred_row(xd, yoffset, 1);
+ v->row = u->col = b_mv_pred_row(xd, yoffset, 1);
+ }
+ }
+ }
+
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT && \
+ defined(USE_IMPLICIT_WEIGHT_SPLITMV) && \
+ defined(USE_IMPLICIT_WEIGHT_UV)
+ weight = get_implicit_compoundinter_weight_splitmv(xd, mb_row, mb_col);
+#else
+ weight = AVERAGE_WEIGHT;
+#endif
+ for (i = 16; i < 24; i += 2) {
+ const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+ const int x = 4 * (i & 1);
+ const int y = ((i - 16) >> 1) * 4;
+
+ int which_mv;
+ BLOCKD *d0 = &blockd[i];
+ BLOCKD *d1 = &blockd[i + 1];
+
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
+ which_mv ? weight : 0,
+ &xd->subpix, mb_row * 8 + y, mb_col * 8 + x);
+ }
}
}
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -14,71 +14,128 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_onyxc_int.h"
-extern void vp9_build_1st_inter16x16_predictors_mby(MACROBLOCKD *xd,
- uint8_t *dst_y,
- int dst_ystride,
- int clamp_mvs);
+struct subpix_fn_table;
-extern void vp9_build_1st_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride);
+void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
+ uint8_t *dst_y,
+ int dst_ystride,
+ int mb_row,
+ int mb_col);
-extern void vp9_build_1st_inter16x16_predictors_mb(MACROBLOCKD *xd,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride);
+void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
-extern void vp9_build_2nd_inter16x16_predictors_mby(MACROBLOCKD *xd,
- uint8_t *dst_y,
- int dst_ystride);
+void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
-extern void vp9_build_2nd_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_uvstride);
+void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
-extern void vp9_build_2nd_inter16x16_predictors_mb(MACROBLOCKD *xd,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride);
+void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
+ uint8_t *dst_y,
+ uint8_t *dst_u,
+ uint8_t *dst_v,
+ int dst_ystride,
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
-extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride);
+void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col);
-extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
- uint8_t *dst_y,
- uint8_t *dst_u,
- uint8_t *dst_v,
- int dst_ystride,
- int dst_uvstride);
+void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col);
-extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd);
+void vp9_setup_interp_filters(MACROBLOCKD *xd,
+ INTERPOLATIONFILTERTYPE filter,
+ VP9_COMMON *cm);
-extern void vp9_build_inter_predictors_b(BLOCKD *d, int pitch,
- vp9_subpix_fn_t sppf);
+void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
+ YV12_BUFFER_CONFIG *other,
+ int this_w, int this_h);
-extern void vp9_build_2nd_inter_predictors_b(BLOCKD *d, int pitch,
- vp9_subpix_fn_t sppf);
+void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int_mv *mv_q3,
+ const struct scale_factors *scale,
+ int w, int h, int do_avg,
+ const struct subpix_fn_table *subpix);
-extern void vp9_build_inter_predictors4b(MACROBLOCKD *xd, BLOCKD *d,
- int pitch);
+void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int_mv *fullpel_mv_q3,
+ const int_mv *frac_mv_q4,
+ const struct scale_factors *scale,
+ int w, int h, int do_avg,
+ const struct subpix_fn_table *subpix);
-extern void vp9_build_2nd_inter_predictors4b(MACROBLOCKD *xd,
- BLOCKD *d, int pitch);
+static int scale_value_x(int val, const struct scale_factors *scale) {
+ return val * scale->x_num / scale->x_den;
+}
-extern void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd);
+static int scale_value_y(int val, const struct scale_factors *scale) {
+ return val * scale->y_num / scale->y_den;
+}
-extern void vp9_setup_interp_filters(MACROBLOCKD *xd,
- INTERPOLATIONFILTERTYPE filter,
- VP9_COMMON *cm);
+static int scaled_buffer_offset(int x_offset,
+ int y_offset,
+ int stride,
+ const struct scale_factors *scale) {
+ return scale_value_y(y_offset, scale) * stride +
+ scale_value_x(x_offset, scale);
+}
+
+static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
+ const YV12_BUFFER_CONFIG *src,
+ int mb_row, int mb_col,
+ const struct scale_factors *scale,
+ const struct scale_factors *scale_uv) {
+ const int recon_y_stride = src->y_stride;
+ const int recon_uv_stride = src->uv_stride;
+ int recon_yoffset;
+ int recon_uvoffset;
+
+ if (scale) {
+ recon_yoffset = scaled_buffer_offset(16 * mb_col, 16 * mb_row,
+ recon_y_stride, scale);
+ recon_uvoffset = scaled_buffer_offset(8 * mb_col, 8 * mb_row,
+ recon_uv_stride, scale_uv);
+ } else {
+ recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
+ recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
+ }
+ *dst = *src;
+ dst->y_buffer += recon_yoffset;
+ dst->u_buffer += recon_uvoffset;
+ dst->v_buffer += recon_uvoffset;
+}
+
+static void set_scale_factors(MACROBLOCKD *xd,
+ int ref0, int ref1,
+ struct scale_factors scale_factor[MAX_REF_FRAMES]) {
+
+ xd->scale_factor[0] = scale_factor[ref0 >= 0 ? ref0 : 0];
+ xd->scale_factor[1] = scale_factor[ref1 >= 0 ? ref1 : 0];
+ xd->scale_factor_uv[0] = xd->scale_factor[0];
+ xd->scale_factor_uv[1] = xd->scale_factor[1];
+}
#endif // VP9_COMMON_VP9_RECONINTER_H_
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -9,59 +9,81 @@
*/
#include <stdio.h>
+
#include "./vpx_config.h"
#include "vp9_rtcd.h"
#include "vp9/common/vp9_reconintra.h"
#include "vpx_mem/vpx_mem.h"
-/* For skip_recon_mb(), add vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd)
- * and vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd).
- */
+// For skip_recon_mb(), add vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd)
+// and vp9_build_intra_predictors_mbuv_s(MACROBLOCKD *xd).
+// Using multiplication and shifting instead of division in diagonal prediction.
+// iscale table is calculated from ((1 << 16) + (i + 2) / 2) / (i+2) and used as
+// ((A + B) * iscale[i] + (1 << 15)) >> 16;
+// where A and B are weighted pixel values.
+static const unsigned int iscale[64] = {
+ 32768, 21845, 16384, 13107, 10923, 9362, 8192, 7282,
+ 6554, 5958, 5461, 5041, 4681, 4369, 4096, 3855,
+ 3641, 3449, 3277, 3121, 2979, 2849, 2731, 2621,
+ 2521, 2427, 2341, 2260, 2185, 2114, 2048, 1986,
+ 1928, 1872, 1820, 1771, 1725, 1680, 1638, 1598,
+ 1560, 1524, 1489, 1456, 1425, 1394, 1365, 1337,
+ 1311, 1285, 1260, 1237, 1214, 1192, 1170, 1150,
+ 1130, 1111, 1092, 1074, 1057, 1040, 1024, 1008,
+};
+
+static INLINE int iscale_round(int value, int i) {
+ return ROUND_POWER_OF_TWO(value * iscale[i], 16);
+}
+
static void d27_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
- int r, c, h, w, v;
- int a, b;
+ int r, c;
+
r = 0;
for (c = 0; c < n - 2; c++) {
- if (c & 1)
- a = yleft_col[r + 1];
- else
- a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1;
- b = yabove_row[c + 2];
- ypred_ptr[c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ int a = c & 1 ? yleft_col[r + 1]
+ : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1);
+ int b = yabove_row[c + 2];
+ ypred_ptr[c] = iscale_round(2 * a + (c + 1) * b, 1 + c);
}
+
for (r = 1; r < n / 2 - 1; r++) {
for (c = 0; c < n - 2 - 2 * r; c++) {
- if (c & 1)
- a = yleft_col[r + 1];
- else
- a = (yleft_col[r] + yleft_col[r + 1] + 1) >> 1;
- b = ypred_ptr[(r - 1) * y_stride + c + 2];
- ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ int a = c & 1 ? yleft_col[r + 1]
+ : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1);
+ int b = ypred_ptr[(r - 1) * y_stride + c + 2];
+ ypred_ptr[r * y_stride + c] = iscale_round(2 * a + (c + 1) * b, 1 + c);
}
}
- for (; r < n - 1; ++r) {
+
+ for (; r < n - 1; r++) {
for (c = 0; c < n; c++) {
- v = (c & 1 ? yleft_col[r + 1] : (yleft_col[r] + yleft_col[r + 1] + 1) >> 1);
- h = r - c / 2;
+ int v = c & 1 ? yleft_col[r + 1]
+ : ROUND_POWER_OF_TWO(yleft_col[r] + yleft_col[r + 1], 1);
+ int h = r - c / 2;
ypred_ptr[h * y_stride + c] = v;
}
}
+
c = 0;
r = n - 1;
- ypred_ptr[r * y_stride] = (ypred_ptr[(r - 1) * y_stride] +
- yleft_col[r] + 1) >> 1;
+ ypred_ptr[r * y_stride] = ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride] +
+ yleft_col[r], 1);
for (r = n - 2; r >= n / 2; --r) {
- w = c + (n - 1 - r) * 2;
- ypred_ptr[r * y_stride + w] = (ypred_ptr[(r - 1) * y_stride + w] +
- ypred_ptr[r * y_stride + w - 1] + 1) >> 1;
+ int w = c + (n - 1 - r) * 2;
+ ypred_ptr[r * y_stride + w] =
+ ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] +
+ ypred_ptr[r * y_stride + w - 1], 1);
}
+
for (c = 1; c < n; c++) {
for (r = n - 1; r >= n / 2 + c / 2; --r) {
- w = c + (n - 1 - r) * 2;
- ypred_ptr[r * y_stride + w] = (ypred_ptr[(r - 1) * y_stride + w] +
- ypred_ptr[r * y_stride + w - 1] + 1) >> 1;
+ int w = c + (n - 1 - r) * 2;
+ ypred_ptr[r * y_stride + w] =
+ ROUND_POWER_OF_TWO(ypred_ptr[(r - 1) * y_stride + w] +
+ ypred_ptr[r * y_stride + w - 1], 1);
}
}
}
@@ -68,47 +90,50 @@
static void d63_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
- int r, c, h, w, v;
- int a, b;
+ int r, c;
+
c = 0;
for (r = 0; r < n - 2; r++) {
- if (r & 1)
- a = yabove_row[c + 1];
- else
- a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1;
- b = yleft_col[r + 2];
- ypred_ptr[r * y_stride] = (2 * a + (r + 1) * b + (r + 3) / 2) / (r + 3);
+ int a = r & 1 ? yabove_row[c + 1]
+ : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1);
+ int b = yleft_col[r + 2];
+ ypred_ptr[r * y_stride] = iscale_round(2 * a + (r + 1) * b, 1 + r);
}
+
for (c = 1; c < n / 2 - 1; c++) {
for (r = 0; r < n - 2 - 2 * c; r++) {
- if (r & 1)
- a = yabove_row[c + 1];
- else
- a = (yabove_row[c] + yabove_row[c + 1] + 1) >> 1;
- b = ypred_ptr[(r + 2) * y_stride + c - 1];
- ypred_ptr[r * y_stride + c] = (2 * a + (c + 1) * b + (c + 3) / 2) / (c + 3);
+ int a = r & 1 ? yabove_row[c + 1]
+ : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1);
+ int b = ypred_ptr[(r + 2) * y_stride + c - 1];
+ ypred_ptr[r * y_stride + c] = iscale_round(2 * a + (c + 1) * b, 1 + c);
}
}
+
for (; c < n - 1; ++c) {
for (r = 0; r < n; r++) {
- v = (r & 1 ? yabove_row[c + 1] : (yabove_row[c] + yabove_row[c + 1] + 1) >> 1);
- w = c - r / 2;
+ int v = r & 1 ? yabove_row[c + 1]
+ : ROUND_POWER_OF_TWO(yabove_row[c] + yabove_row[c + 1], 1);
+ int w = c - r / 2;
ypred_ptr[r * y_stride + w] = v;
}
}
+
r = 0;
c = n - 1;
- ypred_ptr[c] = (ypred_ptr[(c - 1)] + yabove_row[c] + 1) >> 1;
+ ypred_ptr[c] = ROUND_POWER_OF_TWO(ypred_ptr[(c - 1)] + yabove_row[c], 1);
for (c = n - 2; c >= n / 2; --c) {
- h = r + (n - 1 - c) * 2;
- ypred_ptr[h * y_stride + c] = (ypred_ptr[h * y_stride + c - 1] +
- ypred_ptr[(h - 1) * y_stride + c] + 1) >> 1;
+ int h = r + (n - 1 - c) * 2;
+ ypred_ptr[h * y_stride + c] =
+ ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] +
+ ypred_ptr[(h - 1) * y_stride + c], 1);
}
+
for (r = 1; r < n; r++) {
for (c = n - 1; c >= n / 2 + r / 2; --c) {
- h = r + (n - 1 - c) * 2;
- ypred_ptr[h * y_stride + c] = (ypred_ptr[h * y_stride + c - 1] +
- ypred_ptr[(h - 1) * y_stride + c] + 1) >> 1;
+ int h = r + (n - 1 - c) * 2;
+ ypred_ptr[h * y_stride + c] =
+ ROUND_POWER_OF_TWO(ypred_ptr[h * y_stride + c - 1] +
+ ypred_ptr[(h - 1) * y_stride + c], 1);
}
}
}
@@ -116,13 +141,14 @@
static void d45_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
int r, c;
+
for (r = 0; r < n - 1; ++r) {
for (c = 0; c <= r; ++c) {
- ypred_ptr[(r - c) * y_stride + c] =
- (yabove_row[r + 1] * (c + 1) +
- yleft_col[r + 1] * (r - c + 1) + r / 2 + 1) / (r + 2);
+ ypred_ptr[(r - c) * y_stride + c] = iscale_round(
+ yabove_row[r + 1] * (c + 1) + yleft_col[r + 1] * (r - c + 1), r);
}
}
+
for (c = 0; c <= r; ++c) {
int yabove_ext = yabove_row[r]; // clip_pixel(2 * yabove_row[r] -
// yabove_row[r - 1]);
@@ -129,14 +155,14 @@
int yleft_ext = yleft_col[r]; // clip_pixel(2 * yleft_col[r] -
// yleft_col[r-1]);
ypred_ptr[(r - c) * y_stride + c] =
- (yabove_ext * (c + 1) +
- yleft_ext * (r - c + 1) + r / 2 + 1) / (r + 2);
+ iscale_round(yabove_ext * (c + 1) + yleft_ext * (r - c + 1), r);
}
for (r = 1; r < n; ++r) {
for (c = n - r; c < n; ++c) {
const int yabove_ext = ypred_ptr[(r - 1) * y_stride + c];
const int yleft_ext = ypred_ptr[r * y_stride + c - 1];
- ypred_ptr[r * y_stride + c] = (yabove_ext + yleft_ext + 1) >> 1;
+ ypred_ptr[r * y_stride + c] =
+ ROUND_POWER_OF_TWO(yabove_ext + yleft_ext, 1);
}
}
}
@@ -145,7 +171,7 @@
uint8_t *yabove_row, uint8_t *yleft_col) {
int r, c;
for (c = 0; c < n; c++)
- ypred_ptr[c] = (yabove_row[c - 1] + yabove_row[c] + 1) >> 1;
+ ypred_ptr[c] = ROUND_POWER_OF_TWO(yabove_row[c - 1] + yabove_row[c], 1);
ypred_ptr += y_stride;
for (c = 0; c < n; c++)
ypred_ptr[c] = yabove_row[c - 1];
@@ -179,9 +205,10 @@
static void d153_predictor(uint8_t *ypred_ptr, int y_stride, int n,
uint8_t *yabove_row, uint8_t *yleft_col) {
int r, c;
- ypred_ptr[0] = (yabove_row[-1] + yleft_col[0] + 1) >> 1;
+ ypred_ptr[0] = ROUND_POWER_OF_TWO(yabove_row[-1] + yleft_col[0], 1);
for (r = 1; r < n; r++)
- ypred_ptr[r * y_stride] = (yleft_col[r - 1] + yleft_col[r] + 1) >> 1;
+ ypred_ptr[r * y_stride] =
+ ROUND_POWER_OF_TWO(yleft_col[r - 1] + yleft_col[r], 1);
ypred_ptr++;
ypred_ptr[0] = yabove_row[-1];
for (r = 1; r < n; r++)
@@ -248,20 +275,58 @@
}
}
+static INLINE int log2_minus_1(int n) {
+ switch (n) {
+ case 4: return 1;
+ case 8: return 2;
+ case 16: return 3;
+ case 32: return 4;
+ case 64: return 5;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+
void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride,
uint8_t *ypred_ptr,
int y_stride, int mode, int bsize,
- int up_available, int left_available) {
-
- uint8_t *yabove_row = src - src_stride;
- uint8_t yleft_col[64];
- uint8_t ytop_left = yabove_row[-1];
+ int up_available, int left_available,
+ int right_available) {
int r, c, i;
+ uint8_t yleft_col[64], yabove_data[65], ytop_left;
+ uint8_t *yabove_row = yabove_data + 1;
+ /*
+ * 127 127 127 .. 127 127 127 127 127 127
+ * 129 A B .. Y Z
+ * 129 C D .. W X
+ * 129 E F .. U V
+ * 129 G H .. S T T T T T
+ * ..
+ */
- for (i = 0; i < bsize; i++) {
- yleft_col[i] = src[i * src_stride - 1];
+ if (left_available) {
+ for (i = 0; i < bsize; i++)
+ yleft_col[i] = src[i * src_stride - 1];
+ } else {
+ vpx_memset(yleft_col, 129, bsize);
}
+ if (up_available) {
+ uint8_t *yabove_ptr = src - src_stride;
+ vpx_memcpy(yabove_row, yabove_ptr, bsize);
+ if (left_available) {
+ ytop_left = yabove_ptr[-1];
+ } else {
+ ytop_left = 127;
+ }
+ } else {
+ vpx_memset(yabove_row, 127, bsize);
+ ytop_left = 127;
+ }
+ yabove_row[-1] = ytop_left;
+
/* for Y */
switch (mode) {
case DC_PRED: {
@@ -269,23 +334,8 @@
int i;
int shift;
int average = 0;
- int log2_bsize_minus_1;
+ int log2_bsize_minus_1 = log2_minus_1(bsize);
- assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32 ||
- bsize == 64);
- if (bsize == 4) {
- log2_bsize_minus_1 = 1;
- } else if (bsize == 8) {
- log2_bsize_minus_1 = 2;
- } else if (bsize == 16) {
- log2_bsize_minus_1 = 3;
- } else if (bsize == 32) {
- log2_bsize_minus_1 = 4;
- } else {
- assert(bsize == 64);
- log2_bsize_minus_1 = 5;
- }
-
if (up_available || left_available) {
if (up_available) {
for (i = 0; i < bsize; i++) {
@@ -299,7 +349,7 @@
}
}
shift = log2_bsize_minus_1 + up_available + left_available;
- expected_dc = (average + (1 << (shift - 1))) >> shift;
+ expected_dc = ROUND_POWER_OF_TWO(average, shift);
} else {
expected_dc = 128;
}
@@ -310,21 +360,19 @@
}
}
break;
- case V_PRED: {
+ case V_PRED:
for (r = 0; r < bsize; r++) {
memcpy(ypred_ptr, yabove_row, bsize);
ypred_ptr += y_stride;
}
- }
- break;
- case H_PRED: {
+ break;
+ case H_PRED:
for (r = 0; r < bsize; r++) {
vpx_memset(ypred_ptr, yleft_col[r], bsize);
ypred_ptr += y_stride;
}
- }
- break;
- case TM_PRED: {
+ break;
+ case TM_PRED:
for (r = 0; r < bsize; r++) {
for (c = 0; c < bsize; c++) {
ypred_ptr[c] = clip_pixel(yleft_col[r] + yabove_row[c] - ytop_left);
@@ -332,32 +380,25 @@
ypred_ptr += y_stride;
}
- }
- break;
- case D45_PRED: {
+ break;
+ case D45_PRED:
d45_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D135_PRED: {
+ break;
+ case D135_PRED:
d135_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D117_PRED: {
+ break;
+ case D117_PRED:
d117_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D153_PRED: {
+ break;
+ case D153_PRED:
d153_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D27_PRED: {
+ break;
+ case D27_PRED:
d27_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
- case D63_PRED: {
+ break;
+ case D63_PRED:
d63_predictor(ypred_ptr, y_stride, bsize, yabove_row, yleft_col);
- }
- break;
+ break;
case I8X8_PRED:
case B_PRED:
case NEARESTMV:
@@ -383,148 +424,21 @@
static const int scale_max = 256; // 1 << scale_bits;
static const int scale_round = 127; // (1 << (scale_bits - 1));
// This table is a function A + B*exp(-kx), where x is hor. index
- static const int weights1d[32] = {
- 128, 122, 116, 111, 107, 103, 99, 96,
- 93, 90, 88, 85, 83, 81, 80, 78,
- 77, 76, 75, 74, 73, 72, 71, 70,
- 70, 69, 69, 68, 68, 68, 67, 67,
+ static const int weights1d[64] = {
+ 128, 125, 122, 119, 116, 114, 111, 109,
+ 107, 105, 103, 101, 99, 97, 96, 94,
+ 93, 91, 90, 89, 88, 86, 85, 84,
+ 83, 82, 81, 81, 80, 79, 78, 78,
+ 77, 76, 76, 75, 75, 74, 74, 73,
+ 73, 72, 72, 71, 71, 71, 70, 70,
+ 70, 70, 69, 69, 69, 69, 68, 68,
+ 68, 68, 68, 67, 67, 67, 67, 67,
};
- // This table is a function A + B*exp(-k.sqrt(xy)), where x, y are
- // hor. and vert. indices
- static const int weights2d[1024] = {
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 122, 120, 118, 116, 115, 114, 113,
- 112, 111, 111, 110, 109, 109, 108, 107,
- 107, 106, 106, 105, 105, 104, 104, 104,
- 103, 103, 102, 102, 102, 101, 101, 101,
- 128, 120, 116, 114, 112, 111, 109, 108,
- 107, 106, 105, 104, 103, 102, 102, 101,
- 100, 100, 99, 99, 98, 97, 97, 96,
- 96, 96, 95, 95, 94, 94, 93, 93,
- 128, 118, 114, 111, 109, 107, 106, 104,
- 103, 102, 101, 100, 99, 98, 97, 97,
- 96, 95, 95, 94, 93, 93, 92, 92,
- 91, 91, 90, 90, 90, 89, 89, 88,
- 128, 116, 112, 109, 107, 105, 103, 102,
- 100, 99, 98, 97, 96, 95, 94, 93,
- 93, 92, 91, 91, 90, 90, 89, 89,
- 88, 88, 87, 87, 86, 86, 85, 85,
- 128, 115, 111, 107, 105, 103, 101, 99,
- 98, 97, 96, 94, 93, 93, 92, 91,
- 90, 89, 89, 88, 88, 87, 86, 86,
- 85, 85, 84, 84, 84, 83, 83, 82,
- 128, 114, 109, 106, 103, 101, 99, 97,
- 96, 95, 93, 92, 91, 90, 90, 89,
- 88, 87, 87, 86, 85, 85, 84, 84,
- 83, 83, 82, 82, 82, 81, 81, 80,
- 128, 113, 108, 104, 102, 99, 97, 96,
- 94, 93, 92, 91, 90, 89, 88, 87,
- 86, 85, 85, 84, 84, 83, 83, 82,
- 82, 81, 81, 80, 80, 79, 79, 79,
- 128, 112, 107, 103, 100, 98, 96, 94,
- 93, 91, 90, 89, 88, 87, 86, 85,
- 85, 84, 83, 83, 82, 82, 81, 80,
- 80, 80, 79, 79, 78, 78, 78, 77,
- 128, 111, 106, 102, 99, 97, 95, 93,
- 91, 90, 89, 88, 87, 86, 85, 84,
- 83, 83, 82, 81, 81, 80, 80, 79,
- 79, 78, 78, 77, 77, 77, 76, 76,
- 128, 111, 105, 101, 98, 96, 93, 92,
- 90, 89, 88, 86, 85, 84, 84, 83,
- 82, 81, 81, 80, 80, 79, 79, 78,
- 78, 77, 77, 76, 76, 76, 75, 75,
- 128, 110, 104, 100, 97, 94, 92, 91,
- 89, 88, 86, 85, 84, 83, 83, 82,
- 81, 80, 80, 79, 79, 78, 78, 77,
- 77, 76, 76, 75, 75, 75, 74, 74,
- 128, 109, 103, 99, 96, 93, 91, 90,
- 88, 87, 85, 84, 83, 82, 82, 81,
- 80, 79, 79, 78, 78, 77, 77, 76,
- 76, 75, 75, 75, 74, 74, 74, 73,
- 128, 109, 102, 98, 95, 93, 90, 89,
- 87, 86, 84, 83, 82, 81, 81, 80,
- 79, 78, 78, 77, 77, 76, 76, 75,
- 75, 75, 74, 74, 73, 73, 73, 73,
- 128, 108, 102, 97, 94, 92, 90, 88,
- 86, 85, 84, 83, 82, 81, 80, 79,
- 78, 78, 77, 77, 76, 76, 75, 75,
- 74, 74, 73, 73, 73, 73, 72, 72,
- 128, 107, 101, 97, 93, 91, 89, 87,
- 85, 84, 83, 82, 81, 80, 79, 78,
- 78, 77, 76, 76, 75, 75, 74, 74,
- 74, 73, 73, 73, 72, 72, 72, 71,
- 128, 107, 100, 96, 93, 90, 88, 86,
- 85, 83, 82, 81, 80, 79, 78, 78,
- 77, 76, 76, 75, 75, 74, 74, 73,
- 73, 73, 72, 72, 72, 71, 71, 71,
- 128, 106, 100, 95, 92, 89, 87, 85,
- 84, 83, 81, 80, 79, 78, 78, 77,
- 76, 76, 75, 75, 74, 74, 73, 73,
- 72, 72, 72, 72, 71, 71, 71, 70,
- 128, 106, 99, 95, 91, 89, 87, 85,
- 83, 82, 81, 80, 79, 78, 77, 76,
- 76, 75, 75, 74, 74, 73, 73, 72,
- 72, 72, 71, 71, 71, 71, 70, 70,
- 128, 105, 99, 94, 91, 88, 86, 84,
- 83, 81, 80, 79, 78, 77, 77, 76,
- 75, 75, 74, 74, 73, 73, 72, 72,
- 72, 71, 71, 71, 70, 70, 70, 70,
- 128, 105, 98, 93, 90, 88, 85, 84,
- 82, 81, 80, 79, 78, 77, 76, 75,
- 75, 74, 74, 73, 73, 72, 72, 71,
- 71, 71, 71, 70, 70, 70, 70, 69,
- 128, 104, 97, 93, 90, 87, 85, 83,
- 82, 80, 79, 78, 77, 76, 76, 75,
- 74, 74, 73, 73, 72, 72, 71, 71,
- 71, 70, 70, 70, 70, 69, 69, 69,
- 128, 104, 97, 92, 89, 86, 84, 83,
- 81, 80, 79, 78, 77, 76, 75, 74,
- 74, 73, 73, 72, 72, 71, 71, 71,
- 70, 70, 70, 70, 69, 69, 69, 69,
- 128, 104, 96, 92, 89, 86, 84, 82,
- 80, 79, 78, 77, 76, 75, 75, 74,
- 73, 73, 72, 72, 71, 71, 71, 70,
- 70, 70, 70, 69, 69, 69, 69, 68,
- 128, 103, 96, 91, 88, 85, 83, 82,
- 80, 79, 78, 77, 76, 75, 74, 74,
- 73, 72, 72, 72, 71, 71, 70, 70,
- 70, 70, 69, 69, 69, 69, 68, 68,
- 128, 103, 96, 91, 88, 85, 83, 81,
- 80, 78, 77, 76, 75, 75, 74, 73,
- 73, 72, 72, 71, 71, 70, 70, 70,
- 70, 69, 69, 69, 69, 68, 68, 68,
- 128, 102, 95, 90, 87, 84, 82, 81,
- 79, 78, 77, 76, 75, 74, 73, 73,
- 72, 72, 71, 71, 71, 70, 70, 70,
- 69, 69, 69, 69, 68, 68, 68, 68,
- 128, 102, 95, 90, 87, 84, 82, 80,
- 79, 77, 76, 75, 75, 74, 73, 73,
- 72, 72, 71, 71, 70, 70, 70, 69,
- 69, 69, 69, 68, 68, 68, 68, 68,
- 128, 102, 94, 90, 86, 84, 82, 80,
- 78, 77, 76, 75, 74, 73, 73, 72,
- 72, 71, 71, 70, 70, 70, 69, 69,
- 69, 69, 68, 68, 68, 68, 68, 67,
- 128, 101, 94, 89, 86, 83, 81, 79,
- 78, 77, 76, 75, 74, 73, 73, 72,
- 71, 71, 71, 70, 70, 69, 69, 69,
- 69, 68, 68, 68, 68, 68, 67, 67,
- 128, 101, 93, 89, 85, 83, 81, 79,
- 78, 76, 75, 74, 74, 73, 72, 72,
- 71, 71, 70, 70, 70, 69, 69, 69,
- 68, 68, 68, 68, 68, 67, 67, 67,
- 128, 101, 93, 88, 85, 82, 80, 79,
- 77, 76, 75, 74, 73, 73, 72, 71,
- 71, 70, 70, 70, 69, 69, 69, 68,
- 68, 68, 68, 68, 67, 67, 67, 67,
- };
- int size_scale = (size >= 32 ? 1 :
- size == 16 ? 2 :
- size == 8 ? 4 : 8);
- int size_shift = size == 64 ? 1 : 0;
+
+ int size_scale = (size >= 64 ? 1:
+ size == 32 ? 2 :
+ size == 16 ? 4 :
+ size == 8 ? 8 : 16);
int i, j;
switch (mode) {
case V_PRED:
@@ -531,7 +445,7 @@
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = weights1d[i * size_scale >> size_shift];
+ int scale = weights1d[i * size_scale];
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -544,7 +458,7 @@
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = weights1d[j * size_scale >> size_shift];
+ int scale = weights1d[j * size_scale];
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -558,9 +472,8 @@
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = (weights2d[(i * size_scale * 32 +
- j * size_scale) >> size_shift] +
- weights1d[i * size_scale >> size_shift]) >> 1;
+ int scale = (weights1d[i * size_scale] * 3 +
+ weights1d[j * size_scale]) >> 2;
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -574,9 +487,8 @@
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = (weights2d[(i * size_scale * 32 +
- j * size_scale) >> size_shift] +
- weights1d[j * size_scale >> size_shift]) >> 1;
+ int scale = (weights1d[j * size_scale] * 3 +
+ weights1d[i * size_scale]) >> 2;
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -589,8 +501,7 @@
for (i = 0; i < size; ++i) {
for (j = 0; j < size; ++j) {
int k = i * interstride + j;
- int scale = weights2d[(i * size_scale * 32 +
- j * size_scale) >> size_shift];
+ int scale = weights1d[(i < j ? i : j) * size_scale];
interpred[k] =
((scale_max - scale) * interpred[k] +
scale * intrapred[i * intrastride + j] + scale_round)
@@ -600,8 +511,21 @@
break;
case D45_PRED:
- case DC_PRED:
+ for (i = 0; i < size; ++i) {
+ for (j = 0; j < size; ++j) {
+ int k = i * interstride + j;
+ int scale = (weights1d[i * size_scale] +
+ weights1d[j * size_scale]) >> 1;
+ interpred[k] =
+ ((scale_max - scale) * interpred[k] +
+ scale * intrapred[i * intrastride + j] + scale_round)
+ >> scale_bits;
+ }
+ }
+ break;
+
case TM_PRED:
+ case DC_PRED:
default:
// simple average
for (i = 0; i < size; ++i) {
@@ -631,7 +555,7 @@
xd->dst.y_buffer, xd->dst.y_stride,
intrapredictor, 16,
xd->mode_info_context->mbmi.interintra_mode, 16,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available, xd->right_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_mode,
ypred, ystride, intrapredictor, 16, 16);
}
@@ -646,12 +570,12 @@
xd->dst.u_buffer, xd->dst.uv_stride,
uintrapredictor, 8,
xd->mode_info_context->mbmi.interintra_uv_mode, 8,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available, xd->right_available);
vp9_build_intra_predictors_internal(
xd->dst.v_buffer, xd->dst.uv_stride,
vintrapredictor, 8,
xd->mode_info_context->mbmi.interintra_uv_mode, 8,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available, xd->right_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
upred, uvstride, uintrapredictor, 8, 8);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
@@ -666,7 +590,7 @@
xd->dst.y_buffer, xd->dst.y_stride,
intrapredictor, 32,
xd->mode_info_context->mbmi.interintra_mode, 32,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available, xd->right_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_mode,
ypred, ystride, intrapredictor, 32, 32);
}
@@ -681,12 +605,12 @@
xd->dst.u_buffer, xd->dst.uv_stride,
uintrapredictor, 16,
xd->mode_info_context->mbmi.interintra_uv_mode, 16,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available, xd->right_available);
vp9_build_intra_predictors_internal(
xd->dst.v_buffer, xd->dst.uv_stride,
vintrapredictor, 16,
xd->mode_info_context->mbmi.interintra_uv_mode, 16,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available, xd->right_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
upred, uvstride, uintrapredictor, 16, 16);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
@@ -710,7 +634,8 @@
const int mode = xd->mode_info_context->mbmi.interintra_mode;
vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
intrapredictor, 64, mode, 64,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_mode,
ypred, ystride, intrapredictor, 64, 64);
}
@@ -724,10 +649,12 @@
const int mode = xd->mode_info_context->mbmi.interintra_uv_mode;
vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride,
uintrapredictor, 32, mode, 32,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
vp9_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride,
vintrapredictor, 32, mode, 32,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
upred, uvstride, uintrapredictor, 32, 32);
combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode,
@@ -749,7 +676,8 @@
vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
xd->predictor, 16,
xd->mode_info_context->mbmi.mode, 16,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
}
void vp9_build_intra_predictors_mby_s(MACROBLOCKD *xd) {
@@ -756,7 +684,8 @@
vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
xd->dst.y_buffer, xd->dst.y_stride,
xd->mode_info_context->mbmi.mode, 16,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
}
void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) {
@@ -763,7 +692,8 @@
vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
xd->dst.y_buffer, xd->dst.y_stride,
xd->mode_info_context->mbmi.mode, 32,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
}
void vp9_build_intra_predictors_sb64y_s(MACROBLOCKD *xd) {
@@ -770,7 +700,8 @@
vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride,
xd->dst.y_buffer, xd->dst.y_stride,
xd->mode_info_context->mbmi.mode, 64,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
}
void vp9_build_intra_predictors_mbuv_internal(MACROBLOCKD *xd,
@@ -780,10 +711,12 @@
int mode, int bsize) {
vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride,
upred_ptr, uv_stride, mode, bsize,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
vp9_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride,
vpred_ptr, uv_stride, mode, bsize,
- xd->up_available, xd->left_available);
+ xd->up_available, xd->left_available,
+ xd->right_available);
}
void vp9_build_intra_predictors_mbuv(MACROBLOCKD *xd) {
@@ -815,20 +748,35 @@
32);
}
-void vp9_intra8x8_predict(BLOCKD *xd,
+void vp9_intra8x8_predict(MACROBLOCKD *xd,
+ BLOCKD *b,
int mode,
uint8_t *predictor) {
- vp9_build_intra_predictors_internal(*(xd->base_dst) + xd->dst,
- xd->dst_stride, predictor, 16,
- mode, 8, 1, 1);
+ const int block4x4_idx = (b - xd->block);
+ const int block_idx = (block4x4_idx >> 2) | !!(block4x4_idx & 2);
+ const int have_top = (block_idx >> 1) || xd->up_available;
+ const int have_left = (block_idx & 1) || xd->left_available;
+ const int have_right = !(block_idx & 1) || xd->right_available;
+
+ vp9_build_intra_predictors_internal(*(b->base_dst) + b->dst,
+ b->dst_stride, predictor, 16,
+ mode, 8, have_top, have_left,
+ have_right);
}
-void vp9_intra_uv4x4_predict(BLOCKD *xd,
+void vp9_intra_uv4x4_predict(MACROBLOCKD *xd,
+ BLOCKD *b,
int mode,
uint8_t *predictor) {
- vp9_build_intra_predictors_internal(*(xd->base_dst) + xd->dst,
- xd->dst_stride, predictor, 8,
- mode, 4, 1, 1);
+ const int block_idx = (b - xd->block) & 3;
+ const int have_top = (block_idx >> 1) || xd->up_available;
+ const int have_left = (block_idx & 1) || xd->left_available;
+ const int have_right = !(block_idx & 1) || xd->right_available;
+
+ vp9_build_intra_predictors_internal(*(b->base_dst) + b->dst,
+ b->dst_stride, predictor, 8,
+ mode, 4, have_top, have_left,
+ have_right);
}
/* TODO: try different ways of use Y-UV mode correlation
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -14,37 +14,44 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
-extern void vp9_recon_intra_mbuv(MACROBLOCKD *xd);
-extern B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
- int stride, int n);
-extern B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x);
+void vp9_recon_intra_mbuv(MACROBLOCKD *xd);
+
+B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
+ int stride, int n,
+ int tx, int ty);
+
+B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, BLOCKD *x);
+
#if CONFIG_COMP_INTERINTRA_PRED
-extern void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd,
- uint8_t *ypred,
- uint8_t *upred,
- uint8_t *vpred,
- int ystride,
- int uvstride);
-extern void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd,
- uint8_t *ypred,
- int ystride);
-extern void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd,
- uint8_t *upred,
- uint8_t *vpred,
- int uvstride);
+void vp9_build_interintra_16x16_predictors_mb(MACROBLOCKD *xd,
+ uint8_t *ypred,
+ uint8_t *upred,
+ uint8_t *vpred,
+ int ystride,
+ int uvstride);
+
+void vp9_build_interintra_16x16_predictors_mby(MACROBLOCKD *xd,
+ uint8_t *ypred,
+ int ystride);
+
+void vp9_build_interintra_16x16_predictors_mbuv(MACROBLOCKD *xd,
+ uint8_t *upred,
+ uint8_t *vpred,
+ int uvstride);
#endif // CONFIG_COMP_INTERINTRA_PRED
-extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
- uint8_t *ypred,
- uint8_t *upred,
- uint8_t *vpred,
- int ystride,
- int uvstride);
-extern void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd,
- uint8_t *ypred,
- uint8_t *upred,
- uint8_t *vpred,
- int ystride,
- int uvstride);
+void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd,
+ uint8_t *ypred,
+ uint8_t *upred,
+ uint8_t *vpred,
+ int ystride,
+ int uvstride);
+
+void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd,
+ uint8_t *ypred,
+ uint8_t *upred,
+ uint8_t *vpred,
+ int ystride,
+ int uvstride);
#endif // VP9_COMMON_VP9_RECONINTRA_H_
--- a/vp9/common/vp9_reconintra4x4.c
+++ b/vp9/common/vp9_reconintra4x4.c
@@ -15,17 +15,17 @@
#include "vp9_rtcd.h"
#if CONFIG_NEWBINTRAMODES
-static int find_grad_measure(uint8_t *x, int stride, int n, int t,
+static int find_grad_measure(uint8_t *x, int stride, int n, int tx, int ty,
int dx, int dy) {
int i, j;
int count = 0, gsum = 0, gdiv;
/* TODO: Make this code more efficient by breaking up into two loops */
- for (i = -t; i < n; ++i)
- for (j = -t; j < n; ++j) {
+ for (i = -ty; i < n; ++i)
+ for (j = -tx; j < n; ++j) {
int g;
if (i >= 0 && j >= 0) continue;
if (i + dy >= 0 && j + dx >= 0) continue;
- if (i + dy < -t || i + dy >= n || j + dx < -t || j + dx >= n) continue;
+ if (i + dy < -ty || i + dy >= n || j + dx < -tx || j + dx >= n) continue;
g = abs(x[(i + dy) * stride + j + dx] - x[i * stride + j]);
gsum += g * g;
count++;
@@ -36,14 +36,15 @@
#if CONTEXT_PRED_REPLACEMENTS == 6
B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
- int stride, int n) {
+ int stride, int n,
+ int tx, int ty) {
int g[8], i, imin, imax;
- g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1);
- g[2] = find_grad_measure(ptr, stride, n, 4, 1, 1);
- g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2);
- g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2);
- g[6] = find_grad_measure(ptr, stride, n, 4, -1, 1);
- g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1);
+ g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1);
+ g[2] = find_grad_measure(ptr, stride, n, tx, ty, 1, 1);
+ g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2);
+ g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2);
+ g[6] = find_grad_measure(ptr, stride, n, tx, ty, -1, 1);
+ g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1);
imin = 1;
for (i = 2; i < 8; i += 1 + (i == 3))
imin = (g[i] < g[imin] ? i : imin);
@@ -73,12 +74,13 @@
}
#elif CONTEXT_PRED_REPLACEMENTS == 4
B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
- int stride, int n) {
+ int stride, int n,
+ int tx, int ty) {
int g[8], i, imin, imax;
- g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1);
- g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2);
- g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2);
- g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1);
+ g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1);
+ g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2);
+ g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2);
+ g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1);
imin = 1;
for (i = 3; i < 8; i+=2)
imin = (g[i] < g[imin] ? i : imin);
@@ -104,16 +106,17 @@
}
#elif CONTEXT_PRED_REPLACEMENTS == 0
B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
- int stride, int n) {
+ int stride, int n,
+ int tx, int ty) {
int g[8], i, imin, imax;
- g[0] = find_grad_measure(ptr, stride, n, 4, 1, 0);
- g[1] = find_grad_measure(ptr, stride, n, 4, 2, 1);
- g[2] = find_grad_measure(ptr, stride, n, 4, 1, 1);
- g[3] = find_grad_measure(ptr, stride, n, 4, 1, 2);
- g[4] = find_grad_measure(ptr, stride, n, 4, 0, 1);
- g[5] = find_grad_measure(ptr, stride, n, 4, -1, 2);
- g[6] = find_grad_measure(ptr, stride, n, 4, -1, 1);
- g[7] = find_grad_measure(ptr, stride, n, 4, -2, 1);
+ g[0] = find_grad_measure(ptr, stride, n, tx, ty, 1, 0);
+ g[1] = find_grad_measure(ptr, stride, n, tx, ty, 2, 1);
+ g[2] = find_grad_measure(ptr, stride, n, tx, ty, 1, 1);
+ g[3] = find_grad_measure(ptr, stride, n, tx, ty, 1, 2);
+ g[4] = find_grad_measure(ptr, stride, n, tx, ty, 0, 1);
+ g[5] = find_grad_measure(ptr, stride, n, tx, ty, -1, 2);
+ g[6] = find_grad_measure(ptr, stride, n, tx, ty, -1, 1);
+ g[7] = find_grad_measure(ptr, stride, n, tx, ty, -2, 1);
imax = 0;
for (i = 1; i < 8; i++)
imax = (g[i] > g[imax] ? i : imax);
@@ -144,27 +147,114 @@
}
#endif
-B_PREDICTION_MODE vp9_find_bpred_context(BLOCKD *x) {
+B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, BLOCKD *x) {
+ const int block_idx = x - xd->block;
+ const int have_top = (block_idx >> 2) || xd->up_available;
+ const int have_left = (block_idx & 3) || xd->left_available;
uint8_t *ptr = *(x->base_dst) + x->dst;
int stride = x->dst_stride;
- return vp9_find_dominant_direction(ptr, stride, 4);
+ int tx = have_left ? 4 : 0;
+ int ty = have_top ? 4 : 0;
+ if (!have_left && !have_top)
+ return B_DC_PRED;
+ return vp9_find_dominant_direction(ptr, stride, 4, tx, ty);
}
#endif
-void vp9_intra4x4_predict(BLOCKD *x,
+void vp9_intra4x4_predict(MACROBLOCKD *xd,
+ BLOCKD *x,
int b_mode,
uint8_t *predictor) {
int i, r, c;
+ const int block_idx = x - xd->block;
+ const int have_top = (block_idx >> 2) || xd->up_available;
+ const int have_left = (block_idx & 3) || xd->left_available;
+ const int have_right = (block_idx & 3) != 3 || xd->right_available;
+ uint8_t left[4], above[8], top_left;
+ /*
+ * 127 127 127 .. 127 127 127 127 127 127
+ * 129 A B .. Y Z
+ * 129 C D .. W X
+ * 129 E F .. U V
+ * 129 G H .. S T T T T T
+ * ..
+ */
- uint8_t *above = *(x->base_dst) + x->dst - x->dst_stride;
- uint8_t left[4];
- uint8_t top_left = above[-1];
+ if (have_left) {
+ uint8_t *left_ptr = *(x->base_dst) + x->dst - 1;
+ const int stride = x->dst_stride;
- left[0] = (*(x->base_dst))[x->dst - 1];
- left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride];
- left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride];
- left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride];
+ left[0] = left_ptr[0 * stride];
+ left[1] = left_ptr[1 * stride];
+ left[2] = left_ptr[2 * stride];
+ left[3] = left_ptr[3 * stride];
+ } else {
+ left[0] = left[1] = left[2] = left[3] = 129;
+ }
+ if (have_top) {
+ uint8_t *above_ptr = *(x->base_dst) + x->dst - x->dst_stride;
+
+ if (have_left) {
+ top_left = above_ptr[-1];
+ } else {
+ top_left = 127;
+ }
+
+ above[0] = above_ptr[0];
+ above[1] = above_ptr[1];
+ above[2] = above_ptr[2];
+ above[3] = above_ptr[3];
+ if (((block_idx & 3) != 3) ||
+ (have_right && block_idx == 3 &&
+ ((xd->mb_index != 3 && xd->sb_index != 3) ||
+ ((xd->mb_index & 1) == 0 && xd->sb_index == 3)))) {
+ above[4] = above_ptr[4];
+ above[5] = above_ptr[5];
+ above[6] = above_ptr[6];
+ above[7] = above_ptr[7];
+ } else if (have_right) {
+ uint8_t *above_right = above_ptr + 4;
+
+ if (xd->sb_index == 3 && (xd->mb_index & 1))
+ above_right -= 32 * x->dst_stride;
+ if (xd->mb_index == 3)
+ above_right -= 16 * x->dst_stride;
+ above_right -= (block_idx & ~3) * x->dst_stride;
+
+ /* use a more distant above-right (from closest available top-right
+ * corner), but with a "localized DC" (similar'ish to TM-pred):
+ *
+ * A B C D E F G H
+ * I J K L
+ * M N O P
+ * Q R S T
+ * U V W X x1 x2 x3 x4
+ *
+ * Where:
+ * x1 = clip_pixel(E + X - D)
+ * x2 = clip_pixel(F + X - D)
+ * x3 = clip_pixel(G + X - D)
+ * x4 = clip_pixel(H + X - D)
+ *
+ * This is applied anytime when we use a "distant" above-right edge
+ * that is not immediately top-right to the block that we're going
+ * to do intra prediction for.
+ */
+ above[4] = clip_pixel(above_right[0] + above_ptr[3] - above_right[-1]);
+ above[5] = clip_pixel(above_right[1] + above_ptr[3] - above_right[-1]);
+ above[6] = clip_pixel(above_right[2] + above_ptr[3] - above_right[-1]);
+ above[7] = clip_pixel(above_right[3] + above_ptr[3] - above_right[-1]);
+ } else {
+ // extend edge
+ above[4] = above[5] = above[6] = above[7] = above[3];
+ }
+ } else {
+ above[0] = above[1] = above[2] = above[3] = 127;
+ above[4] = above[5] = above[6] = above[7] = 127;
+ top_left = 127;
+ }
+
#if CONFIG_NEWBINTRAMODES
if (b_mode == B_CONTEXT_PRED)
b_mode = x->bmi.as_mode.context;
@@ -410,40 +500,4 @@
*/
#endif
}
-}
-
-/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
- * to the right prediction have filled in pixels to use.
- */
-void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) {
- int extend_edge = xd->mb_to_right_edge == 0 && xd->mb_index < 2;
- uint8_t *above_right = *(xd->block[0].base_dst) + xd->block[0].dst -
- xd->block[0].dst_stride + 16;
- uint32_t *dst_ptr0 = (uint32_t *)above_right;
- uint32_t *dst_ptr1 =
- (uint32_t *)(above_right + 4 * xd->block[0].dst_stride);
- uint32_t *dst_ptr2 =
- (uint32_t *)(above_right + 8 * xd->block[0].dst_stride);
- uint32_t *dst_ptr3 =
- (uint32_t *)(above_right + 12 * xd->block[0].dst_stride);
-
- uint32_t *src_ptr = (uint32_t *) above_right;
-
- if ((xd->sb_index >= 2 && xd->mb_to_right_edge == 0) ||
- (xd->sb_index == 3 && xd->mb_index & 1))
- src_ptr = (uint32_t *) (((uint8_t *) src_ptr) - 32 *
- xd->block[0].dst_stride);
- if (xd->mb_index == 3 ||
- (xd->mb_to_right_edge == 0 && xd->mb_index == 2))
- src_ptr = (uint32_t *) (((uint8_t *) src_ptr) - 16 *
- xd->block[0].dst_stride);
-
- if (extend_edge) {
- *src_ptr = ((uint8_t *) src_ptr)[-1] * 0x01010101U;
- }
-
- *dst_ptr0 = *src_ptr;
- *dst_ptr1 = *src_ptr;
- *dst_ptr2 = *src_ptr;
- *dst_ptr3 = *src_ptr;
}
--- a/vp9/common/vp9_reconintra4x4.h
+++ /dev/null
@@ -1,17 +1,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_COMMON_VP9_RECONINTRA4X4_H_
-#define VP9_COMMON_VP9_RECONINTRA4X4_H_
-
-extern void vp9_intra_prediction_down_copy(MACROBLOCKD *xd);
-
-#endif // VP9_COMMON_VP9_RECONINTRA4X4_H_
--- a/vp9/common/vp9_rtcd.c
+++ b/vp9/common/vp9_rtcd.c
@@ -12,10 +12,9 @@
#include "vp9_rtcd.h"
#include "vpx_ports/vpx_once.h"
-extern void vpx_scale_rtcd(void);
+void vpx_scale_rtcd(void);
-void vp9_rtcd()
-{
+void vp9_rtcd() {
vpx_scale_rtcd();
once(setup_rtcd_internal);
}
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -23,90 +23,50 @@
}
forward_decls vp9_common_forward_decls
-prototype void vp9_filter_block2d_4x4_8 "const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *HFilter_aligned16, const int16_t *VFilter_aligned16, uint8_t *dst_ptr, unsigned int dst_stride"
-prototype void vp9_filter_block2d_8x4_8 "const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *HFilter_aligned16, const int16_t *VFilter_aligned16, uint8_t *dst_ptr, unsigned int dst_stride"
-prototype void vp9_filter_block2d_8x8_8 "const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *HFilter_aligned16, const int16_t *VFilter_aligned16, uint8_t *dst_ptr, unsigned int dst_stride"
-prototype void vp9_filter_block2d_16x16_8 "const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *HFilter_aligned16, const int16_t *VFilter_aligned16, uint8_t *dst_ptr, unsigned int dst_stride"
-
-# At the very least, MSVC 2008 has compiler bug exhibited by this code; code
-# compiles warning free but a dissassembly of generated code show bugs. To be
-# on the safe side, only enabled when compiled with 'gcc'.
-if [ "$CONFIG_GCC" = "yes" ]; then
- specialize vp9_filter_block2d_4x4_8 sse4_1 sse2
-fi
- specialize vp9_filter_block2d_8x4_8 ssse3 #sse4_1 sse2
- specialize vp9_filter_block2d_8x8_8 ssse3 #sse4_1 sse2
- specialize vp9_filter_block2d_16x16_8 ssse3 #sse4_1 sse2
-
#
# Dequant
#
-prototype void vp9_dequantize_b "struct blockd *x"
-specialize vp9_dequantize_b
-
-prototype void vp9_dequantize_b_2x2 "struct blockd *x"
-specialize vp9_dequantize_b_2x2
-
-prototype void vp9_dequant_dc_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dc, struct macroblockd *xd"
-specialize vp9_dequant_dc_idct_add_y_block_8x8
-
-prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, struct macroblockd *xd"
+prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd"
specialize vp9_dequant_idct_add_y_block_8x8
-prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs, struct macroblockd *xd"
+prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
specialize vp9_dequant_idct_add_uv_block_8x8
prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
specialize vp9_dequant_idct_add_16x16
-prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int dc, int eob"
+prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
specialize vp9_dequant_idct_add_8x8
-prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride"
+prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
specialize vp9_dequant_idct_add
-prototype void vp9_dequant_dc_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int dc"
-specialize vp9_dequant_dc_idct_add
-
-prototype void vp9_dequant_dc_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dcs"
-specialize vp9_dequant_dc_idct_add_y_block
-
-prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs"
+prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd"
specialize vp9_dequant_idct_add_y_block
-prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs"
+prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
specialize vp9_dequant_idct_add_uv_block
prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob"
specialize vp9_dequant_idct_add_32x32
-prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs"
+prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
specialize vp9_dequant_idct_add_uv_block_16x16
#
# RECON
#
-prototype void vp9_copy_mem16x16 "uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
+prototype void vp9_copy_mem16x16 "const uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
specialize vp9_copy_mem16x16 mmx sse2 dspr2
vp9_copy_mem16x16_dspr2=vp9_copy_mem16x16_dspr2
-prototype void vp9_copy_mem8x8 "uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
+prototype void vp9_copy_mem8x8 "const uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
specialize vp9_copy_mem8x8 mmx dspr2
vp9_copy_mem8x8_dspr2=vp9_copy_mem8x8_dspr2
-prototype void vp9_copy_mem8x4 "uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
+prototype void vp9_copy_mem8x4 "const uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
specialize vp9_copy_mem8x4 mmx
-prototype void vp9_avg_mem16x16 "uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
-specialize vp9_avg_mem16x16
-
-prototype void vp9_avg_mem8x8 "uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
-specialize vp9_avg_mem8x8
-
-prototype void vp9_copy_mem8x4 "uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch"
-specialize vp9_copy_mem8x4 mmx dspr2
-vp9_copy_mem8x4_dspr2=vp9_copy_mem8x4_dspr2
-
prototype void vp9_recon_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride"
specialize vp9_recon_b
@@ -137,6 +97,12 @@
prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst"
specialize void vp9_recon_sbuv_s
+prototype void vp9_recon_sb64y_s "struct macroblockd *x, uint8_t *dst"
+specialize vp9_recon_sb64y_s
+
+prototype void vp9_recon_sb64uv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst"
+specialize void vp9_recon_sb64uv_s
+
prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x"
specialize vp9_build_intra_predictors_mby_s
@@ -164,15 +130,38 @@
prototype void vp9_build_intra_predictors_sb64uv_s "struct macroblockd *x"
specialize vp9_build_intra_predictors_sb64uv_s;
-prototype void vp9_intra4x4_predict "struct blockd *x, int b_mode, uint8_t *predictor"
+prototype void vp9_intra4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor"
specialize vp9_intra4x4_predict;
-prototype void vp9_intra8x8_predict "struct blockd *x, int b_mode, uint8_t *predictor"
+prototype void vp9_intra8x8_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor"
specialize vp9_intra8x8_predict;
-prototype void vp9_intra_uv4x4_predict "struct blockd *x, int b_mode, uint8_t *predictor"
+prototype void vp9_intra_uv4x4_predict "struct macroblockd *xd, struct blockd *x, int b_mode, uint8_t *predictor"
specialize vp9_intra_uv4x4_predict;
+if [ "$CONFIG_VP9_DECODER" = "yes" ]; then
+prototype void vp9_add_residual_4x4 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_residual_4x4 sse2
+
+prototype void vp9_add_residual_8x8 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_residual_8x8 sse2
+
+prototype void vp9_add_residual_16x16 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_residual_16x16 sse2
+
+prototype void vp9_add_residual_32x32 "const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_residual_32x32 sse2
+
+prototype void vp9_add_constant_residual_8x8 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_constant_residual_8x8 sse2
+
+prototype void vp9_add_constant_residual_16x16 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_constant_residual_16x16 sse2
+
+prototype void vp9_add_constant_residual_32x32 "const int16_t diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride"
+specialize vp9_add_constant_residual_32x32 sse2
+fi
+
#
# Loopfilter
#
@@ -263,171 +252,146 @@
prototype unsigned int vp9_sad3x16 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"
specialize vp9_sad3x16 sse2
-prototype unsigned int vp9_sub_pixel_variance16x2 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
+prototype unsigned int vp9_sub_pixel_variance16x2 "const uint8_t *src_ptr, const int source_stride, const int xoffset, const int yoffset, const uint8_t *ref_ptr, const int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x2 sse2
#
# Sub Pixel Filters
#
-prototype void vp9_eighttap_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict16x16
+prototype void vp9_convolve8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8 ssse3
-prototype void vp9_eighttap_predict8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict8x8
+prototype void vp9_convolve8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_horiz ssse3
-prototype void vp9_eighttap_predict_avg16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg16x16
+prototype void vp9_convolve8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_vert ssse3
-prototype void vp9_eighttap_predict_avg8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg8x8
+prototype void vp9_convolve8_avg "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_avg ssse3
-prototype void vp9_eighttap_predict_avg4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg4x4
+prototype void vp9_convolve8_avg_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_avg_horiz ssse3
-prototype void vp9_eighttap_predict8x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict8x4
+prototype void vp9_convolve8_avg_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_avg_vert ssse3
-prototype void vp9_eighttap_predict4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict4x4
+#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
+prototype void vp9_convolve8_1by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_1by8
-prototype void vp9_eighttap_predict16x16_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict16x16_sharp
+prototype void vp9_convolve8_qtr "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_qtr
-prototype void vp9_eighttap_predict8x8_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict8x8_sharp
+prototype void vp9_convolve8_3by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3by8
-prototype void vp9_eighttap_predict_avg16x16_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg16x16_sharp
+prototype void vp9_convolve8_5by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_5by8
-prototype void vp9_eighttap_predict_avg8x8_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg8x8_sharp
+prototype void vp9_convolve8_3qtr "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3qtr
-prototype void vp9_eighttap_predict_avg4x4_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg4x4_sharp
+prototype void vp9_convolve8_7by8 "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_7by8
-prototype void vp9_eighttap_predict8x4_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict8x4_sharp
+prototype void vp9_convolve8_1by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_1by8_horiz
-prototype void vp9_eighttap_predict4x4_sharp "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict4x4_sharp
+prototype void vp9_convolve8_qtr_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_qtr_horiz
-prototype void vp9_eighttap_predict16x16_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict16x16_smooth
+prototype void vp9_convolve8_3by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3by8_horiz
-prototype void vp9_eighttap_predict8x8_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict8x8_smooth
+prototype void vp9_convolve8_5by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_5by8_horiz
-prototype void vp9_eighttap_predict_avg16x16_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg16x16_smooth
+prototype void vp9_convolve8_3qtr_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3qtr_horiz
-prototype void vp9_eighttap_predict_avg8x8_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg8x8_smooth
+prototype void vp9_convolve8_7by8_horiz "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_7by8_horiz
-prototype void vp9_eighttap_predict_avg4x4_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict_avg4x4_smooth
+prototype void vp9_convolve8_1by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_1by8_vert
-prototype void vp9_eighttap_predict8x4_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict8x4_smooth
+prototype void vp9_convolve8_qtr_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_qtr_vert
-prototype void vp9_eighttap_predict4x4_smooth "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_eighttap_predict4x4_smooth
+prototype void vp9_convolve8_3by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3by8_vert
-prototype void vp9_sixtap_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict16x16
+prototype void vp9_convolve8_5by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_5by8_vert
-prototype void vp9_sixtap_predict8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict8x8
+prototype void vp9_convolve8_3qtr_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_3qtr_vert
-prototype void vp9_sixtap_predict_avg16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict_avg16x16
+prototype void vp9_convolve8_7by8_vert "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
+specialize vp9_convolve8_7by8_vert
+#endif
-prototype void vp9_sixtap_predict_avg8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict_avg8x8
-
-prototype void vp9_sixtap_predict8x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict8x4
-
-prototype void vp9_sixtap_predict4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict4x4
-
-prototype void vp9_sixtap_predict_avg4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_sixtap_predict_avg4x4
-
-prototype void vp9_bilinear_predict16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict16x16 sse2
-
-prototype void vp9_bilinear_predict8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict8x8 sse2
-
-prototype void vp9_bilinear_predict_avg16x16 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict_avg16x16
-
-prototype void vp9_bilinear_predict_avg8x8 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict_avg8x8
-
-prototype void vp9_bilinear_predict8x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict8x4
-
-prototype void vp9_bilinear_predict4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict4x4
-
-prototype void vp9_bilinear_predict_avg4x4 "uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch"
-specialize vp9_bilinear_predict_avg4x4
-
#
# dct
#
-prototype void vp9_short_idct4x4llm_1 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct4x4llm_1
+prototype void vp9_short_idct4x4_1 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_idct4x4_1
-prototype void vp9_short_idct4x4llm "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct4x4llm
+prototype void vp9_short_idct4x4 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_idct4x4 sse2
prototype void vp9_short_idct8x8 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct8x8
+specialize vp9_short_idct8x8 sse2
prototype void vp9_short_idct10_8x8 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct10_8x8
+specialize vp9_short_idct10_8x8 sse2
-prototype void vp9_short_ihaar2x2 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_ihaar2x2
+prototype void vp9_short_idct1_8x8 "int16_t *input, int16_t *output"
+specialize vp9_short_idct1_8x8
prototype void vp9_short_idct16x16 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct16x16
+specialize vp9_short_idct16x16 sse2
prototype void vp9_short_idct10_16x16 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct10_16x16
+specialize vp9_short_idct10_16x16 sse2
+prototype void vp9_short_idct1_16x16 "int16_t *input, int16_t *output"
+specialize vp9_short_idct1_16x16
+
+
prototype void vp9_short_idct32x32 "int16_t *input, int16_t *output, int pitch"
-specialize vp9_short_idct32x32
+specialize vp9_short_idct32x32 sse2
-prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs"
-specialize vp9_ihtllm
+prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
+specialize vp9_short_idct1_32x32
-#
-# 2nd order
-#
-prototype void vp9_short_inv_walsh4x4_1 "int16_t *in, int16_t *out"
-specialize vp9_short_inv_walsh4x4_1
+prototype void vp9_short_idct10_32x32 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_idct10_32x32
-prototype void vp9_short_inv_walsh4x4 "int16_t *in, int16_t *out"
-specialize vp9_short_inv_walsh4x4_
+prototype void vp9_short_iht8x8 "int16_t *input, int16_t *output, int pitch, int tx_type"
+specialize vp9_short_iht8x8
+prototype void vp9_short_iht4x4 "int16_t *input, int16_t *output, int pitch, int tx_type"
+specialize vp9_short_iht4x4
+prototype void vp9_short_iht16x16 "int16_t *input, int16_t *output, int pitch, int tx_type"
+specialize vp9_short_iht16x16
+
+prototype void vp9_idct4_1d "int16_t *input, int16_t *output"
+specialize vp9_idct4_1d sse2
+
# dct and add
-prototype void vp9_dc_only_idct_add_8x8 "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
-specialize vp9_dc_only_idct_add_8x8
prototype void vp9_dc_only_idct_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
-specialize vp9_dc_only_idct_add
+specialize vp9_dc_only_idct_add sse2
-if [ "$CONFIG_LOSSLESS" = "yes" ]; then
-prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch"
-prototype void vp9_short_inv_walsh4x4_x8 "int16_t *input, int16_t *output, int pitch"
+prototype void vp9_short_iwalsh4x4_1 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_iwalsh4x4_1
+prototype void vp9_short_iwalsh4x4 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_iwalsh4x4
prototype void vp9_dc_only_inv_walsh_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
-prototype void vp9_short_inv_walsh4x4_1_lossless "int16_t *in, int16_t *out"
-prototype void vp9_short_inv_walsh4x4_lossless "int16_t *in, int16_t *out"
-fi
+specialize vp9_dc_only_inv_walsh_add
prototype unsigned int vp9_sad32x3 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad"
specialize vp9_sad32x3
@@ -475,58 +439,52 @@
vp9_variance4x4_sse2=vp9_variance4x4_wmt
vp9_variance4x4_mmx=vp9_variance4x4_mmx
-prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp9_sub_pixel_variance64x64
+prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance64x64 sse2
-prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
-specialize vp9_sub_pixel_variance32x32
+prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance32x32 sse2
-prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
+prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x16 sse2 mmx ssse3
-vp9_sub_pixel_variance16x16_sse2=vp9_sub_pixel_variance16x16_wmt
-prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
+prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance8x16 sse2 mmx
vp9_sub_pixel_variance8x16_sse2=vp9_sub_pixel_variance8x16_wmt
-prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
+prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x8 sse2 mmx ssse3
vp9_sub_pixel_variance16x8_sse2=vp9_sub_pixel_variance16x8_ssse3;
vp9_sub_pixel_variance16x8_sse2=vp9_sub_pixel_variance16x8_wmt
-prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
+prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance8x8 sse2 mmx
vp9_sub_pixel_variance8x8_sse2=vp9_sub_pixel_variance8x8_wmt
-prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
+prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance4x4 sse2 mmx
vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad64x64
+specialize vp9_sad64x64 sse2
prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x32
+specialize vp9_sad32x32 sse2
prototype unsigned int vp9_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x16 mmx sse2 sse3
-vp9_sad16x16_sse2=vp9_sad16x16_wmt
+specialize vp9_sad16x16 mmx sse2
prototype unsigned int vp9_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad16x8 mmx sse2
-vp9_sad16x8_sse2=vp9_sad16x8_wmt
prototype unsigned int vp9_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad8x16 mmx sse2
-vp9_sad8x16_sse2=vp9_sad8x16_wmt
prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad8x8 mmx sse2
-vp9_sad8x8_sse2=vp9_sad8x8_wmt
prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad4x4 mmx sse2
-vp9_sad4x4_sse2=vp9_sad4x4_wmt
+specialize vp9_sad4x4 mmx sse
prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance_halfpixvar16x16_h mmx sse2
@@ -579,76 +537,64 @@
prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x4x3 sse3
-prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
+prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad64x64x8
-prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
+prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad32x32x8
-prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
+prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad16x16x8 sse4
-prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
+prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad16x8x8 sse4
-prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
+prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad8x16x8 sse4
-prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
+prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad8x8x8 sse4
-prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array"
+prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"
specialize vp9_sad4x4x8 sse4
-prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad64x64x4d
+prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad64x64x4d sse2
-prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad32x32x4d
+prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad32x32x4d sse2
-prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x16x4d sse3
+prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x16x4d sse2
-prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad16x8x4d sse3
+prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad16x8x4d sse2
-prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x16x4d sse3
+prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x16x4d sse2
-prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad8x8x4d sse3
+prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x8x4d sse2
-prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array"
-specialize vp9_sad4x4x4d sse3
-
-#
-# Block copy
-#
-case $arch in
- x86*)
- prototype void vp9_copy32xn "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, int n"
- specialize vp9_copy32xn sse2 sse3
- ;;
-esac
-
+prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad4x4x4d sse
prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"
specialize vp9_sub_pixel_mse16x16 sse2 mmx
-vp9_sub_pixel_mse16x16_sse2=vp9_sub_pixel_mse16x16_wmt
prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"
specialize vp9_mse16x16 mmx sse2
vp9_mse16x16_sse2=vp9_mse16x16_wmt
-prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
+prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_mse64x64
-prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse"
+prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_mse32x32
prototype unsigned int vp9_get_mb_ss "const int16_t *"
specialize vp9_get_mb_ss mmx sse2
# ENCODEMB INVOKE
-prototype int vp9_mbblock_error "struct macroblock *mb, int dc"
+prototype int vp9_mbblock_error "struct macroblock *mb"
specialize vp9_mbblock_error mmx sse2
vp9_mbblock_error_sse2=vp9_mbblock_error_xmm
@@ -686,15 +632,18 @@
fi
# fdct functions
-prototype void vp9_fht "const int16_t *input, int pitch, int16_t *output, int tx_type, int tx_dim"
-specialize vp9_fht
+prototype void vp9_short_fht4x4 "int16_t *InputData, int16_t *OutputData, int pitch, int tx_type"
+specialize vp9_short_fht4x4
-prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_fdct8x8
+prototype void vp9_short_fht8x8 "int16_t *InputData, int16_t *OutputData, int pitch, int tx_type"
+specialize vp9_short_fht8x8
-prototype void vp9_short_fhaar2x2 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_fhaar2x2
+prototype void vp9_short_fht16x16 "int16_t *InputData, int16_t *OutputData, int pitch, int tx_type"
+specialize vp9_short_fht16x16
+prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int pitch"
+specialize vp9_short_fdct8x8 sse2
+
prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct4x4
@@ -701,23 +650,17 @@
prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct8x4
-prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_walsh4x4
-
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct32x32
prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_fdct16x16
+specialize vp9_short_fdct16x16 sse2
-prototype void vp9_short_walsh4x4_lossless "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_walsh4x4_lossless
+prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
+specialize vp9_short_walsh4x4
-prototype void vp9_short_walsh4x4_x8 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_walsh4x4_x8
-
-prototype void vp9_short_walsh8x4_x8 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_walsh8x4_x8
+prototype void vp9_short_walsh8x4 "int16_t *InputData, int16_t *OutputData, int pitch"
+specialize vp9_short_walsh8x4
#
# Motion search
--- a/vp9/common/vp9_sadmxn.h
+++ b/vp9/common/vp9_sadmxn.h
@@ -11,14 +11,15 @@
#ifndef VP9_COMMON_VP9_SADMXN_H_
#define VP9_COMMON_VP9_SADMXN_H_
+#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-static __inline unsigned int sad_mx_n_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- int m,
- int n) {
+static INLINE unsigned int sad_mx_n_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ int m,
+ int n) {
int r, c;
unsigned int sad = 0;
--- a/vp9/common/vp9_seg_common.c
+++ b/vp9/common/vp9_seg_common.c
@@ -12,9 +12,8 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_seg_common.h"
-static const int segfeaturedata_signed[SEG_LVL_MAX] = { 1, 1, 0, 0, 0, 0 };
-static const int seg_feature_data_max[SEG_LVL_MAX] =
- { MAXQ, 63, 0xf, MB_MODE_COUNT - 1, 255, TX_SIZE_MAX_SB - 1};
+static const int segfeaturedata_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };
+static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, 63, 0xf, 0xf };
// These functions provide access to new segment level features.
// Eventually these function may be "optimized out" but for the moment,
@@ -52,7 +51,7 @@
}
int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
- return (segfeaturedata_signed[feature_id]);
+ return segfeaturedata_signed[feature_id];
}
void vp9_clear_segdata(MACROBLOCKD *xd,
@@ -103,10 +102,4 @@
~(1 << INTRA_FRAME)) ? 1 : 0;
}
-int vp9_get_seg_tx_type(MACROBLOCKD *xd, int segment_id) {
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_TRANSFORM))
- return vp9_get_segdata(xd, segment_id, SEG_LVL_TRANSFORM);
- else
- return TX_4X4;
-}
// TBD? Functions to read and write segment data with range / validity checking
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -57,7 +57,5 @@
int vp9_check_segref_inter(MACROBLOCKD *xd, int segment_id);
-int vp9_get_seg_tx_type(MACROBLOCKD *xd, int segment_id);
-
#endif // VP9_COMMON_VP9_SEG_COMMON_H_
--- a/vp9/common/vp9_setupintrarecon.h
+++ b/vp9/common/vp9_setupintrarecon.h
@@ -13,6 +13,6 @@
#include "vpx_scale/yv12config.h"
-extern void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
+void vp9_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
#endif // VP9_COMMON_VP9_SETUPINTRARECON_H_
--- a/vp9/common/vp9_subpixel.h
+++ /dev/null
@@ -1,20 +1,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_COMMON_VP9_SUBPIXEL_H_
-#define VP9_COMMON_VP9_SUBPIXEL_H_
-
-#define prototype_subpixel_predict(sym) \
- void sym(uint8_t *src, int src_pitch, int xofst, int yofst, \
- uint8_t *dst, int dst_pitch)
-
-typedef prototype_subpixel_predict((*vp9_subpix_fn_t));
-
-#endif // VP9_COMMON_VP9_SUBPIXEL_H_
--- a/vp9/common/vp9_textblit.c
+++ b/vp9/common/vp9_textblit.c
@@ -12,22 +12,26 @@
#include "vp9/common/vp9_textblit.h"
+static const int font[] = {
+ 0x0, 0x5C00, 0x8020, 0xAFABEA, 0xD7EC0, 0x1111111, 0x1855740, 0x18000,
+ 0x45C0, 0x74400, 0x51140, 0x23880, 0xC4000, 0x21080, 0x80000, 0x111110,
+ 0xE9D72E, 0x87E40, 0x12AD732, 0xAAD62A, 0x4F94C4, 0x4D6B7, 0x456AA,
+ 0x3E8423, 0xAAD6AA, 0xAAD6A2, 0x2800, 0x2A00, 0x8A880, 0x52940, 0x22A20,
+ 0x15422, 0x6AD62E, 0x1E4A53E, 0xAAD6BF, 0x8C62E, 0xE8C63F, 0x118D6BF,
+ 0x1094BF, 0xCAC62E, 0x1F2109F, 0x118FE31, 0xF8C628, 0x8A89F, 0x108421F,
+ 0x1F1105F, 0x1F4105F, 0xE8C62E, 0x2294BF, 0x164C62E, 0x12694BF, 0x8AD6A2,
+ 0x10FC21, 0x1F8421F, 0x744107, 0xF8220F, 0x1151151, 0x117041, 0x119D731,
+ 0x47E0, 0x1041041, 0xFC400, 0x10440, 0x1084210, 0x820
+};
+
+static void plot(int x, int y, unsigned char *image, int pitch) {
+ image[x + y * pitch] ^= 255;
+}
+
void vp9_blit_text(const char *msg, unsigned char *address, const int pitch) {
int letter_bitmap;
unsigned char *output_pos = address;
- int colpos;
- const int font[] = {
- 0x0, 0x5C00, 0x8020, 0xAFABEA, 0xD7EC0, 0x1111111, 0x1855740, 0x18000,
- 0x45C0, 0x74400, 0x51140, 0x23880, 0xC4000, 0x21080, 0x80000, 0x111110,
- 0xE9D72E, 0x87E40, 0x12AD732, 0xAAD62A, 0x4F94C4, 0x4D6B7, 0x456AA,
- 0x3E8423, 0xAAD6AA, 0xAAD6A2, 0x2800, 0x2A00, 0x8A880, 0x52940, 0x22A20,
- 0x15422, 0x6AD62E, 0x1E4A53E, 0xAAD6BF, 0x8C62E, 0xE8C63F, 0x118D6BF,
- 0x1094BF, 0xCAC62E, 0x1F2109F, 0x118FE31, 0xF8C628, 0x8A89F, 0x108421F,
- 0x1F1105F, 0x1F4105F, 0xE8C62E, 0x2294BF, 0x164C62E, 0x12694BF, 0x8AD6A2,
- 0x10FC21, 0x1F8421F, 0x744107, 0xF8220F, 0x1151151, 0x117041, 0x119D731,
- 0x47E0, 0x1041041, 0xFC400, 0x10440, 0x1084210, 0x820
- };
- colpos = 0;
+ int colpos = 0;
while (msg[colpos] != 0) {
char letter = msg[colpos];
@@ -50,12 +54,11 @@
}
}
-static void plot(const int x, const int y, unsigned char *image, const int pitch) {
- image [x + y * pitch] ^= 255;
-}
+
/* Bresenham line algorithm */
-void vp9_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch) {
+void vp9_blit_line(int x0, int x1, int y0, int y1, unsigned char *image,
+ int pitch) {
int steep = abs(y1 - y0) > abs(x1 - x0);
int deltax, deltay;
int error, ystep, y, x;
--- a/vp9/common/vp9_textblit.h
+++ b/vp9/common/vp9_textblit.h
@@ -11,9 +11,9 @@
#ifndef VP9_COMMON_VP9_TEXTBLIT_H_
#define VP9_COMMON_VP9_TEXTBLIT_H_
-extern void vp9_blit_text(const char *msg, unsigned char *address,
- const int pitch);
-extern void vp9_blit_line(int x0, int x1, int y0, int y1,
- unsigned char *image, const int pitch);
+void vp9_blit_text(const char *msg, unsigned char *address, int pitch);
+
+void vp9_blit_line(int x0, int x1, int y0, int y1, unsigned char *image,
+ int pitch);
#endif // VP9_COMMON_VP9_TEXTBLIT_H_
--- /dev/null
+++ b/vp9/common/vp9_tile_common.c
@@ -1,0 +1,58 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_tile_common.h"
+
+#define MIN_TILE_WIDTH 256
+#define MAX_TILE_WIDTH 4096
+#define MIN_TILE_WIDTH_SBS (MIN_TILE_WIDTH >> 6)
+#define MAX_TILE_WIDTH_SBS (MAX_TILE_WIDTH >> 6)
+
+static void vp9_get_tile_offsets(VP9_COMMON *cm, int *min_tile_off,
+ int *max_tile_off, int tile_idx,
+ int log2_n_tiles, int n_mbs) {
+ const int n_sbs = (n_mbs + 3) >> 2;
+ const int sb_off1 = (tile_idx * n_sbs) >> log2_n_tiles;
+ const int sb_off2 = ((tile_idx + 1) * n_sbs) >> log2_n_tiles;
+
+ *min_tile_off = MIN(sb_off1 << 2, n_mbs);
+ *max_tile_off = MIN(sb_off2 << 2, n_mbs);
+}
+
+void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx) {
+ cm->cur_tile_col_idx = tile_col_idx;
+ vp9_get_tile_offsets(cm, &cm->cur_tile_mb_col_start,
+ &cm->cur_tile_mb_col_end, tile_col_idx,
+ cm->log2_tile_columns, cm->mb_cols);
+}
+
+void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx) {
+ cm->cur_tile_row_idx = tile_row_idx;
+ vp9_get_tile_offsets(cm, &cm->cur_tile_mb_row_start,
+ &cm->cur_tile_mb_row_end, tile_row_idx,
+ cm->log2_tile_rows, cm->mb_rows);
+}
+
+
+void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles_ptr,
+ int *delta_log2_n_tiles) {
+ const int sb_cols = (cm->mb_cols + 3) >> 2;
+ int min_log2_n_tiles, max_log2_n_tiles;
+
+ for (max_log2_n_tiles = 0;
+ (sb_cols >> max_log2_n_tiles) >= MIN_TILE_WIDTH_SBS;
+ max_log2_n_tiles++) {}
+ for (min_log2_n_tiles = 0;
+ (MAX_TILE_WIDTH_SBS << min_log2_n_tiles) < sb_cols;
+ min_log2_n_tiles++) {}
+
+ *min_log2_n_tiles_ptr = min_log2_n_tiles;
+ *delta_log2_n_tiles = max_log2_n_tiles - min_log2_n_tiles;
+}
--- /dev/null
+++ b/vp9/common/vp9_tile_common.h
@@ -1,0 +1,23 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_TILE_COMMON_H_
+#define VP9_COMMON_VP9_TILE_COMMON_H_
+
+#include "vp9/common/vp9_onyxc_int.h"
+
+void vp9_get_tile_col_offsets(VP9_COMMON *cm, int tile_col_idx);
+
+void vp9_get_tile_row_offsets(VP9_COMMON *cm, int tile_row_idx);
+
+void vp9_get_tile_n_bits(VP9_COMMON *cm, int *min_log2_n_tiles,
+ int *delta_log2_n_tiles);
+
+#endif // VP9_COMMON_VP9_TILE_COMMON_H_
--- a/vp9/common/vp9_treecoder.c
+++ b/vp9/common/vp9_treecoder.c
@@ -48,66 +48,37 @@
tree2tok(p - offset, t, 0, 0, 0);
}
-static void branch_counts(
- int n, /* n = size of alphabet */
- vp9_token tok [ /* n */ ],
- vp9_tree tree,
- unsigned int branch_ct [ /* n-1 */ ] [2],
- const unsigned int num_events[ /* n */ ]
-) {
- const int tree_len = n - 1;
- int t = 0;
-
-#if CONFIG_DEBUG
- assert(tree_len);
-#endif
-
- do {
- branch_ct[t][0] = branch_ct[t][1] = 0;
- } while (++t < tree_len);
-
- t = 0;
-
- do {
- int L = tok[t].Len;
- const int enc = tok[t].value;
- const unsigned int ct = num_events[t];
-
- vp9_tree_index i = 0;
-
- do {
- const int b = (enc >> --L) & 1;
- const int j = i >> 1;
-#if CONFIG_DEBUG
- assert(j < tree_len && 0 <= L);
-#endif
+static unsigned int convert_distribution(unsigned int i,
+ vp9_tree tree,
+ vp9_prob probs[],
+ unsigned int branch_ct[][2],
+ const unsigned int num_events[],
+ unsigned int tok0_offset) {
+ unsigned int left, right;
- branch_ct [j] [b] += ct;
- i = tree[ i + b];
- } while (i > 0);
-
-#if CONFIG_DEBUG
- assert(!L);
-#endif
- } while (++t < n);
-
+ if (tree[i] <= 0) {
+ left = num_events[-tree[i] - tok0_offset];
+ } else {
+ left = convert_distribution(tree[i], tree, probs, branch_ct,
+ num_events, tok0_offset);
+ }
+ if (tree[i + 1] <= 0) {
+ right = num_events[-tree[i + 1] - tok0_offset];
+ } else {
+ right = convert_distribution(tree[i + 1], tree, probs, branch_ct,
+ num_events, tok0_offset);
+ }
+ probs[i>>1] = get_binary_prob(left, right);
+ branch_ct[i>>1][0] = left;
+ branch_ct[i>>1][1] = right;
+ return left + right;
}
-
void vp9_tree_probs_from_distribution(
- int n, /* n = size of alphabet */
- vp9_token tok [ /* n */ ],
vp9_tree tree,
vp9_prob probs [ /* n-1 */ ],
unsigned int branch_ct [ /* n-1 */ ] [2],
- const unsigned int num_events[ /* n */ ]
-) {
- const int tree_len = n - 1;
- int t = 0;
-
- branch_counts(n, tok, tree, branch_ct, num_events);
-
- do {
- probs[t] = get_binary_prob(branch_ct[t][0], branch_ct[t][1]);
- } while (++t < tree_len);
+ const unsigned int num_events[ /* n */ ],
+ unsigned int tok0_offset) {
+ convert_distribution(0, tree, probs, branch_ct, num_events, tok0_offset);
}
--- a/vp9/common/vp9_treecoder.h
+++ b/vp9/common/vp9_treecoder.h
@@ -11,6 +11,7 @@
#ifndef VP9_COMMON_VP9_TREECODER_H_
#define VP9_COMMON_VP9_TREECODER_H_
+#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
typedef uint8_t vp9_prob;
@@ -46,27 +47,35 @@
taken for each node on the tree; this facilitiates decisions as to
probability updates. */
-void vp9_tree_probs_from_distribution(int n, /* n = size of alphabet */
- vp9_token tok[ /* n */ ],
- vp9_tree tree,
+void vp9_tree_probs_from_distribution(vp9_tree tree,
vp9_prob probs[ /* n - 1 */ ],
unsigned int branch_ct[ /* n - 1 */ ][2],
- const unsigned int num_events[ /* n */ ]);
+ const unsigned int num_events[ /* n */ ],
+ unsigned int tok0_offset);
-static __inline vp9_prob clip_prob(int p) {
+static INLINE vp9_prob clip_prob(int p) {
return (p > 255) ? 255u : (p < 1) ? 1u : p;
}
-static __inline vp9_prob get_prob(int num, int den) {
+// int64 is not needed for normal frame level calculations.
+// However when outputing entropy stats accumulated over many frames
+// or even clips we can overflow int math.
+#ifdef ENTROPY_STATS
+static INLINE vp9_prob get_prob(int num, int den) {
+ return (den == 0) ? 128u : clip_prob(((int64_t)num * 256 + (den >> 1)) / den);
+}
+#else
+static INLINE vp9_prob get_prob(int num, int den) {
return (den == 0) ? 128u : clip_prob((num * 256 + (den >> 1)) / den);
}
+#endif
-static __inline vp9_prob get_binary_prob(int n0, int n1) {
+static INLINE vp9_prob get_binary_prob(int n0, int n1) {
return get_prob(n0, n0 + n1);
}
/* this function assumes prob1 and prob2 are already within [1,255] range */
-static __inline vp9_prob weighted_prob(int prob1, int prob2, int factor) {
+static INLINE vp9_prob weighted_prob(int prob1, int prob2, int factor) {
return (prob1 * (256 - factor) + prob2 * factor + 128) >> 8;
}
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -8,91 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_subpixel.h"
-
-extern const short vp9_six_tap_mmx[8][6 * 8];
-
-extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr,
- unsigned short *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp9_filter);
-
-extern void vp9_filter_block1dc_v6_mmx(unsigned short *src_ptr,
- unsigned char *output_ptr,
- int output_pitch,
- unsigned int pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp9_filter);
-
-extern void vp9_filter_block1d8_h6_sse2(unsigned char *src_ptr,
- unsigned short *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp9_filter);
-
-extern void vp9_filter_block1d16_h6_sse2(unsigned char *src_ptr,
- unsigned short *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp9_filter);
-
-extern void vp9_filter_block1d8_v6_sse2(unsigned short *src_ptr,
- unsigned char *output_ptr,
- int dst_ptich,
- unsigned int pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp9_filter);
-
-extern void vp9_filter_block1d16_v6_sse2(unsigned short *src_ptr,
- unsigned char *output_ptr,
- int dst_ptich,
- unsigned int pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp9_filter);
-
-extern void vp9_unpack_block1d16_h6_sse2(unsigned char *src_ptr,
- unsigned short *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int output_height,
- unsigned int output_width);
-
-extern void vp9_filter_block1d8_h6_only_sse2(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- int dst_pitch,
- unsigned int output_height,
- const short *vp9_filter);
-
-extern void vp9_filter_block1d16_h6_only_sse2(unsigned char *src_ptr,
- unsigned int src_pixels_per_lin,
- unsigned char *output_ptr,
- int dst_pitch,
- unsigned int output_height,
- const short *vp9_filter);
-
-extern void vp9_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- int dst_pitch,
- unsigned int output_height,
- const short *vp9_filter);
-
///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// int one pass //
@@ -116,486 +36,332 @@
{ 8, 8, 8, 8, 120, 120, 120, 120 }
};
-#if HAVE_MMX
-void vp9_sixtap_predict4x4_mmx(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict4x4_mmx\n");
-#endif
- /* Temp data bufffer used in filtering */
- DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 16 * 16);
- const short *hfilter, *vfilter;
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), fdata2,
- src_pixels_per_line, 1, 9, 8, hfilter);
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1dc_v6_mmx(fdata2 + 8, dst_ptr, dst_pitch,
- 8, 4, 4, 4, vfilter);
-}
+#if HAVE_SSSE3
+void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
-void vp9_sixtap_predict16x16_mmx(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict16x16_mmx\n");
-#endif
- /* Temp data bufffer used in filtering */
- DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24);
- const short *hfilter, *vfilter;
+void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
- fdata2, src_pixels_per_line, 1, 21, 32,
- hfilter);
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
- fdata2 + 4, src_pixels_per_line, 1, 21, 32,
- hfilter);
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8,
- fdata2 + 8, src_pixels_per_line, 1, 21, 32,
- hfilter);
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12,
- fdata2 + 12, src_pixels_per_line, 1, 21, 32,
- hfilter);
+void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1dc_v6_mmx(fdata2 + 32, dst_ptr, dst_pitch,
- 32, 16, 16, 16, vfilter);
- vp9_filter_block1dc_v6_mmx(fdata2 + 36, dst_ptr + 4, dst_pitch,
- 32, 16, 16, 16, vfilter);
- vp9_filter_block1dc_v6_mmx(fdata2 + 40, dst_ptr + 8, dst_pitch,
- 32, 16, 16, 16, vfilter);
- vp9_filter_block1dc_v6_mmx(fdata2 + 44, dst_ptr + 12, dst_pitch,
- 32, 16, 16, 16, vfilter);
-}
+void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
-void vp9_sixtap_predict8x8_mmx(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict8x8_mmx\n");
-#endif
- /* Temp data bufffer used in filtering */
- DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
- const short *hfilter, *vfilter;
+void vp9_filter_block1d4_v8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
- fdata2, src_pixels_per_line, 1, 13, 16,
- hfilter);
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
- fdata2 + 4, src_pixels_per_line, 1, 13, 16,
- hfilter);
+void vp9_filter_block1d4_h8_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1dc_v6_mmx(fdata2 + 16, dst_ptr, dst_pitch,
- 16, 8, 8, 8, vfilter);
- vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch,
- 16, 8, 8, 8, vfilter);
-}
+void vp9_filter_block1d16_v8_avg_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
-void vp9_sixtap_predict8x4_mmx(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict8x4_mmx\n");
-#endif
- /* Temp data bufffer used in filtering */
- DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
- const short *hfilter, *vfilter;
+void vp9_filter_block1d16_h8_avg_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line),
- fdata2, src_pixels_per_line, 1, 9, 16, hfilter);
- vp9_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4,
- fdata2 + 4, src_pixels_per_line, 1, 9, 16, hfilter);
+void vp9_filter_block1d8_v8_avg_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1dc_v6_mmx(fdata2 + 16, dst_ptr, dst_pitch,
- 16, 8, 4, 8, vfilter);
- vp9_filter_block1dc_v6_mmx(fdata2 + 20, dst_ptr + 4, dst_pitch,
- 16, 8, 4, 8, vfilter);
-}
-#endif
+void vp9_filter_block1d8_h8_avg_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
-#if HAVE_SSE2
-void vp9_sixtap_predict16x16_sse2(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- /* Temp data bufffer used in filtering */
- DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 24 * 24);
- const short *hfilter, *vfilter;
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict16x16_sse2\n");
-#endif
+void vp9_filter_block1d4_v8_avg_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
- if (xoffset) {
- if (yoffset) {
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
- src_pixels_per_line, 1, 21, 32, hfilter);
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch,
- 32, 16, 16, dst_pitch, vfilter);
- } else {
- /* First-pass only */
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line,
- dst_ptr, dst_pitch, 16, hfilter);
- }
- } else {
- /* Second-pass only */
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
- src_pixels_per_line, 21, 32);
- vp9_filter_block1d16_v6_sse2(fdata2 + 32, dst_ptr, dst_pitch,
- 32, 16, 16, dst_pitch, vfilter);
- }
-}
+void vp9_filter_block1d4_h8_avg_ssse3(const unsigned char *src_ptr,
+ const unsigned int src_pitch,
+ unsigned char *output_ptr,
+ unsigned int out_pitch,
+ unsigned int output_height,
+ const short *filter);
-void vp9_sixtap_predict8x8_sse2(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- /* Temp data bufffer used in filtering */
- DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
- const short *hfilter, *vfilter;
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict8x8_sse2\n");
-#endif
-
- if (xoffset) {
- if (yoffset) {
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
- src_pixels_per_line, 1, 13, 16, hfilter);
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch,
- 16, 8, 8, dst_pitch, vfilter);
- } else {
- /* First-pass only */
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line,
- dst_ptr, dst_pitch, 8, hfilter);
+void vp9_convolve8_horiz_ssse3(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ if (x_step_q4 == 16 && filter_x[3] != 128) {
+ while (w >= 16) {
+ vp9_filter_block1d16_h8_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 16;
+ dst += 16;
+ w -= 16;
}
- } else {
- /* Second-pass only */
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line,
- dst_ptr, dst_pitch, 8, vfilter);
- }
-}
-
-void vp9_sixtap_predict8x4_sse2(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- /* Temp data bufffer used in filtering */
- DECLARE_ALIGNED_ARRAY(16, unsigned short, fdata2, 256);
- const short *hfilter, *vfilter;
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict8x4_sse2\n");
-#endif
-
- if (xoffset) {
- if (yoffset) {
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), fdata2,
- src_pixels_per_line, 1, 9, 16, hfilter);
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1d8_v6_sse2(fdata2 + 16, dst_ptr, dst_pitch,
- 16, 8, 4, dst_pitch, vfilter);
- } else {
- /* First-pass only */
- hfilter = vp9_six_tap_mmx[xoffset];
- vp9_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line,
- dst_ptr, dst_pitch, 4, hfilter);
+ while (w >= 8) {
+ vp9_filter_block1d8_h8_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 8;
+ dst += 8;
+ w -= 8;
}
- } else {
- /* Second-pass only */
- vfilter = vp9_six_tap_mmx[yoffset];
- vp9_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line,
- dst_ptr, dst_pitch, 4, vfilter);
- }
-}
-#endif
-
-#if HAVE_SSSE3
-extern void vp9_filter_block1d8_h6_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- unsigned int vp9_filter_index);
-
-extern void vp9_filter_block1d16_h6_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- unsigned int vp9_filter_index);
-
-extern void vp9_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- unsigned int vp9_filter_index);
-
-extern void vp9_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- unsigned int vp9_filter_index);
-
-extern void vp9_filter_block1d4_h6_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- unsigned int vp9_filter_index);
-
-extern void vp9_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- unsigned int vp9_filter_index);
-
-void vp9_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 24 * 24);
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict16x16_ssse3\n");
-#endif
-
- if (xoffset) {
- if (yoffset) {
- vp9_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line,
- fdata2, 16, 21, xoffset);
- vp9_filter_block1d16_v6_ssse3(fdata2, 16, dst_ptr, dst_pitch,
- 16, yoffset);
- } else {
- /* First-pass only */
- vp9_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
- dst_ptr, dst_pitch, 16, xoffset);
+ while (w >= 4) {
+ vp9_filter_block1d4_h8_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 4;
+ dst += 4;
+ w -= 4;
}
- } else {
- /* Second-pass only */
- vp9_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line,
- dst_ptr, dst_pitch, 16, yoffset);
}
+ if (w) {
+ vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ }
}
-void vp9_sixtap_predict8x8_ssse3(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256);
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict8x8_ssse3\n");
-#endif
-
- if (xoffset) {
- if (yoffset) {
- vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, fdata2, 8, 13, xoffset);
- vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 8, yoffset);
- } else {
- vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
- dst_ptr, dst_pitch, 8, xoffset);
+void vp9_convolve8_vert_ssse3(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ if (y_step_q4 == 16 && filter_y[3] != 128) {
+ while (w >= 16) {
+ vp9_filter_block1d16_v8_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 16;
+ dst += 16;
+ w -= 16;
}
- } else {
- /* Second-pass only */
- vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line,
- dst_ptr, dst_pitch, 8, yoffset);
+ while (w >= 8) {
+ vp9_filter_block1d8_v8_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 8;
+ dst += 8;
+ w -= 8;
+ }
+ while (w >= 4) {
+ vp9_filter_block1d4_v8_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
+ if (w) {
+ vp9_convolve8_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ }
}
-void vp9_sixtap_predict8x4_ssse3(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 256);
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict8x4_ssse3\n");
-#endif
-
- if (xoffset) {
- if (yoffset) {
- vp9_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, fdata2, 8, 9, xoffset);
- vp9_filter_block1d8_v6_ssse3(fdata2, 8, dst_ptr, dst_pitch, 4, yoffset);
- } else {
- /* First-pass only */
- vp9_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
- dst_ptr, dst_pitch, 4, xoffset);
+void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ if (x_step_q4 == 16 && filter_x[3] != 128) {
+ while (w >= 16) {
+ vp9_filter_block1d16_h8_avg_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 16;
+ dst += 16;
+ w -= 16;
}
- } else {
- /* Second-pass only */
- vp9_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line,
- dst_ptr, dst_pitch, 4, yoffset);
+ while (w >= 8) {
+ vp9_filter_block1d8_h8_avg_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 8;
+ dst += 8;
+ w -= 8;
+ }
+ while (w >= 4) {
+ vp9_filter_block1d4_h8_avg_ssse3(src, src_stride,
+ dst, dst_stride,
+ h, filter_x);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
+ if (w) {
+ vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ }
}
-void vp9_sixtap_predict4x4_ssse3(unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 4 * 9);
-#ifdef ANNOUNCE_FUNCTION
- printf("vp9_sixtap_predict4x4_ssse3\n");
-#endif
-
- if (xoffset) {
- if (yoffset) {
- vp9_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, fdata2, 4, 9, xoffset);
- vp9_filter_block1d4_v6_ssse3(fdata2, 4, dst_ptr, dst_pitch, 4, yoffset);
- } else {
- vp9_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
- dst_ptr, dst_pitch, 4, xoffset);
+void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ if (y_step_q4 == 16 && filter_y[3] != 128) {
+ while (w >= 16) {
+ vp9_filter_block1d16_v8_avg_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 16;
+ dst += 16;
+ w -= 16;
}
- } else {
- vp9_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line,
- dst_ptr, dst_pitch, 4, yoffset);
+ while (w >= 8) {
+ vp9_filter_block1d8_v8_avg_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 8;
+ dst += 8;
+ w -= 8;
+ }
+ while (w >= 4) {
+ vp9_filter_block1d4_v8_avg_ssse3(src - src_stride * 3, src_stride,
+ dst, dst_stride,
+ h, filter_y);
+ src += 4;
+ dst += 4;
+ w -= 4;
+ }
}
+ if (w) {
+ vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ }
}
-void vp9_filter_block1d16_v8_ssse3(const unsigned char *src_ptr,
- const unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- const short *filter);
+void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
-void vp9_filter_block1d16_h8_ssse3(const unsigned char *src_ptr,
- const unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- const short *filter);
+ // check w/h due to fixed size fdata2 array
+ assert(w <= 16);
+ assert(h <= 16);
-void vp9_filter_block2d_16x16_8_ssse3(const unsigned char *src_ptr,
- const unsigned int src_stride,
- const short *hfilter_aligned16,
- const short *vfilter_aligned16,
- unsigned char *dst_ptr,
- unsigned int dst_stride) {
- if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
-
- vp9_filter_block1d16_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
- fdata2, 16, 23, hfilter_aligned16);
- vp9_filter_block1d16_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 16,
- vfilter_aligned16);
- } else {
- if (hfilter_aligned16[3] != 128) {
- vp9_filter_block1d16_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride,
- 16, hfilter_aligned16);
- } else {
- vp9_filter_block1d16_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
- dst_ptr, dst_stride, 16, vfilter_aligned16);
+ if (x_step_q4 == 16 && y_step_q4 == 16 &&
+ filter_x[3] != 128 && filter_y[3] != 128) {
+ if (w == 16) {
+ vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d16_v8_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
}
+ if (w == 8) {
+ vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d8_v8_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
+ }
+ if (w == 4) {
+ vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d4_v8_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
+ }
}
+ vp9_convolve8_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
}
-void vp9_filter_block1d8_v8_ssse3(const unsigned char *src_ptr,
- const unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- const short *filter);
+void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
-void vp9_filter_block1d8_h8_ssse3(const unsigned char *src_ptr,
- const unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- const short *filter);
+ // check w/h due to fixed size fdata2 array
+ assert(w <= 16);
+ assert(h <= 16);
-void vp9_filter_block2d_8x8_8_ssse3(const unsigned char *src_ptr,
- const unsigned int src_stride,
- const short *hfilter_aligned16,
- const short *vfilter_aligned16,
- unsigned char *dst_ptr,
- unsigned int dst_stride) {
- if (hfilter_aligned16[3] != 128 && vfilter_aligned16[3] != 128) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
-
- vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
- fdata2, 16, 15, hfilter_aligned16);
- vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 8,
- vfilter_aligned16);
- } else {
- if (hfilter_aligned16[3] != 128) {
- vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 8,
- hfilter_aligned16);
- } else {
- vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
- dst_ptr, dst_stride, 8, vfilter_aligned16);
+ if (x_step_q4 == 16 && y_step_q4 == 16 &&
+ filter_x[3] != 128 && filter_y[3] != 128) {
+ if (w == 16) {
+ vp9_filter_block1d16_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d16_v8_avg_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
}
- }
-}
-
-void vp9_filter_block2d_8x4_8_ssse3(const unsigned char *src_ptr,
- const unsigned int src_stride,
- const short *hfilter_aligned16,
- const short *vfilter_aligned16,
- unsigned char *dst_ptr,
- unsigned int dst_stride) {
- if (hfilter_aligned16[3] !=128 && vfilter_aligned16[3] != 128) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 23 * 16);
-
- vp9_filter_block1d8_h8_ssse3(src_ptr - (3 * src_stride), src_stride,
- fdata2, 16, 11, hfilter_aligned16);
- vp9_filter_block1d8_v8_ssse3(fdata2, 16, dst_ptr, dst_stride, 4,
- vfilter_aligned16);
- } else {
- if (hfilter_aligned16[3] != 128) {
- vp9_filter_block1d8_h8_ssse3(src_ptr, src_stride, dst_ptr, dst_stride, 4,
- hfilter_aligned16);
- } else {
- vp9_filter_block1d8_v8_ssse3(src_ptr - (3 * src_stride), src_stride,
- dst_ptr, dst_stride, 4, vfilter_aligned16);
+ if (w == 8) {
+ vp9_filter_block1d8_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d8_v8_avg_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
}
+ if (w == 4) {
+ vp9_filter_block1d4_h8_ssse3(src - 3 * src_stride, src_stride,
+ fdata2, 16,
+ h + 7, filter_x);
+ vp9_filter_block1d4_v8_avg_ssse3(fdata2, 16,
+ dst, dst_stride,
+ h, filter_y);
+ return;
+ }
}
+ vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
}
#endif
--- a/vp9/common/x86/vp9_filter_sse2.c
+++ /dev/null
@@ -1,290 +1,0 @@
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <assert.h> // for alignment checks
-#include <emmintrin.h> // SSE2
-#include "vp9/common/vp9_filter.h"
-#include "vpx_ports/emmintrin_compat.h"
-#include "vpx_ports/mem.h" // for DECLARE_ALIGNED
-#include "vp9_rtcd.h"
-
-// TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is
-// just a quick partial snapshot so that other can already use some
-// speedup.
-// TODO(cd): Use vectorized 8 tap filtering code as speedup to pure C 6 tap
-// filtering.
-// TODO(cd): Add some comments, better variable naming.
-// TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coeficients (no sum
-// of positive above 128), or have higher precision filter
-// coefficients.
-
-DECLARE_ALIGNED(16, static const unsigned int, rounding_c[4]) = {
- VP9_FILTER_WEIGHT >> 1,
- VP9_FILTER_WEIGHT >> 1,
- VP9_FILTER_WEIGHT >> 1,
- VP9_FILTER_WEIGHT >> 1,
-};
-
-// Creating a macro to do more than four pixels at once to hide instruction
-// latency is actually slower :-(
-#define DO_FOUR_PIXELS(result, src_ptr, offset) \
- { \
- /* Do shifted load to achieve require shuffles through unpacking */ \
- const __m128i src0 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 0)); \
- const __m128i src1 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 1)); \
- const __m128i src2 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 2)); \
- const __m128i src3 = _mm_loadu_si128((const __m128i *)(src_ptr + offset + 3)); \
- const __m128i src01 = _mm_unpacklo_epi8(src0, src1); \
- const __m128i src01_16 = _mm_unpacklo_epi8(src01, zero); \
- const __m128i src23 = _mm_unpacklo_epi8(src2, src3); \
- const __m128i src23_16 = _mm_unpacklo_epi8(src23, zero); \
- /* Shit by 4 bytes through suffle to get additional shifted loads */ \
- const __m128i src4 = _mm_shuffle_epi32(src0, _MM_SHUFFLE(3, 3, 2, 1)); \
- const __m128i src5 = _mm_shuffle_epi32(src1, _MM_SHUFFLE(3, 3, 2, 1)); \
- const __m128i src6 = _mm_shuffle_epi32(src2, _MM_SHUFFLE(3, 3, 2, 1)); \
- const __m128i src7 = _mm_shuffle_epi32(src3, _MM_SHUFFLE(3, 3, 2, 1)); \
- const __m128i src45 = _mm_unpacklo_epi8(src4, src5); \
- const __m128i src45_16 = _mm_unpacklo_epi8(src45, zero); \
- const __m128i src67 = _mm_unpacklo_epi8(src6, src7); \
- const __m128i src67_16 = _mm_unpacklo_epi8(src67, zero); \
- /* multiply accumulate them */ \
- const __m128i mad01 = _mm_madd_epi16(src01_16, fil01); \
- const __m128i mad23 = _mm_madd_epi16(src23_16, fil23); \
- const __m128i mad45 = _mm_madd_epi16(src45_16, fil45); \
- const __m128i mad67 = _mm_madd_epi16(src67_16, fil67); \
- const __m128i mad0123 = _mm_add_epi32(mad01, mad23); \
- const __m128i mad4567 = _mm_add_epi32(mad45, mad67); \
- __m128i mad_all = _mm_add_epi32(mad0123, mad4567); \
- mad_all = _mm_add_epi32(mad_all, rounding); \
- result = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT); \
- }
-
-void vp9_filter_block2d_4x4_8_sse2
-(
- const unsigned char *src_ptr, const unsigned int src_stride,
- const short *HFilter_aligned16, const short *VFilter_aligned16,
- unsigned char *dst_ptr, unsigned int dst_stride
-) {
- __m128i intermediateA, intermediateB, intermediateC;
-
- const int kInterp_Extend = 4;
-
- const __m128i zero = _mm_set1_epi16(0);
- const __m128i rounding = _mm_load_si128((const __m128i *)rounding_c);
-
- // check alignment
- assert(0 == ((long)HFilter_aligned16)%16);
- assert(0 == ((long)VFilter_aligned16)%16);
-
- {
- __m128i transpose3_0;
- __m128i transpose3_1;
- __m128i transpose3_2;
- __m128i transpose3_3;
-
- // Horizontal pass (src -> intermediate).
- {
- const __m128i HFilter = _mm_load_si128((const __m128i *)HFilter_aligned16);
- // get first two columns filter coefficients
- __m128i fil01 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(0, 0, 0, 0));
- __m128i fil23 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(1, 1, 1, 1));
- __m128i fil45 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(2, 2, 2, 2));
- __m128i fil67 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(3, 3, 3, 3));
- src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
-
- {
- __m128i mad_all0;
- __m128i mad_all1;
- __m128i mad_all2;
- __m128i mad_all3;
- DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride)
- DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride)
- DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride)
- DO_FOUR_PIXELS(mad_all3, src_ptr, 3*src_stride)
- mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
- mad_all2 = _mm_packs_epi32(mad_all2, mad_all3);
- intermediateA = _mm_packus_epi16(mad_all0, mad_all2);
- // --
- src_ptr += src_stride*4;
- // --
- DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride)
- DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride)
- DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride)
- DO_FOUR_PIXELS(mad_all3, src_ptr, 3*src_stride)
- mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
- mad_all2 = _mm_packs_epi32(mad_all2, mad_all3);
- intermediateB = _mm_packus_epi16(mad_all0, mad_all2);
- // --
- src_ptr += src_stride*4;
- // --
- DO_FOUR_PIXELS(mad_all0, src_ptr, 0*src_stride)
- DO_FOUR_PIXELS(mad_all1, src_ptr, 1*src_stride)
- DO_FOUR_PIXELS(mad_all2, src_ptr, 2*src_stride)
- mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
- mad_all2 = _mm_packs_epi32(mad_all2, mad_all2);
- intermediateC = _mm_packus_epi16(mad_all0, mad_all2);
- }
- }
-
- // Transpose result (intermediate -> transpose3_x)
- {
- // 00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33
- // 40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73
- // 80 81 82 83 90 91 92 93 A0 A1 A2 A3 xx xx xx xx
- const __m128i transpose0_0 = _mm_unpacklo_epi8(intermediateA, intermediateB);
- const __m128i transpose0_1 = _mm_unpackhi_epi8(intermediateA, intermediateB);
- const __m128i transpose0_2 = _mm_unpacklo_epi8(intermediateC, intermediateC);
- const __m128i transpose0_3 = _mm_unpackhi_epi8(intermediateC, intermediateC);
- // 00 40 01 41 02 42 03 43 10 50 11 51 12 52 13 53
- // 20 60 21 61 22 62 23 63 30 70 31 71 32 72 33 73
- // 80 xx 81 xx 82 xx 83 xx 90 xx 91 xx 92 xx 93 xx
- // A0 xx A1 xx A2 xx A3 xx xx xx xx xx xx xx xx xx
- const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
- const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
- const __m128i transpose1_2 = _mm_unpacklo_epi8(transpose0_2, transpose0_3);
- const __m128i transpose1_3 = _mm_unpackhi_epi8(transpose0_2, transpose0_3);
- // 00 20 40 60 01 21 41 61 02 22 42 62 03 23 43 63
- // 10 30 50 70 11 31 51 71 12 32 52 72 13 33 53 73
- // 80 A0 xx xx 81 A1 xx xx 82 A2 xx xx 83 A3 xx xx
- // 90 xx xx xx 91 xx xx xx 92 xx xx xx 93 xx xx xx
- const __m128i transpose2_0 = _mm_unpacklo_epi8(transpose1_0, transpose1_1);
- const __m128i transpose2_1 = _mm_unpackhi_epi8(transpose1_0, transpose1_1);
- const __m128i transpose2_2 = _mm_unpacklo_epi8(transpose1_2, transpose1_3);
- const __m128i transpose2_3 = _mm_unpackhi_epi8(transpose1_2, transpose1_3);
- // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
- // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
- // 80 90 A0 xx xx xx xx xx 81 91 A1 xx xx xx xx xx
- // 82 92 A2 xx xx xx xx xx 83 93 A3 xx xx xx xx xx
- transpose3_0 = _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0),
- _mm_castsi128_ps(transpose2_2),
- _MM_SHUFFLE(1, 0, 1, 0)));
- transpose3_1 = _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0),
- _mm_castsi128_ps(transpose2_2),
- _MM_SHUFFLE(3, 2, 3, 2)));
- transpose3_2 = _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1),
- _mm_castsi128_ps(transpose2_3),
- _MM_SHUFFLE(1, 0, 1, 0)));
- transpose3_3 = _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1),
- _mm_castsi128_ps(transpose2_3),
- _MM_SHUFFLE(3, 2, 3, 2)));
- // 00 10 20 30 40 50 60 70 80 90 A0 xx xx xx xx xx
- // 01 11 21 31 41 51 61 71 81 91 A1 xx xx xx xx xx
- // 02 12 22 32 42 52 62 72 82 92 A2 xx xx xx xx xx
- // 03 13 23 33 43 53 63 73 83 93 A3 xx xx xx xx xx
- }
-
- // Vertical pass (transpose3_x -> dst).
- {
- const __m128i VFilter = _mm_load_si128((const __m128i *)VFilter_aligned16);
- // get first two columns filter coefficients
- __m128i fil01 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(0, 0, 0, 0));
- __m128i fil23 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(1, 1, 1, 1));
- __m128i fil45 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(2, 2, 2, 2));
- __m128i fil67 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(3, 3, 3, 3));
- __m128i col0, col1, col2, col3;
- DECLARE_ALIGNED(16, unsigned char, temp[32]);
- {
- _mm_store_si128((__m128i *)temp, transpose3_0);
- DO_FOUR_PIXELS(col0, temp, 0);
- }
- {
- _mm_store_si128((__m128i *)temp, transpose3_1);
- DO_FOUR_PIXELS(col1, temp, 0);
- }
- {
- _mm_store_si128((__m128i *)temp, transpose3_2);
- DO_FOUR_PIXELS(col2, temp, 0);
- }
- {
- _mm_store_si128((__m128i *)temp, transpose3_3);
- DO_FOUR_PIXELS(col3, temp, 0);
- }
- // transpose
- {
- __m128i T0 = _mm_unpacklo_epi32(col0, col1);
- __m128i T1 = _mm_unpacklo_epi32(col2, col3);
- __m128i T2 = _mm_unpackhi_epi32(col0, col1);
- __m128i T3 = _mm_unpackhi_epi32(col2, col3);
- col0 = _mm_unpacklo_epi64(T0, T1);
- col1 = _mm_unpackhi_epi64(T0, T1);
- col2 = _mm_unpacklo_epi64(T2, T3);
- col3 = _mm_unpackhi_epi64(T2, T3);
- }
- // saturate to 8 bit
- {
- col0 = _mm_packs_epi32(col0, col0);
- col0 = _mm_packus_epi16(col0, col0);
- col1 = _mm_packs_epi32(col1, col1);
- col1 = _mm_packus_epi16(col1, col1);
- col2 = _mm_packs_epi32 (col2, col2);
- col2 = _mm_packus_epi16(col2, col2);
- col3 = _mm_packs_epi32 (col3, col3);
- col3 = _mm_packus_epi16(col3, col3);
- }
- // store
- {
- *((unsigned int *)&dst_ptr[dst_stride * 0]) = _mm_cvtsi128_si32(col0);
- *((unsigned int *)&dst_ptr[dst_stride * 1]) = _mm_cvtsi128_si32(col1);
- *((unsigned int *)&dst_ptr[dst_stride * 2]) = _mm_cvtsi128_si32(col2);
- *((unsigned int *)&dst_ptr[dst_stride * 3]) = _mm_cvtsi128_si32(col3);
- }
- }
- }
-}
-
-void vp9_filter_block2d_8x4_8_sse2
-(
- const unsigned char *src_ptr, const unsigned int src_stride,
- const short *HFilter_aligned16, const short *VFilter_aligned16,
- unsigned char *dst_ptr, unsigned int dst_stride
-) {
- int j;
- for (j=0; j<8; j+=4) {
- vp9_filter_block2d_4x4_8_sse2(src_ptr + j, src_stride,
- HFilter_aligned16, VFilter_aligned16,
- dst_ptr + j, dst_stride);
- }
-}
-
-void vp9_filter_block2d_8x8_8_sse2
-(
- const unsigned char *src_ptr, const unsigned int src_stride,
- const short *HFilter_aligned16, const short *VFilter_aligned16,
- unsigned char *dst_ptr, unsigned int dst_stride
-) {
- int i, j;
- for (i=0; i<8; i+=4) {
- for (j=0; j<8; j+=4) {
- vp9_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride,
- HFilter_aligned16, VFilter_aligned16,
- dst_ptr + j + i*dst_stride, dst_stride);
- }
- }
-}
-
-void vp9_filter_block2d_16x16_8_sse2
-(
- const unsigned char *src_ptr, const unsigned int src_stride,
- const short *HFilter_aligned16, const short *VFilter_aligned16,
- unsigned char *dst_ptr, unsigned int dst_stride
-) {
- int i, j;
- for (i=0; i<16; i+=4) {
- for (j=0; j<16; j+=4) {
- vp9_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride,
- HFilter_aligned16, VFilter_aligned16,
- dst_ptr + j + i*dst_stride, dst_stride);
- }
- }
-}
--- a/vp9/common/x86/vp9_filter_sse4.c
+++ /dev/null
@@ -1,362 +1,0 @@
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <assert.h> // for alignment checks
-#include <smmintrin.h> // SSE4.1
-#include "vp9/common/vp9_filter.h"
-#include "vpx_ports/mem.h" // for DECLARE_ALIGNED
-#include "vp9_rtcd.h"
-
-// TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is
-// just a quick partial snapshot so that other can already use some
-// speedup.
-// TODO(cd): Use vectorized 8 tap filtering code as speedup to pure C 6 tap
-// filtering.
-// TODO(cd): Reduce source size by using macros instead of current code
-// duplication.
-// TODO(cd): Add some comments, better variable naming.
-// TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coeficients (no sum
-// of positive above 128), or have higher precision filter
-// coefficients.
-
-DECLARE_ALIGNED(16, static const unsigned char, mask0123_c[16]) = {
- 0x00, 0x01,
- 0x01, 0x02,
- 0x02, 0x03,
- 0x03, 0x04,
- 0x02, 0x03,
- 0x03, 0x04,
- 0x04, 0x05,
- 0x05, 0x06,
-};
-DECLARE_ALIGNED(16, static const unsigned char, mask4567_c[16]) = {
- 0x04, 0x05,
- 0x05, 0x06,
- 0x06, 0x07,
- 0x07, 0x08,
- 0x06, 0x07,
- 0x07, 0x08,
- 0x08, 0x09,
- 0x09, 0x0A,
-};
-DECLARE_ALIGNED(16, static const unsigned int, rounding_c[4]) = {
- VP9_FILTER_WEIGHT >> 1,
- VP9_FILTER_WEIGHT >> 1,
- VP9_FILTER_WEIGHT >> 1,
- VP9_FILTER_WEIGHT >> 1,
-};
-DECLARE_ALIGNED(16, static const unsigned char, transpose_c[16]) = {
- 0, 4, 8, 12,
- 1, 5, 9, 13,
- 2, 6, 10, 14,
- 3, 7, 11, 15
-};
-
-// Creating a macro to do more than four pixels at once to hide instruction
-// latency is actually slower :-(
-#define DO_FOUR_PIXELS(result, offset) \
- { \
- /*load pixels*/ \
- __m128i src = _mm_loadu_si128((const __m128i *)(src_ptr + offset)); \
- /* extract the ones used for first column */ \
- __m128i src0123 = _mm_shuffle_epi8(src, mask0123); \
- __m128i src4567 = _mm_shuffle_epi8(src, mask4567); \
- __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero); \
- __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero); \
- __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero); \
- __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero); \
- /* multiply accumulate them */ \
- __m128i mad01 = _mm_madd_epi16(src01_16, fil01); \
- __m128i mad23 = _mm_madd_epi16(src23_16, fil23); \
- __m128i mad45 = _mm_madd_epi16(src45_16, fil45); \
- __m128i mad67 = _mm_madd_epi16(src67_16, fil67); \
- __m128i mad0123 = _mm_add_epi32(mad01, mad23); \
- __m128i mad4567 = _mm_add_epi32(mad45, mad67); \
- __m128i mad_all = _mm_add_epi32(mad0123, mad4567); \
- mad_all = _mm_add_epi32(mad_all, rounding); \
- result = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT); \
- }
-
-void vp9_filter_block2d_4x4_8_sse4_1
-(
- const unsigned char *src_ptr, const unsigned int src_stride,
- const short *HFilter_aligned16, const short *VFilter_aligned16,
- unsigned char *dst_ptr, unsigned int dst_stride
-) {
- __m128i intermediateA, intermediateB, intermediateC;
-
- const int kInterp_Extend = 4;
-
- const __m128i zero = _mm_set1_epi16(0);
- const __m128i mask0123 = _mm_load_si128((const __m128i *)mask0123_c);
- const __m128i mask4567 = _mm_load_si128((const __m128i *)mask4567_c);
- const __m128i rounding = _mm_load_si128((const __m128i *)rounding_c);
- const __m128i transpose = _mm_load_si128((const __m128i *)transpose_c);
-
- // check alignment
- assert(0 == ((long)HFilter_aligned16)%16);
- assert(0 == ((long)VFilter_aligned16)%16);
-
- {
- __m128i transpose3_0;
- __m128i transpose3_1;
- __m128i transpose3_2;
- __m128i transpose3_3;
-
- // Horizontal pass (src -> intermediate).
- {
- const __m128i HFilter = _mm_load_si128((const __m128i *)HFilter_aligned16);
- // get first two columns filter coefficients
- __m128i fil01 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(0, 0, 0, 0));
- __m128i fil23 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(1, 1, 1, 1));
- __m128i fil45 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(2, 2, 2, 2));
- __m128i fil67 = _mm_shuffle_epi32(HFilter, _MM_SHUFFLE(3, 3, 3, 3));
- src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
-
- {
- __m128i mad_all0;
- __m128i mad_all1;
- __m128i mad_all2;
- __m128i mad_all3;
- DO_FOUR_PIXELS(mad_all0, 0*src_stride)
- DO_FOUR_PIXELS(mad_all1, 1*src_stride)
- DO_FOUR_PIXELS(mad_all2, 2*src_stride)
- DO_FOUR_PIXELS(mad_all3, 3*src_stride)
- mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
- mad_all2 = _mm_packs_epi32(mad_all2, mad_all3);
- intermediateA = _mm_packus_epi16(mad_all0, mad_all2);
- // --
- src_ptr += src_stride*4;
- // --
- DO_FOUR_PIXELS(mad_all0, 0*src_stride)
- DO_FOUR_PIXELS(mad_all1, 1*src_stride)
- DO_FOUR_PIXELS(mad_all2, 2*src_stride)
- DO_FOUR_PIXELS(mad_all3, 3*src_stride)
- mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
- mad_all2 = _mm_packs_epi32(mad_all2, mad_all3);
- intermediateB = _mm_packus_epi16(mad_all0, mad_all2);
- // --
- src_ptr += src_stride*4;
- // --
- DO_FOUR_PIXELS(mad_all0, 0*src_stride)
- DO_FOUR_PIXELS(mad_all1, 1*src_stride)
- DO_FOUR_PIXELS(mad_all2, 2*src_stride)
- mad_all0 = _mm_packs_epi32(mad_all0, mad_all1);
- mad_all2 = _mm_packs_epi32(mad_all2, mad_all2);
- intermediateC = _mm_packus_epi16(mad_all0, mad_all2);
- }
- }
-
- // Transpose result (intermediate -> transpose3_x)
- {
- // 00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33
- // 40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73
- // 80 81 82 83 90 91 92 93 A0 A1 A2 A3 xx xx xx xx
- const __m128i transpose1_0 = _mm_shuffle_epi8(intermediateA, transpose);
- const __m128i transpose1_1 = _mm_shuffle_epi8(intermediateB, transpose);
- const __m128i transpose1_2 = _mm_shuffle_epi8(intermediateC, transpose);
- // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
- // 80 90 A0 xx 81 91 A1 xx 82 92 A2 xx 83 93 A3 xx
- const __m128i transpose2_0 = _mm_unpacklo_epi32(transpose1_0, transpose1_1);
- const __m128i transpose2_1 = _mm_unpackhi_epi32(transpose1_0, transpose1_1);
- // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
- // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
- transpose3_0 = _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0),
- _mm_castsi128_ps(transpose1_2),
- _MM_SHUFFLE(0, 0, 1, 0)));
- transpose3_1 = _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(transpose2_0),
- _mm_castsi128_ps(transpose1_2),
- _MM_SHUFFLE(1, 1, 3, 2)));
- transpose3_2 = _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1),
- _mm_castsi128_ps(transpose1_2),
- _MM_SHUFFLE(2, 2, 1, 0)));
- transpose3_3 = _mm_castps_si128(
- _mm_shuffle_ps(_mm_castsi128_ps(transpose2_1),
- _mm_castsi128_ps(transpose1_2),
- _MM_SHUFFLE(3, 3, 3, 2)));
- // 00 10 20 30 40 50 60 70 80 90 A0 xx xx xx xx xx
- // 01 11 21 31 41 51 61 71 81 91 A1 xx xx xx xx xx
- // 02 12 22 32 42 52 62 72 82 92 A2 xx xx xx xx xx
- // 03 13 23 33 43 53 63 73 83 93 A3 xx xx xx xx xx
- }
-
- // Vertical pass (transpose3_x -> dst).
- {
- const __m128i VFilter = _mm_load_si128((const __m128i *)VFilter_aligned16);
- // get first two columns filter coefficients
- __m128i fil01 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(0, 0, 0, 0));
- __m128i fil23 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(1, 1, 1, 1));
- __m128i fil45 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(2, 2, 2, 2));
- __m128i fil67 = _mm_shuffle_epi32(VFilter, _MM_SHUFFLE(3, 3, 3, 3));
- __m128i col0, col1, col2, col3;
- {
- //load pixels
- __m128i src = transpose3_0;
- // extract the ones used for first column
- __m128i src0123 = _mm_shuffle_epi8(src, mask0123);
- __m128i src4567 = _mm_shuffle_epi8(src, mask4567);
- __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero);
- __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero);
- __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero);
- __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero);
- // multiply accumulate them
- __m128i mad01 = _mm_madd_epi16(src01_16, fil01);
- __m128i mad23 = _mm_madd_epi16(src23_16, fil23);
- __m128i mad45 = _mm_madd_epi16(src45_16, fil45);
- __m128i mad67 = _mm_madd_epi16(src67_16, fil67);
- __m128i mad0123 = _mm_add_epi32(mad01, mad23);
- __m128i mad4567 = _mm_add_epi32(mad45, mad67);
- __m128i mad_all = _mm_add_epi32(mad0123, mad4567);
- mad_all = _mm_add_epi32(mad_all, rounding);
- mad_all = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT);
- mad_all = _mm_packs_epi32(mad_all, mad_all);
- col0 = _mm_packus_epi16(mad_all, mad_all);
- }
- {
- //load pixels
- __m128i src = transpose3_1;
- // extract the ones used for first column
- __m128i src0123 = _mm_shuffle_epi8(src, mask0123);
- __m128i src4567 = _mm_shuffle_epi8(src, mask4567);
- __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero);
- __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero);
- __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero);
- __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero);
- // multiply accumulate them
- __m128i mad01 = _mm_madd_epi16(src01_16, fil01);
- __m128i mad23 = _mm_madd_epi16(src23_16, fil23);
- __m128i mad45 = _mm_madd_epi16(src45_16, fil45);
- __m128i mad67 = _mm_madd_epi16(src67_16, fil67);
- __m128i mad0123 = _mm_add_epi32(mad01, mad23);
- __m128i mad4567 = _mm_add_epi32(mad45, mad67);
- __m128i mad_all = _mm_add_epi32(mad0123, mad4567);
- mad_all = _mm_add_epi32(mad_all, rounding);
- mad_all = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT);
- mad_all = _mm_packs_epi32(mad_all, mad_all);
- col1 = _mm_packus_epi16(mad_all, mad_all);
- }
- {
- //load pixels
- __m128i src = transpose3_2;
- // extract the ones used for first column
- __m128i src0123 = _mm_shuffle_epi8(src, mask0123);
- __m128i src4567 = _mm_shuffle_epi8(src, mask4567);
- __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero);
- __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero);
- __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero);
- __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero);
- // multiply accumulate them
- __m128i mad01 = _mm_madd_epi16(src01_16, fil01);
- __m128i mad23 = _mm_madd_epi16(src23_16, fil23);
- __m128i mad45 = _mm_madd_epi16(src45_16, fil45);
- __m128i mad67 = _mm_madd_epi16(src67_16, fil67);
- __m128i mad0123 = _mm_add_epi32(mad01, mad23);
- __m128i mad4567 = _mm_add_epi32(mad45, mad67);
- __m128i mad_all = _mm_add_epi32(mad0123, mad4567);
- mad_all = _mm_add_epi32(mad_all, rounding);
- mad_all = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT);
- mad_all = _mm_packs_epi32(mad_all, mad_all);
- col2 = _mm_packus_epi16(mad_all, mad_all);
- }
- {
- //load pixels
- __m128i src = transpose3_3;
- // extract the ones used for first column
- __m128i src0123 = _mm_shuffle_epi8(src, mask0123);
- __m128i src4567 = _mm_shuffle_epi8(src, mask4567);
- __m128i src01_16 = _mm_unpacklo_epi8(src0123, zero);
- __m128i src23_16 = _mm_unpackhi_epi8(src0123, zero);
- __m128i src45_16 = _mm_unpacklo_epi8(src4567, zero);
- __m128i src67_16 = _mm_unpackhi_epi8(src4567, zero);
- // multiply accumulate them
- __m128i mad01 = _mm_madd_epi16(src01_16, fil01);
- __m128i mad23 = _mm_madd_epi16(src23_16, fil23);
- __m128i mad45 = _mm_madd_epi16(src45_16, fil45);
- __m128i mad67 = _mm_madd_epi16(src67_16, fil67);
- __m128i mad0123 = _mm_add_epi32(mad01, mad23);
- __m128i mad4567 = _mm_add_epi32(mad45, mad67);
- __m128i mad_all = _mm_add_epi32(mad0123, mad4567);
- mad_all = _mm_add_epi32(mad_all, rounding);
- mad_all = _mm_srai_epi32(mad_all, VP9_FILTER_SHIFT);
- mad_all = _mm_packs_epi32(mad_all, mad_all);
- col3 = _mm_packus_epi16(mad_all, mad_all);
- }
- {
- __m128i col01 = _mm_unpacklo_epi8(col0, col1);
- __m128i col23 = _mm_unpacklo_epi8(col2, col3);
- __m128i col0123 = _mm_unpacklo_epi16(col01, col23);
- //TODO(cd): look into Ronald's comment:
- // Future suggestion: I believe here, too, you can merge the
- // packs_epi32() and pacus_epi16() for the 4 cols above, so that
- // you get the data in a single register, and then use pshufb
- // (shuffle_epi8()) instead of the unpacks here. Should be
- // 2+3+2 instructions faster.
- *((unsigned int *)&dst_ptr[dst_stride * 0]) =
- _mm_extract_epi32(col0123, 0);
- *((unsigned int *)&dst_ptr[dst_stride * 1]) =
- _mm_extract_epi32(col0123, 1);
- *((unsigned int *)&dst_ptr[dst_stride * 2]) =
- _mm_extract_epi32(col0123, 2);
- *((unsigned int *)&dst_ptr[dst_stride * 3]) =
- _mm_extract_epi32(col0123, 3);
- }
- }
- }
-}
-
-void vp9_filter_block2d_8x4_8_sse4_1
-(
- const unsigned char *src_ptr, const unsigned int src_stride,
- const short *HFilter_aligned16, const short *VFilter_aligned16,
- unsigned char *dst_ptr, unsigned int dst_stride
-) {
- int j;
- for (j=0; j<8; j+=4) {
- vp9_filter_block2d_4x4_8_sse4_1(src_ptr + j, src_stride,
- HFilter_aligned16, VFilter_aligned16,
- dst_ptr + j, dst_stride);
- }
-}
-
-void vp9_filter_block2d_8x8_8_sse4_1
-(
- const unsigned char *src_ptr, const unsigned int src_stride,
- const short *HFilter_aligned16, const short *VFilter_aligned16,
- unsigned char *dst_ptr, unsigned int dst_stride
-) {
- int i, j;
- for (i=0; i<8; i+=4) {
- for (j=0; j<8; j+=4) {
- vp9_filter_block2d_4x4_8_sse4_1(src_ptr + j + i*src_stride, src_stride,
- HFilter_aligned16, VFilter_aligned16,
- dst_ptr + j + i*dst_stride, dst_stride);
- }
- }
-}
-
-void vp9_filter_block2d_16x16_8_sse4_1
-(
- const unsigned char *src_ptr, const unsigned int src_stride,
- const short *HFilter_aligned16, const short *VFilter_aligned16,
- unsigned char *dst_ptr, unsigned int dst_stride
-) {
- int i, j;
- for (i=0; i<16; i+=4) {
- for (j=0; j<16; j+=4) {
- vp9_filter_block2d_4x4_8_sse4_1(src_ptr + j + i*src_stride, src_stride,
- HFilter_aligned16, VFilter_aligned16,
- dst_ptr + j + i*dst_stride, dst_stride);
- }
- }
-}
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_sse2.asm
@@ -1,0 +1,712 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp9_idct_dequant_0_2x_sse2
+; (
+; short *qcoeff - 0
+; short *dequant - 1
+; unsigned char *pre - 2
+; unsigned char *dst - 3
+; int dst_stride - 4
+; int blk_stride - 5
+; )
+
+global sym(vp9_idct_dequant_0_2x_sse2) PRIVATE
+sym(vp9_idct_dequant_0_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ GET_GOT rbx
+ ; end prolog
+
+ mov rdx, arg(1) ; dequant
+ mov rax, arg(0) ; qcoeff
+
+ movd xmm4, [rax]
+ movd xmm5, [rdx]
+
+ pinsrw xmm4, [rax+32], 4
+ pinsrw xmm5, [rdx], 4
+
+ pmullw xmm4, xmm5
+
+ ; Zero out xmm5, for use unpacking
+ pxor xmm5, xmm5
+
+ ; clear coeffs
+ movd [rax], xmm5
+ movd [rax+32], xmm5
+;pshufb
+ pshuflw xmm4, xmm4, 00000000b
+ pshufhw xmm4, xmm4, 00000000b
+
+ mov rax, arg(2) ; pre
+ paddw xmm4, [GLOBAL(fours)]
+
+ movsxd rcx, dword ptr arg(5) ; blk_stride
+ psraw xmm4, 3
+
+ movq xmm0, [rax]
+ movq xmm1, [rax+rcx]
+ movq xmm2, [rax+2*rcx]
+ lea rcx, [3*rcx]
+ movq xmm3, [rax+rcx]
+
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
+
+ mov rax, arg(3) ; dst
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; Add to predict buffer
+ paddw xmm0, xmm4
+ paddw xmm1, xmm4
+ paddw xmm2, xmm4
+ paddw xmm3, xmm4
+
+ ; pack up before storing
+ packuswb xmm0, xmm5
+ packuswb xmm1, xmm5
+ packuswb xmm2, xmm5
+ packuswb xmm3, xmm5
+
+ ; store blocks back out
+ movq [rax], xmm0
+ movq [rax + rdx], xmm1
+
+ lea rax, [rax + 2*rdx]
+
+ movq [rax], xmm2
+ movq [rax + rdx], xmm3
+
+ ; begin epilog
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_idct_dequant_full_2x_sse2) PRIVATE
+sym(vp9_idct_dequant_full_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; special case when 2 blocks have 0 or 1 coeffs
+ ; dc is set as first coeff, so no need to load qcoeff
+ mov rax, arg(0) ; qcoeff
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+ movsxd rcx, dword ptr arg(5) ; blk_stride
+
+ ; Zero out xmm7, for use unpacking
+ pxor xmm7, xmm7
+
+ mov rdx, arg(1) ; dequant
+
+ ; note the transpose of xmm1 and xmm2, necessary for shuffle
+ ; to spit out sensicle data
+ movdqa xmm0, [rax]
+ movdqa xmm2, [rax+16]
+ movdqa xmm1, [rax+32]
+ movdqa xmm3, [rax+48]
+
+ ; Clear out coeffs
+ movdqa [rax], xmm7
+ movdqa [rax+16], xmm7
+ movdqa [rax+32], xmm7
+ movdqa [rax+48], xmm7
+
+ ; dequantize qcoeff buffer
+ pmullw xmm0, [rdx]
+ pmullw xmm2, [rdx+16]
+ pmullw xmm1, [rdx]
+ pmullw xmm3, [rdx+16]
+
+ ; repack so block 0 row x and block 1 row x are together
+ movdqa xmm4, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm4, xmm1
+
+ pshufd xmm0, xmm0, 11011000b
+ pshufd xmm1, xmm4, 11011000b
+
+ movdqa xmm4, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm4, xmm3
+
+ pshufd xmm2, xmm2, 11011000b
+ pshufd xmm3, xmm4, 11011000b
+
+ ; first pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2 ;
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+
+ ; transpose for the second pass
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ ; second pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ paddw xmm0, [GLOBAL(fours)]
+
+ paddw xmm2, [GLOBAL(fours)]
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+ psraw xmm2, 3
+
+ psraw xmm0, 3
+ psraw xmm4, 3
+
+ psraw xmm6, 3
+
+ ; transpose to save
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ pxor xmm7, xmm7
+
+ ; Load up predict blocks
+ movq xmm4, [rsi]
+ movq xmm5, [rsi+rcx]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm0, xmm4
+ paddw xmm1, xmm5
+
+ movq xmm4, [rsi+2*rcx]
+ lea rcx, [3*rcx]
+ movq xmm5, [rsi+rcx]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+.finish:
+
+ ; pack up before storing
+ packuswb xmm0, xmm7
+ packuswb xmm1, xmm7
+ packuswb xmm2, xmm7
+ packuswb xmm3, xmm7
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_idct_dequant_dc_0_2x_sse2
+; (
+; short *qcoeff - 0
+; short *dequant - 1
+; unsigned char *pre - 2
+; unsigned char *dst - 3
+; int dst_stride - 4
+; short *dc - 5
+; )
+global sym(vp9_idct_dequant_dc_0_2x_sse2) PRIVATE
+sym(vp9_idct_dequant_dc_0_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; special case when 2 blocks have 0 or 1 coeffs
+ ; dc is set as first coeff, so no need to load qcoeff
+ mov rax, arg(0) ; qcoeff
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+ mov rdx, arg(5) ; dc
+
+ ; Zero out xmm5, for use unpacking
+ pxor xmm5, xmm5
+
+ ; load up 2 dc words here == 2*16 = doubleword
+ movd xmm4, [rdx]
+
+ ; Load up predict blocks
+ movq xmm0, [rsi]
+ movq xmm1, [rsi+16]
+ movq xmm2, [rsi+32]
+ movq xmm3, [rsi+48]
+
+ ; Duplicate and expand dc across
+ punpcklwd xmm4, xmm4
+ punpckldq xmm4, xmm4
+
+ ; Rounding to dequant and downshift
+ paddw xmm4, [GLOBAL(fours)]
+ psraw xmm4, 3
+
+ ; Predict buffer needs to be expanded from bytes to words
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
+
+ ; Add to predict buffer
+ paddw xmm0, xmm4
+ paddw xmm1, xmm4
+ paddw xmm2, xmm4
+ paddw xmm3, xmm4
+
+ ; pack up before storing
+ packuswb xmm0, xmm5
+ packuswb xmm1, xmm5
+ packuswb xmm2, xmm5
+ packuswb xmm3, xmm5
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_idct_dequant_dc_full_2x_sse2) PRIVATE
+sym(vp9_idct_dequant_dc_full_2x_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; special case when 2 blocks have 0 or 1 coeffs
+ ; dc is set as first coeff, so no need to load qcoeff
+ mov rax, arg(0) ; qcoeff
+ mov rsi, arg(2) ; pre
+ mov rdi, arg(3) ; dst
+
+ ; Zero out xmm7, for use unpacking
+ pxor xmm7, xmm7
+
+ mov rdx, arg(1) ; dequant
+
+ ; note the transpose of xmm1 and xmm2, necessary for shuffle
+ ; to spit out sensicle data
+ movdqa xmm0, [rax]
+ movdqa xmm2, [rax+16]
+ movdqa xmm1, [rax+32]
+ movdqa xmm3, [rax+48]
+
+ ; Clear out coeffs
+ movdqa [rax], xmm7
+ movdqa [rax+16], xmm7
+ movdqa [rax+32], xmm7
+ movdqa [rax+48], xmm7
+
+ ; dequantize qcoeff buffer
+ pmullw xmm0, [rdx]
+ pmullw xmm2, [rdx+16]
+ pmullw xmm1, [rdx]
+ pmullw xmm3, [rdx+16]
+
+ ; DC component
+ mov rdx, arg(5)
+
+ ; repack so block 0 row x and block 1 row x are together
+ movdqa xmm4, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm4, xmm1
+
+ pshufd xmm0, xmm0, 11011000b
+ pshufd xmm1, xmm4, 11011000b
+
+ movdqa xmm4, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm4, xmm3
+
+ pshufd xmm2, xmm2, 11011000b
+ pshufd xmm3, xmm4, 11011000b
+
+ ; insert DC component
+ pinsrw xmm0, [rdx], 0
+ pinsrw xmm0, [rdx+2], 4
+
+ ; first pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2 ;
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+
+ ; transpose for the second pass
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ ; second pass
+ psubw xmm0, xmm2 ; b1 = 0-2
+ paddw xmm2, xmm2
+
+ movdqa xmm5, xmm1
+ paddw xmm2, xmm0 ; a1 = 0+2
+
+ pmulhw xmm5, [GLOBAL(x_s1sqr2)]
+ paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movdqa xmm7, xmm3
+ pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
+
+ paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw xmm7, xmm5 ; c1
+
+ movdqa xmm5, xmm1
+ movdqa xmm4, xmm3
+
+ pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
+ paddw xmm5, xmm1
+
+ pmulhw xmm3, [GLOBAL(x_s1sqr2)]
+ paddw xmm3, xmm4
+
+ paddw xmm3, xmm5 ; d1
+ paddw xmm0, [GLOBAL(fours)]
+
+ paddw xmm2, [GLOBAL(fours)]
+ movdqa xmm6, xmm2 ; a1
+
+ movdqa xmm4, xmm0 ; b1
+ paddw xmm2, xmm3 ;0
+
+ paddw xmm4, xmm7 ;1
+ psubw xmm0, xmm7 ;2
+
+ psubw xmm6, xmm3 ;3
+ psraw xmm2, 3
+
+ psraw xmm0, 3
+ psraw xmm4, 3
+
+ psraw xmm6, 3
+
+ ; transpose to save
+ movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
+ punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
+ punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
+
+ movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
+ punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
+ punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
+
+
+ movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
+ punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
+ punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
+
+ movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
+ punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
+ punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
+
+
+ movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
+ punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
+ punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
+
+ movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
+ punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
+ punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
+
+ pshufd xmm0, xmm2, 11011000b
+ pshufd xmm2, xmm1, 11011000b
+
+ pshufd xmm1, xmm5, 11011000b
+ pshufd xmm3, xmm7, 11011000b
+
+ pxor xmm7, xmm7
+
+ ; Load up predict blocks
+ movq xmm4, [rsi]
+ movq xmm5, [rsi+16]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm0, xmm4
+ paddw xmm1, xmm5
+
+ movq xmm4, [rsi+32]
+ movq xmm5, [rsi+48]
+
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+.finish:
+
+ ; pack up before storing
+ packuswb xmm0, xmm7
+ packuswb xmm1, xmm7
+ packuswb xmm2, xmm7
+ packuswb xmm3, xmm7
+
+ ; Load destination stride before writing out,
+ ; doesn't need to persist
+ movsxd rdx, dword ptr arg(4) ; dst_stride
+
+ ; store blocks back out
+ movq [rdi], xmm0
+ movq [rdi + rdx], xmm1
+
+ lea rdi, [rdi + 2*rdx]
+
+ movq [rdi], xmm2
+ movq [rdi + rdx], xmm3
+
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+fours:
+ times 8 dw 0x0004
+align 16
+x_s1sqr2:
+ times 8 dw 0x8A8C
+align 16
+x_c1sqr2less1:
+ times 8 dw 0x4E7B
--- /dev/null
+++ b/vp9/common/x86/vp9_idct_x86.c
@@ -1,0 +1,1975 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <emmintrin.h> // SSE2
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_idct.h"
+
+#if HAVE_SSE2
+// In order to improve performance, clip absolute diff values to [0, 255],
+// which allows to keep the additions/subtractions in 8 bits.
+void vp9_dc_only_idct_add_sse2(int input_dc, uint8_t *pred_ptr,
+ uint8_t *dst_ptr, int pitch, int stride) {
+ int a1;
+ int16_t out;
+ uint8_t abs_diff;
+ __m128i p0, p1, p2, p3;
+ unsigned int extended_diff;
+ __m128i diff;
+
+ out = dct_const_round_shift(input_dc * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
+ a1 = ROUND_POWER_OF_TWO(out, 4);
+
+ // Read prediction data.
+ p0 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 0 * pitch));
+ p1 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 1 * pitch));
+ p2 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 2 * pitch));
+ p3 = _mm_cvtsi32_si128 (*(const int *)(pred_ptr + 3 * pitch));
+
+ // Unpack prediction data, and store 4x4 array in 1 XMM register.
+ p0 = _mm_unpacklo_epi32(p0, p1);
+ p2 = _mm_unpacklo_epi32(p2, p3);
+ p0 = _mm_unpacklo_epi64(p0, p2);
+
+ // Clip dc value to [0, 255] range. Then, do addition or subtraction
+ // according to its sign.
+ if (a1 >= 0) {
+ abs_diff = (a1 > 255) ? 255 : a1;
+ extended_diff = abs_diff * 0x01010101u;
+ diff = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_diff), 0);
+
+ p1 = _mm_adds_epu8(p0, diff);
+ } else {
+ abs_diff = (a1 < -255) ? 255 : -a1;
+ extended_diff = abs_diff * 0x01010101u;
+ diff = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_diff), 0);
+
+ p1 = _mm_subs_epu8(p0, diff);
+ }
+
+ // Store results to dst.
+ *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
+ dst_ptr += stride;
+
+ p1 = _mm_srli_si128(p1, 4);
+ *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
+ dst_ptr += stride;
+
+ p1 = _mm_srli_si128(p1, 4);
+ *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
+ dst_ptr += stride;
+
+ p1 = _mm_srli_si128(p1, 4);
+ *(int *)dst_ptr = _mm_cvtsi128_si32(p1);
+}
+
+void vp9_short_idct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i eight = _mm_set1_epi16(8);
+ const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
+ (int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+ (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+ (int16_t)cospi_8_64, (int16_t)cospi_24_64);
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const int half_pitch = pitch >> 1;
+ __m128i input0, input1, input2, input3;
+
+ // Rows
+ input0 = _mm_loadl_epi64((__m128i *)input);
+ input1 = _mm_loadl_epi64((__m128i *)(input + 4));
+ input2 = _mm_loadl_epi64((__m128i *)(input + 8));
+ input3 = _mm_loadl_epi64((__m128i *)(input + 12));
+
+ // Construct i3, i1, i3, i1, i2, i0, i2, i0
+ input0 = _mm_shufflelo_epi16(input0, 0xd8);
+ input1 = _mm_shufflelo_epi16(input1, 0xd8);
+ input2 = _mm_shufflelo_epi16(input2, 0xd8);
+ input3 = _mm_shufflelo_epi16(input3, 0xd8);
+
+ input0 = _mm_unpacklo_epi32(input0, input0);
+ input1 = _mm_unpacklo_epi32(input1, input1);
+ input2 = _mm_unpacklo_epi32(input2, input2);
+ input3 = _mm_unpacklo_epi32(input3, input3);
+
+ // Stage 1
+ input0 = _mm_madd_epi16(input0, cst);
+ input1 = _mm_madd_epi16(input1, cst);
+ input2 = _mm_madd_epi16(input2, cst);
+ input3 = _mm_madd_epi16(input3, cst);
+
+ input0 = _mm_add_epi32(input0, rounding);
+ input1 = _mm_add_epi32(input1, rounding);
+ input2 = _mm_add_epi32(input2, rounding);
+ input3 = _mm_add_epi32(input3, rounding);
+
+ input0 = _mm_srai_epi32(input0, DCT_CONST_BITS);
+ input1 = _mm_srai_epi32(input1, DCT_CONST_BITS);
+ input2 = _mm_srai_epi32(input2, DCT_CONST_BITS);
+ input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
+
+ // Stage 2
+ input0 = _mm_packs_epi32(input0, zero);
+ input1 = _mm_packs_epi32(input1, zero);
+ input2 = _mm_packs_epi32(input2, zero);
+ input3 = _mm_packs_epi32(input3, zero);
+
+ // Transpose
+ input1 = _mm_unpacklo_epi16(input0, input1);
+ input3 = _mm_unpacklo_epi16(input2, input3);
+ input0 = _mm_unpacklo_epi32(input1, input3);
+ input1 = _mm_unpackhi_epi32(input1, input3);
+
+ // Switch column2, column 3, and then, we got:
+ // input2: column1, column 0; input3: column2, column 3.
+ input1 = _mm_shuffle_epi32(input1, 0x4e);
+ input2 = _mm_add_epi16(input0, input1);
+ input3 = _mm_sub_epi16(input0, input1);
+
+ // Columns
+ // Construct i3, i1, i3, i1, i2, i0, i2, i0
+ input0 = _mm_shufflelo_epi16(input2, 0xd8);
+ input1 = _mm_shufflehi_epi16(input2, 0xd8);
+ input2 = _mm_shufflehi_epi16(input3, 0xd8);
+ input3 = _mm_shufflelo_epi16(input3, 0xd8);
+
+ input0 = _mm_unpacklo_epi32(input0, input0);
+ input1 = _mm_unpackhi_epi32(input1, input1);
+ input2 = _mm_unpackhi_epi32(input2, input2);
+ input3 = _mm_unpacklo_epi32(input3, input3);
+
+ // Stage 1
+ input0 = _mm_madd_epi16(input0, cst);
+ input1 = _mm_madd_epi16(input1, cst);
+ input2 = _mm_madd_epi16(input2, cst);
+ input3 = _mm_madd_epi16(input3, cst);
+
+ input0 = _mm_add_epi32(input0, rounding);
+ input1 = _mm_add_epi32(input1, rounding);
+ input2 = _mm_add_epi32(input2, rounding);
+ input3 = _mm_add_epi32(input3, rounding);
+
+ input0 = _mm_srai_epi32(input0, DCT_CONST_BITS);
+ input1 = _mm_srai_epi32(input1, DCT_CONST_BITS);
+ input2 = _mm_srai_epi32(input2, DCT_CONST_BITS);
+ input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
+
+ // Stage 2
+ input0 = _mm_packs_epi32(input0, zero);
+ input1 = _mm_packs_epi32(input1, zero);
+ input2 = _mm_packs_epi32(input2, zero);
+ input3 = _mm_packs_epi32(input3, zero);
+
+ // Transpose
+ input1 = _mm_unpacklo_epi16(input0, input1);
+ input3 = _mm_unpacklo_epi16(input2, input3);
+ input0 = _mm_unpacklo_epi32(input1, input3);
+ input1 = _mm_unpackhi_epi32(input1, input3);
+
+ // Switch column2, column 3, and then, we got:
+ // input2: column1, column 0; input3: column2, column 3.
+ input1 = _mm_shuffle_epi32(input1, 0x4e);
+ input2 = _mm_add_epi16(input0, input1);
+ input3 = _mm_sub_epi16(input0, input1);
+
+ // Final round and shift
+ input2 = _mm_add_epi16(input2, eight);
+ input3 = _mm_add_epi16(input3, eight);
+
+ input2 = _mm_srai_epi16(input2, 4);
+ input3 = _mm_srai_epi16(input3, 4);
+
+ // Store results
+ _mm_storel_epi64((__m128i *)output, input2);
+ input2 = _mm_srli_si128(input2, 8);
+ _mm_storel_epi64((__m128i *)(output + half_pitch), input2);
+
+ _mm_storel_epi64((__m128i *)(output + 3 * half_pitch), input3);
+ input3 = _mm_srli_si128(input3, 8);
+ _mm_storel_epi64((__m128i *)(output + 2 * half_pitch), input3);
+}
+
+void vp9_idct4_1d_sse2(int16_t *input, int16_t *output) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i c1 = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
+ (int16_t)cospi_16_64, (int16_t)-cospi_16_64,
+ (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+ (int16_t)cospi_8_64, (int16_t)cospi_24_64);
+ const __m128i c2 = _mm_setr_epi16(1, 1, 1, 1, 1, -1, 1, -1);
+
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ __m128i in, temp;
+
+ // Load input data.
+ in = _mm_loadl_epi64((__m128i *)input);
+
+ // Construct i3, i1, i3, i1, i2, i0, i2, i0
+ in = _mm_shufflelo_epi16(in, 0xd8);
+ in = _mm_unpacklo_epi32(in, in);
+
+ // Stage 1
+ in = _mm_madd_epi16(in, c1);
+ in = _mm_add_epi32(in, rounding);
+ in = _mm_srai_epi32(in, DCT_CONST_BITS);
+ in = _mm_packs_epi32(in, zero);
+
+ // Stage 2
+ temp = _mm_shufflelo_epi16(in, 0x9c);
+ in = _mm_shufflelo_epi16(in, 0xc9);
+ in = _mm_unpacklo_epi64(temp, in);
+ in = _mm_madd_epi16(in, c2);
+ in = _mm_packs_epi32(in, zero);
+
+ // Store results
+ _mm_storel_epi64((__m128i *)output, in);
+}
+
+#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) \
+ { \
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
+ const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \
+ const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5); \
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7); \
+ const __m128i tr0_6 = _mm_unpackhi_epi16(in4, in5); \
+ const __m128i tr0_7 = _mm_unpackhi_epi16(in6, in7); \
+ \
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); \
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); \
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); \
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); \
+ \
+ out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \
+ out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \
+ out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \
+ out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \
+ out4 = _mm_unpacklo_epi64(tr1_1, tr1_5); \
+ out5 = _mm_unpackhi_epi64(tr1_1, tr1_5); \
+ out6 = _mm_unpacklo_epi64(tr1_3, tr1_7); \
+ out7 = _mm_unpackhi_epi64(tr1_3, tr1_7); \
+ }
+
+#define TRANSPOSE_4X8(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) \
+ { \
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5); \
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7); \
+ \
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \
+ \
+ out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \
+ out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \
+ out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \
+ out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \
+ out4 = out5 = out6 = out7 = zero; \
+ }
+
+#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1, out2, out3) \
+ { \
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
+ const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \
+ const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \
+ \
+ in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \
+ in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \
+ in2 = _mm_unpacklo_epi32(tr0_2, tr0_3); /* i5 i4 */ \
+ in3 = _mm_unpackhi_epi32(tr0_2, tr0_3); /* i7 i6 */ \
+ }
+
+// Define Macro for multiplying elements by constants and adding them together.
+#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, \
+ cst0, cst1, cst2, cst3, res0, res1, res2, res3) \
+ { \
+ tmp0 = _mm_madd_epi16(lo_0, cst0); \
+ tmp1 = _mm_madd_epi16(hi_0, cst0); \
+ tmp2 = _mm_madd_epi16(lo_0, cst1); \
+ tmp3 = _mm_madd_epi16(hi_0, cst1); \
+ tmp4 = _mm_madd_epi16(lo_1, cst2); \
+ tmp5 = _mm_madd_epi16(hi_1, cst2); \
+ tmp6 = _mm_madd_epi16(lo_1, cst3); \
+ tmp7 = _mm_madd_epi16(hi_1, cst3); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ tmp4 = _mm_add_epi32(tmp4, rounding); \
+ tmp5 = _mm_add_epi32(tmp5, rounding); \
+ tmp6 = _mm_add_epi32(tmp6, rounding); \
+ tmp7 = _mm_add_epi32(tmp7, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); \
+ tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); \
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); \
+ tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); \
+ \
+ res0 = _mm_packs_epi32(tmp0, tmp1); \
+ res1 = _mm_packs_epi32(tmp2, tmp3); \
+ res2 = _mm_packs_epi32(tmp4, tmp5); \
+ res3 = _mm_packs_epi32(tmp6, tmp7); \
+ }
+
+#define IDCT8x8_1D \
+ /* Stage1 */ \
+ { \
+ const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \
+ const __m128i hi_17 = _mm_unpackhi_epi16(in1, in7); \
+ const __m128i lo_35 = _mm_unpacklo_epi16(in3, in5); \
+ const __m128i hi_35 = _mm_unpackhi_epi16(in3, in5); \
+ \
+ MULTIPLICATION_AND_ADD(lo_17, hi_17, lo_35, hi_35, stg1_0, \
+ stg1_1, stg1_2, stg1_3, stp1_4, \
+ stp1_7, stp1_5, stp1_6) \
+ } \
+ \
+ /* Stage2 */ \
+ { \
+ const __m128i lo_04 = _mm_unpacklo_epi16(in0, in4); \
+ const __m128i hi_04 = _mm_unpackhi_epi16(in0, in4); \
+ const __m128i lo_26 = _mm_unpacklo_epi16(in2, in6); \
+ const __m128i hi_26 = _mm_unpackhi_epi16(in2, in6); \
+ \
+ MULTIPLICATION_AND_ADD(lo_04, hi_04, lo_26, hi_26, stg2_0, \
+ stg2_1, stg2_2, stg2_3, stp2_0, \
+ stp2_1, stp2_2, stp2_3) \
+ \
+ stp2_4 = _mm_adds_epi16(stp1_4, stp1_5); \
+ stp2_5 = _mm_subs_epi16(stp1_4, stp1_5); \
+ stp2_6 = _mm_subs_epi16(stp1_7, stp1_6); \
+ stp2_7 = _mm_adds_epi16(stp1_7, stp1_6); \
+ } \
+ \
+ /* Stage3 */ \
+ { \
+ const __m128i lo_56 = _mm_unpacklo_epi16(stp2_6, stp2_5); \
+ const __m128i hi_56 = _mm_unpackhi_epi16(stp2_6, stp2_5); \
+ \
+ stp1_0 = _mm_adds_epi16(stp2_0, stp2_3); \
+ stp1_1 = _mm_adds_epi16(stp2_1, stp2_2); \
+ stp1_2 = _mm_subs_epi16(stp2_1, stp2_2); \
+ stp1_3 = _mm_subs_epi16(stp2_0, stp2_3); \
+ \
+ tmp0 = _mm_madd_epi16(lo_56, stg2_1); \
+ tmp1 = _mm_madd_epi16(hi_56, stg2_1); \
+ tmp2 = _mm_madd_epi16(lo_56, stg2_0); \
+ tmp3 = _mm_madd_epi16(hi_56, stg2_0); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ \
+ stp1_5 = _mm_packs_epi32(tmp0, tmp1); \
+ stp1_6 = _mm_packs_epi32(tmp2, tmp3); \
+ } \
+ \
+ /* Stage4 */ \
+ in0 = _mm_adds_epi16(stp1_0, stp2_7); \
+ in1 = _mm_adds_epi16(stp1_1, stp1_6); \
+ in2 = _mm_adds_epi16(stp1_2, stp1_5); \
+ in3 = _mm_adds_epi16(stp1_3, stp2_4); \
+ in4 = _mm_subs_epi16(stp1_3, stp2_4); \
+ in5 = _mm_subs_epi16(stp1_2, stp1_5); \
+ in6 = _mm_subs_epi16(stp1_1, stp1_6); \
+ in7 = _mm_subs_epi16(stp1_0, stp2_7);
+
+void vp9_short_idct8x8_sse2(int16_t *input, int16_t *output, int pitch) {
+ const int half_pitch = pitch >> 1;
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1<<4);
+ const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int i;
+
+ // Load input data.
+ in0 = _mm_load_si128((__m128i *)input);
+ in1 = _mm_load_si128((__m128i *)(input + 8 * 1));
+ in2 = _mm_load_si128((__m128i *)(input + 8 * 2));
+ in3 = _mm_load_si128((__m128i *)(input + 8 * 3));
+ in4 = _mm_load_si128((__m128i *)(input + 8 * 4));
+ in5 = _mm_load_si128((__m128i *)(input + 8 * 5));
+ in6 = _mm_load_si128((__m128i *)(input + 8 * 6));
+ in7 = _mm_load_si128((__m128i *)(input + 8 * 7));
+
+ // 2-D
+ for (i = 0; i < 2; i++) {
+ // 8x8 Transpose is copied from vp9_short_fdct8x8_sse2()
+ TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+
+ // 4-stage 1D idct8x8
+ IDCT8x8_1D
+ }
+
+ // Final rounding and shift
+ in0 = _mm_adds_epi16(in0, final_rounding);
+ in1 = _mm_adds_epi16(in1, final_rounding);
+ in2 = _mm_adds_epi16(in2, final_rounding);
+ in3 = _mm_adds_epi16(in3, final_rounding);
+ in4 = _mm_adds_epi16(in4, final_rounding);
+ in5 = _mm_adds_epi16(in5, final_rounding);
+ in6 = _mm_adds_epi16(in6, final_rounding);
+ in7 = _mm_adds_epi16(in7, final_rounding);
+
+ in0 = _mm_srai_epi16(in0, 5);
+ in1 = _mm_srai_epi16(in1, 5);
+ in2 = _mm_srai_epi16(in2, 5);
+ in3 = _mm_srai_epi16(in3, 5);
+ in4 = _mm_srai_epi16(in4, 5);
+ in5 = _mm_srai_epi16(in5, 5);
+ in6 = _mm_srai_epi16(in6, 5);
+ in7 = _mm_srai_epi16(in7, 5);
+
+ // Store results
+ _mm_store_si128((__m128i *)output, in0);
+ _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
+ _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
+ _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
+ _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
+ _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
+ _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
+ _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
+}
+
+void vp9_short_idct10_8x8_sse2(int16_t *input, int16_t *output, int pitch) {
+ const int half_pitch = pitch >> 1;
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1<<4);
+ const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i stg3_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+
+ // Rows. Load 4-row input data.
+ in0 = _mm_load_si128((__m128i *)input);
+ in1 = _mm_load_si128((__m128i *)(input + 8 * 1));
+ in2 = _mm_load_si128((__m128i *)(input + 8 * 2));
+ in3 = _mm_load_si128((__m128i *)(input + 8 * 3));
+
+ // 8x4 Transpose
+ TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3)
+
+ // Stage1
+ {
+ const __m128i lo_17 = _mm_unpackhi_epi16(in0, in3);
+ const __m128i lo_35 = _mm_unpackhi_epi16(in1, in2);
+
+ tmp0 = _mm_madd_epi16(lo_17, stg1_0);
+ tmp2 = _mm_madd_epi16(lo_17, stg1_1);
+ tmp4 = _mm_madd_epi16(lo_35, stg1_2);
+ tmp6 = _mm_madd_epi16(lo_35, stg1_3);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+ stp1_4 = _mm_packs_epi32(tmp0, zero);
+ stp1_7 = _mm_packs_epi32(tmp2, zero);
+ stp1_5 = _mm_packs_epi32(tmp4, zero);
+ stp1_6 = _mm_packs_epi32(tmp6, zero);
+ }
+
+ // Stage2
+ {
+ const __m128i lo_04 = _mm_unpacklo_epi16(in0, in2);
+ const __m128i lo_26 = _mm_unpacklo_epi16(in1, in3);
+
+ tmp0 = _mm_madd_epi16(lo_04, stg2_0);
+ tmp2 = _mm_madd_epi16(lo_04, stg2_1);
+ tmp4 = _mm_madd_epi16(lo_26, stg2_2);
+ tmp6 = _mm_madd_epi16(lo_26, stg2_3);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+ stp2_0 = _mm_packs_epi32(tmp0, zero);
+ stp2_1 = _mm_packs_epi32(tmp2, zero);
+ stp2_2 = _mm_packs_epi32(tmp4, zero);
+ stp2_3 = _mm_packs_epi32(tmp6, zero);
+
+ stp2_4 = _mm_adds_epi16(stp1_4, stp1_5);
+ stp2_5 = _mm_subs_epi16(stp1_4, stp1_5);
+ stp2_6 = _mm_subs_epi16(stp1_7, stp1_6);
+ stp2_7 = _mm_adds_epi16(stp1_7, stp1_6);
+ }
+
+ // Stage3
+ {
+ const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6);
+ stp1_0 = _mm_adds_epi16(stp2_0, stp2_3);
+ stp1_1 = _mm_adds_epi16(stp2_1, stp2_2);
+ stp1_2 = _mm_subs_epi16(stp2_1, stp2_2);
+ stp1_3 = _mm_subs_epi16(stp2_0, stp2_3);
+
+ tmp0 = _mm_madd_epi16(lo_56, stg3_0);
+ tmp2 = _mm_madd_epi16(lo_56, stg2_0); // stg3_1 = stg2_0
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+
+ stp1_5 = _mm_packs_epi32(tmp0, zero);
+ stp1_6 = _mm_packs_epi32(tmp2, zero);
+ }
+
+ // Stage4
+ in0 = _mm_adds_epi16(stp1_0, stp2_7);
+ in1 = _mm_adds_epi16(stp1_1, stp1_6);
+ in2 = _mm_adds_epi16(stp1_2, stp1_5);
+ in3 = _mm_adds_epi16(stp1_3, stp2_4);
+ in4 = _mm_subs_epi16(stp1_3, stp2_4);
+ in5 = _mm_subs_epi16(stp1_2, stp1_5);
+ in6 = _mm_subs_epi16(stp1_1, stp1_6);
+ in7 = _mm_subs_epi16(stp1_0, stp2_7);
+
+ // Columns. 4x8 Transpose
+ TRANSPOSE_4X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7)
+
+ // 1D idct8x8
+ IDCT8x8_1D
+
+ // Final rounding and shift
+ in0 = _mm_adds_epi16(in0, final_rounding);
+ in1 = _mm_adds_epi16(in1, final_rounding);
+ in2 = _mm_adds_epi16(in2, final_rounding);
+ in3 = _mm_adds_epi16(in3, final_rounding);
+ in4 = _mm_adds_epi16(in4, final_rounding);
+ in5 = _mm_adds_epi16(in5, final_rounding);
+ in6 = _mm_adds_epi16(in6, final_rounding);
+ in7 = _mm_adds_epi16(in7, final_rounding);
+
+ in0 = _mm_srai_epi16(in0, 5);
+ in1 = _mm_srai_epi16(in1, 5);
+ in2 = _mm_srai_epi16(in2, 5);
+ in3 = _mm_srai_epi16(in3, 5);
+ in4 = _mm_srai_epi16(in4, 5);
+ in5 = _mm_srai_epi16(in5, 5);
+ in6 = _mm_srai_epi16(in6, 5);
+ in7 = _mm_srai_epi16(in7, 5);
+
+ // Store results
+ _mm_store_si128((__m128i *)output, in0);
+ _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
+ _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
+ _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
+ _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
+ _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
+ _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
+ _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
+}
+
+#define IDCT16x16_1D \
+ /* Stage2 */ \
+ { \
+ const __m128i lo_1_15 = _mm_unpacklo_epi16(in1, in15); \
+ const __m128i hi_1_15 = _mm_unpackhi_epi16(in1, in15); \
+ const __m128i lo_9_7 = _mm_unpacklo_epi16(in9, in7); \
+ const __m128i hi_9_7 = _mm_unpackhi_epi16(in9, in7); \
+ const __m128i lo_5_11 = _mm_unpacklo_epi16(in5, in11); \
+ const __m128i hi_5_11 = _mm_unpackhi_epi16(in5, in11); \
+ const __m128i lo_13_3 = _mm_unpacklo_epi16(in13, in3); \
+ const __m128i hi_13_3 = _mm_unpackhi_epi16(in13, in3); \
+ \
+ MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, \
+ stg2_0, stg2_1, stg2_2, stg2_3, \
+ stp2_8, stp2_15, stp2_9, stp2_14) \
+ \
+ MULTIPLICATION_AND_ADD(lo_5_11, hi_5_11, lo_13_3, hi_13_3, \
+ stg2_4, stg2_5, stg2_6, stg2_7, \
+ stp2_10, stp2_13, stp2_11, stp2_12) \
+ } \
+ \
+ /* Stage3 */ \
+ { \
+ const __m128i lo_2_14 = _mm_unpacklo_epi16(in2, in14); \
+ const __m128i hi_2_14 = _mm_unpackhi_epi16(in2, in14); \
+ const __m128i lo_10_6 = _mm_unpacklo_epi16(in10, in6); \
+ const __m128i hi_10_6 = _mm_unpackhi_epi16(in10, in6); \
+ \
+ MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, \
+ stg3_0, stg3_1, stg3_2, stg3_3, \
+ stp1_4, stp1_7, stp1_5, stp1_6) \
+ \
+ stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9); \
+ stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \
+ stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \
+ stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \
+ \
+ stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13); \
+ stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \
+ stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \
+ stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \
+ } \
+ \
+ /* Stage4 */ \
+ { \
+ const __m128i lo_0_8 = _mm_unpacklo_epi16(in0, in8); \
+ const __m128i hi_0_8 = _mm_unpackhi_epi16(in0, in8); \
+ const __m128i lo_4_12 = _mm_unpacklo_epi16(in4, in12); \
+ const __m128i hi_4_12 = _mm_unpackhi_epi16(in4, in12); \
+ \
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \
+ const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ \
+ MULTIPLICATION_AND_ADD(lo_0_8, hi_0_8, lo_4_12, hi_4_12, \
+ stg4_0, stg4_1, stg4_2, stg4_3, \
+ stp2_0, stp2_1, stp2_2, stp2_3) \
+ \
+ stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \
+ stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \
+ stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \
+ stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \
+ \
+ MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \
+ stg4_4, stg4_5, stg4_6, stg4_7, \
+ stp2_9, stp2_14, stp2_10, stp2_13) \
+ } \
+ \
+ /* Stage5 */ \
+ { \
+ const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \
+ const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \
+ \
+ stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \
+ stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \
+ stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \
+ stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \
+ \
+ tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \
+ tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \
+ tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \
+ tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ \
+ stp1_5 = _mm_packs_epi32(tmp0, tmp1); \
+ stp1_6 = _mm_packs_epi32(tmp2, tmp3); \
+ \
+ stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \
+ stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \
+ stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \
+ stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \
+ \
+ stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \
+ stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \
+ stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \
+ stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \
+ } \
+ \
+ /* Stage6 */ \
+ { \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \
+ const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \
+ \
+ stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \
+ stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \
+ stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \
+ stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \
+ stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \
+ stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \
+ stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \
+ stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \
+ \
+ MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \
+ stg6_0, stg4_0, stg6_0, stg4_0, \
+ stp2_10, stp2_13, stp2_11, stp2_12) \
+ }
+
+void vp9_short_idct16x16_sse2(int16_t *input, int16_t *output, int pitch) {
+ const int half_pitch = pitch >> 1;
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1<<5);
+ const __m128i zero = _mm_setzero_si128();
+
+ const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+ const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64);
+
+ const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+
+ const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i in0 = zero, in1 = zero, in2 = zero, in3 = zero, in4 = zero,
+ in5 = zero, in6 = zero, in7 = zero, in8 = zero, in9 = zero,
+ in10 = zero, in11 = zero, in12 = zero, in13 = zero,
+ in14 = zero, in15 = zero;
+ __m128i l0 = zero, l1 = zero, l2 = zero, l3 = zero, l4 = zero, l5 = zero,
+ l6 = zero, l7 = zero, l8 = zero, l9 = zero, l10 = zero, l11 = zero,
+ l12 = zero, l13 = zero, l14 = zero, l15 = zero;
+ __m128i r0 = zero, r1 = zero, r2 = zero, r3 = zero, r4 = zero, r5 = zero,
+ r6 = zero, r7 = zero, r8 = zero, r9 = zero, r10 = zero, r11 = zero,
+ r12 = zero, r13 = zero, r14 = zero, r15 = zero;
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+ stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+ stp1_8_0, stp1_12_0;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int i;
+
+ // We work on a 8x16 block each time, and loop 4 times for 2-D 16x16 idct.
+ for (i = 0; i < 4; i++) {
+ // 1-D idct
+ if (i < 2) {
+ if (i == 1) input += 128;
+
+ // Load input data.
+ in0 = _mm_load_si128((__m128i *)input);
+ in8 = _mm_load_si128((__m128i *)(input + 8 * 1));
+ in1 = _mm_load_si128((__m128i *)(input + 8 * 2));
+ in9 = _mm_load_si128((__m128i *)(input + 8 * 3));
+ in2 = _mm_load_si128((__m128i *)(input + 8 * 4));
+ in10 = _mm_load_si128((__m128i *)(input + 8 * 5));
+ in3 = _mm_load_si128((__m128i *)(input + 8 * 6));
+ in11 = _mm_load_si128((__m128i *)(input + 8 * 7));
+ in4 = _mm_load_si128((__m128i *)(input + 8 * 8));
+ in12 = _mm_load_si128((__m128i *)(input + 8 * 9));
+ in5 = _mm_load_si128((__m128i *)(input + 8 * 10));
+ in13 = _mm_load_si128((__m128i *)(input + 8 * 11));
+ in6 = _mm_load_si128((__m128i *)(input + 8 * 12));
+ in14 = _mm_load_si128((__m128i *)(input + 8 * 13));
+ in7 = _mm_load_si128((__m128i *)(input + 8 * 14));
+ in15 = _mm_load_si128((__m128i *)(input + 8 * 15));
+
+ TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ TRANSPOSE_8X8(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9,
+ in10, in11, in12, in13, in14, in15);
+ }
+
+ if (i == 2) {
+ TRANSPOSE_8X8(l0, l1, l2, l3, l4, l5, l6, l7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE_8X8(r0, r1, r2, r3, r4, r5, r6, r7, in8, in9, in10, in11, in12,
+ in13, in14, in15);
+ }
+
+ if (i == 3) {
+ TRANSPOSE_8X8(l8, l9, l10, l11, l12, l13, l14, l15, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ TRANSPOSE_8X8(r8, r9, r10, r11, r12, r13, r14, r15, in8, in9, in10, in11,
+ in12, in13, in14, in15);
+ }
+
+ IDCT16x16_1D
+
+ // Stage7
+ if (i == 0) {
+ // Left 8x16
+ l0 = _mm_add_epi16(stp2_0, stp1_15);
+ l1 = _mm_add_epi16(stp2_1, stp1_14);
+ l2 = _mm_add_epi16(stp2_2, stp2_13);
+ l3 = _mm_add_epi16(stp2_3, stp2_12);
+ l4 = _mm_add_epi16(stp2_4, stp2_11);
+ l5 = _mm_add_epi16(stp2_5, stp2_10);
+ l6 = _mm_add_epi16(stp2_6, stp1_9);
+ l7 = _mm_add_epi16(stp2_7, stp1_8);
+ l8 = _mm_sub_epi16(stp2_7, stp1_8);
+ l9 = _mm_sub_epi16(stp2_6, stp1_9);
+ l10 = _mm_sub_epi16(stp2_5, stp2_10);
+ l11 = _mm_sub_epi16(stp2_4, stp2_11);
+ l12 = _mm_sub_epi16(stp2_3, stp2_12);
+ l13 = _mm_sub_epi16(stp2_2, stp2_13);
+ l14 = _mm_sub_epi16(stp2_1, stp1_14);
+ l15 = _mm_sub_epi16(stp2_0, stp1_15);
+ } else if (i == 1) {
+ // Right 8x16
+ r0 = _mm_add_epi16(stp2_0, stp1_15);
+ r1 = _mm_add_epi16(stp2_1, stp1_14);
+ r2 = _mm_add_epi16(stp2_2, stp2_13);
+ r3 = _mm_add_epi16(stp2_3, stp2_12);
+ r4 = _mm_add_epi16(stp2_4, stp2_11);
+ r5 = _mm_add_epi16(stp2_5, stp2_10);
+ r6 = _mm_add_epi16(stp2_6, stp1_9);
+ r7 = _mm_add_epi16(stp2_7, stp1_8);
+ r8 = _mm_sub_epi16(stp2_7, stp1_8);
+ r9 = _mm_sub_epi16(stp2_6, stp1_9);
+ r10 = _mm_sub_epi16(stp2_5, stp2_10);
+ r11 = _mm_sub_epi16(stp2_4, stp2_11);
+ r12 = _mm_sub_epi16(stp2_3, stp2_12);
+ r13 = _mm_sub_epi16(stp2_2, stp2_13);
+ r14 = _mm_sub_epi16(stp2_1, stp1_14);
+ r15 = _mm_sub_epi16(stp2_0, stp1_15);
+ } else {
+ // 2-D
+ in0 = _mm_add_epi16(stp2_0, stp1_15);
+ in1 = _mm_add_epi16(stp2_1, stp1_14);
+ in2 = _mm_add_epi16(stp2_2, stp2_13);
+ in3 = _mm_add_epi16(stp2_3, stp2_12);
+ in4 = _mm_add_epi16(stp2_4, stp2_11);
+ in5 = _mm_add_epi16(stp2_5, stp2_10);
+ in6 = _mm_add_epi16(stp2_6, stp1_9);
+ in7 = _mm_add_epi16(stp2_7, stp1_8);
+ in8 = _mm_sub_epi16(stp2_7, stp1_8);
+ in9 = _mm_sub_epi16(stp2_6, stp1_9);
+ in10 = _mm_sub_epi16(stp2_5, stp2_10);
+ in11 = _mm_sub_epi16(stp2_4, stp2_11);
+ in12 = _mm_sub_epi16(stp2_3, stp2_12);
+ in13 = _mm_sub_epi16(stp2_2, stp2_13);
+ in14 = _mm_sub_epi16(stp2_1, stp1_14);
+ in15 = _mm_sub_epi16(stp2_0, stp1_15);
+
+ // Final rounding and shift
+ in0 = _mm_adds_epi16(in0, final_rounding);
+ in1 = _mm_adds_epi16(in1, final_rounding);
+ in2 = _mm_adds_epi16(in2, final_rounding);
+ in3 = _mm_adds_epi16(in3, final_rounding);
+ in4 = _mm_adds_epi16(in4, final_rounding);
+ in5 = _mm_adds_epi16(in5, final_rounding);
+ in6 = _mm_adds_epi16(in6, final_rounding);
+ in7 = _mm_adds_epi16(in7, final_rounding);
+ in8 = _mm_adds_epi16(in8, final_rounding);
+ in9 = _mm_adds_epi16(in9, final_rounding);
+ in10 = _mm_adds_epi16(in10, final_rounding);
+ in11 = _mm_adds_epi16(in11, final_rounding);
+ in12 = _mm_adds_epi16(in12, final_rounding);
+ in13 = _mm_adds_epi16(in13, final_rounding);
+ in14 = _mm_adds_epi16(in14, final_rounding);
+ in15 = _mm_adds_epi16(in15, final_rounding);
+
+ in0 = _mm_srai_epi16(in0, 6);
+ in1 = _mm_srai_epi16(in1, 6);
+ in2 = _mm_srai_epi16(in2, 6);
+ in3 = _mm_srai_epi16(in3, 6);
+ in4 = _mm_srai_epi16(in4, 6);
+ in5 = _mm_srai_epi16(in5, 6);
+ in6 = _mm_srai_epi16(in6, 6);
+ in7 = _mm_srai_epi16(in7, 6);
+ in8 = _mm_srai_epi16(in8, 6);
+ in9 = _mm_srai_epi16(in9, 6);
+ in10 = _mm_srai_epi16(in10, 6);
+ in11 = _mm_srai_epi16(in11, 6);
+ in12 = _mm_srai_epi16(in12, 6);
+ in13 = _mm_srai_epi16(in13, 6);
+ in14 = _mm_srai_epi16(in14, 6);
+ in15 = _mm_srai_epi16(in15, 6);
+
+ // Store results
+ _mm_store_si128((__m128i *)output, in0);
+ _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
+ _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
+ _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
+ _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
+ _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
+ _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
+ _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
+ _mm_store_si128((__m128i *)(output + half_pitch * 8), in8);
+ _mm_store_si128((__m128i *)(output + half_pitch * 9), in9);
+ _mm_store_si128((__m128i *)(output + half_pitch * 10), in10);
+ _mm_store_si128((__m128i *)(output + half_pitch * 11), in11);
+ _mm_store_si128((__m128i *)(output + half_pitch * 12), in12);
+ _mm_store_si128((__m128i *)(output + half_pitch * 13), in13);
+ _mm_store_si128((__m128i *)(output + half_pitch * 14), in14);
+ _mm_store_si128((__m128i *)(output + half_pitch * 15), in15);
+
+ output += 8;
+ }
+ }
+}
+
+void vp9_short_idct10_16x16_sse2(int16_t *input, int16_t *output, int pitch) {
+ const int half_pitch = pitch >> 1;
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1<<5);
+ const __m128i zero = _mm_setzero_si128();
+
+ const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+ const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64);
+
+ const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+
+ const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i in0 = zero, in1 = zero, in2 = zero, in3 = zero, in4 = zero,
+ in5 = zero, in6 = zero, in7 = zero, in8 = zero, in9 = zero,
+ in10 = zero, in11 = zero, in12 = zero, in13 = zero,
+ in14 = zero, in15 = zero;
+ __m128i l0 = zero, l1 = zero, l2 = zero, l3 = zero, l4 = zero, l5 = zero,
+ l6 = zero, l7 = zero, l8 = zero, l9 = zero, l10 = zero, l11 = zero,
+ l12 = zero, l13 = zero, l14 = zero, l15 = zero;
+
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+ stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+ stp1_8_0, stp1_12_0;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int i;
+
+ // 1-D idct. Load input data.
+ in0 = _mm_load_si128((__m128i *)input);
+ in8 = _mm_load_si128((__m128i *)(input + 8 * 1));
+ in1 = _mm_load_si128((__m128i *)(input + 8 * 2));
+ in9 = _mm_load_si128((__m128i *)(input + 8 * 3));
+ in2 = _mm_load_si128((__m128i *)(input + 8 * 4));
+ in10 = _mm_load_si128((__m128i *)(input + 8 * 5));
+ in3 = _mm_load_si128((__m128i *)(input + 8 * 6));
+ in11 = _mm_load_si128((__m128i *)(input + 8 * 7));
+
+ TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE_8X4(in8, in9, in10, in11, in8, in9, in10, in11);
+
+ // Stage2
+ {
+ const __m128i lo_1_15 = _mm_unpackhi_epi16(in0, in11);
+ const __m128i lo_9_7 = _mm_unpackhi_epi16(in8, in3);
+ const __m128i lo_5_11 = _mm_unpackhi_epi16(in2, in9);
+ const __m128i lo_13_3 = _mm_unpackhi_epi16(in10, in1);
+
+ tmp0 = _mm_madd_epi16(lo_1_15, stg2_0);
+ tmp2 = _mm_madd_epi16(lo_1_15, stg2_1);
+ tmp4 = _mm_madd_epi16(lo_9_7, stg2_2);
+ tmp6 = _mm_madd_epi16(lo_9_7, stg2_3);
+ tmp1 = _mm_madd_epi16(lo_5_11, stg2_4);
+ tmp3 = _mm_madd_epi16(lo_5_11, stg2_5);
+ tmp5 = _mm_madd_epi16(lo_13_3, stg2_6);
+ tmp7 = _mm_madd_epi16(lo_13_3, stg2_7);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+ tmp1 = _mm_add_epi32(tmp1, rounding);
+ tmp3 = _mm_add_epi32(tmp3, rounding);
+ tmp5 = _mm_add_epi32(tmp5, rounding);
+ tmp7 = _mm_add_epi32(tmp7, rounding);
+
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+ tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS);
+ tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS);
+
+ stp2_8 = _mm_packs_epi32(tmp0, zero);
+ stp2_15 = _mm_packs_epi32(tmp2, zero);
+ stp2_9 = _mm_packs_epi32(tmp4, zero);
+ stp2_14 = _mm_packs_epi32(tmp6, zero);
+
+ stp2_10 = _mm_packs_epi32(tmp1, zero);
+ stp2_13 = _mm_packs_epi32(tmp3, zero);
+ stp2_11 = _mm_packs_epi32(tmp5, zero);
+ stp2_12 = _mm_packs_epi32(tmp7, zero);
+ }
+
+ // Stage3
+ {
+ const __m128i lo_2_14 = _mm_unpacklo_epi16(in1, in11);
+ const __m128i lo_10_6 = _mm_unpacklo_epi16(in9, in3);
+
+ tmp0 = _mm_madd_epi16(lo_2_14, stg3_0);
+ tmp2 = _mm_madd_epi16(lo_2_14, stg3_1);
+ tmp4 = _mm_madd_epi16(lo_10_6, stg3_2);
+ tmp6 = _mm_madd_epi16(lo_10_6, stg3_3);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+ stp1_4 = _mm_packs_epi32(tmp0, zero);
+ stp1_7 = _mm_packs_epi32(tmp2, zero);
+ stp1_5 = _mm_packs_epi32(tmp4, zero);
+ stp1_6 = _mm_packs_epi32(tmp6, zero);
+
+ stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9);
+ stp1_9 = _mm_sub_epi16(stp2_8, stp2_9);
+ stp1_10 = _mm_sub_epi16(stp2_11, stp2_10);
+ stp1_11 = _mm_add_epi16(stp2_11, stp2_10);
+
+ stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13);
+ stp1_13 = _mm_sub_epi16(stp2_12, stp2_13);
+ stp1_14 = _mm_sub_epi16(stp2_15, stp2_14);
+ stp1_15 = _mm_add_epi16(stp2_15, stp2_14);
+ }
+
+ // Stage4
+ {
+ const __m128i lo_0_8 = _mm_unpacklo_epi16(in0, in8);
+ const __m128i lo_4_12 = _mm_unpacklo_epi16(in2, in10);
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14);
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);
+
+ tmp0 = _mm_madd_epi16(lo_0_8, stg4_0);
+ tmp2 = _mm_madd_epi16(lo_0_8, stg4_1);
+ tmp4 = _mm_madd_epi16(lo_4_12, stg4_2);
+ tmp6 = _mm_madd_epi16(lo_4_12, stg4_3);
+ tmp1 = _mm_madd_epi16(lo_9_14, stg4_4);
+ tmp3 = _mm_madd_epi16(lo_9_14, stg4_5);
+ tmp5 = _mm_madd_epi16(lo_10_13, stg4_6);
+ tmp7 = _mm_madd_epi16(lo_10_13, stg4_7);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+ tmp1 = _mm_add_epi32(tmp1, rounding);
+ tmp3 = _mm_add_epi32(tmp3, rounding);
+ tmp5 = _mm_add_epi32(tmp5, rounding);
+ tmp7 = _mm_add_epi32(tmp7, rounding);
+
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+ tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS);
+ tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS);
+
+ stp2_0 = _mm_packs_epi32(tmp0, zero);
+ stp2_1 = _mm_packs_epi32(tmp2, zero);
+ stp2_2 = _mm_packs_epi32(tmp4, zero);
+ stp2_3 = _mm_packs_epi32(tmp6, zero);
+ stp2_9 = _mm_packs_epi32(tmp1, zero);
+ stp2_14 = _mm_packs_epi32(tmp3, zero);
+ stp2_10 = _mm_packs_epi32(tmp5, zero);
+ stp2_13 = _mm_packs_epi32(tmp7, zero);
+
+ stp2_4 = _mm_add_epi16(stp1_4, stp1_5);
+ stp2_5 = _mm_sub_epi16(stp1_4, stp1_5);
+ stp2_6 = _mm_sub_epi16(stp1_7, stp1_6);
+ stp2_7 = _mm_add_epi16(stp1_7, stp1_6);
+ }
+
+ // Stage5 and Stage6
+ {
+ stp1_0 = _mm_add_epi16(stp2_0, stp2_3);
+ stp1_1 = _mm_add_epi16(stp2_1, stp2_2);
+ stp1_2 = _mm_sub_epi16(stp2_1, stp2_2);
+ stp1_3 = _mm_sub_epi16(stp2_0, stp2_3);
+
+ stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11);
+ stp1_9 = _mm_add_epi16(stp2_9, stp2_10);
+ stp1_10 = _mm_sub_epi16(stp2_9, stp2_10);
+ stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11);
+
+ stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0);
+ stp1_13 = _mm_sub_epi16(stp2_14, stp2_13);
+ stp1_14 = _mm_add_epi16(stp2_14, stp2_13);
+ stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0);
+ }
+
+ // Stage6
+ {
+ const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5);
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);
+ const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12);
+
+ tmp1 = _mm_madd_epi16(lo_6_5, stg4_1);
+ tmp3 = _mm_madd_epi16(lo_6_5, stg4_0);
+ tmp0 = _mm_madd_epi16(lo_10_13, stg6_0);
+ tmp2 = _mm_madd_epi16(lo_10_13, stg4_0);
+ tmp4 = _mm_madd_epi16(lo_11_12, stg6_0);
+ tmp6 = _mm_madd_epi16(lo_11_12, stg4_0);
+
+ tmp1 = _mm_add_epi32(tmp1, rounding);
+ tmp3 = _mm_add_epi32(tmp3, rounding);
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+ stp1_5 = _mm_packs_epi32(tmp1, zero);
+ stp1_6 = _mm_packs_epi32(tmp3, zero);
+ stp2_10 = _mm_packs_epi32(tmp0, zero);
+ stp2_13 = _mm_packs_epi32(tmp2, zero);
+ stp2_11 = _mm_packs_epi32(tmp4, zero);
+ stp2_12 = _mm_packs_epi32(tmp6, zero);
+
+ stp2_0 = _mm_add_epi16(stp1_0, stp2_7);
+ stp2_1 = _mm_add_epi16(stp1_1, stp1_6);
+ stp2_2 = _mm_add_epi16(stp1_2, stp1_5);
+ stp2_3 = _mm_add_epi16(stp1_3, stp2_4);
+ stp2_4 = _mm_sub_epi16(stp1_3, stp2_4);
+ stp2_5 = _mm_sub_epi16(stp1_2, stp1_5);
+ stp2_6 = _mm_sub_epi16(stp1_1, stp1_6);
+ stp2_7 = _mm_sub_epi16(stp1_0, stp2_7);
+ }
+
+ // Stage7. Left 8x16 only.
+ l0 = _mm_add_epi16(stp2_0, stp1_15);
+ l1 = _mm_add_epi16(stp2_1, stp1_14);
+ l2 = _mm_add_epi16(stp2_2, stp2_13);
+ l3 = _mm_add_epi16(stp2_3, stp2_12);
+ l4 = _mm_add_epi16(stp2_4, stp2_11);
+ l5 = _mm_add_epi16(stp2_5, stp2_10);
+ l6 = _mm_add_epi16(stp2_6, stp1_9);
+ l7 = _mm_add_epi16(stp2_7, stp1_8);
+ l8 = _mm_sub_epi16(stp2_7, stp1_8);
+ l9 = _mm_sub_epi16(stp2_6, stp1_9);
+ l10 = _mm_sub_epi16(stp2_5, stp2_10);
+ l11 = _mm_sub_epi16(stp2_4, stp2_11);
+ l12 = _mm_sub_epi16(stp2_3, stp2_12);
+ l13 = _mm_sub_epi16(stp2_2, stp2_13);
+ l14 = _mm_sub_epi16(stp2_1, stp1_14);
+ l15 = _mm_sub_epi16(stp2_0, stp1_15);
+
+ // 2-D idct. We do 2 8x16 blocks.
+ for (i = 0; i < 2; i++) {
+ if (i == 0)
+ TRANSPOSE_4X8(l0, l1, l2, l3, l4, l5, l6, l7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+
+ if (i == 1)
+ TRANSPOSE_4X8(l8, l9, l10, l11, l12, l13, l14, l15, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+
+ in8 = in9 = in10 = in11 = in12 = in13 = in14 = in15 = zero;
+
+ IDCT16x16_1D
+
+ // Stage7
+ in0 = _mm_add_epi16(stp2_0, stp1_15);
+ in1 = _mm_add_epi16(stp2_1, stp1_14);
+ in2 = _mm_add_epi16(stp2_2, stp2_13);
+ in3 = _mm_add_epi16(stp2_3, stp2_12);
+ in4 = _mm_add_epi16(stp2_4, stp2_11);
+ in5 = _mm_add_epi16(stp2_5, stp2_10);
+ in6 = _mm_add_epi16(stp2_6, stp1_9);
+ in7 = _mm_add_epi16(stp2_7, stp1_8);
+ in8 = _mm_sub_epi16(stp2_7, stp1_8);
+ in9 = _mm_sub_epi16(stp2_6, stp1_9);
+ in10 = _mm_sub_epi16(stp2_5, stp2_10);
+ in11 = _mm_sub_epi16(stp2_4, stp2_11);
+ in12 = _mm_sub_epi16(stp2_3, stp2_12);
+ in13 = _mm_sub_epi16(stp2_2, stp2_13);
+ in14 = _mm_sub_epi16(stp2_1, stp1_14);
+ in15 = _mm_sub_epi16(stp2_0, stp1_15);
+
+ // Final rounding and shift
+ in0 = _mm_adds_epi16(in0, final_rounding);
+ in1 = _mm_adds_epi16(in1, final_rounding);
+ in2 = _mm_adds_epi16(in2, final_rounding);
+ in3 = _mm_adds_epi16(in3, final_rounding);
+ in4 = _mm_adds_epi16(in4, final_rounding);
+ in5 = _mm_adds_epi16(in5, final_rounding);
+ in6 = _mm_adds_epi16(in6, final_rounding);
+ in7 = _mm_adds_epi16(in7, final_rounding);
+ in8 = _mm_adds_epi16(in8, final_rounding);
+ in9 = _mm_adds_epi16(in9, final_rounding);
+ in10 = _mm_adds_epi16(in10, final_rounding);
+ in11 = _mm_adds_epi16(in11, final_rounding);
+ in12 = _mm_adds_epi16(in12, final_rounding);
+ in13 = _mm_adds_epi16(in13, final_rounding);
+ in14 = _mm_adds_epi16(in14, final_rounding);
+ in15 = _mm_adds_epi16(in15, final_rounding);
+
+ in0 = _mm_srai_epi16(in0, 6);
+ in1 = _mm_srai_epi16(in1, 6);
+ in2 = _mm_srai_epi16(in2, 6);
+ in3 = _mm_srai_epi16(in3, 6);
+ in4 = _mm_srai_epi16(in4, 6);
+ in5 = _mm_srai_epi16(in5, 6);
+ in6 = _mm_srai_epi16(in6, 6);
+ in7 = _mm_srai_epi16(in7, 6);
+ in8 = _mm_srai_epi16(in8, 6);
+ in9 = _mm_srai_epi16(in9, 6);
+ in10 = _mm_srai_epi16(in10, 6);
+ in11 = _mm_srai_epi16(in11, 6);
+ in12 = _mm_srai_epi16(in12, 6);
+ in13 = _mm_srai_epi16(in13, 6);
+ in14 = _mm_srai_epi16(in14, 6);
+ in15 = _mm_srai_epi16(in15, 6);
+
+ // Store results
+ _mm_store_si128((__m128i *)output, in0);
+ _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
+ _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
+ _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
+ _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
+ _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
+ _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
+ _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
+ _mm_store_si128((__m128i *)(output + half_pitch * 8), in8);
+ _mm_store_si128((__m128i *)(output + half_pitch * 9), in9);
+ _mm_store_si128((__m128i *)(output + half_pitch * 10), in10);
+ _mm_store_si128((__m128i *)(output + half_pitch * 11), in11);
+ _mm_store_si128((__m128i *)(output + half_pitch * 12), in12);
+ _mm_store_si128((__m128i *)(output + half_pitch * 13), in13);
+ _mm_store_si128((__m128i *)(output + half_pitch * 14), in14);
+ _mm_store_si128((__m128i *)(output + half_pitch * 15), in15);
+ output += 8;
+ }
+}
+
+void vp9_short_idct32x32_sse2(int16_t *input, int16_t *output, int pitch) {
+ const int half_pitch = pitch >> 1;
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1<<5);
+
+ // idct constants for each stage
+ const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64);
+ const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64);
+ const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64);
+ const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64);
+ const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64);
+ const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64);
+ const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64);
+ const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64);
+ const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64);
+ const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64);
+ const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64);
+ const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64);
+ const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64);
+ const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64);
+ const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64);
+
+ const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+ const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64);
+ const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
+ const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
+
+ const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+
+ const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12,
+ in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23,
+ in24, in25, in26, in27, in28, in29, in30, in31;
+ __m128i col[128];
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+ stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+ stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22,
+ stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29,
+ stp1_30, stp1_31;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15,
+ stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22,
+ stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29,
+ stp2_30, stp2_31;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int i, j;
+
+ // We work on a 8x32 block each time, and loop 8 times for 2-D 32x32 idct.
+ for (i = 0; i < 8; i++) {
+ if (i < 4) {
+ // First 1-D idct
+ // Load input data.
+ in0 = _mm_load_si128((__m128i *)input);
+ in8 = _mm_load_si128((__m128i *)(input + 8 * 1));
+ in16 = _mm_load_si128((__m128i *)(input + 8 * 2));
+ in24 = _mm_load_si128((__m128i *)(input + 8 * 3));
+ in1 = _mm_load_si128((__m128i *)(input + 8 * 4));
+ in9 = _mm_load_si128((__m128i *)(input + 8 * 5));
+ in17 = _mm_load_si128((__m128i *)(input + 8 * 6));
+ in25 = _mm_load_si128((__m128i *)(input + 8 * 7));
+ in2 = _mm_load_si128((__m128i *)(input + 8 * 8));
+ in10 = _mm_load_si128((__m128i *)(input + 8 * 9));
+ in18 = _mm_load_si128((__m128i *)(input + 8 * 10));
+ in26 = _mm_load_si128((__m128i *)(input + 8 * 11));
+ in3 = _mm_load_si128((__m128i *)(input + 8 * 12));
+ in11 = _mm_load_si128((__m128i *)(input + 8 * 13));
+ in19 = _mm_load_si128((__m128i *)(input + 8 * 14));
+ in27 = _mm_load_si128((__m128i *)(input + 8 * 15));
+
+ in4 = _mm_load_si128((__m128i *)(input + 8 * 16));
+ in12 = _mm_load_si128((__m128i *)(input + 8 * 17));
+ in20 = _mm_load_si128((__m128i *)(input + 8 * 18));
+ in28 = _mm_load_si128((__m128i *)(input + 8 * 19));
+ in5 = _mm_load_si128((__m128i *)(input + 8 * 20));
+ in13 = _mm_load_si128((__m128i *)(input + 8 * 21));
+ in21 = _mm_load_si128((__m128i *)(input + 8 * 22));
+ in29 = _mm_load_si128((__m128i *)(input + 8 * 23));
+ in6 = _mm_load_si128((__m128i *)(input + 8 * 24));
+ in14 = _mm_load_si128((__m128i *)(input + 8 * 25));
+ in22 = _mm_load_si128((__m128i *)(input + 8 * 26));
+ in30 = _mm_load_si128((__m128i *)(input + 8 * 27));
+ in7 = _mm_load_si128((__m128i *)(input + 8 * 28));
+ in15 = _mm_load_si128((__m128i *)(input + 8 * 29));
+ in23 = _mm_load_si128((__m128i *)(input + 8 * 30));
+ in31 = _mm_load_si128((__m128i *)(input + 8 * 31));
+
+ input += 256;
+
+ // Transpose 32x8 block to 8x32 block
+ TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ TRANSPOSE_8X8(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9,
+ in10, in11, in12, in13, in14, in15);
+ TRANSPOSE_8X8(in16, in17, in18, in19, in20, in21, in22, in23, in16, in17,
+ in18, in19, in20, in21, in22, in23);
+ TRANSPOSE_8X8(in24, in25, in26, in27, in28, in29, in30, in31, in24, in25,
+ in26, in27, in28, in29, in30, in31);
+ } else {
+ // Second 1-D idct
+ j = i - 4;
+
+ // Transpose 32x8 block to 8x32 block
+ TRANSPOSE_8X8(col[j * 8 + 0], col[j * 8 + 1], col[j * 8 + 2],
+ col[j * 8 + 3], col[j * 8 + 4], col[j * 8 + 5],
+ col[j * 8 + 6], col[j * 8 + 7], in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ j += 4;
+ TRANSPOSE_8X8(col[j * 8 + 0], col[j * 8 + 1], col[j * 8 + 2],
+ col[j * 8 + 3], col[j * 8 + 4], col[j * 8 + 5],
+ col[j * 8 + 6], col[j * 8 + 7], in8, in9, in10,
+ in11, in12, in13, in14, in15);
+ j += 4;
+ TRANSPOSE_8X8(col[j * 8 + 0], col[j * 8 + 1], col[j * 8 + 2],
+ col[j * 8 + 3], col[j * 8 + 4], col[j * 8 + 5],
+ col[j * 8 + 6], col[j * 8 + 7], in16, in17, in18,
+ in19, in20, in21, in22, in23);
+ j += 4;
+ TRANSPOSE_8X8(col[j * 8 + 0], col[j * 8 + 1], col[j * 8 + 2],
+ col[j * 8 + 3], col[j * 8 + 4], col[j * 8 + 5],
+ col[j * 8 + 6], col[j * 8 + 7], in24, in25, in26, in27,
+ in28, in29, in30, in31);
+ }
+
+ // Stage1
+ {
+ const __m128i lo_1_31 = _mm_unpacklo_epi16(in1, in31);
+ const __m128i hi_1_31 = _mm_unpackhi_epi16(in1, in31);
+ const __m128i lo_17_15 = _mm_unpacklo_epi16(in17, in15);
+ const __m128i hi_17_15 = _mm_unpackhi_epi16(in17, in15);
+
+ const __m128i lo_9_23 = _mm_unpacklo_epi16(in9, in23);
+ const __m128i hi_9_23 = _mm_unpackhi_epi16(in9, in23);
+ const __m128i lo_25_7= _mm_unpacklo_epi16(in25, in7);
+ const __m128i hi_25_7 = _mm_unpackhi_epi16(in25, in7);
+
+ const __m128i lo_5_27 = _mm_unpacklo_epi16(in5, in27);
+ const __m128i hi_5_27 = _mm_unpackhi_epi16(in5, in27);
+ const __m128i lo_21_11 = _mm_unpacklo_epi16(in21, in11);
+ const __m128i hi_21_11 = _mm_unpackhi_epi16(in21, in11);
+
+ const __m128i lo_13_19 = _mm_unpacklo_epi16(in13, in19);
+ const __m128i hi_13_19 = _mm_unpackhi_epi16(in13, in19);
+ const __m128i lo_29_3 = _mm_unpacklo_epi16(in29, in3);
+ const __m128i hi_29_3 = _mm_unpackhi_epi16(in29, in3);
+
+ MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0,
+ stg1_1, stg1_2, stg1_3, stp1_16, stp1_31,
+ stp1_17, stp1_30)
+ MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4,
+ stg1_5, stg1_6, stg1_7, stp1_18, stp1_29,
+ stp1_19, stp1_28)
+ MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8,
+ stg1_9, stg1_10, stg1_11, stp1_20, stp1_27,
+ stp1_21, stp1_26)
+ MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12,
+ stg1_13, stg1_14, stg1_15, stp1_22, stp1_25,
+ stp1_23, stp1_24)
+ }
+
+ // Stage2
+ {
+ const __m128i lo_2_30 = _mm_unpacklo_epi16(in2, in30);
+ const __m128i hi_2_30 = _mm_unpackhi_epi16(in2, in30);
+ const __m128i lo_18_14 = _mm_unpacklo_epi16(in18, in14);
+ const __m128i hi_18_14 = _mm_unpackhi_epi16(in18, in14);
+
+ const __m128i lo_10_22 = _mm_unpacklo_epi16(in10, in22);
+ const __m128i hi_10_22 = _mm_unpackhi_epi16(in10, in22);
+ const __m128i lo_26_6 = _mm_unpacklo_epi16(in26, in6);
+ const __m128i hi_26_6 = _mm_unpackhi_epi16(in26, in6);
+
+ MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0,
+ stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9,
+ stp2_14)
+ MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4,
+ stg2_5, stg2_6, stg2_7, stp2_10, stp2_13,
+ stp2_11, stp2_12)
+
+ stp2_16 = _mm_add_epi16(stp1_16, stp1_17);
+ stp2_17 = _mm_sub_epi16(stp1_16, stp1_17);
+ stp2_18 = _mm_sub_epi16(stp1_19, stp1_18);
+ stp2_19 = _mm_add_epi16(stp1_19, stp1_18);
+
+ stp2_20 = _mm_add_epi16(stp1_20, stp1_21);
+ stp2_21 = _mm_sub_epi16(stp1_20, stp1_21);
+ stp2_22 = _mm_sub_epi16(stp1_23, stp1_22);
+ stp2_23 = _mm_add_epi16(stp1_23, stp1_22);
+
+ stp2_24 = _mm_add_epi16(stp1_24, stp1_25);
+ stp2_25 = _mm_sub_epi16(stp1_24, stp1_25);
+ stp2_26 = _mm_sub_epi16(stp1_27, stp1_26);
+ stp2_27 = _mm_add_epi16(stp1_27, stp1_26);
+
+ stp2_28 = _mm_add_epi16(stp1_28, stp1_29);
+ stp2_29 = _mm_sub_epi16(stp1_28, stp1_29);
+ stp2_30 = _mm_sub_epi16(stp1_31, stp1_30);
+ stp2_31 = _mm_add_epi16(stp1_31, stp1_30);
+ }
+
+ // Stage3
+ {
+ const __m128i lo_4_28 = _mm_unpacklo_epi16(in4, in28);
+ const __m128i hi_4_28 = _mm_unpackhi_epi16(in4, in28);
+ const __m128i lo_20_12 = _mm_unpacklo_epi16(in20, in12);
+ const __m128i hi_20_12 = _mm_unpackhi_epi16(in20, in12);
+
+ const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30);
+ const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30);
+ const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29);
+ const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29);
+
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26);
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26);
+ const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25);
+ const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25);
+
+ MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0,
+ stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5,
+ stp1_6)
+
+ stp1_8 = _mm_add_epi16(stp2_8, stp2_9);
+ stp1_9 = _mm_sub_epi16(stp2_8, stp2_9);
+ stp1_10 = _mm_sub_epi16(stp2_11, stp2_10);
+ stp1_11 = _mm_add_epi16(stp2_11, stp2_10);
+ stp1_12 = _mm_add_epi16(stp2_12, stp2_13);
+ stp1_13 = _mm_sub_epi16(stp2_12, stp2_13);
+ stp1_14 = _mm_sub_epi16(stp2_15, stp2_14);
+ stp1_15 = _mm_add_epi16(stp2_15, stp2_14);
+
+ MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4,
+ stg3_5, stg3_6, stg3_4, stp1_17, stp1_30,
+ stp1_18, stp1_29)
+ MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8,
+ stg3_9, stg3_10, stg3_8, stp1_21, stp1_26,
+ stp1_22, stp1_25)
+
+ stp1_16 = stp2_16;
+ stp1_31 = stp2_31;
+ stp1_19 = stp2_19;
+ stp1_20 = stp2_20;
+ stp1_23 = stp2_23;
+ stp1_24 = stp2_24;
+ stp1_27 = stp2_27;
+ stp1_28 = stp2_28;
+ }
+
+ // Stage4
+ {
+ const __m128i lo_0_16 = _mm_unpacklo_epi16(in0, in16);
+ const __m128i hi_0_16 = _mm_unpackhi_epi16(in0, in16);
+ const __m128i lo_8_24 = _mm_unpacklo_epi16(in8, in24);
+ const __m128i hi_8_24 = _mm_unpackhi_epi16(in8, in24);
+
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14);
+ const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14);
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);
+
+ MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0,
+ stg4_1, stg4_2, stg4_3, stp2_0, stp2_1,
+ stp2_2, stp2_3)
+
+ stp2_4 = _mm_add_epi16(stp1_4, stp1_5);
+ stp2_5 = _mm_sub_epi16(stp1_4, stp1_5);
+ stp2_6 = _mm_sub_epi16(stp1_7, stp1_6);
+ stp2_7 = _mm_add_epi16(stp1_7, stp1_6);
+
+ MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4,
+ stg4_5, stg4_6, stg4_4, stp2_9, stp2_14,
+ stp2_10, stp2_13)
+
+ stp2_8 = stp1_8;
+ stp2_15 = stp1_15;
+ stp2_11 = stp1_11;
+ stp2_12 = stp1_12;
+
+ stp2_16 = _mm_add_epi16(stp1_16, stp1_19);
+ stp2_17 = _mm_add_epi16(stp1_17, stp1_18);
+ stp2_18 = _mm_sub_epi16(stp1_17, stp1_18);
+ stp2_19 = _mm_sub_epi16(stp1_16, stp1_19);
+ stp2_20 = _mm_sub_epi16(stp1_23, stp1_20);
+ stp2_21 = _mm_sub_epi16(stp1_22, stp1_21);
+ stp2_22 = _mm_add_epi16(stp1_22, stp1_21);
+ stp2_23 = _mm_add_epi16(stp1_23, stp1_20);
+
+ stp2_24 = _mm_add_epi16(stp1_24, stp1_27);
+ stp2_25 = _mm_add_epi16(stp1_25, stp1_26);
+ stp2_26 = _mm_sub_epi16(stp1_25, stp1_26);
+ stp2_27 = _mm_sub_epi16(stp1_24, stp1_27);
+ stp2_28 = _mm_sub_epi16(stp1_31, stp1_28);
+ stp2_29 = _mm_sub_epi16(stp1_30, stp1_29);
+ stp2_30 = _mm_add_epi16(stp1_29, stp1_30);
+ stp2_31 = _mm_add_epi16(stp1_28, stp1_31);
+ }
+
+ // Stage5
+ {
+ const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5);
+ const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5);
+ const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29);
+ const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29);
+
+ const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28);
+ const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28);
+ const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27);
+ const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27);
+
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26);
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26);
+
+ stp1_0 = _mm_add_epi16(stp2_0, stp2_3);
+ stp1_1 = _mm_add_epi16(stp2_1, stp2_2);
+ stp1_2 = _mm_sub_epi16(stp2_1, stp2_2);
+ stp1_3 = _mm_sub_epi16(stp2_0, stp2_3);
+
+ tmp0 = _mm_madd_epi16(lo_6_5, stg4_1);
+ tmp1 = _mm_madd_epi16(hi_6_5, stg4_1);
+ tmp2 = _mm_madd_epi16(lo_6_5, stg4_0);
+ tmp3 = _mm_madd_epi16(hi_6_5, stg4_0);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp1 = _mm_add_epi32(tmp1, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp3 = _mm_add_epi32(tmp3, rounding);
+
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+
+ stp1_5 = _mm_packs_epi32(tmp0, tmp1);
+ stp1_6 = _mm_packs_epi32(tmp2, tmp3);
+
+ stp1_4 = stp2_4;
+ stp1_7 = stp2_7;
+
+ stp1_8 = _mm_add_epi16(stp2_8, stp2_11);
+ stp1_9 = _mm_add_epi16(stp2_9, stp2_10);
+ stp1_10 = _mm_sub_epi16(stp2_9, stp2_10);
+ stp1_11 = _mm_sub_epi16(stp2_8, stp2_11);
+ stp1_12 = _mm_sub_epi16(stp2_15, stp2_12);
+ stp1_13 = _mm_sub_epi16(stp2_14, stp2_13);
+ stp1_14 = _mm_add_epi16(stp2_14, stp2_13);
+ stp1_15 = _mm_add_epi16(stp2_15, stp2_12);
+
+ stp1_16 = stp2_16;
+ stp1_17 = stp2_17;
+
+ MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4,
+ stg4_5, stg4_4, stg4_5, stp1_18, stp1_29,
+ stp1_19, stp1_28)
+ MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6,
+ stg4_4, stg4_6, stg4_4, stp1_20, stp1_27,
+ stp1_21, stp1_26)
+
+ stp1_22 = stp2_22;
+ stp1_23 = stp2_23;
+ stp1_24 = stp2_24;
+ stp1_25 = stp2_25;
+ stp1_30 = stp2_30;
+ stp1_31 = stp2_31;
+ }
+
+ // Stage6
+ {
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13);
+ const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12);
+ const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12);
+
+ stp2_0 = _mm_add_epi16(stp1_0, stp1_7);
+ stp2_1 = _mm_add_epi16(stp1_1, stp1_6);
+ stp2_2 = _mm_add_epi16(stp1_2, stp1_5);
+ stp2_3 = _mm_add_epi16(stp1_3, stp1_4);
+ stp2_4 = _mm_sub_epi16(stp1_3, stp1_4);
+ stp2_5 = _mm_sub_epi16(stp1_2, stp1_5);
+ stp2_6 = _mm_sub_epi16(stp1_1, stp1_6);
+ stp2_7 = _mm_sub_epi16(stp1_0, stp1_7);
+
+ stp2_8 = stp1_8;
+ stp2_9 = stp1_9;
+ stp2_14 = stp1_14;
+ stp2_15 = stp1_15;
+
+ MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12,
+ stg6_0, stg4_0, stg6_0, stg4_0, stp2_10,
+ stp2_13, stp2_11, stp2_12)
+
+ stp2_16 = _mm_add_epi16(stp1_16, stp1_23);
+ stp2_17 = _mm_add_epi16(stp1_17, stp1_22);
+ stp2_18 = _mm_add_epi16(stp1_18, stp1_21);
+ stp2_19 = _mm_add_epi16(stp1_19, stp1_20);
+ stp2_20 = _mm_sub_epi16(stp1_19, stp1_20);
+ stp2_21 = _mm_sub_epi16(stp1_18, stp1_21);
+ stp2_22 = _mm_sub_epi16(stp1_17, stp1_22);
+ stp2_23 = _mm_sub_epi16(stp1_16, stp1_23);
+
+ stp2_24 = _mm_sub_epi16(stp1_31, stp1_24);
+ stp2_25 = _mm_sub_epi16(stp1_30, stp1_25);
+ stp2_26 = _mm_sub_epi16(stp1_29, stp1_26);
+ stp2_27 = _mm_sub_epi16(stp1_28, stp1_27);
+ stp2_28 = _mm_add_epi16(stp1_27, stp1_28);
+ stp2_29 = _mm_add_epi16(stp1_26, stp1_29);
+ stp2_30 = _mm_add_epi16(stp1_25, stp1_30);
+ stp2_31 = _mm_add_epi16(stp1_24, stp1_31);
+ }
+
+ // Stage7
+ {
+ const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27);
+ const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27);
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26);
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26);
+
+ const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25);
+ const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25);
+ const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24);
+ const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24);
+
+ stp1_0 = _mm_add_epi16(stp2_0, stp2_15);
+ stp1_1 = _mm_add_epi16(stp2_1, stp2_14);
+ stp1_2 = _mm_add_epi16(stp2_2, stp2_13);
+ stp1_3 = _mm_add_epi16(stp2_3, stp2_12);
+ stp1_4 = _mm_add_epi16(stp2_4, stp2_11);
+ stp1_5 = _mm_add_epi16(stp2_5, stp2_10);
+ stp1_6 = _mm_add_epi16(stp2_6, stp2_9);
+ stp1_7 = _mm_add_epi16(stp2_7, stp2_8);
+ stp1_8 = _mm_sub_epi16(stp2_7, stp2_8);
+ stp1_9 = _mm_sub_epi16(stp2_6, stp2_9);
+ stp1_10 = _mm_sub_epi16(stp2_5, stp2_10);
+ stp1_11 = _mm_sub_epi16(stp2_4, stp2_11);
+ stp1_12 = _mm_sub_epi16(stp2_3, stp2_12);
+ stp1_13 = _mm_sub_epi16(stp2_2, stp2_13);
+ stp1_14 = _mm_sub_epi16(stp2_1, stp2_14);
+ stp1_15 = _mm_sub_epi16(stp2_0, stp2_15);
+
+ stp1_16 = stp2_16;
+ stp1_17 = stp2_17;
+ stp1_18 = stp2_18;
+ stp1_19 = stp2_19;
+
+ MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0,
+ stg4_0, stg6_0, stg4_0, stp1_20, stp1_27,
+ stp1_21, stp1_26)
+ MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0,
+ stg4_0, stg6_0, stg4_0, stp1_22, stp1_25,
+ stp1_23, stp1_24)
+
+ stp1_28 = stp2_28;
+ stp1_29 = stp2_29;
+ stp1_30 = stp2_30;
+ stp1_31 = stp2_31;
+ }
+
+ // final stage
+ if (i < 4) {
+ // 1_D: Store 32 intermediate results for each 8x32 block.
+ col[i * 32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
+ col[i * 32 + 1] = _mm_add_epi16(stp1_1, stp1_30);
+ col[i * 32 + 2] = _mm_add_epi16(stp1_2, stp1_29);
+ col[i * 32 + 3] = _mm_add_epi16(stp1_3, stp1_28);
+ col[i * 32 + 4] = _mm_add_epi16(stp1_4, stp1_27);
+ col[i * 32 + 5] = _mm_add_epi16(stp1_5, stp1_26);
+ col[i * 32 + 6] = _mm_add_epi16(stp1_6, stp1_25);
+ col[i * 32 + 7] = _mm_add_epi16(stp1_7, stp1_24);
+ col[i * 32 + 8] = _mm_add_epi16(stp1_8, stp1_23);
+ col[i * 32 + 9] = _mm_add_epi16(stp1_9, stp1_22);
+ col[i * 32 + 10] = _mm_add_epi16(stp1_10, stp1_21);
+ col[i * 32 + 11] = _mm_add_epi16(stp1_11, stp1_20);
+ col[i * 32 + 12] = _mm_add_epi16(stp1_12, stp1_19);
+ col[i * 32 + 13] = _mm_add_epi16(stp1_13, stp1_18);
+ col[i * 32 + 14] = _mm_add_epi16(stp1_14, stp1_17);
+ col[i * 32 + 15] = _mm_add_epi16(stp1_15, stp1_16);
+ col[i * 32 + 16] = _mm_sub_epi16(stp1_15, stp1_16);
+ col[i * 32 + 17] = _mm_sub_epi16(stp1_14, stp1_17);
+ col[i * 32 + 18] = _mm_sub_epi16(stp1_13, stp1_18);
+ col[i * 32 + 19] = _mm_sub_epi16(stp1_12, stp1_19);
+ col[i * 32 + 20] = _mm_sub_epi16(stp1_11, stp1_20);
+ col[i * 32 + 21] = _mm_sub_epi16(stp1_10, stp1_21);
+ col[i * 32 + 22] = _mm_sub_epi16(stp1_9, stp1_22);
+ col[i * 32 + 23] = _mm_sub_epi16(stp1_8, stp1_23);
+ col[i * 32 + 24] = _mm_sub_epi16(stp1_7, stp1_24);
+ col[i * 32 + 25] = _mm_sub_epi16(stp1_6, stp1_25);
+ col[i * 32 + 26] = _mm_sub_epi16(stp1_5, stp1_26);
+ col[i * 32 + 27] = _mm_sub_epi16(stp1_4, stp1_27);
+ col[i * 32 + 28] = _mm_sub_epi16(stp1_3, stp1_28);
+ col[i * 32 + 29] = _mm_sub_epi16(stp1_2, stp1_29);
+ col[i * 32 + 30] = _mm_sub_epi16(stp1_1, stp1_30);
+ col[i * 32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
+ } else {
+ // 2_D: Calculate the results and store them to destination.
+ in0 = _mm_add_epi16(stp1_0, stp1_31);
+ in1 = _mm_add_epi16(stp1_1, stp1_30);
+ in2 = _mm_add_epi16(stp1_2, stp1_29);
+ in3 = _mm_add_epi16(stp1_3, stp1_28);
+ in4 = _mm_add_epi16(stp1_4, stp1_27);
+ in5 = _mm_add_epi16(stp1_5, stp1_26);
+ in6 = _mm_add_epi16(stp1_6, stp1_25);
+ in7 = _mm_add_epi16(stp1_7, stp1_24);
+ in8 = _mm_add_epi16(stp1_8, stp1_23);
+ in9 = _mm_add_epi16(stp1_9, stp1_22);
+ in10 = _mm_add_epi16(stp1_10, stp1_21);
+ in11 = _mm_add_epi16(stp1_11, stp1_20);
+ in12 = _mm_add_epi16(stp1_12, stp1_19);
+ in13 = _mm_add_epi16(stp1_13, stp1_18);
+ in14 = _mm_add_epi16(stp1_14, stp1_17);
+ in15 = _mm_add_epi16(stp1_15, stp1_16);
+ in16 = _mm_sub_epi16(stp1_15, stp1_16);
+ in17 = _mm_sub_epi16(stp1_14, stp1_17);
+ in18 = _mm_sub_epi16(stp1_13, stp1_18);
+ in19 = _mm_sub_epi16(stp1_12, stp1_19);
+ in20 = _mm_sub_epi16(stp1_11, stp1_20);
+ in21 = _mm_sub_epi16(stp1_10, stp1_21);
+ in22 = _mm_sub_epi16(stp1_9, stp1_22);
+ in23 = _mm_sub_epi16(stp1_8, stp1_23);
+ in24 = _mm_sub_epi16(stp1_7, stp1_24);
+ in25 = _mm_sub_epi16(stp1_6, stp1_25);
+ in26 = _mm_sub_epi16(stp1_5, stp1_26);
+ in27 = _mm_sub_epi16(stp1_4, stp1_27);
+ in28 = _mm_sub_epi16(stp1_3, stp1_28);
+ in29 = _mm_sub_epi16(stp1_2, stp1_29);
+ in30 = _mm_sub_epi16(stp1_1, stp1_30);
+ in31 = _mm_sub_epi16(stp1_0, stp1_31);
+
+ // Final rounding and shift
+ in0 = _mm_adds_epi16(in0, final_rounding);
+ in1 = _mm_adds_epi16(in1, final_rounding);
+ in2 = _mm_adds_epi16(in2, final_rounding);
+ in3 = _mm_adds_epi16(in3, final_rounding);
+ in4 = _mm_adds_epi16(in4, final_rounding);
+ in5 = _mm_adds_epi16(in5, final_rounding);
+ in6 = _mm_adds_epi16(in6, final_rounding);
+ in7 = _mm_adds_epi16(in7, final_rounding);
+ in8 = _mm_adds_epi16(in8, final_rounding);
+ in9 = _mm_adds_epi16(in9, final_rounding);
+ in10 = _mm_adds_epi16(in10, final_rounding);
+ in11 = _mm_adds_epi16(in11, final_rounding);
+ in12 = _mm_adds_epi16(in12, final_rounding);
+ in13 = _mm_adds_epi16(in13, final_rounding);
+ in14 = _mm_adds_epi16(in14, final_rounding);
+ in15 = _mm_adds_epi16(in15, final_rounding);
+ in16 = _mm_adds_epi16(in16, final_rounding);
+ in17 = _mm_adds_epi16(in17, final_rounding);
+ in18 = _mm_adds_epi16(in18, final_rounding);
+ in19 = _mm_adds_epi16(in19, final_rounding);
+ in20 = _mm_adds_epi16(in20, final_rounding);
+ in21 = _mm_adds_epi16(in21, final_rounding);
+ in22 = _mm_adds_epi16(in22, final_rounding);
+ in23 = _mm_adds_epi16(in23, final_rounding);
+ in24 = _mm_adds_epi16(in24, final_rounding);
+ in25 = _mm_adds_epi16(in25, final_rounding);
+ in26 = _mm_adds_epi16(in26, final_rounding);
+ in27 = _mm_adds_epi16(in27, final_rounding);
+ in28 = _mm_adds_epi16(in28, final_rounding);
+ in29 = _mm_adds_epi16(in29, final_rounding);
+ in30 = _mm_adds_epi16(in30, final_rounding);
+ in31 = _mm_adds_epi16(in31, final_rounding);
+
+ in0 = _mm_srai_epi16(in0, 6);
+ in1 = _mm_srai_epi16(in1, 6);
+ in2 = _mm_srai_epi16(in2, 6);
+ in3 = _mm_srai_epi16(in3, 6);
+ in4 = _mm_srai_epi16(in4, 6);
+ in5 = _mm_srai_epi16(in5, 6);
+ in6 = _mm_srai_epi16(in6, 6);
+ in7 = _mm_srai_epi16(in7, 6);
+ in8 = _mm_srai_epi16(in8, 6);
+ in9 = _mm_srai_epi16(in9, 6);
+ in10 = _mm_srai_epi16(in10, 6);
+ in11 = _mm_srai_epi16(in11, 6);
+ in12 = _mm_srai_epi16(in12, 6);
+ in13 = _mm_srai_epi16(in13, 6);
+ in14 = _mm_srai_epi16(in14, 6);
+ in15 = _mm_srai_epi16(in15, 6);
+ in16 = _mm_srai_epi16(in16, 6);
+ in17 = _mm_srai_epi16(in17, 6);
+ in18 = _mm_srai_epi16(in18, 6);
+ in19 = _mm_srai_epi16(in19, 6);
+ in20 = _mm_srai_epi16(in20, 6);
+ in21 = _mm_srai_epi16(in21, 6);
+ in22 = _mm_srai_epi16(in22, 6);
+ in23 = _mm_srai_epi16(in23, 6);
+ in24 = _mm_srai_epi16(in24, 6);
+ in25 = _mm_srai_epi16(in25, 6);
+ in26 = _mm_srai_epi16(in26, 6);
+ in27 = _mm_srai_epi16(in27, 6);
+ in28 = _mm_srai_epi16(in28, 6);
+ in29 = _mm_srai_epi16(in29, 6);
+ in30 = _mm_srai_epi16(in30, 6);
+ in31 = _mm_srai_epi16(in31, 6);
+
+ // Store results
+ _mm_store_si128((__m128i *)output, in0);
+ _mm_store_si128((__m128i *)(output + half_pitch * 1), in1);
+ _mm_store_si128((__m128i *)(output + half_pitch * 2), in2);
+ _mm_store_si128((__m128i *)(output + half_pitch * 3), in3);
+ _mm_store_si128((__m128i *)(output + half_pitch * 4), in4);
+ _mm_store_si128((__m128i *)(output + half_pitch * 5), in5);
+ _mm_store_si128((__m128i *)(output + half_pitch * 6), in6);
+ _mm_store_si128((__m128i *)(output + half_pitch * 7), in7);
+ _mm_store_si128((__m128i *)(output + half_pitch * 8), in8);
+ _mm_store_si128((__m128i *)(output + half_pitch * 9), in9);
+ _mm_store_si128((__m128i *)(output + half_pitch * 10), in10);
+ _mm_store_si128((__m128i *)(output + half_pitch * 11), in11);
+ _mm_store_si128((__m128i *)(output + half_pitch * 12), in12);
+ _mm_store_si128((__m128i *)(output + half_pitch * 13), in13);
+ _mm_store_si128((__m128i *)(output + half_pitch * 14), in14);
+ _mm_store_si128((__m128i *)(output + half_pitch * 15), in15);
+ _mm_store_si128((__m128i *)(output + half_pitch * 16), in16);
+ _mm_store_si128((__m128i *)(output + half_pitch * 17), in17);
+ _mm_store_si128((__m128i *)(output + half_pitch * 18), in18);
+ _mm_store_si128((__m128i *)(output + half_pitch * 19), in19);
+ _mm_store_si128((__m128i *)(output + half_pitch * 20), in20);
+ _mm_store_si128((__m128i *)(output + half_pitch * 21), in21);
+ _mm_store_si128((__m128i *)(output + half_pitch * 22), in22);
+ _mm_store_si128((__m128i *)(output + half_pitch * 23), in23);
+ _mm_store_si128((__m128i *)(output + half_pitch * 24), in24);
+ _mm_store_si128((__m128i *)(output + half_pitch * 25), in25);
+ _mm_store_si128((__m128i *)(output + half_pitch * 26), in26);
+ _mm_store_si128((__m128i *)(output + half_pitch * 27), in27);
+ _mm_store_si128((__m128i *)(output + half_pitch * 28), in28);
+ _mm_store_si128((__m128i *)(output + half_pitch * 29), in29);
+ _mm_store_si128((__m128i *)(output + half_pitch * 30), in30);
+ _mm_store_si128((__m128i *)(output + half_pitch * 31), in31);
+
+ output += 8;
+ }
+ }
+}
+#endif
--- a/vp9/common/x86/vp9_idct_x86.h
+++ b/vp9/common/x86/vp9_idct_x86.h
@@ -20,23 +20,10 @@
*/
#if HAVE_MMX
-extern prototype_idct(vp9_short_idct4x4llm_1_mmx);
-extern prototype_idct(vp9_short_idct4x4llm_mmx);
-extern prototype_idct_scalar_add(vp9_dc_only_idct_add_mmx);
-
extern prototype_second_order(vp9_short_inv_walsh4x4_mmx);
extern prototype_second_order(vp9_short_inv_walsh4x4_1_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp9_idct_idct1
-#define vp9_idct_idct1 vp9_short_idct4x4llm_1_mmx
-
-#undef vp9_idct_idct16
-#define vp9_idct_idct16 vp9_short_idct4x4llm_mmx
-
-#undef vp9_idct_idct1_scalar_add
-#define vp9_idct_idct1_scalar_add vp9_dc_only_idct_add_mmx
-
#undef vp9_idct_iwalsh16
#define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_mmx
--- a/vp9/common/x86/vp9_idctllm_mmx.asm
+++ /dev/null
@@ -1,241 +1,0 @@
-;
-; Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-align 16
-x_s1sqr2: times 4 dw 0x8A8C
-align 16
-x_c1sqr2less1: times 4 dw 0x4E7B
-align 16
-pw_16: times 4 dw 16
-
-SECTION .text
-
-
-; /****************************************************************************
-; * Notes:
-; *
-; * This implementation makes use of 16 bit fixed point version of two multiply
-; * constants:
-; * 1. sqrt(2) * cos (pi/8)
-; * 2. sqrt(2) * sin (pi/8)
-; * Because the first constant is bigger than 1, to maintain the same 16 bit
-; * fixed point precision as the second one, we use a trick of
-; * x * a = x + x*(a-1)
-; * so
-; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
-; *
-; * For the second constant, because of the 16bit version is 35468, which
-; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative
-; * number.
-; * (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x
-; *
-; **************************************************************************/
-
-INIT_MMX
-
-;void short_idct4x4llm_mmx(short *input, short *output, int pitch)
-cglobal short_idct4x4llm_mmx, 3,3,0, inp, out, pit
- mova m0, [inpq +0]
- mova m1, [inpq +8]
-
- mova m2, [inpq+16]
- mova m3, [inpq+24]
-
- psubw m0, m2 ; b1= 0-2
- paddw m2, m2 ;
-
- mova m5, m1
- paddw m2, m0 ; a1 =0+2
-
- pmulhw m5, [x_s1sqr2] ;
- paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
-
- mova m7, m3 ;
- pmulhw m7, [x_c1sqr2less1] ;
-
- paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw m7, m5 ; c1
-
- mova m5, m1
- mova m4, m3
-
- pmulhw m5, [x_c1sqr2less1]
- paddw m5, m1
-
- pmulhw m3, [x_s1sqr2]
- paddw m3, m4
-
- paddw m3, m5 ; d1
- mova m6, m2 ; a1
-
- mova m4, m0 ; b1
- paddw m2, m3 ;0
-
- paddw m4, m7 ;1
- psubw m0, m7 ;2
-
- psubw m6, m3 ;3
-
- mova m1, m2 ; 03 02 01 00
- mova m3, m4 ; 23 22 21 20
-
- punpcklwd m1, m0 ; 11 01 10 00
- punpckhwd m2, m0 ; 13 03 12 02
-
- punpcklwd m3, m6 ; 31 21 30 20
- punpckhwd m4, m6 ; 33 23 32 22
-
- mova m0, m1 ; 11 01 10 00
- mova m5, m2 ; 13 03 12 02
-
- punpckldq m0, m3 ; 30 20 10 00
- punpckhdq m1, m3 ; 31 21 11 01
-
- punpckldq m2, m4 ; 32 22 12 02
- punpckhdq m5, m4 ; 33 23 13 03
-
- mova m3, m5 ; 33 23 13 03
-
- psubw m0, m2 ; b1= 0-2
- paddw m2, m2 ;
-
- mova m5, m1
- paddw m2, m0 ; a1 =0+2
-
- pmulhw m5, [x_s1sqr2] ;
- paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
-
- mova m7, m3 ;
- pmulhw m7, [x_c1sqr2less1] ;
-
- paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw m7, m5 ; c1
-
- mova m5, m1
- mova m4, m3
-
- pmulhw m5, [x_c1sqr2less1]
- paddw m5, m1
-
- pmulhw m3, [x_s1sqr2]
- paddw m3, m4
-
- paddw m3, m5 ; d1
- paddw m0, [pw_16]
-
- paddw m2, [pw_16]
- mova m6, m2 ; a1
-
- mova m4, m0 ; b1
- paddw m2, m3 ;0
-
- paddw m4, m7 ;1
- psubw m0, m7 ;2
-
- psubw m6, m3 ;3
- psraw m2, 5
-
- psraw m0, 5
- psraw m4, 5
-
- psraw m6, 5
-
- mova m1, m2 ; 03 02 01 00
- mova m3, m4 ; 23 22 21 20
-
- punpcklwd m1, m0 ; 11 01 10 00
- punpckhwd m2, m0 ; 13 03 12 02
-
- punpcklwd m3, m6 ; 31 21 30 20
- punpckhwd m4, m6 ; 33 23 32 22
-
- mova m0, m1 ; 11 01 10 00
- mova m5, m2 ; 13 03 12 02
-
- punpckldq m0, m3 ; 30 20 10 00
- punpckhdq m1, m3 ; 31 21 11 01
-
- punpckldq m2, m4 ; 32 22 12 02
- punpckhdq m5, m4 ; 33 23 13 03
-
- mova [outq], m0
-
- mova [outq+r2], m1
- mova [outq+pitq*2], m2
-
- add outq, pitq
- mova [outq+pitq*2], m5
- RET
-
-;void short_idct4x4llm_1_mmx(short *input, short *output, int pitch)
-cglobal short_idct4x4llm_1_mmx,3,3,0,inp,out,pit
- movh m0, [inpq]
- paddw m0, [pw_16]
- psraw m0, 5
- punpcklwd m0, m0
- punpckldq m0, m0
-
- mova [outq], m0
- mova [outq+pitq], m0
-
- mova [outq+pitq*2], m0
- add r1, r2
-
- mova [outq+pitq*2], m0
- RET
-
-
-;void dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
-cglobal dc_only_idct_add_mmx, 4,5,0,in_dc,pred,dst,pit,stride
-%if ARCH_X86_64
- movsxd strideq, dword stridem
-%else
- mov strideq, stridem
-%endif
- pxor m0, m0
-
- movh m5, in_dcq ; dc
- paddw m5, [pw_16]
-
- psraw m5, 5
-
- punpcklwd m5, m5
- punpckldq m5, m5
-
- movh m1, [predq]
- punpcklbw m1, m0
- paddsw m1, m5
- packuswb m1, m0 ; pack and unpack to saturate
- movh [dstq], m1
-
- movh m2, [predq+pitq]
- punpcklbw m2, m0
- paddsw m2, m5
- packuswb m2, m0 ; pack and unpack to saturate
- movh [dstq+strideq], m2
-
- movh m3, [predq+2*pitq]
- punpcklbw m3, m0
- paddsw m3, m5
- packuswb m3, m0 ; pack and unpack to saturate
- movh [dstq+2*strideq], m3
-
- add dstq, strideq
- add predq, pitq
- movh m4, [predq+2*pitq]
- punpcklbw m4, m0
- paddsw m4, m5
- packuswb m4, m0 ; pack and unpack to saturate
- movh [dstq+2*strideq], m4
- RET
-
--- a/vp9/common/x86/vp9_idctllm_sse2.asm
+++ /dev/null
@@ -1,712 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp9_idct_dequant_0_2x_sse2
-; (
-; short *qcoeff - 0
-; short *dequant - 1
-; unsigned char *pre - 2
-; unsigned char *dst - 3
-; int dst_stride - 4
-; int blk_stride - 5
-; )
-
-global sym(vp9_idct_dequant_0_2x_sse2) PRIVATE
-sym(vp9_idct_dequant_0_2x_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- ; end prolog
-
- mov rdx, arg(1) ; dequant
- mov rax, arg(0) ; qcoeff
-
- movd xmm4, [rax]
- movd xmm5, [rdx]
-
- pinsrw xmm4, [rax+32], 4
- pinsrw xmm5, [rdx], 4
-
- pmullw xmm4, xmm5
-
- ; Zero out xmm5, for use unpacking
- pxor xmm5, xmm5
-
- ; clear coeffs
- movd [rax], xmm5
- movd [rax+32], xmm5
-;pshufb
- pshuflw xmm4, xmm4, 00000000b
- pshufhw xmm4, xmm4, 00000000b
-
- mov rax, arg(2) ; pre
- paddw xmm4, [GLOBAL(fours)]
-
- movsxd rcx, dword ptr arg(5) ; blk_stride
- psraw xmm4, 3
-
- movq xmm0, [rax]
- movq xmm1, [rax+rcx]
- movq xmm2, [rax+2*rcx]
- lea rcx, [3*rcx]
- movq xmm3, [rax+rcx]
-
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- punpcklbw xmm2, xmm5
- punpcklbw xmm3, xmm5
-
- mov rax, arg(3) ; dst
- movsxd rdx, dword ptr arg(4) ; dst_stride
-
- ; Add to predict buffer
- paddw xmm0, xmm4
- paddw xmm1, xmm4
- paddw xmm2, xmm4
- paddw xmm3, xmm4
-
- ; pack up before storing
- packuswb xmm0, xmm5
- packuswb xmm1, xmm5
- packuswb xmm2, xmm5
- packuswb xmm3, xmm5
-
- ; store blocks back out
- movq [rax], xmm0
- movq [rax + rdx], xmm1
-
- lea rax, [rax + 2*rdx]
-
- movq [rax], xmm2
- movq [rax + rdx], xmm3
-
- ; begin epilog
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(vp9_idct_dequant_full_2x_sse2) PRIVATE
-sym(vp9_idct_dequant_full_2x_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ; special case when 2 blocks have 0 or 1 coeffs
- ; dc is set as first coeff, so no need to load qcoeff
- mov rax, arg(0) ; qcoeff
- mov rsi, arg(2) ; pre
- mov rdi, arg(3) ; dst
- movsxd rcx, dword ptr arg(5) ; blk_stride
-
- ; Zero out xmm7, for use unpacking
- pxor xmm7, xmm7
-
- mov rdx, arg(1) ; dequant
-
- ; note the transpose of xmm1 and xmm2, necessary for shuffle
- ; to spit out sensicle data
- movdqa xmm0, [rax]
- movdqa xmm2, [rax+16]
- movdqa xmm1, [rax+32]
- movdqa xmm3, [rax+48]
-
- ; Clear out coeffs
- movdqa [rax], xmm7
- movdqa [rax+16], xmm7
- movdqa [rax+32], xmm7
- movdqa [rax+48], xmm7
-
- ; dequantize qcoeff buffer
- pmullw xmm0, [rdx]
- pmullw xmm2, [rdx+16]
- pmullw xmm1, [rdx]
- pmullw xmm3, [rdx+16]
-
- ; repack so block 0 row x and block 1 row x are together
- movdqa xmm4, xmm0
- punpckldq xmm0, xmm1
- punpckhdq xmm4, xmm1
-
- pshufd xmm0, xmm0, 11011000b
- pshufd xmm1, xmm4, 11011000b
-
- movdqa xmm4, xmm2
- punpckldq xmm2, xmm3
- punpckhdq xmm4, xmm3
-
- pshufd xmm2, xmm2, 11011000b
- pshufd xmm3, xmm4, 11011000b
-
- ; first pass
- psubw xmm0, xmm2 ; b1 = 0-2
- paddw xmm2, xmm2 ;
-
- movdqa xmm5, xmm1
- paddw xmm2, xmm0 ; a1 = 0+2
-
- pmulhw xmm5, [GLOBAL(x_s1sqr2)]
- paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
-
- movdqa xmm7, xmm3
- pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
-
- paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw xmm7, xmm5 ; c1
-
- movdqa xmm5, xmm1
- movdqa xmm4, xmm3
-
- pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
- paddw xmm5, xmm1
-
- pmulhw xmm3, [GLOBAL(x_s1sqr2)]
- paddw xmm3, xmm4
-
- paddw xmm3, xmm5 ; d1
- movdqa xmm6, xmm2 ; a1
-
- movdqa xmm4, xmm0 ; b1
- paddw xmm2, xmm3 ;0
-
- paddw xmm4, xmm7 ;1
- psubw xmm0, xmm7 ;2
-
- psubw xmm6, xmm3 ;3
-
- ; transpose for the second pass
- movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
- punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
- punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
-
- movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
- punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
- punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
-
-
- movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
- punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
- punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
-
- movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
- punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
- punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
-
-
- movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
- punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
- punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
-
- movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
- punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
- punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
-
- pshufd xmm0, xmm2, 11011000b
- pshufd xmm2, xmm1, 11011000b
-
- pshufd xmm1, xmm5, 11011000b
- pshufd xmm3, xmm7, 11011000b
-
- ; second pass
- psubw xmm0, xmm2 ; b1 = 0-2
- paddw xmm2, xmm2
-
- movdqa xmm5, xmm1
- paddw xmm2, xmm0 ; a1 = 0+2
-
- pmulhw xmm5, [GLOBAL(x_s1sqr2)]
- paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
-
- movdqa xmm7, xmm3
- pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
-
- paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw xmm7, xmm5 ; c1
-
- movdqa xmm5, xmm1
- movdqa xmm4, xmm3
-
- pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
- paddw xmm5, xmm1
-
- pmulhw xmm3, [GLOBAL(x_s1sqr2)]
- paddw xmm3, xmm4
-
- paddw xmm3, xmm5 ; d1
- paddw xmm0, [GLOBAL(fours)]
-
- paddw xmm2, [GLOBAL(fours)]
- movdqa xmm6, xmm2 ; a1
-
- movdqa xmm4, xmm0 ; b1
- paddw xmm2, xmm3 ;0
-
- paddw xmm4, xmm7 ;1
- psubw xmm0, xmm7 ;2
-
- psubw xmm6, xmm3 ;3
- psraw xmm2, 3
-
- psraw xmm0, 3
- psraw xmm4, 3
-
- psraw xmm6, 3
-
- ; transpose to save
- movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
- punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
- punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
-
- movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
- punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
- punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
-
-
- movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
- punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
- punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
-
- movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
- punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
- punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
-
-
- movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
- punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
- punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
-
- movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
- punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
- punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
-
- pshufd xmm0, xmm2, 11011000b
- pshufd xmm2, xmm1, 11011000b
-
- pshufd xmm1, xmm5, 11011000b
- pshufd xmm3, xmm7, 11011000b
-
- pxor xmm7, xmm7
-
- ; Load up predict blocks
- movq xmm4, [rsi]
- movq xmm5, [rsi+rcx]
-
- punpcklbw xmm4, xmm7
- punpcklbw xmm5, xmm7
-
- paddw xmm0, xmm4
- paddw xmm1, xmm5
-
- movq xmm4, [rsi+2*rcx]
- lea rcx, [3*rcx]
- movq xmm5, [rsi+rcx]
-
- punpcklbw xmm4, xmm7
- punpcklbw xmm5, xmm7
-
- paddw xmm2, xmm4
- paddw xmm3, xmm5
-
-.finish:
-
- ; pack up before storing
- packuswb xmm0, xmm7
- packuswb xmm1, xmm7
- packuswb xmm2, xmm7
- packuswb xmm3, xmm7
-
- ; Load destination stride before writing out,
- ; doesn't need to persist
- movsxd rdx, dword ptr arg(4) ; dst_stride
-
- ; store blocks back out
- movq [rdi], xmm0
- movq [rdi + rdx], xmm1
-
- lea rdi, [rdi + 2*rdx]
-
- movq [rdi], xmm2
- movq [rdi + rdx], xmm3
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_idct_dequant_dc_0_2x_sse2
-; (
-; short *qcoeff - 0
-; short *dequant - 1
-; unsigned char *pre - 2
-; unsigned char *dst - 3
-; int dst_stride - 4
-; short *dc - 5
-; )
-global sym(vp9_idct_dequant_dc_0_2x_sse2) PRIVATE
-sym(vp9_idct_dequant_dc_0_2x_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ; special case when 2 blocks have 0 or 1 coeffs
- ; dc is set as first coeff, so no need to load qcoeff
- mov rax, arg(0) ; qcoeff
- mov rsi, arg(2) ; pre
- mov rdi, arg(3) ; dst
- mov rdx, arg(5) ; dc
-
- ; Zero out xmm5, for use unpacking
- pxor xmm5, xmm5
-
- ; load up 2 dc words here == 2*16 = doubleword
- movd xmm4, [rdx]
-
- ; Load up predict blocks
- movq xmm0, [rsi]
- movq xmm1, [rsi+16]
- movq xmm2, [rsi+32]
- movq xmm3, [rsi+48]
-
- ; Duplicate and expand dc across
- punpcklwd xmm4, xmm4
- punpckldq xmm4, xmm4
-
- ; Rounding to dequant and downshift
- paddw xmm4, [GLOBAL(fours)]
- psraw xmm4, 3
-
- ; Predict buffer needs to be expanded from bytes to words
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- punpcklbw xmm2, xmm5
- punpcklbw xmm3, xmm5
-
- ; Add to predict buffer
- paddw xmm0, xmm4
- paddw xmm1, xmm4
- paddw xmm2, xmm4
- paddw xmm3, xmm4
-
- ; pack up before storing
- packuswb xmm0, xmm5
- packuswb xmm1, xmm5
- packuswb xmm2, xmm5
- packuswb xmm3, xmm5
-
- ; Load destination stride before writing out,
- ; doesn't need to persist
- movsxd rdx, dword ptr arg(4) ; dst_stride
-
- ; store blocks back out
- movq [rdi], xmm0
- movq [rdi + rdx], xmm1
-
- lea rdi, [rdi + 2*rdx]
-
- movq [rdi], xmm2
- movq [rdi + rdx], xmm3
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(vp9_idct_dequant_dc_full_2x_sse2) PRIVATE
-sym(vp9_idct_dequant_dc_full_2x_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ; special case when 2 blocks have 0 or 1 coeffs
- ; dc is set as first coeff, so no need to load qcoeff
- mov rax, arg(0) ; qcoeff
- mov rsi, arg(2) ; pre
- mov rdi, arg(3) ; dst
-
- ; Zero out xmm7, for use unpacking
- pxor xmm7, xmm7
-
- mov rdx, arg(1) ; dequant
-
- ; note the transpose of xmm1 and xmm2, necessary for shuffle
- ; to spit out sensicle data
- movdqa xmm0, [rax]
- movdqa xmm2, [rax+16]
- movdqa xmm1, [rax+32]
- movdqa xmm3, [rax+48]
-
- ; Clear out coeffs
- movdqa [rax], xmm7
- movdqa [rax+16], xmm7
- movdqa [rax+32], xmm7
- movdqa [rax+48], xmm7
-
- ; dequantize qcoeff buffer
- pmullw xmm0, [rdx]
- pmullw xmm2, [rdx+16]
- pmullw xmm1, [rdx]
- pmullw xmm3, [rdx+16]
-
- ; DC component
- mov rdx, arg(5)
-
- ; repack so block 0 row x and block 1 row x are together
- movdqa xmm4, xmm0
- punpckldq xmm0, xmm1
- punpckhdq xmm4, xmm1
-
- pshufd xmm0, xmm0, 11011000b
- pshufd xmm1, xmm4, 11011000b
-
- movdqa xmm4, xmm2
- punpckldq xmm2, xmm3
- punpckhdq xmm4, xmm3
-
- pshufd xmm2, xmm2, 11011000b
- pshufd xmm3, xmm4, 11011000b
-
- ; insert DC component
- pinsrw xmm0, [rdx], 0
- pinsrw xmm0, [rdx+2], 4
-
- ; first pass
- psubw xmm0, xmm2 ; b1 = 0-2
- paddw xmm2, xmm2 ;
-
- movdqa xmm5, xmm1
- paddw xmm2, xmm0 ; a1 = 0+2
-
- pmulhw xmm5, [GLOBAL(x_s1sqr2)]
- paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
-
- movdqa xmm7, xmm3
- pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
-
- paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw xmm7, xmm5 ; c1
-
- movdqa xmm5, xmm1
- movdqa xmm4, xmm3
-
- pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
- paddw xmm5, xmm1
-
- pmulhw xmm3, [GLOBAL(x_s1sqr2)]
- paddw xmm3, xmm4
-
- paddw xmm3, xmm5 ; d1
- movdqa xmm6, xmm2 ; a1
-
- movdqa xmm4, xmm0 ; b1
- paddw xmm2, xmm3 ;0
-
- paddw xmm4, xmm7 ;1
- psubw xmm0, xmm7 ;2
-
- psubw xmm6, xmm3 ;3
-
- ; transpose for the second pass
- movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
- punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
- punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
-
- movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
- punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
- punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
-
-
- movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
- punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
- punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
-
- movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
- punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
- punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
-
-
- movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
- punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
- punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
-
- movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
- punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
- punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
-
- pshufd xmm0, xmm2, 11011000b
- pshufd xmm2, xmm1, 11011000b
-
- pshufd xmm1, xmm5, 11011000b
- pshufd xmm3, xmm7, 11011000b
-
- ; second pass
- psubw xmm0, xmm2 ; b1 = 0-2
- paddw xmm2, xmm2
-
- movdqa xmm5, xmm1
- paddw xmm2, xmm0 ; a1 = 0+2
-
- pmulhw xmm5, [GLOBAL(x_s1sqr2)]
- paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
-
- movdqa xmm7, xmm3
- pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
-
- paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw xmm7, xmm5 ; c1
-
- movdqa xmm5, xmm1
- movdqa xmm4, xmm3
-
- pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
- paddw xmm5, xmm1
-
- pmulhw xmm3, [GLOBAL(x_s1sqr2)]
- paddw xmm3, xmm4
-
- paddw xmm3, xmm5 ; d1
- paddw xmm0, [GLOBAL(fours)]
-
- paddw xmm2, [GLOBAL(fours)]
- movdqa xmm6, xmm2 ; a1
-
- movdqa xmm4, xmm0 ; b1
- paddw xmm2, xmm3 ;0
-
- paddw xmm4, xmm7 ;1
- psubw xmm0, xmm7 ;2
-
- psubw xmm6, xmm3 ;3
- psraw xmm2, 3
-
- psraw xmm0, 3
- psraw xmm4, 3
-
- psraw xmm6, 3
-
- ; transpose to save
- movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
- punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
- punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
-
- movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
- punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
- punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
-
-
- movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
- punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
- punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
-
- movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
- punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
- punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
-
-
- movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
- punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
- punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
-
- movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
- punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
- punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
-
- pshufd xmm0, xmm2, 11011000b
- pshufd xmm2, xmm1, 11011000b
-
- pshufd xmm1, xmm5, 11011000b
- pshufd xmm3, xmm7, 11011000b
-
- pxor xmm7, xmm7
-
- ; Load up predict blocks
- movq xmm4, [rsi]
- movq xmm5, [rsi+16]
-
- punpcklbw xmm4, xmm7
- punpcklbw xmm5, xmm7
-
- paddw xmm0, xmm4
- paddw xmm1, xmm5
-
- movq xmm4, [rsi+32]
- movq xmm5, [rsi+48]
-
- punpcklbw xmm4, xmm7
- punpcklbw xmm5, xmm7
-
- paddw xmm2, xmm4
- paddw xmm3, xmm5
-
-.finish:
-
- ; pack up before storing
- packuswb xmm0, xmm7
- packuswb xmm1, xmm7
- packuswb xmm2, xmm7
- packuswb xmm3, xmm7
-
- ; Load destination stride before writing out,
- ; doesn't need to persist
- movsxd rdx, dword ptr arg(4) ; dst_stride
-
- ; store blocks back out
- movq [rdi], xmm0
- movq [rdi + rdx], xmm1
-
- lea rdi, [rdi + 2*rdx]
-
- movq [rdi], xmm2
- movq [rdi + rdx], xmm3
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-fours:
- times 8 dw 0x0004
-align 16
-x_s1sqr2:
- times 8 dw 0x8A8C
-align 16
-x_c1sqr2less1:
- times 8 dw 0x4E7B
--- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
+++ b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
@@ -26,14 +26,16 @@
DECLARE_ALIGNED(16, unsigned char, flat2_op[7][16]);
DECLARE_ALIGNED(16, unsigned char, flat2_oq[7][16]);
- DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]);
- DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_op[3][16]);
+ DECLARE_ALIGNED(16, unsigned char, flat_oq[3][16]);
+
+ DECLARE_ALIGNED(16, unsigned char, ap[8][16]);
+ DECLARE_ALIGNED(16, unsigned char, aq[8][16]);
+
+
__m128i mask, hev, flat, flat2;
const __m128i zero = _mm_set1_epi16(0);
+ const __m128i one = _mm_set1_epi8(1);
__m128i p7, p6, p5;
__m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4;
__m128i q5, q6, q7;
@@ -58,12 +60,24 @@
q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
q4 = _mm_loadu_si128((__m128i *)(s + 4 * p));
+
+ _mm_store_si128((__m128i *)ap[4], p4);
+ _mm_store_si128((__m128i *)ap[3], p3);
+ _mm_store_si128((__m128i *)ap[2], p2);
+ _mm_store_si128((__m128i *)ap[1], p1);
+ _mm_store_si128((__m128i *)ap[0], p0);
+ _mm_store_si128((__m128i *)aq[4], q4);
+ _mm_store_si128((__m128i *)aq[3], q3);
+ _mm_store_si128((__m128i *)aq[2], q2);
+ _mm_store_si128((__m128i *)aq[1], q1);
+ _mm_store_si128((__m128i *)aq[0], q0);
+
+
{
const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),
_mm_subs_epu8(p0, p1));
const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0),
_mm_subs_epu8(q0, q1));
- const __m128i one = _mm_set1_epi8(1);
const __m128i fe = _mm_set1_epi8(0xfe);
const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0);
__m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0),
@@ -95,246 +109,8 @@
mask = _mm_max_epu8(work, mask);
mask = _mm_subs_epu8(mask, limit);
mask = _mm_cmpeq_epi8(mask, zero);
-
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0),
- _mm_subs_epu8(p0, p2)),
- _mm_or_si128(_mm_subs_epu8(q2, q0),
- _mm_subs_epu8(q0, q2)));
- flat = _mm_max_epu8(work, flat);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0),
- _mm_subs_epu8(p0, p3)),
- _mm_or_si128(_mm_subs_epu8(q3, q0),
- _mm_subs_epu8(q0, q3)));
- flat = _mm_max_epu8(work, flat);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0),
- _mm_subs_epu8(p0, p4)),
- _mm_or_si128(_mm_subs_epu8(q4, q0),
- _mm_subs_epu8(q0, q4)));
- flat = _mm_max_epu8(work, flat);
- flat = _mm_subs_epu8(flat, one);
- flat = _mm_cmpeq_epi8(flat, zero);
- flat = _mm_and_si128(flat, mask);
}
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- // calculate flat2
- p4 = _mm_loadu_si128((__m128i *)(s - 8 * p));
- p3 = _mm_loadu_si128((__m128i *)(s - 7 * p));
- p2 = _mm_loadu_si128((__m128i *)(s - 6 * p));
- p1 = _mm_loadu_si128((__m128i *)(s - 5 * p));
-// p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
-// q0 = _mm_loadu_si128((__m128i *)(s - 0 * p));
- q1 = _mm_loadu_si128((__m128i *)(s + 4 * p));
- q2 = _mm_loadu_si128((__m128i *)(s + 5 * p));
- q3 = _mm_loadu_si128((__m128i *)(s + 6 * p));
- q4 = _mm_loadu_si128((__m128i *)(s + 7 * p));
-
- {
- const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),
- _mm_subs_epu8(p0, p1));
- const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0),
- _mm_subs_epu8(q0, q1));
- const __m128i one = _mm_set1_epi8(1);
- __m128i work;
- flat2 = _mm_max_epu8(abs_p1p0, abs_q1q0);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0),
- _mm_subs_epu8(p0, p2)),
- _mm_or_si128(_mm_subs_epu8(q2, q0),
- _mm_subs_epu8(q0, q2)));
- flat2 = _mm_max_epu8(work, flat2);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0),
- _mm_subs_epu8(p0, p3)),
- _mm_or_si128(_mm_subs_epu8(q3, q0),
- _mm_subs_epu8(q0, q3)));
- flat2 = _mm_max_epu8(work, flat2);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0),
- _mm_subs_epu8(p0, p4)),
- _mm_or_si128(_mm_subs_epu8(q4, q0),
- _mm_subs_epu8(q0, q4)));
- flat2 = _mm_max_epu8(work, flat2);
- flat2 = _mm_subs_epu8(flat2, one);
- flat2 = _mm_cmpeq_epi8(flat2, zero);
- flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask
- }
- // calculate flat2
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- {
- const __m128i four = _mm_set1_epi16(4);
- unsigned char *src = s;
- i = 0;
- do {
- __m128i workp_a, workp_b, workp_shft;
- p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 5 * p)), zero);
- p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero);
- p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero);
- p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero);
- p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero);
- q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero);
- q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero);
- q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero);
- q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero);
- q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 4 * p)), zero);
-
- workp_a = _mm_add_epi16(_mm_add_epi16(p4, p3), _mm_add_epi16(p2, p1));
- workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0);
- workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p4);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
- _mm_storel_epi64((__m128i *)&flat_op2[i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
- _mm_storel_epi64((__m128i *)&flat_op1[i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p4), q2);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
- _mm_storel_epi64((__m128i *)&flat_op0[i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
- _mm_storel_epi64((__m128i *)&flat_oq0[i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q4);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
- _mm_storel_epi64((__m128i *)&flat_oq1[i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q4);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
- _mm_storel_epi64((__m128i *)&flat_oq2[i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- src += 8;
- } while (++i < 2);
- }
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- // wide flat
- // TODO(slavarnway): interleave with the flat pixel calculations (see above)
- {
- const __m128i eight = _mm_set1_epi16(8);
- unsigned char *src = s;
- int i = 0;
- do {
- __m128i workp_a, workp_b, workp_shft;
- p7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 8 * p)), zero);
- p6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 7 * p)), zero);
- p5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 6 * p)), zero);
- p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 5 * p)), zero);
- p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero);
- p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero);
- p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero);
- p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero);
- q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero);
- q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero);
- q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero);
- q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero);
- q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 4 * p)), zero);
- q5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 5 * p)), zero);
- q6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 6 * p)), zero);
- q7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 7 * p)), zero);
-
-
- workp_a = _mm_sub_epi16(_mm_slli_epi16(p7, 3), p7); // p7 * 7
- workp_a = _mm_add_epi16(_mm_slli_epi16(p6, 1), workp_a);
- workp_b = _mm_add_epi16(_mm_add_epi16(p5, p4), _mm_add_epi16(p3, p2));
- workp_a = _mm_add_epi16(_mm_add_epi16(p1, p0), workp_a);
- workp_b = _mm_add_epi16(_mm_add_epi16(q0, eight), workp_b);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[6][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p5);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p6), q1);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[5][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p4);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p5), q2);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[4][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p4), q3);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[3][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p2);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p3), q4);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[2][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p1);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p2), q5);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[1][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), p0);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), q6);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_op[0][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p7), q0);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q7);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[0][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p6), q1);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q7);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[1][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p5), q2);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q7);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[2][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p4), q3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q2), q7);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[3][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q4);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q3), q7);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[4][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q5);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q4), q7);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[5][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q6);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q5), q7);
- workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 4);
- _mm_storel_epi64((__m128i *)&flat2_oq[6][i*8],
- _mm_packus_epi16(workp_shft, workp_shft));
-
- src += 8;
- } while (++i < 2);
- }
- // wide flat
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
// lp filter
{
const __m128i t4 = _mm_set1_epi8(4);
@@ -345,14 +121,10 @@
const __m128i t1 = _mm_set1_epi8(0x1);
const __m128i t7f = _mm_set1_epi8(0x7f);
- __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)),
- t80);
- __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)),
- t80);
- __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)),
- t80);
- __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)),
- t80);
+ __m128i ps1 = _mm_xor_si128(p1, t80);
+ __m128i ps0 = _mm_xor_si128(p0, t80);
+ __m128i qs0 = _mm_xor_si128(q0, t80);
+ __m128i qs1 = _mm_xor_si128(q1, t80);
__m128i filt;
__m128i work_a;
__m128i filter1, filter2;
@@ -374,6 +146,7 @@
work_a = _mm_and_si128(work_a, te0);
filter1 = _mm_and_si128(filter1, t1f);
filter1 = _mm_or_si128(filter1, work_a);
+ qs0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80);
/* Filter2 >> 3 */
work_a = _mm_cmpgt_epi8(zero, filter2);
@@ -381,6 +154,7 @@
work_a = _mm_and_si128(work_a, te0);
filter2 = _mm_and_si128(filter2, t1f);
filter2 = _mm_or_si128(filter2, work_a);
+ ps0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80);
/* filt >> 1 */
filt = _mm_adds_epi8(filter1, t1);
@@ -389,20 +163,265 @@
work_a = _mm_and_si128(work_a, t80);
filt = _mm_and_si128(filt, t7f);
filt = _mm_or_si128(filt, work_a);
-
filt = _mm_andnot_si128(hev, filt);
-
- ps0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80);
ps1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80);
- qs0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80);
qs1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80);
+ // loopfilter done
+ {
+ __m128i work;
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0),
+ _mm_subs_epu8(p0, p2)),
+ _mm_or_si128(_mm_subs_epu8(q2, q0),
+ _mm_subs_epu8(q0, q2)));
+ flat = _mm_max_epu8(work, flat);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0),
+ _mm_subs_epu8(p0, p3)),
+ _mm_or_si128(_mm_subs_epu8(q3, q0),
+ _mm_subs_epu8(q0, q3)));
+ flat = _mm_max_epu8(work, flat);
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0),
+ _mm_subs_epu8(p0, p4)),
+ _mm_or_si128(_mm_subs_epu8(q4, q0),
+ _mm_subs_epu8(q0, q4)));
+ flat = _mm_subs_epu8(flat, one);
+ flat = _mm_cmpeq_epi8(flat, zero);
+ flat = _mm_and_si128(flat, mask);
+
+ p5 = _mm_loadu_si128((__m128i *)(s - 6 * p));
+ q5 = _mm_loadu_si128((__m128i *)(s + 5 * p));
+ flat2 = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p5, p0),
+ _mm_subs_epu8(p0, p5)),
+ _mm_or_si128(_mm_subs_epu8(q5, q0),
+ _mm_subs_epu8(q0, q5)));
+ _mm_store_si128((__m128i *)ap[5], p5);
+ _mm_store_si128((__m128i *)aq[5], q5);
+ flat2 = _mm_max_epu8(work, flat2);
+ p6 = _mm_loadu_si128((__m128i *)(s - 7 * p));
+ q6 = _mm_loadu_si128((__m128i *)(s + 6 * p));
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p6, p0),
+ _mm_subs_epu8(p0, p6)),
+ _mm_or_si128(_mm_subs_epu8(q6, q0),
+ _mm_subs_epu8(q0, q6)));
+ _mm_store_si128((__m128i *)ap[6], p6);
+ _mm_store_si128((__m128i *)aq[6], q6);
+ flat2 = _mm_max_epu8(work, flat2);
+
+ p7 = _mm_loadu_si128((__m128i *)(s - 8 * p));
+ q7 = _mm_loadu_si128((__m128i *)(s + 7 * p));
+ work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p7, p0),
+ _mm_subs_epu8(p0, p7)),
+ _mm_or_si128(_mm_subs_epu8(q7, q0),
+ _mm_subs_epu8(q0, q7)));
+ _mm_store_si128((__m128i *)ap[7], p7);
+ _mm_store_si128((__m128i *)aq[7], q7);
+ flat2 = _mm_max_epu8(work, flat2);
+ flat2 = _mm_subs_epu8(flat2, one);
+ flat2 = _mm_cmpeq_epi8(flat2, zero);
+ flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask
+ }
+
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ // flat and wide flat calculations
+ {
+ const __m128i eight = _mm_set1_epi16(8);
+ const __m128i four = _mm_set1_epi16(4);
+ __m128i temp_flat2 = flat2;
+ unsigned char *src = s;
+ int i = 0;
+ do {
+ __m128i workp_shft;
+ __m128i a, b, c;
+
+ unsigned int off = i * 8;
+ p7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[7] + off)), zero);
+ p6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[6] + off)), zero);
+ p5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[5] + off)), zero);
+ p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[4] + off)), zero);
+ p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[3] + off)), zero);
+ p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[2] + off)), zero);
+ p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[1] + off)), zero);
+ p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(ap[0] + off)), zero);
+ q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[0] + off)), zero);
+ q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[1] + off)), zero);
+ q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[2] + off)), zero);
+ q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[3] + off)), zero);
+ q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[4] + off)), zero);
+ q5 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[5] + off)), zero);
+ q6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[6] + off)), zero);
+ q7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(aq[7] + off)), zero);
+
+ c = _mm_sub_epi16(_mm_slli_epi16(p7, 3), p7); // p7 * 7
+ c = _mm_add_epi16(_mm_slli_epi16(p6, 1), _mm_add_epi16(p4, c));
+
+ b = _mm_add_epi16(_mm_add_epi16(p3, four), _mm_add_epi16(p3, p2));
+ a = _mm_add_epi16(p3, _mm_add_epi16(p2, p1));
+ a = _mm_add_epi16(_mm_add_epi16(p0, q0), a);
+
+ _mm_storel_epi64((__m128i *)&flat_op[2][i*8],
+ _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
+ , b));
+
+ c = _mm_add_epi16(_mm_add_epi16(p5, eight), c);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_op[6][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q1, a);
+ b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p3, p2)), p1);
+ _mm_storel_epi64((__m128i *)&flat_op[1][i*8],
+ _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
+ , b));
+
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p6)), p5);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_op[5][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q2, a);
+ b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p3, p1)), p0);
+ _mm_storel_epi64((__m128i *)&flat_op[0][i*8],
+ _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
+ , b));
+
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p5)), p4);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_op[4][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q3, a);
+ b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p3, p0)), q0);
+ _mm_storel_epi64((__m128i *)&flat_oq[0][i*8],
+ _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
+ , b));
+
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p4)), p3);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_op[3][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ b = _mm_add_epi16(q3, b);
+ b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p2, q0)), q1);
+ _mm_storel_epi64((__m128i *)&flat_oq[1][i*8],
+ _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
+ , b));
+
+ c = _mm_add_epi16(q4, c);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p3)), p2);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_op[2][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ b = _mm_add_epi16(q3, b);
+ b = _mm_add_epi16(_mm_sub_epi16(b, _mm_add_epi16(p1, q1)), q2);
+ _mm_storel_epi64((__m128i *)&flat_oq[2][i*8],
+ _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a, b), 3)
+ , b));
+ a = _mm_add_epi16(q5, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p2)), p1);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_op[1][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q6, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p1)), p0);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_op[0][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q7, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p7, p0)), q0);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_oq[0][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q7, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p6, q0)), q1);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_oq[1][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q7, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p5, q1)), q2);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_oq[2][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q7, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p4, q2)), q3);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_oq[3][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q7, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p3, q3)), q4);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_oq[4][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q7, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p2, q4)), q5);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_oq[5][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ a = _mm_add_epi16(q7, a);
+ c = _mm_add_epi16(_mm_sub_epi16(c, _mm_add_epi16(p1, q5)), q6);
+ workp_shft = _mm_srli_epi16(_mm_add_epi16(a, c), 4);
+ _mm_storel_epi64((__m128i *)&flat2_oq[6][i*8],
+ _mm_packus_epi16(workp_shft, workp_shft));
+
+ temp_flat2 = _mm_srli_si128(temp_flat2, 8);
+ src += 8;
+ } while (++i < 2);
+ }
+ // wide flat
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ work_a = _mm_load_si128((__m128i *)ap[2]);
+ p2 = _mm_load_si128((__m128i *)flat_op[2]);
+ work_a = _mm_andnot_si128(flat, work_a);
+ p2 = _mm_and_si128(flat, p2);
+ p2 = _mm_or_si128(work_a, p2);
+ _mm_store_si128((__m128i *)flat_op[2], p2);
+
+ p1 = _mm_load_si128((__m128i *)flat_op[1]);
+ work_a = _mm_andnot_si128(flat, ps1);
+ p1 = _mm_and_si128(flat, p1);
+ p1 = _mm_or_si128(work_a, p1);
+ _mm_store_si128((__m128i *)flat_op[1], p1);
+
+ p0 = _mm_load_si128((__m128i *)flat_op[0]);
+ work_a = _mm_andnot_si128(flat, ps0);
+ p0 = _mm_and_si128(flat, p0);
+ p0 = _mm_or_si128(work_a, p0);
+ _mm_store_si128((__m128i *)flat_op[0], p0);
+
+ q0 = _mm_load_si128((__m128i *)flat_oq[0]);
+ work_a = _mm_andnot_si128(flat, qs0);
+ q0 = _mm_and_si128(flat, q0);
+ q0 = _mm_or_si128(work_a, q0);
+ _mm_store_si128((__m128i *)flat_oq[0], q0);
+
+ q1 = _mm_load_si128((__m128i *)flat_oq[1]);
+ work_a = _mm_andnot_si128(flat, qs1);
+ q1 = _mm_and_si128(flat, q1);
+ q1 = _mm_or_si128(work_a, q1);
+ _mm_store_si128((__m128i *)flat_oq[1], q1);
+
+ work_a = _mm_load_si128((__m128i *)aq[2]);
+ q2 = _mm_load_si128((__m128i *)flat_oq[2]);
+ work_a = _mm_andnot_si128(flat, work_a);
+ q2 = _mm_and_si128(flat, q2);
+ q2 = _mm_or_si128(work_a, q2);
+ _mm_store_si128((__m128i *)flat_oq[2], q2);
+
// write out op6 - op3
{
unsigned char *dst = (s - 7 * p);
for (i = 6; i > 2; i--) {
__m128i flat2_output;
- work_a = _mm_loadu_si128((__m128i *)dst);
+ work_a = _mm_load_si128((__m128i *)ap[i]);
flat2_output = _mm_load_si128((__m128i *)flat2_op[i]);
work_a = _mm_andnot_si128(flat2, work_a);
flat2_output = _mm_and_si128(flat2, flat2_output);
@@ -412,11 +431,7 @@
}
}
- work_a = _mm_loadu_si128((__m128i *)(s - 3 * p));
- p2 = _mm_load_si128((__m128i *)flat_op2);
- work_a = _mm_andnot_si128(flat, work_a);
- p2 = _mm_and_si128(flat, p2);
- work_a = _mm_or_si128(work_a, p2);
+ work_a = _mm_load_si128((__m128i *)flat_op[2]);
p2 = _mm_load_si128((__m128i *)flat2_op[2]);
work_a = _mm_andnot_si128(flat2, work_a);
p2 = _mm_and_si128(flat2, p2);
@@ -423,10 +438,7 @@
p2 = _mm_or_si128(work_a, p2);
_mm_storeu_si128((__m128i *)(s - 3 * p), p2);
- p1 = _mm_load_si128((__m128i *)flat_op1);
- work_a = _mm_andnot_si128(flat, ps1);
- p1 = _mm_and_si128(flat, p1);
- work_a = _mm_or_si128(work_a, p1);
+ work_a = _mm_load_si128((__m128i *)flat_op[1]);
p1 = _mm_load_si128((__m128i *)flat2_op[1]);
work_a = _mm_andnot_si128(flat2, work_a);
p1 = _mm_and_si128(flat2, p1);
@@ -433,10 +445,7 @@
p1 = _mm_or_si128(work_a, p1);
_mm_storeu_si128((__m128i *)(s - 2 * p), p1);
- p0 = _mm_load_si128((__m128i *)flat_op0);
- work_a = _mm_andnot_si128(flat, ps0);
- p0 = _mm_and_si128(flat, p0);
- work_a = _mm_or_si128(work_a, p0);
+ work_a = _mm_load_si128((__m128i *)flat_op[0]);
p0 = _mm_load_si128((__m128i *)flat2_op[0]);
work_a = _mm_andnot_si128(flat2, work_a);
p0 = _mm_and_si128(flat2, p0);
@@ -443,10 +452,7 @@
p0 = _mm_or_si128(work_a, p0);
_mm_storeu_si128((__m128i *)(s - 1 * p), p0);
- q0 = _mm_load_si128((__m128i *)flat_oq0);
- work_a = _mm_andnot_si128(flat, qs0);
- q0 = _mm_and_si128(flat, q0);
- work_a = _mm_or_si128(work_a, q0);
+ work_a = _mm_load_si128((__m128i *)flat_oq[0]);
q0 = _mm_load_si128((__m128i *)flat2_oq[0]);
work_a = _mm_andnot_si128(flat2, work_a);
q0 = _mm_and_si128(flat2, q0);
@@ -453,10 +459,7 @@
q0 = _mm_or_si128(work_a, q0);
_mm_storeu_si128((__m128i *)(s - 0 * p), q0);
- q1 = _mm_load_si128((__m128i *)flat_oq1);
- work_a = _mm_andnot_si128(flat, qs1);
- q1 = _mm_and_si128(flat, q1);
- work_a = _mm_or_si128(work_a, q1);
+ work_a = _mm_load_si128((__m128i *)flat_oq[1]);
q1 = _mm_load_si128((__m128i *)flat2_oq[1]);
work_a = _mm_andnot_si128(flat2, work_a);
q1 = _mm_and_si128(flat2, q1);
@@ -463,11 +466,7 @@
q1 = _mm_or_si128(work_a, q1);
_mm_storeu_si128((__m128i *)(s + 1 * p), q1);
- work_a = _mm_loadu_si128((__m128i *)(s + 2 * p));
- q2 = _mm_load_si128((__m128i *)flat_oq2);
- work_a = _mm_andnot_si128(flat, work_a);
- q2 = _mm_and_si128(flat, q2);
- work_a = _mm_or_si128(work_a, q2);
+ work_a = _mm_load_si128((__m128i *)flat_oq[2]);
q2 = _mm_load_si128((__m128i *)flat2_oq[2]);
work_a = _mm_andnot_si128(flat2, work_a);
q2 = _mm_and_si128(flat2, q2);
@@ -479,7 +478,7 @@
unsigned char *dst = (s + 3 * p);
for (i = 3; i < 7; i++) {
__m128i flat2_output;
- work_a = _mm_loadu_si128((__m128i *)dst);
+ work_a = _mm_load_si128((__m128i *)aq[i]);
flat2_output = _mm_load_si128((__m128i *)flat2_oq[i]);
work_a = _mm_andnot_si128(flat2, work_a);
flat2_output = _mm_and_si128(flat2, flat2_output);
@@ -504,7 +503,7 @@
DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]);
__m128i mask, hev, flat;
const __m128i zero = _mm_set1_epi16(0);
- __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4;
+ __m128i p3, p2, p1, p0, q0, q1, q2, q3;
const unsigned int extended_thresh = _thresh[0] * 0x01010101u;
const unsigned int extended_limit = _limit[0] * 0x01010101u;
const unsigned int extended_blimit = _blimit[0] * 0x01010101u;
@@ -515,7 +514,6 @@
const __m128i blimit =
_mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_blimit), 0);
- p4 = _mm_loadu_si128((__m128i *)(s - 5 * p));
p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
@@ -524,7 +522,6 @@
q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
- q4 = _mm_loadu_si128((__m128i *)(s + 4 * p));
{
const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0),
_mm_subs_epu8(p0, p1));
@@ -573,11 +570,6 @@
_mm_or_si128(_mm_subs_epu8(q3, q0),
_mm_subs_epu8(q0, q3)));
flat = _mm_max_epu8(work, flat);
- work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p4, p0),
- _mm_subs_epu8(p0, p4)),
- _mm_or_si128(_mm_subs_epu8(q4, q0),
- _mm_subs_epu8(q0, q4)));
- flat = _mm_max_epu8(work, flat);
flat = _mm_subs_epu8(flat, one);
flat = _mm_cmpeq_epi8(flat, zero);
flat = _mm_and_si128(flat, mask);
@@ -588,7 +580,6 @@
int i = 0;
do {
__m128i workp_a, workp_b, workp_shft;
- p4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 5 * p)), zero);
p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero);
p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero);
p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero);
@@ -597,11 +588,10 @@
q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero);
q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero);
q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero);
- q4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 4 * p)), zero);
- workp_a = _mm_add_epi16(_mm_add_epi16(p4, p3), _mm_add_epi16(p2, p1));
+ workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1));
workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0);
- workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p4);
+ workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3);
workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
_mm_storel_epi64((__m128i *)&flat_op2[i*8],
_mm_packus_epi16(workp_shft, workp_shft));
@@ -611,7 +601,7 @@
_mm_storel_epi64((__m128i *)&flat_op1[i*8],
_mm_packus_epi16(workp_shft, workp_shft));
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p4), q2);
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2);
workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0);
workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
_mm_storel_epi64((__m128i *)&flat_op0[i*8],
@@ -623,13 +613,13 @@
_mm_storel_epi64((__m128i *)&flat_oq0[i*8],
_mm_packus_epi16(workp_shft, workp_shft));
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q4);
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3);
workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1);
workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
_mm_storel_epi64((__m128i *)&flat_oq1[i*8],
_mm_packus_epi16(workp_shft, workp_shft));
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q4);
+ workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3);
workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2);
workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
_mm_storel_epi64((__m128i *)&flat_oq2[i*8],
@@ -813,8 +803,8 @@
_mm_loadl_epi64((__m128i *)(src + 120)));
}
-static __inline void transpose8x16(unsigned char *in0, unsigned char *in1,
- int in_p, unsigned char *out, int out_p) {
+static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1,
+ int in_p, unsigned char *out, int out_p) {
__m128i x0, x1, x2, x3, x4, x5, x6, x7;
__m128i x8, x9, x10, x11, x12, x13, x14, x15;
@@ -879,9 +869,9 @@
_mm_storeu_si128((__m128i *)(out + 7 * out_p), _mm_unpackhi_epi64(x7, x15));
}
-static __inline void transpose(unsigned char *src[], int in_p,
- unsigned char *dst[], int out_p,
- int num_8x8_to_transpose) {
+static INLINE void transpose(unsigned char *src[], int in_p,
+ unsigned char *dst[], int out_p,
+ int num_8x8_to_transpose) {
int idx8x8 = 0;
__m128i x0, x1, x2, x3, x4, x5, x6, x7;
do {
--- a/vp9/common/x86/vp9_postproc_mmx.asm
+++ b/vp9/common/x86/vp9_postproc_mmx.asm
@@ -459,11 +459,11 @@
%undef flimit2
-;void vp9_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
+;void vp9_plane_add_noise_mmx (unsigned char *start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
-; unsigned int Width, unsigned int Height, int Pitch)
+; unsigned int width, unsigned int height, int pitch)
extern sym(rand)
global sym(vp9_plane_add_noise_mmx) PRIVATE
sym(vp9_plane_add_noise_mmx):
--- a/vp9/common/x86/vp9_postproc_sse2.asm
+++ b/vp9/common/x86/vp9_postproc_sse2.asm
@@ -624,11 +624,11 @@
%undef flimit4
-;void vp9_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise,
+;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
-; unsigned int Width, unsigned int Height, int Pitch)
+; unsigned int width, unsigned int height, int pitch)
extern sym(rand)
global sym(vp9_plane_add_noise_wmt) PRIVATE
sym(vp9_plane_add_noise_wmt):
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -21,34 +21,92 @@
;
;*************************************************************************************/
-;void vp9_filter_block1d8_v8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
+%macro VERTx4 1
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
+
+ punpcklqdq xmm0, xmm0
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+
+ movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
+
+%if ABI_IS_32BIT=0
+ movsxd r8, DWORD PTR arg(3) ;out_pitch
+%endif
+ mov rax, rsi
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+ add rax, rdx
+
+ lea rbx, [rdx + rdx*4]
+ add rbx, rdx ;pitch * 6
+
+.loop:
+ movd xmm0, [rsi] ;A
+ movd xmm1, [rsi + rdx] ;B
+ movd xmm2, [rsi + rdx * 2] ;C
+ movd xmm3, [rax + rdx * 2] ;D
+ movd xmm4, [rsi + rdx * 4] ;E
+ movd xmm5, [rax + rdx * 4] ;F
+
+ punpcklbw xmm0, xmm1 ;A B
+ punpcklbw xmm2, xmm3 ;C D
+ punpcklbw xmm4, xmm5 ;E F
+
+ movd xmm6, [rsi + rbx] ;G
+ movd xmm7, [rax + rbx] ;H
+
+ pmaddubsw xmm0, k0k1
+ pmaddubsw xmm2, k2k3
+ punpcklbw xmm6, xmm7 ;G H
+ pmaddubsw xmm4, k4k5
+ pmaddubsw xmm6, k6k7
+
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
+ paddsw xmm4, xmm6
+ paddsw xmm0, xmm4
+
+ psraw xmm0, 7
+ packuswb xmm0, xmm0
+
+ add rsi, rdx
+ add rax, rdx
+%if %1
+ movd xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
+ movd [rdi], xmm0
+
+%if ABI_IS_32BIT
+ add rdi, DWORD PTR arg(3) ;out_pitch
+%else
+ add rdi, r8
+%endif
+ dec rcx
+ jnz .loop
+%endm
+
+%macro VERTx8 1
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
@@ -86,7 +144,7 @@
lea rbx, [rdx + rdx*4]
add rbx, rdx ;pitch * 6
-.vp9_filter_block1d8_v8_ssse3_loop:
+.loop:
movq xmm0, [rsi] ;A
movq xmm1, [rsi + rdx] ;B
movq xmm2, [rsi + rdx * 2] ;C
@@ -117,7 +175,10 @@
add rsi, rdx
add rax, rdx
-
+%if %1
+ movq xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
movq [rdi], xmm0
%if ABI_IS_32BIT
@@ -126,47 +187,11 @@
add rdi, r8
%endif
dec rcx
- jnz .vp9_filter_block1d8_v8_ssse3_loop
+ jnz .loop
+%endm
- add rsp, 16*5
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-;void vp9_filter_block1d16_v8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
+%macro VERTx16 1
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
@@ -204,7 +229,7 @@
lea rbx, [rdx + rdx*4]
add rbx, rdx ;pitch * 6
-.vp9_filter_block1d16_v8_ssse3_loop:
+.loop:
movq xmm0, [rsi] ;A
movq xmm1, [rsi + rdx] ;B
movq xmm2, [rsi + rdx * 2] ;C
@@ -232,7 +257,10 @@
psraw xmm0, 7
packuswb xmm0, xmm0
-
+%if %1
+ movq xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
movq [rdi], xmm0
movq xmm0, [rsi + 8] ;A
@@ -267,6 +295,10 @@
add rsi, rdx
add rax, rdx
+%if %1
+ movq xmm1, [rdi+8]
+ pavgb xmm0, xmm1
+%endif
movq [rdi+8], xmm0
@@ -276,8 +308,39 @@
add rdi, r8
%endif
dec rcx
- jnz .vp9_filter_block1d16_v8_ssse3_loop
+ jnz .loop
+%endm
+;void vp9_filter_block1d8_v8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pitch,
+; unsigned char *output_ptr,
+; unsigned int out_pitch,
+; unsigned int output_height,
+; short *filter
+;)
+global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ VERTx4 0
+
add rsp, 16*5
pop rsp
pop rbx
@@ -289,24 +352,24 @@
pop rbp
ret
-;void vp9_filter_block1d8_h8_ssse3
+;void vp9_filter_block1d8_v8_ssse3
;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
+; unsigned char *src_ptr,
+; unsigned int src_pitch,
+; unsigned char *output_ptr,
+; unsigned int out_pitch,
+; unsigned int output_height,
; short *filter
;)
-global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h8_ssse3):
+global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v8_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
- GET_GOT rbx
push rsi
push rdi
+ push rbx
; end prolog
ALIGN_STACK 16, rax
@@ -317,6 +380,162 @@
%define k6k7 [rsp + 16*3]
%define krd [rsp + 16*4]
+ VERTx8 0
+
+ add rsp, 16*5
+ pop rsp
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d16_v8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pitch,
+; unsigned char *output_ptr,
+; unsigned int out_pitch,
+; unsigned int output_height,
+; short *filter
+;)
+global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ VERTx16 0
+
+ add rsp, 16*5
+ pop rsp
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+global sym(vp9_filter_block1d4_v8_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v8_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ VERTx4 1
+
+ add rsp, 16*5
+ pop rsp
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_v8_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v8_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ VERTx8 1
+
+ add rsp, 16*5
+ pop rsp
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_v8_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v8_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ push rbx
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ VERTx16 1
+
+ add rsp, 16*5
+ pop rsp
+ pop rbx
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+%macro HORIZx4 1
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
@@ -340,19 +559,16 @@
pshufd xmm5, xmm5, 0
movdqa k4k5, xmm2
movdqa k6k7, xmm3
-; movdqa krd, xmm5
+ movdqa krd, xmm5
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rdx, dword ptr arg(3) ;output_pitch
movsxd rcx, dword ptr arg(4) ;output_height
-.filter_block1d8_h8_rowloop_ssse3:
+.loop:
movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
-; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11
movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
-;note: if we create a k0_k7 filter, we can save a pshufb
-; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11
punpcklqdq xmm0, xmm3
movdqa xmm1, xmm0
@@ -371,59 +587,94 @@
pmaddubsw xmm4, k6k7
paddsw xmm0, xmm1
- paddsw xmm0, xmm2
- paddsw xmm0, xmm5
paddsw xmm0, xmm4
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
-
+%if %1
+ movd xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
lea rsi, [rsi + rax]
- movq [rdi], xmm0
+ movd [rdi], xmm0
lea rdi, [rdi + rdx]
dec rcx
- jnz .filter_block1d8_h8_rowloop_ssse3
+ jnz .loop
+%endm
- add rsp, 16*5
- pop rsp
+%macro HORIZx8 1
+ mov rdx, arg(5) ;filter ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
+ movdqa xmm4, [rdx] ;load filters
+ movd xmm5, rcx
+ packsswb xmm4, xmm4
+ pshuflw xmm0, xmm4, 0b ;k0_k1
+ pshuflw xmm1, xmm4, 01010101b ;k2_k3
+ pshuflw xmm2, xmm4, 10101010b ;k4_k5
+ pshuflw xmm3, xmm4, 11111111b ;k6_k7
-;void vp9_filter_block1d16_h8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
+ punpcklqdq xmm0, xmm0
+ punpcklqdq xmm1, xmm1
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
+ movdqa k0k1, xmm0
+ movdqa k2k3, xmm1
+ pshufd xmm5, xmm5, 0
+ movdqa k4k5, xmm2
+ movdqa k6k7, xmm3
+ movdqa krd, xmm5
+ movsxd rax, dword ptr arg(1) ;src_pixels_per_line
+ movsxd rdx, dword ptr arg(3) ;output_pitch
+ movsxd rcx, dword ptr arg(4) ;output_height
+
+.loop:
+ movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
+
+ movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
+ punpcklqdq xmm0, xmm3
+
+ movdqa xmm1, xmm0
+ pshufb xmm0, [GLOBAL(shuf_t0t1)]
+ pmaddubsw xmm0, k0k1
+
+ movdqa xmm2, xmm1
+ pshufb xmm1, [GLOBAL(shuf_t2t3)]
+ pmaddubsw xmm1, k2k3
+
+ movdqa xmm4, xmm2
+ pshufb xmm2, [GLOBAL(shuf_t4t5)]
+ pmaddubsw xmm2, k4k5
+
+ pshufb xmm4, [GLOBAL(shuf_t6t7)]
+ pmaddubsw xmm4, k6k7
+
+ paddsw xmm0, xmm1
+ paddsw xmm0, xmm4
+ paddsw xmm0, xmm2
+ paddsw xmm0, krd
+ psraw xmm0, 7
+ packuswb xmm0, xmm0
+%if %1
+ movq xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
+
+ lea rsi, [rsi + rax]
+ movq [rdi], xmm0
+
+ lea rdi, [rdi + rdx]
+ dec rcx
+ jnz .loop
+%endm
+
+%macro HORIZx16 1
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
@@ -453,13 +704,10 @@
movsxd rdx, dword ptr arg(3) ;output_pitch
movsxd rcx, dword ptr arg(4) ;output_height
-.filter_block1d16_h8_rowloop_ssse3:
+.loop:
movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
-; movq xmm3, [rsi + 4] ; 4 5 6 7 8 9 10 11
movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
-;note: if we create a k0_k7 filter, we can save a pshufb
-; punpcklbw xmm0, xmm3 ; -3 4 -2 5 -1 6 0 7 1 8 2 9 3 10 4 11
punpcklqdq xmm0, xmm3
movdqa xmm1, xmm0
@@ -486,10 +734,7 @@
movq xmm3, [rsi + 5]
-; movq xmm7, [rsi + 12]
movq xmm7, [rsi + 13]
-;note: same as above
-; punpcklbw xmm3, xmm7
punpcklqdq xmm3, xmm7
movdqa xmm1, xmm3
@@ -508,12 +753,16 @@
pmaddubsw xmm4, k6k7
paddsw xmm3, xmm1
+ paddsw xmm3, xmm4
paddsw xmm3, xmm2
paddsw xmm3, krd
- paddsw xmm3, xmm4
psraw xmm3, 7
packuswb xmm3, xmm3
punpcklqdq xmm0, xmm3
+%if %1
+ movdqa xmm1, [rdi]
+ pavgb xmm0, xmm1
+%endif
lea rsi, [rsi + rax]
movdqa [rdi], xmm0
@@ -520,8 +769,39 @@
lea rdi, [rdi + rdx]
dec rcx
- jnz .filter_block1d16_h8_rowloop_ssse3
+ jnz .loop
+%endm
+;void vp9_filter_block1d4_h8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; unsigned int output_pitch,
+; unsigned int output_height,
+; short *filter
+;)
+global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ HORIZx4 0
+
add rsp, 16*5
pop rsp
@@ -534,7 +814,188 @@
pop rbp
ret
+;void vp9_filter_block1d8_h8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; unsigned int output_pitch,
+; unsigned int output_height,
+; short *filter
+;)
+global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ HORIZx8 0
+
+ add rsp, 16*5
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp9_filter_block1d16_h8_ssse3
+;(
+; unsigned char *src_ptr,
+; unsigned int src_pixels_per_line,
+; unsigned char *output_ptr,
+; unsigned int output_pitch,
+; unsigned int output_height,
+; short *filter
+;)
+global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h8_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ HORIZx16 0
+
+ add rsp, 16*5
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_h8_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h8_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ HORIZx4 1
+
+ add rsp, 16*5
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_h8_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h8_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ HORIZx8 1
+
+ add rsp, 16*5
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_h8_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h8_avg_ssse3):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ALIGN_STACK 16, rax
+ sub rsp, 16*5
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ %define krd [rsp + 16*4]
+
+ HORIZx16 1
+
+ add rsp, 16*5
+ pop rsp
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
SECTION_RODATA
align 16
shuf_t0t1:
--- a/vp9/common/x86/vp9_subpixel_mmx.asm
+++ /dev/null
@@ -1,268 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-%define BLOCK_HEIGHT_WIDTH 4
-%define vp9_filter_weight 128
-%define VP9_FILTER_SHIFT 7
-
-
-;void vp9_filter_block1d_h6_mmx
-;(
-; unsigned char *src_ptr,
-; unsigned short *output_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned int pixel_step,
-; unsigned int output_height,
-; unsigned int output_width,
-; short * vp9_filter
-;)
-global sym(vp9_filter_block1d_h6_mmx) PRIVATE
-sym(vp9_filter_block1d_h6_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rdx, arg(6) ;vp9_filter
-
- movq mm1, [rdx + 16] ; do both the negative taps first!!!
- movq mm2, [rdx + 32] ;
- movq mm6, [rdx + 48] ;
- movq mm7, [rdx + 64] ;
-
- mov rdi, arg(1) ;output_ptr
- mov rsi, arg(0) ;src_ptr
- movsxd rcx, dword ptr arg(4) ;output_height
- movsxd rax, dword ptr arg(5) ;output_width ; destination pitch?
- pxor mm0, mm0 ; mm0 = 00000000
-
-.nextrow:
- movq mm3, [rsi-2] ; mm3 = p-2..p5
- movq mm4, mm3 ; mm4 = p-2..p5
- psrlq mm3, 8 ; mm3 = p-1..p5
- punpcklbw mm3, mm0 ; mm3 = p-1..p2
- pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
-
- movq mm5, mm4 ; mm5 = p-2..p5
- punpckhbw mm4, mm0 ; mm5 = p2..p5
- pmullw mm4, mm7 ; mm5 *= kernel 4 modifiers
- paddsw mm3, mm4 ; mm3 += mm5
-
- movq mm4, mm5 ; mm4 = p-2..p5;
- psrlq mm5, 16 ; mm5 = p0..p5;
- punpcklbw mm5, mm0 ; mm5 = p0..p3
- pmullw mm5, mm2 ; mm5 *= kernel 2 modifiers
- paddsw mm3, mm5 ; mm3 += mm5
-
- movq mm5, mm4 ; mm5 = p-2..p5
- psrlq mm4, 24 ; mm4 = p1..p5
- punpcklbw mm4, mm0 ; mm4 = p1..p4
- pmullw mm4, mm6 ; mm5 *= kernel 3 modifiers
- paddsw mm3, mm4 ; mm3 += mm5
-
- ; do outer positive taps
- movd mm4, [rsi+3]
- punpcklbw mm4, mm0 ; mm5 = p3..p6
- pmullw mm4, [rdx+80] ; mm5 *= kernel 0 modifiers
- paddsw mm3, mm4 ; mm3 += mm5
-
- punpcklbw mm5, mm0 ; mm5 = p-2..p1
- pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers
- paddsw mm3, mm5 ; mm3 += mm5
-
- paddsw mm3, [GLOBAL(rd)] ; mm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
- packuswb mm3, mm0 ; pack and unpack to saturate
- punpcklbw mm3, mm0 ;
-
- movq [rdi], mm3 ; store the results in the destination
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(2) ;src_pixels_per_line ; next line
- add rdi, rax;
-%else
- movsxd r8, dword ptr arg(2) ;src_pixels_per_line
- add rdi, rax;
-
- add rsi, r8 ; next line
-%endif
-
- dec rcx ; decrement count
- jnz .nextrow ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_filter_block1dc_v6_mmx
-;(
-; short *src_ptr,
-; unsigned char *output_ptr,
-; int output_pitch,
-; unsigned int pixels_per_line,
-; unsigned int pixel_step,
-; unsigned int output_height,
-; unsigned int output_width,
-; short * vp9_filter
-;)
-global sym(vp9_filter_block1dc_v6_mmx) PRIVATE
-sym(vp9_filter_block1dc_v6_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movq mm5, [GLOBAL(rd)]
- push rbx
- mov rbx, arg(7) ;vp9_filter
- movq mm1, [rbx + 16] ; do both the negative taps first!!!
- movq mm2, [rbx + 32] ;
- movq mm6, [rbx + 48] ;
- movq mm7, [rbx + 64] ;
-
- movsxd rdx, dword ptr arg(3) ;pixels_per_line
- mov rdi, arg(1) ;output_ptr
- mov rsi, arg(0) ;src_ptr
- sub rsi, rdx
- sub rsi, rdx
- movsxd rcx, DWORD PTR arg(5) ;output_height
- movsxd rax, DWORD PTR arg(2) ;output_pitch ; destination pitch?
- pxor mm0, mm0 ; mm0 = 00000000
-
-
-.nextrow_cv:
- movq mm3, [rsi+rdx] ; mm3 = p0..p8 = row -1
- pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
-
-
- movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2
- pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers.
- paddsw mm3, mm4 ; mm3 += mm4
-
- movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0
- pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers.
- paddsw mm3, mm4 ; mm3 += mm4
-
- movq mm4, [rsi] ; mm4 = p0..p3 = row -2
- pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers.
- paddsw mm3, mm4 ; mm3 += mm4
-
-
- add rsi, rdx ; move source forward 1 line to avoid 3 * pitch
- movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1
- pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers.
- paddsw mm3, mm4 ; mm3 += mm4
-
- movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3
- pmullw mm4, [rbx +80] ; mm4 *= kernel 3 modifiers.
- paddsw mm3, mm4 ; mm3 += mm4
-
-
- paddsw mm3, mm5 ; mm3 += round value
- psraw mm3, VP9_FILTER_SHIFT ; mm3 /= 128
- packuswb mm3, mm0 ; pack and saturate
-
- movd [rdi],mm3 ; store the results in the destination
- ; the subsequent iterations repeat 3 out of 4 of these reads. Since the
- ; recon block should be in cache this shouldn't cost much. Its obviously
- ; avoidable!!!.
- lea rdi, [rdi+rax] ;
- dec rcx ; decrement count
- jnz .nextrow_cv ; next row
-
- pop rbx
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-rd:
- times 4 dw 0x40
-
-align 16
-global HIDDEN_DATA(sym(vp9_six_tap_mmx))
-sym(vp9_six_tap_mmx):
- times 8 dw 0
- times 8 dw 0
- times 8 dw 128
- times 8 dw 0
- times 8 dw 0
- times 8 dw 0
-
- times 8 dw 0
- times 8 dw -6
- times 8 dw 123
- times 8 dw 12
- times 8 dw -1
- times 8 dw 0
-
- times 8 dw 2
- times 8 dw -11
- times 8 dw 108
- times 8 dw 36
- times 8 dw -8
- times 8 dw 1
-
- times 8 dw 0
- times 8 dw -9
- times 8 dw 93
- times 8 dw 50
- times 8 dw -6
- times 8 dw 0
-
- times 8 dw 3
- times 8 dw -16
- times 8 dw 77
- times 8 dw 77
- times 8 dw -16
- times 8 dw 3
-
- times 8 dw 0
- times 8 dw -6
- times 8 dw 50
- times 8 dw 93
- times 8 dw -9
- times 8 dw 0
-
- times 8 dw 1
- times 8 dw -8
- times 8 dw 36
- times 8 dw 108
- times 8 dw -11
- times 8 dw 2
-
- times 8 dw 0
- times 8 dw -1
- times 8 dw 12
- times 8 dw 123
- times 8 dw -6
- times 8 dw 0
-
--- a/vp9/common/x86/vp9_subpixel_sse2.asm
+++ /dev/null
@@ -1,1372 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%define BLOCK_HEIGHT_WIDTH 4
-%define VP9_FILTER_WEIGHT 128
-%define VP9_FILTER_SHIFT 7
-
-
-;/************************************************************************************
-; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
-; input pixel array has output_height rows. This routine assumes that output_height is an
-; even number. This function handles 8 pixels in horizontal direction, calculating ONE
-; rows each iteration to take advantage of the 128 bits operations.
-;*************************************************************************************/
-;void vp9_filter_block1d8_h6_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned short *output_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned int pixel_step,
-; unsigned int output_height,
-; unsigned int output_width,
-; short *vp9_filter
-;)
-global sym(vp9_filter_block1d8_h6_sse2) PRIVATE
-sym(vp9_filter_block1d8_h6_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rdx, arg(6) ;vp9_filter
- mov rsi, arg(0) ;src_ptr
-
- mov rdi, arg(1) ;output_ptr
-
- movsxd rcx, dword ptr arg(4) ;output_height
- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(5) ;output_width
-%endif
- pxor xmm0, xmm0 ; clear xmm0 for unpack
-
-.filter_block1d8_h6_rowloop:
- movq xmm3, MMWORD PTR [rsi - 2]
- movq xmm1, MMWORD PTR [rsi + 6]
-
- prefetcht2 [rsi+rax-2]
-
- pslldq xmm1, 8
- por xmm1, xmm3
-
- movdqa xmm4, xmm1
- movdqa xmm5, xmm1
-
- movdqa xmm6, xmm1
- movdqa xmm7, xmm1
-
- punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
- psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
-
- pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
- punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
-
- psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
- pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
-
-
- punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
- psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-
- pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
-
- punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
- psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-
- pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
-
- punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
- psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
-
-
- pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
-
- punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
- pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
-
-
- paddsw xmm4, xmm7
- paddsw xmm4, xmm5
-
- paddsw xmm4, xmm3
- paddsw xmm4, xmm6
-
- paddsw xmm4, xmm1
- paddsw xmm4, [GLOBAL(rd)]
-
- psraw xmm4, 7
-
- packuswb xmm4, xmm0
- punpcklbw xmm4, xmm0
-
- movdqa XMMWORD Ptr [rdi], xmm4
- lea rsi, [rsi + rax]
-
-%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(5) ;[output_width]
-%else
- add rdi, r8
-%endif
- dec rcx
-
- jnz .filter_block1d8_h6_rowloop ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_filter_block1d16_h6_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned short *output_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned int pixel_step,
-; unsigned int output_height,
-; unsigned int output_width,
-; short *vp9_filter
-;)
-;/************************************************************************************
-; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
-; input pixel array has output_height rows. This routine assumes that output_height is an
-; even number. This function handles 8 pixels in horizontal direction, calculating ONE
-; rows each iteration to take advantage of the 128 bits operations.
-;*************************************************************************************/
-global sym(vp9_filter_block1d16_h6_sse2) PRIVATE
-sym(vp9_filter_block1d16_h6_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rdx, arg(6) ;vp9_filter
- mov rsi, arg(0) ;src_ptr
-
- mov rdi, arg(1) ;output_ptr
-
- movsxd rcx, dword ptr arg(4) ;output_height
- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(5) ;output_width
-%endif
-
- pxor xmm0, xmm0 ; clear xmm0 for unpack
-
-.filter_block1d16_h6_sse2_rowloop:
- movq xmm3, MMWORD PTR [rsi - 2]
- movq xmm1, MMWORD PTR [rsi + 6]
-
- movq xmm2, MMWORD PTR [rsi +14]
- pslldq xmm2, 8
-
- por xmm2, xmm1
- prefetcht2 [rsi+rax-2]
-
- pslldq xmm1, 8
- por xmm1, xmm3
-
- movdqa xmm4, xmm1
- movdqa xmm5, xmm1
-
- movdqa xmm6, xmm1
- movdqa xmm7, xmm1
-
- punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
- psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
-
- pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
- punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
-
- psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
- pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
-
-
- punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
- psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-
- pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
-
- punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
- psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-
- pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
-
- punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
- psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
-
-
- pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
-
- punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
- pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
-
- paddsw xmm4, xmm7
- paddsw xmm4, xmm5
-
- paddsw xmm4, xmm3
- paddsw xmm4, xmm6
-
- paddsw xmm4, xmm1
- paddsw xmm4, [GLOBAL(rd)]
-
- psraw xmm4, 7
-
- packuswb xmm4, xmm0
- punpcklbw xmm4, xmm0
-
- movdqa XMMWORD Ptr [rdi], xmm4
-
- movdqa xmm3, xmm2
- movdqa xmm4, xmm2
-
- movdqa xmm5, xmm2
- movdqa xmm6, xmm2
-
- movdqa xmm7, xmm2
-
- punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
- psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
-
- pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
- punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
-
- psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
- pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
-
-
- punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
- psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-
- pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
-
- punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
- psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-
- pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
-
- punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
- psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
-
- pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
-
- punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
- pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
-
-
- paddsw xmm4, xmm7
- paddsw xmm4, xmm5
-
- paddsw xmm4, xmm3
- paddsw xmm4, xmm6
-
- paddsw xmm4, xmm2
- paddsw xmm4, [GLOBAL(rd)]
-
- psraw xmm4, 7
-
- packuswb xmm4, xmm0
- punpcklbw xmm4, xmm0
-
- movdqa XMMWORD Ptr [rdi+16], xmm4
-
- lea rsi, [rsi + rax]
-%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(5) ;[output_width]
-%else
- add rdi, r8
-%endif
-
- dec rcx
- jnz .filter_block1d16_h6_sse2_rowloop ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_filter_block1d8_v6_sse2
-;(
-; short *src_ptr,
-; unsigned char *output_ptr,
-; int dst_ptich,
-; unsigned int pixels_per_line,
-; unsigned int pixel_step,
-; unsigned int output_height,
-; unsigned int output_width,
-; short * vp9_filter
-;)
-;/************************************************************************************
-; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The
-; input pixel array has output_height rows.
-;*************************************************************************************/
-global sym(vp9_filter_block1d8_v6_sse2) PRIVATE
-sym(vp9_filter_block1d8_v6_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rax, arg(7) ;vp9_filter
- movsxd rdx, dword ptr arg(3) ;pixels_per_line
-
- mov rdi, arg(1) ;output_ptr
- mov rsi, arg(0) ;src_ptr
-
- sub rsi, rdx
- sub rsi, rdx
-
- movsxd rcx, DWORD PTR arg(5) ;[output_height]
- pxor xmm0, xmm0 ; clear xmm0
-
- movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(2) ; dst_ptich
-%endif
-
-.vp9_filter_block1d8_v6_sse2_loop:
- movdqa xmm1, XMMWORD PTR [rsi]
- pmullw xmm1, [rax]
-
- movdqa xmm2, XMMWORD PTR [rsi + rdx]
- pmullw xmm2, [rax + 16]
-
- movdqa xmm3, XMMWORD PTR [rsi + rdx * 2]
- pmullw xmm3, [rax + 32]
-
- movdqa xmm5, XMMWORD PTR [rsi + rdx * 4]
- pmullw xmm5, [rax + 64]
-
- add rsi, rdx
- movdqa xmm4, XMMWORD PTR [rsi + rdx * 2]
-
- pmullw xmm4, [rax + 48]
- movdqa xmm6, XMMWORD PTR [rsi + rdx * 4]
-
- pmullw xmm6, [rax + 80]
-
- paddsw xmm2, xmm5
- paddsw xmm2, xmm3
-
- paddsw xmm2, xmm1
- paddsw xmm2, xmm4
-
- paddsw xmm2, xmm6
- paddsw xmm2, xmm7
-
- psraw xmm2, 7
- packuswb xmm2, xmm0 ; pack and saturate
-
- movq QWORD PTR [rdi], xmm2 ; store the results in the destination
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(2) ;[dst_ptich]
-%else
- add rdi, r8
-%endif
- dec rcx ; decrement count
- jnz .vp9_filter_block1d8_v6_sse2_loop ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_filter_block1d16_v6_sse2
-;(
-; unsigned short *src_ptr,
-; unsigned char *output_ptr,
-; int dst_ptich,
-; unsigned int pixels_per_line,
-; unsigned int pixel_step,
-; unsigned int output_height,
-; unsigned int output_width,
-; const short *vp9_filter
-;)
-;/************************************************************************************
-; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
-; input pixel array has output_height rows.
-;*************************************************************************************/
-global sym(vp9_filter_block1d16_v6_sse2) PRIVATE
-sym(vp9_filter_block1d16_v6_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rax, arg(7) ;vp9_filter
- movsxd rdx, dword ptr arg(3) ;pixels_per_line
-
- mov rdi, arg(1) ;output_ptr
- mov rsi, arg(0) ;src_ptr
-
- sub rsi, rdx
- sub rsi, rdx
-
- movsxd rcx, DWORD PTR arg(5) ;[output_height]
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(2) ; dst_ptich
-%endif
-
-.vp9_filter_block1d16_v6_sse2_loop:
-; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order.
- movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2
- movdqa xmm2, XMMWORD PTR [rsi + rdx + 16]
- pmullw xmm1, [rax + 16]
- pmullw xmm2, [rax + 16]
-
- movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5
- movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16]
- pmullw xmm3, [rax + 64]
- pmullw xmm4, [rax + 64]
-
- movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3
- movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16]
- pmullw xmm5, [rax + 32]
- pmullw xmm6, [rax + 32]
-
- movdqa xmm7, XMMWORD PTR [rsi] ; line 1
- movdqa xmm0, XMMWORD PTR [rsi + 16]
- pmullw xmm7, [rax]
- pmullw xmm0, [rax]
-
- paddsw xmm1, xmm3
- paddsw xmm2, xmm4
- paddsw xmm1, xmm5
- paddsw xmm2, xmm6
- paddsw xmm1, xmm7
- paddsw xmm2, xmm0
-
- add rsi, rdx
-
- movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4
- movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16]
- pmullw xmm3, [rax + 48]
- pmullw xmm4, [rax + 48]
-
- movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6
- movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16]
- pmullw xmm5, [rax + 80]
- pmullw xmm6, [rax + 80]
-
- movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
- pxor xmm0, xmm0 ; clear xmm0
-
- paddsw xmm1, xmm3
- paddsw xmm2, xmm4
- paddsw xmm1, xmm5
- paddsw xmm2, xmm6
-
- paddsw xmm1, xmm7
- paddsw xmm2, xmm7
-
- psraw xmm1, 7
- psraw xmm2, 7
-
- packuswb xmm1, xmm2 ; pack and saturate
- movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(2) ;[dst_ptich]
-%else
- add rdi, r8
-%endif
- dec rcx ; decrement count
- jnz .vp9_filter_block1d16_v6_sse2_loop ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_filter_block1d8_h6_only_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; int dst_ptich,
-; unsigned int output_height,
-; const short *vp9_filter
-;)
-; First-pass filter only when yoffset==0
-global sym(vp9_filter_block1d8_h6_only_sse2) PRIVATE
-sym(vp9_filter_block1d8_h6_only_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rdx, arg(5) ;vp9_filter
- mov rsi, arg(0) ;src_ptr
-
- mov rdi, arg(2) ;output_ptr
-
- movsxd rcx, dword ptr arg(4) ;output_height
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(3) ;dst_ptich
-%endif
- pxor xmm0, xmm0 ; clear xmm0 for unpack
-
-.filter_block1d8_h6_only_rowloop:
- movq xmm3, MMWORD PTR [rsi - 2]
- movq xmm1, MMWORD PTR [rsi + 6]
-
- prefetcht2 [rsi+rax-2]
-
- pslldq xmm1, 8
- por xmm1, xmm3
-
- movdqa xmm4, xmm1
- movdqa xmm5, xmm1
-
- movdqa xmm6, xmm1
- movdqa xmm7, xmm1
-
- punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
- psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
-
- pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
- punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
-
- psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
- pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
-
-
- punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
- psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-
- pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
-
- punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
- psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-
- pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
-
- punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
- psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
-
-
- pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
-
- punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
- pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
-
-
- paddsw xmm4, xmm7
- paddsw xmm4, xmm5
-
- paddsw xmm4, xmm3
- paddsw xmm4, xmm6
-
- paddsw xmm4, xmm1
- paddsw xmm4, [GLOBAL(rd)]
-
- psraw xmm4, 7
-
- packuswb xmm4, xmm0
-
- movq QWORD PTR [rdi], xmm4 ; store the results in the destination
- lea rsi, [rsi + rax]
-
-%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(3) ;dst_ptich
-%else
- add rdi, r8
-%endif
- dec rcx
-
- jnz .filter_block1d8_h6_only_rowloop ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_filter_block1d16_h6_only_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; int dst_ptich,
-; unsigned int output_height,
-; const short *vp9_filter
-;)
-; First-pass filter only when yoffset==0
-global sym(vp9_filter_block1d16_h6_only_sse2) PRIVATE
-sym(vp9_filter_block1d16_h6_only_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rdx, arg(5) ;vp9_filter
- mov rsi, arg(0) ;src_ptr
-
- mov rdi, arg(2) ;output_ptr
-
- movsxd rcx, dword ptr arg(4) ;output_height
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(3) ;dst_ptich
-%endif
-
- pxor xmm0, xmm0 ; clear xmm0 for unpack
-
-.filter_block1d16_h6_only_sse2_rowloop:
- movq xmm3, MMWORD PTR [rsi - 2]
- movq xmm1, MMWORD PTR [rsi + 6]
-
- movq xmm2, MMWORD PTR [rsi +14]
- pslldq xmm2, 8
-
- por xmm2, xmm1
- prefetcht2 [rsi+rax-2]
-
- pslldq xmm1, 8
- por xmm1, xmm3
-
- movdqa xmm4, xmm1
- movdqa xmm5, xmm1
-
- movdqa xmm6, xmm1
- movdqa xmm7, xmm1
-
- punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
- psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
-
- pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
- punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
-
- psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
- pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
-
- punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
- psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-
- pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
-
- punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
- psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-
- pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
-
- punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
- psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
-
- pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
-
- punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
- pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
-
- paddsw xmm4, xmm7
- paddsw xmm4, xmm5
-
- paddsw xmm4, xmm3
- paddsw xmm4, xmm6
-
- paddsw xmm4, xmm1
- paddsw xmm4, [GLOBAL(rd)]
-
- psraw xmm4, 7
-
- packuswb xmm4, xmm0 ; lower 8 bytes
-
- movq QWORD Ptr [rdi], xmm4 ; store the results in the destination
-
- movdqa xmm3, xmm2
- movdqa xmm4, xmm2
-
- movdqa xmm5, xmm2
- movdqa xmm6, xmm2
-
- movdqa xmm7, xmm2
-
- punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
- psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
-
- pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
- punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
-
- psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
- pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
-
- punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
- psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
-
- pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
-
- punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
- psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
-
- pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
-
- punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
- psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
-
- pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
-
- punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
- pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
-
- paddsw xmm4, xmm7
- paddsw xmm4, xmm5
-
- paddsw xmm4, xmm3
- paddsw xmm4, xmm6
-
- paddsw xmm4, xmm2
- paddsw xmm4, [GLOBAL(rd)]
-
- psraw xmm4, 7
-
- packuswb xmm4, xmm0 ; higher 8 bytes
-
- movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination
-
- lea rsi, [rsi + rax]
-%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(3) ;dst_ptich
-%else
- add rdi, r8
-%endif
-
- dec rcx
- jnz .filter_block1d16_h6_only_sse2_rowloop ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_filter_block1d8_v6_only_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; int dst_ptich,
-; unsigned int output_height,
-; const short *vp9_filter
-;)
-; Second-pass filter only when xoffset==0
-global sym(vp9_filter_block1d8_v6_only_sse2) PRIVATE
-sym(vp9_filter_block1d8_v6_only_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
-
- movsxd rcx, dword ptr arg(4) ;output_height
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
-
- mov rax, arg(5) ;vp9_filter
-
- pxor xmm0, xmm0 ; clear xmm0
-
- movdqa xmm7, XMMWORD PTR [GLOBAL(rd)]
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(3) ; dst_ptich
-%endif
-
-.vp9_filter_block1d8_v6_only_sse2_loop:
- movq xmm1, MMWORD PTR [rsi]
- movq xmm2, MMWORD PTR [rsi + rdx]
- movq xmm3, MMWORD PTR [rsi + rdx * 2]
- movq xmm5, MMWORD PTR [rsi + rdx * 4]
- add rsi, rdx
- movq xmm4, MMWORD PTR [rsi + rdx * 2]
- movq xmm6, MMWORD PTR [rsi + rdx * 4]
-
- punpcklbw xmm1, xmm0
- pmullw xmm1, [rax]
-
- punpcklbw xmm2, xmm0
- pmullw xmm2, [rax + 16]
-
- punpcklbw xmm3, xmm0
- pmullw xmm3, [rax + 32]
-
- punpcklbw xmm5, xmm0
- pmullw xmm5, [rax + 64]
-
- punpcklbw xmm4, xmm0
- pmullw xmm4, [rax + 48]
-
- punpcklbw xmm6, xmm0
- pmullw xmm6, [rax + 80]
-
- paddsw xmm2, xmm5
- paddsw xmm2, xmm3
-
- paddsw xmm2, xmm1
- paddsw xmm2, xmm4
-
- paddsw xmm2, xmm6
- paddsw xmm2, xmm7
-
- psraw xmm2, 7
- packuswb xmm2, xmm0 ; pack and saturate
-
- movq QWORD PTR [rdi], xmm2 ; store the results in the destination
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;[dst_ptich]
-%else
- add rdi, r8
-%endif
- dec rcx ; decrement count
- jnz .vp9_filter_block1d8_v6_only_sse2_loop ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_unpack_block1d16_h6_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned short *output_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned int output_height,
-; unsigned int output_width
-;)
-global sym(vp9_unpack_block1d16_h6_sse2) PRIVATE
-sym(vp9_unpack_block1d16_h6_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(1) ;output_ptr
-
- movsxd rcx, dword ptr arg(3) ;output_height
- movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
-
- pxor xmm0, xmm0 ; clear xmm0 for unpack
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source
-%endif
-
-.unpack_block1d16_h6_sse2_rowloop:
- movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2
- movq xmm3, MMWORD PTR [rsi+8] ; make copy of xmm1
-
- punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
- punpcklbw xmm1, xmm0
-
- movdqa XMMWORD Ptr [rdi], xmm1
- movdqa XMMWORD Ptr [rdi + 16], xmm3
-
- lea rsi, [rsi + rax]
-%if ABI_IS_32BIT
- add rdi, DWORD Ptr arg(4) ;[output_width]
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .unpack_block1d16_h6_sse2_rowloop ; next row
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_bilinear_predict16x16_sse2
-;(
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; int xoffset,
-; int yoffset,
-; unsigned char *dst_ptr,
-; int dst_pitch
-;)
-extern sym(vp9_bilinear_filters_mmx)
-global sym(vp9_bilinear_predict16x16_sse2) PRIVATE
-sym(vp9_bilinear_predict16x16_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ;const short *HFilter = bilinear_filters_mmx[xoffset]
- ;const short *VFilter = bilinear_filters_mmx[yoffset]
-
- lea rcx, [GLOBAL(sym(vp9_bilinear_filters_mmx))]
- movsxd rax, dword ptr arg(2) ;xoffset
-
- cmp rax, 0 ;skip first_pass filter if xoffset=0
- je .b16x16_sp_only
-
- shl rax, 5
- add rax, rcx ;HFilter
-
- mov rdi, arg(4) ;dst_ptr
- mov rsi, arg(0) ;src_ptr
- movsxd rdx, dword ptr arg(5) ;dst_pitch
-
- movdqa xmm1, [rax]
- movdqa xmm2, [rax+16]
-
- movsxd rax, dword ptr arg(3) ;yoffset
-
- cmp rax, 0 ;skip second_pass filter if yoffset=0
- je .b16x16_fp_only
-
- shl rax, 5
- add rax, rcx ;VFilter
-
- lea rcx, [rdi+rdx*8]
- lea rcx, [rcx+rdx*8]
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
-
- pxor xmm0, xmm0
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(5) ;dst_pitch
-%endif
- ; get the first horizontal line done
- movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movdqa xmm4, xmm3 ; make a copy of current line
-
- punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
- punpckhbw xmm4, xmm0
-
- pmullw xmm3, xmm1
- pmullw xmm4, xmm1
-
- movdqu xmm5, [rsi+1]
- movdqa xmm6, xmm5
-
- punpcklbw xmm5, xmm0
- punpckhbw xmm6, xmm0
-
- pmullw xmm5, xmm2
- pmullw xmm6, xmm2
-
- paddw xmm3, xmm5
- paddw xmm4, xmm6
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw xmm4, [GLOBAL(rd)]
- psraw xmm4, VP9_FILTER_SHIFT
-
- movdqa xmm7, xmm3
- packuswb xmm7, xmm4
-
- add rsi, rdx ; next line
-.next_row:
- movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movdqa xmm4, xmm3 ; make a copy of current line
-
- punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
- punpckhbw xmm4, xmm0
-
- pmullw xmm3, xmm1
- pmullw xmm4, xmm1
-
- movdqu xmm5, [rsi+1]
- movdqa xmm6, xmm5
-
- punpcklbw xmm5, xmm0
- punpckhbw xmm6, xmm0
-
- pmullw xmm5, xmm2
- pmullw xmm6, xmm2
-
- paddw xmm3, xmm5
- paddw xmm4, xmm6
-
- movdqa xmm5, xmm7
- movdqa xmm6, xmm7
-
- punpcklbw xmm5, xmm0
- punpckhbw xmm6, xmm0
-
- pmullw xmm5, [rax]
- pmullw xmm6, [rax]
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw xmm4, [GLOBAL(rd)]
- psraw xmm4, VP9_FILTER_SHIFT
-
- movdqa xmm7, xmm3
- packuswb xmm7, xmm4
-
- pmullw xmm3, [rax+16]
- pmullw xmm4, [rax+16]
-
- paddw xmm3, xmm5
- paddw xmm4, xmm6
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw xmm4, [GLOBAL(rd)]
- psraw xmm4, VP9_FILTER_SHIFT
-
- packuswb xmm3, xmm4
- movdqa [rdi], xmm3 ; store the results in the destination
-
- add rsi, rdx ; next line
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(5) ;dst_pitch
-%else
- add rdi, r8
-%endif
-
- cmp rdi, rcx
- jne .next_row
-
- jmp .done
-
-.b16x16_sp_only:
- movsxd rax, dword ptr arg(3) ;yoffset
- shl rax, 5
- add rax, rcx ;VFilter
-
- mov rdi, arg(4) ;dst_ptr
- mov rsi, arg(0) ;src_ptr
- movsxd rdx, dword ptr arg(5) ;dst_pitch
-
- movdqa xmm1, [rax]
- movdqa xmm2, [rax+16]
-
- lea rcx, [rdi+rdx*8]
- lea rcx, [rcx+rdx*8]
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
-
- pxor xmm0, xmm0
-
- ; get the first horizontal line done
- movdqu xmm7, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
-
- add rsi, rax ; next line
-.next_row_spo:
- movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
-
- movdqa xmm5, xmm7
- movdqa xmm6, xmm7
-
- movdqa xmm4, xmm3 ; make a copy of current line
- movdqa xmm7, xmm3
-
- punpcklbw xmm5, xmm0
- punpckhbw xmm6, xmm0
- punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
- punpckhbw xmm4, xmm0
-
- pmullw xmm5, xmm1
- pmullw xmm6, xmm1
- pmullw xmm3, xmm2
- pmullw xmm4, xmm2
-
- paddw xmm3, xmm5
- paddw xmm4, xmm6
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw xmm4, [GLOBAL(rd)]
- psraw xmm4, VP9_FILTER_SHIFT
-
- packuswb xmm3, xmm4
- movdqa [rdi], xmm3 ; store the results in the destination
-
- add rsi, rax ; next line
- add rdi, rdx ;dst_pitch
- cmp rdi, rcx
- jne .next_row_spo
-
- jmp .done
-
-.b16x16_fp_only:
- lea rcx, [rdi+rdx*8]
- lea rcx, [rcx+rdx*8]
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- pxor xmm0, xmm0
-
-.next_row_fpo:
- movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movdqa xmm4, xmm3 ; make a copy of current line
-
- punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
- punpckhbw xmm4, xmm0
-
- pmullw xmm3, xmm1
- pmullw xmm4, xmm1
-
- movdqu xmm5, [rsi+1]
- movdqa xmm6, xmm5
-
- punpcklbw xmm5, xmm0
- punpckhbw xmm6, xmm0
-
- pmullw xmm5, xmm2
- pmullw xmm6, xmm2
-
- paddw xmm3, xmm5
- paddw xmm4, xmm6
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw xmm4, [GLOBAL(rd)]
- psraw xmm4, VP9_FILTER_SHIFT
-
- packuswb xmm3, xmm4
- movdqa [rdi], xmm3 ; store the results in the destination
-
- add rsi, rax ; next line
- add rdi, rdx ; dst_pitch
- cmp rdi, rcx
- jne .next_row_fpo
-
-.done:
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_bilinear_predict8x8_sse2
-;(
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; int xoffset,
-; int yoffset,
-; unsigned char *dst_ptr,
-; int dst_pitch
-;)
-extern sym(vp9_bilinear_filters_mmx)
-global sym(vp9_bilinear_predict8x8_sse2) PRIVATE
-sym(vp9_bilinear_predict8x8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 144 ; reserve 144 bytes
-
- ;const short *HFilter = bilinear_filters_mmx[xoffset]
- ;const short *VFilter = bilinear_filters_mmx[yoffset]
- lea rcx, [GLOBAL(sym(vp9_bilinear_filters_mmx))]
-
- mov rsi, arg(0) ;src_ptr
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
-
- ;Read 9-line unaligned data in and put them on stack. This gives a big
- ;performance boost.
- movdqu xmm0, [rsi]
- lea rax, [rdx + rdx*2]
- movdqu xmm1, [rsi+rdx]
- movdqu xmm2, [rsi+rdx*2]
- add rsi, rax
- movdqu xmm3, [rsi]
- movdqu xmm4, [rsi+rdx]
- movdqu xmm5, [rsi+rdx*2]
- add rsi, rax
- movdqu xmm6, [rsi]
- movdqu xmm7, [rsi+rdx]
-
- movdqa XMMWORD PTR [rsp], xmm0
-
- movdqu xmm0, [rsi+rdx*2]
-
- movdqa XMMWORD PTR [rsp+16], xmm1
- movdqa XMMWORD PTR [rsp+32], xmm2
- movdqa XMMWORD PTR [rsp+48], xmm3
- movdqa XMMWORD PTR [rsp+64], xmm4
- movdqa XMMWORD PTR [rsp+80], xmm5
- movdqa XMMWORD PTR [rsp+96], xmm6
- movdqa XMMWORD PTR [rsp+112], xmm7
- movdqa XMMWORD PTR [rsp+128], xmm0
-
- movsxd rax, dword ptr arg(2) ;xoffset
- shl rax, 5
- add rax, rcx ;HFilter
-
- mov rdi, arg(4) ;dst_ptr
- movsxd rdx, dword ptr arg(5) ;dst_pitch
-
- movdqa xmm1, [rax]
- movdqa xmm2, [rax+16]
-
- movsxd rax, dword ptr arg(3) ;yoffset
- shl rax, 5
- add rax, rcx ;VFilter
-
- lea rcx, [rdi+rdx*8]
-
- movdqa xmm5, [rax]
- movdqa xmm6, [rax+16]
-
- pxor xmm0, xmm0
-
- ; get the first horizontal line done
- movdqa xmm3, XMMWORD PTR [rsp]
- movdqa xmm4, xmm3 ; make a copy of current line
- psrldq xmm4, 1
-
- punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07
- punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08
-
- pmullw xmm3, xmm1
- pmullw xmm4, xmm2
-
- paddw xmm3, xmm4
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- movdqa xmm7, xmm3
- add rsp, 16 ; next line
-.next_row8x8:
- movdqa xmm3, XMMWORD PTR [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
- movdqa xmm4, xmm3 ; make a copy of current line
- psrldq xmm4, 1
-
- punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07
- punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08
-
- pmullw xmm3, xmm1
- pmullw xmm4, xmm2
-
- paddw xmm3, xmm4
- pmullw xmm7, xmm5
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- movdqa xmm4, xmm3
-
- pmullw xmm3, xmm6
- paddw xmm3, xmm7
-
- movdqa xmm7, xmm4
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- packuswb xmm3, xmm0
- movq [rdi], xmm3 ; store the results in the destination
-
- add rsp, 16 ; next line
- add rdi, rdx
-
- cmp rdi, rcx
- jne .next_row8x8
-
- ;add rsp, 144
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-SECTION_RODATA
-align 16
-rd:
- times 8 dw 0x40
--- a/vp9/common/x86/vp9_subpixel_ssse3.asm
+++ /dev/null
@@ -1,1515 +1,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%define BLOCK_HEIGHT_WIDTH 4
-%define VP9_FILTER_WEIGHT 128
-%define VP9_FILTER_SHIFT 7
-
-
-;/************************************************************************************
-; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
-; input pixel array has output_height rows. This routine assumes that output_height is an
-; even number. This function handles 8 pixels in horizontal direction, calculating ONE
-; rows each iteration to take advantage of the 128 bits operations.
-;
-; This is an implementation of some of the SSE optimizations first seen in ffvp8
-;
-;*************************************************************************************/
-;void vp9_filter_block1d8_h6_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; unsigned int vp9_filter_index
-;)
-global sym(vp9_filter_block1d8_h6_ssse3) PRIVATE
-sym(vp9_filter_block1d8_h6_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movsxd rdx, DWORD PTR arg(5) ;table index
- xor rsi, rsi
- shl rdx, 4
-
- movdqa xmm7, [GLOBAL(rd)]
-
- lea rax, [GLOBAL(k0_k5)]
- add rax, rdx
- mov rdi, arg(2) ;output_ptr
-
- cmp esi, DWORD PTR [rax]
- je vp9_filter_block1d8_h4_ssse3
-
- movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
- movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rcx, dword ptr arg(4) ;output_height
-
- movsxd rdx, dword ptr arg(3) ;output_pitch
-
- sub rdi, rdx
-;xmm3 free
-.filter_block1d8_h6_rowloop_ssse3:
- movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
-
- movq xmm2, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
-
- punpcklbw xmm0, xmm2 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
-
- movdqa xmm1, xmm0
- pmaddubsw xmm0, xmm4
-
- movdqa xmm2, xmm1
- pshufb xmm1, [GLOBAL(shuf2bfrom1)]
-
- pshufb xmm2, [GLOBAL(shuf3bfrom1)]
- pmaddubsw xmm1, xmm5
-
- lea rdi, [rdi + rdx]
- pmaddubsw xmm2, xmm6
-
- lea rsi, [rsi + rax]
- dec rcx
-
- paddsw xmm0, xmm1
- paddsw xmm2, xmm7
-
- paddsw xmm0, xmm2
-
- psraw xmm0, 7
-
- packuswb xmm0, xmm0
-
- movq MMWORD Ptr [rdi], xmm0
- jnz .filter_block1d8_h6_rowloop_ssse3
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-vp9_filter_block1d8_h4_ssse3:
- movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
-
- movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)]
- movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)]
-
- mov rsi, arg(0) ;src_ptr
-
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rcx, dword ptr arg(4) ;output_height
-
- movsxd rdx, dword ptr arg(3) ;output_pitch
-
- sub rdi, rdx
-
-.filter_block1d8_h4_rowloop_ssse3:
- movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
-
- movq xmm1, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
-
- punpcklbw xmm0, xmm1 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
-
- movdqa xmm2, xmm0
- pshufb xmm0, xmm3
-
- pshufb xmm2, xmm4
- pmaddubsw xmm0, xmm5
-
- lea rdi, [rdi + rdx]
- pmaddubsw xmm2, xmm6
-
- lea rsi, [rsi + rax]
- dec rcx
-
- paddsw xmm0, xmm7
-
- paddsw xmm0, xmm2
-
- psraw xmm0, 7
-
- packuswb xmm0, xmm0
-
- movq MMWORD Ptr [rdi], xmm0
-
- jnz .filter_block1d8_h4_rowloop_ssse3
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-;void vp9_filter_block1d16_h6_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; unsigned int vp9_filter_index
-;)
-global sym(vp9_filter_block1d16_h6_ssse3) PRIVATE
-sym(vp9_filter_block1d16_h6_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movsxd rdx, DWORD PTR arg(5) ;table index
- xor rsi, rsi
- shl rdx, 4 ;
-
- lea rax, [GLOBAL(k0_k5)]
- add rax, rdx
-
- mov rdi, arg(2) ;output_ptr
-
- mov rsi, arg(0) ;src_ptr
-
- movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
- movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
-
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rcx, dword ptr arg(4) ;output_height
- movsxd rdx, dword ptr arg(3) ;output_pitch
-
-.filter_block1d16_h6_rowloop_ssse3:
- movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5
-
- movq xmm3, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10
-
- punpcklbw xmm0, xmm3 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10
-
- movdqa xmm1, xmm0
- pmaddubsw xmm0, xmm4
-
- movdqa xmm2, xmm1
- pshufb xmm1, [GLOBAL(shuf2bfrom1)]
-
- pshufb xmm2, [GLOBAL(shuf3bfrom1)]
- movq xmm3, MMWORD PTR [rsi + 6]
-
- pmaddubsw xmm1, xmm5
- movq xmm7, MMWORD PTR [rsi + 11]
-
- pmaddubsw xmm2, xmm6
- punpcklbw xmm3, xmm7
-
- paddsw xmm0, xmm1
- movdqa xmm1, xmm3
-
- pmaddubsw xmm3, xmm4
- paddsw xmm0, xmm2
-
- movdqa xmm2, xmm1
- paddsw xmm0, [GLOBAL(rd)]
-
- pshufb xmm1, [GLOBAL(shuf2bfrom1)]
- pshufb xmm2, [GLOBAL(shuf3bfrom1)]
-
- psraw xmm0, 7
- pmaddubsw xmm1, xmm5
-
- pmaddubsw xmm2, xmm6
- packuswb xmm0, xmm0
-
- lea rsi, [rsi + rax]
- paddsw xmm3, xmm1
-
- paddsw xmm3, xmm2
-
- paddsw xmm3, [GLOBAL(rd)]
-
- psraw xmm3, 7
-
- packuswb xmm3, xmm3
-
- punpcklqdq xmm0, xmm3
-
- movdqa XMMWORD Ptr [rdi], xmm0
-
- lea rdi, [rdi + rdx]
- dec rcx
- jnz .filter_block1d16_h6_rowloop_ssse3
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_filter_block1d4_h6_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; unsigned int vp9_filter_index
-;)
-global sym(vp9_filter_block1d4_h6_ssse3) PRIVATE
-sym(vp9_filter_block1d4_h6_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movsxd rdx, DWORD PTR arg(5) ;table index
- xor rsi, rsi
- shl rdx, 4 ;
-
- lea rax, [GLOBAL(k0_k5)]
- add rax, rdx
- movdqa xmm7, [GLOBAL(rd)]
-
- cmp esi, DWORD PTR [rax]
- je .vp9_filter_block1d4_h4_ssse3
-
- movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
- movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rcx, dword ptr arg(4) ;output_height
-
- movsxd rdx, dword ptr arg(3) ;output_pitch
-
-;xmm3 free
-.filter_block1d4_h6_rowloop_ssse3:
- movdqu xmm0, XMMWORD PTR [rsi - 2]
-
- movdqa xmm1, xmm0
- pshufb xmm0, [GLOBAL(shuf1b)]
-
- movdqa xmm2, xmm1
- pshufb xmm1, [GLOBAL(shuf2b)]
- pmaddubsw xmm0, xmm4
- pshufb xmm2, [GLOBAL(shuf3b)]
- pmaddubsw xmm1, xmm5
-
-;--
- pmaddubsw xmm2, xmm6
-
- lea rsi, [rsi + rax]
-;--
- paddsw xmm0, xmm1
- paddsw xmm0, xmm7
- pxor xmm1, xmm1
- paddsw xmm0, xmm2
- psraw xmm0, 7
- packuswb xmm0, xmm0
-
- movd DWORD PTR [rdi], xmm0
-
- add rdi, rdx
- dec rcx
- jnz .filter_block1d4_h6_rowloop_ssse3
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-.vp9_filter_block1d4_h4_ssse3:
- movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
- movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)]
- movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)]
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rcx, dword ptr arg(4) ;output_height
-
- movsxd rdx, dword ptr arg(3) ;output_pitch
-
-.filter_block1d4_h4_rowloop_ssse3:
- movdqu xmm1, XMMWORD PTR [rsi - 2]
-
- movdqa xmm2, xmm1
- pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)]
- pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)]
- pmaddubsw xmm1, xmm5
-
-;--
- pmaddubsw xmm2, xmm6
-
- lea rsi, [rsi + rax]
-;--
- paddsw xmm1, xmm7
- paddsw xmm1, xmm2
- psraw xmm1, 7
- packuswb xmm1, xmm1
-
- movd DWORD PTR [rdi], xmm1
-
- add rdi, rdx
- dec rcx
- jnz .filter_block1d4_h4_rowloop_ssse3
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-
-;void vp9_filter_block1d16_v6_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; unsigned int vp9_filter_index
-;)
-global sym(vp9_filter_block1d16_v6_ssse3) PRIVATE
-sym(vp9_filter_block1d16_v6_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movsxd rdx, DWORD PTR arg(5) ;table index
- xor rsi, rsi
- shl rdx, 4 ;
-
- lea rax, [GLOBAL(k0_k5)]
- add rax, rdx
-
- cmp esi, DWORD PTR [rax]
- je .vp9_filter_block1d16_v4_ssse3
-
- movdqa xmm5, XMMWORD PTR [rax] ;k0_k5
- movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
-
- mov rsi, arg(0) ;src_ptr
- movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
- mov rdi, arg(2) ;output_ptr
-
-%if ABI_IS_32BIT=0
- movsxd r8, DWORD PTR arg(3) ;out_pitch
-%endif
- mov rax, rsi
- movsxd rcx, DWORD PTR arg(4) ;output_height
- add rax, rdx
-
-
-.vp9_filter_block1d16_v6_ssse3_loop:
- movq xmm1, MMWORD PTR [rsi] ;A
- movq xmm2, MMWORD PTR [rsi + rdx] ;B
- movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
- movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
- movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
-
- punpcklbw xmm2, xmm4 ;B D
- punpcklbw xmm3, xmm0 ;C E
-
- movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
-
- pmaddubsw xmm3, xmm6
- punpcklbw xmm1, xmm0 ;A F
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm5
-
- paddsw xmm2, xmm3
- paddsw xmm2, xmm1
- paddsw xmm2, [GLOBAL(rd)]
- psraw xmm2, 7
- packuswb xmm2, xmm2
-
- movq MMWORD PTR [rdi], xmm2 ;store the results
-
- movq xmm1, MMWORD PTR [rsi + 8] ;A
- movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B
- movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C
- movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
- movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
-
- punpcklbw xmm2, xmm4 ;B D
- punpcklbw xmm3, xmm0 ;C E
-
- movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F
- pmaddubsw xmm3, xmm6
- punpcklbw xmm1, xmm0 ;A F
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm5
-
- add rsi, rdx
- add rax, rdx
-;--
-;--
- paddsw xmm2, xmm3
- paddsw xmm2, xmm1
- paddsw xmm2, [GLOBAL(rd)]
- psraw xmm2, 7
- packuswb xmm2, xmm2
-
- movq MMWORD PTR [rdi+8], xmm2
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;out_pitch
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .vp9_filter_block1d16_v6_ssse3_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-.vp9_filter_block1d16_v4_ssse3:
- movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
-
- mov rsi, arg(0) ;src_ptr
- movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
- mov rdi, arg(2) ;output_ptr
-
-%if ABI_IS_32BIT=0
- movsxd r8, DWORD PTR arg(3) ;out_pitch
-%endif
- mov rax, rsi
- movsxd rcx, DWORD PTR arg(4) ;output_height
- add rax, rdx
-
-.vp9_filter_block1d16_v4_ssse3_loop:
- movq xmm2, MMWORD PTR [rsi + rdx] ;B
- movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
- movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
- movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
-
- punpcklbw xmm2, xmm4 ;B D
- punpcklbw xmm3, xmm0 ;C E
-
- pmaddubsw xmm3, xmm6
- pmaddubsw xmm2, xmm7
- movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B
- movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C
- movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
- movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
-
- paddsw xmm2, [GLOBAL(rd)]
- paddsw xmm2, xmm3
- psraw xmm2, 7
- packuswb xmm2, xmm2
-
- punpcklbw xmm5, xmm4 ;B D
- punpcklbw xmm1, xmm0 ;C E
-
- pmaddubsw xmm1, xmm6
- pmaddubsw xmm5, xmm7
-
- movdqa xmm4, [GLOBAL(rd)]
- add rsi, rdx
- add rax, rdx
-;--
-;--
- paddsw xmm5, xmm1
- paddsw xmm5, xmm4
- psraw xmm5, 7
- packuswb xmm5, xmm5
-
- punpcklqdq xmm2, xmm5
-
- movdqa XMMWORD PTR [rdi], xmm2
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;out_pitch
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .vp9_filter_block1d16_v4_ssse3_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_filter_block1d8_v6_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; unsigned int vp9_filter_index
-;)
-global sym(vp9_filter_block1d8_v6_ssse3) PRIVATE
-sym(vp9_filter_block1d8_v6_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movsxd rdx, DWORD PTR arg(5) ;table index
- xor rsi, rsi
- shl rdx, 4 ;
-
- lea rax, [GLOBAL(k0_k5)]
- add rax, rdx
-
- movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
- mov rdi, arg(2) ;output_ptr
-%if ABI_IS_32BIT=0
- movsxd r8, DWORD PTR arg(3) ; out_pitch
-%endif
- movsxd rcx, DWORD PTR arg(4) ;[output_height]
-
- cmp esi, DWORD PTR [rax]
- je .vp9_filter_block1d8_v4_ssse3
-
- movdqa xmm5, XMMWORD PTR [rax] ;k0_k5
- movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
-
- mov rsi, arg(0) ;src_ptr
-
- mov rax, rsi
- add rax, rdx
-
-.vp9_filter_block1d8_v6_ssse3_loop:
- movq xmm1, MMWORD PTR [rsi] ;A
- movq xmm2, MMWORD PTR [rsi + rdx] ;B
- movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
- movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
- movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
-
- punpcklbw xmm2, xmm4 ;B D
- punpcklbw xmm3, xmm0 ;C E
-
- movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
- movdqa xmm4, [GLOBAL(rd)]
-
- pmaddubsw xmm3, xmm6
- punpcklbw xmm1, xmm0 ;A F
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm5
- add rsi, rdx
- add rax, rdx
-;--
-;--
- paddsw xmm2, xmm3
- paddsw xmm2, xmm1
- paddsw xmm2, xmm4
- psraw xmm2, 7
- packuswb xmm2, xmm2
-
- movq MMWORD PTR [rdi], xmm2
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;[out_pitch]
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .vp9_filter_block1d8_v6_ssse3_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-.vp9_filter_block1d8_v4_ssse3:
- movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
- movdqa xmm5, [GLOBAL(rd)]
-
- mov rsi, arg(0) ;src_ptr
-
- mov rax, rsi
- add rax, rdx
-
-.vp9_filter_block1d8_v4_ssse3_loop:
- movq xmm2, MMWORD PTR [rsi + rdx] ;B
- movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
- movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
- movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
-
- punpcklbw xmm2, xmm4 ;B D
- punpcklbw xmm3, xmm0 ;C E
-
- pmaddubsw xmm3, xmm6
- pmaddubsw xmm2, xmm7
- add rsi, rdx
- add rax, rdx
-;--
-;--
- paddsw xmm2, xmm3
- paddsw xmm2, xmm5
- psraw xmm2, 7
- packuswb xmm2, xmm2
-
- movq MMWORD PTR [rdi], xmm2
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;[out_pitch]
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .vp9_filter_block1d8_v4_ssse3_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-;void vp9_filter_block1d4_v6_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; unsigned int vp9_filter_index
-;)
-global sym(vp9_filter_block1d4_v6_ssse3) PRIVATE
-sym(vp9_filter_block1d4_v6_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movsxd rdx, DWORD PTR arg(5) ;table index
- xor rsi, rsi
- shl rdx, 4 ;
-
- lea rax, [GLOBAL(k0_k5)]
- add rax, rdx
-
- movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
- mov rdi, arg(2) ;output_ptr
-%if ABI_IS_32BIT=0
- movsxd r8, DWORD PTR arg(3) ; out_pitch
-%endif
- movsxd rcx, DWORD PTR arg(4) ;[output_height]
-
- cmp esi, DWORD PTR [rax]
- je .vp9_filter_block1d4_v4_ssse3
-
- movq mm5, MMWORD PTR [rax] ;k0_k5
- movq mm6, MMWORD PTR [rax+256] ;k2_k4
- movq mm7, MMWORD PTR [rax+128] ;k1_k3
-
- mov rsi, arg(0) ;src_ptr
-
- mov rax, rsi
- add rax, rdx
-
-.vp9_filter_block1d4_v6_ssse3_loop:
- movd mm1, DWORD PTR [rsi] ;A
- movd mm2, DWORD PTR [rsi + rdx] ;B
- movd mm3, DWORD PTR [rsi + rdx * 2] ;C
- movd mm4, DWORD PTR [rax + rdx * 2] ;D
- movd mm0, DWORD PTR [rsi + rdx * 4] ;E
-
- punpcklbw mm2, mm4 ;B D
- punpcklbw mm3, mm0 ;C E
-
- movd mm0, DWORD PTR [rax + rdx * 4] ;F
-
- movq mm4, [GLOBAL(rd)]
-
- pmaddubsw mm3, mm6
- punpcklbw mm1, mm0 ;A F
- pmaddubsw mm2, mm7
- pmaddubsw mm1, mm5
- add rsi, rdx
- add rax, rdx
-;--
-;--
- paddsw mm2, mm3
- paddsw mm2, mm1
- paddsw mm2, mm4
- psraw mm2, 7
- packuswb mm2, mm2
-
- movd DWORD PTR [rdi], mm2
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;[out_pitch]
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .vp9_filter_block1d4_v6_ssse3_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-.vp9_filter_block1d4_v4_ssse3:
- movq mm6, MMWORD PTR [rax+256] ;k2_k4
- movq mm7, MMWORD PTR [rax+128] ;k1_k3
- movq mm5, MMWORD PTR [GLOBAL(rd)]
-
- mov rsi, arg(0) ;src_ptr
-
- mov rax, rsi
- add rax, rdx
-
-.vp9_filter_block1d4_v4_ssse3_loop:
- movd mm2, DWORD PTR [rsi + rdx] ;B
- movd mm3, DWORD PTR [rsi + rdx * 2] ;C
- movd mm4, DWORD PTR [rax + rdx * 2] ;D
- movd mm0, DWORD PTR [rsi + rdx * 4] ;E
-
- punpcklbw mm2, mm4 ;B D
- punpcklbw mm3, mm0 ;C E
-
- pmaddubsw mm3, mm6
- pmaddubsw mm2, mm7
- add rsi, rdx
- add rax, rdx
-;--
-;--
- paddsw mm2, mm3
- paddsw mm2, mm5
- psraw mm2, 7
- packuswb mm2, mm2
-
- movd DWORD PTR [rdi], mm2
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;[out_pitch]
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .vp9_filter_block1d4_v4_ssse3_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_bilinear_predict16x16_ssse3
-;(
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; int xoffset,
-; int yoffset,
-; unsigned char *dst_ptr,
-; int dst_pitch
-;)
-global sym(vp9_bilinear_predict16x16_ssse3) PRIVATE
-sym(vp9_bilinear_predict16x16_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- lea rcx, [GLOBAL(bilinear_filters_ssse3)]
- movsxd rax, dword ptr arg(2) ; xoffset
-
- cmp rax, 0 ; skip first_pass filter if xoffset=0
- je .b16x16_sp_only
-
- shl rax, 4
- lea rax, [rax + rcx] ; HFilter
-
- mov rdi, arg(4) ; dst_ptr
- mov rsi, arg(0) ; src_ptr
- movsxd rdx, dword ptr arg(5) ; dst_pitch
-
- movdqa xmm1, [rax]
-
- movsxd rax, dword ptr arg(3) ; yoffset
-
- cmp rax, 0 ; skip second_pass filter if yoffset=0
- je .b16x16_fp_only
-
- shl rax, 4
- lea rax, [rax + rcx] ; VFilter
-
- lea rcx, [rdi+rdx*8]
- lea rcx, [rcx+rdx*8]
- movsxd rdx, dword ptr arg(1) ; src_pixels_per_line
-
- movdqa xmm2, [rax]
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(5) ; dst_pitch
-%endif
- movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07
- movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08
-
- punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
- movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15
-
- movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16
-
- lea rsi, [rsi + rdx] ; next line
-
- pmaddubsw xmm3, xmm1 ; 00 02 04 06 08 10 12 14
-
- punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
- pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value
- psraw xmm4, VP9_FILTER_SHIFT ; xmm4 /= 128
-
- movdqa xmm7, xmm3
- packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
-
-.next_row:
- movq xmm6, [rsi] ; 00 01 02 03 04 05 06 07
- movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08
-
- punpcklbw xmm6, xmm5
- movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15
-
- movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16
- lea rsi, [rsi + rdx] ; next line
-
- pmaddubsw xmm6, xmm1
-
- punpcklbw xmm4, xmm5
- pmaddubsw xmm4, xmm1
-
- paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value
- psraw xmm6, VP9_FILTER_SHIFT ; xmm6 /= 128
-
- paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value
- psraw xmm4, VP9_FILTER_SHIFT ; xmm4 /= 128
-
- packuswb xmm6, xmm4
- movdqa xmm5, xmm7
-
- punpcklbw xmm5, xmm6
- pmaddubsw xmm5, xmm2
-
- punpckhbw xmm7, xmm6
- pmaddubsw xmm7, xmm2
-
- paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value
- psraw xmm5, VP9_FILTER_SHIFT ; xmm5 /= 128
-
- paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value
- psraw xmm7, VP9_FILTER_SHIFT ; xmm7 /= 128
-
- packuswb xmm5, xmm7
- movdqa xmm7, xmm6
-
- movdqa [rdi], xmm5 ; store the results in the destination
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(5) ; dst_pitch
-%else
- add rdi, r8
-%endif
-
- cmp rdi, rcx
- jne .next_row
-
- jmp .done
-
-.b16x16_sp_only:
- movsxd rax, dword ptr arg(3) ; yoffset
- shl rax, 4
- lea rax, [rax + rcx] ; VFilter
-
- mov rdi, arg(4) ; dst_ptr
- mov rsi, arg(0) ; src_ptr
- movsxd rdx, dword ptr arg(5) ; dst_pitch
-
- movdqa xmm1, [rax] ; VFilter
-
- lea rcx, [rdi+rdx*8]
- lea rcx, [rcx+rdx*8]
- movsxd rax, dword ptr arg(1) ; src_pixels_per_line
-
- ; get the first horizontal line done
- movq xmm4, [rsi] ; load row 0
- movq xmm2, [rsi + 8] ; load row 0
-
- lea rsi, [rsi + rax] ; next line
-.next_row_sp:
- movq xmm3, [rsi] ; load row + 1
- movq xmm5, [rsi + 8] ; load row + 1
-
- punpcklbw xmm4, xmm3
- punpcklbw xmm2, xmm5
-
- pmaddubsw xmm4, xmm1
- movq xmm7, [rsi + rax] ; load row + 2
-
- pmaddubsw xmm2, xmm1
- movq xmm6, [rsi + rax + 8] ; load row + 2
-
- punpcklbw xmm3, xmm7
- punpcklbw xmm5, xmm6
-
- pmaddubsw xmm3, xmm1
- paddw xmm4, [GLOBAL(rd)]
-
- pmaddubsw xmm5, xmm1
- paddw xmm2, [GLOBAL(rd)]
-
- psraw xmm4, VP9_FILTER_SHIFT
- psraw xmm2, VP9_FILTER_SHIFT
-
- packuswb xmm4, xmm2
- paddw xmm3, [GLOBAL(rd)]
-
- movdqa [rdi], xmm4 ; store row 0
- paddw xmm5, [GLOBAL(rd)]
-
- psraw xmm3, VP9_FILTER_SHIFT
- psraw xmm5, VP9_FILTER_SHIFT
-
- packuswb xmm3, xmm5
- movdqa xmm4, xmm7
-
- movdqa [rdi + rdx],xmm3 ; store row 1
- lea rsi, [rsi + 2*rax]
-
- movdqa xmm2, xmm6
- lea rdi, [rdi + 2*rdx]
-
- cmp rdi, rcx
- jne .next_row_sp
-
- jmp .done
-
-.b16x16_fp_only:
- lea rcx, [rdi+rdx*8]
- lea rcx, [rcx+rdx*8]
- movsxd rax, dword ptr arg(1) ; src_pixels_per_line
-
-.next_row_fp:
- movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07
- movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08
-
- punpcklbw xmm2, xmm4
- movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15
-
- pmaddubsw xmm2, xmm1
- movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16
-
- lea rsi, [rsi + rax] ; next line
- punpcklbw xmm3, xmm4
-
- pmaddubsw xmm3, xmm1
- movq xmm5, [rsi]
-
- paddw xmm2, [GLOBAL(rd)]
- movq xmm7, [rsi+1]
-
- movq xmm6, [rsi+8]
- psraw xmm2, VP9_FILTER_SHIFT
-
- punpcklbw xmm5, xmm7
- movq xmm7, [rsi+9]
-
- paddw xmm3, [GLOBAL(rd)]
- pmaddubsw xmm5, xmm1
-
- psraw xmm3, VP9_FILTER_SHIFT
- punpcklbw xmm6, xmm7
-
- packuswb xmm2, xmm3
- pmaddubsw xmm6, xmm1
-
- movdqa [rdi], xmm2 ; store the results in the destination
- paddw xmm5, [GLOBAL(rd)]
-
- lea rdi, [rdi + rdx] ; dst_pitch
- psraw xmm5, VP9_FILTER_SHIFT
-
- paddw xmm6, [GLOBAL(rd)]
- psraw xmm6, VP9_FILTER_SHIFT
-
- packuswb xmm5, xmm6
- lea rsi, [rsi + rax] ; next line
-
- movdqa [rdi], xmm5 ; store the results in the destination
- lea rdi, [rdi + rdx] ; dst_pitch
-
- cmp rdi, rcx
-
- jne .next_row_fp
-
-.done:
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_bilinear_predict8x8_ssse3
-;(
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; int xoffset,
-; int yoffset,
-; unsigned char *dst_ptr,
-; int dst_pitch
-;)
-global sym(vp9_bilinear_predict8x8_ssse3) PRIVATE
-sym(vp9_bilinear_predict8x8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 144 ; reserve 144 bytes
-
- lea rcx, [GLOBAL(bilinear_filters_ssse3)]
-
- mov rsi, arg(0) ;src_ptr
- movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
-
- ;Read 9-line unaligned data in and put them on stack. This gives a big
- ;performance boost.
- movdqu xmm0, [rsi]
- lea rax, [rdx + rdx*2]
- movdqu xmm1, [rsi+rdx]
- movdqu xmm2, [rsi+rdx*2]
- add rsi, rax
- movdqu xmm3, [rsi]
- movdqu xmm4, [rsi+rdx]
- movdqu xmm5, [rsi+rdx*2]
- add rsi, rax
- movdqu xmm6, [rsi]
- movdqu xmm7, [rsi+rdx]
-
- movdqa XMMWORD PTR [rsp], xmm0
-
- movdqu xmm0, [rsi+rdx*2]
-
- movdqa XMMWORD PTR [rsp+16], xmm1
- movdqa XMMWORD PTR [rsp+32], xmm2
- movdqa XMMWORD PTR [rsp+48], xmm3
- movdqa XMMWORD PTR [rsp+64], xmm4
- movdqa XMMWORD PTR [rsp+80], xmm5
- movdqa XMMWORD PTR [rsp+96], xmm6
- movdqa XMMWORD PTR [rsp+112], xmm7
- movdqa XMMWORD PTR [rsp+128], xmm0
-
- movsxd rax, dword ptr arg(2) ; xoffset
- cmp rax, 0 ; skip first_pass filter if xoffset=0
- je .b8x8_sp_only
-
- shl rax, 4
- add rax, rcx ; HFilter
-
- mov rdi, arg(4) ; dst_ptr
- movsxd rdx, dword ptr arg(5) ; dst_pitch
-
- movdqa xmm0, [rax]
-
- movsxd rax, dword ptr arg(3) ; yoffset
- cmp rax, 0 ; skip second_pass filter if yoffset=0
- je .b8x8_fp_only
-
- shl rax, 4
- lea rax, [rax + rcx] ; VFilter
-
- lea rcx, [rdi+rdx*8]
-
- movdqa xmm1, [rax]
-
- ; get the first horizontal line done
- movdqa xmm3, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
- movdqa xmm5, xmm3 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx
-
- psrldq xmm5, 1
- lea rsp, [rsp + 16] ; next line
-
- punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
- pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14
-
- paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value
- psraw xmm3, VP9_FILTER_SHIFT ; xmm3 /= 128
-
- movdqa xmm7, xmm3
- packuswb xmm7, xmm7 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
-
-.next_row:
- movdqa xmm6, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
- lea rsp, [rsp + 16] ; next line
-
- movdqa xmm5, xmm6
-
- psrldq xmm5, 1
-
- punpcklbw xmm6, xmm5
- pmaddubsw xmm6, xmm0
-
- paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value
- psraw xmm6, VP9_FILTER_SHIFT ; xmm6 /= 128
-
- packuswb xmm6, xmm6
-
- punpcklbw xmm7, xmm6
- pmaddubsw xmm7, xmm1
-
- paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value
- psraw xmm7, VP9_FILTER_SHIFT ; xmm7 /= 128
-
- packuswb xmm7, xmm7
-
- movq [rdi], xmm7 ; store the results in the destination
- lea rdi, [rdi + rdx]
-
- movdqa xmm7, xmm6
-
- cmp rdi, rcx
- jne .next_row
-
- jmp .done8x8
-
-.b8x8_sp_only:
- movsxd rax, dword ptr arg(3) ; yoffset
- shl rax, 4
- lea rax, [rax + rcx] ; VFilter
-
- mov rdi, arg(4) ;dst_ptr
- movsxd rdx, dword ptr arg(5) ; dst_pitch
-
- movdqa xmm0, [rax] ; VFilter
-
- movq xmm1, XMMWORD PTR [rsp]
- movq xmm2, XMMWORD PTR [rsp+16]
-
- movq xmm3, XMMWORD PTR [rsp+32]
- punpcklbw xmm1, xmm2
-
- movq xmm4, XMMWORD PTR [rsp+48]
- punpcklbw xmm2, xmm3
-
- movq xmm5, XMMWORD PTR [rsp+64]
- punpcklbw xmm3, xmm4
-
- movq xmm6, XMMWORD PTR [rsp+80]
- punpcklbw xmm4, xmm5
-
- movq xmm7, XMMWORD PTR [rsp+96]
- punpcklbw xmm5, xmm6
-
- pmaddubsw xmm1, xmm0
- pmaddubsw xmm2, xmm0
-
- pmaddubsw xmm3, xmm0
- pmaddubsw xmm4, xmm0
-
- pmaddubsw xmm5, xmm0
- punpcklbw xmm6, xmm7
-
- pmaddubsw xmm6, xmm0
- paddw xmm1, [GLOBAL(rd)]
-
- paddw xmm2, [GLOBAL(rd)]
- psraw xmm1, VP9_FILTER_SHIFT
-
- paddw xmm3, [GLOBAL(rd)]
- psraw xmm2, VP9_FILTER_SHIFT
-
- paddw xmm4, [GLOBAL(rd)]
- psraw xmm3, VP9_FILTER_SHIFT
-
- paddw xmm5, [GLOBAL(rd)]
- psraw xmm4, VP9_FILTER_SHIFT
-
- paddw xmm6, [GLOBAL(rd)]
- psraw xmm5, VP9_FILTER_SHIFT
-
- psraw xmm6, VP9_FILTER_SHIFT
- packuswb xmm1, xmm1
-
- packuswb xmm2, xmm2
- movq [rdi], xmm1
-
- packuswb xmm3, xmm3
- movq [rdi+rdx], xmm2
-
- packuswb xmm4, xmm4
- movq xmm1, XMMWORD PTR [rsp+112]
-
- lea rdi, [rdi + 2*rdx]
- movq xmm2, XMMWORD PTR [rsp+128]
-
- packuswb xmm5, xmm5
- movq [rdi], xmm3
-
- packuswb xmm6, xmm6
- movq [rdi+rdx], xmm4
-
- lea rdi, [rdi + 2*rdx]
- punpcklbw xmm7, xmm1
-
- movq [rdi], xmm5
- pmaddubsw xmm7, xmm0
-
- movq [rdi+rdx], xmm6
- punpcklbw xmm1, xmm2
-
- pmaddubsw xmm1, xmm0
- paddw xmm7, [GLOBAL(rd)]
-
- psraw xmm7, VP9_FILTER_SHIFT
- paddw xmm1, [GLOBAL(rd)]
-
- psraw xmm1, VP9_FILTER_SHIFT
- packuswb xmm7, xmm7
-
- packuswb xmm1, xmm1
- lea rdi, [rdi + 2*rdx]
-
- movq [rdi], xmm7
-
- movq [rdi+rdx], xmm1
- lea rsp, [rsp + 144]
-
- jmp .done8x8
-
-.b8x8_fp_only:
- lea rcx, [rdi+rdx*8]
-
-.next_row_fp:
- movdqa xmm1, XMMWORD PTR [rsp]
- movdqa xmm3, XMMWORD PTR [rsp+16]
-
- movdqa xmm2, xmm1
- movdqa xmm5, XMMWORD PTR [rsp+32]
-
- psrldq xmm2, 1
- movdqa xmm7, XMMWORD PTR [rsp+48]
-
- movdqa xmm4, xmm3
- psrldq xmm4, 1
-
- movdqa xmm6, xmm5
- psrldq xmm6, 1
-
- punpcklbw xmm1, xmm2
- pmaddubsw xmm1, xmm0
-
- punpcklbw xmm3, xmm4
- pmaddubsw xmm3, xmm0
-
- punpcklbw xmm5, xmm6
- pmaddubsw xmm5, xmm0
-
- movdqa xmm2, xmm7
- psrldq xmm2, 1
-
- punpcklbw xmm7, xmm2
- pmaddubsw xmm7, xmm0
-
- paddw xmm1, [GLOBAL(rd)]
- psraw xmm1, VP9_FILTER_SHIFT
-
- paddw xmm3, [GLOBAL(rd)]
- psraw xmm3, VP9_FILTER_SHIFT
-
- paddw xmm5, [GLOBAL(rd)]
- psraw xmm5, VP9_FILTER_SHIFT
-
- paddw xmm7, [GLOBAL(rd)]
- psraw xmm7, VP9_FILTER_SHIFT
-
- packuswb xmm1, xmm1
- packuswb xmm3, xmm3
-
- packuswb xmm5, xmm5
- movq [rdi], xmm1
-
- packuswb xmm7, xmm7
- movq [rdi+rdx], xmm3
-
- lea rdi, [rdi + 2*rdx]
- movq [rdi], xmm5
-
- lea rsp, [rsp + 4*16]
- movq [rdi+rdx], xmm7
-
- lea rdi, [rdi + 2*rdx]
- cmp rdi, rcx
-
- jne .next_row_fp
-
- lea rsp, [rsp + 16]
-
-.done8x8:
- ;add rsp, 144
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-shuf1b:
- db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
-shuf2b:
- db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
-shuf3b:
- db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
-
-align 16
-shuf2bfrom1:
- db 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13
-align 16
-shuf3bfrom1:
- db 2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11
-
-align 16
-rd:
- times 8 dw 0x40
-
-align 16
-k0_k5:
- times 8 db 0, 0 ;placeholder
- times 8 db 0, 0
- times 8 db 2, 1
- times 8 db 0, 0
- times 8 db 3, 3
- times 8 db 0, 0
- times 8 db 1, 2
- times 8 db 0, 0
-k1_k3:
- times 8 db 0, 0 ;placeholder
- times 8 db -6, 12
- times 8 db -11, 36
- times 8 db -9, 50
- times 8 db -16, 77
- times 8 db -6, 93
- times 8 db -8, 108
- times 8 db -1, 123
-k2_k4:
- times 8 db 128, 0 ;placeholder
- times 8 db 123, -1
- times 8 db 108, -8
- times 8 db 93, -6
- times 8 db 77, -16
- times 8 db 50, -9
- times 8 db 36, -11
- times 8 db 12, -6
-align 16
-bilinear_filters_ssse3:
- times 8 db 128, 0
- times 8 db 120, 8
- times 8 db 112, 16
- times 8 db 104, 24
- times 8 db 96, 32
- times 8 db 88, 40
- times 8 db 80, 48
- times 8 db 72, 56
- times 8 db 64, 64
- times 8 db 56, 72
- times 8 db 48, 80
- times 8 db 40, 88
- times 8 db 32, 96
- times 8 db 24, 104
- times 8 db 16, 112
- times 8 db 8, 120
-
--- a/vp9/common/x86/vp9_subpixel_x86.h
+++ /dev/null
@@ -1,109 +1,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_COMMON_X86_VP9_SUBPIXEL_X86_H_
-#define VP9_COMMON_X86_VP9_SUBPIXEL_X86_H_
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-
-#if HAVE_MMX
-extern prototype_subpixel_predict(vp9_sixtap_predict16x16_mmx);
-extern prototype_subpixel_predict(vp9_sixtap_predict8x8_mmx);
-extern prototype_subpixel_predict(vp9_sixtap_predict8x4_mmx);
-extern prototype_subpixel_predict(vp9_sixtap_predict4x4_mmx);
-extern prototype_subpixel_predict(vp9_bilinear_predict16x16_mmx);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp9_subpix_sixtap16x16
-#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_mmx
-
-#undef vp9_subpix_sixtap8x8
-#define vp9_subpix_sixtap8x8 vp9_sixtap_predict8x8_mmx
-
-#undef vp9_subpix_sixtap8x4
-#define vp9_subpix_sixtap8x4 vp9_sixtap_predict8x4_mmx
-
-#undef vp9_subpix_sixtap4x4
-#define vp9_subpix_sixtap4x4 vp9_sixtap_predict4x4_mmx
-
-#undef vp9_subpix_bilinear16x16
-#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_mmx
-
-#endif
-#endif
-
-
-#if HAVE_SSE2
-extern prototype_subpixel_predict(vp9_sixtap_predict16x16_sse2);
-extern prototype_subpixel_predict(vp9_sixtap_predict8x8_sse2);
-extern prototype_subpixel_predict(vp9_sixtap_predict8x4_sse2);
-extern prototype_subpixel_predict(vp9_bilinear_predict16x16_sse2);
-extern prototype_subpixel_predict(vp9_bilinear_predict8x8_sse2);
-
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp9_subpix_sixtap16x16
-#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_sse2
-
-#undef vp9_subpix_sixtap8x8
-#define vp9_subpix_sixtap8x8 vp9_sixtap_predict8x8_sse2
-
-#undef vp9_subpix_sixtap8x4
-#define vp9_subpix_sixtap8x4 vp9_sixtap_predict8x4_sse2
-
-#undef vp9_subpix_bilinear16x16
-#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_sse2
-
-#undef vp9_subpix_bilinear8x8
-#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_sse2
-
-#endif
-#endif
-
-#if HAVE_SSSE3
-extern prototype_subpixel_predict(vp9_sixtap_predict16x16_ssse3);
-extern prototype_subpixel_predict(vp9_sixtap_predict8x8_ssse3);
-extern prototype_subpixel_predict(vp9_sixtap_predict8x4_ssse3);
-extern prototype_subpixel_predict(vp9_sixtap_predict4x4_ssse3);
-extern prototype_subpixel_predict(vp9_bilinear_predict16x16_ssse3);
-extern prototype_subpixel_predict(vp9_bilinear_predict8x8_ssse3);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp9_subpix_sixtap16x16
-#define vp9_subpix_sixtap16x16 vp9_sixtap_predict16x16_ssse3
-
-#undef vp9_subpix_sixtap8x8
-#define vp9_subpix_sixtap8x8 vp9_sixtap_predict8x8_ssse3
-
-#undef vp9_subpix_sixtap8x4
-#define vp9_subpix_sixtap8x4 vp9_sixtap_predict8x4_ssse3
-
-#undef vp9_subpix_sixtap4x4
-#define vp9_subpix_sixtap4x4 vp9_sixtap_predict4x4_ssse3
-
-
-#undef vp9_subpix_bilinear16x16
-#define vp9_subpix_bilinear16x16 vp9_bilinear_predict16x16_ssse3
-
-#undef vp9_subpix_bilinear8x8
-#define vp9_subpix_bilinear8x8 vp9_bilinear_predict8x8_ssse3
-
-#endif
-#endif
-
-
-
-#endif
--- a/vp9/decoder/vp9_dboolhuff.c
+++ b/vp9/decoder/vp9_dboolhuff.c
@@ -8,19 +8,19 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
-#include "vp9/decoder/vp9_dboolhuff.h"
#include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h"
+#include "vp9/decoder/vp9_dboolhuff.h"
+
int vp9_start_decode(BOOL_DECODER *br,
const unsigned char *source,
unsigned int source_sz) {
br->user_buffer_end = source + source_sz;
- br->user_buffer = source;
- br->value = 0;
- br->count = -8;
- br->range = 255;
+ br->user_buffer = source;
+ br->value = 0;
+ br->count = -8;
+ br->range = 255;
if (source_sz && !source)
return 1;
@@ -33,17 +33,28 @@
void vp9_bool_decoder_fill(BOOL_DECODER *br) {
- const unsigned char *bufptr;
- const unsigned char *bufend;
- VP9_BD_VALUE value;
- int count;
- bufend = br->user_buffer_end;
- bufptr = br->user_buffer;
- value = br->value;
- count = br->count;
+ const unsigned char *bufptr = br->user_buffer;
+ const unsigned char *bufend = br->user_buffer_end;
+ VP9_BD_VALUE value = br->value;
+ int count = br->count;
+ int shift = VP9_BD_VALUE_SIZE - 8 - (count + 8);
+ int loop_end = 0;
+ int bits_left = (int)((bufend - bufptr)*CHAR_BIT);
+ int x = shift + CHAR_BIT - bits_left;
- VP9DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+ if (x >= 0) {
+ count += VP9_LOTS_OF_BITS;
+ loop_end = x;
+ }
+ if (x < 0 || bits_left) {
+ while (shift >= loop_end) {
+ count += CHAR_BIT;
+ value |= (VP9_BD_VALUE)*bufptr++ << shift;
+ shift -= CHAR_BIT;
+ }
+ }
+
br->user_buffer = bufptr;
br->value = value;
br->count = count;
@@ -52,7 +63,9 @@
static int get_unsigned_bits(unsigned num_values) {
int cat = 0;
- if ((num_values--) <= 1) return 0;
+ if (num_values <= 1)
+ return 0;
+ num_values--;
while (num_values > 0) {
cat++;
num_values >>= 1;
@@ -61,9 +74,12 @@
}
int vp9_inv_recenter_nonneg(int v, int m) {
- if (v > (m << 1)) return v;
- else if ((v & 1) == 0) return (v >> 1) + m;
- else return m - ((v + 1) >> 1);
+ if (v > (m << 1))
+ return v;
+ else if ((v & 1) == 0)
+ return (v >> 1) + m;
+ else
+ return m - ((v + 1) >> 1);
}
int vp9_decode_uniform(BOOL_DECODER *br, int n) {
--- a/vp9/decoder/vp9_dboolhuff.h
+++ b/vp9/decoder/vp9_dboolhuff.h
@@ -13,6 +13,7 @@
#include <stddef.h>
#include <limits.h>
+
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
@@ -19,11 +20,11 @@
typedef size_t VP9_BD_VALUE;
-# define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT)
+#define VP9_BD_VALUE_SIZE ((int)sizeof(VP9_BD_VALUE)*CHAR_BIT)
/*This is meant to be a large, positive constant that can still be efficiently
loaded as an immediate (on platforms like ARM, for example).
Even relatively modest values like 100 would work fine.*/
-# define VP9_LOTS_OF_BITS (0x40000000)
+#define VP9_LOTS_OF_BITS (0x40000000)
typedef struct {
const unsigned char *user_buffer_end;
@@ -45,47 +46,14 @@
int vp9_decode_term_subexp(BOOL_DECODER *br, int k, int num_syms);
int vp9_inv_recenter_nonneg(int v, int m);
-/*The refill loop is used in several places, so define it in a macro to make
- sure they're all consistent.
- An inline function would be cleaner, but has a significant penalty, because
- multiple BOOL_DECODER fields must be modified, and the compiler is not smart
- enough to eliminate the stores to those fields and the subsequent reloads
- from them when inlining the function.*/
-#define VP9DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
- do \
- { \
- int shift = VP9_BD_VALUE_SIZE - 8 - ((_count) + 8); \
- int loop_end, x; \
- int bits_left = (int)(((_bufend)-(_bufptr))*CHAR_BIT); \
- \
- x = shift + CHAR_BIT - bits_left; \
- loop_end = 0; \
- if(x >= 0) \
- { \
- (_count) += VP9_LOTS_OF_BITS; \
- loop_end = x; \
- if(!bits_left) break; \
- } \
- while(shift >= loop_end) \
- { \
- (_count) += CHAR_BIT; \
- (_value) |= (VP9_BD_VALUE)*(_bufptr)++ << shift; \
- shift -= CHAR_BIT; \
- } \
- } \
- while(0) \
-
-
static int decode_bool(BOOL_DECODER *br, int probability) {
unsigned int bit = 0;
VP9_BD_VALUE value;
- unsigned int split;
VP9_BD_VALUE bigsplit;
int count;
unsigned int range;
+ unsigned int split = 1 + (((br->range - 1) * probability) >> 8);
- split = 1 + (((br->range - 1) * probability) >> 8);
-
if (br->count < 0)
vp9_bool_decoder_fill(br);
@@ -120,7 +88,7 @@
int bit;
for (bit = bits - 1; bit >= 0; bit--) {
- z |= (decode_bool(br, 0x80) << bit);
+ z |= decode_bool(br, 0x80) << bit;
}
return z;
@@ -127,29 +95,23 @@
}
static int bool_error(BOOL_DECODER *br) {
- /* Check if we have reached the end of the buffer.
- *
- * Variable 'count' stores the number of bits in the 'value' buffer, minus
- * 8. The top byte is part of the algorithm, and the remainder is buffered
- * to be shifted into it. So if count == 8, the top 16 bits of 'value' are
- * occupied, 8 for the algorithm and 8 in the buffer.
- *
- * When reading a byte from the user's buffer, count is filled with 8 and
- * one byte is filled into the value buffer. When we reach the end of the
- * data, count is additionally filled with VP9_LOTS_OF_BITS. So when
- * count == VP9_LOTS_OF_BITS - 1, the user's data has been exhausted.
- */
- if ((br->count > VP9_BD_VALUE_SIZE) && (br->count < VP9_LOTS_OF_BITS)) {
- /* We have tried to decode bits after the end of
- * stream was encountered.
- */
- return 1;
- }
-
- /* No error. */
- return 0;
+ // Check if we have reached the end of the buffer.
+ //
+ // Variable 'count' stores the number of bits in the 'value' buffer, minus
+ // 8. The top byte is part of the algorithm, and the remainder is buffered
+ // to be shifted into it. So if count == 8, the top 16 bits of 'value' are
+ // occupied, 8 for the algorithm and 8 in the buffer.
+ //
+ // When reading a byte from the user's buffer, count is filled with 8 and
+ // one byte is filled into the value buffer. When we reach the end of the
+ // data, count is additionally filled with VP9_LOTS_OF_BITS. So when
+ // count == VP9_LOTS_OF_BITS - 1, the user's data has been exhausted.
+ //
+ // 1 if we have tried to decode bits after the end of stream was encountered.
+ // 0 No error.
+ return br->count > VP9_BD_VALUE_SIZE && br->count < VP9_LOTS_OF_BITS;
}
-extern int vp9_decode_unsigned_max(BOOL_DECODER *br, int max);
+int vp9_decode_unsigned_max(BOOL_DECODER *br, int max);
#endif // VP9_DECODER_VP9_DBOOLHUFF_H_
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -12,6 +12,7 @@
#include "vp9/decoder/vp9_treereader.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_common.h"
@@ -28,12 +29,13 @@
#ifdef DEBUG_DEC_MV
int dec_mvcount = 0;
#endif
+
// #define DEC_DEBUG
#ifdef DEC_DEBUG
extern int dec_debug;
#endif
-static int read_bmode(vp9_reader *bc, const vp9_prob *p) {
+static B_PREDICTION_MODE read_bmode(vp9_reader *bc, const vp9_prob *p) {
B_PREDICTION_MODE m = treed_read(bc, vp9_bmode_tree, p);
#if CONFIG_NEWBINTRAMODES
if (m == B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS)
@@ -43,24 +45,24 @@
return m;
}
-static int read_kf_bmode(vp9_reader *bc, const vp9_prob *p) {
- return treed_read(bc, vp9_kf_bmode_tree, p);
+static B_PREDICTION_MODE read_kf_bmode(vp9_reader *bc, const vp9_prob *p) {
+ return (B_PREDICTION_MODE)treed_read(bc, vp9_kf_bmode_tree, p);
}
-static int read_ymode(vp9_reader *bc, const vp9_prob *p) {
- return treed_read(bc, vp9_ymode_tree, p);
+static MB_PREDICTION_MODE read_ymode(vp9_reader *bc, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(bc, vp9_ymode_tree, p);
}
-static int read_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
- return treed_read(bc, vp9_sb_ymode_tree, p);
+static MB_PREDICTION_MODE read_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(bc, vp9_sb_ymode_tree, p);
}
-static int read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
- return treed_read(bc, vp9_uv_mode_tree, p);
+static MB_PREDICTION_MODE read_kf_sb_ymode(vp9_reader *bc, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p);
}
-static int read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) {
- return treed_read(bc, vp9_kf_ymode_tree, p);
+static MB_PREDICTION_MODE read_kf_mb_ymode(vp9_reader *bc, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(bc, vp9_kf_ymode_tree, p);
}
static int read_i8x8_mode(vp9_reader *bc, const vp9_prob *p) {
@@ -67,29 +69,39 @@
return treed_read(bc, vp9_i8x8_mode_tree, p);
}
-static int read_uv_mode(vp9_reader *bc, const vp9_prob *p) {
- return treed_read(bc, vp9_uv_mode_tree, p);
+static MB_PREDICTION_MODE read_uv_mode(vp9_reader *bc, const vp9_prob *p) {
+ return (MB_PREDICTION_MODE)treed_read(bc, vp9_uv_mode_tree, p);
}
// This function reads the current macro block's segnent id from the bitstream
// It should only be called if a segment map update is indicated.
-static void read_mb_segid(vp9_reader *r, MB_MODE_INFO *mi,
- MACROBLOCKD *xd) {
- /* Is segmentation enabled */
+static void read_mb_segid(vp9_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *xd) {
if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
- /* If so then read the segment id. */
- if (vp9_read(r, xd->mb_segment_tree_probs[0]))
- mi->segment_id =
- (unsigned char)(2 + vp9_read(r, xd->mb_segment_tree_probs[2]));
- else
- mi->segment_id =
- (unsigned char)(vp9_read(r, xd->mb_segment_tree_probs[1]));
+ const vp9_prob *const p = xd->mb_segment_tree_probs;
+ mi->segment_id = vp9_read(r, p[0]) ? 2 + vp9_read(r, p[2])
+ : vp9_read(r, p[1]);
}
}
+// This function reads the current macro block's segnent id from the bitstream
+// It should only be called if a segment map update is indicated.
+static void read_mb_segid_except(VP9_COMMON *cm,
+ vp9_reader *r, MB_MODE_INFO *mi,
+ MACROBLOCKD *xd, int mb_row, int mb_col) {
+ const int mb_index = mb_row * cm->mb_cols + mb_col;
+ const int pred_seg_id = vp9_get_pred_mb_segid(cm, xd, mb_index);
+ const vp9_prob *const p = xd->mb_segment_tree_probs;
+ const vp9_prob prob = xd->mb_segment_mispred_tree_probs[pred_seg_id];
+
+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
+ mi->segment_id = vp9_read(r, prob)
+ ? 2 + (pred_seg_id < 2 ? vp9_read(r, p[2]) : (pred_seg_id == 2))
+ : (pred_seg_id >= 2 ? vp9_read(r, p[1]) : (pred_seg_id == 0));
+ }
+}
+
#if CONFIG_NEW_MVREF
-int vp9_read_mv_ref_id(vp9_reader *r,
- vp9_prob * ref_id_probs) {
+int vp9_read_mv_ref_id(vp9_reader *r, vp9_prob *ref_id_probs) {
int ref_index = 0;
if (vp9_read(r, ref_id_probs[0])) {
@@ -111,10 +123,13 @@
int mb_col,
BOOL_DECODER* const bc) {
VP9_COMMON *const cm = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
const int mis = pbi->common.mode_info_stride;
int map_index = mb_row * pbi->common.mb_cols + mb_col;
MB_PREDICTION_MODE y_mode;
+ m->mbmi.ref_frame = INTRA_FRAME;
+
// Read the Macroblock segmentation map if it is being updated explicitly
// this frame (reset to 0 by default).
m->mbmi.segment_id = 0;
@@ -139,30 +154,19 @@
m->mbmi.mb_skip_coeff = 0;
if (pbi->common.mb_no_coeff_skip &&
- (!vp9_segfeature_active(&pbi->mb,
- m->mbmi.segment_id, SEG_LVL_EOB) ||
- (vp9_get_segdata(&pbi->mb,
- m->mbmi.segment_id, SEG_LVL_EOB) != 0))) {
- MACROBLOCKD *const xd = &pbi->mb;
- m->mbmi.mb_skip_coeff =
- vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
+ (!vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id, SEG_LVL_SKIP))) {
+ m->mbmi.mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, &pbi->mb,
+ PRED_MBSKIP));
} else {
- if (vp9_segfeature_active(&pbi->mb,
- m->mbmi.segment_id, SEG_LVL_EOB) &&
- (vp9_get_segdata(&pbi->mb,
- m->mbmi.segment_id, SEG_LVL_EOB) == 0)) {
- m->mbmi.mb_skip_coeff = 1;
- } else
- m->mbmi.mb_skip_coeff = 0;
+ m->mbmi.mb_skip_coeff = vp9_segfeature_active(&pbi->mb, m->mbmi.segment_id,
+ SEG_LVL_SKIP);
}
- if (m->mbmi.sb_type) {
- y_mode = (MB_PREDICTION_MODE) read_kf_sb_ymode(bc,
- pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
- } else {
- y_mode = (MB_PREDICTION_MODE) read_kf_mb_ymode(bc,
- pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
- }
+ y_mode = m->mbmi.sb_type ?
+ read_kf_sb_ymode(bc,
+ pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]):
+ read_kf_mb_ymode(bc,
+ pbi->common.kf_ymode_prob[pbi->common.kf_ymode_probs_index]);
m->mbmi.ref_frame = INTRA_FRAME;
@@ -169,30 +173,33 @@
if ((m->mbmi.mode = y_mode) == B_PRED) {
int i = 0;
do {
- const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
- const B_PREDICTION_MODE L = left_block_mode(m, i);
+ const B_PREDICTION_MODE a = above_block_mode(m, i, mis);
+ const B_PREDICTION_MODE l = (xd->left_available || (i & 3)) ?
+ left_block_mode(m, i) : B_DC_PRED;
- m->bmi[i].as_mode.first =
- (B_PREDICTION_MODE) read_kf_bmode(
- bc, pbi->common.kf_bmode_prob [A] [L]);
+ m->bmi[i].as_mode.first = read_kf_bmode(bc,
+ pbi->common.kf_bmode_prob[a][l]);
} while (++i < 16);
}
+
if ((m->mbmi.mode = y_mode) == I8X8_PRED) {
int i;
- int mode8x8;
for (i = 0; i < 4; i++) {
- int ib = vp9_i8x8_block[i];
- mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob);
+ const int ib = vp9_i8x8_block[i];
+ const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob);
+
m->bmi[ib + 0].as_mode.first = mode8x8;
m->bmi[ib + 1].as_mode.first = mode8x8;
m->bmi[ib + 4].as_mode.first = mode8x8;
m->bmi[ib + 5].as_mode.first = mode8x8;
}
- } else
- m->mbmi.uv_mode = (MB_PREDICTION_MODE)read_uv_mode(bc,
- pbi->common.kf_uv_mode_prob[m->mbmi.mode]);
+ } else {
+ m->mbmi.uv_mode = read_uv_mode(bc,
+ pbi->common.kf_uv_mode_prob[m->mbmi.mode]);
+ }
- if (cm->txfm_mode == TX_MODE_SELECT && m->mbmi.mb_skip_coeff == 0 &&
+ if (cm->txfm_mode == TX_MODE_SELECT &&
+ m->mbmi.mb_skip_coeff == 0 &&
m->mbmi.mode <= I8X8_PRED) {
// FIXME(rbultje) code ternary symbol once all experiments are merged
m->mbmi.txfm_size = vp9_read(bc, cm->prob_tx[0]);
@@ -215,23 +222,23 @@
static int read_nmv_component(vp9_reader *r,
int rv,
const nmv_component *mvcomp) {
- int v, s, z, c, o, d;
- s = vp9_read(r, mvcomp->sign);
- c = treed_read(r, vp9_mv_class_tree, mvcomp->classes);
- if (c == MV_CLASS_0) {
+ int mag, d;
+ const int sign = vp9_read(r, mvcomp->sign);
+ const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes);
+
+ if (mv_class == MV_CLASS_0) {
d = treed_read(r, vp9_mv_class0_tree, mvcomp->class0);
} else {
- int i, b;
+ int i;
+ int n = mv_class + CLASS0_BITS - 1; // number of bits
+
d = 0;
- b = c + CLASS0_BITS - 1; /* number of bits */
- for (i = 0; i < b; ++i)
- d |= (vp9_read(r, mvcomp->bits[i]) << i);
+ for (i = 0; i < n; ++i)
+ d |= vp9_read(r, mvcomp->bits[i]) << i;
}
- o = d << 3;
- z = vp9_get_mv_mag(c, o);
- v = (s ? -(z + 8) : (z + 8));
- return v;
+ mag = vp9_get_mv_mag(mv_class, d << 3);
+ return sign ? -(mag + 8) : (mag + 8);
}
static int read_nmv_component_fp(vp9_reader *r,
@@ -239,43 +246,34 @@
int rv,
const nmv_component *mvcomp,
int usehp) {
- int s, z, c, o, d, e, f;
- s = v < 0;
- z = (s ? -v : v) - 1; /* magnitude - 1 */
- z &= ~7;
+ const int sign = v < 0;
+ int mag = ((sign ? -v : v) - 1) & ~7; // magnitude - 1
+ int offset;
+ const int mv_class = vp9_get_mv_class(mag, &offset);
+ const int f = mv_class == MV_CLASS_0 ?
+ treed_read(r, vp9_mv_fp_tree, mvcomp->class0_fp[offset >> 3]):
+ treed_read(r, vp9_mv_fp_tree, mvcomp->fp);
- c = vp9_get_mv_class(z, &o);
- d = o >> 3;
+ offset += f << 1;
- if (c == MV_CLASS_0) {
- f = treed_read(r, vp9_mv_fp_tree, mvcomp->class0_fp[d]);
- } else {
- f = treed_read(r, vp9_mv_fp_tree, mvcomp->fp);
- }
- o += (f << 1);
-
if (usehp) {
- if (c == MV_CLASS_0) {
- e = vp9_read(r, mvcomp->class0_hp);
- } else {
- e = vp9_read(r, mvcomp->hp);
- }
- o += e;
+ const vp9_prob p = mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp;
+ offset += vp9_read(r, p);
} else {
- ++o; /* Note if hp is not used, the default value of the hp bit is 1 */
+ offset += 1; // If hp is not used, the default value of the hp bit is 1
}
- z = vp9_get_mv_mag(c, o);
- v = (s ? -(z + 1) : (z + 1));
- return v;
+ mag = vp9_get_mv_mag(mv_class, offset);
+ return sign ? -(mag + 1) : (mag + 1);
}
static void read_nmv(vp9_reader *r, MV *mv, const MV *ref,
const nmv_context *mvctx) {
- MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, mvctx->joints);
+ const MV_JOINT_TYPE j = treed_read(r, vp9_mv_joint_tree, mvctx->joints);
mv->row = mv-> col = 0;
if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
mv->row = read_nmv_component(r, ref->row, &mvctx->comps[0]);
}
+
if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
mv->col = read_nmv_component(r, ref->col, &mvctx->comps[1]);
}
@@ -283,7 +281,7 @@
static void read_nmv_fp(vp9_reader *r, MV *mv, const MV *ref,
const nmv_context *mvctx, int usehp) {
- MV_JOINT_TYPE j = vp9_get_mv_joint(*mv);
+ const MV_JOINT_TYPE j = vp9_get_mv_joint(*mv);
usehp = usehp && vp9_use_nmv_hp(ref);
if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
mv->row = read_nmv_component_fp(r, mv->row, ref->row, &mvctx->comps[0],
@@ -293,7 +291,10 @@
mv->col = read_nmv_component_fp(r, mv->col, ref->col, &mvctx->comps[1],
usehp);
}
- //printf(" %d: %d %d ref: %d %d\n", usehp, mv->row, mv-> col, ref->row, ref->col);
+ /*
+ printf("MV: %d %d REF: %d %d\n", mv->row + ref->row, mv->col + ref->col,
+ ref->row, ref->col);
+ */
}
static void update_nmv(vp9_reader *bc, vp9_prob *const p,
@@ -310,48 +311,40 @@
static void read_nmvprobs(vp9_reader *bc, nmv_context *mvctx,
int usehp) {
int i, j, k;
+
#ifdef MV_GROUP_UPDATE
- if (!vp9_read_bit(bc)) return;
+ if (!vp9_read_bit(bc))
+ return;
#endif
- for (j = 0; j < MV_JOINTS - 1; ++j) {
- update_nmv(bc, &mvctx->joints[j],
- VP9_NMV_UPDATE_PROB);
- }
+ for (j = 0; j < MV_JOINTS - 1; ++j)
+ update_nmv(bc, &mvctx->joints[j], VP9_NMV_UPDATE_PROB);
+
for (i = 0; i < 2; ++i) {
- update_nmv(bc, &mvctx->comps[i].sign,
- VP9_NMV_UPDATE_PROB);
- for (j = 0; j < MV_CLASSES - 1; ++j) {
- update_nmv(bc, &mvctx->comps[i].classes[j],
- VP9_NMV_UPDATE_PROB);
- }
- for (j = 0; j < CLASS0_SIZE - 1; ++j) {
- update_nmv(bc, &mvctx->comps[i].class0[j],
- VP9_NMV_UPDATE_PROB);
- }
- for (j = 0; j < MV_OFFSET_BITS; ++j) {
- update_nmv(bc, &mvctx->comps[i].bits[j],
- VP9_NMV_UPDATE_PROB);
- }
+ update_nmv(bc, &mvctx->comps[i].sign, VP9_NMV_UPDATE_PROB);
+ for (j = 0; j < MV_CLASSES - 1; ++j)
+ update_nmv(bc, &mvctx->comps[i].classes[j], VP9_NMV_UPDATE_PROB);
+
+ for (j = 0; j < CLASS0_SIZE - 1; ++j)
+ update_nmv(bc, &mvctx->comps[i].class0[j], VP9_NMV_UPDATE_PROB);
+
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ update_nmv(bc, &mvctx->comps[i].bits[j], VP9_NMV_UPDATE_PROB);
}
for (i = 0; i < 2; ++i) {
for (j = 0; j < CLASS0_SIZE; ++j) {
for (k = 0; k < 3; ++k)
- update_nmv(bc, &mvctx->comps[i].class0_fp[j][k],
- VP9_NMV_UPDATE_PROB);
+ update_nmv(bc, &mvctx->comps[i].class0_fp[j][k], VP9_NMV_UPDATE_PROB);
}
- for (j = 0; j < 3; ++j) {
- update_nmv(bc, &mvctx->comps[i].fp[j],
- VP9_NMV_UPDATE_PROB);
- }
+
+ for (j = 0; j < 3; ++j)
+ update_nmv(bc, &mvctx->comps[i].fp[j], VP9_NMV_UPDATE_PROB);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
- update_nmv(bc, &mvctx->comps[i].class0_hp,
- VP9_NMV_UPDATE_PROB);
- update_nmv(bc, &mvctx->comps[i].hp,
- VP9_NMV_UPDATE_PROB);
+ update_nmv(bc, &mvctx->comps[i].class0_hp, VP9_NMV_UPDATE_PROB);
+ update_nmv(bc, &mvctx->comps[i].hp, VP9_NMV_UPDATE_PROB);
}
}
}
@@ -361,15 +354,11 @@
vp9_reader *const bc,
unsigned char segment_id) {
MV_REFERENCE_FRAME ref_frame;
- int seg_ref_active;
- int seg_ref_count = 0;
-
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- seg_ref_active = vp9_segfeature_active(xd,
- segment_id,
- SEG_LVL_REF_FRAME);
+ int seg_ref_count = 0;
+ int seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
// If segment coding enabled does the segment allow for more than one
// possible reference frame
@@ -384,15 +373,13 @@
// multiple reference frame options
if (!seg_ref_active || (seg_ref_count > 1)) {
// Values used in prediction model coding
- unsigned char prediction_flag;
- vp9_prob pred_prob;
MV_REFERENCE_FRAME pred_ref;
// Get the context probability the prediction flag
- pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
+ vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_REF);
// Read the prediction status flag
- prediction_flag = (unsigned char)vp9_read(bc, pred_prob);
+ unsigned char prediction_flag = vp9_read(bc, pred_prob);
// Store the prediction flag.
vp9_set_pred_flag(xd, PRED_REF, prediction_flag);
@@ -403,9 +390,8 @@
// If correctly predicted then use the predicted value
if (prediction_flag) {
ref_frame = pred_ref;
- }
- // else decode the explicitly coded value
- else {
+ } else {
+ // decode the explicitly coded value
vp9_prob mod_refprobs[PREDICTION_PROBS];
vpx_memcpy(mod_refprobs,
cm->mod_refprobs[pred_ref], sizeof(mod_refprobs));
@@ -456,10 +442,8 @@
}
}
}
- }
-
- // Segment reference frame features are enabled
- else {
+ } else {
+ // Segment reference frame features are enabled
// The reference frame for the mb is considered as correclty predicted
// if it is signaled at the segment level for the purposes of the
// common prediction model
@@ -492,12 +476,12 @@
};
#endif
-static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1};
+static const unsigned char mbsplit_fill_count[4] = { 8, 8, 4, 1 };
static const unsigned char mbsplit_fill_offset[4][16] = {
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
- { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15},
- { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 },
+ { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }
};
static void read_switchable_interp_probs(VP9D_COMP* const pbi,
@@ -506,7 +490,7 @@
int i, j;
for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) {
- cm->fc.switchable_interp_prob[j][i] = vp9_read_literal(bc, 8);
+ cm->fc.switchable_interp_prob[j][i] = vp9_read_prob(bc);
}
}
//printf("DECODER: %d %d\n", cm->fc.switchable_interp_prob[0],
@@ -527,13 +511,13 @@
#if CONFIG_COMP_INTERINTRA_PRED
if (cm->use_interintra) {
if (vp9_read(bc, VP9_UPD_INTERINTRA_PROB))
- cm->fc.interintra_prob = (vp9_prob)vp9_read_literal(bc, 8);
+ cm->fc.interintra_prob = vp9_read_prob(bc);
}
#endif
// Decode the baseline probabilities for decoding reference frame
- cm->prob_intra_coded = (vp9_prob)vp9_read_literal(bc, 8);
- cm->prob_last_coded = (vp9_prob)vp9_read_literal(bc, 8);
- cm->prob_gf_coded = (vp9_prob)vp9_read_literal(bc, 8);
+ cm->prob_intra_coded = vp9_read_prob(bc);
+ cm->prob_last_coded = vp9_read_prob(bc);
+ cm->prob_gf_coded = vp9_read_prob(bc);
// Computes a modified set of probabilities for use when reference
// frame prediction fails.
@@ -545,7 +529,7 @@
if (cm->comp_pred_mode == HYBRID_PREDICTION) {
int i;
for (i = 0; i < COMP_PRED_CONTEXTS; i++)
- cm->prob_comppred[i] = (vp9_prob)vp9_read_literal(bc, 8);
+ cm->prob_comppred[i] = vp9_read_prob(bc);
}
if (vp9_read_bit(bc)) {
@@ -552,7 +536,7 @@
int i = 0;
do {
- cm->fc.ymode_prob[i] = (vp9_prob) vp9_read_literal(bc, 8);
+ cm->fc.ymode_prob[i] = vp9_read_prob(bc);
} while (++i < VP9_YMODES - 1);
}
@@ -560,7 +544,7 @@
int i = 0;
do {
- cm->fc.sb_ymode_prob[i] = (vp9_prob) vp9_read_literal(bc, 8);
+ cm->fc.sb_ymode_prob[i] = vp9_read_prob(bc);
} while (++i < VP9_I32X32_MODES - 1);
}
@@ -575,10 +559,10 @@
int mb_row, int mb_col,
BOOL_DECODER* const bc) {
VP9_COMMON *const cm = &pbi->common;
- MACROBLOCKD *const xd = &pbi->mb;
+ MACROBLOCKD *const xd = &pbi->mb;
MODE_INFO *mi = xd->mode_info_context;
MB_MODE_INFO *mbmi = &mi->mbmi;
- int index = mb_row * pbi->common.mb_cols + mb_col;
+ int mb_index = mb_row * pbi->common.mb_cols + mb_col;
if (xd->segmentation_enabled) {
if (xd->update_mb_segmentation_map) {
@@ -586,12 +570,10 @@
if (cm->temporal_update) {
// Get the context based probability for reading the
// prediction status flag
- vp9_prob pred_prob =
- vp9_get_pred_prob(cm, xd, PRED_SEG_ID);
+ vp9_prob pred_prob = vp9_get_pred_prob(cm, xd, PRED_SEG_ID);
// Read the prediction status flag
- unsigned char seg_pred_flag =
- (unsigned char)vp9_read(bc, pred_prob);
+ unsigned char seg_pred_flag = vp9_read(bc, pred_prob);
// Store the prediction flag.
vp9_set_pred_flag(xd, PRED_SEG_ID, seg_pred_flag);
@@ -599,17 +581,16 @@
// If the value is flagged as correctly predicted
// then use the predicted value
if (seg_pred_flag) {
- mbmi->segment_id = vp9_get_pred_mb_segid(cm, xd, index);
+ mbmi->segment_id = vp9_get_pred_mb_segid(cm, xd, mb_index);
+ } else {
+ // Decode it explicitly
+ read_mb_segid_except(cm, bc, mbmi, xd, mb_row, mb_col);
}
- // Else .... decode it explicitly
- else {
- read_mb_segid(bc, mbmi, xd);
- }
- }
- // Normal unpredicted coding mode
- else {
+ } else {
+ // Normal unpredicted coding mode
read_mb_segid(bc, mbmi, xd);
}
+
if (mbmi->sb_type) {
const int nmbs = 1 << mbmi->sb_type;
const int ymbs = MIN(cm->mb_rows - mb_row, nmbs);
@@ -618,12 +599,12 @@
for (y = 0; y < ymbs; y++) {
for (x = 0; x < xmbs; x++) {
- cm->last_frame_seg_map[index + x + y * cm->mb_cols] =
+ cm->last_frame_seg_map[mb_index + x + y * cm->mb_cols] =
mbmi->segment_id;
}
}
} else {
- cm->last_frame_seg_map[index] = mbmi->segment_id;
+ cm->last_frame_seg_map[mb_index] = mbmi->segment_id;
}
} else {
if (mbmi->sb_type) {
@@ -636,13 +617,12 @@
for (y = 0; y < ymbs; y++) {
for (x = 0; x < xmbs; x++) {
segment_id = MIN(segment_id,
- cm->last_frame_seg_map[index + x +
- y * cm->mb_cols]);
+ cm->last_frame_seg_map[mb_index + x + y * cm->mb_cols]);
}
}
mbmi->segment_id = segment_id;
} else {
- mbmi->segment_id = cm->last_frame_seg_map[index];
+ mbmi->segment_id = cm->last_frame_seg_map[mb_index];
}
}
} else {
@@ -652,6 +632,27 @@
}
}
+
+static INLINE void assign_and_clamp_mv(int_mv *dst, const int_mv *src,
+ int mb_to_left_edge,
+ int mb_to_right_edge,
+ int mb_to_top_edge,
+ int mb_to_bottom_edge) {
+ dst->as_int = src->as_int;
+ clamp_mv(dst, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge,
+ mb_to_bottom_edge);
+}
+
+static INLINE void process_mv(BOOL_DECODER* bc, MV *mv, MV *ref,
+ nmv_context *nmvc, nmv_context_counts *mvctx,
+ int usehp) {
+ read_nmv(bc, mv, ref, nmvc);
+ read_nmv_fp(bc, mv, ref, nmvc, usehp);
+ vp9_increment_nmv(mv, ref, mvctx, usehp);
+ mv->row += ref->row;
+ mv->col += ref->col;
+}
+
static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
MODE_INFO *prev_mi,
int mb_row, int mb_col,
@@ -659,31 +660,20 @@
VP9_COMMON *const cm = &pbi->common;
nmv_context *const nmvc = &pbi->common.fc.nmvc;
const int mis = pbi->common.mode_info_stride;
- MACROBLOCKD *const xd = &pbi->mb;
+ MACROBLOCKD *const xd = &pbi->mb;
int_mv *const mv = &mbmi->mv[0];
- int mb_to_left_edge;
- int mb_to_right_edge;
- int mb_to_top_edge;
- int mb_to_bottom_edge;
const int mb_size = 1 << mi->mbmi.sb_type;
- mb_to_top_edge = xd->mb_to_top_edge;
- mb_to_bottom_edge = xd->mb_to_bottom_edge;
- mb_to_top_edge -= LEFT_TOP_MARGIN;
- mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+ const int use_prev_in_find_mv_refs = cm->width == cm->last_width &&
+ cm->height == cm->last_height &&
+ !cm->error_resilient_mode;
+
+ int mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge;
+
mbmi->need_to_clamp_mvs = 0;
mbmi->need_to_clamp_secondmv = 0;
mbmi->second_ref_frame = NONE;
- /* Distance of Mb to the various image edges.
- * These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
- */
- xd->mb_to_left_edge =
- mb_to_left_edge = -((mb_col * 16) << 3);
- mb_to_left_edge -= LEFT_TOP_MARGIN;
- xd->mb_to_right_edge =
- mb_to_right_edge = ((pbi->common.mb_cols - mb_size - mb_col) * 16) << 3;
- mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
// Make sure the MACROBLOCKD mode info pointer is pointed at the
// correct entry for the current macroblock.
@@ -690,69 +680,68 @@
xd->mode_info_context = mi;
xd->prev_mode_info_context = prev_mi;
+ // Distance of Mb to the various image edges.
+ // These specified to 8th pel as they are always compared to MV values
+ // that are in 1/8th pel units
+ set_mb_row(cm, xd, mb_row, mb_size);
+ set_mb_col(cm, xd, mb_col, mb_size);
+
+ mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
+ mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+ mb_to_left_edge = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
+ mb_to_right_edge = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+
// Read the macroblock segment id.
read_mb_segment_id(pbi, mb_row, mb_col, bc);
if (pbi->common.mb_no_coeff_skip &&
- (!vp9_segfeature_active(xd,
- mbmi->segment_id, SEG_LVL_EOB) ||
- (vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_EOB) != 0))) {
+ (!vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP))) {
// Read the macroblock coeff skip flag if this feature is in use,
// else default to 0
mbmi->mb_skip_coeff = vp9_read(bc, vp9_get_pred_prob(cm, xd, PRED_MBSKIP));
} else {
- if (vp9_segfeature_active(xd,
- mbmi->segment_id, SEG_LVL_EOB) &&
- (vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_EOB) == 0)) {
- mbmi->mb_skip_coeff = 1;
- } else
- mbmi->mb_skip_coeff = 0;
+ mbmi->mb_skip_coeff = vp9_segfeature_active(xd, mbmi->segment_id,
+ SEG_LVL_SKIP);
}
// Read the reference frame
- if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)
- && vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE) < NEARESTMV)
- mbmi->ref_frame = INTRA_FRAME;
- else
- mbmi->ref_frame = read_ref_frame(pbi, bc, mbmi->segment_id);
+ mbmi->ref_frame = read_ref_frame(pbi, bc, mbmi->segment_id);
+ /*
+ if (pbi->common.current_video_frame == 1)
+ printf("ref frame: %d [%d %d]\n", mbmi->ref_frame, mb_row, mb_col);
+ */
+
// If reference frame is an Inter frame
if (mbmi->ref_frame) {
int_mv nearest, nearby, best_mv;
int_mv nearest_second, nearby_second, best_mv_second;
- vp9_prob mv_ref_p [VP9_MVREFS - 1];
+ vp9_prob mv_ref_p[VP9_MVREFS - 1];
- int recon_y_stride, recon_yoffset;
- int recon_uv_stride, recon_uvoffset;
MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame;
+ xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
{
- int ref_fb_idx;
+ const int use_prev_in_find_best_ref =
+ xd->scale_factor[0].x_num == xd->scale_factor[0].x_den &&
+ xd->scale_factor[0].y_num == xd->scale_factor[0].y_den &&
+ !cm->error_resilient_mode &&
+ !cm->frame_parallel_decoding_mode;
/* Select the appropriate reference frame for this MB */
- if (ref_frame == LAST_FRAME)
- ref_fb_idx = cm->lst_fb_idx;
- else if (ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cm->gld_fb_idx;
- else
- ref_fb_idx = cm->alt_fb_idx;
+ const int ref_fb_idx = cm->active_ref_idx[ref_frame - 1];
- recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride ;
- recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+ setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx],
+ mb_row, mb_col, &xd->scale_factor[0], &xd->scale_factor_uv[0]);
- recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
- recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
-
- xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
- xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
-
#ifdef DEC_DEBUG
if (dec_debug)
printf("%d %d\n", xd->mode_info_context->mbmi.mv[0].as_mv.row,
xd->mode_info_context->mbmi.mv[0].as_mv.col);
#endif
- vp9_find_mv_refs(xd, mi, prev_mi,
+ // if (cm->current_video_frame == 1 && mb_row == 4 && mb_col == 5)
+ // printf("Dello\n");
+ vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL,
ref_frame, mbmi->ref_mvs[ref_frame],
cm->ref_frame_sign_bias);
@@ -759,16 +748,12 @@
vp9_mv_ref_probs(&pbi->common, mv_ref_p,
mbmi->mb_mode_context[ref_frame]);
- // Is the segment level mode feature enabled for this segment
- if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) {
- mbmi->mode =
- vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE);
+ // If the segment level skip mode enabled
+ if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) {
+ mbmi->mode = ZEROMV;
} else {
- if (mbmi->sb_type)
- mbmi->mode = read_sb_mv_ref(bc, mv_ref_p);
- else
- mbmi->mode = read_mv_ref(bc, mv_ref_p);
-
+ mbmi->mode = mbmi->sb_type ? read_sb_mv_ref(bc, mv_ref_p)
+ : read_mv_ref(bc, mv_ref_p);
vp9_accum_mv_refs(&pbi->common, mbmi->mode,
mbmi->mb_mode_context[ref_frame]);
}
@@ -775,8 +760,9 @@
if (mbmi->mode != ZEROMV) {
vp9_find_best_ref_mvs(xd,
- xd->pre.y_buffer,
- recon_y_stride,
+ use_prev_in_find_best_ref ?
+ xd->pre.y_buffer : NULL,
+ xd->pre.y_stride,
mbmi->ref_mvs[ref_frame],
&nearest, &nearby);
@@ -791,8 +777,7 @@
#endif
}
- if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV)
- {
+ if (mbmi->mode >= NEARESTMV && mbmi->mode <= SPLITMV) {
if (cm->mcomp_filter_type == SWITCHABLE) {
mbmi->interp_filter = vp9_switchable_interp[
treed_read(bc, vp9_switchable_interp_tree,
@@ -817,23 +802,22 @@
mbmi->second_ref_frame = 1;
if (mbmi->second_ref_frame > 0) {
int second_ref_fb_idx;
+ int use_prev_in_find_best_ref;
+
+ xd->scale_factor[1] = cm->active_ref_scale[mbmi->second_ref_frame - 1];
+ use_prev_in_find_best_ref =
+ xd->scale_factor[1].x_num == xd->scale_factor[1].x_den &&
+ xd->scale_factor[1].y_num == xd->scale_factor[1].y_den &&
+ !cm->error_resilient_mode &&
+ !cm->frame_parallel_decoding_mode;
+
/* Select the appropriate reference frame for this MB */
- if (mbmi->second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cm->lst_fb_idx;
- else if (mbmi->second_ref_frame ==
- GOLDEN_FRAME)
- second_ref_fb_idx = cm->gld_fb_idx;
- else
- second_ref_fb_idx = cm->alt_fb_idx;
+ second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
- xd->second_pre.y_buffer =
- cm->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
- xd->second_pre.u_buffer =
- cm->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
- xd->second_pre.v_buffer =
- cm->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col, &xd->scale_factor[1], &xd->scale_factor_uv[1]);
- vp9_find_mv_refs(xd, mi, prev_mi,
+ vp9_find_mv_refs(cm, xd, mi, use_prev_in_find_mv_refs ? prev_mi : NULL,
mbmi->second_ref_frame,
mbmi->ref_mvs[mbmi->second_ref_frame],
cm->ref_frame_sign_bias);
@@ -840,8 +824,9 @@
if (mbmi->mode != ZEROMV) {
vp9_find_best_ref_mvs(xd,
- xd->second_pre.y_buffer,
- recon_y_stride,
+ use_prev_in_find_best_ref ?
+ xd->second_pre.y_buffer : NULL,
+ xd->second_pre.y_stride,
mbmi->ref_mvs[mbmi->second_ref_frame],
&nearest_second,
&nearby_second);
@@ -861,12 +846,11 @@
pbi->common.fc.interintra_counts[
mbmi->second_ref_frame == INTRA_FRAME]++;
if (mbmi->second_ref_frame == INTRA_FRAME) {
- mbmi->interintra_mode = (MB_PREDICTION_MODE)read_ymode(
- bc, pbi->common.fc.ymode_prob);
+ mbmi->interintra_mode = read_ymode(bc, pbi->common.fc.ymode_prob);
pbi->common.fc.ymode_counts[mbmi->interintra_mode]++;
#if SEPARATE_INTERINTRA_UV
- mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)read_uv_mode(
- bc, pbi->common.fc.uv_mode_prob[mbmi->interintra_mode]);
+ mbmi->interintra_uv_mode = read_uv_mode(bc,
+ pbi->common.fc.uv_mode_prob[mbmi->interintra_mode]);
pbi->common.fc.uv_mode_counts[mbmi->interintra_mode]
[mbmi->interintra_uv_mode]++;
#else
@@ -905,28 +889,26 @@
mbmi->uv_mode = DC_PRED;
switch (mbmi->mode) {
case SPLITMV: {
- const int s = mbmi->partitioning =
- treed_read(bc, vp9_mbsplit_tree, cm->fc.mbsplit_prob);
- const int num_p = vp9_mbsplit_count [s];
+ const int s = treed_read(bc, vp9_mbsplit_tree, cm->fc.mbsplit_prob);
+ const int num_p = vp9_mbsplit_count[s];
int j = 0;
- cm->fc.mbsplit_counts[s]++;
+ cm->fc.mbsplit_counts[s]++;
mbmi->need_to_clamp_mvs = 0;
- do { /* for each subset j */
+ mbmi->partitioning = s;
+ do { // for each subset j
int_mv leftmv, abovemv, second_leftmv, second_abovemv;
int_mv blockmv, secondmv;
- int k; /* first block in subset j */
int mv_contz;
int blockmode;
+ int k = vp9_mbsplit_offset[s][j]; // first block in subset j
- k = vp9_mbsplit_offset[s][j];
-
- leftmv.as_int = left_block_mv(mi, k);
+ leftmv.as_int = left_block_mv(xd, mi, k);
abovemv.as_int = above_block_mv(mi, k, mis);
second_leftmv.as_int = 0;
second_abovemv.as_int = 0;
if (mbmi->second_ref_frame > 0) {
- second_leftmv.as_int = left_block_second_mv(mi, k);
+ second_leftmv.as_int = left_block_second_mv(xd, mi, k);
second_abovemv.as_int = above_block_second_mv(mi, k, mis);
}
mv_contz = vp9_mv_cont(&leftmv, &abovemv);
@@ -935,23 +917,13 @@
switch (blockmode) {
case NEW4X4:
- read_nmv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc);
- read_nmv_fp(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc,
- xd->allow_high_precision_mv);
- vp9_increment_nmv(&blockmv.as_mv, &best_mv.as_mv,
- &cm->fc.NMVcount, xd->allow_high_precision_mv);
- blockmv.as_mv.row += best_mv.as_mv.row;
- blockmv.as_mv.col += best_mv.as_mv.col;
+ process_mv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
- if (mbmi->second_ref_frame > 0) {
- read_nmv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc);
- read_nmv_fp(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
- xd->allow_high_precision_mv);
- vp9_increment_nmv(&secondmv.as_mv, &best_mv_second.as_mv,
- &cm->fc.NMVcount, xd->allow_high_precision_mv);
- secondmv.as_mv.row += best_mv_second.as_mv.row;
- secondmv.as_mv.col += best_mv_second.as_mv.col;
- }
+ if (mbmi->second_ref_frame > 0)
+ process_mv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
+
#ifdef VPX_MODE_COUNT
vp9_mv_cont_count[mv_contz][3]++;
#endif
@@ -1005,15 +977,14 @@
/* Fill (uniform) modes, mvs of jth subset.
Must do it here because ensuing subsets can
refer back to us via "left" or "above". */
- const unsigned char *fill_offset;
unsigned int fill_count = mbsplit_fill_count[s];
+ const unsigned char *fill_offset =
+ &mbsplit_fill_offset[s][j * fill_count];
- fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]];
-
do {
- mi->bmi[ *fill_offset].as_mv.first.as_int = blockmv.as_int;
+ mi->bmi[*fill_offset].as_mv[0].as_int = blockmv.as_int;
if (mbmi->second_ref_frame > 0)
- mi->bmi[ *fill_offset].as_mv.second.as_int = secondmv.as_int;
+ mi->bmi[*fill_offset].as_mv[1].as_int = secondmv.as_int;
fill_offset++;
} while (--fill_count);
}
@@ -1021,33 +992,35 @@
} while (++j < num_p);
}
- mv->as_int = mi->bmi[15].as_mv.first.as_int;
- mbmi->mv[1].as_int = mi->bmi[15].as_mv.second.as_int;
+ mv->as_int = mi->bmi[15].as_mv[0].as_int;
+ mbmi->mv[1].as_int = mi->bmi[15].as_mv[1].as_int;
break; /* done with SPLITMV */
case NEARMV:
- mv->as_int = nearby.as_int;
- /* Clip "next_nearest" so that it does not extend to far out of image */
- clamp_mv(mv, mb_to_left_edge, mb_to_right_edge,
- mb_to_top_edge, mb_to_bottom_edge);
- if (mbmi->second_ref_frame > 0) {
- mbmi->mv[1].as_int = nearby_second.as_int;
- clamp_mv(&mbmi->mv[1], mb_to_left_edge, mb_to_right_edge,
- mb_to_top_edge, mb_to_bottom_edge);
- }
+ // Clip "next_nearest" so that it does not extend to far out of image
+ assign_and_clamp_mv(mv, &nearby, mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
+ if (mbmi->second_ref_frame > 0)
+ assign_and_clamp_mv(&mbmi->mv[1], &nearby_second, mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
break;
case NEARESTMV:
- mv->as_int = nearest.as_int;
- /* Clip "next_nearest" so that it does not extend to far out of image */
- clamp_mv(mv, mb_to_left_edge, mb_to_right_edge,
- mb_to_top_edge, mb_to_bottom_edge);
- if (mbmi->second_ref_frame > 0) {
- mbmi->mv[1].as_int = nearest_second.as_int;
- clamp_mv(&mbmi->mv[1], mb_to_left_edge, mb_to_right_edge,
- mb_to_top_edge, mb_to_bottom_edge);
- }
+ // Clip "next_nearest" so that it does not extend to far out of image
+ assign_and_clamp_mv(mv, &nearest, mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
+ if (mbmi->second_ref_frame > 0)
+ assign_and_clamp_mv(&mbmi->mv[1], &nearest_second, mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
break;
case ZEROMV:
@@ -1057,21 +1030,13 @@
break;
case NEWMV:
+ process_mv(bc, &mv->as_mv, &best_mv.as_mv, nmvc, &cm->fc.NMVcount,
+ xd->allow_high_precision_mv);
- read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc);
- read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc,
- xd->allow_high_precision_mv);
- vp9_increment_nmv(&mv->as_mv, &best_mv.as_mv, &cm->fc.NMVcount,
- xd->allow_high_precision_mv);
-
- mv->as_mv.row += best_mv.as_mv.row;
- mv->as_mv.col += best_mv.as_mv.col;
-
- /* Don't need to check this on NEARMV and NEARESTMV modes
- * since those modes clamp the MV. The NEWMV mode does not,
- * so signal to the prediction stage whether special
- * handling may be required.
- */
+ // Don't need to check this on NEARMV and NEARESTMV modes
+ // since those modes clamp the MV. The NEWMV mode does not,
+ // so signal to the prediction stage whether special
+ // handling may be required.
mbmi->need_to_clamp_mvs = check_mv_bounds(mv,
mb_to_left_edge,
mb_to_right_edge,
@@ -1079,17 +1044,13 @@
mb_to_bottom_edge);
if (mbmi->second_ref_frame > 0) {
- read_nmv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc);
- read_nmv_fp(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc,
- xd->allow_high_precision_mv);
- vp9_increment_nmv(&mbmi->mv[1].as_mv, &best_mv_second.as_mv,
- &cm->fc.NMVcount, xd->allow_high_precision_mv);
- mbmi->mv[1].as_mv.row += best_mv_second.as_mv.row;
- mbmi->mv[1].as_mv.col += best_mv_second.as_mv.col;
- mbmi->need_to_clamp_secondmv |=
- check_mv_bounds(&mbmi->mv[1],
- mb_to_left_edge, mb_to_right_edge,
- mb_to_top_edge, mb_to_bottom_edge);
+ process_mv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
+ mbmi->need_to_clamp_secondmv |= check_mv_bounds(&mbmi->mv[1],
+ mb_to_left_edge,
+ mb_to_right_edge,
+ mb_to_top_edge,
+ mb_to_bottom_edge);
}
break;
default:
@@ -1102,16 +1063,11 @@
/* required for left and above block mv */
mbmi->mv[0].as_int = 0;
- if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_MODE)) {
- mbmi->mode = (MB_PREDICTION_MODE)
- vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE);
- } else if (mbmi->sb_type) {
- mbmi->mode = (MB_PREDICTION_MODE)
- read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob);
+ if (mbmi->sb_type) {
+ mbmi->mode = read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob);
pbi->common.fc.sb_ymode_counts[mbmi->mode]++;
} else {
- mbmi->mode = (MB_PREDICTION_MODE)
- read_ymode(bc, pbi->common.fc.ymode_prob);
+ mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob);
pbi->common.fc.ymode_counts[mbmi->mode]++;
}
@@ -1119,9 +1075,8 @@
if (mbmi->mode == B_PRED) {
int j = 0;
do {
- int m;
- m = mi->bmi[j].as_mode.first = (B_PREDICTION_MODE)
- read_bmode(bc, pbi->common.fc.bmode_prob);
+ int m = read_bmode(bc, pbi->common.fc.bmode_prob);
+ mi->bmi[j].as_mode.first = m;
#if CONFIG_NEWBINTRAMODES
if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
#endif
@@ -1131,10 +1086,10 @@
if (mbmi->mode == I8X8_PRED) {
int i;
- int mode8x8;
for (i = 0; i < 4; i++) {
- int ib = vp9_i8x8_block[i];
- mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob);
+ const int ib = vp9_i8x8_block[i];
+ const int mode8x8 = read_i8x8_mode(bc, pbi->common.fc.i8x8_mode_prob);
+
mi->bmi[ib + 0].as_mode.first = mode8x8;
mi->bmi[ib + 1].as_mode.first = mode8x8;
mi->bmi[ib + 4].as_mode.first = mode8x8;
@@ -1142,11 +1097,14 @@
pbi->common.fc.i8x8_mode_counts[mode8x8]++;
}
} else {
- mbmi->uv_mode = (MB_PREDICTION_MODE)read_uv_mode(
- bc, pbi->common.fc.uv_mode_prob[mbmi->mode]);
+ mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob[mbmi->mode]);
pbi->common.fc.uv_mode_counts[mbmi->mode][mbmi->uv_mode]++;
}
}
+ /*
+ if (pbi->common.current_video_frame == 1)
+ printf("mode: %d skip: %d\n", mbmi->mode, mbmi->mb_skip_coeff);
+ */
if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= I8X8_PRED) ||
@@ -1182,22 +1140,305 @@
vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs));
if (pbi->common.mb_no_coeff_skip) {
int k;
- for (k = 0; k < MBSKIP_CONTEXTS; ++k)
- cm->mbskip_pred_probs[k] = (vp9_prob)vp9_read_literal(bc, 8);
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
+ cm->mbskip_pred_probs[k] = vp9_read_prob(bc);
+ }
}
mb_mode_mv_init(pbi, bc);
}
+
+#if CONFIG_CODE_NONZEROCOUNT
+static uint16_t read_nzc(VP9_COMMON *const cm,
+ int nzc_context,
+ TX_SIZE tx_size,
+ int ref,
+ int type,
+ BOOL_DECODER* const bc) {
+ int c, e;
+ uint16_t nzc;
+ if (tx_size == TX_32X32) {
+ c = treed_read(bc, vp9_nzc32x32_tree,
+ cm->fc.nzc_probs_32x32[nzc_context][ref][type]);
+ cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_16X16) {
+ c = treed_read(bc, vp9_nzc16x16_tree,
+ cm->fc.nzc_probs_16x16[nzc_context][ref][type]);
+ cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_8X8) {
+ c = treed_read(bc, vp9_nzc8x8_tree,
+ cm->fc.nzc_probs_8x8[nzc_context][ref][type]);
+ cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_4X4) {
+ c = treed_read(bc, vp9_nzc4x4_tree,
+ cm->fc.nzc_probs_4x4[nzc_context][ref][type]);
+ cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++;
+ } else {
+ assert(0);
+ }
+ nzc = vp9_basenzcvalue[c];
+ if ((e = vp9_extranzcbits[c])) {
+ int x = 0;
+ while (e--) {
+ int b = vp9_read(
+ bc, cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]);
+ x |= (b << e);
+ cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++;
+ }
+ nzc += x;
+ }
+ if (tx_size == TX_32X32)
+ assert(nzc <= 1024);
+ else if (tx_size == TX_16X16)
+ assert(nzc <= 256);
+ else if (tx_size == TX_8X8)
+ assert(nzc <= 64);
+ else if (tx_size == TX_4X4)
+ assert(nzc <= 16);
+ return nzc;
+}
+
+static void read_nzcs_sb64(VP9_COMMON *const cm,
+ MACROBLOCKD* xd,
+ int mb_row,
+ int mb_col,
+ BOOL_DECODER* const bc) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0]));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 256; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 64) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 1, bc);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 256; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 256; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 256; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 256; j < 384; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void read_nzcs_sb32(VP9_COMMON *const cm,
+ MACROBLOCKD* xd,
+ int mb_row,
+ int mb_col,
+ BOOL_DECODER* const bc) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0]));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 64; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 64; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 64; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 64; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 64; j < 96; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void read_nzcs_mb16(VP9_COMMON *const cm,
+ MACROBLOCKD* xd,
+ int mb_row,
+ int mb_col,
+ BOOL_DECODER* const bc) {
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0]));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_16X16:
+ for (j = 0; j < 16; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 16; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc);
+ }
+ if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) {
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ }
+ } else {
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc);
+ }
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 16; ++j) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+#endif // CONFIG_CODE_NONZEROCOUNT
+
void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
int mb_row,
int mb_col,
BOOL_DECODER* const bc) {
+ VP9_COMMON *const cm = &pbi->common;
MODE_INFO *mi = xd->mode_info_context;
MODE_INFO *prev_mi = xd->prev_mode_info_context;
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
- if (pbi->common.frame_type == KEY_FRAME)
+ if (pbi->common.frame_type == KEY_FRAME) {
kfread_modes(pbi, mi, mb_row, mb_col, bc);
- else
+ } else {
read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc);
+ set_scale_factors(xd,
+ mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1,
+ pbi->common.active_ref_scale);
+ }
+#if CONFIG_CODE_NONZEROCOUNT
+ if (mbmi->sb_type == BLOCK_SIZE_SB64X64)
+ read_nzcs_sb64(cm, xd, mb_row, mb_col, bc);
+ else if (mbmi->sb_type == BLOCK_SIZE_SB32X32)
+ read_nzcs_sb32(cm, xd, mb_row, mb_col, bc);
+ else
+ read_nzcs_mb16(cm, xd, mb_row, mb_col, bc);
+#endif // CONFIG_CODE_NONZEROCOUNT
+
+ if (mbmi->sb_type) {
+ const int n_mbs = 1 << mbmi->sb_type;
+ const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row);
+ const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col);
+ const int mis = cm->mode_info_stride;
+ int x, y;
+
+ for (y = 0; y < y_mbs; y++) {
+ for (x = !y; x < x_mbs; x++) {
+ mi[y * mis + x] = *mi;
+ }
+ }
+ } else {
+ update_blockd_bmi(xd);
+ }
}
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -13,7 +13,6 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_header.h"
#include "vp9/common/vp9_reconintra.h"
-#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/decoder/vp9_decodframe.h"
@@ -32,7 +31,7 @@
#include "vp9/decoder/vp9_dboolhuff.h"
#include "vp9/common/vp9_seg_common.h"
-#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9_rtcd.h"
#include <assert.h>
@@ -40,11 +39,25 @@
#define COEFCOUNT_TESTING
-//#define DEC_DEBUG
+// #define DEC_DEBUG
#ifdef DEC_DEBUG
int dec_debug = 0;
#endif
+static int read_le16(const uint8_t *p) {
+ return (p[1] << 8) | p[0];
+}
+
+static int read_le32(const uint8_t *p) {
+ return (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
+}
+
+// len == 0 is not allowed
+static int read_is_valid(const unsigned char *start, size_t len,
+ const unsigned char *end) {
+ return start + len > start && start + len <= end;
+}
+
static int merge_index(int v, int n, int modulus) {
int max1 = (n - 1 - modulus / 2) / modulus + 1;
if (v < max1) v = v * modulus + modulus / 2;
@@ -62,14 +75,13 @@
static int inv_remap_prob(int v, int m) {
const int n = 256;
const int modulus = MODULUS_PARAM;
- int i;
+
v = merge_index(v, n - 1, modulus);
if ((m << 1) <= n) {
- i = vp9_inv_recenter_nonneg(v + 1, m);
+ return vp9_inv_recenter_nonneg(v + 1, m);
} else {
- i = n - 1 - vp9_inv_recenter_nonneg(v + 1, n - 1 - m);
+ return n - 1 - vp9_inv_recenter_nonneg(v + 1, n - 1 - m);
}
- return i;
}
static vp9_prob read_prob_diff_update(vp9_reader *const bc, int oldp) {
@@ -79,103 +91,78 @@
void vp9_init_de_quantizer(VP9D_COMP *pbi) {
int i;
- int Q;
+ int q;
VP9_COMMON *const pc = &pbi->common;
- for (Q = 0; Q < QINDEX_RANGE; Q++) {
- pc->Y1dequant[Q][0] = (int16_t)vp9_dc_quant(Q, pc->y1dc_delta_q);
- pc->Y2dequant[Q][0] = (int16_t)vp9_dc2quant(Q, pc->y2dc_delta_q);
- pc->UVdequant[Q][0] = (int16_t)vp9_dc_uv_quant(Q, pc->uvdc_delta_q);
+ for (q = 0; q < QINDEX_RANGE; q++) {
+ pc->Y1dequant[q][0] = (int16_t)vp9_dc_quant(q, pc->y1dc_delta_q);
+ pc->UVdequant[q][0] = (int16_t)vp9_dc_uv_quant(q, pc->uvdc_delta_q);
/* all the ac values =; */
for (i = 1; i < 16; i++) {
int rc = vp9_default_zig_zag1d_4x4[i];
- pc->Y1dequant[Q][rc] = (int16_t)vp9_ac_yquant(Q);
- pc->Y2dequant[Q][rc] = (int16_t)vp9_ac2quant(Q, pc->y2ac_delta_q);
- pc->UVdequant[Q][rc] = (int16_t)vp9_ac_uv_quant(Q, pc->uvac_delta_q);
+ pc->Y1dequant[q][rc] = (int16_t)vp9_ac_yquant(q);
+ pc->UVdequant[q][rc] = (int16_t)vp9_ac_uv_quant(q, pc->uvac_delta_q);
}
}
}
-static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
+static int get_qindex(MACROBLOCKD *mb, int segment_id, int base_qindex) {
+ // Set the Q baseline allowing for any segment level adjustment
+ if (vp9_segfeature_active(mb, segment_id, SEG_LVL_ALT_Q)) {
+ if (mb->mb_segment_abs_delta == SEGMENT_ABSDATA)
+ return vp9_get_segdata(mb, segment_id, SEG_LVL_ALT_Q); // Abs Value
+ else
+ return clamp(base_qindex + vp9_get_segdata(mb, segment_id, SEG_LVL_ALT_Q),
+ 0, MAXQ); // Delta Value
+ } else {
+ return base_qindex;
+ }
+}
+
+static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *mb) {
int i;
- int QIndex;
+
VP9_COMMON *const pc = &pbi->common;
- int segment_id = xd->mode_info_context->mbmi.segment_id;
+ const int segment_id = mb->mode_info_context->mbmi.segment_id;
+ const int qindex = get_qindex(mb, segment_id, pc->base_qindex);
+ mb->q_index = qindex;
- // Set the Q baseline allowing for any segment level adjustment
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) {
- /* Abs Value */
- if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA)
- QIndex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
+ for (i = 0; i < 16; i++)
+ mb->block[i].dequant = pc->Y1dequant[qindex];
- /* Delta Value */
- else {
- QIndex = pc->base_qindex +
- vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
- }
- } else
- QIndex = pc->base_qindex;
- xd->q_index = QIndex;
+ for (i = 16; i < 24; i++)
+ mb->block[i].dequant = pc->UVdequant[qindex];
- /* Set up the block level dequant pointers */
- for (i = 0; i < 16; i++) {
- xd->block[i].dequant = pc->Y1dequant[QIndex];
- }
-
-#if CONFIG_LOSSLESS
- if (!QIndex) {
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
- pbi->mb.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
- pbi->idct_add = vp9_dequant_idct_add_lossless_c;
- pbi->dc_idct_add = vp9_dequant_dc_idct_add_lossless_c;
- pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
- pbi->idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c;
- pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
+ if (mb->lossless) {
+ assert(qindex == 0);
+ mb->inv_txm4x4_1 = vp9_short_iwalsh4x4_1;
+ mb->inv_txm4x4 = vp9_short_iwalsh4x4;
+ mb->itxm_add = vp9_dequant_idct_add_lossless_c;
+ mb->itxm_add_y_block = vp9_dequant_idct_add_y_block_lossless_c;
+ mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c;
} else {
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
- pbi->idct_add = vp9_dequant_idct_add;
- pbi->dc_idct_add = vp9_dequant_dc_idct_add;
- pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
- pbi->idct_add_y_block = vp9_dequant_idct_add_y_block;
- pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block;
+ mb->inv_txm4x4_1 = vp9_short_idct4x4_1;
+ mb->inv_txm4x4 = vp9_short_idct4x4;
+ mb->itxm_add = vp9_dequant_idct_add;
+ mb->itxm_add_y_block = vp9_dequant_idct_add_y_block;
+ mb->itxm_add_uv_block = vp9_dequant_idct_add_uv_block;
}
-#else
- pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
- pbi->idct_add = vp9_dequant_idct_add;
- pbi->dc_idct_add = vp9_dequant_dc_idct_add;
- pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
- pbi->idct_add_y_block = vp9_dequant_idct_add_y_block;
- pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block;
-#endif
-
- for (i = 16; i < 24; i++) {
- xd->block[i].dequant = pc->UVdequant[QIndex];
- }
-
- xd->block[24].dequant = pc->Y2dequant[QIndex];
-
}
/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
* to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
*/
-static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
+static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ int mb_row, int mb_col) {
+ BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
- if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ if (sb_type == BLOCK_SIZE_SB64X64) {
vp9_build_intra_predictors_sb64uv_s(xd);
vp9_build_intra_predictors_sb64y_s(xd);
- } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ } else if (sb_type == BLOCK_SIZE_SB32X32) {
vp9_build_intra_predictors_sbuv_s(xd);
vp9_build_intra_predictors_sby_s(xd);
} else {
@@ -183,46 +170,30 @@
vp9_build_intra_predictors_mby_s(xd);
}
} else {
- if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ if (sb_type == BLOCK_SIZE_SB64X64) {
vp9_build_inter64x64_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
- } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ } else if (sb_type == BLOCK_SIZE_SB32X32) {
vp9_build_inter32x32_predictors_sb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else {
- vp9_build_1st_inter16x16_predictors_mb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
-
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- vp9_build_2nd_inter16x16_predictors_mb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
- }
-#if CONFIG_COMP_INTERINTRA_PRED
- else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
- vp9_build_interintra_16x16_predictors_mb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
- }
-#endif
+ vp9_build_inter16x16_predictors_mb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
}
}
}
@@ -229,10 +200,8 @@
static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
BOOL_DECODER* const bc) {
- BLOCKD *bd = &xd->block[0];
- TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
- assert(get_2nd_order_usage(xd) == 0);
-#ifdef DEC_DEBUG
+ TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
+#if 0 // def DEC_DEBUG
if (dec_debug) {
int i;
printf("\n");
@@ -262,7 +231,7 @@
vp9_dequant_idct_add_uv_block_8x8(
xd->qcoeff + 16 * 16, xd->block[16].dequant,
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->eobs + 16, xd);
+ xd->dst.uv_stride, xd);
}
static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
@@ -269,13 +238,13 @@
BOOL_DECODER* const bc) {
// First do Y
// if the first one is DCT_DCT assume all the rest are as well
- TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]);
-#ifdef DEC_DEBUG
+ TX_TYPE tx_type = get_tx_type_8x8(xd, 0);
+#if 0 // def DEC_DEBUG
if (dec_debug) {
int i;
printf("\n");
printf("qcoeff 8x8\n");
- for (i = 0; i < 400; i++) {
+ for (i = 0; i < 384; i++) {
printf("%3d ", xd->qcoeff[i]);
if (i % 16 == 15) printf("\n");
}
@@ -283,7 +252,6 @@
#endif
if (tx_type != DCT_DCT || xd->mode_info_context->mbmi.mode == I8X8_PRED) {
int i;
- assert(get_2nd_order_usage(xd) == 0);
for (i = 0; i < 4; i++) {
int ib = vp9_i8x8_block[i];
int idx = (ib & 0x02) ? (ib + 2) : ib;
@@ -295,46 +263,24 @@
BLOCKD *b = &xd->block[ib];
if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
int i8x8mode = b->bmi.as_mode.first;
- vp9_intra8x8_predict(b, i8x8mode, b->predictor);
+ vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor);
}
- tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
+ tx_type = get_tx_type_8x8(xd, ib);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
xd->eobs[idx]);
} else {
vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,
- 0, xd->eobs[idx]);
+ xd->eobs[idx]);
}
}
- } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
- assert(get_2nd_order_usage(xd) == 0);
+ } else {
vp9_dequant_idct_add_y_block_8x8(xd->qcoeff,
xd->block[0].dequant,
xd->predictor,
xd->dst.y_buffer,
xd->dst.y_stride,
- xd->eobs, xd);
- } else {
- BLOCKD *b = &xd->block[24];
- assert(get_2nd_order_usage(xd) == 1);
- vp9_dequantize_b_2x2(b);
- vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8);
- ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct
- ((int *)b->qcoeff)[1] = 0;
- ((int *)b->qcoeff)[2] = 0;
- ((int *)b->qcoeff)[3] = 0;
- ((int *)b->qcoeff)[4] = 0;
- ((int *)b->qcoeff)[5] = 0;
- ((int *)b->qcoeff)[6] = 0;
- ((int *)b->qcoeff)[7] = 0;
- vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff,
- xd->block[0].dequant,
- xd->predictor,
- xd->dst.y_buffer,
- xd->dst.y_stride,
- xd->eobs,
- xd->block[24].diff,
- xd);
+ xd);
}
// Now do UV
@@ -344,26 +290,28 @@
int ib = vp9_i8x8_block[i];
BLOCKD *b = &xd->block[ib];
int i8x8mode = b->bmi.as_mode.first;
+
b = &xd->block[16 + i];
- vp9_intra_uv4x4_predict(&xd->block[16 + i], i8x8mode, b->predictor);
- pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 8, b->dst_stride);
+ vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]);
+
b = &xd->block[20 + i];
- vp9_intra_uv4x4_predict(&xd->block[20 + i], i8x8mode, b->predictor);
- pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 8, b->dst_stride);
+ vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]);
}
} else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
- pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->eobs + 16);
+ xd->dst.uv_stride, xd);
} else {
vp9_dequant_idct_add_uv_block_8x8
(xd->qcoeff + 16 * 16, xd->block[16].dequant,
xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->eobs + 16, xd);
+ xd->dst.uv_stride, xd);
}
-#ifdef DEC_DEBUG
+#if 0 // def DEC_DEBUG
if (dec_debug) {
int i;
printf("\n");
@@ -381,94 +329,98 @@
TX_TYPE tx_type;
int i, eobtotal = 0;
MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode;
+#if 0 // def DEC_DEBUG
+ if (dec_debug) {
+ int i;
+ printf("\n");
+ printf("predictor\n");
+ for (i = 0; i < 384; i++) {
+ printf("%3d ", xd->predictor[i]);
+ if (i % 16 == 15) printf("\n");
+ }
+ }
+#endif
if (mode == I8X8_PRED) {
- assert(get_2nd_order_usage(xd) == 0);
for (i = 0; i < 4; i++) {
int ib = vp9_i8x8_block[i];
const int iblock[4] = {0, 1, 4, 5};
int j;
- int i8x8mode;
- BLOCKD *b;
- b = &xd->block[ib];
- i8x8mode = b->bmi.as_mode.first;
- vp9_intra8x8_predict(b, i8x8mode, b->predictor);
+ BLOCKD *b = &xd->block[ib];
+ int i8x8mode = b->bmi.as_mode.first;
+ vp9_intra8x8_predict(xd, b, i8x8mode, b->predictor);
for (j = 0; j < 4; j++) {
b = &xd->block[ib + iblock[j]];
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16,
- b->dst_stride, b->eob);
+ b->dst_stride, xd->eobs[ib + iblock[j]]);
} else {
- vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride,
+ xd->eobs[ib + iblock[j]]);
}
}
b = &xd->block[16 + i];
- vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor);
- pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 8, b->dst_stride);
+ vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]);
b = &xd->block[20 + i];
- vp9_intra_uv4x4_predict(b, i8x8mode, b->predictor);
- pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 8, b->dst_stride);
+ vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]);
}
} else if (mode == B_PRED) {
- assert(get_2nd_order_usage(xd) == 0);
for (i = 0; i < 16; i++) {
- int b_mode;
BLOCKD *b = &xd->block[i];
- b_mode = xd->mode_info_context->bmi[i].as_mode.first;
+ int b_mode = xd->mode_info_context->bmi[i].as_mode.first;
#if CONFIG_NEWBINTRAMODES
xd->mode_info_context->bmi[i].as_mode.context = b->bmi.as_mode.context =
- vp9_find_bpred_context(b);
+ vp9_find_bpred_context(xd, b);
#endif
if (!xd->mode_info_context->mbmi.mb_skip_coeff)
eobtotal += vp9_decode_coefs_4x4(pbi, xd, bc, PLANE_TYPE_Y_WITH_DC, i);
- vp9_intra4x4_predict(b, b_mode, b->predictor);
- tx_type = get_tx_type_4x4(xd, b);
+ vp9_intra4x4_predict(xd, b, b_mode, b->predictor);
+ tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride,
- b->eob);
+ xd->eobs[i]);
} else {
- vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]);
}
}
if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc);
}
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
vp9_build_intra_predictors_mbuv(xd);
- pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.uv_stride,
- xd->eobs + 16);
- } else if (mode == SPLITMV) {
- assert(get_2nd_order_usage(xd) == 0);
- pbi->idct_add_y_block(xd->qcoeff,
+ xd);
+ } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
+ xd->itxm_add_y_block(xd->qcoeff,
xd->block[0].dequant,
xd->predictor,
xd->dst.y_buffer,
xd->dst.y_stride,
- xd->eobs);
- pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd);
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.uv_stride,
- xd->eobs + 16);
+ xd);
} else {
-#ifdef DEC_DEBUG
+#if 0 // def DEC_DEBUG
if (dec_debug) {
int i;
printf("\n");
@@ -485,211 +437,35 @@
}
}
#endif
- tx_type = get_tx_type_4x4(xd, &xd->block[0]);
- if (tx_type != DCT_DCT) {
- assert(get_2nd_order_usage(xd) == 0);
- for (i = 0; i < 16; i++) {
- BLOCKD *b = &xd->block[i];
- tx_type = get_tx_type_4x4(xd, b);
- if (tx_type != DCT_DCT) {
- vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
- b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16,
- b->dst_stride, b->eob);
- } else {
- vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
- }
- }
- } else {
- BLOCKD *b = &xd->block[24];
- assert(get_2nd_order_usage(xd) == 1);
- vp9_dequantize_b(b);
- if (xd->eobs[24] > 1) {
- vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
- ((int *)b->qcoeff)[0] = 0;
- ((int *)b->qcoeff)[1] = 0;
- ((int *)b->qcoeff)[2] = 0;
- ((int *)b->qcoeff)[3] = 0;
- ((int *)b->qcoeff)[4] = 0;
- ((int *)b->qcoeff)[5] = 0;
- ((int *)b->qcoeff)[6] = 0;
- ((int *)b->qcoeff)[7] = 0;
+ for (i = 0; i < 16; i++) {
+ BLOCKD *b = &xd->block[i];
+ tx_type = get_tx_type_4x4(xd, i);
+ if (tx_type != DCT_DCT) {
+ vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
+ b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16,
+ b->dst_stride, xd->eobs[i]);
} else {
- xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
- ((int *)b->qcoeff)[0] = 0;
+ xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+ *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]);
}
- vp9_dequantize_b(b);
- pbi->dc_idct_add_y_block(xd->qcoeff,
- xd->block[0].dequant,
- xd->predictor,
- xd->dst.y_buffer,
- xd->dst.y_stride,
- xd->eobs,
- xd->block[24].diff);
}
- pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+ xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
xd->block[16].dequant,
xd->predictor + 16 * 16,
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.uv_stride,
- xd->eobs + 16);
+ xd);
}
}
-static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
- BOOL_DECODER* const bc, int n,
- int maska, int shiftb) {
- int x_idx = n & maska, y_idx = n >> shiftb;
- TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
- if (tx_type != DCT_DCT) {
- vp9_ht_dequant_idct_add_16x16_c(
- tx_type, xd->qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob);
- } else {
- vp9_dequant_idct_add_16x16(
- xd->qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]);
- }
- vp9_dequant_idct_add_uv_block_8x8_inplace_c(
- xd->qcoeff + 16 * 16,
- xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd->eobs + 16, xd);
-};
-
-static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
- BOOL_DECODER* const bc, int n,
- int maska, int shiftb) {
- int x_idx = n & maska, y_idx = n >> shiftb;
- BLOCKD *b = &xd->block[24];
- TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]);
- if (tx_type != DCT_DCT) {
- int i;
- for (i = 0; i < 4; i++) {
- int ib = vp9_i8x8_block[i];
- int idx = (ib & 0x02) ? (ib + 2) : ib;
- int16_t *q = xd->block[idx].qcoeff;
- int16_t *dq = xd->block[0].dequant;
- int stride = xd->dst.y_stride;
- BLOCKD *b = &xd->block[ib];
- tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
- if (tx_type != DCT_DCT) {
- vp9_ht_dequant_idct_add_8x8_c(
- tx_type, q, dq,
- xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
- + x_idx * 16 + (i & 1) * 8,
- xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
- + x_idx * 16 + (i & 1) * 8,
- stride, stride, b->eob);
- } else {
- vp9_dequant_idct_add_8x8_c(
- q, dq,
- xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
- + x_idx * 16 + (i & 1) * 8,
- xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
- + x_idx * 16 + (i & 1) * 8,
- stride, stride, 0, b->eob);
- }
- vp9_dequant_idct_add_uv_block_8x8_inplace_c(
- xd->qcoeff + 16 * 16, xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd->eobs + 16, xd);
- }
- } else {
- vp9_dequantize_b_2x2(b);
- vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8);
- ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct
- ((int *)b->qcoeff)[1] = 0;
- ((int *)b->qcoeff)[2] = 0;
- ((int *)b->qcoeff)[3] = 0;
- ((int *)b->qcoeff)[4] = 0;
- ((int *)b->qcoeff)[5] = 0;
- ((int *)b->qcoeff)[6] = 0;
- ((int *)b->qcoeff)[7] = 0;
- vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(
- xd->qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd);
- vp9_dequant_idct_add_uv_block_8x8_inplace_c(
- xd->qcoeff + 16 * 16, xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd->eobs + 16, xd);
- }
-};
-
-static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
- BOOL_DECODER* const bc, int n,
- int maska, int shiftb) {
- int x_idx = n & maska, y_idx = n >> shiftb;
- BLOCKD *b = &xd->block[24];
- TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]);
- if (tx_type != DCT_DCT) {
- int i;
- for (i = 0; i < 16; i++) {
- BLOCKD *b = &xd->block[i];
- tx_type = get_tx_type_4x4(xd, b);
- if (tx_type != DCT_DCT) {
- vp9_ht_dequant_idct_add_c(
- tx_type, b->qcoeff, b->dequant,
- xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
- + x_idx * 16 + (i & 3) * 4,
- xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
- + x_idx * 16 + (i & 3) * 4,
- xd->dst.y_stride, xd->dst.y_stride, b->eob);
- } else {
- vp9_dequant_idct_add_c(
- b->qcoeff, b->dequant,
- xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
- + x_idx * 16 + (i & 3) * 4,
- xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
- + x_idx * 16 + (i & 3) * 4,
- xd->dst.y_stride, xd->dst.y_stride);
- }
- }
- } else {
- vp9_dequantize_b(b);
- if (xd->eobs[24] > 1) {
- vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
- ((int *)b->qcoeff)[0] = 0;
- ((int *)b->qcoeff)[1] = 0;
- ((int *)b->qcoeff)[2] = 0;
- ((int *)b->qcoeff)[3] = 0;
- ((int *)b->qcoeff)[4] = 0;
- ((int *)b->qcoeff)[5] = 0;
- ((int *)b->qcoeff)[6] = 0;
- ((int *)b->qcoeff)[7] = 0;
- } else {
- xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
- ((int *)b->qcoeff)[0] = 0;
- }
- vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(
- xd->qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd);
- }
- vp9_dequant_idct_add_uv_block_4x4_inplace_c(
- xd->qcoeff + 16 * 16, xd->block[16].dequant,
- xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8,
- xd->dst.uv_stride, xd->eobs + 16, xd);
-};
-
static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd,
int mb_row, int mb_col,
BOOL_DECODER* const bc) {
- int i, n, eobtotal;
- TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
+ int n, eobtotal;
VP9_COMMON *const pc = &pbi->common;
- MODE_INFO *orig_mi = xd->mode_info_context;
+ MODE_INFO *mi = xd->mode_info_context;
const int mis = pc->mode_info_stride;
assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64);
@@ -702,25 +478,12 @@
mb_init_dequantizer(pbi, xd);
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
- int n;
+ vp9_reset_sb64_tokens_context(xd);
- vp9_reset_mb_tokens_context(xd);
- for (n = 1; n <= 3; n++) {
- if (mb_col < pc->mb_cols - n)
- xd->above_context += n;
- if (mb_row < pc->mb_rows - n)
- xd->left_context += n;
- vp9_reset_mb_tokens_context(xd);
- if (mb_col < pc->mb_cols - n)
- xd->above_context -= n;
- if (mb_row < pc->mb_rows - n)
- xd->left_context -= n;
- }
-
/* Special case: Force the loopfilter to skip when eobtotal and
* mb_skip_coeff are zero.
*/
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
@@ -731,91 +494,151 @@
} else {
vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
/* dequantization and idct */
- if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
- for (n = 0; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
+ eobtotal = vp9_decode_sb64_tokens(pbi, xd, bc);
+ if (eobtotal == 0) { // skip loopfilter
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
- if (mb_col + x_idx * 2 >= pc->mb_cols ||
- mb_row + y_idx * 2 >= pc->mb_rows)
- continue;
+ if (mb_col + x_idx < pc->mb_cols && mb_row + y_idx < pc->mb_rows)
+ mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
+ }
+ } else {
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ const int y_offset = x_idx * 32 + y_idx * xd->dst.y_stride * 32;
+ vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]);
+ }
+ vp9_dequant_idct_add_32x32(xd->qcoeff + 4096,
+ xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]);
+ vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024,
+ xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]);
+ break;
+ case TX_16X16:
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ const int y_offset = y_idx * 16 * xd->dst.y_stride + x_idx * 16;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd,
+ (y_idx * 16 + x_idx) * 4);
- xd->left_context = pc->left_context + (y_idx << 1);
- xd->above_context = pc->above_context + mb_col + (x_idx << 1);
- xd->mode_info_context = orig_mi + x_idx * 2 + y_idx * 2 * mis;
- eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
- if (eobtotal == 0) { // skip loopfilter
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- if (mb_col + 1 < pc->mb_cols)
- xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
- if (mb_row + 1 < pc->mb_rows) {
- xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
- if (mb_col + 1 < pc->mb_cols)
- xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ } else {
+ vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ }
}
- } else {
- vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer + x_idx * 32 +
- xd->dst.y_stride * y_idx * 32,
- xd->dst.y_buffer + x_idx * 32 +
- xd->dst.y_stride * y_idx * 32,
- xd->dst.y_stride, xd->dst.y_stride,
- xd->eobs[0]);
- vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024,
- xd->block[16].dequant,
- xd->dst.u_buffer + x_idx * 16 +
- xd->dst.uv_stride * y_idx * 16,
- xd->dst.v_buffer + x_idx * 16 +
- xd->dst.uv_stride * y_idx * 16,
- xd->dst.uv_stride, xd->eobs + 16);
- }
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ const int uv_offset = y_idx * 16 * xd->dst.uv_stride + x_idx * 16;
+ vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + n * 256,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 16]);
+ vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + 1024 + n * 256,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]);
+ }
+ break;
+ case TX_8X8:
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ const int y_offset = y_idx * 8 * xd->dst.y_stride + x_idx * 8;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ } else {
+ vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ }
+ }
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ const int uv_offset = y_idx * 8 * xd->dst.uv_stride + x_idx * 8;
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 4]);
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096 + 1024,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 4]);
+ }
+ break;
+ case TX_4X4:
+ for (n = 0; n < 256; n++) {
+ const int x_idx = n & 15, y_idx = n >> 4;
+ const int y_offset = y_idx * 4 * xd->dst.y_stride + x_idx * 4;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
+ if (tx_type == DCT_DCT) {
+ xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ } else {
+ vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ }
+ }
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ const int uv_offset = y_idx * 4 * xd->dst.uv_stride + x_idx * 4;
+ xd->itxm_add(xd->qcoeff + 4096 + n * 16,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n]);
+ xd->itxm_add(xd->qcoeff + 4096 + 1024 + n * 16,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n]);
+ }
+ break;
+ default: assert(0);
}
- } else {
- for (n = 0; n < 16; n++) {
- int x_idx = n & 3, y_idx = n >> 2;
-
- if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows)
- continue;
-
- xd->above_context = pc->above_context + mb_col + x_idx;
- xd->left_context = pc->left_context + y_idx;
- xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
- for (i = 0; i < 25; i++) {
- xd->block[i].eob = 0;
- xd->eobs[i] = 0;
- }
-
- eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
- if (eobtotal == 0) { // skip loopfilter
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- continue;
- }
-
- if (tx_size == TX_16X16) {
- decode_16x16_sb(pbi, xd, bc, n, 3, 2);
- } else if (tx_size == TX_8X8) {
- decode_8x8_sb(pbi, xd, bc, n, 3, 2);
- } else {
- decode_4x4_sb(pbi, xd, bc, n, 3, 2);
- }
- }
}
-
- xd->above_context = pc->above_context + mb_col;
- xd->left_context = pc->left_context;
- xd->mode_info_context = orig_mi;
}
static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd,
int mb_row, int mb_col,
BOOL_DECODER* const bc) {
- int i, n, eobtotal;
- TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
+ int n, eobtotal;
VP9_COMMON *const pc = &pbi->common;
- MODE_INFO *orig_mi = xd->mode_info_context;
const int mis = pc->mode_info_stride;
assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32);
@@ -828,21 +651,12 @@
mb_init_dequantizer(pbi, xd);
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
- vp9_reset_mb_tokens_context(xd);
- if (mb_col < pc->mb_cols - 1)
- xd->above_context++;
- if (mb_row < pc->mb_rows - 1)
- xd->left_context++;
- vp9_reset_mb_tokens_context(xd);
- if (mb_col < pc->mb_cols - 1)
- xd->above_context--;
- if (mb_row < pc->mb_rows - 1)
- xd->left_context--;
+ vp9_reset_sb_tokens_context(xd);
/* Special case: Force the loopfilter to skip when eobtotal and
* mb_skip_coeff are zero.
*/
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
@@ -853,64 +667,131 @@
} else {
vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
/* dequantization and idct */
- if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
- eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
- if (eobtotal == 0) { // skip loopfilter
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ eobtotal = vp9_decode_sb_tokens(pbi, xd, bc);
+ if (eobtotal == 0) { // skip loopfilter
+ xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ if (mb_col + 1 < pc->mb_cols)
+ xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
+ if (mb_row + 1 < pc->mb_rows) {
+ xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
if (mb_col + 1 < pc->mb_cols)
- xd->mode_info_context[1].mbmi.mb_skip_coeff = 1;
- if (mb_row + 1 < pc->mb_rows) {
- xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1;
- if (mb_col + 1 < pc->mb_cols)
- xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
- }
- } else {
- vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant,
- xd->dst.y_buffer, xd->dst.y_buffer,
- xd->dst.y_stride, xd->dst.y_stride,
- xd->eobs[0]);
- vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024,
- xd->block[16].dequant,
- xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.uv_stride, xd->eobs + 16);
+ xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1;
}
} else {
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant,
+ xd->dst.y_buffer, xd->dst.y_buffer,
+ xd->dst.y_stride, xd->dst.y_stride,
+ xd->eobs[0]);
+ vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
+ xd->block[16].dequant,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.uv_stride, xd);
+ break;
+ case TX_16X16:
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ const int y_offset = y_idx * 16 * xd->dst.y_stride + x_idx * 16;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd,
+ (y_idx * 8 + x_idx) * 4);
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_16x16(
+ xd->qcoeff + n * 256, xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ } else {
+ vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
+ }
+ }
+ vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
+ xd->block[16].dequant,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.uv_stride, xd);
+ break;
+ case TX_8X8:
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ const int y_offset = y_idx * 8 * xd->dst.y_stride + x_idx * 8;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
+ if (tx_type == DCT_DCT) {
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ } else {
+ vp9_ht_dequant_idct_add_8x8_c(tx_type, xd->qcoeff + n * 64,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]);
+ }
+ }
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ const int uv_offset = y_idx * 8 * xd->dst.uv_stride + x_idx * 8;
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1024,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n * 4]);
+ vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1280,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n * 4]);
+ }
+ break;
+ case TX_4X4:
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ const int y_offset = y_idx * 4 * xd->dst.y_stride + x_idx * 4;
- if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows)
- continue;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
+ if (tx_type == DCT_DCT) {
+ xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ } else {
+ vp9_ht_dequant_idct_add_c(tx_type, xd->qcoeff + n * 16,
+ xd->block[0].dequant,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_buffer + y_offset,
+ xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]);
+ }
+ }
- xd->above_context = pc->above_context + mb_col + x_idx;
- xd->left_context = pc->left_context + y_idx + (mb_row & 2);
- xd->mode_info_context = orig_mi + x_idx + y_idx * mis;
- for (i = 0; i < 25; i++) {
- xd->block[i].eob = 0;
- xd->eobs[i] = 0;
- }
-
- eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
- if (eobtotal == 0) { // skip loopfilter
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- continue;
- }
-
- if (tx_size == TX_16X16) {
- decode_16x16_sb(pbi, xd, bc, n, 1, 1);
- } else if (tx_size == TX_8X8) {
- decode_8x8_sb(pbi, xd, bc, n, 1, 1);
- } else {
- decode_4x4_sb(pbi, xd, bc, n, 1, 1);
- }
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ const int uv_offset = y_idx * 4 * xd->dst.uv_stride + x_idx * 4;
+ xd->itxm_add(xd->qcoeff + 1024 + n * 16,
+ xd->block[16].dequant,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.u_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n]);
+ xd->itxm_add(xd->qcoeff + 1280 + n * 16,
+ xd->block[20].dequant,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.v_buffer + uv_offset,
+ xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n]);
+ }
+ break;
+ default: assert(0);
}
-
- xd->above_context = pc->above_context + mb_col;
- xd->left_context = pc->left_context + (mb_row & 2);
- xd->mode_info_context = orig_mi;
}
}
@@ -919,7 +800,6 @@
BOOL_DECODER* const bc) {
int eobtotal = 0;
MB_PREDICTION_MODE mode;
- int i;
int tx_size;
assert(!xd->mode_info_context->mbmi.sb_type);
@@ -934,13 +814,8 @@
if (xd->mode_info_context->mbmi.mb_skip_coeff) {
vp9_reset_mb_tokens_context(xd);
} else if (!bool_error(bc)) {
- for (i = 0; i < 25; i++) {
- xd->block[i].eob = 0;
- xd->eobs[i] = 0;
- }
- if (mode != B_PRED) {
+ if (mode != B_PRED)
eobtotal = vp9_decode_mb_tokens(pbi, xd, bc);
- }
}
//mode = xd->mode_info_context->mbmi.mode;
@@ -948,24 +823,25 @@
vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter,
&pbi->common);
- if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV
- && mode != I8X8_PRED
- && !bool_error(bc)) {
+ if (eobtotal == 0 &&
+ mode != B_PRED &&
+ mode != SPLITMV &&
+ mode != I8X8_PRED &&
+ !bool_error(bc)) {
/* Special case: Force the loopfilter to skip when eobtotal and
- * mb_skip_coeff are zero.
- * */
+ mb_skip_coeff are zero. */
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
-#ifdef DEC_DEBUG
+#if 0 // def DEC_DEBUG
if (dec_debug)
printf("Decoding mb: %d %d\n", xd->mode_info_context->mbmi.mode, tx_size);
#endif
// moved to be performed before detokenization
-// if (xd->segmentation_enabled)
-// mb_init_dequantizer(pbi, xd);
+ // if (xd->segmentation_enabled)
+ // mb_init_dequantizer(pbi, xd);
/* do prediction */
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
@@ -976,13 +852,13 @@
}
}
} else {
-#ifdef DEC_DEBUG
+#if 0 // def DEC_DEBUG
if (dec_debug)
printf("Decoding mb: %d %d interp %d\n",
xd->mode_info_context->mbmi.mode, tx_size,
xd->mode_info_context->mbmi.interp_filter);
#endif
- vp9_build_inter_predictors_mb(xd);
+ vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
}
if (tx_size == TX_16X16) {
@@ -996,6 +872,13 @@
if (dec_debug) {
int i, j;
printf("\n");
+ printf("predictor y\n");
+ for (i = 0; i < 16; i++) {
+ for (j = 0; j < 16; j++)
+ printf("%3d ", xd->predictor[i * 16 + j]);
+ printf("\n");
+ }
+ printf("\n");
printf("final y\n");
for (i = 0; i < 16; i++) {
for (j = 0; j < 16; j++)
@@ -1062,18 +945,13 @@
xd->above_context = cm->above_context + mb_col;
xd->left_context = cm->left_context + (mb_row & 3);
- /* Distance of Mb to the various image edges.
- * These are specified to 8th pel as they are always compared to
- * values that are in 1/8th pel units
- */
+ // Distance of Mb to the various image edges.
+ // These are specified to 8th pel as they are always compared to
+ // values that are in 1/8th pel units
block_size >>= 4; // in mb units
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
- xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
- xd->up_available = (mb_row != 0);
- xd->left_available = (mb_col != 0);
+ set_mb_row(cm, xd, mb_row, block_size);
+ set_mb_col(cm, xd, mb_col, block_size);
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
@@ -1080,71 +958,34 @@
xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
}
-static void set_refs(VP9D_COMP *pbi, int block_size,
- int mb_row, int mb_col) {
+static void set_refs(VP9D_COMP *pbi, int block_size, int mb_row, int mb_col) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- MODE_INFO *mi = xd->mode_info_context;
- MB_MODE_INFO *const mbmi = &mi->mbmi;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
if (mbmi->ref_frame > INTRA_FRAME) {
- int ref_fb_idx, ref_yoffset, ref_uvoffset, ref_y_stride, ref_uv_stride;
+ // Select the appropriate reference frame for this MB
+ int ref_fb_idx = cm->active_ref_idx[mbmi->ref_frame - 1];
+ xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
+ xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
+ setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
- /* Select the appropriate reference frame for this MB */
- if (mbmi->ref_frame == LAST_FRAME)
- ref_fb_idx = cm->lst_fb_idx;
- else if (mbmi->ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cm->gld_fb_idx;
- else
- ref_fb_idx = cm->alt_fb_idx;
-
- ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
- ref_yoffset = mb_row * 16 * ref_y_stride + 16 * mb_col;
- xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + ref_yoffset;
- ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
- ref_uvoffset = mb_row * 8 * ref_uv_stride + 8 * mb_col;
- xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + ref_uvoffset;
- xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + ref_uvoffset;
-
- /* propagate errors from reference frames */
+ // propagate errors from reference frames
xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted;
if (mbmi->second_ref_frame > INTRA_FRAME) {
- int second_ref_fb_idx;
+ // Select the appropriate reference frame for this MB
+ int second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
- /* Select the appropriate reference frame for this MB */
- if (mbmi->second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cm->lst_fb_idx;
- else if (mbmi->second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cm->gld_fb_idx;
- else
- second_ref_fb_idx = cm->alt_fb_idx;
+ setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
- xd->second_pre.y_buffer =
- cm->yv12_fb[second_ref_fb_idx].y_buffer + ref_yoffset;
- xd->second_pre.u_buffer =
- cm->yv12_fb[second_ref_fb_idx].u_buffer + ref_uvoffset;
- xd->second_pre.v_buffer =
- cm->yv12_fb[second_ref_fb_idx].v_buffer + ref_uvoffset;
-
- /* propagate errors from reference frames */
+ // propagate errors from reference frames
xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted;
}
}
-
- if (mbmi->sb_type) {
- const int n_mbs = 1 << mbmi->sb_type;
- const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row);
- const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col);
- const int mis = cm->mode_info_stride;
- int x, y;
-
- for (y = 0; y < y_mbs; y++) {
- for (x = !y; x < x_mbs; x++) {
- mi[y * mis + x] = *mi;
- }
- }
- }
}
/* Decode a row of Superblocks (2x2 region of MBs) */
@@ -1156,8 +997,15 @@
// For a SB there are 2 left contexts, each pertaining to a MB row within
vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
- for (mb_col = 0; mb_col < pc->mb_cols; mb_col += 4) {
+ for (mb_col = pc->cur_tile_mb_col_start;
+ mb_col < pc->cur_tile_mb_col_end; mb_col += 4) {
if (vp9_read(bc, pc->sb64_coded)) {
+#ifdef DEC_DEBUG
+ dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
+ mb_row == 8 && mb_col == 0);
+ if (dec_debug)
+ printf("Debug Decode SB64\n");
+#endif
set_offsets(pbi, 64, mb_row, mb_col);
vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc);
set_refs(pbi, 64, mb_row, mb_col);
@@ -1178,6 +1026,12 @@
xd->sb_index = j;
if (vp9_read(bc, pc->sb32_coded)) {
+#ifdef DEC_DEBUG
+ dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
+ mb_row + y_idx_sb == 8 && mb_col + x_idx_sb == 0);
+ if (dec_debug)
+ printf("Debug Decode SB32\n");
+#endif
set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb);
vp9_decode_mb_mode_mv(pbi,
xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc);
@@ -1198,14 +1052,18 @@
// MB lies outside frame, skip on to next
continue;
}
+#ifdef DEC_DEBUG
+ dec_debug = (pc->current_video_frame == 11 && pc->show_frame &&
+ mb_row + y_idx == 8 && mb_col + x_idx == 0);
+ if (dec_debug)
+ printf("Debug Decode MB\n");
+#endif
set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx);
xd->mb_index = i;
vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc);
- update_blockd_bmi(xd);
set_refs(pbi, 16, mb_row + y_idx, mb_col + x_idx);
- vp9_intra_prediction_down_copy(xd);
- decode_macroblock(pbi, xd, mb_row, mb_col, bc);
+ decode_macroblock(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc);
/* check if the boolean decoder has suffered an error */
xd->corrupted |= bool_error(bc);
@@ -1216,38 +1074,19 @@
}
}
-static unsigned int read_partition_size(const unsigned char *cx_size) {
- const unsigned int size =
- cx_size[0] + (cx_size[1] << 8) + (cx_size[2] << 16);
- return size;
-}
-static int read_is_valid(const unsigned char *start,
- size_t len,
- const unsigned char *end) {
- return (start + len > start && start + len <= end);
-}
-
-
static void setup_token_decoder(VP9D_COMP *pbi,
const unsigned char *cx_data,
BOOL_DECODER* const bool_decoder) {
- VP9_COMMON *pc = &pbi->common;
+ VP9_COMMON *pc = &pbi->common;
const unsigned char *user_data_end = pbi->Source + pbi->source_sz;
- const unsigned char *partition;
+ const unsigned char *partition = cx_data;
+ ptrdiff_t bytes_left = user_data_end - partition;
+ ptrdiff_t partition_size = bytes_left;
- ptrdiff_t partition_size;
- ptrdiff_t bytes_left;
-
- // Set up pointers to token partition
- partition = cx_data;
- bytes_left = user_data_end - partition;
- partition_size = bytes_left;
-
- /* Validate the calculated partition length. If the buffer
- * described by the partition can't be fully read, then restrict
- * it to the portion that can be (for EC mode) or throw an error.
- */
+ // Validate the calculated partition length. If the buffer
+ // described by the partition can't be fully read, then restrict
+ // it to the portion that can be (for EC mode) or throw an error.
if (!read_is_valid(partition, partition_size, user_data_end)) {
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition "
@@ -1262,100 +1101,251 @@
static void init_frame(VP9D_COMP *pbi) {
VP9_COMMON *const pc = &pbi->common;
- MACROBLOCKD *const xd = &pbi->mb;
+ MACROBLOCKD *const xd = &pbi->mb;
if (pc->frame_type == KEY_FRAME) {
+ vp9_setup_past_independence(pc, xd);
+ // All buffers are implicitly updated on key frames.
+ pbi->refresh_frame_flags = (1 << NUM_REF_FRAMES) - 1;
+ } else if (pc->error_resilient_mode) {
+ vp9_setup_past_independence(pc, xd);
+ }
- if (pc->last_frame_seg_map)
- vpx_memset(pc->last_frame_seg_map, 0, (pc->mb_rows * pc->mb_cols));
+ if (pc->frame_type != KEY_FRAME) {
+ pc->mcomp_filter_type = pc->use_bilinear_mc_filter ? BILINEAR : EIGHTTAP;
- vp9_init_mv_probs(pc);
+ // To enable choice of different interpolation filters
+ vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc);
+ }
- vp9_init_mbmode_probs(pc);
- vp9_default_bmode_probs(pc->fc.bmode_prob);
+ xd->mode_info_context = pc->mi;
+ xd->prev_mode_info_context = pc->prev_mi;
+ xd->frame_type = pc->frame_type;
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+ xd->mode_info_stride = pc->mode_info_stride;
+ xd->corrupted = 0;
+ xd->fullpixel_mask = pc->full_pixel ? 0xfffffff8 : 0xffffffff;
+}
- vp9_default_coef_probs(pc);
- vp9_kf_default_bmode_probs(pc->kf_bmode_prob);
+#if CONFIG_CODE_NONZEROCOUNT
+static void read_nzc_probs_common(VP9_COMMON *cm,
+ BOOL_DECODER* const bc,
+ int block_size) {
+ int c, r, b, t;
+ int tokens, nodes;
+ vp9_prob *nzc_probs;
+ vp9_prob upd;
- // Reset the segment feature data to the default stats:
- // Features disabled, 0, with delta coding (Default state).
- vp9_clearall_segfeatures(xd);
+ if (!vp9_read_bit(bc)) return;
- xd->mb_segment_abs_delta = SEGMENT_DELTADATA;
+ if (block_size == 32) {
+ tokens = NZC32X32_TOKENS;
+ nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
+ upd = NZC_UPDATE_PROB_32X32;
+ } else if (block_size == 16) {
+ tokens = NZC16X16_TOKENS;
+ nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
+ upd = NZC_UPDATE_PROB_16X16;
+ } else if (block_size == 8) {
+ tokens = NZC8X8_TOKENS;
+ nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
+ upd = NZC_UPDATE_PROB_8X8;
+ } else {
+ tokens = NZC4X4_TOKENS;
+ nzc_probs = cm->fc.nzc_probs_4x4[0][0][0];
+ upd = NZC_UPDATE_PROB_4X4;
+ }
+ nodes = tokens - 1;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ for (t = 0; t < nodes; ++t) {
+ vp9_prob *p = &nzc_probs[offset_nodes + t];
+ if (vp9_read(bc, upd)) {
+ *p = read_prob_diff_update(bc, *p);
+ }
+ }
+ }
+ }
+ }
+}
- /* reset the mode ref deltasa for loop filter */
- vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
- vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
+static void read_nzc_pcat_probs(VP9_COMMON *cm, BOOL_DECODER* const bc) {
+ int c, t, b;
+ vp9_prob upd = NZC_UPDATE_PROB_PCAT;
+ if (!vp9_read_bit(bc)) {
+ return;
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ for (b = 0; b < bits; ++b) {
+ vp9_prob *p = &cm->fc.nzc_pcat_probs[c][t][b];
+ if (vp9_read(bc, upd)) {
+ *p = read_prob_diff_update(bc, *p);
+ }
+ }
+ }
+ }
+}
- /* All buffers are implicitly updated on key frames. */
- pc->refresh_golden_frame = 1;
- pc->refresh_alt_ref_frame = 1;
- pc->copy_buffer_to_gf = 0;
- pc->copy_buffer_to_arf = 0;
+static void read_nzc_probs(VP9_COMMON *cm,
+ BOOL_DECODER* const bc) {
+ read_nzc_probs_common(cm, bc, 4);
+ if (cm->txfm_mode != ONLY_4X4)
+ read_nzc_probs_common(cm, bc, 8);
+ if (cm->txfm_mode > ALLOW_8X8)
+ read_nzc_probs_common(cm, bc, 16);
+ if (cm->txfm_mode > ALLOW_16X16)
+ read_nzc_probs_common(cm, bc, 32);
+#ifdef NZC_PCAT_UPDATE
+ read_nzc_pcat_probs(cm, bc);
+#endif
+}
+#endif // CONFIG_CODE_NONZEROCOUNT
- /* Note that Golden and Altref modes cannot be used on a key frame so
- * ref_frame_sign_bias[] is undefined and meaningless
- */
- pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0;
- pc->ref_frame_sign_bias[ALTREF_FRAME] = 0;
+static void read_coef_probs_common(BOOL_DECODER* const bc,
+ vp9_coeff_probs *coef_probs,
+ int block_types) {
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
+ const int entropy_nodes_update = UNCONSTRAINED_UPDATE_NODES;
+#else
+ const int entropy_nodes_update = ENTROPY_NODES;
+#endif
- vp9_init_mode_contexts(&pbi->common);
- vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
- vpx_memcpy(&pc->lfc_a, &pc->fc, sizeof(pc->fc));
+ int i, j, k, l, m;
- vpx_memset(pc->prev_mip, 0,
- (pc->mb_cols + 1) * (pc->mb_rows + 1)* sizeof(MODE_INFO));
- vpx_memset(pc->mip, 0,
- (pc->mb_cols + 1) * (pc->mb_rows + 1)* sizeof(MODE_INFO));
+ if (vp9_read_bit(bc)) {
+ for (i = 0; i < block_types; i++) {
+ for (j = 0; j < REF_TYPES; j++) {
+ for (k = 0; k < COEF_BANDS; k++) {
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ if (l >= 3 && k == 0)
+ continue;
+ for (m = CONFIG_CODE_NONZEROCOUNT; m < entropy_nodes_update; m++) {
+ vp9_prob *const p = coef_probs[i][j][k][l] + m;
- vp9_update_mode_info_border(pc, pc->mip);
- vp9_update_mode_info_in_image(pc, pc->mi);
+ if (vp9_read(bc, vp9_coef_update_prob[m])) {
+ *p = read_prob_diff_update(bc, *p);
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
+ if (m == UNCONSTRAINED_NODES - 1)
+ vp9_get_model_distribution(*p, coef_probs[i][j][k][l], i, j);
+#endif
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) {
+ VP9_COMMON *const pc = &pbi->common;
- } else {
+ read_coef_probs_common(bc, pc->fc.coef_probs_4x4, BLOCK_TYPES);
- if (!pc->use_bilinear_mc_filter)
- pc->mcomp_filter_type = EIGHTTAP;
- else
- pc->mcomp_filter_type = BILINEAR;
+ if (pbi->common.txfm_mode != ONLY_4X4)
+ read_coef_probs_common(bc, pc->fc.coef_probs_8x8, BLOCK_TYPES);
- /* To enable choice of different interpolation filters */
- vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc);
- }
+ if (pbi->common.txfm_mode > ALLOW_8X8)
+ read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES);
- xd->mode_info_context = pc->mi;
- xd->prev_mode_info_context = pc->prev_mi;
- xd->frame_type = pc->frame_type;
- xd->mode_info_context->mbmi.mode = DC_PRED;
- xd->mode_info_stride = pc->mode_info_stride;
- xd->corrupted = 0; /* init without corruption */
+ if (pbi->common.txfm_mode > ALLOW_16X16)
+ read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES);
+}
- xd->fullpixel_mask = 0xffffffff;
- if (pc->full_pixel)
- xd->fullpixel_mask = 0xfffffff8;
+static void update_frame_size(VP9D_COMP *pbi) {
+ VP9_COMMON *cm = &pbi->common;
+ /* our internal buffers are always multiples of 16 */
+ const int width = (cm->width + 15) & ~15;
+ const int height = (cm->height + 15) & ~15;
+
+ cm->mb_rows = height >> 4;
+ cm->mb_cols = width >> 4;
+ cm->MBs = cm->mb_rows * cm->mb_cols;
+ cm->mode_info_stride = cm->mb_cols + 1;
+ memset(cm->mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO));
+ vp9_update_mode_info_border(cm, cm->mip);
+
+ cm->mi = cm->mip + cm->mode_info_stride + 1;
+ cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
+ vp9_update_mode_info_in_image(cm, cm->mi);
}
-static void read_coef_probs_common(BOOL_DECODER* const bc,
- vp9_coeff_probs *coef_probs,
- int block_types) {
- int i, j, k, l;
+static void setup_segmentation(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) {
+ int i, j;
- if (vp9_read_bit(bc)) {
- for (i = 0; i < block_types; i++) {
- for (j = !i; j < COEF_BANDS; j++) {
- /* NB: This j loop starts from 1 on block type i == 0 */
- for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
- if (k >= 3 && ((i == 0 && j == 1) ||
- (i > 0 && j == 0)))
- continue;
- for (l = 0; l < ENTROPY_NODES; l++) {
- vp9_prob *const p = coef_probs[i][j][k] + l;
+ xd->segmentation_enabled = vp9_read_bit(r);
+ if (xd->segmentation_enabled) {
+ // Read whether or not the segmentation map is being explicitly updated
+ // this frame.
+ xd->update_mb_segmentation_map = vp9_read_bit(r);
- if (vp9_read(bc, COEF_UPDATE_PROB)) {
- *p = read_prob_diff_update(bc, *p);
+ // If so what method will be used.
+ if (xd->update_mb_segmentation_map) {
+ // Which macro block level features are enabled. Read the probs used to
+ // decode the segment id for each macro block.
+ for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) {
+ xd->mb_segment_tree_probs[i] = vp9_read_bit(r) ? vp9_read_prob(r) : 255;
+ }
+
+ // Read the prediction probs needed to decode the segment id
+ pc->temporal_update = vp9_read_bit(r);
+ for (i = 0; i < PREDICTION_PROBS; i++) {
+ pc->segment_pred_probs[i] = pc->temporal_update
+ ? (vp9_read_bit(r) ? vp9_read_prob(r) : 255)
+ : 255;
+ }
+
+ if (pc->temporal_update) {
+ const vp9_prob *p = xd->mb_segment_tree_probs;
+ vp9_prob *p_mod = xd->mb_segment_mispred_tree_probs;
+ const int c0 = p[0] * p[1];
+ const int c1 = p[0] * (256 - p[1]);
+ const int c2 = (256 - p[0]) * p[2];
+ const int c3 = (256 - p[0]) * (256 - p[2]);
+
+ p_mod[0] = get_binary_prob(c1, c2 + c3);
+ p_mod[1] = get_binary_prob(c0, c2 + c3);
+ p_mod[2] = get_binary_prob(c0 + c1, c3);
+ p_mod[3] = get_binary_prob(c0 + c1, c2);
+ }
+ }
+
+ xd->update_mb_segmentation_data = vp9_read_bit(r);
+ if (xd->update_mb_segmentation_data) {
+ int data;
+
+ xd->mb_segment_abs_delta = vp9_read_bit(r);
+
+ vp9_clearall_segfeatures(xd);
+
+ // For each segmentation...
+ for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ // For each of the segments features...
+ for (j = 0; j < SEG_LVL_MAX; j++) {
+ // Is the feature enabled
+ if (vp9_read_bit(r)) {
+ // Update the feature data and mask
+ vp9_enable_segfeature(xd, i, j);
+
+ data = vp9_decode_unsigned_max(r, vp9_seg_feature_data_max(j));
+
+ // Is the segment data signed..
+ if (vp9_is_segfeature_signed(j)) {
+ if (vp9_read_bit(r))
+ data = -data;
}
+ } else {
+ data = 0;
}
+
+ vp9_set_segdata(xd, i, j, data);
}
}
}
@@ -1362,23 +1352,266 @@
}
}
-static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) {
+static void setup_loopfilter(VP9_COMMON *pc, MACROBLOCKD *xd, vp9_reader *r) {
+ int i;
+
+ pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(r);
+ pc->filter_level = vp9_read_literal(r, 6);
+ pc->sharpness_level = vp9_read_literal(r, 3);
+
+#if CONFIG_LOOP_DERING
+ if (vp9_read_bit(r))
+ pc->dering_enabled = 1 + vp9_read_literal(r, 4);
+ else
+ pc->dering_enabled = 0;
+#endif
+
+ // Read in loop filter deltas applied at the MB level based on mode or ref
+ // frame.
+ xd->mode_ref_lf_delta_update = 0;
+ xd->mode_ref_lf_delta_enabled = vp9_read_bit(r);
+
+ if (xd->mode_ref_lf_delta_enabled) {
+ // Do the deltas need to be updated
+ xd->mode_ref_lf_delta_update = vp9_read_bit(r);
+
+ if (xd->mode_ref_lf_delta_update) {
+ // Send update
+ for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
+ if (vp9_read_bit(r)) {
+ // sign = vp9_read_bit(r);
+ xd->ref_lf_deltas[i] = vp9_read_literal(r, 6);
+
+ if (vp9_read_bit(r))
+ xd->ref_lf_deltas[i] = -xd->ref_lf_deltas[i]; // Apply sign
+ }
+ }
+
+ // Send update
+ for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
+ if (vp9_read_bit(r)) {
+ // sign = vp9_read_bit(r);
+ xd->mode_lf_deltas[i] = vp9_read_literal(r, 6);
+
+ if (vp9_read_bit(r))
+ xd->mode_lf_deltas[i] = -xd->mode_lf_deltas[i]; // Apply sign
+ }
+ }
+ }
+ }
+}
+
+static const uint8_t *setup_frame_size(VP9D_COMP *pbi, int scaling_active,
+ const uint8_t *data,
+ const uint8_t *data_end) {
VP9_COMMON *const pc = &pbi->common;
+ const int width = pc->width;
+ const int height = pc->height;
- read_coef_probs_common(bc, pc->fc.coef_probs_4x4, BLOCK_TYPES_4X4);
- read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4);
+ // If error concealment is enabled we should only parse the new size
+ // if we have enough data. Otherwise we will end up with the wrong size.
+ if (scaling_active && data + 4 < data_end) {
+ pc->display_width = read_le16(data + 0);
+ pc->display_height = read_le16(data + 2);
+ data += 4;
+ }
- if (pbi->common.txfm_mode != ONLY_4X4) {
- read_coef_probs_common(bc, pc->fc.coef_probs_8x8, BLOCK_TYPES_8X8);
- read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8);
+ if (data + 4 < data_end) {
+ pc->width = read_le16(data + 0);
+ pc->height = read_le16(data + 2);
+ data += 4;
}
- if (pbi->common.txfm_mode > ALLOW_8X8) {
- read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES_16X16);
- read_coef_probs_common(bc, pc->fc.hybrid_coef_probs_16x16,
- BLOCK_TYPES_16X16);
+
+ if (!scaling_active) {
+ pc->display_width = pc->width;
+ pc->display_height = pc->height;
+ }
+
+ if (width != pc->width || height != pc->height) {
+ if (pc->width <= 0) {
+ pc->width = width;
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid frame width");
+ }
+
+ if (pc->height <= 0) {
+ pc->height = height;
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid frame height");
+ }
+
+ if (!pbi->initial_width || !pbi->initial_height) {
+ if (vp9_alloc_frame_buffers(pc, pc->width, pc->height))
+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffers");
+ pbi->initial_width = pc->width;
+ pbi->initial_height = pc->height;
+ }
+
+ if (pc->width > pbi->initial_width) {
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Frame width too large");
+ }
+
+ if (pc->height > pbi->initial_height) {
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Frame height too large");
+ }
+
+ update_frame_size(pbi);
+ }
+
+ return data;
+}
+
+static void update_frame_context(VP9D_COMP *pbi, vp9_reader *r) {
+ FRAME_CONTEXT *const fc = &pbi->common.fc;
+
+ vp9_copy(fc->pre_coef_probs_4x4, fc->coef_probs_4x4);
+ vp9_copy(fc->pre_coef_probs_8x8, fc->coef_probs_8x8);
+ vp9_copy(fc->pre_coef_probs_16x16, fc->coef_probs_16x16);
+ vp9_copy(fc->pre_coef_probs_32x32, fc->coef_probs_32x32);
+ vp9_copy(fc->pre_ymode_prob, fc->ymode_prob);
+ vp9_copy(fc->pre_sb_ymode_prob, fc->sb_ymode_prob);
+ vp9_copy(fc->pre_uv_mode_prob, fc->uv_mode_prob);
+ vp9_copy(fc->pre_bmode_prob, fc->bmode_prob);
+ vp9_copy(fc->pre_i8x8_mode_prob, fc->i8x8_mode_prob);
+ vp9_copy(fc->pre_sub_mv_ref_prob, fc->sub_mv_ref_prob);
+ vp9_copy(fc->pre_mbsplit_prob, fc->mbsplit_prob);
+ fc->pre_nmvc = fc->nmvc;
+
+ vp9_zero(fc->coef_counts_4x4);
+ vp9_zero(fc->coef_counts_8x8);
+ vp9_zero(fc->coef_counts_16x16);
+ vp9_zero(fc->coef_counts_32x32);
+ vp9_zero(fc->eob_branch_counts);
+ vp9_zero(fc->ymode_counts);
+ vp9_zero(fc->sb_ymode_counts);
+ vp9_zero(fc->uv_mode_counts);
+ vp9_zero(fc->bmode_counts);
+ vp9_zero(fc->i8x8_mode_counts);
+ vp9_zero(fc->sub_mv_ref_counts);
+ vp9_zero(fc->mbsplit_counts);
+ vp9_zero(fc->NMVcount);
+ vp9_zero(fc->mv_ref_ct);
+
+#if CONFIG_COMP_INTERINTRA_PRED
+ fc->pre_interintra_prob = fc->interintra_prob;
+ vp9_zero(fc->interintra_counts);
+#endif
+
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_copy(fc->pre_nzc_probs_4x4, fc->nzc_probs_4x4);
+ vp9_copy(fc->pre_nzc_probs_8x8, fc->nzc_probs_8x8);
+ vp9_copy(fc->pre_nzc_probs_16x16, fc->nzc_probs_16x16);
+ vp9_copy(fc->pre_nzc_probs_32x32, fc->nzc_probs_32x32);
+ vp9_copy(fc->pre_nzc_pcat_probs, fc->nzc_pcat_probs);
+
+ vp9_zero(fc->nzc_counts_4x4);
+ vp9_zero(fc->nzc_counts_8x8);
+ vp9_zero(fc->nzc_counts_16x16);
+ vp9_zero(fc->nzc_counts_32x32);
+ vp9_zero(fc->nzc_pcat_counts);
+#endif
+
+ read_coef_probs(pbi, r);
+#if CONFIG_CODE_NONZEROCOUNT
+ read_nzc_probs(&pbi->common, r);
+#endif
+}
+
+static void decode_tiles(VP9D_COMP *pbi,
+ const uint8_t *data, int first_partition_size,
+ BOOL_DECODER *header_bc, BOOL_DECODER *residual_bc) {
+ VP9_COMMON *const pc = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
+
+ const uint8_t *data_ptr = data + first_partition_size;
+ int tile_row, tile_col, delta_log2_tiles;
+ int mb_row;
+
+ vp9_get_tile_n_bits(pc, &pc->log2_tile_columns, &delta_log2_tiles);
+ while (delta_log2_tiles--) {
+ if (vp9_read_bit(header_bc)) {
+ pc->log2_tile_columns++;
+ } else {
+ break;
+ }
}
- if (pbi->common.txfm_mode > ALLOW_16X16) {
- read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32);
+ pc->log2_tile_rows = vp9_read_bit(header_bc);
+ if (pc->log2_tile_rows)
+ pc->log2_tile_rows += vp9_read_bit(header_bc);
+ pc->tile_columns = 1 << pc->log2_tile_columns;
+ pc->tile_rows = 1 << pc->log2_tile_rows;
+
+ vpx_memset(pc->above_context, 0,
+ sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
+
+ if (pbi->oxcf.inv_tile_order) {
+ const int n_cols = pc->tile_columns;
+ const uint8_t *data_ptr2[4][1 << 6];
+ BOOL_DECODER UNINITIALIZED_IS_SAFE(bc_bak);
+
+ // pre-initialize the offsets, we're going to read in inverse order
+ data_ptr2[0][0] = data_ptr;
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ if (tile_row) {
+ const int size = read_le32(data_ptr2[tile_row - 1][n_cols - 1]);
+ data_ptr2[tile_row - 1][n_cols - 1] += 4;
+ data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][n_cols - 1] + size;
+ }
+
+ for (tile_col = 1; tile_col < n_cols; tile_col++) {
+ const int size = read_le32(data_ptr2[tile_row][tile_col - 1]);
+ data_ptr2[tile_row][tile_col - 1] += 4;
+ data_ptr2[tile_row][tile_col] =
+ data_ptr2[tile_row][tile_col - 1] + size;
+ }
+ }
+
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(pc, tile_row);
+ for (tile_col = n_cols - 1; tile_col >= 0; tile_col--) {
+ vp9_get_tile_col_offsets(pc, tile_col);
+ setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], residual_bc);
+
+ // Decode a row of superblocks
+ for (mb_row = pc->cur_tile_mb_row_start;
+ mb_row < pc->cur_tile_mb_row_end; mb_row += 4) {
+ decode_sb_row(pbi, pc, mb_row, xd, residual_bc);
+ }
+
+ if (tile_row == pc->tile_rows - 1 && tile_col == n_cols - 1)
+ bc_bak = *residual_bc;
+ }
+ }
+ *residual_bc = bc_bak;
+ } else {
+ int has_more;
+
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(pc, tile_row);
+ for (tile_col = 0; tile_col < pc->tile_columns; tile_col++) {
+ vp9_get_tile_col_offsets(pc, tile_col);
+
+ has_more = tile_col < pc->tile_columns - 1 ||
+ tile_row < pc->tile_rows - 1;
+
+ // Setup decoder
+ setup_token_decoder(pbi, data_ptr + (has_more ? 4 : 0), residual_bc);
+
+ // Decode a row of superblocks
+ for (mb_row = pc->cur_tile_mb_row_start;
+ mb_row < pc->cur_tile_mb_row_end; mb_row += 4) {
+ decode_sb_row(pbi, pc, mb_row, xd, residual_bc);
+ }
+
+ if (has_more) {
+ const int size = read_le32(data_ptr);
+ data_ptr += 4 + size;
+ }
+ }
+ }
}
}
@@ -1386,31 +1619,28 @@
BOOL_DECODER header_bc, residual_bc;
VP9_COMMON *const pc = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- const unsigned char *data = (const unsigned char *)pbi->Source;
- const unsigned char *data_end = data + pbi->source_sz;
+ const uint8_t *data = (const uint8_t *)pbi->Source;
+ const uint8_t *data_end = data + pbi->source_sz;
ptrdiff_t first_partition_length_in_bytes = 0;
+ int i, corrupt_tokens = 0;
- int mb_row;
- int i, j;
- int corrupt_tokens = 0;
+ // printf("Decoding frame %d\n", pc->current_video_frame);
- /* start with no corruption of current frame */
- xd->corrupted = 0;
+ xd->corrupted = 0; // start with no corruption of current frame
pc->yv12_fb[pc->new_fb_idx].corrupted = 0;
if (data_end - data < 3) {
- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
- "Truncated packet");
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
} else {
+ int scaling_active;
pc->last_frame_type = pc->frame_type;
pc->frame_type = (FRAME_TYPE)(data[0] & 1);
pc->version = (data[0] >> 1) & 7;
pc->show_frame = (data[0] >> 4) & 1;
- first_partition_length_in_bytes =
- (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
+ scaling_active = (data[0] >> 5) & 1;
+ first_partition_length_in_bytes = read_le16(data + 1);
- if ((data + first_partition_length_in_bytes > data_end
- || data + first_partition_length_in_bytes < data))
+ if (!read_is_valid(data, first_partition_length_in_bytes, data_end))
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt partition 0 length");
@@ -1419,136 +1649,42 @@
vp9_setup_version(pc);
if (pc->frame_type == KEY_FRAME) {
- const int Width = pc->Width;
- const int Height = pc->Height;
-
- /* vet via sync code */
- /* When error concealment is enabled we should only check the sync
- * code if we have enough bits available
- */
+ // When error concealment is enabled we should only check the sync
+ // code if we have enough bits available
if (data + 3 < data_end) {
if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)
vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame sync code");
}
-
- /* If error concealment is enabled we should only parse the new size
- * if we have enough data. Otherwise we will end up with the wrong
- * size.
- */
- if (data + 6 < data_end) {
- pc->Width = (data[3] | (data[4] << 8)) & 0x3fff;
- pc->horiz_scale = data[4] >> 6;
- pc->Height = (data[5] | (data[6] << 8)) & 0x3fff;
- pc->vert_scale = data[6] >> 6;
- }
- data += 7;
-
- if (Width != pc->Width || Height != pc->Height) {
- if (pc->Width <= 0) {
- pc->Width = Width;
- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
- "Invalid frame width");
- }
-
- if (pc->Height <= 0) {
- pc->Height = Height;
- vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
- "Invalid frame height");
- }
-
- if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height))
- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate frame buffers");
- }
+ data += 3;
}
+
+ data = setup_frame_size(pbi, scaling_active, data, data_end);
}
-#ifdef DEC_DEBUG
- printf("Decode frame %d\n", pc->current_video_frame);
-#endif
if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME) ||
- pc->Width == 0 || pc->Height == 0) {
+ pc->width == 0 || pc->height == 0) {
return -1;
}
init_frame(pbi);
+ // Reset the frame pointers to the current frame size
+ vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx],
+ pc->width, pc->height,
+ VP9BORDERINPIXELS);
+
if (vp9_start_decode(&header_bc, data,
(unsigned int)first_partition_length_in_bytes))
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
- if (pc->frame_type == KEY_FRAME) {
- pc->clr_type = (YUV_TYPE)vp9_read_bit(&header_bc);
- pc->clamp_type = (CLAMP_TYPE)vp9_read_bit(&header_bc);
- }
- /* Is segmentation enabled */
- xd->segmentation_enabled = (unsigned char)vp9_read_bit(&header_bc);
+ pc->clr_type = (YUV_TYPE)vp9_read_bit(&header_bc);
+ pc->clamp_type = (CLAMP_TYPE)vp9_read_bit(&header_bc);
+ pc->error_resilient_mode = vp9_read_bit(&header_bc);
- if (xd->segmentation_enabled) {
- // Read whether or not the segmentation map is being explicitly
- // updated this frame.
- xd->update_mb_segmentation_map = (unsigned char)vp9_read_bit(&header_bc);
+ setup_segmentation(pc, xd, &header_bc);
- // If so what method will be used.
- if (xd->update_mb_segmentation_map) {
- // Which macro block level features are enabled
-
- // Read the probs used to decode the segment id for each macro
- // block.
- for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) {
- xd->mb_segment_tree_probs[i] = vp9_read_bit(&header_bc) ?
- (vp9_prob)vp9_read_literal(&header_bc, 8) : 255;
- }
-
- // Read the prediction probs needed to decode the segment id
- pc->temporal_update = (unsigned char)vp9_read_bit(&header_bc);
- for (i = 0; i < PREDICTION_PROBS; i++) {
- if (pc->temporal_update) {
- pc->segment_pred_probs[i] = vp9_read_bit(&header_bc) ?
- (vp9_prob)vp9_read_literal(&header_bc, 8) : 255;
- } else {
- pc->segment_pred_probs[i] = 255;
- }
- }
- }
- // Is the segment data being updated
- xd->update_mb_segmentation_data = (unsigned char)vp9_read_bit(&header_bc);
-
- if (xd->update_mb_segmentation_data) {
- int data;
-
- xd->mb_segment_abs_delta = (unsigned char)vp9_read_bit(&header_bc);
-
- vp9_clearall_segfeatures(xd);
-
- // For each segmentation...
- for (i = 0; i < MAX_MB_SEGMENTS; i++) {
- // For each of the segments features...
- for (j = 0; j < SEG_LVL_MAX; j++) {
- // Is the feature enabled
- if (vp9_read_bit(&header_bc)) {
- // Update the feature data and mask
- vp9_enable_segfeature(xd, i, j);
-
- data = vp9_decode_unsigned_max(&header_bc,
- vp9_seg_feature_data_max(j));
-
- // Is the segment data signed..
- if (vp9_is_segfeature_signed(j)) {
- if (vp9_read_bit(&header_bc))
- data = -data;
- }
- } else
- data = 0;
-
- vp9_set_segdata(xd, i, j, data);
- }
- }
- }
- }
-
// Read common prediction model status flag probability updates for the
// reference frame
if (pc->frame_type == KEY_FRAME) {
@@ -1556,81 +1692,43 @@
pc->ref_pred_probs[0] = 120;
pc->ref_pred_probs[1] = 80;
pc->ref_pred_probs[2] = 40;
-
} else {
for (i = 0; i < PREDICTION_PROBS; i++) {
if (vp9_read_bit(&header_bc))
- pc->ref_pred_probs[i] = (vp9_prob)vp9_read_literal(&header_bc, 8);
+ pc->ref_pred_probs[i] = vp9_read_prob(&header_bc);
}
}
- pc->sb64_coded = vp9_read_literal(&header_bc, 8);
- pc->sb32_coded = vp9_read_literal(&header_bc, 8);
+ pc->sb64_coded = vp9_read_prob(&header_bc);
+ pc->sb32_coded = vp9_read_prob(&header_bc);
+ xd->lossless = vp9_read_bit(&header_bc);
+ if (xd->lossless) {
+ pc->txfm_mode = ONLY_4X4;
+ } else {
+ // Read the loop filter level and type
+ pc->txfm_mode = vp9_read_literal(&header_bc, 2);
+ if (pc->txfm_mode == ALLOW_32X32)
+ pc->txfm_mode += vp9_read_bit(&header_bc);
- /* Read the loop filter level and type */
- pc->txfm_mode = vp9_read_literal(&header_bc, 2);
- if (pc->txfm_mode == 3)
- pc->txfm_mode += vp9_read_bit(&header_bc);
- if (pc->txfm_mode == TX_MODE_SELECT) {
- pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
- pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
- pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
- }
-
- pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc);
- pc->filter_level = vp9_read_literal(&header_bc, 6);
- pc->sharpness_level = vp9_read_literal(&header_bc, 3);
-
- /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */
- xd->mode_ref_lf_delta_update = 0;
- xd->mode_ref_lf_delta_enabled = (unsigned char)vp9_read_bit(&header_bc);
-
- if (xd->mode_ref_lf_delta_enabled) {
- /* Do the deltas need to be updated */
- xd->mode_ref_lf_delta_update = (unsigned char)vp9_read_bit(&header_bc);
-
- if (xd->mode_ref_lf_delta_update) {
- /* Send update */
- for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
- if (vp9_read_bit(&header_bc)) {
- /*sign = vp9_read_bit( &header_bc );*/
- xd->ref_lf_deltas[i] = (signed char)vp9_read_literal(&header_bc, 6);
-
- if (vp9_read_bit(&header_bc)) /* Apply sign */
- xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1;
- }
- }
-
- /* Send update */
- for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
- if (vp9_read_bit(&header_bc)) {
- /*sign = vp9_read_bit( &header_bc );*/
- xd->mode_lf_deltas[i] = (signed char)vp9_read_literal(&header_bc, 6);
-
- if (vp9_read_bit(&header_bc)) /* Apply sign */
- xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1;
- }
- }
+ if (pc->txfm_mode == TX_MODE_SELECT) {
+ pc->prob_tx[0] = vp9_read_prob(&header_bc);
+ pc->prob_tx[1] = vp9_read_prob(&header_bc);
+ pc->prob_tx[2] = vp9_read_prob(&header_bc);
}
}
+ setup_loopfilter(pc, xd, &header_bc);
+
// Dummy read for now
vp9_read_literal(&header_bc, 2);
- setup_token_decoder(pbi, data + first_partition_length_in_bytes,
- &residual_bc);
-
/* Read the default quantizers. */
{
- int Q, q_update;
+ int q_update = 0;
+ pc->base_qindex = vp9_read_literal(&header_bc, QINDEX_BITS);
- Q = vp9_read_literal(&header_bc, QINDEX_BITS);
- pc->base_qindex = Q;
- q_update = 0;
/* AC 1st order Q = default */
pc->y1dc_delta_q = get_delta_q(&header_bc, pc->y1dc_delta_q, &q_update);
- pc->y2dc_delta_q = get_delta_q(&header_bc, pc->y2dc_delta_q, &q_update);
- pc->y2ac_delta_q = get_delta_q(&header_bc, pc->y2ac_delta_q, &q_update);
pc->uvdc_delta_q = get_delta_q(&header_bc, pc->uvdc_delta_q, &q_update);
pc->uvac_delta_q = get_delta_q(&header_bc, pc->uvac_delta_q, &q_update);
@@ -1641,58 +1739,52 @@
mb_init_dequantizer(pbi, &pbi->mb);
}
- /* Determine if the golden frame or ARF buffer should be updated and how.
- * For all non key frames the GF and ARF refresh flags and sign bias
- * flags must be set explicitly.
- */
- if (pc->frame_type != KEY_FRAME) {
- /* Should the GF or ARF be updated from the current frame */
- pc->refresh_golden_frame = vp9_read_bit(&header_bc);
- pc->refresh_alt_ref_frame = vp9_read_bit(&header_bc);
+ // Determine if the golden frame or ARF buffer should be updated and how.
+ // For all non key frames the GF and ARF refresh flags and sign bias
+ // flags must be set explicitly.
+ if (pc->frame_type == KEY_FRAME) {
+ pc->active_ref_idx[0] = pc->new_fb_idx;
+ pc->active_ref_idx[1] = pc->new_fb_idx;
+ pc->active_ref_idx[2] = pc->new_fb_idx;
+ } else {
+ // Should the GF or ARF be updated from the current frame
+ pbi->refresh_frame_flags = vp9_read_literal(&header_bc, NUM_REF_FRAMES);
- if (pc->refresh_alt_ref_frame) {
- vpx_memcpy(&pc->fc, &pc->lfc_a, sizeof(pc->fc));
- } else {
- vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
+ // Select active reference frames
+ for (i = 0; i < 3; i++) {
+ int ref_frame_num = vp9_read_literal(&header_bc, NUM_REF_FRAMES_LG2);
+ pc->active_ref_idx[i] = pc->ref_frame_map[ref_frame_num];
}
- /* Buffer to buffer copy flags. */
- pc->copy_buffer_to_gf = 0;
-
- if (!pc->refresh_golden_frame)
- pc->copy_buffer_to_gf = vp9_read_literal(&header_bc, 2);
-
- pc->copy_buffer_to_arf = 0;
-
- if (!pc->refresh_alt_ref_frame)
- pc->copy_buffer_to_arf = vp9_read_literal(&header_bc, 2);
-
pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp9_read_bit(&header_bc);
pc->ref_frame_sign_bias[ALTREF_FRAME] = vp9_read_bit(&header_bc);
- /* Is high precision mv allowed */
- xd->allow_high_precision_mv = (unsigned char)vp9_read_bit(&header_bc);
+ // Is high precision mv allowed
+ xd->allow_high_precision_mv = vp9_read_bit(&header_bc);
+
// Read the type of subpel filter to use
- if (vp9_read_bit(&header_bc)) {
- pc->mcomp_filter_type = SWITCHABLE;
- } else {
- pc->mcomp_filter_type = vp9_read_literal(&header_bc, 2);
- }
+ pc->mcomp_filter_type = vp9_read_bit(&header_bc)
+ ? SWITCHABLE
+ : vp9_read_literal(&header_bc, 2);
+
#if CONFIG_COMP_INTERINTRA_PRED
pc->use_interintra = vp9_read_bit(&header_bc);
#endif
- /* To enable choice of different interploation filters */
+ // To enable choice of different interploation filters
vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc);
}
- pc->refresh_entropy_probs = vp9_read_bit(&header_bc);
- if (pc->refresh_entropy_probs == 0) {
- vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
+ if (!pc->error_resilient_mode) {
+ pc->refresh_entropy_probs = vp9_read_bit(&header_bc);
+ pc->frame_parallel_decoding_mode = vp9_read_bit(&header_bc);
+ } else {
+ pc->refresh_entropy_probs = 0;
+ pc->frame_parallel_decoding_mode = 1;
}
+ pc->frame_context_idx = vp9_read_literal(&header_bc, NUM_FRAME_CONTEXTS_LG2);
+ vpx_memcpy(&pc->fc, &pc->frame_contexts[pc->frame_context_idx],
+ sizeof(pc->fc));
- pc->refresh_last_frame = (pc->frame_type == KEY_FRAME)
- || vp9_read_bit(&header_bc);
-
// Read inter mode probability context updates
if (pc->frame_type != KEY_FRAME) {
int i, j;
@@ -1699,20 +1791,19 @@
for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
for (j = 0; j < 4; j++) {
if (vp9_read(&header_bc, 252)) {
- pc->fc.vp9_mode_contexts[i][j] =
- (vp9_prob)vp9_read_literal(&header_bc, 8);
+ pc->fc.vp9_mode_contexts[i][j] = vp9_read_prob(&header_bc);
}
}
}
}
+#if CONFIG_MODELCOEFPROB && ADJUST_KF_COEF_PROBS
+ if (pc->frame_type == KEY_FRAME)
+ vp9_adjust_default_coef_probs(pc);
+#endif
#if CONFIG_NEW_MVREF
// If Key frame reset mv ref id probabilities to defaults
- if (pc->frame_type == KEY_FRAME) {
- // Defaults probabilities for encoding the MV ref id signal
- vpx_memset(xd->mb_mv_ref_probs, VP9_DEFAULT_MV_REF_PROB,
- sizeof(xd->mb_mv_ref_probs));
- } else {
+ if (pc->frame_type != KEY_FRAME) {
// Read any mv_ref index probability updates
int i, j;
@@ -1725,8 +1816,7 @@
// Read any updates to probabilities
for (j = 0; j < MAX_MV_REF_CANDIDATES - 1; ++j) {
if (vp9_read(&header_bc, VP9_MVREF_UPDATE_PROB)) {
- xd->mb_mv_ref_probs[i][j] =
- (vp9_prob)vp9_read_literal(&header_bc, 8);
+ xd->mb_mv_ref_probs[i][j] = vp9_read_prob(&header_bc);
}
}
}
@@ -1735,66 +1825,22 @@
if (0) {
FILE *z = fopen("decodestats.stt", "a");
- fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n",
+ fprintf(z, "%6d F:%d,R:%d,Q:%d\n",
pc->current_video_frame,
pc->frame_type,
- pc->refresh_golden_frame,
- pc->refresh_alt_ref_frame,
- pc->refresh_last_frame,
+ pbi->refresh_frame_flags,
pc->base_qindex);
fclose(z);
}
- vp9_copy(pbi->common.fc.pre_coef_probs_4x4,
- pbi->common.fc.coef_probs_4x4);
- vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_4x4,
- pbi->common.fc.hybrid_coef_probs_4x4);
- vp9_copy(pbi->common.fc.pre_coef_probs_8x8,
- pbi->common.fc.coef_probs_8x8);
- vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_8x8,
- pbi->common.fc.hybrid_coef_probs_8x8);
- vp9_copy(pbi->common.fc.pre_coef_probs_16x16,
- pbi->common.fc.coef_probs_16x16);
- vp9_copy(pbi->common.fc.pre_hybrid_coef_probs_16x16,
- pbi->common.fc.hybrid_coef_probs_16x16);
- vp9_copy(pbi->common.fc.pre_coef_probs_32x32,
- pbi->common.fc.coef_probs_32x32);
- vp9_copy(pbi->common.fc.pre_ymode_prob, pbi->common.fc.ymode_prob);
- vp9_copy(pbi->common.fc.pre_sb_ymode_prob, pbi->common.fc.sb_ymode_prob);
- vp9_copy(pbi->common.fc.pre_uv_mode_prob, pbi->common.fc.uv_mode_prob);
- vp9_copy(pbi->common.fc.pre_bmode_prob, pbi->common.fc.bmode_prob);
- vp9_copy(pbi->common.fc.pre_i8x8_mode_prob, pbi->common.fc.i8x8_mode_prob);
- vp9_copy(pbi->common.fc.pre_sub_mv_ref_prob, pbi->common.fc.sub_mv_ref_prob);
- vp9_copy(pbi->common.fc.pre_mbsplit_prob, pbi->common.fc.mbsplit_prob);
-#if CONFIG_COMP_INTERINTRA_PRED
- pbi->common.fc.pre_interintra_prob = pbi->common.fc.interintra_prob;
-#endif
- pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc;
- vp9_zero(pbi->common.fc.coef_counts_4x4);
- vp9_zero(pbi->common.fc.hybrid_coef_counts_4x4);
- vp9_zero(pbi->common.fc.coef_counts_8x8);
- vp9_zero(pbi->common.fc.hybrid_coef_counts_8x8);
- vp9_zero(pbi->common.fc.coef_counts_16x16);
- vp9_zero(pbi->common.fc.hybrid_coef_counts_16x16);
- vp9_zero(pbi->common.fc.coef_counts_32x32);
- vp9_zero(pbi->common.fc.ymode_counts);
- vp9_zero(pbi->common.fc.sb_ymode_counts);
- vp9_zero(pbi->common.fc.uv_mode_counts);
- vp9_zero(pbi->common.fc.bmode_counts);
- vp9_zero(pbi->common.fc.i8x8_mode_counts);
- vp9_zero(pbi->common.fc.sub_mv_ref_counts);
- vp9_zero(pbi->common.fc.mbsplit_counts);
- vp9_zero(pbi->common.fc.NMVcount);
- vp9_zero(pbi->common.fc.mv_ref_ct);
-#if CONFIG_COMP_INTERINTRA_PRED
- vp9_zero(pbi->common.fc.interintra_counts);
-#endif
+ update_frame_context(pbi, &header_bc);
- read_coef_probs(pbi, &header_bc);
+ // Initialize xd pointers. Any reference should do for xd->pre, so use 0.
+ vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->active_ref_idx[0]],
+ sizeof(YV12_BUFFER_CONFIG));
+ vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx],
+ sizeof(YV12_BUFFER_CONFIG));
- vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
- vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
-
// Create the segmentation map structure and set to 0
if (!pc->last_frame_seg_map)
CHECK_MEM_ERROR(pc->last_frame_seg_map,
@@ -1815,23 +1861,22 @@
vp9_decode_mode_mvs_init(pbi, &header_bc);
- vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
-
- /* Decode a row of superblocks */
- for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) {
- decode_sb_row(pbi, pc, mb_row, xd, &residual_bc);
- }
+ decode_tiles(pbi, data, first_partition_length_in_bytes,
+ &header_bc, &residual_bc);
corrupt_tokens |= xd->corrupted;
- /* Collect information about decoder corruption. */
- /* 1. Check first boolean decoder for errors. */
- pc->yv12_fb[pc->new_fb_idx].corrupted = bool_error(&header_bc);
- /* 2. Check the macroblock information */
- pc->yv12_fb[pc->new_fb_idx].corrupted |= corrupt_tokens;
+ // keep track of the last coded dimensions
+ pc->last_width = pc->width;
+ pc->last_height = pc->height;
+ // Collect information about decoder corruption.
+ // 1. Check first boolean decoder for errors.
+ // 2. Check the macroblock information
+ pc->yv12_fb[pc->new_fb_idx].corrupted = bool_error(&header_bc) |
+ corrupt_tokens;
+
if (!pbi->decoded_key_frame) {
- if (pc->frame_type == KEY_FRAME &&
- !pc->yv12_fb[pc->new_fb_idx].corrupted)
+ if (pc->frame_type == KEY_FRAME && !pc->yv12_fb[pc->new_fb_idx].corrupted)
pbi->decoded_key_frame = 1;
else
vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME,
@@ -1838,23 +1883,24 @@
"A stream must start with a complete key frame");
}
- vp9_adapt_coef_probs(pc);
- if (pc->frame_type != KEY_FRAME) {
- vp9_adapt_mode_probs(pc);
- vp9_adapt_nmv_probs(pc, xd->allow_high_precision_mv);
- vp9_update_mode_context(&pbi->common);
+ if (!pc->error_resilient_mode && !pc->frame_parallel_decoding_mode) {
+ vp9_adapt_coef_probs(pc);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_adapt_nzc_probs(pc);
+#endif
}
- /* If this was a kf or Gf note the Q used */
- if ((pc->frame_type == KEY_FRAME) ||
- pc->refresh_golden_frame || pc->refresh_alt_ref_frame) {
- pc->last_kf_gf_q = pc->base_qindex;
+ if (pc->frame_type != KEY_FRAME) {
+ if (!pc->error_resilient_mode && !pc->frame_parallel_decoding_mode) {
+ vp9_adapt_mode_probs(pc);
+ vp9_adapt_nmv_probs(pc, xd->allow_high_precision_mv);
+ vp9_adapt_mode_context(&pbi->common);
+ }
}
+
if (pc->refresh_entropy_probs) {
- if (pc->refresh_alt_ref_frame)
- vpx_memcpy(&pc->lfc_a, &pc->fc, sizeof(pc->fc));
- else
- vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
+ vpx_memcpy(&pc->frame_contexts[pc->frame_context_idx], &pc->fc,
+ sizeof(pc->fc));
}
#ifdef PACKET_TESTING
@@ -1866,11 +1912,10 @@
fclose(f);
}
#endif
- // printf("Frame %d Done\n", frame_count++);
/* Find the end of the coded buffer */
- while (residual_bc.count > CHAR_BIT
- && residual_bc.count < VP9_BD_VALUE_SIZE) {
+ while (residual_bc.count > CHAR_BIT &&
+ residual_bc.count < VP9_BD_VALUE_SIZE) {
residual_bc.count -= CHAR_BIT;
residual_bc.user_buffer--;
}
--- a/vp9/decoder/vp9_decodframe.h
+++ b/vp9/decoder/vp9_decodframe.h
@@ -14,6 +14,6 @@
struct VP9Decompressor;
-extern void vp9_init_de_quantizer(struct VP9Decompressor *pbi);
+void vp9_init_de_quantizer(struct VP9Decompressor *pbi);
#endif // VP9_DECODER_VP9_DECODFRAME_H_
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -14,14 +14,15 @@
#include "vpx_mem/vpx_mem.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/common/vp9_common.h"
+
+
static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
uint8_t *dest, int stride, int width, int height) {
int r, c;
for (r = 0; r < height; r++) {
- for (c = 0; c < width; c++) {
+ for (c = 0; c < width; c++)
dest[c] = clip_pixel(diff[c] + pred[c]);
- }
dest += stride;
diff += width;
@@ -29,6 +30,26 @@
}
}
+void vp9_add_residual_4x4_c(const int16_t *diff, const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride) {
+ add_residual(diff, pred, pitch, dest, stride, 4, 4);
+}
+
+void vp9_add_residual_8x8_c(const int16_t *diff, const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride) {
+ add_residual(diff, pred, pitch, dest, stride, 8, 8);
+}
+
+void vp9_add_residual_16x16_c(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_residual(diff, pred, pitch, dest, stride, 16, 16);
+}
+
+void vp9_add_residual_32x32_c(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_residual(diff, pred, pitch, dest, stride, 32, 32);
+}
+
static void add_constant_residual(const int16_t diff, const uint8_t *pred,
int pitch, uint8_t *dest, int stride,
int width, int height) {
@@ -35,9 +56,8 @@
int r, c;
for (r = 0; r < height; r++) {
- for (c = 0; c < width; c++) {
+ for (c = 0; c < width; c++)
dest[c] = clip_pixel(diff + pred[c]);
- }
dest += stride;
pred += pitch;
@@ -44,117 +64,114 @@
}
}
-void vp9_dequantize_b_c(BLOCKD *d) {
+void vp9_add_constant_residual_8x8_c(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_constant_residual(diff, pred, pitch, dest, stride, 8, 8);
+}
- int i;
- int16_t *DQ = d->dqcoeff;
- const int16_t *Q = d->qcoeff;
- const int16_t *DQC = d->dequant;
+void vp9_add_constant_residual_16x16_c(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_constant_residual(diff, pred, pitch, dest, stride, 16, 16);
+}
- for (i = 0; i < 16; i++) {
- DQ[i] = Q[i] * DQC[i];
- }
+void vp9_add_constant_residual_32x32_c(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ add_constant_residual(diff, pred, pitch, dest, stride, 32, 32);
}
-
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *pred, uint8_t *dest,
- int pitch, int stride, uint16_t eobs) {
- int16_t output[16];
- int16_t *diff_ptr = output;
+ int pitch, int stride, int eob) {
int i;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
- for (i = 0; i < 16; i++) {
- input[i] = dq[i] * input[i];
- }
+ for (i = 0; i < 16; i++)
+ input[i] *= dq[i];
- vp9_ihtllm(input, output, 4 << 1, tx_type, 4, eobs);
-
+ vp9_short_iht4x4(input, output, 4, tx_type);
vpx_memset(input, 0, 32);
-
- add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
}
void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *pred, uint8_t *dest,
- int pitch, int stride, uint16_t eobs) {
- int16_t output[64];
- int16_t *diff_ptr = output;
- int i;
- if (eobs == 0) {
- /* All 0 DCT coefficient */
+ int pitch, int stride, int eob) {
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
+
+ if (eob == 0) {
+ // All 0 DCT coefficients
vp9_copy_mem8x8(pred, pitch, dest, stride);
- } else if (eobs > 0) {
- input[0] = dq[0] * input[0];
- for (i = 1; i < 64; i++) {
- input[i] = dq[1] * input[i];
- }
+ } else if (eob > 0) {
+ int i;
- vp9_ihtllm(input, output, 16, tx_type, 8, eobs);
+ input[0] *= dq[0];
+ for (i = 1; i < 64; i++)
+ input[i] *= dq[1];
+ vp9_short_iht8x8(input, output, 8, tx_type);
vpx_memset(input, 0, 128);
-
- add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+ vp9_add_residual_8x8(output, pred, pitch, dest, stride);
}
}
void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
- uint8_t *dest, int pitch, int stride) {
- int16_t output[16];
- int16_t *diff_ptr = output;
+ uint8_t *dest, int pitch, int stride, int eob) {
int i;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
- for (i = 0; i < 16; i++) {
- input[i] = dq[i] * input[i];
- }
+ if (eob > 1) {
+ for (i = 0; i < 16; i++)
+ input[i] *= dq[i];
- /* the idct halves ( >> 1) the pitch */
- vp9_short_idct4x4llm_c(input, output, 4 << 1);
+ // the idct halves ( >> 1) the pitch
+ vp9_short_idct4x4(input, output, 4 << 1);
- vpx_memset(input, 0, 32);
+ vpx_memset(input, 0, 32);
- add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
+ } else {
+ vp9_dc_only_idct_add(input[0]*dq[0], pred, dest, pitch, stride);
+ ((int *)input)[0] = 0;
+ }
}
void vp9_dequant_dc_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
- uint8_t *dest, int pitch, int stride, int Dc) {
+ uint8_t *dest, int pitch, int stride, int dc) {
int i;
- int16_t output[16];
- int16_t *diff_ptr = output;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
- input[0] = (int16_t)Dc;
+ input[0] = dc;
- for (i = 1; i < 16; i++) {
- input[i] = dq[i] * input[i];
- }
+ for (i = 1; i < 16; i++)
+ input[i] *= dq[i];
- /* the idct halves ( >> 1) the pitch */
- vp9_short_idct4x4llm_c(input, output, 4 << 1);
-
+ // the idct halves ( >> 1) the pitch
+ vp9_short_idct4x4(input, output, 4 << 1);
vpx_memset(input, 0, 32);
-
- add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
}
-#if CONFIG_LOSSLESS
void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest,
- int pitch, int stride) {
- int16_t output[16];
- int16_t *diff_ptr = output;
+ int pitch, int stride, int eob) {
int i;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
- for (i = 0; i < 16; i++) {
- input[i] = dq[i] * input[i];
- }
+ if (eob > 1) {
+ for (i = 0; i < 16; i++)
+ input[i] *= dq[i];
- vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1);
+ vp9_short_iwalsh4x4_c(input, output, 4 << 1);
- vpx_memset(input, 0, 32);
+ vpx_memset(input, 0, 32);
- add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
+ } else {
+ vp9_dc_only_inv_walsh_add(input[0]*dq[0], pred, dest, pitch, stride);
+ ((int *)input)[0] = 0;
+ }
}
void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
@@ -162,79 +179,58 @@
uint8_t *dest,
int pitch, int stride, int dc) {
int i;
- int16_t output[16];
- int16_t *diff_ptr = output;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 16);
- input[0] = (int16_t)dc;
+ input[0] = dc;
- for (i = 1; i < 16; i++) {
- input[i] = dq[i] * input[i];
- }
+ for (i = 1; i < 16; i++)
+ input[i] *= dq[i];
- vp9_short_inv_walsh4x4_x8_c(input, output, 4 << 1);
+ vp9_short_iwalsh4x4_c(input, output, 4 << 1);
vpx_memset(input, 0, 32);
-
- add_residual(diff_ptr, pred, pitch, dest, stride, 4, 4);
+ vp9_add_residual_4x4(output, pred, pitch, dest, stride);
}
-#endif
-void vp9_dequantize_b_2x2_c(BLOCKD *d) {
- int i;
- int16_t *DQ = d->dqcoeff;
- const int16_t *Q = d->qcoeff;
- const int16_t *DQC = d->dequant;
-
- for (i = 0; i < 16; i++) {
- DQ[i] = (int16_t)((Q[i] * DQC[i]));
- }
-}
-
void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest, int pitch,
- int stride, int dc, int eob) {
- int16_t output[64];
- int16_t *diff_ptr = output;
- int i;
+ int stride, int eob) {
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 64);
- /* If dc is 1, then input[0] is the reconstructed value, do not need
- * dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
- */
- if (!dc)
- input[0] *= dq[0];
+ // If dc is 1, then input[0] is the reconstructed value, do not need
+ // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
+ input[0] *= dq[0];
- /* The calculation can be simplified if there are not many non-zero dct
- * coefficients. Use eobs to decide what to do.
- * TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
- * Combine that with code here.
- */
+ // The calculation can be simplified if there are not many non-zero dct
+ // coefficients. Use eobs to decide what to do.
+ // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
+ // Combine that with code here.
if (eob == 0) {
- /* All 0 DCT coefficient */
+ // All 0 DCT coefficients
vp9_copy_mem8x8(pred, pitch, dest, stride);
} else if (eob == 1) {
- /* DC only DCT coefficient. */
+ // DC only DCT coefficient
+ int16_t in = input[0];
int16_t out;
- /* Note: the idct1 will need to be modified accordingly whenever
- * vp9_short_idct8x8_c() is modified. */
- out = (input[0] + 1 + (input[0] < 0)) >> 2;
- out = out << 3;
- out = (out + 32) >> 7;
-
+ // Note: the idct1 will need to be modified accordingly whenever
+ // vp9_short_idct8x8_c() is modified.
+ vp9_short_idct1_8x8_c(&in, &out);
input[0] = 0;
- add_constant_residual(out, pred, pitch, dest, stride, 8, 8);
+ vp9_add_constant_residual_8x8(out, pred, pitch, dest, stride);
+#if !CONFIG_SCATTERSCAN
} else if (eob <= 10) {
- input[1] = input[1] * dq[1];
- input[2] = input[2] * dq[1];
- input[3] = input[3] * dq[1];
- input[8] = input[8] * dq[1];
- input[9] = input[9] * dq[1];
- input[10] = input[10] * dq[1];
- input[16] = input[16] * dq[1];
- input[17] = input[17] * dq[1];
- input[24] = input[24] * dq[1];
+ input[1] *= dq[1];
+ input[2] *= dq[1];
+ input[3] *= dq[1];
+ input[8] *= dq[1];
+ input[9] *= dq[1];
+ input[10] *= dq[1];
+ input[16] *= dq[1];
+ input[17] *= dq[1];
+ input[24] *= dq[1];
- vp9_short_idct10_8x8_c(input, output, 16);
+ vp9_short_idct10_8x8(input, output, 16);
input[0] = input[1] = input[2] = input[3] = 0;
input[8] = input[9] = input[10] = 0;
@@ -241,19 +237,19 @@
input[16] = input[17] = 0;
input[24] = 0;
- add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+ vp9_add_residual_8x8(output, pred, pitch, dest, stride);
+#endif
} else {
+ int i;
+
// recover quantizer for 4 4x4 blocks
- for (i = 1; i < 64; i++) {
- input[i] = input[i] * dq[1];
- }
- // the idct halves ( >> 1) the pitch
- vp9_short_idct8x8_c(input, output, 16);
+ for (i = 1; i < 64; i++)
+ input[i] *= dq[1];
+ // the idct halves ( >> 1) the pitch
+ vp9_short_idct8x8(input, output, 8 << 1);
vpx_memset(input, 0, 128);
-
- add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
-
+ vp9_add_residual_8x8(output, pred, pitch, dest, stride);
}
}
@@ -260,29 +256,30 @@
void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq, uint8_t *pred,
uint8_t *dest, int pitch, int stride,
- uint16_t eobs) {
- int16_t output[256];
- int16_t *diff_ptr = output;
- int i;
- if (eobs == 0) {
- /* All 0 DCT coefficient */
+ int eob) {
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
+
+ if (eob == 0) {
+ // All 0 DCT coefficients
vp9_copy_mem16x16(pred, pitch, dest, stride);
- } else if (eobs > 0) {
- input[0]= input[0] * dq[0];
+ } else if (eob > 0) {
+ int i;
+ input[0] *= dq[0];
+
// recover quantizer for 4 4x4 blocks
for (i = 1; i < 256; i++)
- input[i] = input[i] * dq[1];
+ input[i] *= dq[1];
// inverse hybrid transform
- vp9_ihtllm(input, output, 32, tx_type, 16, eobs);
+ vp9_short_iht16x16(input, output, 16, tx_type);
// the idct halves ( >> 1) the pitch
- // vp9_short_idct16x16_c(input, output, 32);
+ // vp9_short_idct16x16(input, output, 32);
vpx_memset(input, 0, 512);
- add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ vp9_add_residual_16x16(output, pred, pitch, dest, stride);
}
}
@@ -289,9 +286,7 @@
void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest, int pitch,
int stride, int eob) {
- int16_t output[256];
- int16_t *diff_ptr = output;
- int i;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 256);
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
@@ -300,31 +295,30 @@
vp9_copy_mem16x16(pred, pitch, dest, stride);
} else if (eob == 1) {
/* DC only DCT coefficient. */
+ int16_t in = input[0] * dq[0];
int16_t out;
-
/* Note: the idct1 will need to be modified accordingly whenever
- * vp9_short_idct16x16_c() is modified. */
- out = (input[0] * dq[0] + 2) >> 2;
- out = (out + 2) >> 2;
- out = (out + 4) >> 3;
-
+ * vp9_short_idct16x16() is modified. */
+ vp9_short_idct1_16x16_c(&in, &out);
input[0] = 0;
- add_constant_residual(out, pred, pitch, dest, stride, 16, 16);
+ vp9_add_constant_residual_16x16(out, pred, pitch, dest, stride);
+#if !CONFIG_SCATTERSCAN
} else if (eob <= 10) {
- input[0]= input[0] * dq[0];
- input[1] = input[1] * dq[1];
- input[2] = input[2] * dq[1];
- input[3] = input[3] * dq[1];
- input[16] = input[16] * dq[1];
- input[17] = input[17] * dq[1];
- input[18] = input[18] * dq[1];
- input[32] = input[32] * dq[1];
- input[33] = input[33] * dq[1];
- input[48] = input[48] * dq[1];
+ input[0] *= dq[0];
+ input[1] *= dq[1];
+ input[2] *= dq[1];
+ input[3] *= dq[1];
+ input[16] *= dq[1];
+ input[17] *= dq[1];
+ input[18] *= dq[1];
+ input[32] *= dq[1];
+ input[33] *= dq[1];
+ input[48] *= dq[1];
+
// the idct halves ( >> 1) the pitch
- vp9_short_idct10_16x16_c(input, output, 32);
+ vp9_short_idct10_16x16(input, output, 32);
input[0] = input[1] = input[2] = input[3] = 0;
input[16] = input[17] = input[18] = 0;
@@ -331,20 +325,23 @@
input[32] = input[33] = 0;
input[48] = 0;
- add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ vp9_add_residual_16x16(output, pred, pitch, dest, stride);
+#endif
} else {
- input[0]= input[0] * dq[0];
+ int i;
+ input[0] *= dq[0];
+
// recover quantizer for 4 4x4 blocks
for (i = 1; i < 256; i++)
- input[i] = input[i] * dq[1];
+ input[i] *= dq[1];
// the idct halves ( >> 1) the pitch
- vp9_short_idct16x16_c(input, output, 32);
+ vp9_short_idct16x16(input, output, 16 << 1);
vpx_memset(input, 0, 512);
- add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ vp9_add_residual_16x16(output, pred, pitch, dest, stride);
}
}
@@ -351,16 +348,45 @@
void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
uint8_t *pred, uint8_t *dest, int pitch,
int stride, int eob) {
- int16_t output[1024];
- int i;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024);
- input[0]= input[0] * dq[0] / 2;
- for (i = 1; i < 1024; i++)
- input[i] = input[i] * dq[1] / 2;
- vp9_short_idct32x32_c(input, output, 64);
- vpx_memset(input, 0, 2048);
+ if (eob) {
+ input[0] = input[0] * dq[0] / 2;
+ if (eob == 1) {
+ vp9_short_idct1_32x32(input, output);
+ vp9_add_constant_residual_32x32(output[0], pred, pitch, dest, stride);
+ input[0] = 0;
+#if !CONFIG_SCATTERSCAN
+ } else if (eob <= 10) {
+ input[1] = input[1] * dq[1] / 2;
+ input[2] = input[2] * dq[1] / 2;
+ input[3] = input[3] * dq[1] / 2;
+ input[32] = input[32] * dq[1] / 2;
+ input[33] = input[33] * dq[1] / 2;
+ input[34] = input[34] * dq[1] / 2;
+ input[64] = input[64] * dq[1] / 2;
+ input[65] = input[65] * dq[1] / 2;
+ input[96] = input[96] * dq[1] / 2;
- add_residual(output, pred, pitch, dest, stride, 32, 32);
+ // the idct halves ( >> 1) the pitch
+ vp9_short_idct10_32x32(input, output, 64);
+
+ input[0] = input[1] = input[2] = input[3] = 0;
+ input[32] = input[33] = input[34] = 0;
+ input[64] = input[65] = 0;
+ input[96] = 0;
+
+ vp9_add_residual_32x32(output, pred, pitch, dest, stride);
+#endif
+ } else {
+ int i;
+ for (i = 1; i < 1024; i++)
+ input[i] = input[i] * dq[1] / 2;
+ vp9_short_idct32x32(input, output, 64);
+ vpx_memset(input, 0, 2048);
+ vp9_add_residual_32x32(output, pred, pitch, dest, stride);
+ }
+ }
}
void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq,
@@ -367,8 +393,9 @@
uint8_t *dstu,
uint8_t *dstv,
int stride,
- uint16_t *eobs) {
- vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, eobs[0]);
- vp9_dequant_idct_add_16x16_c(q + 256, dq,
- dstv, dstv, stride, stride, eobs[4]);
+ MACROBLOCKD *xd) {
+ vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride,
+ xd->eobs[64]);
+ vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride,
+ xd->eobs[80]);
}
--- a/vp9/decoder/vp9_dequantize.h
+++ b/vp9/decoder/vp9_dequantize.h
@@ -11,84 +11,80 @@
#ifndef VP9_DECODER_VP9_DEQUANTIZE_H_
#define VP9_DECODER_VP9_DEQUANTIZE_H_
+
#include "vp9/common/vp9_blockd.h"
-#if CONFIG_LOSSLESS
-extern void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
- unsigned char *pred,
- unsigned char *output,
- int pitch, int stride);
-extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
- unsigned char *pred,
- unsigned char *output,
- int pitch, int stride, int dc);
-extern void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q,
- const int16_t *dq,
- unsigned char *pre,
- unsigned char *dst,
- int stride,
- uint16_t *eobs,
- const int16_t *dc);
-extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
- unsigned char *pre,
- unsigned char *dst,
- int stride,
- uint16_t *eobs);
-extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
- unsigned char *pre,
- unsigned char *dst_u,
- unsigned char *dst_v,
- int stride,
- uint16_t *eobs);
-#endif
-typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq,
- unsigned char *pred, unsigned char *output, int pitch, int stride);
-typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq,
- unsigned char *pred, unsigned char *output, int pitch, int stride, int dc);
+void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
+ unsigned char *pred,
+ unsigned char *output,
+ int pitch, int stride, int eob);
-typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,
- unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs,
- const int16_t *dc);
-typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,
- unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs);
-typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq,
- unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride,
- uint16_t *eobs);
+void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
+ unsigned char *pred,
+ unsigned char *output,
+ int pitch, int stride, int dc);
+void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q,
+ const int16_t *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride,
+ const int16_t *dc);
+
+void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
+ unsigned char *pre,
+ unsigned char *dst,
+ int stride,
+ struct macroblockd *xd);
+
+void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
+ unsigned char *pre,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
+ int stride,
+ struct macroblockd *xd);
+
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq,
unsigned char *pred, unsigned char *dest,
- int pitch, int stride, uint16_t eobs);
+ int pitch, int stride, int eob);
void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride,
- uint16_t eobs);
+ int eob);
void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq, unsigned char *pred,
unsigned char *dest,
- int pitch, int stride, uint16_t eobs);
+ int pitch, int stride, int eob);
void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
unsigned char *dst,
int stride,
- uint16_t *eobs,
const int16_t *dc,
MACROBLOCKD *xd);
+void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
+ unsigned char *dst,
+ int stride,
+ MACROBLOCKD *xd);
+
void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
unsigned char *dst,
int stride,
- uint16_t *eobs,
const int16_t *dc,
MACROBLOCKD *xd);
+void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
+ unsigned char *dst,
+ int stride,
+ MACROBLOCKD *xd);
+
void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
unsigned char *dstu,
unsigned char *dstv,
int stride,
- uint16_t *eobs,
MACROBLOCKD *xd);
void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
@@ -95,7 +91,6 @@
unsigned char *dstu,
unsigned char *dstv,
int stride,
- uint16_t *eobs,
MACROBLOCKD *xd);
-#endif
+#endif // VP9_DECODER_VP9_DEQUANTIZE_H_
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -59,115 +59,215 @@
DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
-static int get_signed(BOOL_DECODER *br, int value_to_sign) {
+static int16_t get_signed(BOOL_DECODER *br, int16_t value_to_sign) {
return decode_bool(br, 128) ? -value_to_sign : value_to_sign;
}
-#if CONFIG_NEWCOEFCONTEXT
-#define PT pn
-#define INCREMENT_COUNT(token) \
- do { \
- coef_counts[type][coef_bands[c]][pn][token]++; \
- pn = pt = vp9_prev_token_class[token]; \
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(coef_bands[c + 1])) \
- pn = vp9_get_coef_neighbor_context( \
- qcoeff_ptr, nodc, neighbors, scan[c + 1]); \
- } while (0)
-#else
-#define PT pt
+
#define INCREMENT_COUNT(token) \
do { \
- coef_counts[type][coef_bands[c]][pt][token]++; \
- pt = vp9_prev_token_class[token]; \
+ coef_counts[type][ref][get_coef_band(scan, txfm_size, c)] \
+ [pt][token]++; \
+ token_cache[c] = token; \
+ pt = vp9_get_coef_context(scan, nb, pad, token_cache, \
+ c + 1, default_eob); \
} while (0)
-#endif /* CONFIG_NEWCOEFCONTEXT */
+#if CONFIG_CODE_NONZEROCOUNT
#define WRITE_COEF_CONTINUE(val, token) \
{ \
- qcoeff_ptr[scan[c]] = (int16_t) get_signed(br, val); \
+ qcoeff_ptr[scan[c]] = get_signed(br, val); \
INCREMENT_COUNT(token); \
c++; \
+ nzc++; \
continue; \
}
+#else
+#define WRITE_COEF_CONTINUE(val, token) \
+ { \
+ qcoeff_ptr[scan[c]] = get_signed(br, val); \
+ INCREMENT_COUNT(token); \
+ c++; \
+ continue; \
+ }
+#endif // CONFIG_CODE_NONZEROCOUNT
#define ADJUST_COEF(prob, bits_count) \
do { \
if (vp9_read(br, prob)) \
- val += (uint16_t)(1 << bits_count);\
+ val += 1 << bits_count; \
} while (0);
static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd,
- BOOL_DECODER* const br,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
- PLANE_TYPE type,
- TX_TYPE tx_type,
- int seg_eob, int16_t *qcoeff_ptr,
- const int *const scan, TX_SIZE txfm_size,
- const int *coef_bands) {
+ BOOL_DECODER* const br, int block_idx,
+ PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
+ TX_SIZE txfm_size) {
+ ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context;
+ ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context;
+ int aidx, lidx;
+ ENTROPY_CONTEXT above_ec, left_ec;
FRAME_CONTEXT *const fc = &dx->common.fc;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
- int pn;
-#endif
- int nodc = (type == PLANE_TYPE_Y_NO_DC);
- int pt, c = nodc;
+ int pt, c = 0, pad, default_eob;
vp9_coeff_probs *coef_probs;
vp9_prob *prob;
vp9_coeff_count *coef_counts;
+ const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
+#if CONFIG_CODE_NONZEROCOUNT
+ uint16_t nzc = 0;
+ uint16_t nzc_expected = xd->mode_info_context->mbmi.nzcs[block_idx];
+#endif
+ const int *scan, *nb;
+ uint8_t token_cache[1024];
+ if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ aidx = vp9_block2above_sb64[txfm_size][block_idx];
+ lidx = vp9_block2left_sb64[txfm_size][block_idx];
+ } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) {
+ aidx = vp9_block2above_sb[txfm_size][block_idx];
+ lidx = vp9_block2left_sb[txfm_size][block_idx];
+ } else {
+ aidx = vp9_block2above[txfm_size][block_idx];
+ lidx = vp9_block2left[txfm_size][block_idx];
+ }
+
switch (txfm_size) {
default:
- case TX_4X4:
- if (tx_type == DCT_DCT) {
- coef_probs = fc->coef_probs_4x4;
- coef_counts = fc->coef_counts_4x4;
- } else {
- coef_probs = fc->hybrid_coef_probs_4x4;
- coef_counts = fc->hybrid_coef_counts_4x4;
+ case TX_4X4: {
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_4x4(xd, block_idx) : DCT_DCT;
+ switch (tx_type) {
+ default:
+ scan = vp9_default_zig_zag1d_4x4;
+ break;
+ case ADST_DCT:
+ scan = vp9_row_scan_4x4;
+ break;
+ case DCT_ADST:
+ scan = vp9_col_scan_4x4;
+ break;
}
+ above_ec = A0[aidx] != 0;
+ left_ec = L0[lidx] != 0;
+ coef_probs = fc->coef_probs_4x4;
+ coef_counts = fc->coef_counts_4x4;
+ default_eob = 16;
break;
- case TX_8X8:
- if (tx_type == DCT_DCT) {
- coef_probs = fc->coef_probs_8x8;
- coef_counts = fc->coef_counts_8x8;
- } else {
- coef_probs = fc->hybrid_coef_probs_8x8;
- coef_counts = fc->hybrid_coef_counts_8x8;
+ }
+ case TX_8X8: {
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+ const int sz = 3 + sb_type, x = block_idx & ((1 << sz) - 1);
+ const int y = block_idx - x;
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
+ switch (tx_type) {
+ default:
+ scan = vp9_default_zig_zag1d_8x8;
+ break;
+ case ADST_DCT:
+ scan = vp9_row_scan_8x8;
+ break;
+ case DCT_ADST:
+ scan = vp9_col_scan_8x8;
+ break;
}
+ coef_probs = fc->coef_probs_8x8;
+ coef_counts = fc->coef_counts_8x8;
+ above_ec = (A0[aidx] + A0[aidx + 1]) != 0;
+ left_ec = (L0[lidx] + L0[lidx + 1]) != 0;
+ default_eob = 64;
break;
- case TX_16X16:
- if (tx_type == DCT_DCT) {
- coef_probs = fc->coef_probs_16x16;
- coef_counts = fc->coef_counts_16x16;
+ }
+ case TX_16X16: {
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+ const int sz = 4 + sb_type, x = block_idx & ((1 << sz) - 1);
+ const int y = block_idx - x;
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
+ switch (tx_type) {
+ default:
+ scan = vp9_default_zig_zag1d_16x16;
+ break;
+ case ADST_DCT:
+ scan = vp9_row_scan_16x16;
+ break;
+ case DCT_ADST:
+ scan = vp9_col_scan_16x16;
+ break;
+ }
+ coef_probs = fc->coef_probs_16x16;
+ coef_counts = fc->coef_counts_16x16;
+ if (type == PLANE_TYPE_UV) {
+ ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
+ ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
+ above_ec = (A0[aidx] + A0[aidx + 1] + A1[aidx] + A1[aidx + 1]) != 0;
+ left_ec = (L0[lidx] + L0[lidx + 1] + L1[lidx] + L1[lidx + 1]) != 0;
} else {
- coef_probs = fc->hybrid_coef_probs_16x16;
- coef_counts = fc->hybrid_coef_counts_16x16;
+ above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3]) != 0;
+ left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3]) != 0;
}
+ default_eob = 256;
break;
+ }
case TX_32X32:
+ scan = vp9_default_zig_zag1d_32x32;
coef_probs = fc->coef_probs_32x32;
coef_counts = fc->coef_counts_32x32;
+ if (type == PLANE_TYPE_UV) {
+ ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
+ ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
+ ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2);
+ ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2);
+ ENTROPY_CONTEXT *A3 = (ENTROPY_CONTEXT *) (xd->above_context + 3);
+ ENTROPY_CONTEXT *L3 = (ENTROPY_CONTEXT *) (xd->left_context + 3);
+ above_ec = (A0[aidx] + A0[aidx + 1] + A1[aidx] + A1[aidx + 1] +
+ A2[aidx] + A2[aidx + 1] + A3[aidx] + A3[aidx + 1]) != 0;
+ left_ec = (L0[lidx] + L0[lidx + 1] + L1[lidx] + L1[lidx + 1] +
+ L2[lidx] + L2[lidx + 1] + L3[lidx] + L3[lidx + 1]) != 0;
+ } else {
+ ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
+ ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
+ above_ec = (A0[aidx] + A0[aidx + 1] + A0[aidx + 2] + A0[aidx + 3] +
+ A1[aidx] + A1[aidx + 1] + A1[aidx + 2] + A1[aidx + 3]) != 0;
+ left_ec = (L0[lidx] + L0[lidx + 1] + L0[lidx + 2] + L0[lidx + 3] +
+ L1[lidx] + L1[lidx + 1] + L1[lidx + 2] + L1[lidx + 3]) != 0;
+ }
+ default_eob = 1024;
break;
}
- VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-#if CONFIG_NEWCOEFCONTEXT
- pn = pt;
- neighbors = vp9_get_coef_neighbors_handle(scan);
-#endif
+ VP9_COMBINEENTROPYCONTEXTS(pt, above_ec, left_ec);
+ nb = vp9_get_coef_neighbors_handle(scan, &pad);
+
while (1) {
int val;
const uint8_t *cat6 = cat6_prob;
- if (c >= seg_eob) break;
- prob = coef_probs[type][coef_bands[c]][PT];
+
+ if (c >= seg_eob)
+ break;
+#if CONFIG_CODE_NONZEROCOUNT
+ if (nzc == nzc_expected)
+ break;
+#endif
+ prob = coef_probs[type][ref][get_coef_band(scan, txfm_size, c)][pt];
+#if CONFIG_CODE_NONZEROCOUNT == 0
+ fc->eob_branch_counts[txfm_size][type][ref]
+ [get_coef_band(scan, txfm_size, c)][pt]++;
if (!vp9_read(br, prob[EOB_CONTEXT_NODE]))
break;
+#endif
SKIP_START:
- if (c >= seg_eob) break;
+ if (c >= seg_eob)
+ break;
+#if CONFIG_CODE_NONZEROCOUNT
+ if (nzc == nzc_expected)
+ break;
+ // decode zero node only if there are zeros left
+ if (seg_eob - nzc_expected - c + nzc > 0)
+#endif
if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) {
INCREMENT_COUNT(ZERO_TOKEN);
++c;
- prob = coef_probs[type][coef_bands[c]][PT];
+ prob = coef_probs[type][ref][get_coef_band(scan, txfm_size, c)][pt];
goto SKIP_START;
}
// ONE_CONTEXT_NODE_0_
@@ -230,141 +330,147 @@
WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6);
}
+#if CONFIG_CODE_NONZEROCOUNT == 0
if (c < seg_eob)
- coef_counts[type][coef_bands[c]][PT][DCT_EOB_TOKEN]++;
+ coef_counts[type][ref][get_coef_band(scan, txfm_size, c)]
+ [pt][DCT_EOB_TOKEN]++;
+#endif
- a[0] = l[0] = (c > !type);
-
+ A0[aidx] = L0[lidx] = c > 0;
+ if (txfm_size >= TX_8X8) {
+ A0[aidx + 1] = L0[lidx + 1] = A0[aidx];
+ if (txfm_size >= TX_16X16) {
+ if (type == PLANE_TYPE_UV) {
+ ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
+ ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
+ A1[aidx] = A1[aidx + 1] = L1[lidx] = L1[lidx + 1] = A0[aidx];
+ if (txfm_size >= TX_32X32) {
+ ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2);
+ ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2);
+ ENTROPY_CONTEXT *A3 = (ENTROPY_CONTEXT *) (xd->above_context + 3);
+ ENTROPY_CONTEXT *L3 = (ENTROPY_CONTEXT *) (xd->left_context + 3);
+ A2[aidx] = A2[aidx + 1] = A3[aidx] = A3[aidx + 1] = A0[aidx];
+ L2[lidx] = L2[lidx + 1] = L3[lidx] = L3[lidx + 1] = A0[aidx];
+ }
+ } else {
+ A0[aidx + 2] = A0[aidx + 3] = L0[lidx + 2] = L0[lidx + 3] = A0[aidx];
+ if (txfm_size >= TX_32X32) {
+ ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1);
+ ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1);
+ A1[aidx] = A1[aidx + 1] = A1[aidx + 2] = A1[aidx + 3] = A0[aidx];
+ L1[lidx] = L1[lidx + 1] = L1[lidx + 2] = L1[lidx + 3] = A0[aidx];
+ }
+ }
+ }
+ }
return c;
}
static int get_eob(MACROBLOCKD* const xd, int segment_id, int eob_max) {
- int active = vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB);
- int eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
-
- if (!active || eob > eob_max)
- eob = eob_max;
- return eob;
+ return vp9_get_segdata(xd, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
-int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
- MACROBLOCKD* const xd,
- BOOL_DECODER* const bc) {
- ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context;
- ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context;
- ENTROPY_CONTEXT* const A1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]);
- ENTROPY_CONTEXT* const L1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]);
- uint16_t *const eobs = xd->eobs;
+static INLINE int decode_sb(VP9D_COMP* const pbi,
+ MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc,
+ int offset, int count, int inc,
+ int eob_max, TX_SIZE tx_size) {
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- int c, i, eobtotal = 0, seg_eob;
+ const int seg_eob = get_eob(xd, segment_id, eob_max);
+ int i, eobtotal = 0;
- // Luma block
-#if CONFIG_CNVCONTEXT
- ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3] +
- A1[0] + A1[1] + A1[2] + A1[3]) != 0;
- ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3] +
- L1[0] + L1[1] + L1[2] + L1[3]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = A[0];
- ENTROPY_CONTEXT left_ec = L[0];
-#endif
- eobs[0] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec,
- PLANE_TYPE_Y_WITH_DC,
- DCT_DCT, get_eob(xd, segment_id, 1024),
- xd->sb_coeff_data.qcoeff,
- vp9_default_zig_zag1d_32x32,
- TX_32X32, vp9_coef_bands_32x32);
- A[1] = A[2] = A[3] = A[0] = above_ec;
- L[1] = L[2] = L[3] = L[0] = left_ec;
- A1[1] = A1[2] = A1[3] = A1[0] = above_ec;
- L1[1] = L1[2] = L1[3] = L1[0] = left_ec;
+ // luma blocks
+ for (i = 0; i < offset; i += inc) {
+ const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob,
+ xd->qcoeff + i * 16, tx_size);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
- eobtotal += c;
+ // chroma blocks
+ for (i = offset; i < count; i += inc) {
+ const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, tx_size);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
- // 16x16 chroma blocks
- seg_eob = get_eob(xd, segment_id, 256);
+ return eobtotal;
+}
- for (i = 16; i < 24; i += 4) {
- ENTROPY_CONTEXT* const a = A + vp9_block2above[TX_16X16][i];
- ENTROPY_CONTEXT* const l = L + vp9_block2left[TX_16X16][i];
- ENTROPY_CONTEXT* const a1 = A1 + vp9_block2above[TX_16X16][i];
- ENTROPY_CONTEXT* const l1 = L1 + vp9_block2left[TX_16X16][i];
-#if CONFIG_CNVCONTEXT
- above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
- left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
-#else
- above_ec = a[0];
- left_ec = l[0];
-#endif
+int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
+ MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc) {
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32: {
+ // 32x32 luma block
+ const int segment_id = xd->mode_info_context->mbmi.segment_id;
+ int i, eobtotal = 0, seg_eob;
+ int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
+ get_eob(xd, segment_id, 1024), xd->qcoeff, TX_32X32);
+ xd->eobs[0] = c;
+ eobtotal += c;
- eobs[i] = c = decode_coefs(pbi, xd, bc,
- &above_ec, &left_ec,
- PLANE_TYPE_UV,
- DCT_DCT, seg_eob,
- xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
- vp9_default_zig_zag1d_16x16,
- TX_16X16, vp9_coef_bands_16x16);
+ // 16x16 chroma blocks
+ seg_eob = get_eob(xd, segment_id, 256);
+ for (i = 64; i < 96; i += 16) {
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+ xd->qcoeff + i * 16, TX_16X16);
+ xd->eobs[i] = c;
+ eobtotal += c;
+ }
+ return eobtotal;
+ }
+ case TX_16X16:
+ return decode_sb(pbi, xd, bc, 64, 96, 16, 16 * 16, TX_16X16);
+ case TX_8X8:
+ return decode_sb(pbi, xd, bc, 64, 96, 4, 8 * 8, TX_8X8);
+ case TX_4X4:
+ return decode_sb(pbi, xd, bc, 64, 96, 1, 4 * 4, TX_4X4);
+ default:
+ assert(0);
+ return 0;
+ }
+}
- a1[1] = a1[0] = a[1] = a[0] = above_ec;
- l1[1] = l1[0] = l[1] = l[0] = left_ec;
- eobtotal += c;
+int vp9_decode_sb64_tokens(VP9D_COMP* const pbi,
+ MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc) {
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ return decode_sb(pbi, xd, bc, 256, 384, 64, 32 * 32, TX_32X32);
+ case TX_16X16:
+ return decode_sb(pbi, xd, bc, 256, 384, 16, 16 * 16, TX_16X16);
+ case TX_8X8:
+ return decode_sb(pbi, xd, bc, 256, 384, 4, 8 * 8, TX_8X8);
+ case TX_4X4:
+ return decode_sb(pbi, xd, bc, 256, 384, 1, 4 * 4, TX_4X4);
+ default:
+ assert(0);
+ return 0;
}
- // no Y2 block
- A[8] = L[8] = A1[8] = L1[8] = 0;
- return eobtotal;
}
static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
BOOL_DECODER* const bc) {
- ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context;
- ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context;
- uint16_t *const eobs = xd->eobs;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- int c, i, eobtotal = 0, seg_eob;
- // Luma block
+ int i, eobtotal = 0, seg_eob;
-#if CONFIG_CNVCONTEXT
- ENTROPY_CONTEXT above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
- ENTROPY_CONTEXT left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = A[0];
- ENTROPY_CONTEXT left_ec = L[0];
-#endif
- eobs[0] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec,
- PLANE_TYPE_Y_WITH_DC,
- get_tx_type(xd, &xd->block[0]),
- get_eob(xd, segment_id, 256),
- xd->qcoeff, vp9_default_zig_zag1d_16x16,
- TX_16X16, vp9_coef_bands_16x16);
- A[1] = A[2] = A[3] = A[0] = above_ec;
- L[1] = L[2] = L[3] = L[0] = left_ec;
+ // Luma block
+ int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
+ get_eob(xd, segment_id, 256), xd->qcoeff, TX_16X16);
+ xd->eobs[0] = c;
eobtotal += c;
// 8x8 chroma blocks
seg_eob = get_eob(xd, segment_id, 64);
for (i = 16; i < 24; i += 4) {
- ENTROPY_CONTEXT* const a = A + vp9_block2above[TX_8X8][i];
- ENTROPY_CONTEXT* const l = L + vp9_block2left[TX_8X8][i];
-#if CONFIG_CNVCONTEXT
- above_ec = (a[0] + a[1]) != 0;
- left_ec = (l[0] + l[1]) != 0;
-#else
- above_ec = a[0];
- left_ec = l[0];
-#endif
- eobs[i] = c = decode_coefs(pbi, xd, bc,
- &above_ec, &left_ec,
- PLANE_TYPE_UV,
- DCT_DCT, seg_eob, xd->block[i].qcoeff,
- vp9_default_zig_zag1d_8x8,
- TX_8X8, vp9_coef_bands_8x8);
- a[1] = a[0] = above_ec;
- l[1] = l[0] = left_ec;
+ c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
+ seg_eob, xd->block[i].qcoeff, TX_8X8);
+ xd->eobs[i] = c;
eobtotal += c;
}
- A[8] = 0;
- L[8] = 0;
return eobtotal;
}
@@ -371,53 +477,15 @@
static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
BOOL_DECODER* const bc) {
- ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;
- ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;
- uint16_t *const eobs = xd->eobs;
- PLANE_TYPE type;
- int c, i, eobtotal = 0, seg_eob;
+ int i, eobtotal = 0;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- int has_2nd_order = get_2nd_order_usage(xd);
- // 2nd order DC block
- if (has_2nd_order) {
- ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][24];
- ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][24];
-
- eobs[24] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_Y2,
- DCT_DCT, get_eob(xd, segment_id, 4),
- xd->block[24].qcoeff,
- vp9_default_zig_zag1d_4x4, TX_8X8,
- vp9_coef_bands_4x4);
- eobtotal += c - 4;
- type = PLANE_TYPE_Y_NO_DC;
- } else {
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
- eobs[24] = 0;
- type = PLANE_TYPE_Y_WITH_DC;
- }
-
// luma blocks
- seg_eob = get_eob(xd, segment_id, 64);
+ int seg_eob = get_eob(xd, segment_id, 64);
for (i = 0; i < 16; i += 4) {
- ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][i];
- ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][i];
-#if CONFIG_CNVCONTEXT
- ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
- ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = a[0];
- ENTROPY_CONTEXT left_ec = l[0];
-#endif
- eobs[i] = c = decode_coefs(pbi, xd, bc, &above_ec, &left_ec, type,
- type == PLANE_TYPE_Y_WITH_DC ?
- get_tx_type(xd, xd->block + i) : DCT_DCT,
- seg_eob, xd->block[i].qcoeff,
- vp9_default_zig_zag1d_8x8,
- TX_8X8, vp9_coef_bands_8x8);
- a[1] = a[0] = above_ec;
- l[1] = l[0] = left_ec;
+ const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
+ seg_eob, xd->block[i].qcoeff, TX_8X8);
+ xd->eobs[i] = c;
eobtotal += c;
}
@@ -427,34 +495,16 @@
// use 4x4 transform for U, V components in I8X8/splitmv prediction mode
seg_eob = get_eob(xd, segment_id, 16);
for (i = 16; i < 24; i++) {
- ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_4X4][i];
- ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_4X4][i];
-
- eobs[i] = c = decode_coefs(pbi, xd, bc, a, l, PLANE_TYPE_UV,
- DCT_DCT, seg_eob, xd->block[i].qcoeff,
- vp9_default_zig_zag1d_4x4, TX_4X4,
- vp9_coef_bands_4x4);
+ const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
+ seg_eob, xd->block[i].qcoeff, TX_4X4);
+ xd->eobs[i] = c;
eobtotal += c;
}
} else {
for (i = 16; i < 24; i += 4) {
- ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_8X8][i];
- ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_8X8][i];
-#if CONFIG_CNVCONTEXT
- ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
- ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = a[0];
- ENTROPY_CONTEXT left_ec = l[0];
-#endif
- eobs[i] = c = decode_coefs(pbi, xd, bc,
- &above_ec, &left_ec,
- PLANE_TYPE_UV,
- DCT_DCT, seg_eob, xd->block[i].qcoeff,
- vp9_default_zig_zag1d_8x8,
- TX_8X8, vp9_coef_bands_8x8);
- a[1] = a[0] = above_ec;
- l[1] = l[0] = left_ec;
+ const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
+ seg_eob, xd->block[i].qcoeff, TX_8X8);
+ xd->eobs[i] = c;
eobtotal += c;
}
}
@@ -464,44 +514,13 @@
static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
BOOL_DECODER* const bc,
- PLANE_TYPE type, int i, int seg_eob,
- TX_TYPE tx_type, const int *scan) {
- ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;
- ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;
- ENTROPY_CONTEXT *const a = A + vp9_block2above[TX_4X4][i];
- ENTROPY_CONTEXT *const l = L + vp9_block2left[TX_4X4][i];
- uint16_t *const eobs = xd->eobs;
- int c;
-
- c = decode_coefs(dx, xd, bc, a, l, type, tx_type, seg_eob,
- xd->block[i].qcoeff, scan, TX_4X4, vp9_coef_bands_4x4);
- eobs[i] = c;
-
+ PLANE_TYPE type, int i, int seg_eob) {
+ const int c = decode_coefs(dx, xd, bc, i, type, seg_eob,
+ xd->block[i].qcoeff, TX_4X4);
+ xd->eobs[i] = c;
return c;
}
-static int decode_coefs_4x4_y(VP9D_COMP *dx, MACROBLOCKD *xd,
- BOOL_DECODER* const bc,
- PLANE_TYPE type, int i, int seg_eob) {
- const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, &xd->block[i]) : DCT_DCT;
- const int *scan;
-
- switch (tx_type) {
- case ADST_DCT:
- scan = vp9_row_scan_4x4;
- break;
- case DCT_ADST:
- scan = vp9_col_scan_4x4;
- break;
- default:
- scan = vp9_default_zig_zag1d_4x4;
- break;
- }
-
- return decode_coefs_4x4(dx, xd, bc, type, i, seg_eob, tx_type, scan);
-}
-
int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
BOOL_DECODER* const bc,
PLANE_TYPE type, int i) {
@@ -508,7 +527,7 @@
const int segment_id = xd->mode_info_context->mbmi.segment_id;
const int seg_eob = get_eob(xd, segment_id, 16);
- return decode_coefs_4x4_y(dx, xd, bc, type, i, seg_eob);
+ return decode_coefs_4x4(dx, xd, bc, type, i, seg_eob);
}
static int decode_mb_tokens_4x4_uv(VP9D_COMP* const dx,
@@ -515,13 +534,11 @@
MACROBLOCKD* const xd,
BOOL_DECODER* const bc,
int seg_eob) {
- int eobtotal = 0, i;
+ int i, eobtotal = 0;
// chroma blocks
- for (i = 16; i < 24; i++) {
- eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, i, seg_eob,
- DCT_DCT, vp9_default_zig_zag1d_4x4);
- }
+ for (i = 16; i < 24; i++)
+ eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_UV, i, seg_eob);
return eobtotal;
}
@@ -539,27 +556,12 @@
MACROBLOCKD* const xd,
BOOL_DECODER* const bc) {
int i, eobtotal = 0;
- PLANE_TYPE type;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
const int seg_eob = get_eob(xd, segment_id, 16);
- const int has_2nd_order = get_2nd_order_usage(xd);
- // 2nd order DC block
- if (has_2nd_order) {
- eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y2, 24, seg_eob,
- DCT_DCT, vp9_default_zig_zag1d_4x4) - 16;
- type = PLANE_TYPE_Y_NO_DC;
- } else {
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
- xd->eobs[24] = 0;
- type = PLANE_TYPE_Y_WITH_DC;
- }
-
// luma blocks
- for (i = 0; i < 16; ++i) {
- eobtotal += decode_coefs_4x4_y(dx, xd, bc, type, i, seg_eob);
- }
+ for (i = 0; i < 16; ++i)
+ eobtotal += decode_coefs_4x4(dx, xd, bc, PLANE_TYPE_Y_WITH_DC, i, seg_eob);
// chroma blocks
eobtotal += decode_mb_tokens_4x4_uv(dx, xd, bc, seg_eob);
@@ -571,16 +573,13 @@
MACROBLOCKD* const xd,
BOOL_DECODER* const bc) {
const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
- int eobtotal;
-
- if (tx_size == TX_16X16) {
- eobtotal = vp9_decode_mb_tokens_16x16(dx, xd, bc);
- } else if (tx_size == TX_8X8) {
- eobtotal = vp9_decode_mb_tokens_8x8(dx, xd, bc);
- } else {
- assert(tx_size == TX_4X4);
- eobtotal = vp9_decode_mb_tokens_4x4(dx, xd, bc);
+ switch (tx_size) {
+ case TX_16X16:
+ return vp9_decode_mb_tokens_16x16(dx, xd, bc);
+ case TX_8X8:
+ return vp9_decode_mb_tokens_8x8(dx, xd, bc);
+ default:
+ assert(tx_size == TX_4X4);
+ return vp9_decode_mb_tokens_4x4(dx, xd, bc);
}
-
- return eobtotal;
}
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -14,8 +14,6 @@
#include "vp9/decoder/vp9_onyxd_int.h"
-void vp9_reset_mb_tokens_context(MACROBLOCKD* const);
-
int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
BOOL_DECODER* const bc,
PLANE_TYPE type, int i);
@@ -26,6 +24,10 @@
int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
MACROBLOCKD* const xd,
BOOL_DECODER* const bc);
+
+int vp9_decode_sb64_tokens(VP9D_COMP* const pbi,
+ MACROBLOCKD* const xd,
+ BOOL_DECODER* const bc);
int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd,
BOOL_DECODER* const bc);
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -10,75 +10,35 @@
#include "vp9_rtcd.h"
#include "vp9/common/vp9_blockd.h"
-#if CONFIG_LOSSLESS
#include "vp9/decoder/vp9_dequantize.h"
-#endif
-void vp9_dequant_dc_idct_add_y_block_c(int16_t *q, const int16_t *dq,
- uint8_t *pre,
- uint8_t *dst,
- int stride, uint16_t *eobs,
- const int16_t *dc) {
+void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q,
+ const int16_t *dq,
+ uint8_t *dst,
+ int stride,
+ MACROBLOCKD *xd) {
int i, j;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- if (*eobs++ > 1)
- vp9_dequant_dc_idct_add_c(q, dq, pre, dst, 16, stride, dc[0]);
- else
- vp9_dc_only_idct_add_c(dc[0], pre, dst, 16, stride);
-
+ xd->itxm_add(q, dq, dst, dst, stride, stride, xd->eobs[i * 4 + j]);
q += 16;
- pre += 4;
dst += 4;
- dc++;
}
- pre += 64 - 16;
dst += 4 * stride - 16;
}
}
-void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
- const int16_t *dq,
- uint8_t *dst,
- int stride,
- uint16_t *eobs,
- const int16_t *dc,
- MACROBLOCKD *xd) {
- int i, j;
-
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- if (*eobs++ > 1)
- vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]);
- else
- vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride);
-
- q += 16;
- dst += 4;
- dc++;
- }
-
- dst += 4 * stride - 16;
- }
-}
-
void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
uint8_t *pre,
uint8_t *dst,
- int stride, uint16_t *eobs) {
+ int stride, MACROBLOCKD *xd) {
int i, j;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- if (*eobs++ > 1)
- vp9_dequant_idct_add_c(q, dq, pre, dst, 16, stride);
- else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dst, 16, stride);
- ((int *)q)[0] = 0;
- }
-
+ vp9_dequant_idct_add(q, dq, pre, dst, 16, stride, xd->eobs[i * 4 + j]);
q += 16;
pre += 4;
dst += 4;
@@ -92,18 +52,13 @@
void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
uint8_t *pre, uint8_t *dstu,
uint8_t *dstv, int stride,
- uint16_t *eobs) {
+ MACROBLOCKD *xd) {
int i, j;
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (*eobs++ > 1)
- vp9_dequant_idct_add_c(q, dq, pre, dstu, 8, stride);
- else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstu, 8, stride);
- ((int *)q)[0] = 0;
- }
-
+ vp9_dequant_idct_add(q, dq, pre, dstu, 8, stride,
+ xd->eobs[16 + i * 2 + j]);
q += 16;
pre += 4;
dstu += 4;
@@ -115,13 +70,8 @@
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (*eobs++ > 1)
- vp9_dequant_idct_add_c(q, dq, pre, dstv, 8, stride);
- else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], pre, dstv, 8, stride);
- ((int *)q)[0] = 0;
- }
-
+ vp9_dequant_idct_add(q, dq, pre, dstv, 8, stride,
+ xd->eobs[20 + i * 2 + j]);
q += 16;
pre += 4;
dstv += 4;
@@ -136,19 +86,12 @@
uint8_t *dstu,
uint8_t *dstv,
int stride,
- uint16_t *eobs,
MACROBLOCKD *xd) {
int i, j;
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (*eobs++ > 1) {
- vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride);
- } else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride);
- ((int *)q)[0] = 0;
- }
-
+ xd->itxm_add(q, dq, dstu, dstu, stride, stride, xd->eobs[16 + i * 2 + j]);
q += 16;
dstu += 4;
}
@@ -158,13 +101,7 @@
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (*eobs++ > 1) {
- vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride);
- } else {
- vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride);
- ((int *)q)[0] = 0;
- }
-
+ xd->itxm_add(q, dq, dstv, dstv, stride, stride, xd->eobs[20 + i * 2 + j]);
q += 16;
dstv += 4;
}
@@ -173,69 +110,40 @@
}
}
-void vp9_dequant_dc_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
- uint8_t *pre,
- uint8_t *dst,
- int stride, uint16_t *eobs,
- const int16_t *dc,
- MACROBLOCKD *xd) {
- q[0] = dc[0];
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 1, xd->eobs[0]);
+void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q,
+ const int16_t *dq,
+ uint8_t *dst,
+ int stride,
+ MACROBLOCKD *xd) {
+ vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, xd->eobs[0]);
- q[64] = dc[1];
- vp9_dequant_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, 1,
- xd->eobs[4]);
-
- q[128] = dc[4];
- vp9_dequant_idct_add_8x8_c(&q[128], dq, pre + 8 * 16,
- dst + 8 * stride, 16, stride, 1, xd->eobs[8]);
-
- q[192] = dc[8];
- vp9_dequant_idct_add_8x8_c(&q[192], dq, pre + 8 * 16 + 8,
- dst + 8 * stride + 8, 16, stride, 1,
- xd->eobs[12]);
-}
-
-void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q,
- const int16_t *dq,
- uint8_t *dst,
- int stride,
- uint16_t *eobs,
- const int16_t *dc,
- MACROBLOCKD *xd) {
- q[0] = dc[0];
- vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 1, xd->eobs[0]);
-
- q[64] = dc[1];
vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8,
- dst + 8, stride, stride, 1, xd->eobs[4]);
+ dst + 8, stride, stride, xd->eobs[4]);
- q[128] = dc[4];
vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride,
- dst + 8 * stride, stride, stride, 1,
- xd->eobs[8]);
+ dst + 8 * stride, stride, stride,
+ xd->eobs[8]);
- q[192] = dc[8];
vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8,
- dst + 8 * stride + 8, stride, stride, 1,
- xd->eobs[12]);
+ dst + 8 * stride + 8, stride, stride,
+ xd->eobs[12]);
}
void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
uint8_t *pre,
uint8_t *dst,
- int stride, uint16_t *eobs,
- MACROBLOCKD *xd) {
+ int stride, MACROBLOCKD *xd) {
uint8_t *origdest = dst;
uint8_t *origpred = pre;
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, 0, xd->eobs[0]);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride, xd->eobs[0]);
vp9_dequant_idct_add_8x8_c(&q[64], dq, origpred + 8,
- origdest + 8, 16, stride, 0, xd->eobs[4]);
+ origdest + 8, 16, stride, xd->eobs[4]);
vp9_dequant_idct_add_8x8_c(&q[128], dq, origpred + 8 * 16,
- origdest + 8 * stride, 16, stride, 0, xd->eobs[8]);
+ origdest + 8 * stride, 16, stride,
+ xd->eobs[8]);
vp9_dequant_idct_add_8x8_c(&q[192], dq, origpred + 8 * 16 + 8,
- origdest + 8 * stride + 8, 16, stride, 0,
+ origdest + 8 * stride + 8, 16, stride,
xd->eobs[12]);
}
@@ -243,14 +151,13 @@
uint8_t *pre,
uint8_t *dstu,
uint8_t *dstv,
- int stride, uint16_t *eobs,
- MACROBLOCKD *xd) {
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, 0, xd->eobs[16]);
+ int stride, MACROBLOCKD *xd) {
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]);
q += 64;
pre += 64;
- vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, 0, xd->eobs[20]);
+ vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]);
}
void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
@@ -257,58 +164,26 @@
uint8_t *dstu,
uint8_t *dstv,
int stride,
- uint16_t *eobs,
MACROBLOCKD *xd) {
- vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride, 0,
+ vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride,
xd->eobs[16]);
q += 64;
- vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride, 0,
+ vp9_dequant_idct_add_8x8_c(q, dq, dstv, dstv, stride, stride,
xd->eobs[20]);
}
-#if CONFIG_LOSSLESS
-void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
- uint8_t *pre,
- uint8_t *dst,
- int stride,
- uint16_t *eobs,
- const int16_t *dc) {
- int i, j;
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- if (*eobs++ > 1)
- vp9_dequant_dc_idct_add_lossless_c(q, dq, pre, dst, 16, stride, dc[0]);
- else
- vp9_dc_only_inv_walsh_add_c(dc[0], pre, dst, 16, stride);
-
- q += 16;
- pre += 4;
- dst += 4;
- dc++;
- }
-
- pre += 64 - 16;
- dst += 4 * stride - 16;
- }
-}
-
void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
uint8_t *pre,
uint8_t *dst,
- int stride, uint16_t *eobs) {
+ int stride, MACROBLOCKD *xd) {
int i, j;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
- if (*eobs++ > 1)
- vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride);
- else {
- vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dst, 16, stride);
- ((int *)q)[0] = 0;
- }
-
+ vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 16, stride,
+ xd->eobs[i * 4 + j]);
q += 16;
pre += 4;
dst += 4;
@@ -324,18 +199,13 @@
uint8_t *dstu,
uint8_t *dstv,
int stride,
- uint16_t *eobs) {
+ MACROBLOCKD *xd) {
int i, j;
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (*eobs++ > 1)
- vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride);
- else {
- vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstu, 8, stride);
- ((int *)q)[0] = 0;
- }
-
+ vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride,
+ xd->eobs[16 + i * 2 + j]);
q += 16;
pre += 4;
dstu += 4;
@@ -347,13 +217,8 @@
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
- if (*eobs++ > 1)
- vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride);
- else {
- vp9_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstv, 8, stride);
- ((int *)q)[0] = 0;
- }
-
+ vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride,
+ xd->eobs[20 + i * 2 + j]);
q += 16;
pre += 4;
dstv += 4;
@@ -363,5 +228,4 @@
dstv += 4 * stride - 8;
}
}
-#endif
--- a/vp9/decoder/vp9_onyxd.h
+++ b/vp9/decoder/vp9_onyxd.h
@@ -27,6 +27,7 @@
int Version;
int postprocess;
int max_threads;
+ int inv_tile_order;
int input_partition;
} VP9D_CONFIG;
typedef enum {
@@ -45,13 +46,15 @@
int64_t *time_stamp, int64_t *time_end_stamp,
vp9_ppflags_t *flags);
- vpx_codec_err_t vp9_get_reference_dec(VP9D_PTR comp,
- VP9_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd);
+ vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR comp,
+ VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR comp,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
+
+ int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb);
VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf);
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -9,6 +9,9 @@
*/
+#include <stdio.h>
+#include <assert.h>
+
#include "vp9/common/vp9_onyxc_int.h"
#if CONFIG_POSTPROC
#include "vp9/common/vp9_postproc.h"
@@ -19,8 +22,6 @@
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_swapyv12buffer.h"
-#include <stdio.h>
-#include <assert.h>
#include "vp9/common/vp9_quant_common.h"
#include "vpx_scale/vpx_scale.h"
@@ -30,34 +31,34 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "./vpx_scale_rtcd.h"
-static int get_free_fb(VP9_COMMON *cm);
-static void ref_cnt_fb(int *buf, int *idx, int new_idx);
-
#define WRITE_RECON_BUFFER 0
#if WRITE_RECON_BUFFER == 1
-static void recon_write_yuv_frame(char *name, YV12_BUFFER_CONFIG *s) {
+static void recon_write_yuv_frame(const char *name,
+ const YV12_BUFFER_CONFIG *s,
+ int w, int _h) {
FILE *yuv_file = fopen((char *)name, "ab");
- uint8_t *src = s->y_buffer;
- int h = s->y_height;
+ const uint8_t *src = s->y_buffer;
+ int h = _h;
do {
- fwrite(src, s->y_width, 1, yuv_file);
+ fwrite(src, w, 1, yuv_file);
src += s->y_stride;
} while (--h);
src = s->u_buffer;
- h = s->uv_height;
+ h = (_h + 1) >> 1;
+ w = (w + 1) >> 1;
do {
- fwrite(src, s->uv_width, 1, yuv_file);
+ fwrite(src, w, 1, yuv_file);
src += s->uv_stride;
} while (--h);
src = s->v_buffer;
- h = s->uv_height;
+ h = (_h + 1) >> 1;
do {
- fwrite(src, s->uv_width, 1, yuv_file);
+ fwrite(src, w, 1, yuv_file);
src += s->uv_stride;
} while (--h);
@@ -99,7 +100,7 @@
}
#endif
-void vp9_initialize_dec(void) {
+void vp9_initialize_dec() {
static int init_done = 0;
if (!init_done) {
@@ -127,6 +128,7 @@
vp9_initialize_dec();
vp9_create_common(&pbi->common);
+ pbi->oxcf = *oxcf;
pbi->common.current_video_frame = 0;
pbi->ready_for_new_data = 1;
@@ -152,8 +154,8 @@
if (!pbi)
return;
- // Delete sementation map
- if (pbi->common.last_frame_seg_map != 0)
+ // Delete segmentation map
+ if (pbi->common.last_frame_seg_map)
vpx_free(pbi->common.last_frame_seg_map);
vp9_remove_common(&pbi->common);
@@ -161,33 +163,37 @@
vpx_free(pbi);
}
+static int equal_dimensions(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
+ return a->y_height == b->y_height && a->y_width == b->y_width &&
+ a->uv_height == b->uv_height && a->uv_width == b->uv_width;
+}
-vpx_codec_err_t vp9_get_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd) {
+vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR ptr,
+ VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd) {
VP9D_COMP *pbi = (VP9D_COMP *) ptr;
VP9_COMMON *cm = &pbi->common;
int ref_fb_idx;
- if (ref_frame_flag == VP9_LAST_FLAG)
- ref_fb_idx = cm->lst_fb_idx;
- else if (ref_frame_flag == VP9_GOLD_FLAG)
- ref_fb_idx = cm->gld_fb_idx;
- else if (ref_frame_flag == VP9_ALT_FLAG)
- ref_fb_idx = cm->alt_fb_idx;
- else {
+ /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
+ * encoder is using the frame buffers for. This is just a stub to keep the
+ * vpxenc --test-decode functionality working, and will be replaced in a
+ * later commit that adds VP9-specific controls for this functionality.
+ */
+ if (ref_frame_flag == VP9_LAST_FLAG) {
+ ref_fb_idx = pbi->common.ref_frame_map[0];
+ } else {
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
"Invalid reference frame");
return pbi->common.error.error_code;
}
- if (cm->yv12_fb[ref_fb_idx].y_height != sd->y_height ||
- cm->yv12_fb[ref_fb_idx].y_width != sd->y_width ||
- cm->yv12_fb[ref_fb_idx].uv_height != sd->uv_height ||
- cm->yv12_fb[ref_fb_idx].uv_width != sd->uv_width) {
+ if (!equal_dimensions(&cm->yv12_fb[ref_fb_idx], sd)) {
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
"Incorrect buffer dimensions");
- } else
+ } else {
vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd);
+ }
return pbi->common.error.error_code;
}
@@ -198,14 +204,18 @@
VP9D_COMP *pbi = (VP9D_COMP *) ptr;
VP9_COMMON *cm = &pbi->common;
int *ref_fb_ptr = NULL;
- int free_fb;
+ /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
+ * encoder is using the frame buffers for. This is just a stub to keep the
+ * vpxenc --test-decode functionality working, and will be replaced in a
+ * later commit that adds VP9-specific controls for this functionality.
+ */
if (ref_frame_flag == VP9_LAST_FLAG)
- ref_fb_ptr = &cm->lst_fb_idx;
+ ref_fb_ptr = &pbi->common.active_ref_idx[0];
else if (ref_frame_flag == VP9_GOLD_FLAG)
- ref_fb_ptr = &cm->gld_fb_idx;
+ ref_fb_ptr = &pbi->common.active_ref_idx[1];
else if (ref_frame_flag == VP9_ALT_FLAG)
- ref_fb_ptr = &cm->alt_fb_idx;
+ ref_fb_ptr = &pbi->common.active_ref_idx[2];
else {
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
"Invalid reference frame");
@@ -212,20 +222,17 @@
return pbi->common.error.error_code;
}
- if (cm->yv12_fb[*ref_fb_ptr].y_height != sd->y_height ||
- cm->yv12_fb[*ref_fb_ptr].y_width != sd->y_width ||
- cm->yv12_fb[*ref_fb_ptr].uv_height != sd->uv_height ||
- cm->yv12_fb[*ref_fb_ptr].uv_width != sd->uv_width) {
+ if (!equal_dimensions(&cm->yv12_fb[*ref_fb_ptr], sd)) {
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
"Incorrect buffer dimensions");
} else {
- /* Find an empty frame buffer. */
- free_fb = get_free_fb(cm);
- /* Decrease fb_idx_ref_cnt since it will be increased again in
- * ref_cnt_fb() below. */
+ // Find an empty frame buffer.
+ const int free_fb = get_free_fb(cm);
+ // Decrease fb_idx_ref_cnt since it will be increased again in
+ // ref_cnt_fb() below.
cm->fb_idx_ref_cnt[free_fb]--;
- /* Manage the reference counters and copy image. */
+ // Manage the reference counters and copy image.
ref_cnt_fb(cm->fb_idx_ref_cnt, ref_fb_ptr, free_fb);
vp8_yv12_copy_frame(sd, &cm->yv12_fb[*ref_fb_ptr]);
}
@@ -234,77 +241,36 @@
}
-static int get_free_fb(VP9_COMMON *cm) {
- int i;
- for (i = 0; i < NUM_YV12_BUFFERS; i++)
- if (cm->fb_idx_ref_cnt[i] == 0)
- break;
+int vp9_get_reference_dec(VP9D_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) {
+ VP9D_COMP *pbi = (VP9D_COMP *) ptr;
+ VP9_COMMON *cm = &pbi->common;
- assert(i < NUM_YV12_BUFFERS);
- cm->fb_idx_ref_cnt[i] = 1;
- return i;
-}
+ if (index < 0 || index >= NUM_REF_FRAMES)
+ return -1;
-static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
- if (buf[*idx] > 0)
- buf[*idx]--;
-
- *idx = new_idx;
-
- buf[new_idx]++;
+ *fb = &cm->yv12_fb[cm->ref_frame_map[index]];
+ return 0;
}
-/* If any buffer copy / swapping is signalled it should be done here. */
-static int swap_frame_buffers(VP9_COMMON *cm) {
- int err = 0;
+/* If any buffer updating is signalled it should be done here. */
+static void swap_frame_buffers(VP9D_COMP *pbi) {
+ int ref_index = 0, mask;
- /* The alternate reference frame or golden frame can be updated
- * using the new, last, or golden/alt ref frame. If it
- * is updated using the newly decoded frame it is a refresh.
- * An update using the last or golden/alt ref frame is a copy.
- */
- if (cm->copy_buffer_to_arf) {
- int new_fb = 0;
-
- if (cm->copy_buffer_to_arf == 1)
- new_fb = cm->lst_fb_idx;
- else if (cm->copy_buffer_to_arf == 2)
- new_fb = cm->gld_fb_idx;
- else
- err = -1;
-
- ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb);
+ for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
+ if (mask & 1) {
+ ref_cnt_fb(pbi->common.fb_idx_ref_cnt,
+ &pbi->common.ref_frame_map[ref_index],
+ pbi->common.new_fb_idx);
+ }
+ ++ref_index;
}
- if (cm->copy_buffer_to_gf) {
- int new_fb = 0;
+ pbi->common.frame_to_show = &pbi->common.yv12_fb[pbi->common.new_fb_idx];
+ pbi->common.fb_idx_ref_cnt[pbi->common.new_fb_idx]--;
- if (cm->copy_buffer_to_gf == 1)
- new_fb = cm->lst_fb_idx;
- else if (cm->copy_buffer_to_gf == 2)
- new_fb = cm->alt_fb_idx;
- else
- err = -1;
-
- ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb);
- }
-
- if (cm->refresh_golden_frame)
- ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx);
-
- if (cm->refresh_alt_ref_frame)
- ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx);
-
- if (cm->refresh_last_frame) {
- ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx);
-
- cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
- } else
- cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
-
- cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
-
- return err;
+ /* Invalidate these references until the next frame starts. */
+ for (ref_index = 0; ref_index < 3; ref_index++)
+ pbi->common.active_ref_idx[ref_index] = INT_MAX;
}
int vp9_receive_compressed_data(VP9D_PTR ptr, unsigned long size,
@@ -332,8 +298,13 @@
* We do not know if the missing frame(s) was supposed to update
* any of the reference buffers, but we act conservative and
* mark only the last buffer as corrupted.
+ *
+ * TODO(jkoleszar): Error concealment is undefined and non-normative
+ * at this point, but if it becomes so, [0] may not always be the correct
+ * thing to do here.
*/
- cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
+ if (cm->active_ref_idx[0] != INT_MAX)
+ cm->yv12_fb[cm->active_ref_idx[0]].corrupted = 1;
}
cm->new_fb_idx = get_free_fb(cm);
@@ -344,8 +315,13 @@
/* We do not know if the missing frame(s) was supposed to update
* any of the reference buffers, but we act conservative and
* mark only the last buffer as corrupted.
+ *
+ * TODO(jkoleszar): Error concealment is undefined and non-normative
+ * at this point, but if it becomes so, [0] may not always be the correct
+ * thing to do here.
*/
- cm->yv12_fb[cm->lst_fb_idx].corrupted = 1;
+ if (cm->active_ref_idx[0] != INT_MAX)
+ cm->yv12_fb[cm->active_ref_idx[0]].corrupted = 1;
if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
@@ -365,11 +341,7 @@
}
{
- if (swap_frame_buffers(cm)) {
- pbi->common.error.error_code = VPX_CODEC_ERROR;
- pbi->common.error.setjmp = 0;
- return -1;
- }
+ swap_frame_buffers(pbi);
#if WRITE_RECON_BUFFER == 2
if (cm->show_frame)
@@ -382,7 +354,8 @@
if (cm->filter_level) {
/* Apply the loop filter if appropriate. */
- vp9_loop_filter_frame(cm, &pbi->mb, cm->filter_level, 0);
+ vp9_loop_filter_frame(cm, &pbi->mb, cm->filter_level, 0,
+ cm->dering_enabled);
}
vp8_yv12_extend_frame_borders(cm->frame_to_show);
}
@@ -389,7 +362,8 @@
#if WRITE_RECON_BUFFER == 1
if (cm->show_frame)
- recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+ recon_write_yuv_frame("recon.yuv", cm->frame_to_show,
+ cm->width, cm->height);
#endif
vp9_clear_system_state();
@@ -440,9 +414,9 @@
if (pbi->common.frame_to_show) {
*sd = *pbi->common.frame_to_show;
- sd->y_width = pbi->common.Width;
- sd->y_height = pbi->common.Height;
- sd->uv_height = pbi->common.Height / 2;
+ sd->y_width = pbi->common.width;
+ sd->y_height = pbi->common.height;
+ sd->uv_height = pbi->common.height / 2;
ret = 0;
} else {
ret = -1;
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -18,41 +18,6 @@
// #define DEC_DEBUG
-typedef struct {
- int ithread;
- void *ptr1;
- void *ptr2;
-} DECODETHREAD_DATA;
-
-typedef struct {
- MACROBLOCKD mbd;
- int mb_row;
- int current_mb_col;
- short *coef_ptr;
-} MB_ROW_DEC;
-
-typedef struct {
- int const *scan;
- int const *scan_8x8;
- uint8_t const *ptr_block2leftabove;
- vp9_tree_index const *vp9_coef_tree_ptr;
- unsigned char *norm_ptr;
- uint8_t *ptr_coef_bands_x;
- uint8_t *ptr_coef_bands_x_8x8;
-
- ENTROPY_CONTEXT_PLANES *A;
- ENTROPY_CONTEXT_PLANES *L;
-
- int16_t *qcoeff_start_ptr;
-
- vp9_prob const *coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_prob const *coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_prob const *coef_probs_16X16[BLOCK_TYPES_16X16];
-
- uint8_t eob[25];
-
-} DETOK;
-
typedef struct VP9Decompressor {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
@@ -68,18 +33,13 @@
int64_t last_time_stamp;
int ready_for_new_data;
- DETOK detoken;
-
- vp9_dequant_idct_add_fn_t idct_add;
- vp9_dequant_dc_idct_add_fn_t dc_idct_add;
- vp9_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
- vp9_dequant_idct_add_y_block_fn_t idct_add_y_block;
- vp9_dequant_idct_add_uv_block_fn_t idct_add_uv_block;
-
+ int refresh_frame_flags;
vp9_prob prob_skip_false;
int decoded_key_frame;
+ int initial_width;
+ int initial_height;
} VP9D_COMP;
int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end);
--- a/vp9/decoder/vp9_treereader.h
+++ b/vp9/decoder/vp9_treereader.h
@@ -13,7 +13,6 @@
#define VP9_DECODER_VP9_TREEREADER_H_
#include "vp9/common/vp9_treecoder.h"
-
#include "vp9/decoder/vp9_dboolhuff.h"
typedef BOOL_DECODER vp9_reader;
@@ -20,10 +19,10 @@
#define vp9_read decode_bool
#define vp9_read_literal decode_value
-#define vp9_read_bit(R) vp9_read(R, vp9_prob_half)
+#define vp9_read_bit(r) vp9_read(r, vp9_prob_half)
+#define vp9_read_prob(r) ((vp9_prob)vp9_read_literal(r, 8))
-/* Intent of tree data structure is to make decoding trivial. */
-
+// Intent of tree data structure is to make decoding trivial.
static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */
vp9_tree t,
const vp9_prob *const p) {
--- a/vp9/decoder/x86/vp9_dequantize_mmx.asm
+++ /dev/null
@@ -1,406 +1,0 @@
-;
-; Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-align 16
-x_s1sqr2: times 4 dw 0x8A8C
-align 16
-x_c1sqr2less1: times 4 dw 0x4E7B
-align 16
-pw_16: times 4 dw 16
-
-SECTION .text
-
-INIT_MMX
-
-
-;void dequantize_b_impl_mmx(short *sq, short *dq, short *q)
-cglobal dequantize_b_impl_mmx, 3,3,0,sq,dq,arg3
- mova m1, [sqq]
- pmullw m1, [arg3q+0] ; mm4 *= kernel 0 modifiers.
- mova [dqq+ 0], m1
-
- mova m1, [sqq+8]
- pmullw m1, [arg3q+8] ; mm4 *= kernel 0 modifiers.
- mova [dqq+ 8], m1
-
- mova m1, [sqq+16]
- pmullw m1, [arg3q+16] ; mm4 *= kernel 0 modifiers.
- mova [dqq+16], m1
-
- mova m1, [sqq+24]
- pmullw m1, [arg3q+24] ; mm4 *= kernel 0 modifiers.
- mova [dqq+24], m1
- RET
-
-
-;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
-cglobal dequant_idct_add_mmx, 4,6,0,inp,dq,pred,dest,pit,stride
-
-%if ARCH_X86_64
- movsxd strideq, dword stridem
- movsxd pitq, dword pitm
-%else
- mov strideq, stridem
- mov pitq, pitm
-%endif
-
- mova m0, [inpq+ 0]
- pmullw m0, [dqq]
-
- mova m1, [inpq+ 8]
- pmullw m1, [dqq+ 8]
-
- mova m2, [inpq+16]
- pmullw m2, [dqq+16]
-
- mova m3, [inpq+24]
- pmullw m3, [dqq+24]
-
- pxor m7, m7
- mova [inpq], m7
- mova [inpq+8], m7
- mova [inpq+16], m7
- mova [inpq+24], m7
-
-
- psubw m0, m2 ; b1= 0-2
- paddw m2, m2 ;
-
- mova m5, m1
- paddw m2, m0 ; a1 =0+2
-
- pmulhw m5, [x_s1sqr2];
- paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
-
- mova m7, m3 ;
- pmulhw m7, [x_c1sqr2less1];
-
- paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw m7, m5 ; c1
-
- mova m5, m1
- mova m4, m3
-
- pmulhw m5, [x_c1sqr2less1]
- paddw m5, m1
-
- pmulhw m3, [x_s1sqr2]
- paddw m3, m4
-
- paddw m3, m5 ; d1
- mova m6, m2 ; a1
-
- mova m4, m0 ; b1
- paddw m2, m3 ;0
-
- paddw m4, m7 ;1
- psubw m0, m7 ;2
-
- psubw m6, m3 ;3
-
- mova m1, m2 ; 03 02 01 00
- mova m3, m4 ; 23 22 21 20
-
- punpcklwd m1, m0 ; 11 01 10 00
- punpckhwd m2, m0 ; 13 03 12 02
-
- punpcklwd m3, m6 ; 31 21 30 20
- punpckhwd m4, m6 ; 33 23 32 22
-
- mova m0, m1 ; 11 01 10 00
- mova m5, m2 ; 13 03 12 02
-
- punpckldq m0, m3 ; 30 20 10 00
- punpckhdq m1, m3 ; 31 21 11 01
-
- punpckldq m2, m4 ; 32 22 12 02
- punpckhdq m5, m4 ; 33 23 13 03
-
- mova m3, m5 ; 33 23 13 03
-
- psubw m0, m2 ; b1= 0-2
- paddw m2, m2 ;
-
- mova m5, m1
- paddw m2, m0 ; a1 =0+2
-
- pmulhw m5, [x_s1sqr2];
- paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
-
- mova m7, m3 ;
- pmulhw m7, [x_c1sqr2less1];
-
- paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw m7, m5 ; c1
-
- mova m5, m1
- mova m4, m3
-
- pmulhw m5, [x_c1sqr2less1]
- paddw m5, m1
-
- pmulhw m3, [x_s1sqr2]
- paddw m3, m4
-
- paddw m3, m5 ; d1
- paddw m0, [pw_16]
-
- paddw m2, [pw_16]
- mova m6, m2 ; a1
-
- mova m4, m0 ; b1
- paddw m2, m3 ;0
-
- paddw m4, m7 ;1
- psubw m0, m7 ;2
-
- psubw m6, m3 ;3
- psraw m2, 5
-
- psraw m0, 5
- psraw m4, 5
-
- psraw m6, 5
-
- mova m1, m2 ; 03 02 01 00
- mova m3, m4 ; 23 22 21 20
-
- punpcklwd m1, m0 ; 11 01 10 00
- punpckhwd m2, m0 ; 13 03 12 02
-
- punpcklwd m3, m6 ; 31 21 30 20
- punpckhwd m4, m6 ; 33 23 32 22
-
- mova m0, m1 ; 11 01 10 00
- mova m5, m2 ; 13 03 12 02
-
- punpckldq m0, m3 ; 30 20 10 00
- punpckhdq m1, m3 ; 31 21 11 01
-
- punpckldq m2, m4 ; 32 22 12 02
- punpckhdq m5, m4 ; 33 23 13 03
-
- pxor m7, m7
-
- movh m4, [predq]
- punpcklbw m4, m7
- paddsw m0, m4
- packuswb m0, m7
- movh [destq], m0
-
- movh m4, [predq+pitq]
- punpcklbw m4, m7
- paddsw m1, m4
- packuswb m1, m7
- movh [destq+strideq], m1
-
- movh m4, [predq+2*pitq]
- punpcklbw m4, m7
- paddsw m2, m4
- packuswb m2, m7
- movh [destq+strideq*2], m2
-
- add destq, strideq
- add predq, pitq
-
- movh m4, [predq+2*pitq]
- punpcklbw m4, m7
- paddsw m5, m4
- packuswb m5, m7
- movh [destq+strideq*2], m5
- RET
-
-
-;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
-cglobal dequant_dc_idct_add_mmx, 4,7,0,inp,dq,pred,dest,pit,stride,Dc
-
-%if ARCH_X86_64
- movsxd strideq, dword stridem
- movsxd pitq, dword pitm
-%else
- mov strideq, stridem
- mov pitq, pitm
-%endif
-
- mov Dcq, Dcm
- mova m0, [inpq+ 0]
- pmullw m0, [dqq+ 0]
-
- mova m1, [inpq+ 8]
- pmullw m1, [dqq+ 8]
-
- mova m2, [inpq+16]
- pmullw m2, [dqq+16]
-
- mova m3, [inpq+24]
- pmullw m3, [dqq+24]
-
- pxor m7, m7
- mova [inpq+ 0], m7
- mova [inpq+ 8], m7
- mova [inpq+16], m7
- mova [inpq+24], m7
-
- ; move lower word of Dc to lower word of m0
- psrlq m0, 16
- psllq m0, 16
- and Dcq, 0xFFFF ; If Dc < 0, we don't want the full dword precision.
- movh m7, Dcq
- por m0, m7
- psubw m0, m2 ; b1= 0-2
- paddw m2, m2 ;
-
- mova m5, m1
- paddw m2, m0 ; a1 =0+2
-
- pmulhw m5, [x_s1sqr2];
- paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
-
- mova m7, m3 ;
- pmulhw m7, [x_c1sqr2less1];
-
- paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw m7, m5 ; c1
-
- mova m5, m1
- mova m4, m3
-
- pmulhw m5, [x_c1sqr2less1]
- paddw m5, m1
-
- pmulhw m3, [x_s1sqr2]
- paddw m3, m4
-
- paddw m3, m5 ; d1
- mova m6, m2 ; a1
-
- mova m4, m0 ; b1
- paddw m2, m3 ;0
-
- paddw m4, m7 ;1
- psubw m0, m7 ;2
-
- psubw m6, m3 ;3
-
- mova m1, m2 ; 03 02 01 00
- mova m3, m4 ; 23 22 21 20
-
- punpcklwd m1, m0 ; 11 01 10 00
- punpckhwd m2, m0 ; 13 03 12 02
-
- punpcklwd m3, m6 ; 31 21 30 20
- punpckhwd m4, m6 ; 33 23 32 22
-
- mova m0, m1 ; 11 01 10 00
- mova m5, m2 ; 13 03 12 02
-
- punpckldq m0, m3 ; 30 20 10 00
- punpckhdq m1, m3 ; 31 21 11 01
-
- punpckldq m2, m4 ; 32 22 12 02
- punpckhdq m5, m4 ; 33 23 13 03
-
- mova m3, m5 ; 33 23 13 03
-
- psubw m0, m2 ; b1= 0-2
- paddw m2, m2 ;
-
- mova m5, m1
- paddw m2, m0 ; a1 =0+2
-
- pmulhw m5, [x_s1sqr2];
- paddw m5, m1 ; ip1 * sin(pi/8) * sqrt(2)
-
- mova m7, m3 ;
- pmulhw m7, [x_c1sqr2less1];
-
- paddw m7, m3 ; ip3 * cos(pi/8) * sqrt(2)
- psubw m7, m5 ; c1
-
- mova m5, m1
- mova m4, m3
-
- pmulhw m5, [x_c1sqr2less1]
- paddw m5, m1
-
- pmulhw m3, [x_s1sqr2]
- paddw m3, m4
-
- paddw m3, m5 ; d1
- paddw m0, [pw_16]
-
- paddw m2, [pw_16]
- mova m6, m2 ; a1
-
- mova m4, m0 ; b1
- paddw m2, m3 ;0
-
- paddw m4, m7 ;1
- psubw m0, m7 ;2
-
- psubw m6, m3 ;3
- psraw m2, 5
-
- psraw m0, 5
- psraw m4, 5
-
- psraw m6, 5
-
- mova m1, m2 ; 03 02 01 00
- mova m3, m4 ; 23 22 21 20
-
- punpcklwd m1, m0 ; 11 01 10 00
- punpckhwd m2, m0 ; 13 03 12 02
-
- punpcklwd m3, m6 ; 31 21 30 20
- punpckhwd m4, m6 ; 33 23 32 22
-
- mova m0, m1 ; 11 01 10 00
- mova m5, m2 ; 13 03 12 02
-
- punpckldq m0, m3 ; 30 20 10 00
- punpckhdq m1, m3 ; 31 21 11 01
-
- punpckldq m2, m4 ; 32 22 12 02
- punpckhdq m5, m4 ; 33 23 13 03
-
- pxor m7, m7
-
- movh m4, [predq]
- punpcklbw m4, m7
- paddsw m0, m4
- packuswb m0, m7
- movh [destq], m0
-
- movh m4, [predq+pitq]
- punpcklbw m4, m7
- paddsw m1, m4
- packuswb m1, m7
- movh [destq+strideq], m1
-
- movh m4, [predq+2*pitq]
- punpcklbw m4, m7
- paddsw m2, m4
- packuswb m2, m7
- movh [destq+strideq*2], m2
-
- add destq, strideq
- add predq, pitq
-
- movh m4, [predq+2*pitq]
- punpcklbw m4, m7
- paddsw m5, m4
- packuswb m5, m7
- movh [destq+strideq*2], m5
- RET
-
--- /dev/null
+++ b/vp9/decoder/x86/vp9_dequantize_x86.c
@@ -1,0 +1,455 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <emmintrin.h> // SSE2
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_idct.h"
+
+#if HAVE_SSE2
+
+void vp9_add_residual_4x4_sse2(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ const int width = 4;
+ const __m128i zero = _mm_setzero_si128();
+
+ // Diff data
+ const __m128i d0 = _mm_loadl_epi64((const __m128i *)(diff + 0 * width));
+ const __m128i d1 = _mm_loadl_epi64((const __m128i *)(diff + 1 * width));
+ const __m128i d2 = _mm_loadl_epi64((const __m128i *)(diff + 2 * width));
+ const __m128i d3 = _mm_loadl_epi64((const __m128i *)(diff + 3 * width));
+
+ // Prediction data.
+ __m128i p0 = _mm_cvtsi32_si128(*(const int *)(pred + 0 * pitch));
+ __m128i p1 = _mm_cvtsi32_si128(*(const int *)(pred + 1 * pitch));
+ __m128i p2 = _mm_cvtsi32_si128(*(const int *)(pred + 2 * pitch));
+ __m128i p3 = _mm_cvtsi32_si128(*(const int *)(pred + 3 * pitch));
+
+ p0 = _mm_unpacklo_epi8(p0, zero);
+ p1 = _mm_unpacklo_epi8(p1, zero);
+ p2 = _mm_unpacklo_epi8(p2, zero);
+ p3 = _mm_unpacklo_epi8(p3, zero);
+
+ p0 = _mm_add_epi16(p0, d0);
+ p1 = _mm_add_epi16(p1, d1);
+ p2 = _mm_add_epi16(p2, d2);
+ p3 = _mm_add_epi16(p3, d3);
+
+ p0 = _mm_packus_epi16(p0, p1);
+ p2 = _mm_packus_epi16(p2, p3);
+
+ *(int *)dest = _mm_cvtsi128_si32(p0);
+ dest += stride;
+
+ p0 = _mm_srli_si128(p0, 8);
+ *(int *)dest = _mm_cvtsi128_si32(p0);
+ dest += stride;
+
+ *(int *)dest = _mm_cvtsi128_si32(p2);
+ dest += stride;
+
+ p2 = _mm_srli_si128(p2, 8);
+ *(int *)dest = _mm_cvtsi128_si32(p2);
+}
+
+void vp9_add_residual_8x8_sse2(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ const int width = 8;
+ const __m128i zero = _mm_setzero_si128();
+
+ // Diff data
+ const __m128i d0 = _mm_load_si128((const __m128i *)(diff + 0 * width));
+ const __m128i d1 = _mm_load_si128((const __m128i *)(diff + 1 * width));
+ const __m128i d2 = _mm_load_si128((const __m128i *)(diff + 2 * width));
+ const __m128i d3 = _mm_load_si128((const __m128i *)(diff + 3 * width));
+ const __m128i d4 = _mm_load_si128((const __m128i *)(diff + 4 * width));
+ const __m128i d5 = _mm_load_si128((const __m128i *)(diff + 5 * width));
+ const __m128i d6 = _mm_load_si128((const __m128i *)(diff + 6 * width));
+ const __m128i d7 = _mm_load_si128((const __m128i *)(diff + 7 * width));
+
+ // Prediction data.
+ __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch));
+ __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch));
+ __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch));
+ __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch));
+ __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch));
+ __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch));
+ __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch));
+ __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch));
+
+ p0 = _mm_unpacklo_epi8(p0, zero);
+ p1 = _mm_unpacklo_epi8(p1, zero);
+ p2 = _mm_unpacklo_epi8(p2, zero);
+ p3 = _mm_unpacklo_epi8(p3, zero);
+ p4 = _mm_unpacklo_epi8(p4, zero);
+ p5 = _mm_unpacklo_epi8(p5, zero);
+ p6 = _mm_unpacklo_epi8(p6, zero);
+ p7 = _mm_unpacklo_epi8(p7, zero);
+
+ p0 = _mm_add_epi16(p0, d0);
+ p1 = _mm_add_epi16(p1, d1);
+ p2 = _mm_add_epi16(p2, d2);
+ p3 = _mm_add_epi16(p3, d3);
+ p4 = _mm_add_epi16(p4, d4);
+ p5 = _mm_add_epi16(p5, d5);
+ p6 = _mm_add_epi16(p6, d6);
+ p7 = _mm_add_epi16(p7, d7);
+
+ p0 = _mm_packus_epi16(p0, p1);
+ p2 = _mm_packus_epi16(p2, p3);
+ p4 = _mm_packus_epi16(p4, p5);
+ p6 = _mm_packus_epi16(p6, p7);
+
+ _mm_storel_epi64((__m128i *)(dest + 0 * stride), p0);
+ p0 = _mm_srli_si128(p0, 8);
+ _mm_storel_epi64((__m128i *)(dest + 1 * stride), p0);
+
+ _mm_storel_epi64((__m128i *)(dest + 2 * stride), p2);
+ p2 = _mm_srli_si128(p2, 8);
+ _mm_storel_epi64((__m128i *)(dest + 3 * stride), p2);
+
+ _mm_storel_epi64((__m128i *)(dest + 4 * stride), p4);
+ p4 = _mm_srli_si128(p4, 8);
+ _mm_storel_epi64((__m128i *)(dest + 5 * stride), p4);
+
+ _mm_storel_epi64((__m128i *)(dest + 6 * stride), p6);
+ p6 = _mm_srli_si128(p6, 8);
+ _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
+}
+
+void vp9_add_residual_16x16_sse2(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ const int width = 16;
+ int i = 4;
+ const __m128i zero = _mm_setzero_si128();
+
+ // Diff data
+ __m128i d0, d1, d2, d3, d4, d5, d6, d7;
+ __m128i p0, p1, p2, p3, p4, p5, p6, p7;
+
+ do {
+ d0 = _mm_load_si128((const __m128i *)(diff + 0 * width));
+ d1 = _mm_load_si128((const __m128i *)(diff + 0 * width + 8));
+ d2 = _mm_load_si128((const __m128i *)(diff + 1 * width));
+ d3 = _mm_load_si128((const __m128i *)(diff + 1 * width + 8));
+ d4 = _mm_load_si128((const __m128i *)(diff + 2 * width));
+ d5 = _mm_load_si128((const __m128i *)(diff + 2 * width + 8));
+ d6 = _mm_load_si128((const __m128i *)(diff + 3 * width));
+ d7 = _mm_load_si128((const __m128i *)(diff + 3 * width + 8));
+
+ // Prediction data.
+ p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
+ p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
+ p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
+ p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
+
+ p0 = _mm_unpacklo_epi8(p1, zero);
+ p1 = _mm_unpackhi_epi8(p1, zero);
+ p2 = _mm_unpacklo_epi8(p3, zero);
+ p3 = _mm_unpackhi_epi8(p3, zero);
+ p4 = _mm_unpacklo_epi8(p5, zero);
+ p5 = _mm_unpackhi_epi8(p5, zero);
+ p6 = _mm_unpacklo_epi8(p7, zero);
+ p7 = _mm_unpackhi_epi8(p7, zero);
+
+ p0 = _mm_add_epi16(p0, d0);
+ p1 = _mm_add_epi16(p1, d1);
+ p2 = _mm_add_epi16(p2, d2);
+ p3 = _mm_add_epi16(p3, d3);
+ p4 = _mm_add_epi16(p4, d4);
+ p5 = _mm_add_epi16(p5, d5);
+ p6 = _mm_add_epi16(p6, d6);
+ p7 = _mm_add_epi16(p7, d7);
+
+ p0 = _mm_packus_epi16(p0, p1);
+ p1 = _mm_packus_epi16(p2, p3);
+ p2 = _mm_packus_epi16(p4, p5);
+ p3 = _mm_packus_epi16(p6, p7);
+
+ _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
+ _mm_store_si128((__m128i *)(dest + 1 * stride), p1);
+ _mm_store_si128((__m128i *)(dest + 2 * stride), p2);
+ _mm_store_si128((__m128i *)(dest + 3 * stride), p3);
+
+ diff += 4 * width;
+ pred += 4 * pitch;
+ dest += 4 * stride;
+ } while (--i);
+}
+
+void vp9_add_residual_32x32_sse2(const int16_t *diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ const int width = 32;
+ int i = 16;
+ const __m128i zero = _mm_setzero_si128();
+
+ // Diff data
+ __m128i d0, d1, d2, d3, d4, d5, d6, d7;
+ __m128i p0, p1, p2, p3, p4, p5, p6, p7;
+
+ do {
+ d0 = _mm_load_si128((const __m128i *)(diff + 0 * width));
+ d1 = _mm_load_si128((const __m128i *)(diff + 0 * width + 8));
+ d2 = _mm_load_si128((const __m128i *)(diff + 0 * width + 16));
+ d3 = _mm_load_si128((const __m128i *)(diff + 0 * width + 24));
+ d4 = _mm_load_si128((const __m128i *)(diff + 1 * width));
+ d5 = _mm_load_si128((const __m128i *)(diff + 1 * width + 8));
+ d6 = _mm_load_si128((const __m128i *)(diff + 1 * width + 16));
+ d7 = _mm_load_si128((const __m128i *)(diff + 1 * width + 24));
+
+ // Prediction data.
+ p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
+ p3 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16));
+ p5 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
+ p7 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16));
+
+ p0 = _mm_unpacklo_epi8(p1, zero);
+ p1 = _mm_unpackhi_epi8(p1, zero);
+ p2 = _mm_unpacklo_epi8(p3, zero);
+ p3 = _mm_unpackhi_epi8(p3, zero);
+ p4 = _mm_unpacklo_epi8(p5, zero);
+ p5 = _mm_unpackhi_epi8(p5, zero);
+ p6 = _mm_unpacklo_epi8(p7, zero);
+ p7 = _mm_unpackhi_epi8(p7, zero);
+
+ p0 = _mm_add_epi16(p0, d0);
+ p1 = _mm_add_epi16(p1, d1);
+ p2 = _mm_add_epi16(p2, d2);
+ p3 = _mm_add_epi16(p3, d3);
+ p4 = _mm_add_epi16(p4, d4);
+ p5 = _mm_add_epi16(p5, d5);
+ p6 = _mm_add_epi16(p6, d6);
+ p7 = _mm_add_epi16(p7, d7);
+
+ p0 = _mm_packus_epi16(p0, p1);
+ p1 = _mm_packus_epi16(p2, p3);
+ p2 = _mm_packus_epi16(p4, p5);
+ p3 = _mm_packus_epi16(p6, p7);
+
+ _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
+ _mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1);
+ _mm_store_si128((__m128i *)(dest + 1 * stride), p2);
+ _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3);
+
+ diff += 2 * width;
+ pred += 2 * pitch;
+ dest += 2 * stride;
+ } while (--i);
+}
+
+void vp9_add_constant_residual_8x8_sse2(const int16_t diff, const uint8_t *pred,
+ int pitch, uint8_t *dest, int stride) {
+ uint8_t abs_diff;
+ __m128i d;
+
+ // Prediction data.
+ __m128i p0 = _mm_loadl_epi64((const __m128i *)(pred + 0 * pitch));
+ __m128i p1 = _mm_loadl_epi64((const __m128i *)(pred + 1 * pitch));
+ __m128i p2 = _mm_loadl_epi64((const __m128i *)(pred + 2 * pitch));
+ __m128i p3 = _mm_loadl_epi64((const __m128i *)(pred + 3 * pitch));
+ __m128i p4 = _mm_loadl_epi64((const __m128i *)(pred + 4 * pitch));
+ __m128i p5 = _mm_loadl_epi64((const __m128i *)(pred + 5 * pitch));
+ __m128i p6 = _mm_loadl_epi64((const __m128i *)(pred + 6 * pitch));
+ __m128i p7 = _mm_loadl_epi64((const __m128i *)(pred + 7 * pitch));
+
+ p0 = _mm_unpacklo_epi64(p0, p1);
+ p2 = _mm_unpacklo_epi64(p2, p3);
+ p4 = _mm_unpacklo_epi64(p4, p5);
+ p6 = _mm_unpacklo_epi64(p6, p7);
+
+ // Clip diff value to [0, 255] range. Then, do addition or subtraction
+ // according to its sign.
+ if (diff >= 0) {
+ abs_diff = (diff > 255) ? 255 : diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+
+ p0 = _mm_adds_epu8(p0, d);
+ p2 = _mm_adds_epu8(p2, d);
+ p4 = _mm_adds_epu8(p4, d);
+ p6 = _mm_adds_epu8(p6, d);
+ } else {
+ abs_diff = (diff < -255) ? 255 : -diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+
+ p0 = _mm_subs_epu8(p0, d);
+ p2 = _mm_subs_epu8(p2, d);
+ p4 = _mm_subs_epu8(p4, d);
+ p6 = _mm_subs_epu8(p6, d);
+ }
+
+ _mm_storel_epi64((__m128i *)(dest + 0 * stride), p0);
+ p0 = _mm_srli_si128(p0, 8);
+ _mm_storel_epi64((__m128i *)(dest + 1 * stride), p0);
+
+ _mm_storel_epi64((__m128i *)(dest + 2 * stride), p2);
+ p2 = _mm_srli_si128(p2, 8);
+ _mm_storel_epi64((__m128i *)(dest + 3 * stride), p2);
+
+ _mm_storel_epi64((__m128i *)(dest + 4 * stride), p4);
+ p4 = _mm_srli_si128(p4, 8);
+ _mm_storel_epi64((__m128i *)(dest + 5 * stride), p4);
+
+ _mm_storel_epi64((__m128i *)(dest + 6 * stride), p6);
+ p6 = _mm_srli_si128(p6, 8);
+ _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
+}
+
+void vp9_add_constant_residual_16x16_sse2(const int16_t diff,
+ const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride) {
+ uint8_t abs_diff;
+ __m128i d;
+
+ // Prediction data.
+ __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
+ __m128i p1 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
+ __m128i p2 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
+ __m128i p3 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
+ __m128i p4 = _mm_load_si128((const __m128i *)(pred + 4 * pitch));
+ __m128i p5 = _mm_load_si128((const __m128i *)(pred + 5 * pitch));
+ __m128i p6 = _mm_load_si128((const __m128i *)(pred + 6 * pitch));
+ __m128i p7 = _mm_load_si128((const __m128i *)(pred + 7 * pitch));
+ __m128i p8 = _mm_load_si128((const __m128i *)(pred + 8 * pitch));
+ __m128i p9 = _mm_load_si128((const __m128i *)(pred + 9 * pitch));
+ __m128i p10 = _mm_load_si128((const __m128i *)(pred + 10 * pitch));
+ __m128i p11 = _mm_load_si128((const __m128i *)(pred + 11 * pitch));
+ __m128i p12 = _mm_load_si128((const __m128i *)(pred + 12 * pitch));
+ __m128i p13 = _mm_load_si128((const __m128i *)(pred + 13 * pitch));
+ __m128i p14 = _mm_load_si128((const __m128i *)(pred + 14 * pitch));
+ __m128i p15 = _mm_load_si128((const __m128i *)(pred + 15 * pitch));
+
+ // Clip diff value to [0, 255] range. Then, do addition or subtraction
+ // according to its sign.
+ if (diff >= 0) {
+ abs_diff = (diff > 255) ? 255 : diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+
+ p0 = _mm_adds_epu8(p0, d);
+ p1 = _mm_adds_epu8(p1, d);
+ p2 = _mm_adds_epu8(p2, d);
+ p3 = _mm_adds_epu8(p3, d);
+ p4 = _mm_adds_epu8(p4, d);
+ p5 = _mm_adds_epu8(p5, d);
+ p6 = _mm_adds_epu8(p6, d);
+ p7 = _mm_adds_epu8(p7, d);
+ p8 = _mm_adds_epu8(p8, d);
+ p9 = _mm_adds_epu8(p9, d);
+ p10 = _mm_adds_epu8(p10, d);
+ p11 = _mm_adds_epu8(p11, d);
+ p12 = _mm_adds_epu8(p12, d);
+ p13 = _mm_adds_epu8(p13, d);
+ p14 = _mm_adds_epu8(p14, d);
+ p15 = _mm_adds_epu8(p15, d);
+ } else {
+ abs_diff = (diff < -255) ? 255 : -diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+
+ p0 = _mm_subs_epu8(p0, d);
+ p1 = _mm_subs_epu8(p1, d);
+ p2 = _mm_subs_epu8(p2, d);
+ p3 = _mm_subs_epu8(p3, d);
+ p4 = _mm_subs_epu8(p4, d);
+ p5 = _mm_subs_epu8(p5, d);
+ p6 = _mm_subs_epu8(p6, d);
+ p7 = _mm_subs_epu8(p7, d);
+ p8 = _mm_subs_epu8(p8, d);
+ p9 = _mm_subs_epu8(p9, d);
+ p10 = _mm_subs_epu8(p10, d);
+ p11 = _mm_subs_epu8(p11, d);
+ p12 = _mm_subs_epu8(p12, d);
+ p13 = _mm_subs_epu8(p13, d);
+ p14 = _mm_subs_epu8(p14, d);
+ p15 = _mm_subs_epu8(p15, d);
+ }
+
+ // Store results
+ _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
+ _mm_store_si128((__m128i *)(dest + 1 * stride), p1);
+ _mm_store_si128((__m128i *)(dest + 2 * stride), p2);
+ _mm_store_si128((__m128i *)(dest + 3 * stride), p3);
+ _mm_store_si128((__m128i *)(dest + 4 * stride), p4);
+ _mm_store_si128((__m128i *)(dest + 5 * stride), p5);
+ _mm_store_si128((__m128i *)(dest + 6 * stride), p6);
+ _mm_store_si128((__m128i *)(dest + 7 * stride), p7);
+ _mm_store_si128((__m128i *)(dest + 8 * stride), p8);
+ _mm_store_si128((__m128i *)(dest + 9 * stride), p9);
+ _mm_store_si128((__m128i *)(dest + 10 * stride), p10);
+ _mm_store_si128((__m128i *)(dest + 11 * stride), p11);
+ _mm_store_si128((__m128i *)(dest + 12 * stride), p12);
+ _mm_store_si128((__m128i *)(dest + 13 * stride), p13);
+ _mm_store_si128((__m128i *)(dest + 14 * stride), p14);
+ _mm_store_si128((__m128i *)(dest + 15 * stride), p15);
+}
+
+void vp9_add_constant_residual_32x32_sse2(const int16_t diff,
+ const uint8_t *pred, int pitch,
+ uint8_t *dest, int stride) {
+ uint8_t abs_diff;
+ __m128i d;
+ int i = 8;
+
+ if (diff >= 0) {
+ abs_diff = (diff > 255) ? 255 : diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+ } else {
+ abs_diff = (diff < -255) ? 255 : -diff;
+ d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
+ }
+
+ do {
+ // Prediction data.
+ __m128i p0 = _mm_load_si128((const __m128i *)(pred + 0 * pitch));
+ __m128i p1 = _mm_load_si128((const __m128i *)(pred + 0 * pitch + 16));
+ __m128i p2 = _mm_load_si128((const __m128i *)(pred + 1 * pitch));
+ __m128i p3 = _mm_load_si128((const __m128i *)(pred + 1 * pitch + 16));
+ __m128i p4 = _mm_load_si128((const __m128i *)(pred + 2 * pitch));
+ __m128i p5 = _mm_load_si128((const __m128i *)(pred + 2 * pitch + 16));
+ __m128i p6 = _mm_load_si128((const __m128i *)(pred + 3 * pitch));
+ __m128i p7 = _mm_load_si128((const __m128i *)(pred + 3 * pitch + 16));
+
+ // Clip diff value to [0, 255] range. Then, do addition or subtraction
+ // according to its sign.
+ if (diff >= 0) {
+ p0 = _mm_adds_epu8(p0, d);
+ p1 = _mm_adds_epu8(p1, d);
+ p2 = _mm_adds_epu8(p2, d);
+ p3 = _mm_adds_epu8(p3, d);
+ p4 = _mm_adds_epu8(p4, d);
+ p5 = _mm_adds_epu8(p5, d);
+ p6 = _mm_adds_epu8(p6, d);
+ p7 = _mm_adds_epu8(p7, d);
+ } else {
+ p0 = _mm_subs_epu8(p0, d);
+ p1 = _mm_subs_epu8(p1, d);
+ p2 = _mm_subs_epu8(p2, d);
+ p3 = _mm_subs_epu8(p3, d);
+ p4 = _mm_subs_epu8(p4, d);
+ p5 = _mm_subs_epu8(p5, d);
+ p6 = _mm_subs_epu8(p6, d);
+ p7 = _mm_subs_epu8(p7, d);
+ }
+
+ // Store results
+ _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
+ _mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1);
+ _mm_store_si128((__m128i *)(dest + 1 * stride), p2);
+ _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3);
+ _mm_store_si128((__m128i *)(dest + 2 * stride), p4);
+ _mm_store_si128((__m128i *)(dest + 2 * stride + 16), p5);
+ _mm_store_si128((__m128i *)(dest + 3 * stride), p6);
+ _mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7);
+
+ pred += 4 * pitch;
+ dest += 4 * stride;
+ } while (--i);
+}
+#endif
--- a/vp9/decoder/x86/vp9_idct_blk_mmx.c
+++ /dev/null
@@ -1,145 +1,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vpx_config.h"
-#include "vp9/common/vp9_blockd.h"
-#include "vp9/decoder/vp9_dequantize.h"
-#include "vp9/decoder/x86/vp9_idct_mmx.h"
-
-void vp9_dequant_dc_idct_add_y_block_mmx(short *q, const short *dq,
- unsigned char *pre,
- unsigned char *dst,
- int stride, unsigned short *eobs,
- const short *dc) {
- int i;
-
- for (i = 0; i < 4; i++) {
- if (eobs[0] > 1)
- vp9_dequant_dc_idct_add_mmx(q, dq, pre, dst, 16, stride, dc[0]);
- else
- vp9_dc_only_idct_add_mmx(dc[0], pre, dst, 16, stride);
-
- if (eobs[1] > 1)
- vp9_dequant_dc_idct_add_mmx(q + 16, dq, pre + 4,
- dst + 4, 16, stride, dc[1]);
- else
- vp9_dc_only_idct_add_mmx(dc[1], pre + 4, dst + 4, 16, stride);
-
- if (eobs[2] > 1)
- vp9_dequant_dc_idct_add_mmx(q + 32, dq, pre + 8,
- dst + 8, 16, stride, dc[2]);
- else
- vp9_dc_only_idct_add_mmx(dc[2], pre + 8, dst + 8, 16, stride);
-
- if (eobs[3] > 1)
- vp9_dequant_dc_idct_add_mmx(q + 48, dq, pre + 12,
- dst + 12, 16, stride, dc[3]);
- else
- vp9_dc_only_idct_add_mmx(dc[3], pre + 12, dst + 12, 16, stride);
-
- q += 64;
- dc += 4;
- pre += 64;
- dst += 4 * stride;
- eobs += 4;
- }
-}
-
-void vp9_dequant_idct_add_y_block_mmx(short *q, const short *dq,
- unsigned char *pre,
- unsigned char *dst,
- int stride, unsigned short *eobs) {
- int i;
-
- for (i = 0; i < 4; i++) {
- if (eobs[0] > 1)
- vp9_dequant_idct_add_mmx(q, dq, pre, dst, 16, stride);
- else {
- vp9_dc_only_idct_add_mmx(q[0]*dq[0], pre, dst, 16, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp9_dequant_idct_add_mmx(q + 16, dq, pre + 4, dst + 4, 16, stride);
- else {
- vp9_dc_only_idct_add_mmx(q[16]*dq[0], pre + 4, dst + 4, 16, stride);
- ((int *)(q + 16))[0] = 0;
- }
-
- if (eobs[2] > 1)
- vp9_dequant_idct_add_mmx(q + 32, dq, pre + 8, dst + 8, 16, stride);
- else {
- vp9_dc_only_idct_add_mmx(q[32]*dq[0], pre + 8, dst + 8, 16, stride);
- ((int *)(q + 32))[0] = 0;
- }
-
- if (eobs[3] > 1)
- vp9_dequant_idct_add_mmx(q + 48, dq, pre + 12, dst + 12, 16, stride);
- else {
- vp9_dc_only_idct_add_mmx(q[48]*dq[0], pre + 12, dst + 12, 16, stride);
- ((int *)(q + 48))[0] = 0;
- }
-
- q += 64;
- pre += 64;
- dst += 4 * stride;
- eobs += 4;
- }
-}
-
-void vp9_dequant_idct_add_uv_block_mmx(short *q, const short *dq,
- unsigned char *pre,
- unsigned char *dstu,
- unsigned char *dstv,
- int stride, unsigned short *eobs) {
- int i;
-
- for (i = 0; i < 2; i++) {
- if (eobs[0] > 1)
- vp9_dequant_idct_add_mmx(q, dq, pre, dstu, 8, stride);
- else {
- vp9_dc_only_idct_add_mmx(q[0]*dq[0], pre, dstu, 8, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp9_dequant_idct_add_mmx(q + 16, dq, pre + 4, dstu + 4, 8, stride);
- else {
- vp9_dc_only_idct_add_mmx(q[16]*dq[0], pre + 4, dstu + 4, 8, stride);
- ((int *)(q + 16))[0] = 0;
- }
-
- q += 32;
- pre += 32;
- dstu += 4 * stride;
- eobs += 2;
- }
-
- for (i = 0; i < 2; i++) {
- if (eobs[0] > 1)
- vp9_dequant_idct_add_mmx(q, dq, pre, dstv, 8, stride);
- else {
- vp9_dc_only_idct_add_mmx(q[0]*dq[0], pre, dstv, 8, stride);
- ((int *)q)[0] = 0;
- }
-
- if (eobs[1] > 1)
- vp9_dequant_idct_add_mmx(q + 16, dq, pre + 4, dstv + 4, 8, stride);
- else {
- vp9_dc_only_idct_add_mmx(q[16]*dq[0], pre + 4, dstv + 4, 8, stride);
- ((int *)(q + 16))[0] = 0;
- }
-
- q += 32;
- pre += 32;
- dstv += 4 * stride;
- eobs += 2;
- }
-}
--- a/vp9/decoder/x86/vp9_idct_mmx.h
+++ b/vp9/decoder/x86/vp9_idct_mmx.h
@@ -16,9 +16,6 @@
unsigned char *pred, unsigned char *dest,
int pitch, int stride, int Dc);
-void vp9_dc_only_idct_add_mmx(short input_dc, const unsigned char *pred_ptr,
- unsigned char *dst_ptr, int pitch, int stride);
-
void vp9_dequant_idct_add_mmx(short *input, const short *dq, unsigned char *pred,
unsigned char *dest, int pitch, int stride);
--- a/vp9/decoder/x86/vp9_x86_dsystemdependent.c
+++ /dev/null
@@ -1,26 +1,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vpx_config.h"
-#include "vpx_ports/x86.h"
-#include "vp9/decoder/vp9_onyxd_int.h"
-
-#if HAVE_MMX
-void vp9_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
-
-void vp9_dequantize_b_mmx(BLOCKD *d) {
- short *sq = (short *) d->qcoeff;
- short *dq = (short *) d->dqcoeff;
- short *q = (short *) d->dequant;
- vp9_dequantize_b_impl_mmx(sq, dq, q);
-}
-#endif
-
-
--- a/vp9/encoder/vp9_asm_enc_offsets.c
+++ b/vp9/encoder/vp9_asm_enc_offsets.c
@@ -32,7 +32,6 @@
DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant));
DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
-DEFINE(vp9_blockd_eob, offsetof(BLOCKD, eob));
END
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -14,6 +14,7 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_findnearmv.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/common/vp9_systemdependent.h"
#include <assert.h>
@@ -41,17 +42,32 @@
int intra_mode_stats[VP9_KF_BINTRAMODES]
[VP9_KF_BINTRAMODES]
[VP9_KF_BINTRAMODES];
-vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4];
-vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4];
-vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8];
-vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8];
-vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16];
-vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16];
-vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32];
+vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES];
+vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES];
+vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES];
+vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES];
extern unsigned int active_section;
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+unsigned int nzc_stats_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_TOKENS];
+unsigned int nzc_stats_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_TOKENS];
+unsigned int nzc_stats_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_TOKENS];
+unsigned int nzc_stats_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_TOKENS];
+unsigned int nzc_pcat_stats[MAX_NZC_CONTEXTS][NZC_TOKENS_EXTRA]
+ [NZC_BITS_EXTRA][2];
+void init_nzcstats();
+void update_nzcstats(VP9_COMMON *const cm);
+void print_nzcstats();
+#endif
+#endif
+
#ifdef MODE_STATS
int count_mb_seg[4] = { 0, 0, 0, 0 };
#endif
@@ -112,8 +128,8 @@
unsigned int new_b = 0, old_b = 0;
int i = 0;
- vp9_tree_probs_from_distribution(n--, tok, tree,
- Pnew, bct, num_events);
+ vp9_tree_probs_from_distribution(tree, Pnew, bct, num_events, 0);
+ n--;
do {
new_b += cost_branch(bct[i], Pnew[i]);
@@ -169,10 +185,9 @@
int i, j;
for (j = 0; j <= VP9_SWITCHABLE_FILTERS; ++j) {
vp9_tree_probs_from_distribution(
- VP9_SWITCHABLE_FILTERS,
- vp9_switchable_interp_encodings, vp9_switchable_interp_tree,
+ vp9_switchable_interp_tree,
pc->fc.switchable_interp_prob[j], branch_ct,
- cpi->switchable_interp_count[j]);
+ cpi->switchable_interp_count[j], 0);
for (i = 0; i < VP9_SWITCHABLE_FILTERS - 1; ++i) {
if (pc->fc.switchable_interp_prob[j][i] < 1)
pc->fc.switchable_interp_prob[j][i] = 1;
@@ -189,15 +204,7 @@
int old_cost, new_cost;
// Set the prediction probability structures to defaults
- if (cm->frame_type == KEY_FRAME) {
- // Set the prediction probabilities to defaults
- cm->ref_pred_probs[0] = 120;
- cm->ref_pred_probs[1] = 80;
- cm->ref_pred_probs[2] = 40;
-
- vpx_memset(cpi->ref_pred_probs_update, 0,
- sizeof(cpi->ref_pred_probs_update));
- } else {
+ if (cm->frame_type != KEY_FRAME) {
// From the prediction counts set the probabilities for each context
for (i = 0; i < PREDICTION_PROBS; i++) {
new_pred_probs[i] = get_binary_prob(cpi->ref_pred_count[i][0],
@@ -219,7 +226,6 @@
cm->ref_pred_probs[i] = new_pred_probs[i];
} else
cpi->ref_pred_probs_update[i] = 0;
-
}
}
}
@@ -230,8 +236,8 @@
//
// The branch counts table is re-populated during the actual pack stage and in
// the decoder to facilitate backwards update of the context.
-static void update_mode_probs(VP9_COMMON *cm,
- int mode_context[INTER_MODE_CONTEXTS][4]) {
+static void update_inter_mode_probs(VP9_COMMON *cm,
+ int mode_context[INTER_MODE_CONTEXTS][4]) {
int i, j;
unsigned int (*mv_ref_ct)[4][2];
@@ -393,6 +399,43 @@
return bestsavings;
}
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
+static int prob_diff_update_savings_search_model(const unsigned int *ct,
+ const vp9_prob *oldp,
+ vp9_prob *bestp,
+ const vp9_prob upd,
+ int b, int r) {
+ int i, old_b, new_b, update_b, savings, bestsavings, step;
+ int newp;
+ vp9_prob bestnewp, newplist[ENTROPY_NODES];
+ for (i = UNCONSTRAINED_NODES - 1, old_b = 0; i < ENTROPY_NODES; ++i)
+ old_b += cost_branch256(ct + 2 * i, oldp[i]);
+
+ bestsavings = 0;
+ bestnewp = oldp[UNCONSTRAINED_NODES - 1];
+
+ step = (*bestp > oldp[UNCONSTRAINED_NODES - 1] ? -1 : 1);
+ newp = *bestp;
+ // newp = *bestp - step * (abs(*bestp - oldp[UNCONSTRAINED_NODES - 1]) >> 1);
+ for (; newp != oldp[UNCONSTRAINED_NODES - 1]; newp += step) {
+ if (newp < 1 || newp > 255) continue;
+ newplist[UNCONSTRAINED_NODES - 1] = newp;
+ vp9_get_model_distribution(newp, newplist, b, r);
+ for (i = UNCONSTRAINED_NODES - 1, new_b = 0; i < ENTROPY_NODES; ++i)
+ new_b += cost_branch256(ct + 2 * i, newplist[i]);
+ update_b = prob_diff_update_cost(newp, oldp[UNCONSTRAINED_NODES - 1]) +
+ vp9_cost_upd256;
+ savings = old_b - new_b - update_b;
+ if (savings > bestsavings) {
+ bestsavings = savings;
+ bestnewp = newp;
+ }
+ }
+ *bestp = bestnewp;
+ return bestsavings;
+}
+#endif
+
static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd,
unsigned int *ct) {
vp9_prob newp;
@@ -508,7 +551,8 @@
vp9_sub_mv_ref_encoding_array - LEFT4X4 + m);
}
-static void write_nmv(vp9_writer *bc, const MV *mv, const int_mv *ref,
+static void write_nmv(VP9_COMP *cpi, vp9_writer *bc,
+ const MV *mv, const int_mv *ref,
const nmv_context *nmvc, int usehp) {
MV e;
e.row = mv->row - ref->as_mv.row;
@@ -585,6 +629,28 @@
}
}
+static void write_mb_segid_except(VP9_COMMON *cm,
+ vp9_writer *bc,
+ const MB_MODE_INFO *mi,
+ const MACROBLOCKD *xd,
+ int mb_row, int mb_col) {
+ // Encode the MB segment id.
+ int seg_id = mi->segment_id;
+ int pred_seg_id = vp9_get_pred_mb_segid(cm, xd,
+ mb_row * cm->mb_cols + mb_col);
+ const vp9_prob *p = xd->mb_segment_tree_probs;
+ const vp9_prob p1 = xd->mb_segment_mispred_tree_probs[pred_seg_id];
+
+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
+ vp9_write(bc, seg_id >= 2, p1);
+ if (pred_seg_id >= 2 && seg_id < 2) {
+ vp9_write(bc, seg_id == 1, p[1]);
+ } else if (pred_seg_id < 2 && seg_id >= 2) {
+ vp9_write(bc, seg_id == 3, p[2]);
+ }
+ }
+}
+
// This function encodes the reference frame
static void encode_ref_frame(vp9_writer *const bc,
VP9_COMMON *const cm,
@@ -708,11 +774,10 @@
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to MV
// values that are in 1/8th pel units
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
- xd->mb_to_right_edge = ((pc->mb_cols - mb_size - mb_col) * 16) << 3;
- xd->mb_to_bottom_edge = ((pc->mb_rows - mb_size - mb_row) * 16) << 3;
+ set_mb_row(pc, xd, mb_row, mb_size);
+ set_mb_col(pc, xd, mb_col, mb_size);
+
#ifdef ENTROPY_STATS
active_section = 9;
#endif
@@ -728,7 +793,7 @@
// If the mb segment id wasn't predicted code explicitly
if (!prediction_flag)
- write_mb_segid(bc, mi, &cpi->mb.e_mbd);
+ write_mb_segid_except(pc, bc, mi, &cpi->mb.e_mbd, mb_row, mb_col);
} else {
// Normal unpredicted coding
write_mb_segid(bc, mi, &cpi->mb.e_mbd);
@@ -737,33 +802,16 @@
if (!pc->mb_no_coeff_skip) {
skip_coeff = 0;
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) {
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_coeff = 1;
} else {
- const int nmbs = mb_size;
- const int xmbs = MIN(nmbs, mb_cols_left);
- const int ymbs = MIN(nmbs, mb_rows_left);
- int x, y;
-
- skip_coeff = 1;
- for (y = 0; y < ymbs; y++) {
- for (x = 0; x < xmbs; x++) {
- skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff;
- }
- }
-
+ skip_coeff = m->mbmi.mb_skip_coeff;
vp9_write(bc, skip_coeff,
vp9_get_pred_prob(pc, xd, PRED_MBSKIP));
}
// Encode the reference frame.
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)
- || vp9_get_segdata(xd, segment_id, SEG_LVL_MODE) >= NEARESTMV) {
- encode_ref_frame(bc, pc, xd, segment_id, rf);
- } else {
- assert(rf == INTRA_FRAME);
- }
+ encode_ref_frame(bc, pc, xd, segment_id, rf);
if (rf == INTRA_FRAME) {
#ifdef ENTROPY_STATS
@@ -770,12 +818,11 @@
active_section = 6;
#endif
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
- if (m->mbmi.sb_type)
- write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
- else
- write_ymode(bc, mode, pc->fc.ymode_prob);
- }
+ if (m->mbmi.sb_type)
+ write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
+ else
+ write_ymode(bc, mode, pc->fc.ymode_prob);
+
if (mode == B_PRED) {
int j = 0;
do {
@@ -801,14 +848,12 @@
vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]);
- // #ifdef ENTROPY_STATS
#ifdef ENTROPY_STATS
- accum_mv_refs(mode, ct);
active_section = 3;
#endif
- // Is the segment coding of mode enabled
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ // If segment skip is not enabled code the mode.
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
if (mi->sb_type) {
write_sb_mv_ref(bc, mode, mv_ref_p);
} else {
@@ -878,12 +923,12 @@
#ifdef ENTROPY_STATS
active_section = 5;
#endif
- write_nmv(bc, &mi->mv[0].as_mv, &mi->best_mv,
+ write_nmv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
if (mi->second_ref_frame > 0) {
- write_nmv(bc, &mi->mv[1].as_mv, &mi->best_second_mv,
+ write_nmv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
}
@@ -915,7 +960,7 @@
#else
while (j != L[++k]);
#endif
- leftmv.as_int = left_block_mv(m, k);
+ leftmv.as_int = left_block_mv(xd, m, k);
abovemv.as_int = above_block_mv(m, k, mis);
mv_contz = vp9_mv_cont(&leftmv, &abovemv);
@@ -926,12 +971,12 @@
#ifdef ENTROPY_STATS
active_section = 11;
#endif
- write_nmv(bc, &blockmv.as_mv, &mi->best_mv,
+ write_nmv(cpi, bc, &blockmv.as_mv, &mi->best_mv,
(const nmv_context*) nmvc,
xd->allow_high_precision_mv);
if (mi->second_ref_frame > 0) {
- write_nmv(bc,
+ write_nmv(cpi, bc,
&cpi->mb.partition_info->bmi[j].second_mv.as_mv,
&mi->best_second_mv,
(const nmv_context*) nmvc,
@@ -951,8 +996,7 @@
mi->partitioning == PARTITIONING_4X4))) &&
pc->txfm_mode == TX_MODE_SELECT &&
!((pc->mb_no_coeff_skip && skip_coeff) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
TX_SIZE sz = mi->txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
@@ -965,7 +1009,7 @@
}
static void write_mb_modes_kf(const VP9_COMP *cpi,
- const MODE_INFO *m,
+ MODE_INFO *m,
vp9_writer *bc,
int mb_rows_left, int mb_cols_left) {
const VP9_COMMON *const c = &cpi->common;
@@ -981,22 +1025,10 @@
if (!c->mb_no_coeff_skip) {
skip_coeff = 0;
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) {
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_coeff = 1;
} else {
- const int nmbs = 1 << m->mbmi.sb_type;
- const int xmbs = MIN(nmbs, mb_cols_left);
- const int ymbs = MIN(nmbs, mb_rows_left);
- int x, y;
-
- skip_coeff = 1;
- for (y = 0; y < ymbs; y++) {
- for (x = 0; x < xmbs; x++) {
- skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff;
- }
- }
-
+ skip_coeff = m->mbmi.mb_skip_coeff;
vp9_write(bc, skip_coeff,
vp9_get_pred_prob(c, xd, PRED_MBSKIP));
}
@@ -1013,7 +1045,8 @@
int i = 0;
do {
const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
- const B_PREDICTION_MODE L = left_block_mode(m, i);
+ const B_PREDICTION_MODE L = (xd->left_available || (i & 3)) ?
+ left_block_mode(m, i) : B_DC_PRED;
const int bm = m->bmi[i].as_mode.first;
#ifdef ENTROPY_STATS
@@ -1041,8 +1074,7 @@
if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
!((c->mb_no_coeff_skip && skip_coeff) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
TX_SIZE sz = m->mbmi.txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
@@ -1054,45 +1086,609 @@
}
}
+#if CONFIG_CODE_NONZEROCOUNT
+static void write_nzc(VP9_COMMON *const cm,
+ uint16_t nzc,
+ int nzc_context,
+ TX_SIZE tx_size,
+ int ref,
+ int type,
+ vp9_writer* const bc) {
+ int c, e;
+ c = codenzc(nzc);
+ if (tx_size == TX_32X32) {
+ write_token(bc, vp9_nzc32x32_tree,
+ cm->fc.nzc_probs_32x32[nzc_context][ref][type],
+ vp9_nzc32x32_encodings + c);
+ // cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_16X16) {
+ write_token(bc, vp9_nzc16x16_tree,
+ cm->fc.nzc_probs_16x16[nzc_context][ref][type],
+ vp9_nzc16x16_encodings + c);
+ // cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_8X8) {
+ write_token(bc, vp9_nzc8x8_tree,
+ cm->fc.nzc_probs_8x8[nzc_context][ref][type],
+ vp9_nzc8x8_encodings + c);
+ // cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++;
+ } else if (tx_size == TX_4X4) {
+ write_token(bc, vp9_nzc4x4_tree,
+ cm->fc.nzc_probs_4x4[nzc_context][ref][type],
+ vp9_nzc4x4_encodings + c);
+ // cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++;
+ } else {
+ assert(0);
+ }
+
+ if ((e = vp9_extranzcbits[c])) {
+ int x = nzc - vp9_basenzcvalue[c];
+ while (e--) {
+ int b = (x >> e) & 1;
+ vp9_write(bc, b,
+ cm->fc.nzc_pcat_probs[nzc_context][c - NZC_TOKENS_NOEXTRA][e]);
+ // cm->fc.nzc_pcat_counts[nzc_context][c - NZC_TOKENS_NOEXTRA][e][b]++;
+ }
+ }
+}
+
+static void write_nzcs_sb64(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col,
+ vp9_writer* const bc) {
+ VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 256; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 64) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1, bc);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 256; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 256; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+ }
+ for (j = 256; j < 384; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 256; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 256; j < 384; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void write_nzcs_sb32(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col,
+ vp9_writer* const bc) {
+ VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_32X32:
+ for (j = 0; j < 64; j += 64) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_16X16:
+ for (j = 0; j < 64; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 16) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 64; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+ }
+ for (j = 64; j < 96; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 64; ++j) {
+ nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 64; j < 96; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void write_nzcs_mb16(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col,
+ vp9_writer* const bc) {
+ VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO *m = xd->mode_info_context;
+ MB_MODE_INFO *const mi = &m->mbmi;
+ int j, nzc_context;
+ const int ref = m->mbmi.ref_frame != INTRA_FRAME;
+
+ assert(mb_col == get_mb_col(xd));
+ assert(mb_row == get_mb_row(xd));
+
+ if (mi->mb_skip_coeff)
+ return;
+
+ switch (mi->txfm_size) {
+ case TX_16X16:
+ for (j = 0; j < 16; j += 16) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc);
+ }
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+ }
+ break;
+
+ case TX_8X8:
+ for (j = 0; j < 16; j += 4) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc);
+ }
+ if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) {
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+ }
+ } else {
+ for (j = 16; j < 24; j += 4) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc);
+ }
+ }
+ break;
+
+ case TX_4X4:
+ for (j = 0; j < 16; ++j) {
+ nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc);
+ }
+ for (j = 16; j < 24; ++j) {
+ nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j);
+ write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+#ifdef NZC_STATS
+void init_nzcstats() {
+ vp9_zero(nzc_stats_4x4);
+ vp9_zero(nzc_stats_8x8);
+ vp9_zero(nzc_stats_16x16);
+ vp9_zero(nzc_stats_32x32);
+ vp9_zero(nzc_pcat_stats);
+}
+
+void update_nzcstats(VP9_COMMON *const cm) {
+ int c, r, b, t;
+
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ for (t = 0; t < NZC4X4_TOKENS; ++t) {
+ nzc_stats_4x4[c][r][b][t] += cm->fc.nzc_counts_4x4[c][r][b][t];
+ }
+ }
+ }
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ for (t = 0; t < NZC8X8_TOKENS; ++t) {
+ nzc_stats_8x8[c][r][b][t] += cm->fc.nzc_counts_8x8[c][r][b][t];
+ }
+ }
+ }
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ for (t = 0; t < NZC16X16_TOKENS; ++t) {
+ nzc_stats_16x16[c][r][b][t] += cm->fc.nzc_counts_16x16[c][r][b][t];
+ }
+ }
+ }
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ for (t = 0; t < NZC32X32_TOKENS; ++t) {
+ nzc_stats_32x32[c][r][b][t] += cm->fc.nzc_counts_32x32[c][r][b][t];
+ }
+ }
+ }
+ }
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ for (b = 0; b < bits; ++b) {
+ nzc_pcat_stats[c][t][b][0] += cm->fc.nzc_pcat_counts[c][t][b][0];
+ nzc_pcat_stats[c][t][b][1] += cm->fc.nzc_pcat_counts[c][t][b][1];
+ }
+ }
+ }
+}
+
+void print_nzcstats() {
+ int c, r, b, t;
+ FILE *f;
+
+ printf(
+ "static const unsigned int default_nzc_counts_4x4[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC4X4_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC4X4_TOKENS; ++t) {
+ printf(" %-3d,", nzc_stats_4x4[c][r][b][t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const unsigned int default_nzc_counts_8x8[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC8X8_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC8X8_TOKENS; ++t) {
+ printf(" %-3d,", nzc_stats_8x8[c][r][b][t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const unsigned int default_nzc_counts_16x16[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC16X16_TOKENS] = {"
+ "\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC16X16_TOKENS; ++t) {
+ printf(" %-3d,", nzc_stats_16x16[c][r][b][t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const unsigned int default_nzc_counts_32x32[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC32X32_TOKENS] = {"
+ "\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ printf(" {");
+ for (t = 0; t < NZC32X32_TOKENS; ++t) {
+ printf(" %-3d,", nzc_stats_32x32[c][r][b][t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_pcat_counts[MAX_NZC_CONTEXTS]\n"
+ " [NZC_TOKENS_EXTRA]\n"
+ " [NZC_BITS_EXTRA] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ printf(" {");
+ for (b = 0; b < NZC_BITS_EXTRA; ++b) {
+ printf(" %d/%d,",
+ nzc_pcat_stats[c][t][b][0], nzc_pcat_stats[c][t][b][1]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_probs_4x4[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC4X4_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ vp9_prob probs[NZC4X4_NODES];
+ unsigned int branch_ct[NZC4X4_NODES][2];
+ vp9_tree_probs_from_distribution(vp9_nzc4x4_tree,
+ probs, branch_ct,
+ nzc_stats_4x4[c][r][b], 0);
+ printf(" {");
+ for (t = 0; t < NZC4X4_NODES; ++t) {
+ printf(" %-3d,", probs[t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_probs_8x8[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC8X8_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ vp9_prob probs[NZC8X8_NODES];
+ unsigned int branch_ct[NZC8X8_NODES][2];
+ vp9_tree_probs_from_distribution(vp9_nzc8x8_tree,
+ probs, branch_ct,
+ nzc_stats_8x8[c][r][b], 0);
+ printf(" {");
+ for (t = 0; t < NZC8X8_NODES; ++t) {
+ printf(" %-3d,", probs[t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_probs_16x16[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC16X16_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ vp9_prob probs[NZC16X16_NODES];
+ unsigned int branch_ct[NZC16X16_NODES][2];
+ vp9_tree_probs_from_distribution(vp9_nzc16x16_tree,
+ probs, branch_ct,
+ nzc_stats_16x16[c][r][b], 0);
+ printf(" {");
+ for (t = 0; t < NZC16X16_NODES; ++t) {
+ printf(" %-3d,", probs[t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_probs_32x32[MAX_NZC_CONTEXTS]\n"
+ " [REF_TYPES]\n"
+ " [BLOCK_TYPES]\n"
+ " [NZC32X32_TOKENS] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (r = 0; r < REF_TYPES; ++r) {
+ printf(" {\n");
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ vp9_prob probs[NZC32X32_NODES];
+ unsigned int branch_ct[NZC32X32_NODES][2];
+ vp9_tree_probs_from_distribution(vp9_nzc32x32_tree,
+ probs, branch_ct,
+ nzc_stats_32x32[c][r][b], 0);
+ printf(" {");
+ for (t = 0; t < NZC32X32_NODES; ++t) {
+ printf(" %-3d,", probs[t]);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ printf(
+ "static const vp9_prob default_nzc_pcat_probs[MAX_NZC_CONTEXTS]\n"
+ " [NZC_TOKENS_EXTRA]\n"
+ " [NZC_BITS_EXTRA] = {\n");
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ printf(" {\n");
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ printf(" {");
+ for (b = 0; b < NZC_BITS_EXTRA; ++b) {
+ vp9_prob prob = get_binary_prob(nzc_pcat_stats[c][t][b][0],
+ nzc_pcat_stats[c][t][b][1]);
+ printf(" %-3d,", prob);
+ }
+ printf(" },\n");
+ }
+ printf(" },\n");
+ }
+ printf("};\n");
+
+ f = fopen("nzcstats.bin", "wb");
+ fwrite(nzc_stats_4x4, sizeof(nzc_stats_4x4), 1, f);
+ fwrite(nzc_stats_8x8, sizeof(nzc_stats_8x8), 1, f);
+ fwrite(nzc_stats_16x16, sizeof(nzc_stats_16x16), 1, f);
+ fwrite(nzc_stats_32x32, sizeof(nzc_stats_32x32), 1, f);
+ fwrite(nzc_pcat_stats, sizeof(nzc_pcat_stats), 1, f);
+ fclose(f);
+}
+#endif
+
+#endif // CONFIG_CODE_NONZEROCOUNT
+
static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
TOKENEXTRA **tok, TOKENEXTRA *tok_end,
int mb_row, int mb_col) {
- VP9_COMMON *const c = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
xd->mode_info_context = m;
- if (c->frame_type == KEY_FRAME) {
+ set_mb_row(&cpi->common, xd, mb_row, (1 << m->mbmi.sb_type));
+ set_mb_col(&cpi->common, xd, mb_col, (1 << m->mbmi.sb_type));
+ if (cm->frame_type == KEY_FRAME) {
write_mb_modes_kf(cpi, m, bc,
- c->mb_rows - mb_row, c->mb_cols - mb_col);
+ cm->mb_rows - mb_row, cm->mb_cols - mb_col);
#ifdef ENTROPY_STATS
active_section = 8;
#endif
} else {
pack_inter_mode_mvs(cpi, m, bc,
- c->mb_rows - mb_row, c->mb_cols - mb_col);
+ cm->mb_rows - mb_row, cm->mb_cols - mb_col);
#ifdef ENTROPY_STATS
active_section = 1;
#endif
}
+#if CONFIG_CODE_NONZEROCOUNT
+ if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64)
+ write_nzcs_sb64(cpi, xd, mb_row, mb_col, bc);
+ else if (m->mbmi.sb_type == BLOCK_SIZE_SB32X32)
+ write_nzcs_sb32(cpi, xd, mb_row, mb_col, bc);
+ else
+ write_nzcs_mb16(cpi, xd, mb_row, mb_col, bc);
+#endif
assert(*tok < tok_end);
pack_mb_tokens(bc, tok, tok_end);
}
-static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) {
+static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
+ TOKENEXTRA **tok, TOKENEXTRA *tok_end) {
VP9_COMMON *const c = &cpi->common;
const int mis = c->mode_info_stride;
MODE_INFO *m, *m_ptr = c->mi;
int i, mb_row, mb_col;
- TOKENEXTRA *tok = cpi->tok;
- TOKENEXTRA *tok_end = tok + cpi->tok_count;
- for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) {
+ m_ptr += c->cur_tile_mb_col_start + c->cur_tile_mb_row_start * mis;
+ for (mb_row = c->cur_tile_mb_row_start;
+ mb_row < c->cur_tile_mb_row_end; mb_row += 4, m_ptr += 4 * mis) {
m = m_ptr;
- for (mb_col = 0; mb_col < c->mb_cols; mb_col += 4, m += 4) {
+ for (mb_col = c->cur_tile_mb_col_start;
+ mb_col < c->cur_tile_mb_col_end; mb_col += 4, m += 4) {
vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->sb64_coded);
if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
- write_modes_b(cpi, m, bc, &tok, tok_end, mb_row, mb_col);
+ write_modes_b(cpi, m, bc, tok, tok_end, mb_row, mb_col);
} else {
int j;
@@ -1107,7 +1703,7 @@
vp9_write(bc, sb_m->mbmi.sb_type, c->sb32_coded);
if (sb_m->mbmi.sb_type) {
assert(sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32);
- write_modes_b(cpi, sb_m, bc, &tok, tok_end,
+ write_modes_b(cpi, sb_m, bc, tok, tok_end,
mb_row + y_idx_sb, mb_col + x_idx_sb);
} else {
// Process the 4 MBs in the order:
@@ -1123,7 +1719,7 @@
}
assert(mb_m->mbmi.sb_type == BLOCK_SIZE_MB16X16);
- write_modes_b(cpi, mb_m, bc, &tok, tok_end,
+ write_modes_b(cpi, mb_m, bc, tok, tok_end,
mb_row + y_idx, mb_col + x_idx);
}
}
@@ -1135,20 +1731,23 @@
/* This function is used for debugging probability trees. */
-static void print_prob_tree(vp9_coeff_probs *coef_probs) {
+static void print_prob_tree(vp9_coeff_probs *coef_probs, int block_types) {
/* print coef probability tree */
- int i, j, k, l;
+ int i, j, k, l, m;
FILE *f = fopen("enc_tree_probs.txt", "a");
fprintf(f, "{\n");
- for (i = 0; i < BLOCK_TYPES_4X4; i++) {
+ for (i = 0; i < block_types; i++) {
fprintf(f, " {\n");
- for (j = 0; j < COEF_BANDS; j++) {
- fprintf(f, " {\n");
- for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
- fprintf(f, " {");
- for (l = 0; l < ENTROPY_NODES; l++) {
- fprintf(f, "%3u, ",
- (unsigned int)(coef_probs [i][j][k][l]));
+ for (j = 0; j < REF_TYPES; ++j) {
+ fprintf(f, " {\n");
+ for (k = 0; k < COEF_BANDS; k++) {
+ fprintf(f, " {\n");
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ fprintf(f, " {");
+ for (m = 0; m < ENTROPY_NODES; m++) {
+ fprintf(f, "%3u, ",
+ (unsigned int)(coef_probs[i][j][k][l][m]));
+ }
}
fprintf(f, " }\n");
}
@@ -1162,6 +1761,9 @@
static void build_tree_distribution(vp9_coeff_probs *coef_probs,
vp9_coeff_count *coef_counts,
+ unsigned int (*eob_branch_ct)[REF_TYPES]
+ [COEF_BANDS]
+ [PREV_COEF_CONTEXTS],
#ifdef ENTROPY_STATS
VP9_COMP *cpi,
vp9_coeff_accum *context_counters,
@@ -1168,26 +1770,35 @@
#endif
vp9_coeff_stats *coef_branch_ct,
int block_types) {
- int i = 0, j, k;
+ int i, j, k, l;
#ifdef ENTROPY_STATS
int t = 0;
#endif
for (i = 0; i < block_types; ++i) {
- for (j = 0; j < COEF_BANDS; ++j) {
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
- continue;
- vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
- vp9_coef_encodings, vp9_coef_tree,
- coef_probs[i][j][k],
- coef_branch_ct[i][j][k],
- coef_counts[i][j][k]);
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ if (l >= 3 && k == 0)
+ continue;
+ vp9_tree_probs_from_distribution(vp9_coef_tree,
+ coef_probs[i][j][k][l],
+ coef_branch_ct[i][j][k][l],
+ coef_counts[i][j][k][l], 0);
+ coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] -
+ coef_branch_ct[i][j][k][l][0][0];
+ coef_probs[i][j][k][l][0] =
+ get_binary_prob(coef_branch_ct[i][j][k][l][0][0],
+ coef_branch_ct[i][j][k][l][0][1]);
#ifdef ENTROPY_STATS
- if (!cpi->dummy_packing)
- for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
- context_counters[i][j][k][t] += coef_counts[i][j][k][t];
+ if (!cpi->dummy_packing) {
+ for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+ context_counters[i][j][k][l][t] += coef_counts[i][j][k][l][t];
+ context_counters[i][j][k][l][MAX_ENTROPY_TOKENS] +=
+ eob_branch_ct[i][j][k][l];
+ }
#endif
+ }
}
}
}
@@ -1196,48 +1807,256 @@
static void build_coeff_contexts(VP9_COMP *cpi) {
build_tree_distribution(cpi->frame_coef_probs_4x4,
cpi->coef_counts_4x4,
+ cpi->common.fc.eob_branch_counts[TX_4X4],
#ifdef ENTROPY_STATS
cpi, context_counters_4x4,
#endif
- cpi->frame_branch_ct_4x4, BLOCK_TYPES_4X4);
- build_tree_distribution(cpi->frame_hybrid_coef_probs_4x4,
- cpi->hybrid_coef_counts_4x4,
-#ifdef ENTROPY_STATS
- cpi, hybrid_context_counters_4x4,
-#endif
- cpi->frame_hybrid_branch_ct_4x4, BLOCK_TYPES_4X4);
+ cpi->frame_branch_ct_4x4, BLOCK_TYPES);
build_tree_distribution(cpi->frame_coef_probs_8x8,
cpi->coef_counts_8x8,
+ cpi->common.fc.eob_branch_counts[TX_8X8],
#ifdef ENTROPY_STATS
cpi, context_counters_8x8,
#endif
- cpi->frame_branch_ct_8x8, BLOCK_TYPES_8X8);
- build_tree_distribution(cpi->frame_hybrid_coef_probs_8x8,
- cpi->hybrid_coef_counts_8x8,
-#ifdef ENTROPY_STATS
- cpi, hybrid_context_counters_8x8,
-#endif
- cpi->frame_hybrid_branch_ct_8x8, BLOCK_TYPES_8X8);
+ cpi->frame_branch_ct_8x8, BLOCK_TYPES);
build_tree_distribution(cpi->frame_coef_probs_16x16,
cpi->coef_counts_16x16,
+ cpi->common.fc.eob_branch_counts[TX_16X16],
#ifdef ENTROPY_STATS
cpi, context_counters_16x16,
#endif
- cpi->frame_branch_ct_16x16, BLOCK_TYPES_16X16);
- build_tree_distribution(cpi->frame_hybrid_coef_probs_16x16,
- cpi->hybrid_coef_counts_16x16,
-#ifdef ENTROPY_STATS
- cpi, hybrid_context_counters_16x16,
-#endif
- cpi->frame_hybrid_branch_ct_16x16, BLOCK_TYPES_16X16);
+ cpi->frame_branch_ct_16x16, BLOCK_TYPES);
build_tree_distribution(cpi->frame_coef_probs_32x32,
cpi->coef_counts_32x32,
+ cpi->common.fc.eob_branch_counts[TX_32X32],
#ifdef ENTROPY_STATS
cpi, context_counters_32x32,
#endif
- cpi->frame_branch_ct_32x32, BLOCK_TYPES_32X32);
+ cpi->frame_branch_ct_32x32, BLOCK_TYPES);
}
+#if CONFIG_CODE_NONZEROCOUNT
+static void update_nzc_probs_common(VP9_COMP* cpi,
+ vp9_writer* const bc,
+ int block_size) {
+ VP9_COMMON *cm = &cpi->common;
+ int c, r, b, t;
+ int update[2] = {0, 0};
+ int savings = 0;
+ int tokens, nodes;
+ const vp9_tree_index *nzc_tree;
+ vp9_prob *new_nzc_probs;
+ vp9_prob *old_nzc_probs;
+ unsigned int *nzc_counts;
+ unsigned int (*nzc_branch_ct)[2];
+ vp9_prob upd;
+
+ if (block_size == 32) {
+ tokens = NZC32X32_TOKENS;
+ nzc_tree = vp9_nzc32x32_tree;
+ old_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0];
+ new_nzc_probs = cpi->frame_nzc_probs_32x32[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_32x32[0][0][0];
+ nzc_branch_ct = cpi->frame_nzc_branch_ct_32x32[0][0][0];
+ upd = NZC_UPDATE_PROB_32X32;
+ } else if (block_size == 16) {
+ tokens = NZC16X16_TOKENS;
+ nzc_tree = vp9_nzc16x16_tree;
+ old_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0];
+ new_nzc_probs = cpi->frame_nzc_probs_16x16[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_16x16[0][0][0];
+ nzc_branch_ct = cpi->frame_nzc_branch_ct_16x16[0][0][0];
+ upd = NZC_UPDATE_PROB_16X16;
+ } else if (block_size == 8) {
+ tokens = NZC8X8_TOKENS;
+ nzc_tree = vp9_nzc8x8_tree;
+ old_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0];
+ new_nzc_probs = cpi->frame_nzc_probs_8x8[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_8x8[0][0][0];
+ nzc_branch_ct = cpi->frame_nzc_branch_ct_8x8[0][0][0];
+ upd = NZC_UPDATE_PROB_8X8;
+ } else {
+ nzc_tree = vp9_nzc4x4_tree;
+ tokens = NZC4X4_TOKENS;
+ old_nzc_probs = cm->fc.nzc_probs_4x4[0][0][0];
+ new_nzc_probs = cpi->frame_nzc_probs_4x4[0][0][0];
+ nzc_counts = cm->fc.nzc_counts_4x4[0][0][0];
+ nzc_branch_ct = cpi->frame_nzc_branch_ct_4x4[0][0][0];
+ upd = NZC_UPDATE_PROB_4X4;
+ }
+ nodes = tokens - 1;
+ // Get the new probabilities and the branch counts
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ int offset_tokens = offset * tokens;
+ vp9_tree_probs_from_distribution(nzc_tree,
+ new_nzc_probs + offset_nodes,
+ nzc_branch_ct + offset_nodes,
+ nzc_counts + offset_tokens, 0);
+ }
+ }
+ }
+
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ for (t = 0; t < nodes; ++t) {
+ vp9_prob newp = new_nzc_probs[offset_nodes + t];
+ vp9_prob oldp = old_nzc_probs[offset_nodes + t];
+ int s, u = 0;
+#if defined(SEARCH_NEWP)
+ s = prob_diff_update_savings_search(nzc_branch_ct[offset_nodes],
+ oldp, &newp, upd);
+ if (s > 0 && newp != oldp)
+ u = 1;
+ if (u)
+ savings += s - (int)(vp9_cost_zero(upd));
+ else
+ savings -= (int)(vp9_cost_zero(upd));
+#else
+ s = prob_update_savings(nzc_branch_ct[offset_nodes],
+ oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+ if (u)
+ savings += s;
+#endif
+ update[u]++;
+ }
+ }
+ }
+ }
+ if (update[1] == 0 || savings < 0) {
+ vp9_write_bit(bc, 0);
+ } else {
+ vp9_write_bit(bc, 1);
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b;
+ int offset_nodes = offset * nodes;
+ for (t = 0; t < nodes; ++t) {
+ vp9_prob newp = new_nzc_probs[offset_nodes + t];
+ vp9_prob *oldp = &old_nzc_probs[offset_nodes + t];
+ int s, u = 0;
+#if defined(SEARCH_NEWP)
+ s = prob_diff_update_savings_search(nzc_branch_ct[offset_nodes],
+ *oldp, &newp, upd);
+ if (s > 0 && newp != *oldp)
+ u = 1;
+#else
+ s = prob_update_savings(nzc_branch_ct[offset_nodes],
+ *oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+#endif
+ vp9_write(bc, u, upd);
+ if (u) {
+ /* send/use new probability */
+ write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static void update_nzc_pcat_probs(VP9_COMP *cpi, vp9_writer* const bc) {
+ VP9_COMMON *cm = &cpi->common;
+ int c, t, b;
+ int update[2] = {0, 0};
+ int savings = 0;
+ vp9_prob upd = NZC_UPDATE_PROB_PCAT;
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ for (b = 0; b < bits; ++b) {
+ vp9_prob newp = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0],
+ cm->fc.nzc_pcat_counts[c][t][b][1]);
+ vp9_prob oldp = cm->fc.nzc_pcat_probs[c][t][b];
+ int s, u = 0;
+#if defined(SEARCH_NEWP)
+ s = prob_diff_update_savings_search(cm->fc.nzc_pcat_counts[c][t][b],
+ oldp, &newp, upd);
+ if (s > 0 && newp != oldp)
+ u = 1;
+ if (u)
+ savings += s - (int)(vp9_cost_zero(upd));
+ else
+ savings -= (int)(vp9_cost_zero(upd));
+#else
+ s = prob_update_savings(cm->fc.nzc_pcat_counts[c][t][b],
+ oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+ if (u)
+ savings += s;
+#endif
+ update[u]++;
+ }
+ }
+ }
+ if (update[1] == 0 || savings < 0) {
+ vp9_write_bit(bc, 0);
+ } else {
+ vp9_write_bit(bc, 1);
+ for (c = 0; c < MAX_NZC_CONTEXTS; ++c) {
+ for (t = 0; t < NZC_TOKENS_EXTRA; ++t) {
+ int bits = vp9_extranzcbits[t + NZC_TOKENS_NOEXTRA];
+ for (b = 0; b < bits; ++b) {
+ vp9_prob newp = get_binary_prob(cm->fc.nzc_pcat_counts[c][t][b][0],
+ cm->fc.nzc_pcat_counts[c][t][b][1]);
+ vp9_prob *oldp = &cm->fc.nzc_pcat_probs[c][t][b];
+ int s, u = 0;
+#if defined(SEARCH_NEWP)
+ s = prob_diff_update_savings_search(cm->fc.nzc_pcat_counts[c][t][b],
+ *oldp, &newp, upd);
+ if (s > 0 && newp != *oldp)
+ u = 1;
+#else
+ s = prob_update_savings(cm->fc.nzc_pcat_counts[c][t][b],
+ *oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+#endif
+ vp9_write(bc, u, upd);
+ if (u) {
+ /* send/use new probability */
+ write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
+ }
+ }
+}
+
+static void update_nzc_probs(VP9_COMP* cpi,
+ vp9_writer* const bc) {
+ update_nzc_probs_common(cpi, bc, 4);
+ if (cpi->common.txfm_mode != ONLY_4X4)
+ update_nzc_probs_common(cpi, bc, 8);
+ if (cpi->common.txfm_mode > ALLOW_8X8)
+ update_nzc_probs_common(cpi, bc, 16);
+ if (cpi->common.txfm_mode > ALLOW_16X16)
+ update_nzc_probs_common(cpi, bc, 32);
+#ifdef NZC_PCAT_UPDATE
+ update_nzc_pcat_probs(cpi, bc);
+#endif
+#ifdef NZC_STATS
+ if (!cpi->dummy_packing)
+ update_nzcstats(&cpi->common);
+#endif
+}
+#endif // CONFIG_CODE_NONZEROCOUNT
+
static void update_coef_probs_common(vp9_writer* const bc,
#ifdef ENTROPY_STATS
VP9_COMP *cpi,
@@ -1247,46 +2066,59 @@
vp9_coeff_probs *old_frame_coef_probs,
vp9_coeff_stats *frame_branch_ct,
int block_types) {
- int i, j, k, t;
+ int i, j, k, l, t;
int update[2] = {0, 0};
int savings;
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
+ const int entropy_nodes_update = UNCONSTRAINED_UPDATE_NODES;
+#else
+ const int entropy_nodes_update = ENTROPY_NODES;
+#endif
// vp9_prob bestupd = find_coef_update_prob(cpi);
/* dry run to see if there is any udpate at all needed */
savings = 0;
for (i = 0; i < block_types; ++i) {
- for (j = !i; j < COEF_BANDS; ++j) {
- int prev_coef_savings[ENTROPY_NODES] = {0};
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
- for (t = 0; t < ENTROPY_NODES; ++t) {
- vp9_prob newp = new_frame_coef_probs[i][j][k][t];
- const vp9_prob oldp = old_frame_coef_probs[i][j][k][t];
- const vp9_prob upd = COEF_UPDATE_PROB;
- int s = prev_coef_savings[t];
- int u = 0;
- if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
- continue;
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ // int prev_coef_savings[ENTROPY_NODES] = {0};
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ for (t = CONFIG_CODE_NONZEROCOUNT; t < entropy_nodes_update; ++t) {
+ vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+ const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
+ const vp9_prob upd = vp9_coef_update_prob[t];
+ int s; // = prev_coef_savings[t];
+ int u = 0;
+
+ if (l >= 3 && k == 0)
+ continue;
#if defined(SEARCH_NEWP)
- s = prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][t],
- oldp, &newp, upd);
- if (s > 0 && newp != oldp)
- u = 1;
- if (u)
- savings += s - (int)(vp9_cost_zero(upd));
- else
- savings -= (int)(vp9_cost_zero(upd));
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
+ if (t == UNCONSTRAINED_NODES - 1)
+ s = prob_diff_update_savings_search_model(
+ frame_branch_ct[i][j][k][l][0],
+ old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
+ else
+#endif
+ s = prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
+ if (s > 0 && newp != oldp)
+ u = 1;
+ if (u)
+ savings += s - (int)(vp9_cost_zero(upd));
+ else
+ savings -= (int)(vp9_cost_zero(upd));
#else
- s = prob_update_savings(
- frame_branch_ct[i][j][k][t],
- oldp, newp, upd);
- if (s > 0)
- u = 1;
- if (u)
- savings += s;
+ s = prob_update_savings(frame_branch_ct[i][j][k][l][t],
+ oldp, newp, upd);
+ if (s > 0)
+ u = 1;
+ if (u)
+ savings += s;
#endif
- update[u]++;
+ update[u]++;
+ }
}
}
}
@@ -1296,32 +2128,40 @@
/* Is coef updated at all */
if (update[1] == 0 || savings < 0) {
vp9_write_bit(bc, 0);
- } else {
- vp9_write_bit(bc, 1);
- for (i = 0; i < block_types; ++i) {
- for (j = !i; j < COEF_BANDS; ++j) {
- int prev_coef_savings[ENTROPY_NODES] = {0};
- for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
+ return;
+ }
+ vp9_write_bit(bc, 1);
+ for (i = 0; i < block_types; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ // int prev_coef_savings[ENTROPY_NODES] = {0};
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
// calc probs and branch cts for this frame only
- for (t = 0; t < ENTROPY_NODES; ++t) {
- vp9_prob newp = new_frame_coef_probs[i][j][k][t];
- vp9_prob *oldp = old_frame_coef_probs[i][j][k] + t;
- const vp9_prob upd = COEF_UPDATE_PROB;
- int s = prev_coef_savings[t];
+ for (t = CONFIG_CODE_NONZEROCOUNT; t < entropy_nodes_update; ++t) {
+ vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+ vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
+ const vp9_prob upd = vp9_coef_update_prob[t];
+ int s; // = prev_coef_savings[t];
int u = 0;
- if (k >= 3 && ((i == 0 && j == 1) || (i > 0 && j == 0)))
+ if (l >= 3 && k == 0)
continue;
#if defined(SEARCH_NEWP)
- s = prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][t],
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
+ if (t == UNCONSTRAINED_NODES - 1)
+ s = prob_diff_update_savings_search_model(
+ frame_branch_ct[i][j][k][l][0],
+ old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
+ else
+#endif
+ s = prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t],
*oldp, &newp, upd);
if (s > 0 && newp != *oldp)
u = 1;
#else
- s = prob_update_savings(
- frame_branch_ct[i][j][k][t],
- *oldp, newp, upd);
+ s = prob_update_savings(frame_branch_ct[i][j][k][l][t],
+ *oldp, newp, upd);
if (s > 0)
u = 1;
#endif
@@ -1328,12 +2168,17 @@
vp9_write(bc, u, upd);
#ifdef ENTROPY_STATS
if (!cpi->dummy_packing)
- ++tree_update_hist[i][j][k][t][u];
+ ++tree_update_hist[i][j][k][l][t][u];
#endif
if (u) {
/* send/use new probability */
write_prob_diff_update(bc, newp, *oldp);
*oldp = newp;
+#if CONFIG_MODELCOEFPROB && MODEL_BASED_UPDATE
+ if (t == UNCONSTRAINED_NODES - 1)
+ vp9_get_model_distribution(
+ newp, old_frame_coef_probs[i][j][k][l], i, j);
+#endif
}
}
}
@@ -1356,18 +2201,8 @@
cpi->frame_coef_probs_4x4,
cpi->common.fc.coef_probs_4x4,
cpi->frame_branch_ct_4x4,
- BLOCK_TYPES_4X4);
+ BLOCK_TYPES);
- update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
- cpi,
- hybrid_tree_update_hist_4x4,
-#endif
- cpi->frame_hybrid_coef_probs_4x4,
- cpi->common.fc.hybrid_coef_probs_4x4,
- cpi->frame_hybrid_branch_ct_4x4,
- BLOCK_TYPES_4X4);
-
/* do not do this if not even allowed */
if (cpi->common.txfm_mode != ONLY_4X4) {
update_coef_probs_common(bc,
@@ -1378,17 +2213,7 @@
cpi->frame_coef_probs_8x8,
cpi->common.fc.coef_probs_8x8,
cpi->frame_branch_ct_8x8,
- BLOCK_TYPES_8X8);
-
- update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
- cpi,
- hybrid_tree_update_hist_8x8,
-#endif
- cpi->frame_hybrid_coef_probs_8x8,
- cpi->common.fc.hybrid_coef_probs_8x8,
- cpi->frame_hybrid_branch_ct_8x8,
- BLOCK_TYPES_8X8);
+ BLOCK_TYPES);
}
if (cpi->common.txfm_mode > ALLOW_8X8) {
@@ -1400,16 +2225,7 @@
cpi->frame_coef_probs_16x16,
cpi->common.fc.coef_probs_16x16,
cpi->frame_branch_ct_16x16,
- BLOCK_TYPES_16X16);
- update_coef_probs_common(bc,
-#ifdef ENTROPY_STATS
- cpi,
- hybrid_tree_update_hist_16x16,
-#endif
- cpi->frame_hybrid_coef_probs_16x16,
- cpi->common.fc.hybrid_coef_probs_16x16,
- cpi->frame_hybrid_branch_ct_16x16,
- BLOCK_TYPES_16X16);
+ BLOCK_TYPES);
}
if (cpi->common.txfm_mode > ALLOW_16X16) {
@@ -1421,7 +2237,7 @@
cpi->frame_coef_probs_32x32,
cpi->common.fc.coef_probs_32x32,
cpi->frame_branch_ct_32x32,
- BLOCK_TYPES_32X32);
+ BLOCK_TYPES);
}
}
@@ -1523,34 +2339,49 @@
* and color type.
*/
if (oh.type == KEY_FRAME) {
- int v;
-
// Start / synch code
cx_data[0] = 0x9D;
cx_data[1] = 0x01;
cx_data[2] = 0x2a;
+ extra_bytes_packed = 3;
+ cx_data += extra_bytes_packed;
+ }
+ {
+ int v;
- v = (pc->horiz_scale << 14) | pc->Width;
- cx_data[3] = v;
- cx_data[4] = v >> 8;
+ if (pc->width != pc->display_width || pc->height != pc->display_height) {
+ v = pc->display_width;
+ cx_data[0] = v;
+ cx_data[1] = v >> 8;
- v = (pc->vert_scale << 14) | pc->Height;
- cx_data[5] = v;
- cx_data[6] = v >> 8;
+ v = pc->display_height;
+ cx_data[2] = v;
+ cx_data[3] = v >> 8;
+ cx_data += 4;
+ extra_bytes_packed += 4;
+ }
- extra_bytes_packed = 7;
- cx_data += extra_bytes_packed;
+ v = pc->width;
+ cx_data[0] = v;
+ cx_data[1] = v >> 8;
- vp9_start_encode(&header_bc, cx_data);
+ v = pc->height;
+ cx_data[2] = v;
+ cx_data[3] = v >> 8;
- // signal clr type
- vp9_write_bit(&header_bc, pc->clr_type);
- vp9_write_bit(&header_bc, pc->clamp_type);
-
- } else {
- vp9_start_encode(&header_bc, cx_data);
+ extra_bytes_packed += 4;
+ cx_data += 4;
}
+ vp9_start_encode(&header_bc, cx_data);
+
+ // TODO(jkoleszar): remove these two unused bits?
+ vp9_write_bit(&header_bc, pc->clr_type);
+ vp9_write_bit(&header_bc, pc->clamp_type);
+
+ // error resilient mode
+ vp9_write_bit(&header_bc, pc->error_resilient_mode);
+
// Signal whether or not Segmentation is enabled
vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0);
@@ -1655,7 +2486,10 @@
pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
vp9_write_literal(&header_bc, pc->sb32_coded, 8);
- {
+ vp9_write_bit(&header_bc, cpi->mb.e_mbd.lossless);
+ if (cpi->mb.e_mbd.lossless) {
+ pc->txfm_mode = ONLY_4X4;
+ } else {
if (pc->txfm_mode == TX_MODE_SELECT) {
pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +
cpi->txfm_count_16x16p[TX_4X4] +
@@ -1699,6 +2533,14 @@
vp9_write_bit(&header_bc, pc->filter_type);
vp9_write_literal(&header_bc, pc->filter_level, 6);
vp9_write_literal(&header_bc, pc->sharpness_level, 3);
+#if CONFIG_LOOP_DERING
+ if (pc->dering_enabled) {
+ vp9_write_bit(&header_bc, 1);
+ vp9_write_literal(&header_bc, pc->dering_enabled - 1, 4);
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ }
+#endif
// Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled).
vp9_write_bit(&header_bc, (xd->mode_ref_lf_delta_enabled) ? 1 : 0);
@@ -1765,30 +2607,36 @@
// Transmit Dc, Second order and Uv quantizer delta information
put_delta_q(&header_bc, pc->y1dc_delta_q);
- put_delta_q(&header_bc, pc->y2dc_delta_q);
- put_delta_q(&header_bc, pc->y2ac_delta_q);
put_delta_q(&header_bc, pc->uvdc_delta_q);
put_delta_q(&header_bc, pc->uvac_delta_q);
// When there is a key frame all reference buffers are updated using the new key frame
if (pc->frame_type != KEY_FRAME) {
+ int refresh_mask;
+
// Should the GF or ARF be updated using the transmitted frame or buffer
- vp9_write_bit(&header_bc, pc->refresh_golden_frame);
- vp9_write_bit(&header_bc, pc->refresh_alt_ref_frame);
+ if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
+ /* Preserve the previously existing golden frame and update the frame in
+ * the alt ref slot instead. This is highly specific to the use of
+ * alt-ref as a forward reference, and this needs to be generalized as
+ * other uses are implemented (like RTC/temporal scaling)
+ *
+ * gld_fb_idx and alt_fb_idx need to be swapped for future frames, but
+ * that happens in vp9_onyx_if.c:update_reference_frames() so that it can
+ * be done outside of the recode loop.
+ */
+ refresh_mask = (cpi->refresh_last_frame << cpi->lst_fb_idx) |
+ (cpi->refresh_golden_frame << cpi->alt_fb_idx);
+ } else {
+ refresh_mask = (cpi->refresh_last_frame << cpi->lst_fb_idx) |
+ (cpi->refresh_golden_frame << cpi->gld_fb_idx) |
+ (cpi->refresh_alt_ref_frame << cpi->alt_fb_idx);
+ }
+ vp9_write_literal(&header_bc, refresh_mask, NUM_REF_FRAMES);
+ vp9_write_literal(&header_bc, cpi->lst_fb_idx, NUM_REF_FRAMES_LG2);
+ vp9_write_literal(&header_bc, cpi->gld_fb_idx, NUM_REF_FRAMES_LG2);
+ vp9_write_literal(&header_bc, cpi->alt_fb_idx, NUM_REF_FRAMES_LG2);
- // For inter frames the current default behavior is that when
- // cm->refresh_golden_frame is set we copy the old GF over to
- // the ARF buffer. This is purely an encoder decision at present.
- if (pc->refresh_golden_frame)
- pc->copy_buffer_to_arf = 2;
-
- // If not being updated from current frame should either GF or ARF be updated from another buffer
- if (!pc->refresh_golden_frame)
- vp9_write_literal(&header_bc, pc->copy_buffer_to_gf, 2);
-
- if (!pc->refresh_alt_ref_frame)
- vp9_write_literal(&header_bc, pc->copy_buffer_to_arf, 2);
-
// Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer)
vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
@@ -1831,10 +2679,13 @@
#endif
}
- vp9_write_bit(&header_bc, pc->refresh_entropy_probs);
+ if (!pc->error_resilient_mode) {
+ vp9_write_bit(&header_bc, pc->refresh_entropy_probs);
+ vp9_write_bit(&header_bc, pc->frame_parallel_decoding_mode);
+ }
- if (pc->frame_type != KEY_FRAME)
- vp9_write_bit(&header_bc, pc->refresh_last_frame);
+ vp9_write_literal(&header_bc, pc->frame_context_idx,
+ NUM_FRAME_CONTEXTS_LG2);
#ifdef ENTROPY_STATS
if (pc->frame_type == INTER_FRAME)
@@ -1848,7 +2699,13 @@
if (pc->frame_type != KEY_FRAME) {
int i, j;
int new_context[INTER_MODE_CONTEXTS][4];
- update_mode_probs(pc, new_context);
+ if (!cpi->dummy_packing) {
+ update_inter_mode_probs(pc, new_context);
+ } else {
+ // In dummy pack assume context unchanged.
+ vpx_memcpy(new_context, pc->fc.vp9_mode_contexts,
+ sizeof(pc->fc.vp9_mode_contexts));
+ }
for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
for (j = 0; j < 4; j++) {
@@ -1902,18 +2759,33 @@
vp9_copy(cpi->common.fc.pre_coef_probs_4x4,
cpi->common.fc.coef_probs_4x4);
- vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_4x4,
- cpi->common.fc.hybrid_coef_probs_4x4);
vp9_copy(cpi->common.fc.pre_coef_probs_8x8,
cpi->common.fc.coef_probs_8x8);
- vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_8x8,
- cpi->common.fc.hybrid_coef_probs_8x8);
vp9_copy(cpi->common.fc.pre_coef_probs_16x16,
cpi->common.fc.coef_probs_16x16);
- vp9_copy(cpi->common.fc.pre_hybrid_coef_probs_16x16,
- cpi->common.fc.hybrid_coef_probs_16x16);
vp9_copy(cpi->common.fc.pre_coef_probs_32x32,
cpi->common.fc.coef_probs_32x32);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_copy(cpi->common.fc.pre_nzc_probs_4x4,
+ cpi->common.fc.nzc_probs_4x4);
+ vp9_copy(cpi->common.fc.pre_nzc_probs_8x8,
+ cpi->common.fc.nzc_probs_8x8);
+ vp9_copy(cpi->common.fc.pre_nzc_probs_16x16,
+ cpi->common.fc.nzc_probs_16x16);
+ vp9_copy(cpi->common.fc.pre_nzc_probs_32x32,
+ cpi->common.fc.nzc_probs_32x32);
+ vp9_copy(cpi->common.fc.pre_nzc_pcat_probs,
+ cpi->common.fc.nzc_pcat_probs);
+ // NOTE that if the counts are reset, we also need to uncomment
+ // the count updates in the write_nzc function
+ /*
+ vp9_zero(cpi->common.fc.nzc_counts_4x4);
+ vp9_zero(cpi->common.fc.nzc_counts_8x8);
+ vp9_zero(cpi->common.fc.nzc_counts_16x16);
+ vp9_zero(cpi->common.fc.nzc_counts_32x32);
+ vp9_zero(cpi->common.fc.nzc_pcat_counts);
+ */
+#endif
vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob);
vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob);
vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
@@ -1930,6 +2802,9 @@
vp9_zero(cpi->common.fc.mv_ref_ct)
update_coef_probs(cpi, &header_bc);
+#if CONFIG_CODE_NONZEROCOUNT
+ update_nzc_probs(cpi, &header_bc);
+#endif
#ifdef ENTROPY_STATS
active_section = 2;
@@ -1941,8 +2816,9 @@
int k;
vp9_update_skip_probs(cpi);
- for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+ for (k = 0; k < MBSKIP_CONTEXTS; ++k) {
vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8);
+ }
}
if (pc->frame_type == KEY_FRAME) {
@@ -1960,7 +2836,7 @@
if (pc->mcomp_filter_type == SWITCHABLE)
update_switchable_interp_probs(cpi, &header_bc);
- #if CONFIG_COMP_INTERINTRA_PRED
+#if CONFIG_COMP_INTERINTRA_PRED
if (pc->use_interintra) {
vp9_cond_prob_update(&header_bc,
&pc->fc.interintra_prob,
@@ -1995,6 +2871,25 @@
vp9_write_nmv_probs(cpi, xd->allow_high_precision_mv, &header_bc);
}
+ /* tiling */
+ {
+ int min_log2_tiles, delta_log2_tiles, n_tile_bits, n;
+
+ vp9_get_tile_n_bits(pc, &min_log2_tiles, &delta_log2_tiles);
+ n_tile_bits = pc->log2_tile_columns - min_log2_tiles;
+ for (n = 0; n < delta_log2_tiles; n++) {
+ if (n_tile_bits--) {
+ vp9_write_bit(&header_bc, 1);
+ } else {
+ vp9_write_bit(&header_bc, 0);
+ break;
+ }
+ }
+ vp9_write_bit(&header_bc, pc->log2_tile_rows != 0);
+ if (pc->log2_tile_rows != 0)
+ vp9_write_bit(&header_bc, pc->log2_tile_rows != 1);
+ }
+
vp9_stop_encode(&header_bc);
oh.first_partition_length_in_bytes = header_bc.pos;
@@ -2001,11 +2896,15 @@
/* update frame tag */
{
- int v = (oh.first_partition_length_in_bytes << 5) |
+ int scaling = (pc->width != pc->display_width ||
+ pc->height != pc->display_height);
+ int v = (oh.first_partition_length_in_bytes << 8) |
+ (scaling << 5) |
(oh.show_frame << 4) |
(oh.version << 1) |
oh.type;
+ assert(oh.first_partition_length_in_bytes <= 0xffff);
dest[0] = v;
dest[1] = v >> 8;
dest[2] = v >> 16;
@@ -2012,23 +2911,57 @@
}
*size = VP9_HEADER_SIZE + extra_bytes_packed + header_bc.pos;
- vp9_start_encode(&residual_bc, cx_data + header_bc.pos);
if (pc->frame_type == KEY_FRAME) {
decide_kf_ymode_entropy(cpi);
- write_modes(cpi, &residual_bc);
} else {
/* This is not required if the counts in cpi are consistent with the
* final packing pass */
// if (!cpi->dummy_packing) vp9_zero(cpi->NMVcount);
- write_modes(cpi, &residual_bc);
-
- vp9_update_mode_context(&cpi->common);
}
- vp9_stop_encode(&residual_bc);
+ {
+ int tile_row, tile_col, total_size = 0;
+ unsigned char *data_ptr = cx_data + header_bc.pos;
+ TOKENEXTRA *tok[1 << 6], *tok_end;
- *size += residual_bc.pos;
+ tok[0] = cpi->tok;
+ for (tile_col = 1; tile_col < pc->tile_columns; tile_col++)
+ tok[tile_col] = tok[tile_col - 1] + cpi->tok_count[tile_col - 1];
+
+ for (tile_row = 0; tile_row < pc->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(pc, tile_row);
+ tok_end = cpi->tok + cpi->tok_count[0];
+ for (tile_col = 0; tile_col < pc->tile_columns;
+ tile_col++, tok_end += cpi->tok_count[tile_col]) {
+ vp9_get_tile_col_offsets(pc, tile_col);
+
+ if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1)
+ vp9_start_encode(&residual_bc, data_ptr + total_size + 4);
+ else
+ vp9_start_encode(&residual_bc, data_ptr + total_size);
+ write_modes(cpi, &residual_bc, &tok[tile_col], tok_end);
+ vp9_stop_encode(&residual_bc);
+ if (tile_col < pc->tile_columns - 1 || tile_row < pc->tile_rows - 1) {
+ /* size of this tile */
+ data_ptr[total_size + 0] = residual_bc.pos;
+ data_ptr[total_size + 1] = residual_bc.pos >> 8;
+ data_ptr[total_size + 2] = residual_bc.pos >> 16;
+ data_ptr[total_size + 3] = residual_bc.pos >> 24;
+ total_size += 4;
+ }
+
+ total_size += residual_bc.pos;
+ }
+ }
+
+ assert((unsigned int)(tok[0] - cpi->tok) == cpi->tok_count[0]);
+ for (tile_col = 1; tile_col < pc->tile_columns; tile_col++)
+ assert((unsigned int)(tok[tile_col] - tok[tile_col - 1]) ==
+ cpi->tok_count[tile_col]);
+
+ *size += total_size;
+ }
}
#ifdef ENTROPY_STATS
@@ -2035,19 +2968,23 @@
static void print_tree_update_for_type(FILE *f,
vp9_coeff_stats *tree_update_hist,
int block_types, const char *header) {
- int i, j, k, l;
+ int i, j, k, l, m;
fprintf(f, "const vp9_coeff_prob %s = {\n", header);
for (i = 0; i < block_types; i++) {
fprintf(f, " { \n");
- for (j = 0; j < COEF_BANDS; j++) {
- fprintf(f, " {\n");
- for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
- fprintf(f, " {");
- for (l = 0; l < ENTROPY_NODES; l++) {
- fprintf(f, "%3d, ",
- get_binary_prob(tree_update_hist[i][j][k][l][0],
- tree_update_hist[i][j][k][l][1]));
+ for (j = 0; j < REF_TYPES; j++) {
+ fprintf(f, " { \n");
+ for (k = 0; k < COEF_BANDS; k++) {
+ fprintf(f, " {\n");
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ fprintf(f, " {");
+ for (m = 0; m < ENTROPY_NODES; m++) {
+ fprintf(f, "%3d, ",
+ get_binary_prob(tree_update_hist[i][j][k][l][m][0],
+ tree_update_hist[i][j][k][l][m][1]));
+ }
+ fprintf(f, "},\n");
}
fprintf(f, "},\n");
}
@@ -2062,21 +2999,14 @@
FILE *f = fopen("coefupdprob.h", "w");
fprintf(f, "\n/* Update probabilities for token entropy tree. */\n\n");
- print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES_4X4,
- "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]");
- print_tree_update_for_type(f, hybrid_tree_update_hist_4x4, BLOCK_TYPES_4X4,
- "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]");
- print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES_8X8,
- "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]");
- print_tree_update_for_type(f, hybrid_tree_update_hist_8x8, BLOCK_TYPES_8X8,
- "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]");
- print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES_16X16,
- "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]");
- print_tree_update_for_type(f, hybrid_tree_update_hist_16x16,
- BLOCK_TYPES_16X16,
- "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]");
- print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32,
- "vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]");
+ print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES,
+ "vp9_coef_update_probs_4x4[BLOCK_TYPES]");
+ print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES,
+ "vp9_coef_update_probs_8x8[BLOCK_TYPES]");
+ print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES,
+ "vp9_coef_update_probs_16x16[BLOCK_TYPES]");
+ print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES,
+ "vp9_coef_update_probs_32x32[BLOCK_TYPES]");
fclose(f);
f = fopen("treeupdate.bin", "wb");
@@ -2083,6 +3013,7 @@
fwrite(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f);
fwrite(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
fwrite(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
+ fwrite(tree_update_hist_32x32, sizeof(tree_update_hist_32x32), 1, f);
fclose(f);
}
#endif
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -50,10 +50,7 @@
int src;
int src_stride;
- int eob_max_offset;
- int eob_max_offset_8x8;
- int eob_max_offset_16x16;
- int eob_max_offset_32x32;
+ int skip_block;
} BLOCK;
typedef struct {
@@ -86,20 +83,13 @@
int64_t txfm_rd_diff[NB_TXFM_MODES];
} PICK_MODE_CONTEXT;
-typedef struct superblock {
- DECLARE_ALIGNED(16, int16_t, src_diff[32*32+16*16*2]);
- DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]);
-} SUPERBLOCK;
-
-typedef struct macroblock {
- DECLARE_ALIGNED(16, int16_t, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
- DECLARE_ALIGNED(16, int16_t, coeff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
+typedef struct macroblock MACROBLOCK;
+struct macroblock {
+ DECLARE_ALIGNED(16, int16_t, src_diff[64*64+32*32*2]);
+ DECLARE_ALIGNED(16, int16_t, coeff[64*64+32*32*2]);
// 16 Y blocks, 4 U blocks, 4 V blocks,
- // 1 DC 2nd order block each with 16 entries
- BLOCK block[25];
+ BLOCK block[24];
- SUPERBLOCK sb_coeff_data;
-
YV12_BUFFER_CONFIG src;
MACROBLOCKD e_mbd;
@@ -160,8 +150,13 @@
unsigned char *active_ptr;
- vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4];
- vp9_coeff_count hybrid_token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES_4X4];
+ vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ unsigned int nzc_costs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][17];
+ unsigned int nzc_costs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][65];
+ unsigned int nzc_costs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][257];
+ unsigned int nzc_costs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][1025];
+#endif
int optimize;
@@ -172,17 +167,14 @@
PICK_MODE_CONTEXT sb32_context[4];
PICK_MODE_CONTEXT sb64_context;
- void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);
- void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch);
- void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch);
- void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);
- void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
- void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch);
- void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch);
- void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch);
- void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
- void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
- void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);
-} MACROBLOCK;
+ void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
+ void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
+ void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx);
+ void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2);
+ void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type);
+ void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type);
+};
#endif // VP9_ENCODER_VP9_BLOCK_H_
--- a/vp9/encoder/vp9_boolhuff.c
+++ b/vp9/encoder/vp9_boolhuff.c
@@ -40,7 +40,6 @@
};
void vp9_start_encode(BOOL_CODER *br, unsigned char *source) {
-
br->lowvalue = 0;
br->range = 255;
br->value = 0;
@@ -54,6 +53,10 @@
for (i = 0; i < 32; i++)
encode_bool(br, 0, 128);
+
+ // Ensure there's no ambigous collision with any index marker bytes
+ if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0)
+ br->buffer[br->pos++] = 0;
}
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -15,806 +15,545 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_idct.h"
-// TODO: these transforms can be converted into integer forms to reduce
-// the complexity
-static const float dct_4[16] = {
- 0.500000000000000, 0.500000000000000, 0.500000000000000, 0.500000000000000,
- 0.653281482438188, 0.270598050073099, -0.270598050073099, -0.653281482438188,
- 0.500000000000000, -0.500000000000000, -0.500000000000000, 0.500000000000000,
- 0.270598050073099, -0.653281482438188, 0.653281482438188, -0.270598050073099
-};
+static void fdct4_1d(int16_t *input, int16_t *output) {
+ int16_t step[4];
+ int temp1, temp2;
-static const float adst_4[16] = {
- 0.228013428883779, 0.428525073124360, 0.577350269189626, 0.656538502008139,
- 0.577350269189626, 0.577350269189626, 0.000000000000000, -0.577350269189626,
- 0.656538502008139, -0.228013428883779, -0.577350269189626, 0.428525073124359,
- 0.428525073124360, -0.656538502008139, 0.577350269189626, -0.228013428883779
-};
+ step[0] = input[0] + input[3];
+ step[1] = input[1] + input[2];
+ step[2] = input[1] - input[2];
+ step[3] = input[0] - input[3];
-static const float dct_8[64] = {
- 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
- 0.353553390593274, 0.353553390593274, 0.353553390593274, 0.353553390593274,
- 0.490392640201615, 0.415734806151273, 0.277785116509801, 0.097545161008064,
- -0.097545161008064, -0.277785116509801, -0.415734806151273, -0.490392640201615,
- 0.461939766255643, 0.191341716182545, -0.191341716182545, -0.461939766255643,
- -0.461939766255643, -0.191341716182545, 0.191341716182545, 0.461939766255643,
- 0.415734806151273, -0.097545161008064, -0.490392640201615, -0.277785116509801,
- 0.277785116509801, 0.490392640201615, 0.097545161008064, -0.415734806151273,
- 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
- 0.353553390593274, -0.353553390593274, -0.353553390593274, 0.353553390593274,
- 0.277785116509801, -0.490392640201615, 0.097545161008064, 0.415734806151273,
- -0.415734806151273, -0.097545161008064, 0.490392640201615, -0.277785116509801,
- 0.191341716182545, -0.461939766255643, 0.461939766255643, -0.191341716182545,
- -0.191341716182545, 0.461939766255643, -0.461939766255643, 0.191341716182545,
- 0.097545161008064, -0.277785116509801, 0.415734806151273, -0.490392640201615,
- 0.490392640201615, -0.415734806151273, 0.277785116509801, -0.097545161008064
-};
+ temp1 = (step[0] + step[1]) * cospi_16_64;
+ temp2 = (step[0] - step[1]) * cospi_16_64;
+ output[0] = dct_const_round_shift(temp1);
+ output[2] = dct_const_round_shift(temp2);
+ temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
+ output[1] = dct_const_round_shift(temp1);
+ output[3] = dct_const_round_shift(temp2);
+}
-static const float adst_8[64] = {
- 0.089131608307533, 0.175227946595735, 0.255357107325376, 0.326790388032145,
- 0.387095214016349, 0.434217976756762, 0.466553967085785, 0.483002021635509,
- 0.255357107325376, 0.434217976756762, 0.483002021635509, 0.387095214016349,
- 0.175227946595735, -0.089131608307533, -0.326790388032145, -0.466553967085785,
- 0.387095214016349, 0.466553967085785, 0.175227946595735, -0.255357107325376,
- -0.483002021635509, -0.326790388032145, 0.089131608307533, 0.434217976756762,
- 0.466553967085785, 0.255357107325376, -0.326790388032145, -0.434217976756762,
- 0.089131608307533, 0.483002021635509, 0.175227946595735, -0.387095214016348,
- 0.483002021635509, -0.089131608307533, -0.466553967085785, 0.175227946595735,
- 0.434217976756762, -0.255357107325376, -0.387095214016348, 0.326790388032145,
- 0.434217976756762, -0.387095214016348, -0.089131608307533, 0.466553967085786,
- -0.326790388032145, -0.175227946595735, 0.483002021635509, -0.255357107325375,
- 0.326790388032145, -0.483002021635509, 0.387095214016349, -0.089131608307534,
- -0.255357107325377, 0.466553967085785, -0.434217976756762, 0.175227946595736,
- 0.175227946595735, -0.326790388032145, 0.434217976756762, -0.483002021635509,
- 0.466553967085785, -0.387095214016348, 0.255357107325376, -0.089131608307532
-};
+void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
+ int16_t out[4 * 4];
+ int16_t *outptr = &out[0];
+ const int short_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
-/* Converted the transforms to integers. */
-static const int16_t dct_i4[16] = {
- 16384, 16384, 16384, 16384,
- 21407, 8867, -8867, -21407,
- 16384, -16384, -16384, 16384,
- 8867, -21407, 21407, -8867
-};
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j * short_pitch + i] << 4;
+ if (i == 0 && temp_in[0])
+ temp_in[0] += 1;
+ fdct4_1d(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ outptr[j * 4 + i] = temp_out[j];
+ }
-static const int16_t adst_i4[16] = {
- 7472, 14042, 18919, 21513,
- 18919, 18919, 0, -18919,
- 21513, -7472, -18919, 14042,
- 14042, -21513, 18919, -7472
-};
+ // Rows
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j + i * 4];
+ fdct4_1d(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ }
+}
-static const int16_t dct_i8[64] = {
- 11585, 11585, 11585, 11585,
- 11585, 11585, 11585, 11585,
- 16069, 13623, 9102, 3196,
- -3196, -9102, -13623, -16069,
- 15137, 6270, -6270, -15137,
- -15137, -6270, 6270, 15137,
- 13623, -3196, -16069, -9102,
- 9102, 16069, 3196, -13623,
- 11585, -11585, -11585, 11585,
- 11585, -11585, -11585, 11585,
- 9102, -16069, 3196, 13623,
- -13623, -3196, 16069, -9102,
- 6270, -15137, 15137, -6270,
- -6270, 15137, -15137, 6270,
- 3196, -9102, 13623, -16069,
- 16069, -13623, 9102, -3196
-};
+static void fadst4_1d(int16_t *input, int16_t *output) {
+ int x0, x1, x2, x3;
+ int s0, s1, s2, s3, s4, s5, s6, s7;
-static const int16_t adst_i8[64] = {
- 2921, 5742, 8368, 10708,
- 12684, 14228, 15288, 15827,
- 8368, 14228, 15827, 12684,
- 5742, -2921, -10708, -15288,
- 12684, 15288, 5742, -8368,
- -15827, -10708, 2921, 14228,
- 15288, 8368, -10708, -14228,
- 2921, 15827, 5742, -12684,
- 15827, -2921, -15288, 5742,
- 14228, -8368, -12684, 10708,
- 14228, -12684, -2921, 15288,
- -10708, -5742, 15827, -8368,
- 10708, -15827, 12684, -2921,
- -8368, 15288, -14228, 5742,
- 5742, -10708, 14228, -15827,
- 15288, -12684, 8368, -2921
-};
+ x0 = input[0];
+ x1 = input[1];
+ x2 = input[2];
+ x3 = input[3];
-static const float dct_16[256] = {
- 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
- 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
- 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
- -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
- 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
- -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
- 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
- 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
- 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
- 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
- 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
- -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
- 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
- -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
- 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
- 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
- 0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000,
- 0.250000, -0.250000, -0.250000, 0.250000, 0.250000, -0.250000, -0.250000, 0.250000,
- 0.224292, -0.311806, -0.102631, 0.351851, -0.034654, -0.338330, 0.166664, 0.273300,
- -0.273300, -0.166664, 0.338330, 0.034654, -0.351851, 0.102631, 0.311806, -0.224292,
- 0.196424, -0.346760, 0.068975, 0.293969, -0.293969, -0.068975, 0.346760, -0.196424,
- -0.196424, 0.346760, -0.068975, -0.293969, 0.293969, 0.068975, -0.346760, 0.196424,
- 0.166664, -0.351851, 0.224292, 0.102631, -0.338330, 0.273300, 0.034654, -0.311806,
- 0.311806, -0.034654, -0.273300, 0.338330, -0.102631, -0.224292, 0.351851, -0.166664,
- 0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299,
- 0.135299, -0.326641, 0.326641, -0.135299, -0.135299, 0.326641, -0.326641, 0.135299,
- 0.102631, -0.273300, 0.351851, -0.311806, 0.166664, 0.034654, -0.224292, 0.338330,
- -0.338330, 0.224292, -0.034654, -0.166664, 0.311806, -0.351851, 0.273300, -0.102631,
- 0.068975, -0.196424, 0.293969, -0.346760, 0.346760, -0.293969, 0.196424, -0.068975,
- -0.068975, 0.196424, -0.293969, 0.346760, -0.346760, 0.293969, -0.196424, 0.068975,
- 0.034654, -0.102631, 0.166664, -0.224292, 0.273300, -0.311806, 0.338330, -0.351851,
- 0.351851, -0.338330, 0.311806, -0.273300, 0.224292, -0.166664, 0.102631, -0.034654
-};
+ if (!(x0 | x1 | x2 | x3)) {
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
+ }
-static const float adst_16[256] = {
- 0.033094, 0.065889, 0.098087, 0.129396, 0.159534, 0.188227, 0.215215, 0.240255,
- 0.263118, 0.283599, 0.301511, 0.316693, 0.329007, 0.338341, 0.344612, 0.347761,
- 0.098087, 0.188227, 0.263118, 0.316693, 0.344612, 0.344612, 0.316693, 0.263118,
- 0.188227, 0.098087, 0.000000, -0.098087, -0.188227, -0.263118, -0.316693, -0.344612,
- 0.159534, 0.283599, 0.344612, 0.329007, 0.240255, 0.098087, -0.065889, -0.215215,
- -0.316693, -0.347761, -0.301511, -0.188227, -0.033094, 0.129396, 0.263118, 0.338341,
- 0.215215, 0.338341, 0.316693, 0.159534, -0.065889, -0.263118, -0.347761, -0.283599,
- -0.098087, 0.129396, 0.301511, 0.344612, 0.240255, 0.033094, -0.188227, -0.329007,
- 0.263118, 0.344612, 0.188227, -0.098087, -0.316693, -0.316693, -0.098087, 0.188227,
- 0.344612, 0.263118, 0.000000, -0.263118, -0.344612, -0.188227, 0.098087, 0.316693,
- 0.301511, 0.301511, 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511,
- 0.000000, -0.301511, -0.301511, -0.000000, 0.301511, 0.301511, 0.000000, -0.301511,
- 0.329007, 0.215215, -0.188227, -0.338341, -0.033094, 0.316693, 0.240255, -0.159534,
- -0.344612, -0.065889, 0.301511, 0.263118, -0.129396, -0.347761, -0.098087, 0.283599,
- 0.344612, 0.098087, -0.316693, -0.188227, 0.263118, 0.263118, -0.188227, -0.316693,
- 0.098087, 0.344612, 0.000000, -0.344612, -0.098087, 0.316693, 0.188227, -0.263118,
- 0.347761, -0.033094, -0.344612, 0.065889, 0.338341, -0.098087, -0.329007, 0.129396,
- 0.316693, -0.159534, -0.301511, 0.188227, 0.283599, -0.215215, -0.263118, 0.240255,
- 0.338341, -0.159534, -0.263118, 0.283599, 0.129396, -0.344612, 0.033094, 0.329007,
- -0.188227, -0.240255, 0.301511, 0.098087, -0.347761, 0.065889, 0.316693, -0.215215,
- 0.316693, -0.263118, -0.098087, 0.344612, -0.188227, -0.188227, 0.344612, -0.098087,
- -0.263118, 0.316693, 0.000000, -0.316693, 0.263118, 0.098087, -0.344612, 0.188227,
- 0.283599, -0.329007, 0.098087, 0.215215, -0.347761, 0.188227, 0.129396, -0.338341,
- 0.263118, 0.033094, -0.301511, 0.316693, -0.065889, -0.240255, 0.344612, -0.159534,
- 0.240255, -0.347761, 0.263118, -0.033094, -0.215215, 0.344612, -0.283599, 0.065889,
- 0.188227, -0.338341, 0.301511, -0.098087, -0.159534, 0.329007, -0.316693, 0.129396,
- 0.188227, -0.316693, 0.344612, -0.263118, 0.098087, 0.098087, -0.263118, 0.344612,
- -0.316693, 0.188227, 0.000000, -0.188227, 0.316693, -0.344612, 0.263118, -0.098087,
- 0.129396, -0.240255, 0.316693, -0.347761, 0.329007, -0.263118, 0.159534, -0.033094,
- -0.098087, 0.215215, -0.301511, 0.344612, -0.338341, 0.283599, -0.188227, 0.065889,
- 0.065889, -0.129396, 0.188227, -0.240255, 0.283599, -0.316693, 0.338341, -0.347761,
- 0.344612, -0.329007, 0.301511, -0.263118, 0.215215, -0.159534, 0.098087, -0.033094
-};
+ s0 = sinpi_1_9 * x0;
+ s1 = sinpi_4_9 * x0;
+ s2 = sinpi_2_9 * x1;
+ s3 = sinpi_1_9 * x1;
+ s4 = sinpi_3_9 * x2;
+ s5 = sinpi_4_9 * x3;
+ s6 = sinpi_2_9 * x3;
+ s7 = x0 + x1 - x3;
-/* Converted the transforms to integers. */
-static const int16_t dct_i16[256] = {
- 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
- 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
- 11529, 11086, 10217, 8955, 7350, 5461, 3363, 1136,
- -1136, -3363, -5461, -7350, -8955, -10217, -11086, -11529,
- 11363, 9633, 6436, 2260, -2260, -6436, -9633, -11363,
- -11363, -9633, -6436, -2260, 2260, 6436, 9633, 11363,
- 11086, 7350, 1136, -5461, -10217, -11529, -8955, -3363,
- 3363, 8955, 11529, 10217, 5461, -1136, -7350, -11086,
- 10703, 4433, -4433, -10703, -10703, -4433, 4433, 10703,
- 10703, 4433, -4433, -10703, -10703, -4433, 4433, 10703,
- 10217, 1136, -8955, -11086, -3363, 7350, 11529, 5461,
- -5461, -11529, -7350, 3363, 11086, 8955, -1136, -10217,
- 9633, -2260, -11363, -6436, 6436, 11363, 2260, -9633,
- -9633, 2260, 11363, 6436, -6436, -11363, -2260, 9633,
- 8955, -5461, -11086, 1136, 11529, 3363, -10217, -7350,
- 7350, 10217, -3363, -11529, -1136, 11086, 5461, -8955,
- 8192, -8192, -8192, 8192, 8192, -8192, -8192, 8192,
- 8192, -8192, -8192, 8192, 8192, -8192, -8192, 8192,
- 7350, -10217, -3363, 11529, -1136, -11086, 5461, 8955,
- -8955, -5461, 11086, 1136, -11529, 3363, 10217, -7350,
- 6436, -11363, 2260, 9633, -9633, -2260, 11363, -6436,
- -6436, 11363, -2260, -9633, 9633, 2260, -11363, 6436,
- 5461, -11529, 7350, 3363, -11086, 8955, 1136, -10217,
- 10217, -1136, -8955, 11086, -3363, -7350, 11529, -5461,
- 4433, -10703, 10703, -4433, -4433, 10703, -10703, 4433,
- 4433, -10703, 10703, -4433, -4433, 10703, -10703, 4433,
- 3363, -8955, 11529, -10217, 5461, 1136, -7350, 11086,
- -11086, 7350, -1136, -5461, 10217, -11529, 8955, -3363,
- 2260, -6436, 9633, -11363, 11363, -9633, 6436, -2260,
- -2260, 6436, -9633, 11363, -11363, 9633, -6436, 2260,
- 1136, -3363, 5461, -7350, 8955, -10217, 11086, -11529,
- 11529, -11086, 10217, -8955, 7350, -5461, 3363, -1136
-};
+ x0 = s0 + s2 + s5;
+ x1 = sinpi_3_9 * s7;
+ x2 = s1 - s3 + s6;
+ x3 = s4;
-static const int16_t adst_i16[256] = {
- 1084, 2159, 3214, 4240, 5228, 6168, 7052, 7873,
- 8622, 9293, 9880, 10377, 10781, 11087, 11292, 11395,
- 3214, 6168, 8622, 10377, 11292, 11292, 10377, 8622,
- 6168, 3214, 0, -3214, -6168, -8622, -10377, -11292,
- 5228, 9293, 11292, 10781, 7873, 3214, -2159, -7052,
- -10377, -11395, -9880, -6168, -1084, 4240, 8622, 11087,
- 7052, 11087, 10377, 5228, -2159, -8622, -11395, -9293,
- -3214, 4240, 9880, 11292, 7873, 1084, -6168, -10781,
- 8622, 11292, 6168, -3214, -10377, -10377, -3214, 6168,
- 11292, 8622, 0, -8622, -11292, -6168, 3214, 10377,
- 9880, 9880, 0, -9880, -9880, 0, 9880, 9880,
- 0, -9880, -9880, 0, 9880, 9880, 0, -9880,
- 10781, 7052, -6168, -11087, -1084, 10377, 7873, -5228,
- -11292, -2159, 9880, 8622, -4240, -11395, -3214, 9293,
- 11292, 3214, -10377, -6168, 8622, 8622, -6168, -10377,
- 3214, 11292, 0, -11292, -3214, 10377, 6168, -8622,
- 11395, -1084, -11292, 2159, 11087, -3214, -10781, 4240,
- 10377, -5228, -9880, 6168, 9293, -7052, -8622, 7873,
- 11087, -5228, -8622, 9293, 4240, -11292, 1084, 10781,
- -6168, -7873, 9880, 3214, -11395, 2159, 10377, -7052,
- 10377, -8622, -3214, 11292, -6168, -6168, 11292, -3214,
- -8622, 10377, 0, -10377, 8622, 3214, -11292, 6168,
- 9293, -10781, 3214, 7052, -11395, 6168, 4240, -11087,
- 8622, 1084, -9880, 10377, -2159, -7873, 11292, -5228,
- 7873, -11395, 8622, -1084, -7052, 11292, -9293, 2159,
- 6168, -11087, 9880, -3214, -5228, 10781, -10377, 4240,
- 6168, -10377, 11292, -8622, 3214, 3214, -8622, 11292,
- -10377, 6168, 0, -6168, 10377, -11292, 8622, -3214,
- 4240, -7873, 10377, -11395, 10781, -8622, 5228, -1084,
- -3214, 7052, -9880, 11292, -11087, 9293, -6168, 2159,
- 2159, -4240, 6168, -7873, 9293, -10377, 11087, -11395,
- 11292, -10781, 9880, -8622, 7052, -5228, 3214, -1084
-};
+ s0 = x0 + x3;
+ s1 = x1;
+ s2 = x2 - x3;
+ s3 = x2 - x0 + x3;
-static const int xC1S7 = 16069;
-static const int xC2S6 = 15137;
-static const int xC3S5 = 13623;
-static const int xC4S4 = 11585;
-static const int xC5S3 = 9102;
-static const int xC6S2 = 6270;
-static const int xC7S1 = 3196;
+ // 1-D transform scaling factor is sqrt(2).
+ output[0] = dct_const_round_shift(s0);
+ output[1] = dct_const_round_shift(s1);
+ output[2] = dct_const_round_shift(s2);
+ output[3] = dct_const_round_shift(s3);
+}
-#define SHIFT_BITS 14
-#define DOROUND(X) X += (1<<(SHIFT_BITS-1));
+static const transform_2d FHT_4[] = {
+ { fdct4_1d, fdct4_1d }, // DCT_DCT = 0
+ { fadst4_1d, fdct4_1d }, // ADST_DCT = 1
+ { fdct4_1d, fadst4_1d }, // DCT_ADST = 2
+ { fadst4_1d, fadst4_1d } // ADST_ADST = 3
+};
-#define FINAL_SHIFT 3
-#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))
-#define IN_SHIFT (FINAL_SHIFT+1)
+void vp9_short_fht4x4_c(int16_t *input, int16_t *output,
+ int pitch, TX_TYPE tx_type) {
+ int16_t out[4 * 4];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
+ const transform_2d ht = FHT_4[tx_type];
-
-void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
- int loop;
- int short_pitch = pitch >> 1;
- int is07, is12, is34, is56;
- int is0734, is1256;
- int id07, id12, id34, id56;
- int irot_input_x, irot_input_y;
- int icommon_product1; // Re-used product (c4s4 * (s12 - s56))
- int icommon_product2; // Re-used product (c4s4 * (d12 + d56))
- int temp1, temp2; // intermediate variable for computation
-
- int InterData[64];
- int *ip = InterData;
- short *op = OutputData;
-
- for (loop = 0; loop < 8; loop++) {
- // Pre calculate some common sums and differences.
- is07 = (InputData[0] + InputData[7]) << IN_SHIFT;
- is12 = (InputData[1] + InputData[2]) << IN_SHIFT;
- is34 = (InputData[3] + InputData[4]) << IN_SHIFT;
- is56 = (InputData[5] + InputData[6]) << IN_SHIFT;
- id07 = (InputData[0] - InputData[7]) << IN_SHIFT;
- id12 = (InputData[1] - InputData[2]) << IN_SHIFT;
- id34 = (InputData[3] - InputData[4]) << IN_SHIFT;
- id56 = (InputData[5] - InputData[6]) << IN_SHIFT;
-
- is0734 = is07 + is34;
- is1256 = is12 + is56;
-
- // Pre-Calculate some common product terms.
- icommon_product1 = xC4S4 * (is12 - is56);
- DOROUND(icommon_product1)
- icommon_product1 >>= SHIFT_BITS;
-
- icommon_product2 = xC4S4 * (id12 + id56);
- DOROUND(icommon_product2)
- icommon_product2 >>= SHIFT_BITS;
-
-
- ip[0] = (xC4S4 * (is0734 + is1256));
- DOROUND(ip[0]);
- ip[0] >>= SHIFT_BITS;
-
- ip[4] = (xC4S4 * (is0734 - is1256));
- DOROUND(ip[4]);
- ip[4] >>= SHIFT_BITS;
-
- // Define inputs to rotation for outputs 2 and 6
- irot_input_x = id12 - id56;
- irot_input_y = is07 - is34;
-
- // Apply rotation for outputs 2 and 6.
- temp1 = xC6S2 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC2S6 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[2] = temp1 + temp2;
-
- temp1 = xC6S2 * irot_input_y;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC2S6 * irot_input_x;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[6] = temp1 - temp2;
-
- // Define inputs to rotation for outputs 1 and 7
- irot_input_x = icommon_product1 + id07;
- irot_input_y = -(id34 + icommon_product2);
-
- // Apply rotation for outputs 1 and 7.
- temp1 = xC1S7 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC7S1 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[1] = temp1 - temp2;
-
- temp1 = xC7S1 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC1S7 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[7] = temp1 + temp2;
-
- // Define inputs to rotation for outputs 3 and 5
- irot_input_x = id07 - icommon_product1;
- irot_input_y = id34 - icommon_product2;
-
- // Apply rotation for outputs 3 and 5.
- temp1 = xC3S5 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC5S3 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[3] = temp1 - temp2;
-
-
- temp1 = xC5S3 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC3S5 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- ip[5] = temp1 + temp2;
-
- // Increment data pointer for next row
- InputData += short_pitch;
- ip += 8;
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j * pitch + i] << 4;
+ if (i == 0 && temp_in[0])
+ temp_in[0] += 1;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ outptr[j * 4 + i] = temp_out[j];
}
- // Performed DCT on rows, now transform the columns
- ip = InterData;
- for (loop = 0; loop < 8; loop++) {
- // Pre calculate some common sums and differences.
- is07 = ip[0 * 8] + ip[7 * 8];
- is12 = ip[1 * 8] + ip[2 * 8];
- is34 = ip[3 * 8] + ip[4 * 8];
- is56 = ip[5 * 8] + ip[6 * 8];
-
- id07 = ip[0 * 8] - ip[7 * 8];
- id12 = ip[1 * 8] - ip[2 * 8];
- id34 = ip[3 * 8] - ip[4 * 8];
- id56 = ip[5 * 8] - ip[6 * 8];
-
- is0734 = is07 + is34;
- is1256 = is12 + is56;
-
- // Pre-Calculate some common product terms
- icommon_product1 = xC4S4 * (is12 - is56);
- icommon_product2 = xC4S4 * (id12 + id56);
- DOROUND(icommon_product1)
- DOROUND(icommon_product2)
- icommon_product1 >>= SHIFT_BITS;
- icommon_product2 >>= SHIFT_BITS;
-
-
- temp1 = xC4S4 * (is0734 + is1256);
- temp2 = xC4S4 * (is0734 - is1256);
- DOROUND(temp1);
- DOROUND(temp2);
- temp1 >>= SHIFT_BITS;
-
- temp2 >>= SHIFT_BITS;
- op[0 * 8] = (temp1 + FINAL_ROUNDING) >> FINAL_SHIFT;
- op[4 * 8] = (temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- // Define inputs to rotation for outputs 2 and 6
- irot_input_x = id12 - id56;
- irot_input_y = is07 - is34;
-
- // Apply rotation for outputs 2 and 6.
- temp1 = xC6S2 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC2S6 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[2 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- temp1 = xC6S2 * irot_input_y;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC2S6 * irot_input_x;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[6 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- // Define inputs to rotation for outputs 1 and 7
- irot_input_x = icommon_product1 + id07;
- irot_input_y = -(id34 + icommon_product2);
-
- // Apply rotation for outputs 1 and 7.
- temp1 = xC1S7 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC7S1 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[1 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- temp1 = xC7S1 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC1S7 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[7 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- // Define inputs to rotation for outputs 3 and 5
- irot_input_x = id07 - icommon_product1;
- irot_input_y = id34 - icommon_product2;
-
- // Apply rotation for outputs 3 and 5.
- temp1 = xC3S5 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC5S3 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[3 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-
- temp1 = xC5S3 * irot_input_x;
- DOROUND(temp1);
- temp1 >>= SHIFT_BITS;
- temp2 = xC3S5 * irot_input_y;
- DOROUND(temp2);
- temp2 >>= SHIFT_BITS;
- op[5 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
- // Increment data pointer for next column.
- ip++;
- op++;
+ // Rows
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j + i * 4];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (temp_out[j] + 1) >> 2;
}
}
-void vp9_short_fhaar2x2_c(short *input, short *output, int pitch) {
- /* [1 1; 1 -1] orthogonal transform */
- /* use position: 0,1, 4, 8 */
- int i;
- short *ip1 = input;
- short *op1 = output;
- for (i = 0; i < 16; i++) {
- op1[i] = 0;
- }
-
- op1[0] = (ip1[0] + ip1[1] + ip1[4] + ip1[8] + 1) >> 1;
- op1[1] = (ip1[0] - ip1[1] + ip1[4] - ip1[8]) >> 1;
- op1[4] = (ip1[0] + ip1[1] - ip1[4] - ip1[8]) >> 1;
- op1[8] = (ip1[0] - ip1[1] - ip1[4] + ip1[8]) >> 1;
+void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) {
+ vp9_short_fdct4x4_c(input, output, pitch);
+ vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
}
-/* For test */
-#define TEST_INT 1
-#if TEST_INT
-#define vp9_fht_int_c vp9_fht_c
-#else
-#define vp9_fht_float_c vp9_fht_c
-#endif
+static void fdct8_1d(int16_t *input, int16_t *output) {
+ /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
+ /*needs32*/ int t0, t1, t2, t3;
+ /*canbe16*/ int x0, x1, x2, x3;
-void vp9_fht_float_c(const int16_t *input, int pitch, int16_t *output,
- TX_TYPE tx_type, int tx_dim) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int i, j, k;
- float bufa[256], bufb[256]; // buffers are for floating-point test purpose
- // the implementation could be simplified in
- // conjunction with integer transform
- const int16_t *ip = input;
- int16_t *op = output;
+ // stage 1
+ s0 = input[0] + input[7];
+ s1 = input[1] + input[6];
+ s2 = input[2] + input[5];
+ s3 = input[3] + input[4];
+ s4 = input[3] - input[4];
+ s5 = input[2] - input[5];
+ s6 = input[1] - input[6];
+ s7 = input[0] - input[7];
- float *pfa = &bufa[0];
- float *pfb = &bufb[0];
+ // fdct4_1d(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+ t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+ output[0] = dct_const_round_shift(t0);
+ output[2] = dct_const_round_shift(t2);
+ output[4] = dct_const_round_shift(t1);
+ output[6] = dct_const_round_shift(t3);
- // pointers to vertical and horizontal transforms
- const float *ptv, *pth;
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = dct_const_round_shift(t0);
+ t3 = dct_const_round_shift(t1);
- assert(tx_type != DCT_DCT);
- // load and convert residual array into floating-point
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- pfa[i] = (float)ip[i];
- }
- pfa += tx_dim;
- ip += pitch / 2;
- }
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
- // vertical transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[1] = dct_const_round_shift(t0);
+ output[3] = dct_const_round_shift(t2);
+ output[5] = dct_const_round_shift(t1);
+ output[7] = dct_const_round_shift(t3);
+}
- switch (tx_type) {
- case ADST_ADST :
- case ADST_DCT :
- ptv = (tx_dim == 4) ? &adst_4[0] :
- ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
- break;
+void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) {
+ const int stride = pitch >> 1;
+ int i, j;
+ int16_t intermediate[64];
- default :
- ptv = (tx_dim == 4) ? &dct_4[0] :
- ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
- break;
- }
+ // Transform columns
+ {
+ int16_t *output = intermediate;
+ /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
+ /*needs32*/ int t0, t1, t2, t3;
+ /*canbe16*/ int x0, x1, x2, x3;
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- pfb[i] = 0;
- for (k = 0; k < tx_dim; k++) {
- pfb[i] += ptv[k] * pfa[(k * tx_dim)];
- }
- pfa += 1;
- }
- pfb += tx_dim;
- ptv += tx_dim;
- pfa = &bufa[0];
- }
+ int i;
+ for (i = 0; i < 8; i++) {
+ // stage 1
+ s0 = (input[0 * stride] + input[7 * stride]) << 2;
+ s1 = (input[1 * stride] + input[6 * stride]) << 2;
+ s2 = (input[2 * stride] + input[5 * stride]) << 2;
+ s3 = (input[3 * stride] + input[4 * stride]) << 2;
+ s4 = (input[3 * stride] - input[4 * stride]) << 2;
+ s5 = (input[2 * stride] - input[5 * stride]) << 2;
+ s6 = (input[1 * stride] - input[6 * stride]) << 2;
+ s7 = (input[0 * stride] - input[7 * stride]) << 2;
- // horizontal transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
+ // fdct4_1d(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+ t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+ output[0 * 8] = dct_const_round_shift(t0);
+ output[2 * 8] = dct_const_round_shift(t2);
+ output[4 * 8] = dct_const_round_shift(t1);
+ output[6 * 8] = dct_const_round_shift(t3);
- switch (tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = (tx_dim == 4) ? &adst_4[0] :
- ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
- break;
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = dct_const_round_shift(t0);
+ t3 = dct_const_round_shift(t1);
- default :
- pth = (tx_dim == 4) ? &dct_4[0] :
- ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
- break;
- }
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- pfa[i] = 0;
- for (k = 0; k < tx_dim; k++) {
- pfa[i] += pfb[k] * pth[k];
- }
- pth += tx_dim;
- }
-
- pfa += tx_dim;
- pfb += tx_dim;
- // pth -= tx_dim * tx_dim;
-
- switch (tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = (tx_dim == 4) ? &adst_4[0] :
- ((tx_dim == 8) ? &adst_8[0] : &adst_16[0]);
- break;
-
- default :
- pth = (tx_dim == 4) ? &dct_4[0] :
- ((tx_dim == 8) ? &dct_8[0] : &dct_16[0]);
- break;
- }
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[1 * 8] = dct_const_round_shift(t0);
+ output[3 * 8] = dct_const_round_shift(t2);
+ output[5 * 8] = dct_const_round_shift(t1);
+ output[7 * 8] = dct_const_round_shift(t3);
+ input++;
+ output++;
}
+ }
- // convert to short integer format and load BLOCKD buffer
- op = output;
- pfa = &bufa[0];
-
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- op[i] = (pfa[i] > 0 ) ? (int16_t)( 8 * pfa[i] + 0.49) :
- -(int16_t)(- 8 * pfa[i] + 0.49);
- }
- op += tx_dim;
- pfa += tx_dim;
- }
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ fdct8_1d(&intermediate[i * 8], &final_output[i * 8]);
+ for (j = 0; j < 8; ++j)
+ final_output[j + i * 8] /= 2;
}
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
-/* Converted the transforms to integer form. */
-#define VERTICAL_SHIFT 11
-#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
-#define HORIZONTAL_SHIFT 16
-#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
-void vp9_fht_int_c(const int16_t *input, int pitch, int16_t *output,
- TX_TYPE tx_type, int tx_dim) {
- int i, j, k;
- int16_t imbuf[256];
-
- const int16_t *ip = input;
- int16_t *op = output;
- int16_t *im = &imbuf[0];
-
- /* pointers to vertical and horizontal transforms. */
- const int16_t *ptv = NULL, *pth = NULL;
-
- switch (tx_type) {
- case ADST_ADST :
- ptv = pth = (tx_dim == 4) ? &adst_i4[0]
- : ((tx_dim == 8) ? &adst_i8[0]
- : &adst_i16[0]);
- break;
- case ADST_DCT :
- ptv = (tx_dim == 4) ? &adst_i4[0]
- : ((tx_dim == 8) ? &adst_i8[0] : &adst_i16[0]);
- pth = (tx_dim == 4) ? &dct_i4[0]
- : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
- break;
- case DCT_ADST :
- ptv = (tx_dim == 4) ? &dct_i4[0]
- : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
- pth = (tx_dim == 4) ? &adst_i4[0]
- : ((tx_dim == 8) ? &adst_i8[0] : &adst_i16[0]);
- break;
- case DCT_DCT :
- ptv = pth = (tx_dim == 4) ? &dct_i4[0]
- : ((tx_dim == 8) ? &dct_i8[0] : &dct_i16[0]);
- break;
- default:
- assert(0);
- break;
- }
-
- /* vertical transformation */
- for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
- int temp = 0;
-
- for (k = 0; k < tx_dim; k++) {
- temp += ptv[k] * ip[(k * (pitch >> 1))];
+void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we tranpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ const int stride = pitch >> 1;
+ int pass;
+ // We need an intermediate buffer between passes.
+ int16_t intermediate[256];
+ int16_t *in = input;
+ int16_t *out = intermediate;
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ /*canbe16*/ int step1[8];
+ /*canbe16*/ int step2[8];
+ /*canbe16*/ int step3[8];
+ /*canbe16*/ int input[8];
+ /*needs32*/ int temp1, temp2;
+ int i;
+ for (i = 0; i < 16; i++) {
+ if (0 == pass) {
+ // Calculate input for the first 8 results.
+ input[0] = (in[0 * stride] + in[15 * stride]) << 2;
+ input[1] = (in[1 * stride] + in[14 * stride]) << 2;
+ input[2] = (in[2 * stride] + in[13 * stride]) << 2;
+ input[3] = (in[3 * stride] + in[12 * stride]) << 2;
+ input[4] = (in[4 * stride] + in[11 * stride]) << 2;
+ input[5] = (in[5 * stride] + in[10 * stride]) << 2;
+ input[6] = (in[6 * stride] + in[ 9 * stride]) << 2;
+ input[7] = (in[7 * stride] + in[ 8 * stride]) << 2;
+ // Calculate input for the next 8 results.
+ step1[0] = (in[7 * stride] - in[ 8 * stride]) << 2;
+ step1[1] = (in[6 * stride] - in[ 9 * stride]) << 2;
+ step1[2] = (in[5 * stride] - in[10 * stride]) << 2;
+ step1[3] = (in[4 * stride] - in[11 * stride]) << 2;
+ step1[4] = (in[3 * stride] - in[12 * stride]) << 2;
+ step1[5] = (in[2 * stride] - in[13 * stride]) << 2;
+ step1[6] = (in[1 * stride] - in[14 * stride]) << 2;
+ step1[7] = (in[0 * stride] - in[15 * stride]) << 2;
+ } else {
+ // Calculate input for the first 8 results.
+ input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
+ input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
+ input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
+ input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
+ input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
+ input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
+ input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);
+ input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);
+ // Calculate input for the next 8 results.
+ step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);
+ step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);
+ step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
+ step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
+ step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
+ step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
+ step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
+ step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
}
+ // Work on the first eight values; fdct8_1d(input, even_results);
+ {
+ /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
+ /*needs32*/ int t0, t1, t2, t3;
+ /*canbe16*/ int x0, x1, x2, x3;
- im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
- ip++;
- }
- im += tx_dim; // 16
- ptv += tx_dim;
- ip = input;
- }
+ // stage 1
+ s0 = input[0] + input[7];
+ s1 = input[1] + input[6];
+ s2 = input[2] + input[5];
+ s3 = input[3] + input[4];
+ s4 = input[3] - input[4];
+ s5 = input[2] - input[5];
+ s6 = input[1] - input[6];
+ s7 = input[0] - input[7];
- /* horizontal transformation */
- im = &imbuf[0];
+ // fdct4_1d(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
+ t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
+ out[0] = dct_const_round_shift(t0);
+ out[4] = dct_const_round_shift(t2);
+ out[8] = dct_const_round_shift(t1);
+ out[12] = dct_const_round_shift(t3);
- for (j = 0; j < tx_dim; j++) {
- const int16_t *pthc = pth;
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = dct_const_round_shift(t0);
+ t3 = dct_const_round_shift(t1);
- for (i = 0; i < tx_dim; i++) {
- int temp = 0;
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
- for (k = 0; k < tx_dim; k++) {
- temp += im[k] * pthc[k];
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ out[2] = dct_const_round_shift(t0);
+ out[6] = dct_const_round_shift(t2);
+ out[10] = dct_const_round_shift(t1);
+ out[14] = dct_const_round_shift(t3);
}
-
- op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
- pthc += tx_dim;
+ // Work on the next eight values; step1 -> odd_results
+ {
+ // step 2
+ temp1 = (step1[5] - step1[2]) * cospi_16_64;
+ temp2 = (step1[4] - step1[3]) * cospi_16_64;
+ step2[2] = dct_const_round_shift(temp1);
+ step2[3] = dct_const_round_shift(temp2);
+ temp1 = (step1[4] + step1[3]) * cospi_16_64;
+ temp2 = (step1[5] + step1[2]) * cospi_16_64;
+ step2[4] = dct_const_round_shift(temp1);
+ step2[5] = dct_const_round_shift(temp2);
+ // step 3
+ step3[0] = step1[0] + step2[3];
+ step3[1] = step1[1] + step2[2];
+ step3[2] = step1[1] - step2[2];
+ step3[3] = step1[0] - step2[3];
+ step3[4] = step1[7] - step2[4];
+ step3[5] = step1[6] - step2[5];
+ step3[6] = step1[6] + step2[5];
+ step3[7] = step1[7] + step2[4];
+ // step 4
+ temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
+ temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64;
+ step2[1] = dct_const_round_shift(temp1);
+ step2[2] = dct_const_round_shift(temp2);
+ temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64;
+ temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
+ step2[5] = dct_const_round_shift(temp1);
+ step2[6] = dct_const_round_shift(temp2);
+ // step 5
+ step1[0] = step3[0] + step2[1];
+ step1[1] = step3[0] - step2[1];
+ step1[2] = step3[3] - step2[2];
+ step1[3] = step3[3] + step2[2];
+ step1[4] = step3[4] + step2[5];
+ step1[5] = step3[4] - step2[5];
+ step1[6] = step3[7] - step2[6];
+ step1[7] = step3[7] + step2[6];
+ // step 6
+ temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
+ temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
+ out[1] = dct_const_round_shift(temp1);
+ out[9] = dct_const_round_shift(temp2);
+ temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
+ temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
+ out[5] = dct_const_round_shift(temp1);
+ out[13] = dct_const_round_shift(temp2);
+ temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
+ temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
+ out[3] = dct_const_round_shift(temp1);
+ out[11] = dct_const_round_shift(temp2);
+ temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
+ temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
+ out[7] = dct_const_round_shift(temp1);
+ out[15] = dct_const_round_shift(temp2);
+ }
+ // Do next column (which is a transposed row in second/horizontal pass)
+ in++;
+ out += 16;
}
-
- im += tx_dim; // 16
- op += tx_dim;
+ // Setup in/out for next pass.
+ in = intermediate;
+ out = output;
}
}
-void vp9_short_fdct4x4_c(short *input, short *output, int pitch) {
- int i;
- int a1, b1, c1, d1;
- short *ip = input;
- short *op = output;
+static void fadst8_1d(int16_t *input, int16_t *output) {
+ int s0, s1, s2, s3, s4, s5, s6, s7;
- for (i = 0; i < 4; i++) {
- a1 = ((ip[0] + ip[3]) << 5);
- b1 = ((ip[1] + ip[2]) << 5);
- c1 = ((ip[1] - ip[2]) << 5);
- d1 = ((ip[0] - ip[3]) << 5);
+ int x0 = input[7];
+ int x1 = input[0];
+ int x2 = input[5];
+ int x3 = input[2];
+ int x4 = input[3];
+ int x5 = input[4];
+ int x6 = input[1];
+ int x7 = input[6];
- op[0] = a1 + b1;
- op[2] = a1 - b1;
+ // stage 1
+ s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
+ s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
+ s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+ s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+ s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+ s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+ s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
+ s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
- op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12;
- op[3] = (d1 * 2217 - c1 * 5352 + 7500) >> 12;
+ x0 = dct_const_round_shift(s0 + s4);
+ x1 = dct_const_round_shift(s1 + s5);
+ x2 = dct_const_round_shift(s2 + s6);
+ x3 = dct_const_round_shift(s3 + s7);
+ x4 = dct_const_round_shift(s0 - s4);
+ x5 = dct_const_round_shift(s1 - s5);
+ x6 = dct_const_round_shift(s2 - s6);
+ x7 = dct_const_round_shift(s3 - s7);
- ip += pitch / 2;
- op += 4;
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
+ s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
+ s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
+ s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
- }
- ip = output;
- op = output;
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[12];
- b1 = ip[4] + ip[8];
- c1 = ip[4] - ip[8];
- d1 = ip[0] - ip[12];
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = dct_const_round_shift(s4 + s6);
+ x5 = dct_const_round_shift(s5 + s7);
+ x6 = dct_const_round_shift(s4 - s6);
+ x7 = dct_const_round_shift(s5 - s7);
- op[0] = (a1 + b1 + 7) >> 4;
- op[8] = (a1 - b1 + 7) >> 4;
+ // stage 3
+ s2 = cospi_16_64 * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (x6 - x7);
- op[4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0);
- op[12] = (d1 * 2217 - c1 * 5352 + 51000) >> 16;
+ x2 = dct_const_round_shift(s2);
+ x3 = dct_const_round_shift(s3);
+ x6 = dct_const_round_shift(s6);
+ x7 = dct_const_round_shift(s7);
- ip++;
- op++;
- }
+ output[0] = x0;
+ output[1] = - x4;
+ output[2] = x6;
+ output[3] = - x2;
+ output[4] = x3;
+ output[5] = - x7;
+ output[6] = x5;
+ output[7] = - x1;
}
-void vp9_short_fdct8x4_c(short *input, short *output, int pitch)
-{
- vp9_short_fdct4x4_c(input, output, pitch);
- vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
-}
+static const transform_2d FHT_8[] = {
+ { fdct8_1d, fdct8_1d }, // DCT_DCT = 0
+ { fadst8_1d, fdct8_1d }, // ADST_DCT = 1
+ { fdct8_1d, fadst8_1d }, // DCT_ADST = 2
+ { fadst8_1d, fadst8_1d } // ADST_ADST = 3
+};
-void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {
- int i;
- int a1, b1, c1, d1;
- short *ip = input;
- short *op = output;
- int pitch_short = pitch >> 1;
+void vp9_short_fht8x8_c(int16_t *input, int16_t *output,
+ int pitch, TX_TYPE tx_type) {
+ int16_t out[64];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[8], temp_out[8];
+ const transform_2d ht = FHT_8[tx_type];
- for (i = 0; i < 4; i++) {
- a1 = ip[0 * pitch_short] + ip[3 * pitch_short];
- b1 = ip[1 * pitch_short] + ip[2 * pitch_short];
- c1 = ip[1 * pitch_short] - ip[2 * pitch_short];
- d1 = ip[0 * pitch_short] - ip[3 * pitch_short];
-
- op[0] = (a1 + b1 + 1) >> 1;
- op[4] = (c1 + d1) >> 1;
- op[8] = (a1 - b1) >> 1;
- op[12] = (d1 - c1) >> 1;
-
- ip++;
- op++;
+ // Columns
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = input[j * pitch + i] << 2;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ outptr[j * 8 + i] = temp_out[j];
}
- ip = output;
- op = output;
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[3];
- b1 = ip[1] + ip[2];
- c1 = ip[1] - ip[2];
- d1 = ip[0] - ip[3];
-
- op[0] = (a1 + b1 + 1) >> 1;
- op[1] = (c1 + d1) >> 1;
- op[2] = (a1 - b1) >> 1;
- op[3] = (d1 - c1) >> 1;
-
- ip += 4;
- op += 4;
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j + i * 8];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j + i * 8] = temp_out[j] >> 1;
}
}
-#if CONFIG_LOSSLESS
-void vp9_short_walsh4x4_lossless_c(short *input, short *output, int pitch) {
+void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {
int i;
int a1, b1, c1, d1;
short *ip = input;
@@ -822,46 +561,6 @@
int pitch_short = pitch >> 1;
for (i = 0; i < 4; i++) {
- a1 = (ip[0 * pitch_short] + ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
- b1 = (ip[1 * pitch_short] + ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
- c1 = (ip[1 * pitch_short] - ip[2 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
- d1 = (ip[0 * pitch_short] - ip[3 * pitch_short]) >> Y2_WHT_UPSCALE_FACTOR;
-
- op[0] = (a1 + b1 + 1) >> 1;
- op[4] = (c1 + d1) >> 1;
- op[8] = (a1 - b1) >> 1;
- op[12] = (d1 - c1) >> 1;
-
- ip++;
- op++;
- }
- ip = output;
- op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ip[0] + ip[3];
- b1 = ip[1] + ip[2];
- c1 = ip[1] - ip[2];
- d1 = ip[0] - ip[3];
-
- op[0] = ((a1 + b1 + 1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[1] = ((c1 + d1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[2] = ((a1 - b1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
- op[3] = ((d1 - c1) >> 1) << Y2_WHT_UPSCALE_FACTOR;
-
- ip += 4;
- op += 4;
- }
-}
-
-void vp9_short_walsh4x4_x8_c(short *input, short *output, int pitch) {
- int i;
- int a1, b1, c1, d1;
- short *ip = input;
- short *op = output;
- int pitch_short = pitch >> 1;
-
- for (i = 0; i < 4; i++) {
a1 = ip[0 * pitch_short] + ip[3 * pitch_short];
b1 = ip[1 * pitch_short] + ip[2 * pitch_short];
c1 = ip[1 * pitch_short] - ip[2 * pitch_short];
@@ -894,1495 +593,658 @@
}
}
-void vp9_short_walsh8x4_x8_c(short *input, short *output, int pitch) {
- vp9_short_walsh4x4_x8_c(input, output, pitch);
- vp9_short_walsh4x4_x8_c(input + 4, output + 16, pitch);
+void vp9_short_walsh8x4_c(short *input, short *output, int pitch) {
+ vp9_short_walsh4x4_c(input, output, pitch);
+ vp9_short_walsh4x4_c(input + 4, output + 16, pitch);
}
-#endif
-#define TEST_INT_16x16_DCT 1
-#if !TEST_INT_16x16_DCT
-static void dct16x16_1d(double input[16], double output[16]) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
+// Rewrote to use same algorithm as others.
+static void fdct16_1d(int16_t in[16], int16_t out[16]) {
+ /*canbe16*/ int step1[8];
+ /*canbe16*/ int step2[8];
+ /*canbe16*/ int step3[8];
+ /*canbe16*/ int input[8];
+ /*needs32*/ int temp1, temp2;
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
+ // step 1
+ input[0] = in[0] + in[15];
+ input[1] = in[1] + in[14];
+ input[2] = in[2] + in[13];
+ input[3] = in[3] + in[12];
+ input[4] = in[4] + in[11];
+ input[5] = in[5] + in[10];
+ input[6] = in[6] + in[ 9];
+ input[7] = in[7] + in[ 8];
- // step 1
- step[ 0] = input[0] + input[15];
- step[ 1] = input[1] + input[14];
- step[ 2] = input[2] + input[13];
- step[ 3] = input[3] + input[12];
- step[ 4] = input[4] + input[11];
- step[ 5] = input[5] + input[10];
- step[ 6] = input[6] + input[ 9];
- step[ 7] = input[7] + input[ 8];
- step[ 8] = input[7] - input[ 8];
- step[ 9] = input[6] - input[ 9];
- step[10] = input[5] - input[10];
- step[11] = input[4] - input[11];
- step[12] = input[3] - input[12];
- step[13] = input[2] - input[13];
- step[14] = input[1] - input[14];
- step[15] = input[0] - input[15];
+ step1[0] = in[7] - in[ 8];
+ step1[1] = in[6] - in[ 9];
+ step1[2] = in[5] - in[10];
+ step1[3] = in[4] - in[11];
+ step1[4] = in[3] - in[12];
+ step1[5] = in[2] - in[13];
+ step1[6] = in[1] - in[14];
+ step1[7] = in[0] - in[15];
- // step 2
- output[0] = step[0] + step[7];
- output[1] = step[1] + step[6];
- output[2] = step[2] + step[5];
- output[3] = step[3] + step[4];
- output[4] = step[3] - step[4];
- output[5] = step[2] - step[5];
- output[6] = step[1] - step[6];
- output[7] = step[0] - step[7];
+ // fdct8_1d(step, step);
+ {
+ /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
+ /*needs32*/ int t0, t1, t2, t3;
+ /*canbe16*/ int x0, x1, x2, x3;
- temp1 = step[ 8]*C7;
- temp2 = step[15]*C9;
- output[ 8] = temp1 + temp2;
+ // stage 1
+ s0 = input[0] + input[7];
+ s1 = input[1] + input[6];
+ s2 = input[2] + input[5];
+ s3 = input[3] + input[4];
+ s4 = input[3] - input[4];
+ s5 = input[2] - input[5];
+ s6 = input[1] - input[6];
+ s7 = input[0] - input[7];
- temp1 = step[ 9]*C11;
- temp2 = step[14]*C5;
- output[ 9] = temp1 - temp2;
+ // fdct4_1d(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
+ t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
+ out[0] = dct_const_round_shift(t0);
+ out[4] = dct_const_round_shift(t2);
+ out[8] = dct_const_round_shift(t1);
+ out[12] = dct_const_round_shift(t3);
- temp1 = step[10]*C3;
- temp2 = step[13]*C13;
- output[10] = temp1 + temp2;
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = dct_const_round_shift(t0);
+ t3 = dct_const_round_shift(t1);
- temp1 = step[11]*C15;
- temp2 = step[12]*C1;
- output[11] = temp1 - temp2;
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
- temp1 = step[11]*C1;
- temp2 = step[12]*C15;
- output[12] = temp2 + temp1;
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ out[2] = dct_const_round_shift(t0);
+ out[6] = dct_const_round_shift(t2);
+ out[10] = dct_const_round_shift(t1);
+ out[14] = dct_const_round_shift(t3);
+ }
- temp1 = step[10]*C13;
- temp2 = step[13]*C3;
- output[13] = temp2 - temp1;
+ // step 2
+ temp1 = (step1[5] - step1[2]) * cospi_16_64;
+ temp2 = (step1[4] - step1[3]) * cospi_16_64;
+ step2[2] = dct_const_round_shift(temp1);
+ step2[3] = dct_const_round_shift(temp2);
+ temp1 = (step1[4] + step1[3]) * cospi_16_64;
+ temp2 = (step1[5] + step1[2]) * cospi_16_64;
+ step2[4] = dct_const_round_shift(temp1);
+ step2[5] = dct_const_round_shift(temp2);
- temp1 = step[ 9]*C5;
- temp2 = step[14]*C11;
- output[14] = temp2 + temp1;
+ // step 3
+ step3[0] = step1[0] + step2[3];
+ step3[1] = step1[1] + step2[2];
+ step3[2] = step1[1] - step2[2];
+ step3[3] = step1[0] - step2[3];
+ step3[4] = step1[7] - step2[4];
+ step3[5] = step1[6] - step2[5];
+ step3[6] = step1[6] + step2[5];
+ step3[7] = step1[7] + step2[4];
- temp1 = step[ 8]*C9;
- temp2 = step[15]*C7;
- output[15] = temp2 - temp1;
+ // step 4
+ temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
+ temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64;
+ step2[1] = dct_const_round_shift(temp1);
+ step2[2] = dct_const_round_shift(temp2);
+ temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64;
+ temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
+ step2[5] = dct_const_round_shift(temp1);
+ step2[6] = dct_const_round_shift(temp2);
- // step 3
- step[ 0] = output[0] + output[3];
- step[ 1] = output[1] + output[2];
- step[ 2] = output[1] - output[2];
- step[ 3] = output[0] - output[3];
+ // step 5
+ step1[0] = step3[0] + step2[1];
+ step1[1] = step3[0] - step2[1];
+ step1[2] = step3[3] - step2[2];
+ step1[3] = step3[3] + step2[2];
+ step1[4] = step3[4] + step2[5];
+ step1[5] = step3[4] - step2[5];
+ step1[6] = step3[7] - step2[6];
+ step1[7] = step3[7] + step2[6];
- temp1 = output[4]*C14;
- temp2 = output[7]*C2;
- step[ 4] = temp1 + temp2;
+ // step 6
+ temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
+ temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
+ out[1] = dct_const_round_shift(temp1);
+ out[9] = dct_const_round_shift(temp2);
- temp1 = output[5]*C10;
- temp2 = output[6]*C6;
- step[ 5] = temp1 + temp2;
+ temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
+ temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
+ out[5] = dct_const_round_shift(temp1);
+ out[13] = dct_const_round_shift(temp2);
- temp1 = output[5]*C6;
- temp2 = output[6]*C10;
- step[ 6] = temp2 - temp1;
+ temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
+ temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
+ out[3] = dct_const_round_shift(temp1);
+ out[11] = dct_const_round_shift(temp2);
- temp1 = output[4]*C2;
- temp2 = output[7]*C14;
- step[ 7] = temp2 - temp1;
-
- step[ 8] = output[ 8] + output[11];
- step[ 9] = output[ 9] + output[10];
- step[10] = output[ 9] - output[10];
- step[11] = output[ 8] - output[11];
-
- step[12] = output[12] + output[15];
- step[13] = output[13] + output[14];
- step[14] = output[13] - output[14];
- step[15] = output[12] - output[15];
-
- // step 4
- output[ 0] = (step[ 0] + step[ 1]);
- output[ 8] = (step[ 0] - step[ 1]);
-
- temp1 = step[2]*C12;
- temp2 = step[3]*C4;
- temp1 = temp1 + temp2;
- output[ 4] = 2*(temp1*C8);
-
- temp1 = step[2]*C4;
- temp2 = step[3]*C12;
- temp1 = temp2 - temp1;
- output[12] = 2*(temp1*C8);
-
- output[ 2] = 2*((step[4] + step[ 5])*C8);
- output[14] = 2*((step[7] - step[ 6])*C8);
-
- temp1 = step[4] - step[5];
- temp2 = step[6] + step[7];
- output[ 6] = (temp1 + temp2);
- output[10] = (temp1 - temp2);
-
- intermediate[8] = step[8] + step[14];
- intermediate[9] = step[9] + step[15];
-
- temp1 = intermediate[8]*C12;
- temp2 = intermediate[9]*C4;
- temp1 = temp1 - temp2;
- output[3] = 2*(temp1*C8);
-
- temp1 = intermediate[8]*C4;
- temp2 = intermediate[9]*C12;
- temp1 = temp2 + temp1;
- output[13] = 2*(temp1*C8);
-
- output[ 9] = 2*((step[10] + step[11])*C8);
-
- intermediate[11] = step[10] - step[11];
- intermediate[12] = step[12] + step[13];
- intermediate[13] = step[12] - step[13];
- intermediate[14] = step[ 8] - step[14];
- intermediate[15] = step[ 9] - step[15];
-
- output[15] = (intermediate[11] + intermediate[12]);
- output[ 1] = -(intermediate[11] - intermediate[12]);
-
- output[ 7] = 2*(intermediate[13]*C8);
-
- temp1 = intermediate[14]*C12;
- temp2 = intermediate[15]*C4;
- temp1 = temp1 - temp2;
- output[11] = -2*(temp1*C8);
-
- temp1 = intermediate[14]*C4;
- temp2 = intermediate[15]*C12;
- temp1 = temp2 + temp1;
- output[ 5] = 2*(temp1*C8);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
+ temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
+ temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
+ out[7] = dct_const_round_shift(temp1);
+ out[15] = dct_const_round_shift(temp2);
}
-void vp9_short_fdct16x16_c(short *input, short *out, int pitch) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int shortpitch = pitch >> 1;
- int i, j;
- double output[256];
- // First transform columns
- for (i = 0; i < 16; i++) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; j++)
- temp_in[j] = input[j*shortpitch + i];
- dct16x16_1d(temp_in, temp_out);
- for (j = 0; j < 16; j++)
- output[j*16 + i] = temp_out[j];
- }
- // Then transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = output[j + i*16];
- dct16x16_1d(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i*16] = temp_out[j];
- }
- // Scale by some magic number
- for (i = 0; i < 256; i++)
- out[i] = (short)round(output[i]/2);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
+void fadst16_1d(int16_t *input, int16_t *output) {
+ int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
-#else
-static const int16_t C1 = 16305;
-static const int16_t C2 = 16069;
-static const int16_t C3 = 15679;
-static const int16_t C4 = 15137;
-static const int16_t C5 = 14449;
-static const int16_t C6 = 13623;
-static const int16_t C7 = 12665;
-static const int16_t C8 = 11585;
-static const int16_t C9 = 10394;
-static const int16_t C10 = 9102;
-static const int16_t C11 = 7723;
-static const int16_t C12 = 6270;
-static const int16_t C13 = 4756;
-static const int16_t C14 = 3196;
-static const int16_t C15 = 1606;
-
-#define RIGHT_SHIFT 14
-#define ROUNDING (1 << (RIGHT_SHIFT - 1))
-
-static void dct16x16_1d(int16_t input[16], int16_t output[16],
- int last_shift_bits) {
- int16_t step[16];
- int intermediate[16];
- int temp1, temp2;
- int final_shift = RIGHT_SHIFT;
- int final_rounding = ROUNDING;
- int output_shift = 0;
- int output_rounding = 0;
-
- final_shift += last_shift_bits;
- if (final_shift > 0)
- final_rounding = 1 << (final_shift - 1);
-
- output_shift += last_shift_bits;
- if (output_shift > 0)
- output_rounding = 1 << (output_shift - 1);
-
- // step 1
- step[ 0] = input[0] + input[15];
- step[ 1] = input[1] + input[14];
- step[ 2] = input[2] + input[13];
- step[ 3] = input[3] + input[12];
- step[ 4] = input[4] + input[11];
- step[ 5] = input[5] + input[10];
- step[ 6] = input[6] + input[ 9];
- step[ 7] = input[7] + input[ 8];
- step[ 8] = input[7] - input[ 8];
- step[ 9] = input[6] - input[ 9];
- step[10] = input[5] - input[10];
- step[11] = input[4] - input[11];
- step[12] = input[3] - input[12];
- step[13] = input[2] - input[13];
- step[14] = input[1] - input[14];
- step[15] = input[0] - input[15];
-
- // step 2
- output[0] = step[0] + step[7];
- output[1] = step[1] + step[6];
- output[2] = step[2] + step[5];
- output[3] = step[3] + step[4];
- output[4] = step[3] - step[4];
- output[5] = step[2] - step[5];
- output[6] = step[1] - step[6];
- output[7] = step[0] - step[7];
-
- temp1 = step[ 8] * C7;
- temp2 = step[15] * C9;
- output[ 8] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 9] * C11;
- temp2 = step[14] * C5;
- output[ 9] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[10] * C3;
- temp2 = step[13] * C13;
- output[10] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[11] * C15;
- temp2 = step[12] * C1;
- output[11] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[11] * C1;
- temp2 = step[12] * C15;
- output[12] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[10] * C13;
- temp2 = step[13] * C3;
- output[13] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 9] * C5;
- temp2 = step[14] * C11;
- output[14] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = step[ 8] * C9;
- temp2 = step[15] * C7;
- output[15] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- // step 3
- step[ 0] = output[0] + output[3];
- step[ 1] = output[1] + output[2];
- step[ 2] = output[1] - output[2];
- step[ 3] = output[0] - output[3];
-
- temp1 = output[4] * C14;
- temp2 = output[7] * C2;
- step[ 4] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[5] * C10;
- temp2 = output[6] * C6;
- step[ 5] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[5] * C6;
- temp2 = output[6] * C10;
- step[ 6] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = output[4] * C2;
- temp2 = output[7] * C14;
- step[ 7] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;
-
- step[ 8] = output[ 8] + output[11];
- step[ 9] = output[ 9] + output[10];
- step[10] = output[ 9] - output[10];
- step[11] = output[ 8] - output[11];
-
- step[12] = output[12] + output[15];
- step[13] = output[13] + output[14];
- step[14] = output[13] - output[14];
- step[15] = output[12] - output[15];
-
- // step 4
- output[ 0] = (step[ 0] + step[ 1] + output_rounding) >> output_shift;
- output[ 8] = (step[ 0] - step[ 1] + output_rounding) >> output_shift;
+ int x0 = input[15];
+ int x1 = input[0];
+ int x2 = input[13];
+ int x3 = input[2];
+ int x4 = input[11];
+ int x5 = input[4];
+ int x6 = input[9];
+ int x7 = input[6];
+ int x8 = input[7];
+ int x9 = input[8];
+ int x10 = input[5];
+ int x11 = input[10];
+ int x12 = input[3];
+ int x13 = input[12];
+ int x14 = input[1];
+ int x15 = input[14];
- temp1 = step[2] * C12;
- temp2 = step[3] * C4;
- temp1 = (temp1 + temp2 + final_rounding) >> final_shift;
- output[ 4] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+ // stage 1
+ s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+ s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+ s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+ s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+ s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+ s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+ s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+ s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+ s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+ s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+ s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+ s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+ s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+ s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+ s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+ s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
- temp1 = step[2] * C4;
- temp2 = step[3] * C12;
- temp1 = (temp2 - temp1 + final_rounding) >> final_shift;
- output[12] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+ x0 = dct_const_round_shift(s0 + s8);
+ x1 = dct_const_round_shift(s1 + s9);
+ x2 = dct_const_round_shift(s2 + s10);
+ x3 = dct_const_round_shift(s3 + s11);
+ x4 = dct_const_round_shift(s4 + s12);
+ x5 = dct_const_round_shift(s5 + s13);
+ x6 = dct_const_round_shift(s6 + s14);
+ x7 = dct_const_round_shift(s7 + s15);
+ x8 = dct_const_round_shift(s0 - s8);
+ x9 = dct_const_round_shift(s1 - s9);
+ x10 = dct_const_round_shift(s2 - s10);
+ x11 = dct_const_round_shift(s3 - s11);
+ x12 = dct_const_round_shift(s4 - s12);
+ x13 = dct_const_round_shift(s5 - s13);
+ x14 = dct_const_round_shift(s6 - s14);
+ x15 = dct_const_round_shift(s7 - s15);
- output[ 2] = (2 * ((step[4] + step[ 5]) * C8) + final_rounding)
- >> final_shift;
- output[14] = (2 * ((step[7] - step[ 6]) * C8) + final_rounding)
- >> final_shift;
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+ s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+ s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+ s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+ s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
+ s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+ s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
+ s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
- temp1 = step[4] - step[5];
- temp2 = step[6] + step[7];
- output[ 6] = (temp1 + temp2 + output_rounding) >> output_shift;
- output[10] = (temp1 - temp2 + output_rounding) >> output_shift;
+ x0 = s0 + s4;
+ x1 = s1 + s5;
+ x2 = s2 + s6;
+ x3 = s3 + s7;
+ x4 = s0 - s4;
+ x5 = s1 - s5;
+ x6 = s2 - s6;
+ x7 = s3 - s7;
+ x8 = dct_const_round_shift(s8 + s12);
+ x9 = dct_const_round_shift(s9 + s13);
+ x10 = dct_const_round_shift(s10 + s14);
+ x11 = dct_const_round_shift(s11 + s15);
+ x12 = dct_const_round_shift(s8 - s12);
+ x13 = dct_const_round_shift(s9 - s13);
+ x14 = dct_const_round_shift(s10 - s14);
+ x15 = dct_const_round_shift(s11 - s15);
- intermediate[8] = step[8] + step[14];
- intermediate[9] = step[9] + step[15];
+ // stage 3
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+ s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+ s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
+ s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+ s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+ s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
+ s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
- temp1 = intermediate[8] * C12;
- temp2 = intermediate[9] * C4;
- temp1 = (temp1 - temp2 + final_rounding) >> final_shift;
- output[3] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = intermediate[8] * C4;
- temp2 = intermediate[9] * C12;
- temp1 = (temp2 + temp1 + final_rounding) >> final_shift;
- output[13] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
-
- output[ 9] = (2 * ((step[10] + step[11]) * C8) + final_rounding)
- >> final_shift;
-
- intermediate[11] = step[10] - step[11];
- intermediate[12] = step[12] + step[13];
- intermediate[13] = step[12] - step[13];
- intermediate[14] = step[ 8] - step[14];
- intermediate[15] = step[ 9] - step[15];
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = dct_const_round_shift(s4 + s6);
+ x5 = dct_const_round_shift(s5 + s7);
+ x6 = dct_const_round_shift(s4 - s6);
+ x7 = dct_const_round_shift(s5 - s7);
+ x8 = s8 + s10;
+ x9 = s9 + s11;
+ x10 = s8 - s10;
+ x11 = s9 - s11;
+ x12 = dct_const_round_shift(s12 + s14);
+ x13 = dct_const_round_shift(s13 + s15);
+ x14 = dct_const_round_shift(s12 - s14);
+ x15 = dct_const_round_shift(s13 - s15);
- output[15] = (intermediate[11] + intermediate[12] + output_rounding)
- >> output_shift;
- output[ 1] = -(intermediate[11] - intermediate[12] + output_rounding)
- >> output_shift;
+ // stage 4
+ s2 = (- cospi_16_64) * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (- x6 + x7);
+ s10 = cospi_16_64 * (x10 + x11);
+ s11 = cospi_16_64 * (- x10 + x11);
+ s14 = (- cospi_16_64) * (x14 + x15);
+ s15 = cospi_16_64 * (x14 - x15);
- output[ 7] = (2 * (intermediate[13] * C8) + final_rounding) >> final_shift;
+ x2 = dct_const_round_shift(s2);
+ x3 = dct_const_round_shift(s3);
+ x6 = dct_const_round_shift(s6);
+ x7 = dct_const_round_shift(s7);
+ x10 = dct_const_round_shift(s10);
+ x11 = dct_const_round_shift(s11);
+ x14 = dct_const_round_shift(s14);
+ x15 = dct_const_round_shift(s15);
- temp1 = intermediate[14] * C12;
- temp2 = intermediate[15] * C4;
- temp1 = (temp1 - temp2 + final_rounding) >> final_shift;
- output[11] = (-2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
-
- temp1 = intermediate[14] * C4;
- temp2 = intermediate[15] * C12;
- temp1 = (temp2 + temp1 + final_rounding) >> final_shift;
- output[ 5] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;
+ output[0] = x0;
+ output[1] = - x8;
+ output[2] = x12;
+ output[3] = - x4;
+ output[4] = x6;
+ output[5] = x14;
+ output[6] = x10;
+ output[7] = x2;
+ output[8] = x3;
+ output[9] = x11;
+ output[10] = x15;
+ output[11] = x7;
+ output[12] = x5;
+ output[13] = - x13;
+ output[14] = x9;
+ output[15] = - x1;
}
-void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) {
- int shortpitch = pitch >> 1;
- int i, j;
- int16_t output[256];
- int16_t *outptr = &output[0];
+static const transform_2d FHT_16[] = {
+ { fdct16_1d, fdct16_1d }, // DCT_DCT = 0
+ { fadst16_1d, fdct16_1d }, // ADST_DCT = 1
+ { fdct16_1d, fadst16_1d }, // DCT_ADST = 2
+ { fadst16_1d, fadst16_1d } // ADST_ADST = 3
+};
- // First transform columns
- for (i = 0; i < 16; i++) {
- int16_t temp_in[16];
- int16_t temp_out[16];
- for (j = 0; j < 16; j++)
- temp_in[j] = input[j * shortpitch + i];
- dct16x16_1d(temp_in, temp_out, 0);
- for (j = 0; j < 16; j++)
- output[j * 16 + i] = temp_out[j];
- }
+void vp9_short_fht16x16_c(int16_t *input, int16_t *output,
+ int pitch, TX_TYPE tx_type) {
+ int16_t out[256];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+ const transform_2d ht = FHT_16[tx_type];
- // Then transform rows
- for (i = 0; i < 16; ++i) {
- dct16x16_1d(outptr, out, 1);
- outptr += 16;
- out += 16;
- }
+ // Columns
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = input[j * pitch + i] << 2;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
+ }
+
+ // Rows
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j + i * 16];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j + i * 16] = temp_out[j];
+ }
}
-#undef RIGHT_SHIFT
-#undef ROUNDING
-#endif
-#if !CONFIG_DWTDCTHYBRID
-static void dct32_1d(double *input, double *output, int stride) {
- static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
- static const double C2 = 0.995184726672; // cos(pi * 2 / 64)
- static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
- static const double C4 = 0.980785280403; // cos(pi * 4 / 64)
- static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
- static const double C6 = 0.956940335732; // cos(pi * 6 / 64)
- static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
- static const double C8 = 0.923879532511; // cos(pi * 8 / 64)
- static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
- static const double C10 = 0.881921264348; // cos(pi * 10 / 64)
- static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
- static const double C12 = 0.831469612303; // cos(pi * 12 / 64)
- static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
- static const double C14 = 0.773010453363; // cos(pi * 14 / 64)
- static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
- static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
- static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
- static const double C18 = 0.634393284164; // cos(pi * 18 / 64)
- static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
- static const double C20 = 0.555570233020; // cos(pi * 20 / 64)
- static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
- static const double C22 = 0.471396736826; // cos(pi * 22 / 64)
- static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
- static const double C24 = 0.382683432365; // cos(pi * 24 / 64)
- static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
- static const double C26 = 0.290284677254; // cos(pi * 26 / 64)
- static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
- static const double C28 = 0.195090322016; // cos(pi * 28 / 64)
- static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
- static const double C30 = 0.098017140330; // cos(pi * 30 / 64)
- static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
- double step[32];
-
+static void dct32_1d(int *input, int *output) {
+ int step[32];
// Stage 1
- step[0] = input[stride*0] + input[stride*(32 - 1)];
- step[1] = input[stride*1] + input[stride*(32 - 2)];
- step[2] = input[stride*2] + input[stride*(32 - 3)];
- step[3] = input[stride*3] + input[stride*(32 - 4)];
- step[4] = input[stride*4] + input[stride*(32 - 5)];
- step[5] = input[stride*5] + input[stride*(32 - 6)];
- step[6] = input[stride*6] + input[stride*(32 - 7)];
- step[7] = input[stride*7] + input[stride*(32 - 8)];
- step[8] = input[stride*8] + input[stride*(32 - 9)];
- step[9] = input[stride*9] + input[stride*(32 - 10)];
- step[10] = input[stride*10] + input[stride*(32 - 11)];
- step[11] = input[stride*11] + input[stride*(32 - 12)];
- step[12] = input[stride*12] + input[stride*(32 - 13)];
- step[13] = input[stride*13] + input[stride*(32 - 14)];
- step[14] = input[stride*14] + input[stride*(32 - 15)];
- step[15] = input[stride*15] + input[stride*(32 - 16)];
- step[16] = -input[stride*16] + input[stride*(32 - 17)];
- step[17] = -input[stride*17] + input[stride*(32 - 18)];
- step[18] = -input[stride*18] + input[stride*(32 - 19)];
- step[19] = -input[stride*19] + input[stride*(32 - 20)];
- step[20] = -input[stride*20] + input[stride*(32 - 21)];
- step[21] = -input[stride*21] + input[stride*(32 - 22)];
- step[22] = -input[stride*22] + input[stride*(32 - 23)];
- step[23] = -input[stride*23] + input[stride*(32 - 24)];
- step[24] = -input[stride*24] + input[stride*(32 - 25)];
- step[25] = -input[stride*25] + input[stride*(32 - 26)];
- step[26] = -input[stride*26] + input[stride*(32 - 27)];
- step[27] = -input[stride*27] + input[stride*(32 - 28)];
- step[28] = -input[stride*28] + input[stride*(32 - 29)];
- step[29] = -input[stride*29] + input[stride*(32 - 30)];
- step[30] = -input[stride*30] + input[stride*(32 - 31)];
- step[31] = -input[stride*31] + input[stride*(32 - 32)];
+ step[0] = input[0] + input[(32 - 1)];
+ step[1] = input[1] + input[(32 - 2)];
+ step[2] = input[2] + input[(32 - 3)];
+ step[3] = input[3] + input[(32 - 4)];
+ step[4] = input[4] + input[(32 - 5)];
+ step[5] = input[5] + input[(32 - 6)];
+ step[6] = input[6] + input[(32 - 7)];
+ step[7] = input[7] + input[(32 - 8)];
+ step[8] = input[8] + input[(32 - 9)];
+ step[9] = input[9] + input[(32 - 10)];
+ step[10] = input[10] + input[(32 - 11)];
+ step[11] = input[11] + input[(32 - 12)];
+ step[12] = input[12] + input[(32 - 13)];
+ step[13] = input[13] + input[(32 - 14)];
+ step[14] = input[14] + input[(32 - 15)];
+ step[15] = input[15] + input[(32 - 16)];
+ step[16] = -input[16] + input[(32 - 17)];
+ step[17] = -input[17] + input[(32 - 18)];
+ step[18] = -input[18] + input[(32 - 19)];
+ step[19] = -input[19] + input[(32 - 20)];
+ step[20] = -input[20] + input[(32 - 21)];
+ step[21] = -input[21] + input[(32 - 22)];
+ step[22] = -input[22] + input[(32 - 23)];
+ step[23] = -input[23] + input[(32 - 24)];
+ step[24] = -input[24] + input[(32 - 25)];
+ step[25] = -input[25] + input[(32 - 26)];
+ step[26] = -input[26] + input[(32 - 27)];
+ step[27] = -input[27] + input[(32 - 28)];
+ step[28] = -input[28] + input[(32 - 29)];
+ step[29] = -input[29] + input[(32 - 30)];
+ step[30] = -input[30] + input[(32 - 31)];
+ step[31] = -input[31] + input[(32 - 32)];
// Stage 2
- output[stride*0] = step[0] + step[16 - 1];
- output[stride*1] = step[1] + step[16 - 2];
- output[stride*2] = step[2] + step[16 - 3];
- output[stride*3] = step[3] + step[16 - 4];
- output[stride*4] = step[4] + step[16 - 5];
- output[stride*5] = step[5] + step[16 - 6];
- output[stride*6] = step[6] + step[16 - 7];
- output[stride*7] = step[7] + step[16 - 8];
- output[stride*8] = -step[8] + step[16 - 9];
- output[stride*9] = -step[9] + step[16 - 10];
- output[stride*10] = -step[10] + step[16 - 11];
- output[stride*11] = -step[11] + step[16 - 12];
- output[stride*12] = -step[12] + step[16 - 13];
- output[stride*13] = -step[13] + step[16 - 14];
- output[stride*14] = -step[14] + step[16 - 15];
- output[stride*15] = -step[15] + step[16 - 16];
+ output[0] = step[0] + step[16 - 1];
+ output[1] = step[1] + step[16 - 2];
+ output[2] = step[2] + step[16 - 3];
+ output[3] = step[3] + step[16 - 4];
+ output[4] = step[4] + step[16 - 5];
+ output[5] = step[5] + step[16 - 6];
+ output[6] = step[6] + step[16 - 7];
+ output[7] = step[7] + step[16 - 8];
+ output[8] = -step[8] + step[16 - 9];
+ output[9] = -step[9] + step[16 - 10];
+ output[10] = -step[10] + step[16 - 11];
+ output[11] = -step[11] + step[16 - 12];
+ output[12] = -step[12] + step[16 - 13];
+ output[13] = -step[13] + step[16 - 14];
+ output[14] = -step[14] + step[16 - 15];
+ output[15] = -step[15] + step[16 - 16];
- output[stride*16] = step[16];
- output[stride*17] = step[17];
- output[stride*18] = step[18];
- output[stride*19] = step[19];
+ output[16] = step[16];
+ output[17] = step[17];
+ output[18] = step[18];
+ output[19] = step[19];
- output[stride*20] = (-step[20] + step[27])*C16;
- output[stride*21] = (-step[21] + step[26])*C16;
- output[stride*22] = (-step[22] + step[25])*C16;
- output[stride*23] = (-step[23] + step[24])*C16;
+ output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64);
+ output[21] = dct_32_round((-step[21] + step[26]) * cospi_16_64);
+ output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64);
+ output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64);
- output[stride*24] = (step[24] + step[23])*C16;
- output[stride*25] = (step[25] + step[22])*C16;
- output[stride*26] = (step[26] + step[21])*C16;
- output[stride*27] = (step[27] + step[20])*C16;
+ output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64);
+ output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64);
+ output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64);
+ output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64);
- output[stride*28] = step[28];
- output[stride*29] = step[29];
- output[stride*30] = step[30];
- output[stride*31] = step[31];
+ output[28] = step[28];
+ output[29] = step[29];
+ output[30] = step[30];
+ output[31] = step[31];
// Stage 3
- step[0] = output[stride*0] + output[stride*(8 - 1)];
- step[1] = output[stride*1] + output[stride*(8 - 2)];
- step[2] = output[stride*2] + output[stride*(8 - 3)];
- step[3] = output[stride*3] + output[stride*(8 - 4)];
- step[4] = -output[stride*4] + output[stride*(8 - 5)];
- step[5] = -output[stride*5] + output[stride*(8 - 6)];
- step[6] = -output[stride*6] + output[stride*(8 - 7)];
- step[7] = -output[stride*7] + output[stride*(8 - 8)];
- step[8] = output[stride*8];
- step[9] = output[stride*9];
- step[10] = (-output[stride*10] + output[stride*13])*C16;
- step[11] = (-output[stride*11] + output[stride*12])*C16;
- step[12] = (output[stride*12] + output[stride*11])*C16;
- step[13] = (output[stride*13] + output[stride*10])*C16;
- step[14] = output[stride*14];
- step[15] = output[stride*15];
+ step[0] = output[0] + output[(8 - 1)];
+ step[1] = output[1] + output[(8 - 2)];
+ step[2] = output[2] + output[(8 - 3)];
+ step[3] = output[3] + output[(8 - 4)];
+ step[4] = -output[4] + output[(8 - 5)];
+ step[5] = -output[5] + output[(8 - 6)];
+ step[6] = -output[6] + output[(8 - 7)];
+ step[7] = -output[7] + output[(8 - 8)];
+ step[8] = output[8];
+ step[9] = output[9];
+ step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64);
+ step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64);
+ step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64);
+ step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64);
+ step[14] = output[14];
+ step[15] = output[15];
- step[16] = output[stride*16] + output[stride*23];
- step[17] = output[stride*17] + output[stride*22];
- step[18] = output[stride*18] + output[stride*21];
- step[19] = output[stride*19] + output[stride*20];
- step[20] = -output[stride*20] + output[stride*19];
- step[21] = -output[stride*21] + output[stride*18];
- step[22] = -output[stride*22] + output[stride*17];
- step[23] = -output[stride*23] + output[stride*16];
- step[24] = -output[stride*24] + output[stride*31];
- step[25] = -output[stride*25] + output[stride*30];
- step[26] = -output[stride*26] + output[stride*29];
- step[27] = -output[stride*27] + output[stride*28];
- step[28] = output[stride*28] + output[stride*27];
- step[29] = output[stride*29] + output[stride*26];
- step[30] = output[stride*30] + output[stride*25];
- step[31] = output[stride*31] + output[stride*24];
+ step[16] = output[16] + output[23];
+ step[17] = output[17] + output[22];
+ step[18] = output[18] + output[21];
+ step[19] = output[19] + output[20];
+ step[20] = -output[20] + output[19];
+ step[21] = -output[21] + output[18];
+ step[22] = -output[22] + output[17];
+ step[23] = -output[23] + output[16];
+ step[24] = -output[24] + output[31];
+ step[25] = -output[25] + output[30];
+ step[26] = -output[26] + output[29];
+ step[27] = -output[27] + output[28];
+ step[28] = output[28] + output[27];
+ step[29] = output[29] + output[26];
+ step[30] = output[30] + output[25];
+ step[31] = output[31] + output[24];
// Stage 4
- output[stride*0] = step[0] + step[3];
- output[stride*1] = step[1] + step[2];
- output[stride*2] = -step[2] + step[1];
- output[stride*3] = -step[3] + step[0];
- output[stride*4] = step[4];
- output[stride*5] = (-step[5] + step[6])*C16;
- output[stride*6] = (step[6] + step[5])*C16;
- output[stride*7] = step[7];
- output[stride*8] = step[8] + step[11];
- output[stride*9] = step[9] + step[10];
- output[stride*10] = -step[10] + step[9];
- output[stride*11] = -step[11] + step[8];
- output[stride*12] = -step[12] + step[15];
- output[stride*13] = -step[13] + step[14];
- output[stride*14] = step[14] + step[13];
- output[stride*15] = step[15] + step[12];
+ output[0] = step[0] + step[3];
+ output[1] = step[1] + step[2];
+ output[2] = -step[2] + step[1];
+ output[3] = -step[3] + step[0];
+ output[4] = step[4];
+ output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64);
+ output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64);
+ output[7] = step[7];
+ output[8] = step[8] + step[11];
+ output[9] = step[9] + step[10];
+ output[10] = -step[10] + step[9];
+ output[11] = -step[11] + step[8];
+ output[12] = -step[12] + step[15];
+ output[13] = -step[13] + step[14];
+ output[14] = step[14] + step[13];
+ output[15] = step[15] + step[12];
- output[stride*16] = step[16];
- output[stride*17] = step[17];
- output[stride*18] = step[18]*-C8 + step[29]*C24;
- output[stride*19] = step[19]*-C8 + step[28]*C24;
- output[stride*20] = step[20]*-C24 + step[27]*-C8;
- output[stride*21] = step[21]*-C24 + step[26]*-C8;
- output[stride*22] = step[22];
- output[stride*23] = step[23];
- output[stride*24] = step[24];
- output[stride*25] = step[25];
- output[stride*26] = step[26]*C24 + step[21]*-C8;
- output[stride*27] = step[27]*C24 + step[20]*-C8;
- output[stride*28] = step[28]*C8 + step[19]*C24;
- output[stride*29] = step[29]*C8 + step[18]*C24;
- output[stride*30] = step[30];
- output[stride*31] = step[31];
+ output[16] = step[16];
+ output[17] = step[17];
+ output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64);
+ output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64);
+ output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64);
+ output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64);
+ output[22] = step[22];
+ output[23] = step[23];
+ output[24] = step[24];
+ output[25] = step[25];
+ output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64);
+ output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64);
+ output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64);
+ output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64);
+ output[30] = step[30];
+ output[31] = step[31];
// Stage 5
- step[0] = (output[stride*0] + output[stride*1]) * C16;
- step[1] = (-output[stride*1] + output[stride*0]) * C16;
- step[2] = output[stride*2]*C24 + output[stride*3] * C8;
- step[3] = output[stride*3]*C24 - output[stride*2] * C8;
- step[4] = output[stride*4] + output[stride*5];
- step[5] = -output[stride*5] + output[stride*4];
- step[6] = -output[stride*6] + output[stride*7];
- step[7] = output[stride*7] + output[stride*6];
- step[8] = output[stride*8];
- step[9] = output[stride*9]*-C8 + output[stride*14]*C24;
- step[10] = output[stride*10]*-C24 + output[stride*13]*-C8;
- step[11] = output[stride*11];
- step[12] = output[stride*12];
- step[13] = output[stride*13]*C24 + output[stride*10]*-C8;
- step[14] = output[stride*14]*C8 + output[stride*9]*C24;
- step[15] = output[stride*15];
+ step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64);
+ step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64);
+ step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64);
+ step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64);
+ step[4] = output[4] + output[5];
+ step[5] = -output[5] + output[4];
+ step[6] = -output[6] + output[7];
+ step[7] = output[7] + output[6];
+ step[8] = output[8];
+ step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64);
+ step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64);
+ step[11] = output[11];
+ step[12] = output[12];
+ step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64);
+ step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64);
+ step[15] = output[15];
- step[16] = output[stride*16] + output[stride*19];
- step[17] = output[stride*17] + output[stride*18];
- step[18] = -output[stride*18] + output[stride*17];
- step[19] = -output[stride*19] + output[stride*16];
- step[20] = -output[stride*20] + output[stride*23];
- step[21] = -output[stride*21] + output[stride*22];
- step[22] = output[stride*22] + output[stride*21];
- step[23] = output[stride*23] + output[stride*20];
- step[24] = output[stride*24] + output[stride*27];
- step[25] = output[stride*25] + output[stride*26];
- step[26] = -output[stride*26] + output[stride*25];
- step[27] = -output[stride*27] + output[stride*24];
- step[28] = -output[stride*28] + output[stride*31];
- step[29] = -output[stride*29] + output[stride*30];
- step[30] = output[stride*30] + output[stride*29];
- step[31] = output[stride*31] + output[stride*28];
+ step[16] = output[16] + output[19];
+ step[17] = output[17] + output[18];
+ step[18] = -output[18] + output[17];
+ step[19] = -output[19] + output[16];
+ step[20] = -output[20] + output[23];
+ step[21] = -output[21] + output[22];
+ step[22] = output[22] + output[21];
+ step[23] = output[23] + output[20];
+ step[24] = output[24] + output[27];
+ step[25] = output[25] + output[26];
+ step[26] = -output[26] + output[25];
+ step[27] = -output[27] + output[24];
+ step[28] = -output[28] + output[31];
+ step[29] = -output[29] + output[30];
+ step[30] = output[30] + output[29];
+ step[31] = output[31] + output[28];
// Stage 6
- output[stride*0] = step[0];
- output[stride*1] = step[1];
- output[stride*2] = step[2];
- output[stride*3] = step[3];
- output[stride*4] = step[4]*C28 + step[7]*C4;
- output[stride*5] = step[5]*C12 + step[6]*C20;
- output[stride*6] = step[6]*C12 + step[5]*-C20;
- output[stride*7] = step[7]*C28 + step[4]*-C4;
- output[stride*8] = step[8] + step[9];
- output[stride*9] = -step[9] + step[8];
- output[stride*10] = -step[10] + step[11];
- output[stride*11] = step[11] + step[10];
- output[stride*12] = step[12] + step[13];
- output[stride*13] = -step[13] + step[12];
- output[stride*14] = -step[14] + step[15];
- output[stride*15] = step[15] + step[14];
+ output[0] = step[0];
+ output[1] = step[1];
+ output[2] = step[2];
+ output[3] = step[3];
+ output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64);
+ output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64);
+ output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64);
+ output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64);
+ output[8] = step[8] + step[9];
+ output[9] = -step[9] + step[8];
+ output[10] = -step[10] + step[11];
+ output[11] = step[11] + step[10];
+ output[12] = step[12] + step[13];
+ output[13] = -step[13] + step[12];
+ output[14] = -step[14] + step[15];
+ output[15] = step[15] + step[14];
- output[stride*16] = step[16];
- output[stride*17] = step[17]*-C4 + step[30]*C28;
- output[stride*18] = step[18]*-C28 + step[29]*-C4;
- output[stride*19] = step[19];
- output[stride*20] = step[20];
- output[stride*21] = step[21]*-C20 + step[26]*C12;
- output[stride*22] = step[22]*-C12 + step[25]*-C20;
- output[stride*23] = step[23];
- output[stride*24] = step[24];
- output[stride*25] = step[25]*C12 + step[22]*-C20;
- output[stride*26] = step[26]*C20 + step[21]*C12;
- output[stride*27] = step[27];
- output[stride*28] = step[28];
- output[stride*29] = step[29]*C28 + step[18]*-C4;
- output[stride*30] = step[30]*C4 + step[17]*C28;
- output[stride*31] = step[31];
+ output[16] = step[16];
+ output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64);
+ output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64);
+ output[19] = step[19];
+ output[20] = step[20];
+ output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64);
+ output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64);
+ output[23] = step[23];
+ output[24] = step[24];
+ output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64);
+ output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64);
+ output[27] = step[27];
+ output[28] = step[28];
+ output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64);
+ output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64);
+ output[31] = step[31];
// Stage 7
- step[0] = output[stride*0];
- step[1] = output[stride*1];
- step[2] = output[stride*2];
- step[3] = output[stride*3];
- step[4] = output[stride*4];
- step[5] = output[stride*5];
- step[6] = output[stride*6];
- step[7] = output[stride*7];
- step[8] = output[stride*8]*C30 + output[stride*15]*C2;
- step[9] = output[stride*9]*C14 + output[stride*14]*C18;
- step[10] = output[stride*10]*C22 + output[stride*13]*C10;
- step[11] = output[stride*11]*C6 + output[stride*12]*C26;
- step[12] = output[stride*12]*C6 + output[stride*11]*-C26;
- step[13] = output[stride*13]*C22 + output[stride*10]*-C10;
- step[14] = output[stride*14]*C14 + output[stride*9]*-C18;
- step[15] = output[stride*15]*C30 + output[stride*8]*-C2;
+ step[0] = output[0];
+ step[1] = output[1];
+ step[2] = output[2];
+ step[3] = output[3];
+ step[4] = output[4];
+ step[5] = output[5];
+ step[6] = output[6];
+ step[7] = output[7];
+ step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64);
+ step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64);
+ step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64);
+ step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64);
+ step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64);
+ step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64);
+ step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64);
+ step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64);
- step[16] = output[stride*16] + output[stride*17];
- step[17] = -output[stride*17] + output[stride*16];
- step[18] = -output[stride*18] + output[stride*19];
- step[19] = output[stride*19] + output[stride*18];
- step[20] = output[stride*20] + output[stride*21];
- step[21] = -output[stride*21] + output[stride*20];
- step[22] = -output[stride*22] + output[stride*23];
- step[23] = output[stride*23] + output[stride*22];
- step[24] = output[stride*24] + output[stride*25];
- step[25] = -output[stride*25] + output[stride*24];
- step[26] = -output[stride*26] + output[stride*27];
- step[27] = output[stride*27] + output[stride*26];
- step[28] = output[stride*28] + output[stride*29];
- step[29] = -output[stride*29] + output[stride*28];
- step[30] = -output[stride*30] + output[stride*31];
- step[31] = output[stride*31] + output[stride*30];
+ step[16] = output[16] + output[17];
+ step[17] = -output[17] + output[16];
+ step[18] = -output[18] + output[19];
+ step[19] = output[19] + output[18];
+ step[20] = output[20] + output[21];
+ step[21] = -output[21] + output[20];
+ step[22] = -output[22] + output[23];
+ step[23] = output[23] + output[22];
+ step[24] = output[24] + output[25];
+ step[25] = -output[25] + output[24];
+ step[26] = -output[26] + output[27];
+ step[27] = output[27] + output[26];
+ step[28] = output[28] + output[29];
+ step[29] = -output[29] + output[28];
+ step[30] = -output[30] + output[31];
+ step[31] = output[31] + output[30];
// Final stage --- outputs indices are bit-reversed.
- output[stride*0] = step[0];
- output[stride*16] = step[1];
- output[stride*8] = step[2];
- output[stride*24] = step[3];
- output[stride*4] = step[4];
- output[stride*20] = step[5];
- output[stride*12] = step[6];
- output[stride*28] = step[7];
- output[stride*2] = step[8];
- output[stride*18] = step[9];
- output[stride*10] = step[10];
- output[stride*26] = step[11];
- output[stride*6] = step[12];
- output[stride*22] = step[13];
- output[stride*14] = step[14];
- output[stride*30] = step[15];
+ output[0] = step[0];
+ output[16] = step[1];
+ output[8] = step[2];
+ output[24] = step[3];
+ output[4] = step[4];
+ output[20] = step[5];
+ output[12] = step[6];
+ output[28] = step[7];
+ output[2] = step[8];
+ output[18] = step[9];
+ output[10] = step[10];
+ output[26] = step[11];
+ output[6] = step[12];
+ output[22] = step[13];
+ output[14] = step[14];
+ output[30] = step[15];
- output[stride*1] = step[16]*C31 + step[31]*C1;
- output[stride*17] = step[17]*C15 + step[30]*C17;
- output[stride*9] = step[18]*C23 + step[29]*C9;
- output[stride*25] = step[19]*C7 + step[28]*C25;
- output[stride*5] = step[20]*C27 + step[27]*C5;
- output[stride*21] = step[21]*C11 + step[26]*C21;
- output[stride*13] = step[22]*C19 + step[25]*C13;
- output[stride*29] = step[23]*C3 + step[24]*C29;
- output[stride*3] = step[24]*C3 + step[23]*-C29;
- output[stride*19] = step[25]*C19 + step[22]*-C13;
- output[stride*11] = step[26]*C11 + step[21]*-C21;
- output[stride*27] = step[27]*C27 + step[20]*-C5;
- output[stride*7] = step[28]*C7 + step[19]*-C25;
- output[stride*23] = step[29]*C23 + step[18]*-C9;
- output[stride*15] = step[30]*C15 + step[17]*-C17;
- output[stride*31] = step[31]*C31 + step[16]*-C1;
+ output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64);
+ output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64);
+ output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64);
+ output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64);
+ output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64);
+ output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64);
+ output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64);
+ output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64);
+ output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);
+ output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);
+ output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);
+ output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);
+ output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);
+ output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);
+ output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);
+ output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
}
void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int shortpitch = pitch >> 1;
- int i, j;
- double output[1024];
- // First transform columns
- for (i = 0; i < 32; i++) {
- double temp_in[32], temp_out[32];
- for (j = 0; j < 32; j++)
- temp_in[j] = input[j*shortpitch + i];
- dct32_1d(temp_in, temp_out, 1);
- for (j = 0; j < 32; j++)
- output[j*32 + i] = temp_out[j];
- }
- // Then transform rows
- for (i = 0; i < 32; ++i) {
- double temp_in[32], temp_out[32];
- for (j = 0; j < 32; ++j)
- temp_in[j] = output[j + i*32];
- dct32_1d(temp_in, temp_out, 1);
- for (j = 0; j < 32; ++j)
- output[j + i*32] = temp_out[j];
- }
- // Scale by some magic number
- for (i = 0; i < 1024; i++) {
- out[i] = (short)round(output[i]/4);
- }
- }
+ int shortpitch = pitch >> 1;
+ int i, j;
+ int output[32 * 32];
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-#else // CONFIG_DWTDCTHYBRID
-
-#if DWT_TYPE == 53
-
-// Note: block length must be even for this implementation
-static void analysis_53_row(int length, short *x,
- short *lowpass, short *highpass) {
- int n;
- short r, *a, *b;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *a++ = (r = *x++) << 1;
- *b++ = *x - ((r + x[1] + 1) >> 1);
- x++;
- }
- *a = (r = *x++) << 1;
- *b = *x - r;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ += (r + (*b) + 1) >> 1;
- r = *b++;
- }
-}
-
-static void analysis_53_col(int length, short *x,
- short *lowpass, short *highpass) {
- int n;
- short r, *a, *b;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *a++ = (r = *x++);
- *b++ = (((*x) << 1) - (r + x[1]) + 2) >> 2;
- x++;
- }
- *a = (r = *x++);
- *b = (*x - r + 1) >> 1;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ += (r + (*b) + 1) >> 1;
- r = *b++;
- }
-}
-
-static void dyadic_analyze_53(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(short));
- analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = c[i * pitch_c + j];
- analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i];
- }
- }
-}
-
-#elif DWT_TYPE == 26
-
-static void analysis_26_row(int length, short *x,
- short *lowpass, short *highpass) {
- int i, n;
- short r, s, *a, *b;
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- r = *x++;
- s = *x++;
- *a++ = r + s;
- *b++ = r - s;
- }
- n = length >> 1;
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ -= (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b -= (r - *a + 4) >> 3;
- }
-}
-
-static void analysis_26_col(int length, short *x,
- short *lowpass, short *highpass) {
- int i, n;
- short r, s, *a, *b;
- a = lowpass;
- b = highpass;
- for (i = length >> 1; i; i--) {
- r = *x++;
- s = *x++;
- *a++ = (r + s + 1) >> 1;
- *b++ = (r - s + 1) >> 1;
- }
- n = length >> 1;
- if (n >= 4) {
- a = lowpass;
- b = highpass;
- r = *lowpass;
- while (--n) {
- *b++ -= (r - a[1] + 4) >> 3;
- r = *a++;
- }
- *b -= (r - *a + 4) >> 3;
- }
-}
-
-static void dyadic_analyze_26(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- short buffer[2 * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- c[i * pitch_c + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(short));
- analysis_26_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = c[i * pitch_c + j];
- analysis_26_col(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = buffer[i];
- }
- }
-}
-
-#elif DWT_TYPE == 97
-
-static void analysis_97(int length, double *x,
- double *lowpass, double *highpass) {
- static const double a_predict1 = -1.586134342;
- static const double a_update1 = -0.05298011854;
- static const double a_predict2 = 0.8829110762;
- static const double a_update2 = 0.4435068522;
- static const double s_low = 1.149604398;
- static const double s_high = 1/1.149604398;
- int i;
- double y[DWT_MAX_LENGTH];
- // Predict 1
- for (i = 1; i < length - 2; i += 2) {
- x[i] += a_predict1 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] += 2 * a_predict1 * x[length - 2];
- // Update 1
- for (i = 2; i < length; i += 2) {
- x[i] += a_update1 * (x[i - 1] + x[i + 1]);
- }
- x[0] += 2 * a_update1 * x[1];
- // Predict 2
- for (i = 1; i < length - 2; i += 2) {
- x[i] += a_predict2 * (x[i - 1] + x[i + 1]);
- }
- x[length - 1] += 2 * a_predict2 * x[length - 2];
- // Update 2
- for (i = 2; i < length; i += 2) {
- x[i] += a_update2 * (x[i - 1] + x[i + 1]);
- }
- x[0] += 2 * a_update2 * x[1];
- memcpy(y, x, sizeof(*y) * length);
- // Scale and pack
- for (i = 0; i < length / 2; i++) {
- lowpass[i] = y[2 * i] * s_low;
- highpass[i] = y[2 * i + 1] * s_high;
- }
-}
-
-static void dyadic_analyze_97(int levels, int width, int height,
- short *x, int pitch_x, short *c, int pitch_c) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- double buffer[2 * DWT_MAX_LENGTH];
- double y[DWT_MAX_LENGTH * DWT_MAX_LENGTH];
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- y[i * DWT_MAX_LENGTH + j] = x[i * pitch_x + j] << DWT_PRECISION_BITS;
- }
- }
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &y[i * DWT_MAX_LENGTH], nw * sizeof(*buffer));
- analysis_97(nw, buffer, &y[i * DWT_MAX_LENGTH],
- &y[i * DWT_MAX_LENGTH] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++)
- buffer[i + nh] = y[i * DWT_MAX_LENGTH + j];
- analysis_97(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++)
- c[i * pitch_c + j] = round(buffer[i]);
- }
- }
-}
-
-#endif // DWT_TYPE
-
-// TODO(debargha): Implement the scaling differently so as not to have to
-// use the floating point dct
-static void dct16x16_1d_f(double input[16], double output[16]) {
- static const double C1 = 0.995184726672197;
- static const double C2 = 0.98078528040323;
- static const double C3 = 0.956940335732209;
- static const double C4 = 0.923879532511287;
- static const double C5 = 0.881921264348355;
- static const double C6 = 0.831469612302545;
- static const double C7 = 0.773010453362737;
- static const double C8 = 0.707106781186548;
- static const double C9 = 0.634393284163646;
- static const double C10 = 0.555570233019602;
- static const double C11 = 0.471396736825998;
- static const double C12 = 0.38268343236509;
- static const double C13 = 0.290284677254462;
- static const double C14 = 0.195090322016128;
- static const double C15 = 0.098017140329561;
-
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- double step[16];
- double intermediate[16];
- double temp1, temp2;
-
- // step 1
- step[ 0] = input[0] + input[15];
- step[ 1] = input[1] + input[14];
- step[ 2] = input[2] + input[13];
- step[ 3] = input[3] + input[12];
- step[ 4] = input[4] + input[11];
- step[ 5] = input[5] + input[10];
- step[ 6] = input[6] + input[ 9];
- step[ 7] = input[7] + input[ 8];
- step[ 8] = input[7] - input[ 8];
- step[ 9] = input[6] - input[ 9];
- step[10] = input[5] - input[10];
- step[11] = input[4] - input[11];
- step[12] = input[3] - input[12];
- step[13] = input[2] - input[13];
- step[14] = input[1] - input[14];
- step[15] = input[0] - input[15];
-
- // step 2
- output[0] = step[0] + step[7];
- output[1] = step[1] + step[6];
- output[2] = step[2] + step[5];
- output[3] = step[3] + step[4];
- output[4] = step[3] - step[4];
- output[5] = step[2] - step[5];
- output[6] = step[1] - step[6];
- output[7] = step[0] - step[7];
-
- temp1 = step[ 8]*C7;
- temp2 = step[15]*C9;
- output[ 8] = temp1 + temp2;
-
- temp1 = step[ 9]*C11;
- temp2 = step[14]*C5;
- output[ 9] = temp1 - temp2;
-
- temp1 = step[10]*C3;
- temp2 = step[13]*C13;
- output[10] = temp1 + temp2;
-
- temp1 = step[11]*C15;
- temp2 = step[12]*C1;
- output[11] = temp1 - temp2;
-
- temp1 = step[11]*C1;
- temp2 = step[12]*C15;
- output[12] = temp2 + temp1;
-
- temp1 = step[10]*C13;
- temp2 = step[13]*C3;
- output[13] = temp2 - temp1;
-
- temp1 = step[ 9]*C5;
- temp2 = step[14]*C11;
- output[14] = temp2 + temp1;
-
- temp1 = step[ 8]*C9;
- temp2 = step[15]*C7;
- output[15] = temp2 - temp1;
-
- // step 3
- step[ 0] = output[0] + output[3];
- step[ 1] = output[1] + output[2];
- step[ 2] = output[1] - output[2];
- step[ 3] = output[0] - output[3];
-
- temp1 = output[4]*C14;
- temp2 = output[7]*C2;
- step[ 4] = temp1 + temp2;
-
- temp1 = output[5]*C10;
- temp2 = output[6]*C6;
- step[ 5] = temp1 + temp2;
-
- temp1 = output[5]*C6;
- temp2 = output[6]*C10;
- step[ 6] = temp2 - temp1;
-
- temp1 = output[4]*C2;
- temp2 = output[7]*C14;
- step[ 7] = temp2 - temp1;
-
- step[ 8] = output[ 8] + output[11];
- step[ 9] = output[ 9] + output[10];
- step[10] = output[ 9] - output[10];
- step[11] = output[ 8] - output[11];
-
- step[12] = output[12] + output[15];
- step[13] = output[13] + output[14];
- step[14] = output[13] - output[14];
- step[15] = output[12] - output[15];
-
- // step 4
- output[ 0] = (step[ 0] + step[ 1]);
- output[ 8] = (step[ 0] - step[ 1]);
-
- temp1 = step[2]*C12;
- temp2 = step[3]*C4;
- temp1 = temp1 + temp2;
- output[ 4] = 2*(temp1*C8);
-
- temp1 = step[2]*C4;
- temp2 = step[3]*C12;
- temp1 = temp2 - temp1;
- output[12] = 2*(temp1*C8);
-
- output[ 2] = 2*((step[4] + step[ 5])*C8);
- output[14] = 2*((step[7] - step[ 6])*C8);
-
- temp1 = step[4] - step[5];
- temp2 = step[6] + step[7];
- output[ 6] = (temp1 + temp2);
- output[10] = (temp1 - temp2);
-
- intermediate[8] = step[8] + step[14];
- intermediate[9] = step[9] + step[15];
-
- temp1 = intermediate[8]*C12;
- temp2 = intermediate[9]*C4;
- temp1 = temp1 - temp2;
- output[3] = 2*(temp1*C8);
-
- temp1 = intermediate[8]*C4;
- temp2 = intermediate[9]*C12;
- temp1 = temp2 + temp1;
- output[13] = 2*(temp1*C8);
-
- output[ 9] = 2*((step[10] + step[11])*C8);
-
- intermediate[11] = step[10] - step[11];
- intermediate[12] = step[12] + step[13];
- intermediate[13] = step[12] - step[13];
- intermediate[14] = step[ 8] - step[14];
- intermediate[15] = step[ 9] - step[15];
-
- output[15] = (intermediate[11] + intermediate[12]);
- output[ 1] = -(intermediate[11] - intermediate[12]);
-
- output[ 7] = 2*(intermediate[13]*C8);
-
- temp1 = intermediate[14]*C12;
- temp2 = intermediate[15]*C4;
- temp1 = temp1 - temp2;
- output[11] = -2*(temp1*C8);
-
- temp1 = intermediate[14]*C4;
- temp2 = intermediate[15]*C12;
- temp1 = temp2 + temp1;
- output[ 5] = 2*(temp1*C8);
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-
-static void vp9_short_fdct16x16_c_f(short *input, short *out, int pitch,
- int scale) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int shortpitch = pitch >> 1;
- int i, j;
- double output[256];
- // First transform columns
- for (i = 0; i < 16; i++) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; j++)
- temp_in[j] = input[j*shortpitch + i];
- dct16x16_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; j++)
- output[j*16 + i] = temp_out[j];
- }
- // Then transform rows
- for (i = 0; i < 16; ++i) {
- double temp_in[16], temp_out[16];
- for (j = 0; j < 16; ++j)
- temp_in[j] = output[j + i*16];
- dct16x16_1d_f(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i*16] = temp_out[j];
- }
- // Scale by some magic number
- for (i = 0; i < 256; i++)
- out[i] = (short)round(output[i] / (2 << scale));
+ // Columns
+ for (i = 0; i < 32; i++) {
+ int temp_in[32], temp_out[32];
+ for (j = 0; j < 32; j++)
+ temp_in[j] = input[j * shortpitch + i] << 2;
+ dct32_1d(temp_in, temp_out);
+ for (j = 0; j < 32; j++)
+ output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
}
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
-}
-void vp9_short_fdct8x8_c_f(short *block, short *coefs, int pitch, int scale) {
- int j1, i, j, k;
- float b[8];
- float b1[8];
- float d[8][8];
- float f0 = (float) .7071068;
- float f1 = (float) .4903926;
- float f2 = (float) .4619398;
- float f3 = (float) .4157348;
- float f4 = (float) .3535534;
- float f5 = (float) .2777851;
- float f6 = (float) .1913417;
- float f7 = (float) .0975452;
- pitch = pitch / 2;
- for (i = 0, k = 0; i < 8; i++, k += pitch) {
- for (j = 0; j < 8; j++) {
- b[j] = (float)(block[k + j] << (3 - scale));
- }
- /* Horizontal transform */
- for (j = 0; j < 4; j++) {
- j1 = 7 - j;
- b1[j] = b[j] + b[j1];
- b1[j1] = b[j] - b[j1];
- }
- b[0] = b1[0] + b1[3];
- b[1] = b1[1] + b1[2];
- b[2] = b1[1] - b1[2];
- b[3] = b1[0] - b1[3];
- b[4] = b1[4];
- b[5] = (b1[6] - b1[5]) * f0;
- b[6] = (b1[6] + b1[5]) * f0;
- b[7] = b1[7];
- d[i][0] = (b[0] + b[1]) * f4;
- d[i][4] = (b[0] - b[1]) * f4;
- d[i][2] = b[2] * f6 + b[3] * f2;
- d[i][6] = b[3] * f6 - b[2] * f2;
- b1[4] = b[4] + b[5];
- b1[7] = b[7] + b[6];
- b1[5] = b[4] - b[5];
- b1[6] = b[7] - b[6];
- d[i][1] = b1[4] * f7 + b1[7] * f1;
- d[i][5] = b1[5] * f3 + b1[6] * f5;
- d[i][7] = b1[7] * f7 - b1[4] * f1;
- d[i][3] = b1[6] * f3 - b1[5] * f5;
- }
- /* Vertical transform */
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 4; j++) {
- j1 = 7 - j;
- b1[j] = d[j][i] + d[j1][i];
- b1[j1] = d[j][i] - d[j1][i];
- }
- b[0] = b1[0] + b1[3];
- b[1] = b1[1] + b1[2];
- b[2] = b1[1] - b1[2];
- b[3] = b1[0] - b1[3];
- b[4] = b1[4];
- b[5] = (b1[6] - b1[5]) * f0;
- b[6] = (b1[6] + b1[5]) * f0;
- b[7] = b1[7];
- d[0][i] = (b[0] + b[1]) * f4;
- d[4][i] = (b[0] - b[1]) * f4;
- d[2][i] = b[2] * f6 + b[3] * f2;
- d[6][i] = b[3] * f6 - b[2] * f2;
- b1[4] = b[4] + b[5];
- b1[7] = b[7] + b[6];
- b1[5] = b[4] - b[5];
- b1[6] = b[7] - b[6];
- d[1][i] = b1[4] * f7 + b1[7] * f1;
- d[5][i] = b1[5] * f3 + b1[6] * f5;
- d[7][i] = b1[7] * f7 - b1[4] * f1;
- d[3][i] = b1[6] * f3 - b1[5] * f5;
- }
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- *(coefs + j + i * 8) = (short) floor(d[i][j] + 0.5);
- }
- }
- return;
-}
-
-#define divide_bits(d, n) ((n) < 0 ? (d) << (n) : (d) >> (n))
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
-
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16);
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
-}
-
-#elif DWTDCT_TYPE == DWTDCT16X16
-
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(1, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(1, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 16, sizeof(short) * 16);
- vp9_short_fdct16x16_c_f(out + 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 32 * 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 32 * 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 33 * 16, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 32 + 33 * 16, buffer + i * 16, sizeof(short) * 16);
-}
-
-#elif DWTDCT_TYPE == DWTDCT8X8
-
-void vp9_short_fdct32x32_c(short *input, short *out, int pitch) {
- // assume out is a 32x32 buffer
- short buffer[8 * 8];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(2, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(2, 32, 32, input, short_pitch, out, 32);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(2, 32, 32, input, short_pitch, out, 32);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct8x8_c_f(out, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 8, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 32 * 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 32 * 8, buffer + i * 8, sizeof(short) * 8);
-
- vp9_short_fdct8x8_c_f(out + 33 * 8, buffer, 64, 1 + DWT_PRECISION_BITS);
- for (i = 0; i < 8; ++i)
- vpx_memcpy(out + i * 32 + 33 * 8, buffer + i * 8, sizeof(short) * 8);
-
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
- for (i = 16; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- out[i * 32 + j] = divide_bits(out[i * 32 + j], DWT_PRECISION_BITS - 2);
- }
- }
-}
-
-#endif
-
-#if CONFIG_TX64X64
-void vp9_short_fdct64x64_c(short *input, short *out, int pitch) {
- // assume out is a 64x64 buffer
- short buffer[16 * 16];
- int i, j;
- const int short_pitch = pitch >> 1;
-#if DWT_TYPE == 26
- dyadic_analyze_26(2, 64, 64, input, short_pitch, out, 64);
-#elif DWT_TYPE == 97
- dyadic_analyze_97(2, 64, 64, input, short_pitch, out, 64);
-#elif DWT_TYPE == 53
- dyadic_analyze_53(2, 64, 64, input, short_pitch, out, 64);
-#endif
- // TODO(debargha): Implement more efficiently by adding output pitch
- // argument to the dct16x16 function
- vp9_short_fdct16x16_c_f(out, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64, buffer + i * 16, sizeof(short) * 16);
-
-#if DWTDCT_TYPE == DWTDCT16X16_LEAN
- for (i = 0; i < 16; ++i) {
- for (j = 16; j < 48; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
- for (i = 16; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
-#elif DWTDCT_TYPE == DWTDCT16X16
- vp9_short_fdct16x16_c_f(out + 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 64 * 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 64 * 16, buffer + i * 16, sizeof(short) * 16);
-
- vp9_short_fdct16x16_c_f(out + 65 * 16, buffer, 128, 2 + DWT_PRECISION_BITS);
- for (i = 0; i < 16; ++i)
- vpx_memcpy(out + i * 64 + 65 * 16, buffer + i * 16, sizeof(short) * 16);
-
- // There is no dct used on the highest bands for now.
- // Need to scale these coeffs by a factor of 2/2^DWT_PRECISION_BITS
- // TODO(debargha): experiment with turning these coeffs to 0
+ // Rows
for (i = 0; i < 32; ++i) {
- for (j = 32; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
+ int temp_in[32], temp_out[32];
+ for (j = 0; j < 32; ++j)
+ temp_in[j] = output[j + i * 32];
+ dct32_1d(temp_in, temp_out);
+ for (j = 0; j < 32; ++j)
+ out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
- for (i = 32; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- out[i * 64 + j] = divide_bits(out[i * 64 + j], DWT_PRECISION_BITS - 1);
- }
- }
-#endif // DWTDCT_TYPE
}
-#endif // CONFIG_TX64X64
-#endif // CONFIG_DWTDCTHYBRID
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -21,7 +21,6 @@
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/common/vp9_setupintrarecon.h"
-#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_invtrans.h"
@@ -29,8 +28,9 @@
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_tokenize.h"
-#include "vp9_rtcd.h"
+#include "./vp9_rtcd.h"
#include <stdio.h>
#include <math.h>
#include <limits.h>
@@ -45,18 +45,15 @@
int enc_debug = 0;
#endif
-extern void select_interp_filter_type(VP9_COMP *cpi);
+void vp9_select_interp_filter_type(VP9_COMP *cpi);
static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col);
static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col);
static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col);
static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
@@ -103,7 +100,7 @@
*/
act = vp9_variance16x16(x->src.y_buffer, x->src.y_stride, VP9_VAR_OFFS, 0,
&sse);
- act = act << 4;
+ act <<= 4;
/* If the region is flat, lower the activity some more. */
if (act < 8 << 12)
@@ -201,7 +198,7 @@
#define OUTPUT_NORM_ACT_STATS 0
#if USE_ACT_INDEX
-// Calculate and activity index for each mb
+// Calculate an activity index for each mb
static void calc_activity_index(VP9_COMP *cpi, MACROBLOCK *x) {
VP9_COMMON *const cm = &cpi->common;
int mb_row, mb_col;
@@ -271,6 +268,8 @@
unsigned int mb_activity;
int64_t activity_sum = 0;
+ x->mb_activity_ptr = cpi->mb_activity_map;
+
// for each macroblock row in image
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
#if ALT_ACT_MEASURE
@@ -488,8 +487,7 @@
{
int segment_id = mbmi->segment_id;
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB)) {
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
for (i = 0; i < NB_TXFM_MODES; i++) {
cpi->rd_tx_select_diff[i] += ctx->txfm_rd_diff[i];
}
@@ -598,9 +596,6 @@
[vp9_switchable_interp_map[mbmi->interp_filter]];
}
- cpi->prediction_error += ctx->distortion;
- cpi->intra_error += ctx->intra_error;
-
cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;
cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
@@ -625,24 +620,12 @@
}
static void set_offsets(VP9_COMP *cpi,
- int mb_row, int mb_col, int block_size,
- int *ref_yoffset, int *ref_uvoffset) {
+ int mb_row, int mb_col, int block_size) {
MACROBLOCK *const x = &cpi->mb;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi;
const int dst_fb_idx = cm->new_fb_idx;
- const int recon_y_stride = cm->yv12_fb[dst_fb_idx].y_stride;
- const int recon_uv_stride = cm->yv12_fb[dst_fb_idx].uv_stride;
- const int recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
- const int recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
- const int src_y_stride = x->src.y_stride;
- const int src_uv_stride = x->src.uv_stride;
- const int src_yoffset = 16 * mb_row * src_y_stride + 16 * mb_col;
- const int src_uvoffset = 8 * mb_row * src_uv_stride + 8 * mb_col;
- const int ref_fb_idx = cm->lst_fb_idx;
- const int ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
- const int ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
const int idx_map = mb_row * cm->mb_cols + mb_col;
const int idx_str = xd->mode_info_stride * mb_row + mb_col;
@@ -664,9 +647,9 @@
xd->prev_mode_info_context = cm->prev_mi + idx_str;
// Set up destination pointers
- xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
- xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
- xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->dst,
+ &cm->yv12_fb[dst_fb_idx],
+ mb_row, mb_col, NULL, NULL);
/* Set up limit values for MV components to prevent them from
* extending beyond the UMV borders assuming 16x16 block size */
@@ -680,23 +663,11 @@
// Set up distance of MB to edge of frame in 1/8th pel units
block_size >>= 4; // in macroblock units
assert(!(mb_col & (block_size - 1)) && !(mb_row & (block_size - 1)));
- xd->mb_to_top_edge = -((mb_row * 16) << 3);
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3;
- xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3;
+ set_mb_row(cm, xd, mb_row, block_size);
+ set_mb_col(cm, xd, mb_col, block_size);
- // Are edges available for intra prediction?
- xd->up_available = (mb_row != 0);
- xd->left_available = (mb_col != 0);
-
- /* Reference buffer offsets */
- *ref_yoffset = (mb_row * ref_y_stride * 16) + (mb_col * 16);
- *ref_uvoffset = (mb_row * ref_uv_stride * 8) + (mb_col * 8);
-
/* set up source buffers */
- x->src.y_buffer = cpi->Source->y_buffer + src_yoffset;
- x->src.u_buffer = cpi->Source->u_buffer + src_uvoffset;
- x->src.v_buffer = cpi->Source->v_buffer + src_uvoffset;
+ setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL);
/* R/D setup */
x->rddiv = cpi->RDDIV;
@@ -727,9 +698,11 @@
const int x = mb_col & ~3;
const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
+ const int tile_progress = cm->cur_tile_mb_col_start * cm->mb_rows;
+ const int mb_cols = cm->cur_tile_mb_col_end - cm->cur_tile_mb_col_start;
cpi->seg0_progress =
- ((y * cm->mb_cols + x * 4 + p32 + p16) << 16) / cm->MBs;
+ ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) << 16) / cm->MBs;
}
} else {
mbmi->segment_id = 0;
@@ -736,25 +709,25 @@
}
}
-static void pick_mb_modes(VP9_COMP *cpi,
- int mb_row,
- int mb_col,
- TOKENEXTRA **tp,
- int *totalrate,
- int *totaldist) {
+static int pick_mb_modes(VP9_COMP *cpi,
+ int mb_row0,
+ int mb_col0,
+ TOKENEXTRA **tp,
+ int *totalrate,
+ int *totaldist) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
int i;
- int recon_yoffset, recon_uvoffset;
+ int splitmodes_used = 0;
ENTROPY_CONTEXT_PLANES left_context[2];
ENTROPY_CONTEXT_PLANES above_context[2];
ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context
- + mb_col;
+ + mb_col0;
/* Function should not modify L & A contexts; save and restore on exit */
vpx_memcpy(left_context,
- cm->left_context + (mb_row & 2),
+ cm->left_context + (mb_row0 & 2),
sizeof(left_context));
vpx_memcpy(above_context,
initial_above_context_ptr,
@@ -763,9 +736,11 @@
/* Encode MBs in raster order within the SB */
for (i = 0; i < 4; i++) {
const int x_idx = i & 1, y_idx = i >> 1;
+ const int mb_row = mb_row0 + y_idx;
+ const int mb_col = mb_col0 + x_idx;
MB_MODE_INFO *mbmi;
- if ((mb_row + y_idx >= cm->mb_rows) || (mb_col + x_idx >= cm->mb_cols)) {
+ if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) {
// MB lies outside frame, move on
continue;
}
@@ -772,8 +747,7 @@
// Index of the MB in the SB 0..3
xd->mb_index = i;
- set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16,
- &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 16);
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
@@ -781,15 +755,11 @@
mbmi = &xd->mode_info_context->mbmi;
mbmi->sb_type = BLOCK_SIZE_MB16X16;
- cpi->update_context = 0; // TODO Do we need this now??
-
- vp9_intra_prediction_down_copy(xd);
-
// Find best coding mode & reconstruct the MB so it is available
// as a predictor for MBs that follow in the SB
if (cm->frame_type == KEY_FRAME) {
int r, d;
-#ifdef ENC_DEBUG
+#if 0 // ENC_DEBUG
if (enc_debug)
printf("intra pick_mb_modes %d %d\n", mb_row, mb_col);
#endif
@@ -798,8 +768,8 @@
*totaldist += d;
// Dummy encode, do not do the tokenization
- encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, 0,
- mb_row + y_idx, mb_col + x_idx);
+ encode_macroblock(cpi, tp, 0, mb_row, mb_col);
+
// Note the encoder may have changed the segment_id
// Save the coding context
@@ -808,18 +778,18 @@
} else {
int seg_id, r, d;
-#ifdef ENC_DEBUG
+#if 0 // ENC_DEBUG
if (enc_debug)
printf("inter pick_mb_modes %d %d\n", mb_row, mb_col);
#endif
- vp9_pick_mode_inter_macroblock(cpi, x, recon_yoffset,
- recon_uvoffset, &r, &d);
+ vp9_pick_mode_inter_macroblock(cpi, x, mb_row, mb_col, &r, &d);
*totalrate += r;
*totaldist += d;
+ splitmodes_used += (mbmi->mode == SPLITMV);
+
// Dummy encode, do not do the tokenization
- encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, 0,
- mb_row + y_idx, mb_col + x_idx);
+ encode_macroblock(cpi, tp, 0, mb_row, mb_col);
seg_id = mbmi->segment_id;
if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) {
@@ -842,12 +812,14 @@
}
/* Restore L & A coding context to those in place on entry */
- vpx_memcpy(cm->left_context + (mb_row & 2),
+ vpx_memcpy(cm->left_context + (mb_row0 & 2),
left_context,
sizeof(left_context));
vpx_memcpy(initial_above_context_ptr,
above_context,
sizeof(above_context));
+
+ return splitmodes_used;
}
static void pick_sb_modes(VP9_COMP *cpi,
@@ -859,13 +831,11 @@
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- int recon_yoffset, recon_uvoffset;
- set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 32);
xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB32X32;
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
- cpi->update_context = 0; // TODO Do we need this now??
/* Find best coding mode & reconstruct the MB so it is available
* as a predictor for MBs that follow in the SB */
@@ -878,11 +848,7 @@
vpx_memcpy(&x->sb32_context[xd->sb_index].mic, xd->mode_info_context,
sizeof(MODE_INFO));
} else {
- vp9_rd_pick_inter_mode_sb32(cpi, x,
- recon_yoffset,
- recon_uvoffset,
- totalrate,
- totaldist);
+ vp9_rd_pick_inter_mode_sb32(cpi, x, mb_row, mb_col, totalrate, totaldist);
}
}
@@ -895,34 +861,25 @@
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- int recon_yoffset, recon_uvoffset;
- set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 64);
xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64;
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
- cpi->update_context = 0; // TODO(rbultje) Do we need this now??
/* Find best coding mode & reconstruct the MB so it is available
* as a predictor for MBs that follow in the SB */
if (cm->frame_type == KEY_FRAME) {
- vp9_rd_pick_intra_mode_sb64(cpi, x,
- totalrate,
- totaldist);
+ vp9_rd_pick_intra_mode_sb64(cpi, x, totalrate, totaldist);
/* Save the coding context */
- vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context,
- sizeof(MODE_INFO));
+ vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context, sizeof(MODE_INFO));
} else {
- vp9_rd_pick_inter_mode_sb64(cpi, x,
- recon_yoffset,
- recon_uvoffset,
- totalrate,
- totaldist);
+ vp9_rd_pick_inter_mode_sb64(cpi, x, mb_row, mb_col, totalrate, totaldist);
}
}
-static void update_stats(VP9_COMP *cpi) {
+static void update_stats(VP9_COMP *cpi, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -976,6 +933,9 @@
if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME))
cpi->inter_zz_count++;
}
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_update_nzc_counts(&cpi->common, xd, mb_row, mb_col);
+#endif
}
static void encode_sb(VP9_COMP *cpi,
@@ -986,17 +946,17 @@
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- int recon_yoffset, recon_uvoffset;
cpi->sb32_count[is_sb]++;
if (is_sb) {
- set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 32);
update_state(cpi, &x->sb32_context[xd->sb_index], 32, output_enabled);
- encode_superblock32(cpi, tp, recon_yoffset, recon_uvoffset,
+ encode_superblock32(cpi, tp,
output_enabled, mb_row, mb_col);
- if (output_enabled)
- update_stats(cpi);
+ if (output_enabled) {
+ update_stats(cpi, mb_row, mb_col);
+ }
if (output_enabled) {
(*tp)->Token = EOSB_TOKEN;
@@ -1015,8 +975,7 @@
continue;
}
- set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16,
- &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16);
xd->mb_index = i;
update_state(cpi, &x->mb_context[xd->sb_index][i], 16, output_enabled);
@@ -1023,16 +982,15 @@
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
vp9_activity_masking(cpi, x);
- vp9_intra_prediction_down_copy(xd);
-
- encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset,
+ encode_macroblock(cpi, tp,
output_enabled, mb_row + y_idx, mb_col + x_idx);
- if (output_enabled)
- update_stats(cpi);
+ if (output_enabled) {
+ update_stats(cpi, mb_row + y_idx, mb_col + x_idx);
+ }
if (output_enabled) {
(*tp)->Token = EOSB_TOKEN;
- (*tp)++;
+ (*tp)++;
if (mb_row + y_idx < cm->mb_rows)
cpi->tplist[mb_row + y_idx].stop = *tp;
}
@@ -1060,13 +1018,11 @@
cpi->sb64_count[is_sb[0] == 2]++;
if (is_sb[0] == 2) {
- int recon_yoffset, recon_uvoffset;
-
- set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset);
+ set_offsets(cpi, mb_row, mb_col, 64);
update_state(cpi, &x->sb64_context, 64, 1);
- encode_superblock64(cpi, tp, recon_yoffset, recon_uvoffset,
+ encode_superblock64(cpi, tp,
1, mb_row, mb_col);
- update_stats(cpi);
+ update_stats(cpi, mb_row, mb_col);
(*tp)->Token = EOSB_TOKEN;
(*tp)++;
@@ -1098,17 +1054,18 @@
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
int mb_col;
- int mb_cols = cm->mb_cols;
// Initialize the left context for the new SB row
vpx_memset(cm->left_context, 0, sizeof(cm->left_context));
// Code each SB in the row
- for (mb_col = 0; mb_col < mb_cols; mb_col += 4) {
+ for (mb_col = cm->cur_tile_mb_col_start;
+ mb_col < cm->cur_tile_mb_col_end; mb_col += 4) {
int i;
int sb32_rate = 0, sb32_dist = 0;
int is_sb[4];
int sb64_rate = INT_MAX, sb64_dist;
+ int sb64_skip = 0;
ENTROPY_CONTEXT_PLANES l[4], a[4];
TOKENEXTRA *tp_orig = *tp;
@@ -1118,6 +1075,8 @@
const int x_idx = (i & 1) << 1, y_idx = i & 2;
int mb_rate = 0, mb_dist = 0;
int sb_rate = INT_MAX, sb_dist;
+ int splitmodes_used = 0;
+ int sb32_skip = 0;
if (mb_row + y_idx >= cm->mb_rows || mb_col + x_idx >= cm->mb_cols)
continue;
@@ -1124,12 +1083,19 @@
xd->sb_index = i;
- pick_mb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
- tp, &mb_rate, &mb_dist);
+ splitmodes_used = pick_mb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
+ tp, &mb_rate, &mb_dist);
+
mb_rate += vp9_cost_bit(cm->sb32_coded, 0);
- if (!((( mb_cols & 1) && mb_col + x_idx == mb_cols - 1) ||
- ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) {
+ if (cpi->sf.splitmode_breakout) {
+ sb32_skip = splitmodes_used;
+ sb64_skip += splitmodes_used;
+ }
+
+ if ( !sb32_skip &&
+ !(((cm->mb_cols & 1) && mb_col + x_idx == cm->mb_cols - 1) ||
+ ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) {
/* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
pick_sb_modes(cpi, mb_row + y_idx, mb_col + x_idx,
tp, &sb_rate, &sb_dist);
@@ -1147,6 +1113,11 @@
is_sb[i] = 0;
sb32_rate += mb_rate;
sb32_dist += mb_dist;
+
+ // If we used 16x16 instead of 32x32 then skip 64x64 (if enabled).
+ if (cpi->sf.mb16_breakout) {
+ ++sb64_skip;
+ }
}
/* Encode SB using best computed mode(s) */
@@ -1162,7 +1133,8 @@
memcpy(cm->left_context, &l, sizeof(l));
sb32_rate += vp9_cost_bit(cm->sb64_coded, 0);
- if (!((( mb_cols & 3) && mb_col + 3 >= mb_cols) ||
+ if (!sb64_skip &&
+ !(((cm->mb_cols & 3) && mb_col + 3 >= cm->mb_cols) ||
((cm->mb_rows & 3) && mb_row + 3 >= cm->mb_rows))) {
pick_sb64_modes(cpi, mb_row, mb_col, tp, &sb64_rate, &sb64_dist);
sb64_rate += vp9_cost_bit(cm->sb64_coded, 1);
@@ -1205,7 +1177,7 @@
// Copy data over into macro block data structures.
x->src = *cpi->Source;
- xd->pre = cm->yv12_fb[cm->lst_fb_idx];
+ xd->pre = cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]];
xd->dst = cm->yv12_fb[cm->new_fb_idx];
// set up frame for intra coded blocks
@@ -1239,22 +1211,38 @@
vpx_memset(cm->above_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
- xd->fullpixel_mask = 0xffffffff;
- if (cm->full_pixel)
- xd->fullpixel_mask = 0xfffffff8;
+ xd->fullpixel_mask = cm->full_pixel ? 0xfffffff8 : 0xffffffff;
}
+static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
+ if (lossless) {
+ cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_iwalsh4x4_1;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_iwalsh4x4;
+ cpi->mb.optimize = 0;
+ cpi->common.filter_level = 0;
+ cpi->zbin_mode_boost_enabled = FALSE;
+ cpi->common.txfm_mode = ONLY_4X4;
+ } else {
+ cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4_1;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4;
+ }
+}
+
+
static void encode_frame_internal(VP9_COMP *cpi) {
int mb_row;
MACROBLOCK *const x = &cpi->mb;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
-
- TOKENEXTRA *tp = cpi->tok;
int totalrate;
- // printf("encode_frame_internal frame %d (%d)\n",
- // cpi->common.current_video_frame, cpi->common.show_frame);
+// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
+// cpi->common.current_video_frame, cpi->common.show_frame,
+// cm->frame_type);
// Compute a modified set of reference frame probabilities to use when
// prediction fails. These are based on the current general estimates for
@@ -1273,14 +1261,9 @@
totalrate = 0;
- // Functions setup for all frame types so we can use MC in AltRef
- vp9_setup_interp_filters(xd, cm->mcomp_filter_type, cm);
-
// Reset frame count of inter 0,0 motion vector usage.
cpi->inter_zz_count = 0;
- cpi->prediction_error = 0;
- cpi->intra_error = 0;
cpi->skip_true_count[0] = cpi->skip_true_count[1] = cpi->skip_true_count[2] = 0;
cpi->skip_false_count[0] = cpi->skip_false_count[1] = cpi->skip_false_count[2] = 0;
@@ -1292,16 +1275,27 @@
vp9_zero(cpi->NMVcount);
vp9_zero(cpi->coef_counts_4x4);
- vp9_zero(cpi->hybrid_coef_counts_4x4);
vp9_zero(cpi->coef_counts_8x8);
- vp9_zero(cpi->hybrid_coef_counts_8x8);
vp9_zero(cpi->coef_counts_16x16);
- vp9_zero(cpi->hybrid_coef_counts_16x16);
vp9_zero(cpi->coef_counts_32x32);
+ vp9_zero(cm->fc.eob_branch_counts);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_zero(cm->fc.nzc_counts_4x4);
+ vp9_zero(cm->fc.nzc_counts_8x8);
+ vp9_zero(cm->fc.nzc_counts_16x16);
+ vp9_zero(cm->fc.nzc_counts_32x32);
+ vp9_zero(cm->fc.nzc_pcat_counts);
+#endif
#if CONFIG_NEW_MVREF
vp9_zero(cpi->mb_mv_ref_count);
#endif
+ cpi->mb.e_mbd.lossless = (cm->base_qindex == 0 &&
+ cm->y1dc_delta_q == 0 &&
+ cm->uvdc_delta_q == 0 &&
+ cm->uvac_delta_q == 0);
+ switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
+
vp9_frame_init_quantizer(cpi);
vp9_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
@@ -1330,17 +1324,29 @@
vpx_usec_timer_start(&emr_timer);
{
- // For each row of SBs in the frame
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) {
- encode_sb_row(cpi, mb_row, &tp, &totalrate);
- }
+ // Take tiles into account and give start/end MB
+ int tile_col, tile_row;
+ TOKENEXTRA *tp = cpi->tok;
- cpi->tok_count = (unsigned int)(tp - cpi->tok);
+ for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
+ vp9_get_tile_row_offsets(cm, tile_row);
+
+ for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
+ TOKENEXTRA *tp_old = tp;
+
+ // For each row of SBs in the frame
+ vp9_get_tile_col_offsets(cm, tile_col);
+ for (mb_row = cm->cur_tile_mb_row_start;
+ mb_row < cm->cur_tile_mb_row_end; mb_row += 4) {
+ encode_sb_row(cpi, mb_row, &tp, &totalrate);
+ }
+ cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old);
+ }
+ }
}
vpx_usec_timer_mark(&emr_timer);
cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
-
}
// 256 rate units to the bit,
@@ -1347,7 +1353,6 @@
// projected_frame_size in units of BYTES
cpi->projected_frame_size = totalrate >> 8;
-
#if 0
// Keep record of the total distortion this time around for future use
cpi->last_frame_distortion = cpi->frame_distortion;
@@ -1388,8 +1393,7 @@
const int segment_id = mbmi->segment_id;
xd->mode_info_context = mi;
- assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+ assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
(cm->mb_no_coeff_skip && mbmi->mb_skip_coeff));
mbmi->txfm_size = txfm_max;
}
@@ -1413,9 +1417,8 @@
int x, y;
for (y = 0; y < ymbs; y++) {
- for (x = 0; x < xmbs; x++) {
+ for (x = 0; x < xmbs; x++)
mi[y * mis + x].mbmi.txfm_size = txfm_size;
- }
}
}
@@ -1433,8 +1436,7 @@
const int xmbs = MIN(2, mb_cols_left);
xd->mode_info_context = mi;
- assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+ assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
(cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs)));
set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
}
@@ -1454,8 +1456,7 @@
const int xmbs = MIN(4, mb_cols_left);
xd->mode_info_context = mi;
- assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) ||
+ assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
(cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs)));
set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
}
@@ -1526,9 +1527,9 @@
*/
if (cpi->common.frame_type == KEY_FRAME)
frame_type = 0;
- else if (cpi->is_src_frame_alt_ref && cpi->common.refresh_golden_frame)
+ else if (cpi->is_src_frame_alt_ref && cpi->refresh_golden_frame)
frame_type = 3;
- else if (cpi->common.refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
+ else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
frame_type = 1;
else
frame_type = 2;
@@ -1549,35 +1550,21 @@
pred_type = HYBRID_PREDICTION;
/* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */
-#if CONFIG_LOSSLESS
+
+ cpi->mb.e_mbd.lossless = 0;
if (cpi->oxcf.lossless) {
txfm_type = ONLY_4X4;
+ cpi->mb.e_mbd.lossless = 1;
} else
-#endif
- /* FIXME (rbultje)
- * this is a hack (no really), basically to work around the complete
- * nonsense coefficient cost prediction for keyframes. The probabilities
- * are reset to defaults, and thus we basically have no idea how expensive
- * a 4x4 vs. 8x8 will really be. The result is that any estimate at which
- * of the two is better is utterly bogus.
- * I'd like to eventually remove this hack, but in order to do that, we
- * need to move the frame reset code from the frame encode init to the
- * bitstream write code, or alternatively keep a backup of the previous
- * keyframe's probabilities as an estimate of what the current keyframe's
- * coefficient cost distributions may look like. */
- if (frame_type == 0) {
- txfm_type = ALLOW_32X32;
- } else
#if 0
- /* FIXME (rbultje)
- * this code is disabled for a similar reason as the code above; the
- * problem is that each time we "revert" to 4x4 only (or even 8x8 only),
- * the coefficient probabilities for 16x16 (and 8x8) start lagging behind,
- * thus leading to them lagging further behind and not being chosen for
- * subsequent frames either. This is essentially a local minimum problem
- * that we can probably fix by estimating real costs more closely within
- * a frame, perhaps by re-calculating costs on-the-fly as frame encoding
- * progresses. */
+ /* FIXME (rbultje): this code is disabled until we support cost updates
+ * while a frame is being encoded; the problem is that each time we
+ * "revert" to 4x4 only (or even 8x8 only), the coefficient probabilities
+ * for 16x16 (and 8x8) start lagging behind, thus leading to them lagging
+ * further behind and not being chosen for subsequent frames either. This
+ * is essentially a local minimum problem that we can probably fix by
+ * estimating real costs more closely within a frame, perhaps by re-
+ * calculating costs on-the-fly as frame encoding progresses. */
if (cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
cpi->rd_tx_select_threshes[frame_type][ONLY_4X4] &&
cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] >
@@ -1671,7 +1658,7 @@
// Update interpolation filter strategy for next frame.
if ((cpi->common.frame_type != KEY_FRAME) && (cpi->sf.search_best_filter))
- select_interp_filter_type(cpi);
+ vp9_select_interp_filter_type(cpi);
} else {
encode_frame_internal(cpi);
}
@@ -1683,30 +1670,23 @@
int i;
for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
+ for (c = 0; c < 4; c++)
x->block[r * 4 + c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
- }
}
for (r = 0; r < 2; r++) {
- for (c = 0; c < 2; c++) {
+ for (c = 0; c < 2; c++)
x->block[16 + r * 2 + c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
- }
}
for (r = 0; r < 2; r++) {
- for (c = 0; c < 2; c++) {
+ for (c = 0; c < 2; c++)
x->block[20 + r * 2 + c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
- }
}
- x->block[24].src_diff = x->src_diff + 384;
-
-
- for (i = 0; i < 25; i++) {
+ for (i = 0; i < 24; i++)
x->block[i].coeff = x->coeff + i * 16;
- }
}
void vp9_build_block_offsets(MACROBLOCK *x) {
@@ -1826,63 +1806,6 @@
#endif
}
-static void update_sb_skip_coeff_state(VP9_COMP *cpi,
- ENTROPY_CONTEXT_PLANES ta[4],
- ENTROPY_CONTEXT_PLANES tl[4],
- TOKENEXTRA *t[4],
- TOKENEXTRA **tp,
- int skip[4], int output_enabled) {
- MACROBLOCK *const x = &cpi->mb;
- TOKENEXTRA tokens[4][16 * 25];
- int n_tokens[4], n;
-
- // if there were no skips, we don't need to do anything
- if (!skip[0] && !skip[1] && !skip[2] && !skip[3])
- return;
-
- // if we don't do coeff skipping for this frame, we don't
- // need to do anything here
- if (!cpi->common.mb_no_coeff_skip)
- return;
-
- // if all 4 MBs skipped coeff coding, nothing to be done
- if (skip[0] && skip[1] && skip[2] && skip[3])
- return;
-
- // so the situation now is that we want to skip coeffs
- // for some MBs, but not all, and we didn't code EOB
- // coefficients for them. However, the skip flag for this
- // SB will be 0 overall, so we need to insert EOBs in the
- // middle of the token tree. Do so here.
- n_tokens[0] = t[1] - t[0];
- n_tokens[1] = t[2] - t[1];
- n_tokens[2] = t[3] - t[2];
- n_tokens[3] = *tp - t[3];
- if (n_tokens[0])
- memcpy(tokens[0], t[0], n_tokens[0] * sizeof(*t[0]));
- if (n_tokens[1])
- memcpy(tokens[1], t[1], n_tokens[1] * sizeof(*t[0]));
- if (n_tokens[2])
- memcpy(tokens[2], t[2], n_tokens[2] * sizeof(*t[0]));
- if (n_tokens[3])
- memcpy(tokens[3], t[3], n_tokens[3] * sizeof(*t[0]));
-
- // reset pointer, stuff EOBs where necessary
- *tp = t[0];
- for (n = 0; n < 4; n++) {
- if (skip[n]) {
- x->e_mbd.above_context = &ta[n];
- x->e_mbd.left_context = &tl[n];
- vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled);
- } else {
- if (n_tokens[n]) {
- memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
- }
- (*tp) += n_tokens[n];
- }
- }
-}
-
static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
ENTROPY_CONTEXT_PLANES ta[16],
ENTROPY_CONTEXT_PLANES tl[16],
@@ -1994,21 +1917,151 @@
}
}
+#if CONFIG_CODE_NONZEROCOUNT
+static void gather_nzcs_mb16(VP9_COMMON *const cm,
+ MACROBLOCKD *xd) {
+ int i;
+ vpx_memset(xd->mode_info_context->mbmi.nzcs, 0,
+ 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0]));
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_4X4:
+ for (i = 0; i < 24; ++i) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_8X8:
+ for (i = 0; i < 16; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV) {
+ for (i = 16; i < 24; ++i) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ } else {
+ for (i = 16; i < 24; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ }
+ break;
+
+ case TX_16X16:
+ xd->mode_info_context->mbmi.nzcs[0] = xd->nzcs[0];
+ for (i = 16; i < 24; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void gather_nzcs_sb32(VP9_COMMON *const cm,
+ MACROBLOCKD *xd) {
+ int i, j;
+ MODE_INFO *m = xd->mode_info_context;
+ int mis = cm->mode_info_stride;
+ vpx_memset(m->mbmi.nzcs, 0,
+ 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0]));
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_4X4:
+ for (i = 0; i < 96; ++i) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_8X8:
+ for (i = 0; i < 96; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_16X16:
+ for (i = 0; i < 96; i += 16) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_32X32:
+ xd->mode_info_context->mbmi.nzcs[0] = xd->nzcs[0];
+ for (i = 64; i < 96; i += 16) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ default:
+ break;
+ }
+ for (i = 0; i < 2; ++i)
+ for (j = 0; j < 2; ++j) {
+ if (i == 0 && j == 0) continue;
+ vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs,
+ 384 * sizeof(m->mbmi.nzcs[0]));
+ }
+}
+
+static void gather_nzcs_sb64(VP9_COMMON *const cm,
+ MACROBLOCKD *xd) {
+ int i, j;
+ MODE_INFO *m = xd->mode_info_context;
+ int mis = cm->mode_info_stride;
+ vpx_memset(xd->mode_info_context->mbmi.nzcs, 0,
+ 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0]));
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_4X4:
+ for (i = 0; i < 384; ++i) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_8X8:
+ for (i = 0; i < 384; i += 4) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_16X16:
+ for (i = 0; i < 384; i += 16) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ case TX_32X32:
+ for (i = 0; i < 384; i += 64) {
+ xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i];
+ }
+ break;
+
+ default:
+ break;
+ }
+ for (i = 0; i < 4; ++i)
+ for (j = 0; j < 4; ++j) {
+ if (i == 0 && j == 0) continue;
+ vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs,
+ 384 * sizeof(m->mbmi.nzcs[0]));
+ }
+}
+#endif
+
static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled,
int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ MODE_INFO *mi = xd->mode_info_context;
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const int mis = cm->mode_info_stride;
unsigned char ref_pred_flag;
assert(!xd->mode_info_context->mbmi.sb_type);
#ifdef ENC_DEBUG
- enc_debug = (cpi->common.current_video_frame == 46 &&
- mb_row == 5 && mb_col == 2);
+ enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
+ mb_row == 8 && mb_col == 0 && output_enabled);
if (enc_debug)
printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled);
#endif
@@ -2037,9 +2090,11 @@
else
cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
} else if (mbmi->mode == SPLITMV)
- cpi->zbin_mode_boost = 0;
+ cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
else
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ } else {
+ cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
}
}
@@ -2053,7 +2108,7 @@
}
if (mbmi->ref_frame == INTRA_FRAME) {
-#ifdef ENC_DEBUG
+#if 0 // def ENC_DEBUG
if (enc_debug) {
printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip,
mbmi->txfm_size);
@@ -2060,14 +2115,14 @@
}
#endif
if (mbmi->mode == B_PRED) {
- vp9_encode_intra16x16mbuv(x);
+ vp9_encode_intra16x16mbuv(cm, x);
vp9_encode_intra4x4mby(x);
} else if (mbmi->mode == I8X8_PRED) {
vp9_encode_intra8x8mby(x);
vp9_encode_intra8x8mbuv(x);
} else {
- vp9_encode_intra16x16mbuv(x);
- vp9_encode_intra16x16mby(x);
+ vp9_encode_intra16x16mbuv(cm, x);
+ vp9_encode_intra16x16mby(cm, x);
}
if (output_enabled)
@@ -2086,36 +2141,35 @@
assert(cm->frame_type != KEY_FRAME);
if (mbmi->ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.lst_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (mbmi->ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.gld_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.alt_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
- xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->pre,
+ &cpi->common.yv12_fb[ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (mbmi->second_ref_frame > 0) {
int second_ref_fb_idx;
if (mbmi->second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.lst_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (mbmi->second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.gld_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.alt_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer +
- recon_yoffset;
- xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer +
- recon_uvoffset;
- xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer +
- recon_uvoffset;
+ setup_pred_block(&xd->second_pre,
+ &cpi->common.yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
if (!x->skip) {
- vp9_encode_inter16x16(x);
+ vp9_encode_inter16x16(cm, x, mb_row, mb_col);
// Clear mb_skip_coeff if mb_no_coeff_skip is not set
if (!cpi->common.mb_no_coeff_skip)
@@ -2122,22 +2176,15 @@
mbmi->mb_skip_coeff = 0;
} else {
- vp9_build_1st_inter16x16_predictors_mb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- vp9_build_2nd_inter16x16_predictors_mb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
- }
+ vp9_build_inter16x16_predictors_mb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
- else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
+ if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd,
xd->dst.y_buffer,
xd->dst.u_buffer,
@@ -2155,7 +2202,7 @@
int i, j;
printf("\n");
printf("qcoeff\n");
- for (i = 0; i < 400; i++) {
+ for (i = 0; i < 384; i++) {
printf("%3d ", xd->qcoeff[i]);
if (i % 16 == 15) printf("\n");
}
@@ -2202,15 +2249,17 @@
}
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+ gather_nzcs_mb16(cm, xd);
+#endif
vp9_tokenize_mb(cpi, xd, t, !output_enabled);
} else {
- int mb_skip_context =
- cpi->common.mb_no_coeff_skip ?
- (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
- (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff :
- 0;
- if (cpi->common.mb_no_coeff_skip) {
+ // FIXME(rbultje): not tile-aware (mi - 1)
+ int mb_skip_context = cpi->common.mb_no_coeff_skip ?
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
+
+ if (cm->mb_no_coeff_skip) {
mbmi->mb_skip_coeff = 1;
if (output_enabled)
cpi->skip_true_count[mb_skip_context]++;
@@ -2227,8 +2276,7 @@
int segment_id = mbmi->segment_id;
if (cpi->common.txfm_mode == TX_MODE_SELECT &&
!((cpi->common.mb_no_coeff_skip && mbmi->mb_skip_coeff) ||
- (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(&x->e_mbd, segment_id, SEG_LVL_EOB) == 0))) {
+ (vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP)))) {
assert(mbmi->txfm_size <= TX_16X16);
if (mbmi->mode != B_PRED && mbmi->mode != I8X8_PRED &&
mbmi->mode != SPLITMV) {
@@ -2253,7 +2301,6 @@
}
static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
@@ -2267,14 +2314,22 @@
int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
unsigned char ref_pred_flag;
- int n;
- TOKENEXTRA *tp[4];
- int skip[4];
MODE_INFO *mi = x->e_mbd.mode_info_context;
unsigned int segment_id = mi->mbmi.segment_id;
- ENTROPY_CONTEXT_PLANES ta[4], tl[4];
const int mis = cm->mode_info_stride;
+#ifdef ENC_DEBUG
+ enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
+ mb_row == 8 && mb_col == 0 && output_enabled);
+ if (enc_debug) {
+ printf("Encode SB32 %d %d output %d\n", mb_row, mb_col, output_enabled);
+ printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n",
+ mi->mbmi.mode, x->skip, mi->mbmi.txfm_size,
+ mi->mbmi.ref_frame, mi->mbmi.second_ref_frame,
+ mi->mbmi.mv[0].as_mv.row, mi->mbmi.mv[0].as_mv.col,
+ mi->mbmi.interp_filter);
+ }
+#endif
if (cm->frame_type == KEY_FRAME) {
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
adjust_act_zbin(cpi, x);
@@ -2299,9 +2354,11 @@
else
cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
} else if (xd->mode_info_context->mbmi.mode == SPLITMV)
- cpi->zbin_mode_boost = 0;
+ cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
else
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ } else {
+ cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
}
}
@@ -2326,152 +2383,137 @@
assert(cm->frame_type != KEY_FRAME);
if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.lst_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.gld_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.alt_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
- xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
- xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->pre,
+ &cpi->common.yv12_fb[ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
int second_ref_fb_idx;
if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.lst_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.gld_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.alt_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer +
- recon_yoffset;
- xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer +
- recon_uvoffset;
- xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer +
- recon_uvoffset;
+ setup_pred_block(&xd->second_pre,
+ &cpi->common.yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
- if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
- if (!x->skip) {
- vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
- dst, dst_y_stride);
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
- usrc, vsrc, src_uv_stride,
- udst, vdst, dst_uv_stride);
- vp9_transform_sby_32x32(x);
- vp9_transform_sbuv_16x16(x);
- vp9_quantize_sby_32x32(x);
- vp9_quantize_sbuv_16x16(x);
- // TODO(rbultje): trellis optimize
- vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data);
- vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data);
- vp9_recon_sby_s_c(&x->e_mbd, dst);
- vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst);
-
- vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled);
- } else {
- int mb_skip_context =
- cpi->common.mb_no_coeff_skip ?
- (mi - 1)->mbmi.mb_skip_coeff +
- (mi - mis)->mbmi.mb_skip_coeff :
- 0;
- mi->mbmi.mb_skip_coeff = 1;
- if (cm->mb_no_coeff_skip) {
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_fix_contexts_sb(xd);
- } else {
- vp9_stuff_sb(cpi, xd, t, !output_enabled);
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
- }
+ if (!x->skip) {
+ vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride,
+ dst, dst_y_stride);
+ vp9_subtract_sbuv_s_c(x->src_diff,
+ usrc, vsrc, src_uv_stride,
+ udst, vdst, dst_uv_stride);
+ switch (mi->mbmi.txfm_size) {
+ case TX_32X32:
+ vp9_transform_sby_32x32(x);
+ vp9_transform_sbuv_16x16(x);
+ vp9_quantize_sby_32x32(x);
+ vp9_quantize_sbuv_16x16(x);
+ if (x->optimize) {
+ vp9_optimize_sby_32x32(cm, x);
+ vp9_optimize_sbuv_16x16(cm, x);
+ }
+ vp9_inverse_transform_sby_32x32(xd);
+ vp9_inverse_transform_sbuv_16x16(xd);
+ break;
+ case TX_16X16:
+ vp9_transform_sby_16x16(x);
+ vp9_transform_sbuv_16x16(x);
+ vp9_quantize_sby_16x16(x);
+ vp9_quantize_sbuv_16x16(x);
+ if (x->optimize) {
+ vp9_optimize_sby_16x16(cm, x);
+ vp9_optimize_sbuv_16x16(cm, x);
+ }
+ vp9_inverse_transform_sby_16x16(xd);
+ vp9_inverse_transform_sbuv_16x16(xd);
+ break;
+ case TX_8X8:
+ vp9_transform_sby_8x8(x);
+ vp9_transform_sbuv_8x8(x);
+ vp9_quantize_sby_8x8(x);
+ vp9_quantize_sbuv_8x8(x);
+ if (x->optimize) {
+ vp9_optimize_sby_8x8(cm, x);
+ vp9_optimize_sbuv_8x8(cm, x);
+ }
+ vp9_inverse_transform_sby_8x8(xd);
+ vp9_inverse_transform_sbuv_8x8(xd);
+ break;
+ case TX_4X4:
+ vp9_transform_sby_4x4(x);
+ vp9_transform_sbuv_4x4(x);
+ vp9_quantize_sby_4x4(x);
+ vp9_quantize_sbuv_4x4(x);
+ if (x->optimize) {
+ vp9_optimize_sby_4x4(cm, x);
+ vp9_optimize_sbuv_4x4(cm, x);
+ }
+ vp9_inverse_transform_sby_4x4(xd);
+ vp9_inverse_transform_sbuv_4x4(xd);
+ break;
+ default: assert(0);
}
+ vp9_recon_sby_s_c(xd, dst);
+ vp9_recon_sbuv_s_c(xd, udst, vdst);
+#if CONFIG_CODE_NONZEROCOUNT
+ gather_nzcs_sb32(cm, xd);
+#endif
- // copy skip flag on all mb_mode_info contexts in this SB
- // if this was a skip at this txfm size
- if (mb_col < cm->mb_cols - 1)
- mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
- if (mb_row < cm->mb_rows - 1) {
- mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
- if (mb_col < cm->mb_cols - 1)
- mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
- }
- skip[0] = skip[2] = skip[1] = skip[3] = mi->mbmi.mb_skip_coeff;
+ vp9_tokenize_sb(cpi, xd, t, !output_enabled);
} else {
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
+ // FIXME(rbultje): not tile-aware (mi - 1)
+ int mb_skip_context = cm->mb_no_coeff_skip ?
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
- xd->left_context = cm->left_context + y_idx + (mb_row & 2);
- xd->above_context = cm->above_context + mb_col + x_idx;
- memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
- memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
- tp[n] = *t;
- xd->mode_info_context = mi + x_idx + y_idx * mis;
-
- if (!x->skip) {
- vp9_subtract_mby_s_c(x->src_diff,
- src + x_idx * 16 + y_idx * 16 * src_y_stride,
- src_y_stride,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
- dst_y_stride);
- vp9_subtract_mbuv_s_c(x->src_diff,
- usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
- vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
- src_uv_stride,
- udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- dst_uv_stride);
- vp9_fidct_mb(x);
- vp9_recon_mby_s_c(&x->e_mbd,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
- vp9_recon_mbuv_s_c(&x->e_mbd,
- udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
-
- vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled);
- skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
- } else {
- int mb_skip_context = cpi->common.mb_no_coeff_skip ?
- (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
- (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff :
- 0;
- xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1;
- if (cpi->common.mb_no_coeff_skip) {
- // TODO(rbultje) this should be done per-sb instead of per-mb?
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_reset_mb_tokens_context(xd);
- } else {
- vp9_stuff_mb(cpi, xd, t, !output_enabled);
- // TODO(rbultje) this should be done per-sb instead of per-mb?
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
- }
- }
+ mi->mbmi.mb_skip_coeff = 1;
+ if (cm->mb_no_coeff_skip) {
+ if (output_enabled)
+ cpi->skip_true_count[mb_skip_context]++;
+ vp9_reset_sb_tokens_context(xd);
+ } else {
+ vp9_stuff_sb(cpi, xd, t, !output_enabled);
+ if (output_enabled)
+ cpi->skip_false_count[mb_skip_context]++;
}
+ }
- xd->mode_info_context = mi;
- update_sb_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled);
+ // copy skip flag on all mb_mode_info contexts in this SB
+ // if this was a skip at this txfm size
+ if (mb_col < cm->mb_cols - 1)
+ mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
+ if (mb_row < cm->mb_rows - 1) {
+ mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
+ if (mb_col < cm->mb_cols - 1)
+ mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
}
if (output_enabled) {
if (cm->txfm_mode == TX_MODE_SELECT &&
- !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) ||
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
} else {
- TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
- TX_32X32 :
- cm->txfm_mode;
+ TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
mi->mbmi.txfm_size = sz;
if (mb_col < cm->mb_cols - 1)
mi[1].mbmi.txfm_size = sz;
@@ -2485,7 +2527,6 @@
}
static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
- int recon_yoffset, int recon_uvoffset,
int output_enabled, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
@@ -2500,13 +2541,16 @@
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
unsigned char ref_pred_flag;
int n;
- TOKENEXTRA *tp[16];
- int skip[16];
MODE_INFO *mi = x->e_mbd.mode_info_context;
unsigned int segment_id = mi->mbmi.segment_id;
- ENTROPY_CONTEXT_PLANES ta[16], tl[16];
const int mis = cm->mode_info_stride;
+#ifdef ENC_DEBUG
+ enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
+ mb_row == 8 && mb_col == 0 && output_enabled);
+ if (enc_debug)
+ printf("Encode SB64 %d %d output %d\n", mb_row, mb_col, output_enabled);
+#endif
if (cm->frame_type == KEY_FRAME) {
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
adjust_act_zbin(cpi, x);
@@ -2531,10 +2575,12 @@
else
cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
} else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
- cpi->zbin_mode_boost = 0;
+ cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
} else {
cpi->zbin_mode_boost = MV_ZBIN_BOOST;
}
+ } else {
+ cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
}
}
@@ -2557,186 +2603,134 @@
assert(cm->frame_type != KEY_FRAME);
if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.lst_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.gld_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- ref_fb_idx = cpi->common.alt_fb_idx;
+ ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->pre.y_buffer =
- cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
- xd->pre.u_buffer =
- cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
- xd->pre.v_buffer =
- cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->pre,
+ &cpi->common.yv12_fb[ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
int second_ref_fb_idx;
if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.lst_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.gld_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
else
- second_ref_fb_idx = cpi->common.alt_fb_idx;
+ second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- xd->second_pre.y_buffer =
- cpi->common.yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
- xd->second_pre.u_buffer =
- cpi->common.yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset;
- xd->second_pre.v_buffer =
- cpi->common.yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset;
+ setup_pred_block(&xd->second_pre,
+ &cpi->common.yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
- if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
- int n;
+ if (!x->skip) {
+ vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);
+ vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
+ udst, vdst, dst_uv_stride);
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
-
- xd->mode_info_context = mi + x_idx * 2 + mis * y_idx * 2;
- xd->left_context = cm->left_context + (y_idx << 1);
- xd->above_context = cm->above_context + mb_col + (x_idx << 1);
- memcpy(&ta[n * 2], xd->above_context, sizeof(*ta) * 2);
- memcpy(&tl[n * 2], xd->left_context, sizeof(*tl) * 2);
- tp[n] = *t;
- xd->mode_info_context = mi + x_idx * 2 + y_idx * mis * 2;
- if (!x->skip) {
- vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
- src + x_idx * 32 + y_idx * 32 * src_y_stride,
- src_y_stride,
- dst + x_idx * 32 + y_idx * 32 * dst_y_stride,
- dst_y_stride);
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
- usrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
- vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
- src_uv_stride,
- udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
- vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
- dst_uv_stride);
- vp9_transform_sby_32x32(x);
- vp9_transform_sbuv_16x16(x);
- vp9_quantize_sby_32x32(x);
- vp9_quantize_sbuv_16x16(x);
- // TODO(rbultje): trellis optimize
- vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data);
- vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data);
- vp9_recon_sby_s_c(&x->e_mbd,
- dst + 32 * x_idx + 32 * y_idx * dst_y_stride);
- vp9_recon_sbuv_s_c(&x->e_mbd,
- udst + x_idx * 16 + y_idx * 16 * dst_uv_stride,
- vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride);
-
- vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled);
- } else {
- int mb_skip_context = cpi->common.mb_no_coeff_skip ?
- (mi - 1)->mbmi.mb_skip_coeff +
- (mi - mis)->mbmi.mb_skip_coeff : 0;
- xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- if (cm->mb_no_coeff_skip) {
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_fix_contexts_sb(xd);
- } else {
- vp9_stuff_sb(cpi, xd, t, !output_enabled);
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ vp9_transform_sb64y_32x32(x);
+ vp9_transform_sb64uv_32x32(x);
+ vp9_quantize_sb64y_32x32(x);
+ vp9_quantize_sb64uv_32x32(x);
+ if (x->optimize) {
+ vp9_optimize_sb64y_32x32(cm, x);
+ vp9_optimize_sb64uv_32x32(cm, x);
}
- }
-
- // copy skip flag on all mb_mode_info contexts in this SB
- // if this was a skip at this txfm size
- if (mb_col + x_idx * 2 < cm->mb_cols - 1)
- mi[mis * y_idx * 2 + x_idx * 2 + 1].mbmi.mb_skip_coeff =
- mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
- if (mb_row + y_idx * 2 < cm->mb_rows - 1) {
- mi[mis * y_idx * 2 + x_idx * 2 + mis].mbmi.mb_skip_coeff =
- mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
- if (mb_col + x_idx * 2 < cm->mb_cols - 1)
- mi[mis * y_idx * 2 + x_idx * 2 + mis + 1].mbmi.mb_skip_coeff =
- mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff;
- }
- skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
+ vp9_inverse_transform_sb64y_32x32(xd);
+ vp9_inverse_transform_sb64uv_32x32(xd);
+ break;
+ case TX_16X16:
+ vp9_transform_sb64y_16x16(x);
+ vp9_transform_sb64uv_16x16(x);
+ vp9_quantize_sb64y_16x16(x);
+ vp9_quantize_sb64uv_16x16(x);
+ if (x->optimize) {
+ vp9_optimize_sb64y_16x16(cm, x);
+ vp9_optimize_sb64uv_16x16(cm, x);
+ }
+ vp9_inverse_transform_sb64y_16x16(xd);
+ vp9_inverse_transform_sb64uv_16x16(xd);
+ break;
+ case TX_8X8:
+ vp9_transform_sb64y_8x8(x);
+ vp9_transform_sb64uv_8x8(x);
+ vp9_quantize_sb64y_8x8(x);
+ vp9_quantize_sb64uv_8x8(x);
+ if (x->optimize) {
+ vp9_optimize_sb64y_8x8(cm, x);
+ vp9_optimize_sb64uv_8x8(cm, x);
+ }
+ vp9_inverse_transform_sb64y_8x8(xd);
+ vp9_inverse_transform_sb64uv_8x8(xd);
+ break;
+ case TX_4X4:
+ vp9_transform_sb64y_4x4(x);
+ vp9_transform_sb64uv_4x4(x);
+ vp9_quantize_sb64y_4x4(x);
+ vp9_quantize_sb64uv_4x4(x);
+ if (x->optimize) {
+ vp9_optimize_sb64y_4x4(cm, x);
+ vp9_optimize_sb64uv_4x4(cm, x);
+ }
+ vp9_inverse_transform_sb64y_4x4(xd);
+ vp9_inverse_transform_sb64uv_4x4(xd);
+ break;
+ default: assert(0);
}
+ vp9_recon_sb64y_s_c(xd, dst);
+ vp9_recon_sb64uv_s_c(&x->e_mbd, udst, vdst);
+#if CONFIG_CODE_NONZEROCOUNT
+ gather_nzcs_sb64(cm, &x->e_mbd);
+#endif
+ vp9_tokenize_sb64(cpi, &x->e_mbd, t, !output_enabled);
} else {
- for (n = 0; n < 16; n++) {
- const int x_idx = n & 3, y_idx = n >> 2;
+ // FIXME(rbultje): not tile-aware (mi - 1)
+ int mb_skip_context = cpi->common.mb_no_coeff_skip ?
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
- xd->left_context = cm->left_context + y_idx;
- xd->above_context = cm->above_context + mb_col + x_idx;
- memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
- memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
- tp[n] = *t;
- xd->mode_info_context = mi + x_idx + y_idx * mis;
-
- if (!x->skip) {
- vp9_subtract_mby_s_c(x->src_diff,
- src + x_idx * 16 + y_idx * 16 * src_y_stride,
- src_y_stride,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
- dst_y_stride);
- vp9_subtract_mbuv_s_c(x->src_diff,
- usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
- vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
- src_uv_stride,
- udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- dst_uv_stride);
- vp9_fidct_mb(x);
- vp9_recon_mby_s_c(&x->e_mbd,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
- vp9_recon_mbuv_s_c(&x->e_mbd,
- udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
- vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
-
- vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled);
- skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
- } else {
- int mb_skip_context = cpi->common.mb_no_coeff_skip ?
- (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
- (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : 0;
- xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1;
- if (cpi->common.mb_no_coeff_skip) {
- // TODO(rbultje) this should be done per-sb instead of per-mb?
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_reset_mb_tokens_context(xd);
- } else {
- vp9_stuff_mb(cpi, xd, t, !output_enabled);
- // TODO(rbultje) this should be done per-sb instead of per-mb?
- if (output_enabled)
- cpi->skip_false_count[mb_skip_context]++;
- }
- }
+ xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ if (cm->mb_no_coeff_skip) {
+ if (output_enabled)
+ cpi->skip_true_count[mb_skip_context]++;
+ vp9_reset_sb64_tokens_context(xd);
+ } else {
+ vp9_stuff_sb64(cpi, xd, t, !output_enabled);
+ if (output_enabled)
+ cpi->skip_false_count[mb_skip_context]++;
}
}
- xd->mode_info_context = mi;
- update_sb64_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled);
+ // copy skip flag on all mb_mode_info contexts in this SB
+ // if this was a skip at this txfm size
+ for (n = 1; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ if (mb_col + x_idx < cm->mb_cols && mb_row + y_idx < cm->mb_rows)
+ mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
+ }
if (output_enabled) {
if (cm->txfm_mode == TX_MODE_SELECT &&
- !((cm->mb_no_coeff_skip &&
- ((mi->mbmi.txfm_size == TX_32X32 &&
- skip[0] && skip[1] && skip[2] && skip[3]) ||
- (mi->mbmi.txfm_size != TX_32X32 &&
- skip[0] && skip[1] && skip[2] && skip[3] &&
- skip[4] && skip[5] && skip[6] && skip[7] &&
- skip[8] && skip[9] && skip[10] && skip[11] &&
- skip[12] && skip[13] && skip[14] && skip[15]))) ||
- (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) &&
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) {
+ !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) ||
+ (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
} else {
int x, y;
- TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ?
- TX_32X32 :
- cm->txfm_mode;
+ TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
for (y = 0; y < 4; y++) {
for (x = 0; x < 4; x++) {
if (mb_col + x < cm->mb_cols && mb_row + y < cm->mb_rows) {
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -14,8 +14,8 @@
struct macroblock;
-extern void vp9_build_block_offsets(struct macroblock *x);
+void vp9_build_block_offsets(struct macroblock *x);
-extern void vp9_setup_block_ptrs(struct macroblock *x);
+void vp9_setup_block_ptrs(struct macroblock *x);
#endif // VP9_ENCODER_VP9_ENCODEFRAME_H_
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -12,14 +12,11 @@
#include "vp9_rtcd.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/common/vp9_reconintra.h"
-#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/common/vp9_invtrans.h"
#include "vp9/encoder/vp9_encodeintra.h"
int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
- int i;
- int intra_pred_var = 0;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
(void) cpi;
@@ -28,8 +25,10 @@
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame = INTRA_FRAME;
- vp9_encode_intra16x16mby(x);
+ vp9_encode_intra16x16mby(&cpi->common, x);
} else {
+ int i;
+
for (i = 0; i < 16; i++) {
x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED;
vp9_encode_intra4x4block(x, i);
@@ -36,9 +35,7 @@
}
}
- intra_pred_var = vp9_get_mb_ss(x->src_diff);
-
- return intra_pred_var;
+ return vp9_get_mb_ss(x->src_diff);
}
void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
@@ -47,21 +44,22 @@
TX_TYPE tx_type;
#if CONFIG_NEWBINTRAMODES
- b->bmi.as_mode.context = vp9_find_bpred_context(b);
+ b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b);
#endif
- vp9_intra4x4_predict(b, b->bmi.as_mode.first, b->predictor);
+ vp9_intra4x4_predict(&x->e_mbd, b, b->bmi.as_mode.first, b->predictor);
vp9_subtract_b(be, b, 16);
- tx_type = get_tx_type_4x4(&x->e_mbd, b);
+ tx_type = get_tx_type_4x4(&x->e_mbd, ib);
if (tx_type != DCT_DCT) {
- vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
- vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
+ vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib, tx_type);
+ vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b) ;
- vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, ib);
+ vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib],
+ b->dqcoeff, b->diff, 32);
}
vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -72,10 +70,9 @@
for (i = 0; i < 16; i++)
vp9_encode_intra4x4block(mb, i);
- return;
}
-void vp9_encode_intra16x16mby(MACROBLOCK *x) {
+void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
@@ -84,30 +81,34 @@
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
- if (tx_size == TX_16X16) {
- vp9_transform_mby_16x16(x);
- vp9_quantize_mby_16x16(x);
- if (x->optimize)
- vp9_optimize_mby_16x16(x);
- vp9_inverse_transform_mby_16x16(xd);
- } else if (tx_size == TX_8X8) {
- vp9_transform_mby_8x8(x);
- vp9_quantize_mby_8x8(x);
- if (x->optimize)
- vp9_optimize_mby_8x8(x);
- vp9_inverse_transform_mby_8x8(xd);
- } else {
- vp9_transform_mby_4x4(x);
- vp9_quantize_mby_4x4(x);
- if (x->optimize)
- vp9_optimize_mby_4x4(x);
- vp9_inverse_transform_mby_4x4(xd);
+ switch (tx_size) {
+ case TX_16X16:
+ vp9_transform_mby_16x16(x);
+ vp9_quantize_mby_16x16(x);
+ if (x->optimize)
+ vp9_optimize_mby_16x16(cm, x);
+ vp9_inverse_transform_mby_16x16(xd);
+ break;
+ case TX_8X8:
+ vp9_transform_mby_8x8(x);
+ vp9_quantize_mby_8x8(x);
+ if (x->optimize)
+ vp9_optimize_mby_8x8(cm, x);
+ vp9_inverse_transform_mby_8x8(xd);
+ break;
+ default:
+ vp9_transform_mby_4x4(x);
+ vp9_quantize_mby_4x4(x);
+ if (x->optimize)
+ vp9_optimize_mby_4x4(cm, x);
+ vp9_inverse_transform_mby_4x4(xd);
+ break;
}
vp9_recon_mby(xd);
}
-void vp9_encode_intra16x16mbuv(MACROBLOCK *x) {
+void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
@@ -116,19 +117,22 @@
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
xd->predictor, x->src.uv_stride);
- if (tx_size == TX_4X4) {
- vp9_transform_mbuv_4x4(x);
- vp9_quantize_mbuv_4x4(x);
- if (x->optimize)
- vp9_optimize_mbuv_4x4(x);
- vp9_inverse_transform_mbuv_4x4(xd);
- } else /* 16x16 or 8x8 */ {
- vp9_transform_mbuv_8x8(x);
- vp9_quantize_mbuv_8x8(x);
- if (x->optimize)
- vp9_optimize_mbuv_8x8(x);
- vp9_inverse_transform_mbuv_8x8(xd);
- }
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_transform_mbuv_4x4(x);
+ vp9_quantize_mbuv_4x4(x);
+ if (x->optimize)
+ vp9_optimize_mbuv_4x4(cm, x);
+ vp9_inverse_transform_mbuv_4x4(xd);
+ break;
+ default: // 16x16 or 8x8
+ vp9_transform_mbuv_8x8(x);
+ vp9_quantize_mbuv_8x8(x);
+ if (x->optimize)
+ vp9_optimize_mbuv_8x8(cm, x);
+ vp9_inverse_transform_mbuv_8x8(xd);
+ break;
+ }
vp9_recon_intra_mbuv(xd);
}
@@ -141,7 +145,7 @@
int i;
TX_TYPE tx_type;
- vp9_intra8x8_predict(b, b->bmi.as_mode.first, b->predictor);
+ vp9_intra8x8_predict(xd, b, b->bmi.as_mode.first, b->predictor);
// generate residual blocks
vp9_subtract_4b_c(be, b, 16);
@@ -148,16 +152,15 @@
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
int idx = (ib & 0x02) ? (ib + 2) : ib;
- tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
+ tx_type = get_tx_type_8x8(xd, ib);
if (tx_type != DCT_DCT) {
- vp9_fht(be->src_diff, 32, (x->block + idx)->coeff,
- tx_type, 8);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
- vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
- tx_type, 8, xd->block[idx].eob);
+ vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
+ x->quantize_b_8x8(x, idx, tx_type);
+ vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
+ 16, tx_type);
} else {
- x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->quantize_b_8x8(x, idx, DCT_DCT);
vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
}
} else {
@@ -164,15 +167,25 @@
for (i = 0; i < 4; i++) {
b = &xd->block[ib + iblock[i]];
be = &x->block[ib + iblock[i]];
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
if (tx_type != DCT_DCT) {
- vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
- vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
+ vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
+ vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
+ } else if (!(i & 1) &&
+ get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
+ b->dqcoeff, b->diff, 32);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1],
+ (b + 1)->dqcoeff, (b + 1)->diff, 32);
+ i++;
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
- vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, ib + iblock[i]);
+ vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
+ b->dqcoeff, b->diff, 32);
}
}
}
@@ -186,26 +199,24 @@
}
void vp9_encode_intra8x8mby(MACROBLOCK *x) {
- int i, ib;
+ int i;
- for (i = 0; i < 4; i++) {
- ib = vp9_i8x8_block[i];
- vp9_encode_intra8x8(x, ib);
- }
+ for (i = 0; i < 4; i++)
+ vp9_encode_intra8x8(x, vp9_i8x8_block[i]);
}
-static void encode_intra_uv4x4(MACROBLOCK *x, int ib,
- int mode) {
+static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
BLOCKD *b = &x->e_mbd.block[ib];
BLOCK *be = &x->block[ib];
- vp9_intra_uv4x4_predict(b, mode, b->predictor);
+ vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor);
vp9_subtract_b(be, b, 8);
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);
- x->quantize_b_4x4(be, b);
- vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 16);
+ x->quantize_b_4x4(x, ib);
+ vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib],
+ b->dqcoeff, b->diff, 16);
vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
b->dst_stride);
@@ -212,17 +223,13 @@
}
void vp9_encode_intra8x8mbuv(MACROBLOCK *x) {
- int i, ib, mode;
- BLOCKD *b;
+ int i;
for (i = 0; i < 4; i++) {
- ib = vp9_i8x8_block[i];
- b = &x->e_mbd.block[ib];
- mode = b->bmi.as_mode.first;
+ BLOCKD *b = &x->e_mbd.block[vp9_i8x8_block[i]];
+ int mode = b->bmi.as_mode.first;
- /*u */
- encode_intra_uv4x4(x, i + 16, mode);
- /*v */
- encode_intra_uv4x4(x, i + 20, mode);
+ encode_intra_uv4x4(x, i + 16, mode); // u
+ encode_intra_uv4x4(x, i + 20, mode); // v
}
}
--- a/vp9/encoder/vp9_encodeintra.h
+++ b/vp9/encoder/vp9_encodeintra.h
@@ -14,8 +14,8 @@
#include "vp9/encoder/vp9_onyx_int.h"
int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
-void vp9_encode_intra16x16mby(MACROBLOCK *x);
-void vp9_encode_intra16x16mbuv(MACROBLOCK *x);
+void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_encode_intra4x4mby(MACROBLOCK *mb);
void vp9_encode_intra4x4block(MACROBLOCK *x, int ib);
void vp9_encode_intra8x8mby(MACROBLOCK *x);
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -29,9 +29,8 @@
int r, c;
for (r = 0; r < 4; r++) {
- for (c = 0; c < 4; c++) {
+ for (c = 0; c < 4; c++)
diff_ptr[c] = src_ptr[c] - pred_ptr[c];
- }
diff_ptr += pitch;
pred_ptr += pitch;
@@ -47,9 +46,9 @@
int r, c;
for (r = 0; r < 8; r++) {
- for (c = 0; c < 8; c++) {
+ for (c = 0; c < 8; c++)
diff_ptr[c] = src_ptr[c] - pred_ptr[c];
- }
+
diff_ptr += pitch;
pred_ptr += pitch;
src_ptr += src_stride;
@@ -65,9 +64,8 @@
int r, c;
for (r = 0; r < 8; r++) {
- for (c = 0; c < 8; c++) {
+ for (c = 0; c < 8; c++)
udiff[c] = usrc[c] - upred[c];
- }
udiff += 8;
upred += dst_stride;
@@ -98,9 +96,8 @@
int r, c;
for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
+ for (c = 0; c < 16; c++)
diff[c] = src[c] - pred[c];
- }
diff += 16;
pred += dst_stride;
@@ -113,9 +110,8 @@
int r, c;
for (r = 0; r < 32; r++) {
- for (c = 0; c < 32; c++) {
+ for (c = 0; c < 32; c++)
diff[c] = src[c] - pred[c];
- }
diff += 32;
pred += dst_stride;
@@ -132,9 +128,8 @@
int r, c;
for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
+ for (c = 0; c < 16; c++)
udiff[c] = usrc[c] - upred[c];
- }
udiff += 16;
upred += dst_stride;
@@ -142,9 +137,8 @@
}
for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
+ for (c = 0; c < 16; c++)
vdiff[c] = vsrc[c] - vpred[c];
- }
vdiff += 16;
vpred += dst_stride;
@@ -152,6 +146,50 @@
}
}
+void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride,
+ const uint8_t *pred, int dst_stride) {
+ int r, c;
+
+ for (r = 0; r < 64; r++) {
+ for (c = 0; c < 64; c++) {
+ diff[c] = src[c] - pred[c];
+ }
+
+ diff += 64;
+ pred += dst_stride;
+ src += src_stride;
+ }
+}
+
+void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc,
+ const uint8_t *vsrc, int src_stride,
+ const uint8_t *upred,
+ const uint8_t *vpred, int dst_stride) {
+ int16_t *udiff = diff + 4096;
+ int16_t *vdiff = diff + 4096 + 1024;
+ int r, c;
+
+ for (r = 0; r < 32; r++) {
+ for (c = 0; c < 32; c++) {
+ udiff[c] = usrc[c] - upred[c];
+ }
+
+ udiff += 32;
+ upred += dst_stride;
+ usrc += src_stride;
+ }
+
+ for (r = 0; r < 32; r++) {
+ for (c = 0; c < 32; c++) {
+ vdiff[c] = vsrc[c] - vpred[c];
+ }
+
+ vdiff += 32;
+ vpred += dst_stride;
+ vsrc += src_stride;
+ }
+}
+
void vp9_subtract_mby_c(int16_t *diff, uint8_t *src,
uint8_t *pred, int stride) {
vp9_subtract_mby_s_c(diff, src, stride, pred, 16);
@@ -166,52 +204,29 @@
x->e_mbd.predictor, x->src.uv_stride);
}
-static void build_dcblock_4x4(MACROBLOCK *x) {
- int16_t *src_diff_ptr = &x->src_diff[384];
- int i;
-
- for (i = 0; i < 16; i++) {
- src_diff_ptr[i] = x->coeff[i * 16];
- x->coeff[i * 16] = 0;
- }
-}
-
void vp9_transform_mby_4x4(MACROBLOCK *x) {
int i;
MACROBLOCKD *xd = &x->e_mbd;
- int has_2nd_order = get_2nd_order_usage(xd);
for (i = 0; i < 16; i++) {
BLOCK *b = &x->block[i];
- TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
+ TX_TYPE tx_type = get_tx_type_4x4(xd, i);
if (tx_type != DCT_DCT) {
- assert(has_2nd_order == 0);
- vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
+ vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type);
+ } else if (!(i & 1) && get_tx_type_4x4(xd, i + 1) == DCT_DCT) {
+ x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 32);
+ i++;
} else {
- x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
- &x->block[i].coeff[0], 32);
+ x->fwd_txm4x4(x->block[i].src_diff, x->block[i].coeff, 32);
}
}
-
- if (has_2nd_order) {
- // build dc block from 16 y dc values
- build_dcblock_4x4(x);
-
- // do 2nd order transform on the dc block
- x->short_walsh4x4(&x->block[24].src_diff[0],
- &x->block[24].coeff[0], 8);
- } else {
- vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
- }
}
void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
int i;
- for (i = 16; i < 24; i += 2) {
- x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
- &x->block[i].coeff[0], 16);
- }
+ for (i = 16; i < 24; i += 2)
+ x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 16);
}
static void transform_mb_4x4(MACROBLOCK *x) {
@@ -219,71 +234,36 @@
vp9_transform_mbuv_4x4(x);
}
-static void build_dcblock_8x8(MACROBLOCK *x) {
- int16_t *src_diff_ptr = x->block[24].src_diff;
- int i;
-
- for (i = 0; i < 16; i++) {
- src_diff_ptr[i] = 0;
- }
- src_diff_ptr[0] = x->coeff[0 * 16];
- src_diff_ptr[1] = x->coeff[4 * 16];
- src_diff_ptr[4] = x->coeff[8 * 16];
- src_diff_ptr[8] = x->coeff[12 * 16];
- x->coeff[0 * 16] = 0;
- x->coeff[4 * 16] = 0;
- x->coeff[8 * 16] = 0;
- x->coeff[12 * 16] = 0;
-}
-
void vp9_transform_mby_8x8(MACROBLOCK *x) {
int i;
MACROBLOCKD *xd = &x->e_mbd;
TX_TYPE tx_type;
- int has_2nd_order = get_2nd_order_usage(xd);
for (i = 0; i < 9; i += 8) {
BLOCK *b = &x->block[i];
- tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
- assert(has_2nd_order == 0);
- vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
+ vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type);
} else {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
- &x->block[i].coeff[0], 32);
+ x->fwd_txm8x8(x->block[i].src_diff, x->block[i].coeff, 32);
}
}
for (i = 2; i < 11; i += 8) {
BLOCK *b = &x->block[i];
- tx_type = get_tx_type_8x8(xd, &xd->block[i]);
+ tx_type = get_tx_type_8x8(xd, i);
if (tx_type != DCT_DCT) {
- assert(has_2nd_order == 0);
- vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
+ vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type);
} else {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
- &x->block[i + 2].coeff[0], 32);
+ x->fwd_txm8x8(x->block[i].src_diff, x->block[i + 2].coeff, 32);
}
}
-
- if (has_2nd_order) {
- // build dc block from 2x2 y dc values
- build_dcblock_8x8(x);
-
- // do 2nd order transform on the dc block
- x->short_fhaar2x2(&x->block[24].src_diff[0],
- &x->block[24].coeff[0], 8);
- } else {
- vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
- }
}
void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
int i;
- for (i = 16; i < 24; i += 4) {
- x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
- &x->block[i].coeff[0], 16);
- }
+ for (i = 16; i < 24; i += 4)
+ x->fwd_txm8x8(x->block[i].src_diff, x->block[i].coeff, 16);
}
void vp9_transform_mb_8x8(MACROBLOCK *x) {
@@ -294,13 +274,12 @@
void vp9_transform_mby_16x16(MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
- TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
+ TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
vp9_clear_system_state();
if (tx_type != DCT_DCT) {
- vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
+ vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type);
} else {
- x->vp9_short_fdct16x16(&x->block[0].src_diff[0],
- &x->block[0].coeff[0], 32);
+ x->fwd_txm16x16(x->block[0].src_diff, x->block[0].coeff, 32);
}
}
@@ -310,19 +289,212 @@
}
void vp9_transform_sby_32x32(MACROBLOCK *x) {
- SUPERBLOCK * const x_sb = &x->sb_coeff_data;
- vp9_short_fdct32x32(x_sb->src_diff, x_sb->coeff, 64);
+ vp9_short_fdct32x32(x->src_diff, x->coeff, 64);
}
+void vp9_transform_sby_16x16(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
+
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + n * 256, 32, tx_type);
+ } else {
+ x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + n * 256, 64);
+ }
+ }
+}
+
+void vp9_transform_sby_8x8(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
+
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + n * 64, 32, tx_type);
+ } else {
+ x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + n * 64, 64);
+ }
+ }
+}
+
+void vp9_transform_sby_4x4(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
+
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + n * 16, 32, tx_type);
+ } else {
+ x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + n * 16, 64);
+ }
+ }
+}
+
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
- SUPERBLOCK * const x_sb = &x->sb_coeff_data;
vp9_clear_system_state();
- x->vp9_short_fdct16x16(x_sb->src_diff + 1024,
- x_sb->coeff + 1024, 32);
- x->vp9_short_fdct16x16(x_sb->src_diff + 1280,
- x_sb->coeff + 1280, 32);
+ x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32);
+ x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32);
}
+void vp9_transform_sbuv_8x8(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ x->fwd_txm8x8(x->src_diff + 1024 + y_idx * 16 * 8 + x_idx * 8,
+ x->coeff + 1024 + n * 64, 32);
+ x->fwd_txm8x8(x->src_diff + 1280 + y_idx * 16 * 8 + x_idx * 8,
+ x->coeff + 1280 + n * 64, 32);
+ }
+}
+
+void vp9_transform_sbuv_4x4(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ x->fwd_txm4x4(x->src_diff + 1024 + y_idx * 16 * 4 + x_idx * 4,
+ x->coeff + 1024 + n * 16, 32);
+ x->fwd_txm4x4(x->src_diff + 1280 + y_idx * 16 * 4 + x_idx * 4,
+ x->coeff + 1280 + n * 16, 32);
+ }
+}
+
+void vp9_transform_sb64y_32x32(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ vp9_short_fdct32x32(x->src_diff + y_idx * 64 * 32 + x_idx * 32,
+ x->coeff + n * 1024, 128);
+ }
+}
+
+void vp9_transform_sb64y_16x16(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
+
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
+ x->coeff + n * 256, 64, tx_type);
+ } else {
+ x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
+ x->coeff + n * 256, 128);
+ }
+ }
+}
+
+void vp9_transform_sb64y_8x8(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
+
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
+ x->coeff + n * 64, 64, tx_type);
+ } else {
+ x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
+ x->coeff + n * 64, 128);
+ }
+ }
+}
+
+void vp9_transform_sb64y_4x4(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int n;
+
+ for (n = 0; n < 256; n++) {
+ const int x_idx = n & 15, y_idx = n >> 4;
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
+
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht8x8(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
+ x->coeff + n * 16, 64, tx_type);
+ } else {
+ x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
+ x->coeff + n * 16, 128);
+ }
+ }
+}
+
+void vp9_transform_sb64uv_32x32(MACROBLOCK *x) {
+ vp9_clear_system_state();
+ vp9_short_fdct32x32(x->src_diff + 4096,
+ x->coeff + 4096, 64);
+ vp9_short_fdct32x32(x->src_diff + 4096 + 1024,
+ x->coeff + 4096 + 1024, 64);
+}
+
+void vp9_transform_sb64uv_16x16(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ x->fwd_txm16x16(x->src_diff + 4096 + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + 4096 + n * 256, 64);
+ x->fwd_txm16x16(x->src_diff + 4096 + 1024 + y_idx * 32 * 16 + x_idx * 16,
+ x->coeff + 4096 + 1024 + n * 256, 64);
+ }
+}
+
+void vp9_transform_sb64uv_8x8(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ x->fwd_txm8x8(x->src_diff + 4096 + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + 4096 + n * 64, 64);
+ x->fwd_txm8x8(x->src_diff + 4096 + 1024 + y_idx * 32 * 8 + x_idx * 8,
+ x->coeff + 4096 + 1024 + n * 64, 64);
+ }
+}
+
+void vp9_transform_sb64uv_4x4(MACROBLOCK *x) {
+ int n;
+
+ vp9_clear_system_state();
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + 4096 + n * 16, 64);
+ x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4,
+ x->coeff + 4096 + 1024 + n * 16, 64);
+ }
+}
+
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp9_token_state vp9_token_state;
@@ -338,13 +510,10 @@
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
-#define Y2_RD_MULT 4
static const int plane_rd_mult[4] = {
Y1_RD_MULT,
- Y2_RD_MULT,
UV_RD_MULT,
- Y1_RD_MULT
};
#define UPDATE_RD_COST()\
@@ -357,72 +526,120 @@
}\
}
-static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
+// This function is a place holder for now but may ultimately need
+// to scan previous tokens to work out the correct context.
+static int trellis_get_coeff_context(const int *scan,
+ const int *nb,
+ int idx, int token,
+ uint8_t *token_cache,
+ int pad, int l) {
+ int bak = token_cache[idx], pt;
+ token_cache[idx] = token;
+ pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
+ token_cache[idx] = bak;
+ return pt;
+}
+
+static void optimize_b(VP9_COMMON *const cm,
+ MACROBLOCK *mb, int ib, PLANE_TYPE type,
+ const int16_t *dequant_ptr,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int tx_size) {
- BLOCK *b = &mb->block[i];
- BLOCKD *d = &mb->e_mbd.block[i];
- vp9_token_state tokens[257][2];
- unsigned best_index[257][2];
- const int16_t *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
- int eob = d->eob, final_eob, sz = 0;
- int i0 = (type == PLANE_TYPE_Y_NO_DC);
- int rc, x, next;
+ const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ vp9_token_state tokens[1025][2];
+ unsigned best_index[1025][2];
+ const int16_t *coeff_ptr = mb->coeff + ib * 16;
+ int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
+ int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16;
+ int eob = xd->eobs[ib], final_eob, sz = 0;
+ const int i0 = 0;
+ int rc, x, next, i;
int64_t rdmult, rddiv, rd_cost0, rd_cost1;
int rate0, rate1, error0, error1, t0, t1;
int best, band, pt;
int err_mult = plane_rd_mult[type];
- int default_eob;
- int const *scan, *bands;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
+ int default_eob, pad;
+ int const *scan, *nb;
+ const int mul = 1 + (tx_size == TX_32X32);
+ uint8_t token_cache[1024];
+#if CONFIG_CODE_NONZEROCOUNT
+ // TODO(debargha): the dynamic programming approach used in this function
+ // is not compatible with the true rate cost when nzcs are used. Note
+ // the total rate is the sum of the nzc rate and the indicvidual token
+ // rates. The latter part can be optimized in this function, but because
+ // the nzc rate is a function of all the other tokens without a Markov
+ // relationship this rate cannot be considered correctly.
+ // The current implementation uses a suboptimal approach to account for
+ // the nzc rates somewhat, but in reality the optimization approach needs
+ // to change substantially.
+ uint16_t nzc = xd->nzcs[ib];
+ uint16_t nzc0, nzc1;
+ uint16_t final_nzc = 0, final_nzc_exp;
+ int nzc_context = vp9_get_nzc_context(cm, xd, ib);
+ unsigned int *nzc_cost;
+ nzc0 = nzc1 = nzc;
#endif
switch (tx_size) {
default:
- case TX_4X4:
- scan = vp9_default_zig_zag1d_4x4;
- bands = vp9_coef_bands_4x4;
+ case TX_4X4: {
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, ib);
default_eob = 16;
- // TODO: this isn't called (for intra4x4 modes), but will be left in
- // since it could be used later
- {
- TX_TYPE tx_type = get_tx_type_4x4(&mb->e_mbd, d);
- if (tx_type != DCT_DCT) {
- switch (tx_type) {
- case ADST_DCT:
- scan = vp9_row_scan_4x4;
- break;
-
- case DCT_ADST:
- scan = vp9_col_scan_4x4;
- break;
-
- default:
- scan = vp9_default_zig_zag1d_4x4;
- break;
- }
- } else {
- scan = vp9_default_zig_zag1d_4x4;
- }
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type];
+#endif
+ if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_4x4;
+ } else if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_4x4;
+ } else {
+ scan = vp9_default_zig_zag1d_4x4;
}
break;
- case TX_8X8:
- scan = vp9_default_zig_zag1d_8x8;
- bands = vp9_coef_bands_8x8;
+ }
+ case TX_8X8: {
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+ const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+ const TX_TYPE tx_type = get_tx_type_8x8(xd, y + (x >> 1));
+ if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_8x8;
+ } else if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_8x8;
+ } else {
+ scan = vp9_default_zig_zag1d_8x8;
+ }
default_eob = 64;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
+#endif
break;
- case TX_16X16:
- scan = vp9_default_zig_zag1d_16x16;
- bands = vp9_coef_bands_16x16;
+ }
+ case TX_16X16: {
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+ const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+ const TX_TYPE tx_type = get_tx_type_16x16(xd, y + (x >> 2));
+ if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_16x16;
+ } else if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_16x16;
+ } else {
+ scan = vp9_default_zig_zag1d_16x16;
+ }
default_eob = 256;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
+#endif
break;
- }
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
+ }
+ case TX_32X32:
+ scan = vp9_default_zig_zag1d_32x32;
+ default_eob = 1024;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type];
#endif
+ break;
+ }
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
rdmult = mb->rdmult * err_mult;
@@ -431,7 +648,11 @@
rddiv = mb->rddiv;
memset(best_index, 0, sizeof(best_index));
/* Initialize the sentinel node of the trellis. */
+#if CONFIG_CODE_NONZEROCOUNT
+ tokens[eob][0].rate = nzc_cost[nzc];
+#else
tokens[eob][0].rate = 0;
+#endif
tokens[eob][0].error = 0;
tokens[eob][0].next = default_eob;
tokens[eob][0].token = DCT_EOB_TOKEN;
@@ -438,8 +659,15 @@
tokens[eob][0].qc = 0;
*(tokens[eob] + 1) = *(tokens[eob] + 0);
next = eob;
+ for (i = 0; i < eob; i++)
+ token_cache[i] = vp9_dct_value_tokens_ptr[qcoeff_ptr[scan[i]]].Token;
+ nb = vp9_get_coef_neighbors_handle(scan, &pad);
+
for (i = eob; i-- > i0;) {
int base_bits, d2, dx;
+#if CONFIG_CODE_NONZEROCOUNT
+ int new_nzc0, new_nzc1;
+#endif
rc = scan[i];
x = qcoeff_ptr[rc];
@@ -454,23 +682,19 @@
t0 = (vp9_dct_value_tokens_ptr + x)->Token;
/* Consider both possible successor states. */
if (next < default_eob) {
- band = bands[i + 1];
- pt = vp9_prev_token_class[t0];
-#if CONFIG_NEWCOEFCONTEXT
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
-#endif
+ band = get_coef_band(scan, tx_size, i + 1);
+ pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
+ pad, default_eob);
rate0 +=
- mb->token_costs[tx_size][type][band][pt][tokens[next][0].token];
+ mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
rate1 +=
- mb->token_costs[tx_size][type][band][pt][tokens[next][1].token];
+ mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
}
UPDATE_RD_COST();
/* And pick the best. */
best = rd_cost1 < rd_cost0;
base_bits = *(vp9_dct_value_cost_ptr + x);
- dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
+ dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
d2 = dx * dx;
tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
tokens[i][0].error = d2 + (best ? error1 : error0);
@@ -478,12 +702,17 @@
tokens[i][0].token = t0;
tokens[i][0].qc = x;
best_index[i][0] = best;
+#if CONFIG_CODE_NONZEROCOUNT
+ new_nzc0 = (best ? nzc1 : nzc0);
+#endif
+
/* Evaluate the second possibility for this state. */
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
- if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) &&
- (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0]))
+ if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
+ (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
+ dequant_ptr[rc != 0]))
shortcut = 1;
else
shortcut = 0;
@@ -502,41 +731,27 @@
DCT_EOB_TOKEN : ZERO_TOKEN;
t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
DCT_EOB_TOKEN : ZERO_TOKEN;
+#if CONFIG_CODE_NONZEROCOUNT
+ // Account for rate drop because of the nzc change.
+ // TODO(debargha): Find a better solution
+ rate0 -= nzc_cost[nzc0] - nzc_cost[nzc0 - 1];
+ rate1 -= nzc_cost[nzc1] - nzc_cost[nzc1 - 1];
+#endif
} else {
t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
}
if (next < default_eob) {
- band = bands[i + 1];
+ band = get_coef_band(scan, tx_size, i + 1);
if (t0 != DCT_EOB_TOKEN) {
-#if CONFIG_NEWCOEFCONTEXT
- int tmp = qcoeff_ptr[scan[i]];
- qcoeff_ptr[scan[i]] = x;
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
- else
- pt = vp9_prev_token_class[t0];
- qcoeff_ptr[scan[i]] = tmp;
-#else
- pt = vp9_prev_token_class[t0];
-#endif
- rate0 += mb->token_costs[tx_size][type][band][pt][
+ pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
+ pad, default_eob);
+ rate0 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][0].token];
}
if (t1 != DCT_EOB_TOKEN) {
-#if CONFIG_NEWCOEFCONTEXT
- int tmp = qcoeff_ptr[scan[i]];
- qcoeff_ptr[scan[i]] = x;
- if (NEWCOEFCONTEXT_BAND_COND(band))
- pt = vp9_get_coef_neighbor_context(
- qcoeff_ptr, i0, neighbors, scan[i + 1]);
- else
- pt = vp9_prev_token_class[t1];
- qcoeff_ptr[scan[i]] = tmp;
-#else
- pt = vp9_prev_token_class[t1];
-#endif
- rate1 += mb->token_costs[tx_size][type][band][pt][
+ pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
+ pad, default_eob);
+ rate1 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][1].token];
}
}
@@ -556,6 +771,11 @@
tokens[i][1].token = best ? t1 : t0;
tokens[i][1].qc = x;
best_index[i][1] = best;
+#if CONFIG_CODE_NONZEROCOUNT
+ new_nzc1 = (best ? nzc1 : nzc0) - (!x);
+ nzc0 = new_nzc0;
+ nzc1 = new_nzc1;
+#endif
/* Finally, make this the new head of the trellis. */
next = i;
}
@@ -563,16 +783,18 @@
* add a new trellis node, but we do need to update the costs.
*/
else {
- band = bands[i + 1];
+ band = get_coef_band(scan, tx_size, i + 1);
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */
if (t0 != DCT_EOB_TOKEN) {
- tokens[next][0].rate += mb->token_costs[tx_size][type][band][0][t0];
+ tokens[next][0].rate +=
+ mb->token_costs[tx_size][type][ref][band][0][t0];
tokens[next][0].token = ZERO_TOKEN;
}
if (t1 != DCT_EOB_TOKEN) {
- tokens[next][1].rate += mb->token_costs[tx_size][type][band][0][t1];
+ tokens[next][1].rate +=
+ mb->token_costs[tx_size][type][ref][band][0][t1];
tokens[next][1].token = ZERO_TOKEN;
}
/* Don't update next, because we didn't add a new node. */
@@ -580,7 +802,7 @@
}
/* Now pick the best path through the whole trellis. */
- band = bands[i + 1];
+ band = get_coef_band(scan, tx_size, i + 1);
VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
@@ -588,18 +810,25 @@
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
- rate0 += mb->token_costs[tx_size][type][band][pt][t0];
- rate1 += mb->token_costs[tx_size][type][band][pt][t1];
+ rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
+ rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
+#if CONFIG_CODE_NONZEROCOUNT
+ final_nzc_exp = (best ? nzc1 : nzc0);
+#endif
final_eob = i0 - 1;
for (i = next; i < eob; i = next) {
x = tokens[i][best].qc;
- if (x)
+ if (x) {
final_eob = i;
+#if CONFIG_CODE_NONZEROCOUNT
+ ++final_nzc;
+#endif
+ }
rc = scan[i];
qcoeff_ptr[rc] = x;
- dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]);
+ dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
next = tokens[i][best].next;
best = best_index[i][best];
@@ -606,81 +835,16 @@
}
final_eob++;
- d->eob = final_eob;
- *a = *l = (d->eob > !type);
+ xd->eobs[ib] = final_eob;
+ *a = *l = (final_eob > 0);
+#if CONFIG_CODE_NONZEROCOUNT
+ assert(final_nzc == final_nzc_exp);
+ xd->nzcs[ib] = final_nzc;
+#endif
}
-/**************************************************************************
-our inverse hadamard transform effectively is weighted sum of all 16 inputs
-with weight either 1 or -1. It has a last stage scaling of (sum+1)>>2. And
-dc only idct is (dc+16)>>5. So if all the sums are between -65 and 63 the
-output after inverse wht and idct will be all zero. A sum of absolute value
-smaller than 65 guarantees all 16 different (+1/-1) weighted sums in wht
-fall between -65 and +65.
-**************************************************************************/
-#define SUM_2ND_COEFF_THRESH 65
-
-static void check_reset_2nd_coeffs(MACROBLOCKD *xd,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
- int sum = 0;
- int i;
- BLOCKD *bd = &xd->block[24];
- if (bd->dequant[0] >= SUM_2ND_COEFF_THRESH
- && bd->dequant[1] >= SUM_2ND_COEFF_THRESH)
- return;
-
- for (i = 0; i < bd->eob; i++) {
- int coef = bd->dqcoeff[vp9_default_zig_zag1d_4x4[i]];
- sum += (coef >= 0) ? coef : -coef;
- if (sum >= SUM_2ND_COEFF_THRESH)
- return;
- }
-
- if (sum < SUM_2ND_COEFF_THRESH) {
- for (i = 0; i < bd->eob; i++) {
- int rc = vp9_default_zig_zag1d_4x4[i];
- bd->qcoeff[rc] = 0;
- bd->dqcoeff[rc] = 0;
- }
- bd->eob = 0;
- *a = *l = (bd->eob != 0);
- }
-}
-
-#define SUM_2ND_COEFF_THRESH_8X8 32
-static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *xd,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
- int sum = 0;
- BLOCKD *bd = &xd->block[24];
- int coef;
-
- coef = bd->dqcoeff[0];
- sum += (coef >= 0) ? coef : -coef;
- coef = bd->dqcoeff[1];
- sum += (coef >= 0) ? coef : -coef;
- coef = bd->dqcoeff[4];
- sum += (coef >= 0) ? coef : -coef;
- coef = bd->dqcoeff[8];
- sum += (coef >= 0) ? coef : -coef;
-
- if (sum < SUM_2ND_COEFF_THRESH_8X8) {
- bd->qcoeff[0] = 0;
- bd->dqcoeff[0] = 0;
- bd->qcoeff[1] = 0;
- bd->dqcoeff[1] = 0;
- bd->qcoeff[4] = 0;
- bd->dqcoeff[4] = 0;
- bd->qcoeff[8] = 0;
- bd->dqcoeff[8] = 0;
- bd->eob = 0;
- *a = *l = (bd->eob != 0);
- }
-}
-
-void vp9_optimize_mby_4x4(MACROBLOCK *x) {
+void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
int b;
- PLANE_TYPE type;
- int has_2nd_order;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
@@ -694,28 +858,14 @@
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
- has_2nd_order = get_2nd_order_usage(&x->e_mbd);
-
- type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
-
for (b = 0; b < 16; b++) {
- optimize_b(x, b, type,
+ optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b], TX_4X4);
}
-
- if (has_2nd_order) {
- b = 24;
- optimize_b(x, b, PLANE_TYPE_Y2,
- ta + vp9_block2above[TX_4X4][b],
- tl + vp9_block2left[TX_4X4][b], TX_4X4);
- check_reset_2nd_coeffs(&x->e_mbd,
- ta + vp9_block2above[TX_4X4][b],
- tl + vp9_block2left[TX_4X4][b]);
- }
}
-void vp9_optimize_mbuv_4x4(MACROBLOCK *x) {
+void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
int b;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
@@ -731,24 +881,22 @@
tl = (ENTROPY_CONTEXT *)&t_left;
for (b = 16; b < 24; b++) {
- optimize_b(x, b, PLANE_TYPE_UV,
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b], TX_4X4);
}
}
-static void optimize_mb_4x4(MACROBLOCK *x) {
- vp9_optimize_mby_4x4(x);
- vp9_optimize_mbuv_4x4(x);
+static void optimize_mb_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ vp9_optimize_mby_4x4(cm, x);
+ vp9_optimize_mbuv_4x4(cm, x);
}
-void vp9_optimize_mby_8x8(MACROBLOCK *x) {
+void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
int b;
- PLANE_TYPE type;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
- int has_2nd_order = get_2nd_order_usage(&x->e_mbd);
if (!x->e_mbd.above_context || !x->e_mbd.left_context)
return;
@@ -758,31 +906,19 @@
ta = (ENTROPY_CONTEXT *)&t_above;
tl = (ENTROPY_CONTEXT *)&t_left;
- type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
for (b = 0; b < 16; b += 4) {
ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b];
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
-#if CONFIG_CNVCONTEXT
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = a[0];
- ENTROPY_CONTEXT left_ec = l[0];
-#endif
- optimize_b(x, b, type, &above_ec, &left_ec, TX_8X8);
+ optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant,
+ &above_ec, &left_ec, TX_8X8);
a[1] = a[0] = above_ec;
l[1] = l[0] = left_ec;
}
-
- // 8x8 always have 2nd order block
- if (has_2nd_order) {
- check_reset_8x8_2nd_coeffs(&x->e_mbd,
- ta + vp9_block2above[TX_8X8][24],
- tl + vp9_block2left[TX_8X8][24]);
- }
}
-void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
+void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
int b;
ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)x->e_mbd.above_context;
ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)x->e_mbd.left_context;
@@ -793,23 +929,19 @@
for (b = 16; b < 24; b += 4) {
ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b];
ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
-#if CONFIG_CNVCONTEXT
ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
-#else
- ENTROPY_CONTEXT above_ec = a[0];
- ENTROPY_CONTEXT left_ec = l[0];
-#endif
- optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8);
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant,
+ &above_ec, &left_ec, TX_8X8);
}
}
-static void optimize_mb_8x8(MACROBLOCK *x) {
- vp9_optimize_mby_8x8(x);
- vp9_optimize_mbuv_8x8(x);
+static void optimize_mb_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ vp9_optimize_mby_8x8(cm, x);
+ vp9_optimize_mbuv_8x8(cm, x);
}
-void vp9_optimize_mby_16x16(MACROBLOCK *x) {
+void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
ENTROPY_CONTEXT_PLANES *const t_above = x->e_mbd.above_context;
ENTROPY_CONTEXT_PLANES *const t_left = x->e_mbd.left_context;
ENTROPY_CONTEXT ta, tl;
@@ -817,22 +949,345 @@
if (!t_above || !t_left)
return;
-#if CONFIG_CNVCONTEXT
ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0;
tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0;
-#else
- ta = t_above->y1[0];
- tl = t_left->y1[0];
-#endif
- optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16);
+ optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ &ta, &tl, TX_16X16);
}
-static void optimize_mb_16x16(MACROBLOCK *x) {
- vp9_optimize_mby_16x16(x);
- vp9_optimize_mbuv_8x8(x);
+static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ vp9_optimize_mby_16x16(cm, x);
+ vp9_optimize_mbuv_8x8(cm, x);
}
-void vp9_fidct_mb(MACROBLOCK *x) {
+void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT ta, tl;
+
+ ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ &ta, &tl, TX_32X32);
+}
+
+void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT ta[2], tl[2];
+ int n;
+
+ ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0;
+ ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0;
+ tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_16X16);
+ }
+}
+
+void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT ta[4], tl[4];
+ int n;
+
+ ta[0] = (a[0] + a[1]) != 0;
+ ta[1] = (a[2] + a[3]) != 0;
+ ta[2] = (a1[0] + a1[1]) != 0;
+ ta[3] = (a1[2] + a1[3]) != 0;
+ tl[0] = (l[0] + l[1]) != 0;
+ tl[1] = (l[2] + l[3]) != 0;
+ tl[2] = (l1[0] + l1[1]) != 0;
+ tl[3] = (l1[2] + l1[3]) != 0;
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_8X8);
+ }
+}
+
+void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT ta[8], tl[8];
+ int n;
+
+ vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_4X4);
+ }
+}
+
+void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec;
+ int b;
+
+ for (b = 64; b < 96; b += 16) {
+ const int cidx = b >= 80 ? 20 : 16;
+ a = ta + vp9_block2above_sb[TX_16X16][b];
+ l = tl + vp9_block2left_sb[TX_16X16][b];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
+ left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &above_ec, &left_ec, TX_16X16);
+ }
+}
+
+void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l, above_ec, left_ec;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 64; b < 96; b += 4) {
+ const int cidx = b >= 80 ? 20 : 16;
+ a = ta + vp9_block2above_sb[TX_8X8][b];
+ l = tl + vp9_block2left_sb[TX_8X8][b];
+ above_ec = (a[0] + a[1]) != 0;
+ left_ec = (l[0] + l[1]) != 0;
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &above_ec, &left_ec, TX_8X8);
+ a[0] = a[1] = above_ec;
+ l[0] = l[1] = left_ec;
+ }
+}
+
+void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 64; b < 96; b++) {
+ const int cidx = b >= 80 ? 20 : 16;
+ a = ta + vp9_block2above_sb[TX_4X4][b];
+ l = tl + vp9_block2left_sb[TX_4X4][b];
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ a, l, TX_4X4);
+ }
+}
+
+void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
+ ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
+ ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
+ ENTROPY_CONTEXT ta[2], tl[2];
+ int n;
+
+ ta[0] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ ta[1] = (a2[0] + a2[1] + a2[2] + a2[3] + a3[0] + a3[1] + a3[2] + a3[3]) != 0;
+ tl[0] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ tl[1] = (l2[0] + l2[1] + l2[2] + l2[3] + l3[0] + l3[1] + l3[2] + l3[3]) != 0;
+ for (n = 0; n < 4; n++) {
+ const int x_idx = n & 1, y_idx = n >> 1;
+
+ optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_32X32);
+ }
+}
+
+void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
+ ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
+ ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
+ ENTROPY_CONTEXT ta[4], tl[4];
+ int n;
+
+ ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0;
+ ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ ta[2] = (a2[0] + a2[1] + a2[2] + a2[3]) != 0;
+ ta[3] = (a3[0] + a3[1] + a3[2] + a3[3]) != 0;
+ tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0;
+ tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ tl[2] = (l2[0] + l2[1] + l2[2] + l2[3]) != 0;
+ tl[3] = (l3[0] + l3[1] + l3[2] + l3[3]) != 0;
+ for (n = 0; n < 16; n++) {
+ const int x_idx = n & 3, y_idx = n >> 2;
+
+ optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_16X16);
+ }
+}
+
+void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
+ ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2);
+ ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3);
+ ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
+ ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2);
+ ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3);
+ ENTROPY_CONTEXT ta[8], tl[8];
+ int n;
+
+ ta[0] = (a[0] + a[1]) != 0;
+ ta[1] = (a[2] + a[3]) != 0;
+ ta[2] = (a1[0] + a1[1]) != 0;
+ ta[3] = (a1[2] + a1[3]) != 0;
+ ta[4] = (a2[0] + a2[1]) != 0;
+ ta[5] = (a2[2] + a2[3]) != 0;
+ ta[6] = (a3[0] + a3[1]) != 0;
+ ta[7] = (a3[2] + a3[3]) != 0;
+ tl[0] = (l[0] + l[1]) != 0;
+ tl[1] = (l[2] + l[3]) != 0;
+ tl[2] = (l1[0] + l1[1]) != 0;
+ tl[3] = (l1[2] + l1[3]) != 0;
+ tl[4] = (l2[0] + l2[1]) != 0;
+ tl[5] = (l2[2] + l2[3]) != 0;
+ tl[6] = (l3[0] + l3[1]) != 0;
+ tl[7] = (l3[2] + l3[3]) != 0;
+ for (n = 0; n < 64; n++) {
+ const int x_idx = n & 7, y_idx = n >> 3;
+
+ optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_8X8);
+ }
+}
+
+void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT ta[16], tl[16];
+ int n;
+
+ vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(ta + 8, x->e_mbd.above_context + 2, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(ta + 12, x->e_mbd.above_context + 3, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl + 8, x->e_mbd.left_context + 2, 4 * sizeof(ENTROPY_CONTEXT));
+ vpx_memcpy(tl + 12, x->e_mbd.left_context + 3, 4 * sizeof(ENTROPY_CONTEXT));
+ for (n = 0; n < 256; n++) {
+ const int x_idx = n & 15, y_idx = n >> 4;
+
+ optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
+ ta + x_idx, tl + y_idx, TX_4X4);
+ }
+}
+
+void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
+ int b;
+
+ for (b = 256; b < 384; b += 64) {
+ const int cidx = b >= 320 ? 20 : 16;
+ a = ta + vp9_block2above_sb64[TX_32X32][b];
+ l = tl + vp9_block2left_sb64[TX_32X32][b];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l2 = l + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a3 = a + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0;
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &a_ec, &l_ec, TX_32X32);
+ }
+}
+
+void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 256; b < 384; b += 16) {
+ const int cidx = b >= 320 ? 20 : 16;
+ a = ta + vp9_block2above_sb64[TX_16X16][b];
+ l = tl + vp9_block2left_sb64[TX_16X16][b];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
+ left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &above_ec, &left_ec, TX_16X16);
+ a[0] = a[1] = a1[0] = a1[1] = above_ec;
+ l[0] = l[1] = l1[0] = l1[1] = left_ec;
+ }
+}
+
+void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l, above_ec, left_ec;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 256; b < 384; b += 4) {
+ const int cidx = b >= 320 ? 20 : 16;
+ a = ta + vp9_block2above_sb64[TX_8X8][b];
+ l = tl + vp9_block2left_sb64[TX_8X8][b];
+ above_ec = (a[0] + a[1]) != 0;
+ left_ec = (l[0] + l[1]) != 0;
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ &above_ec, &left_ec, TX_8X8);
+ a[0] = a[1] = above_ec;
+ l[0] = l[1] = left_ec;
+ }
+}
+
+void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left;
+ ENTROPY_CONTEXT *a, *l;
+ int b;
+
+ vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above));
+ vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left));
+ for (b = 256; b < 384; b++) {
+ const int cidx = b >= 320 ? 20 : 16;
+ a = ta + vp9_block2above_sb64[TX_4X4][b];
+ l = tl + vp9_block2left_sb64[TX_4X4][b];
+ optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant,
+ a, l, TX_4X4);
+ }
+}
+
+void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *const xd = &x->e_mbd;
TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
@@ -840,7 +1295,7 @@
vp9_transform_mb_16x16(x);
vp9_quantize_mb_16x16(x);
if (x->optimize)
- optimize_mb_16x16(x);
+ optimize_mb_16x16(cm, x);
vp9_inverse_transform_mb_16x16(xd);
} else if (tx_size == TX_8X8) {
if (xd->mode_info_context->mbmi.mode == SPLITMV) {
@@ -850,8 +1305,8 @@
vp9_quantize_mby_8x8(x);
vp9_quantize_mbuv_4x4(x);
if (x->optimize) {
- vp9_optimize_mby_8x8(x);
- vp9_optimize_mbuv_4x4(x);
+ vp9_optimize_mby_8x8(cm, x);
+ vp9_optimize_mbuv_4x4(cm, x);
}
vp9_inverse_transform_mby_8x8(xd);
vp9_inverse_transform_mbuv_4x4(xd);
@@ -859,7 +1314,7 @@
vp9_transform_mb_8x8(x);
vp9_quantize_mb_8x8(x);
if (x->optimize)
- optimize_mb_8x8(x);
+ optimize_mb_8x8(cm, x);
vp9_inverse_transform_mb_8x8(xd);
}
} else {
@@ -866,26 +1321,27 @@
transform_mb_4x4(x);
vp9_quantize_mb_4x4(x);
if (x->optimize)
- optimize_mb_4x4(x);
+ optimize_mb_4x4(cm, x);
vp9_inverse_transform_mb_4x4(xd);
}
}
-void vp9_encode_inter16x16(MACROBLOCK *x) {
+void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int mb_row, int mb_col) {
MACROBLOCKD *const xd = &x->e_mbd;
- vp9_build_inter_predictors_mb(xd);
+ vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
subtract_mb(x);
- vp9_fidct_mb(x);
+ vp9_fidct_mb(cm, x);
vp9_recon_mb(xd);
}
/* this function is used by first pass only */
-void vp9_encode_inter16x16y(MACROBLOCK *x) {
+void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
- vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col);
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -13,6 +13,8 @@
#include "./vpx_config.h"
#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/common/vp9_onyxc_int.h"
typedef struct {
MB_PREDICTION_MODE mode;
@@ -21,33 +23,61 @@
} MODE_DEFINITION;
-#include "vp9/encoder/vp9_onyx_int.h"
struct VP9_ENCODER_RTCD;
-void vp9_encode_inter16x16(MACROBLOCK *x);
+void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int mb_row, int mb_col);
void vp9_transform_mbuv_4x4(MACROBLOCK *x);
void vp9_transform_mby_4x4(MACROBLOCK *x);
-void vp9_optimize_mby_4x4(MACROBLOCK *x);
-void vp9_optimize_mbuv_4x4(MACROBLOCK *x);
-void vp9_encode_inter16x16y(MACROBLOCK *x);
+void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col);
void vp9_transform_mb_8x8(MACROBLOCK *mb);
void vp9_transform_mby_8x8(MACROBLOCK *x);
void vp9_transform_mbuv_8x8(MACROBLOCK *x);
-void vp9_build_dcblock_8x8(MACROBLOCK *b);
-void vp9_optimize_mby_8x8(MACROBLOCK *x);
-void vp9_optimize_mbuv_8x8(MACROBLOCK *x);
+void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_mb_16x16(MACROBLOCK *mb);
void vp9_transform_mby_16x16(MACROBLOCK *x);
-void vp9_optimize_mby_16x16(MACROBLOCK *x);
+void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sby_32x32(MACROBLOCK *x);
+void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sby_16x16(MACROBLOCK *x);
+void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sby_8x8(MACROBLOCK *x);
+void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sby_4x4(MACROBLOCK *x);
+void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_transform_sbuv_16x16(MACROBLOCK *x);
+void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sbuv_8x8(MACROBLOCK *x);
+void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sbuv_4x4(MACROBLOCK *x);
+void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
-void vp9_fidct_mb(MACROBLOCK *x);
+void vp9_transform_sb64y_32x32(MACROBLOCK *x);
+void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sb64y_16x16(MACROBLOCK *x);
+void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sb64y_8x8(MACROBLOCK *x);
+void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sb64y_4x4(MACROBLOCK *x);
+void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sb64uv_32x32(MACROBLOCK *x);
+void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sb64uv_16x16(MACROBLOCK *x);
+void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sb64uv_8x8(MACROBLOCK *x);
+void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_transform_sb64uv_4x4(MACROBLOCK *x);
+void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x);
+void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x);
+
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch);
void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc,
@@ -63,5 +93,11 @@
const uint8_t *vsrc, int src_stride,
const uint8_t *upred,
const uint8_t *vpred, int dst_stride);
+void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride,
+ const uint8_t *pred, int dst_stride);
+void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc,
+ const uint8_t *vsrc, int src_stride,
+ const uint8_t *upred,
+ const uint8_t *vpred, int dst_stride);
#endif // VP9_ENCODER_VP9_ENCODEMB_H_
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -378,6 +378,19 @@
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
int new_mv_mode_penalty = 256;
+ int sr = 0;
+ int quart_frm = MIN(cpi->common.width, cpi->common.height);
+
+ // refine the motion search range accroding to the frame dimension
+ // for first pass test
+ while ((quart_frm << sr) < MAX_FULL_PEL_VAL)
+ sr++;
+ if (sr)
+ sr--;
+
+ step_param += sr;
+ further_steps -= sr;
+
// override the default variance function to use MSE
v_fn_ptr.vf = vp9_mse16x16;
@@ -435,9 +448,11 @@
MACROBLOCKD *const xd = &x->e_mbd;
int recon_yoffset, recon_uvoffset;
- YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+ YV12_BUFFER_CONFIG *lst_yv12 =
+ &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]];
YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
- YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+ YV12_BUFFER_CONFIG *gld_yv12 =
+ &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]];
int recon_y_stride = lst_yv12->y_stride;
int recon_uv_stride = lst_yv12->uv_stride;
int64_t intra_error = 0;
@@ -611,7 +626,7 @@
this_error = motion_error;
vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- vp9_encode_inter16x16y(x);
+ vp9_encode_inter16x16y(x, mb_row, mb_col);
sum_mvr += mv.as_mv.row;
sum_mvr_abs += abs(mv.as_mv.row);
sum_mvc += mv.as_mv.col;
@@ -843,16 +858,15 @@
power_term = (vp9_convert_qindex_to_q(Q) * 0.01) + pt_low;
power_term = (power_term > pt_high) ? pt_high : power_term;
- // Adjustments to error term
- // TBD
-
// Calculate correction factor
+ if (power_term < 1.0)
+ assert(error_term >= 0.0);
correction_factor = pow(error_term, power_term);
// Clip range
correction_factor =
(correction_factor < 0.05)
- ? 0.05 : (correction_factor > 2.0) ? 2.0 : correction_factor;
+ ? 0.05 : (correction_factor > 5.0) ? 5.0 : correction_factor;
return correction_factor;
}
@@ -886,8 +900,7 @@
static int estimate_max_q(VP9_COMP *cpi,
FIRSTPASS_STATS *fpstats,
- int section_target_bandwitdh,
- int overhead_bits) {
+ int section_target_bandwitdh) {
int Q;
int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
@@ -898,7 +911,6 @@
double err_per_mb = section_err / num_mbs;
double err_correction_factor;
double speed_correction = 1.0;
- double overhead_bits_per_mb;
if (section_target_bandwitdh <= 0)
return cpi->twopass.maxq_max_limit; // Highest value allowed
@@ -910,15 +922,19 @@
// Look at the drop in prediction quality between the last frame
// and the GF buffer (which contained an older frame).
- sr_err_diff =
- (fpstats->sr_coded_error - fpstats->coded_error) /
- (fpstats->count * cpi->common.MBs);
- sr_correction = (sr_err_diff / 32.0);
- sr_correction = pow(sr_correction, 0.25);
- if (sr_correction < 0.75)
+ if (fpstats->sr_coded_error > fpstats->coded_error) {
+ sr_err_diff =
+ (fpstats->sr_coded_error - fpstats->coded_error) /
+ (fpstats->count * cpi->common.MBs);
+ sr_correction = (sr_err_diff / 32.0);
+ sr_correction = pow(sr_correction, 0.25);
+ if (sr_correction < 0.75)
+ sr_correction = 0.75;
+ else if (sr_correction > 1.25)
+ sr_correction = 1.25;
+ } else {
sr_correction = 0.75;
- else if (sr_correction > 1.25)
- sr_correction = 1.25;
+ }
// Calculate a corrective factor based on a rolling ratio of bits spent
// vs target bits
@@ -950,13 +966,6 @@
speed_correction = 1.25;
}
- // Estimate of overhead bits per mb
- // Correction to overhead bits for min allowed Q.
- // PGW TODO.. This code is broken for the extended Q range
- // for now overhead set to 0.
- overhead_bits_per_mb = overhead_bits / num_mbs;
- overhead_bits_per_mb *= pow(0.98, (double)cpi->twopass.maxq_min_limit);
-
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; Q++) {
@@ -967,24 +976,10 @@
sr_correction * speed_correction *
cpi->twopass.est_max_qcorrection_factor;
- if (err_correction_factor < 0.05)
- err_correction_factor = 0.05;
- else if (err_correction_factor > 5.0)
- err_correction_factor = 5.0;
bits_per_mb_at_this_q =
- vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;
+ vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);
- bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *
- (double)bits_per_mb_at_this_q);
-
- // Mode and motion overhead
- // As Q rises in real encode loop rd code will force overhead down
- // We make a crude adjustment for this here as *.98 per Q step.
- // PGW TODO.. This code is broken for the extended Q range
- // for now overhead set to 0.
- // overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
-
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
break;
}
@@ -1001,7 +996,7 @@
// PGW TODO.. This code is broken for the extended Q range
if ((cpi->ni_frames >
((int)cpi->twopass.total_stats->count >> 8)) &&
- (cpi->ni_frames > 150)) {
+ (cpi->ni_frames > 25)) {
adjust_maxq_qrange(cpi);
}
@@ -1012,8 +1007,7 @@
// complexity and data rate.
static int estimate_cq(VP9_COMP *cpi,
FIRSTPASS_STATS *fpstats,
- int section_target_bandwitdh,
- int overhead_bits) {
+ int section_target_bandwitdh) {
int Q;
int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
@@ -1026,15 +1020,11 @@
double speed_correction = 1.0;
double clip_iiratio;
double clip_iifactor;
- double overhead_bits_per_mb;
-
target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20))
? (512 * section_target_bandwitdh) / num_mbs
: 512 * (section_target_bandwitdh / num_mbs);
- // Estimate of overhead bits per mb
- overhead_bits_per_mb = overhead_bits / num_mbs;
// Corrections for higher compression speed settings
// (reduced compression expected)
@@ -1047,15 +1037,19 @@
// Look at the drop in prediction quality between the last frame
// and the GF buffer (which contained an older frame).
- sr_err_diff =
- (fpstats->sr_coded_error - fpstats->coded_error) /
- (fpstats->count * cpi->common.MBs);
- sr_correction = (sr_err_diff / 32.0);
- sr_correction = pow(sr_correction, 0.25);
- if (sr_correction < 0.75)
+ if (fpstats->sr_coded_error > fpstats->coded_error) {
+ sr_err_diff =
+ (fpstats->sr_coded_error - fpstats->coded_error) /
+ (fpstats->count * cpi->common.MBs);
+ sr_correction = (sr_err_diff / 32.0);
+ sr_correction = pow(sr_correction, 0.25);
+ if (sr_correction < 0.75)
+ sr_correction = 0.75;
+ else if (sr_correction > 1.25)
+ sr_correction = 1.25;
+ } else {
sr_correction = 0.75;
- else if (sr_correction > 1.25)
- sr_correction = 1.25;
+ }
// II ratio correction factor for clip as a whole
clip_iiratio = cpi->twopass.total_stats->intra_error /
@@ -1073,24 +1067,9 @@
calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, Q) *
sr_correction * speed_correction * clip_iifactor;
- if (err_correction_factor < 0.05)
- err_correction_factor = 0.05;
- else if (err_correction_factor > 5.0)
- err_correction_factor = 5.0;
-
bits_per_mb_at_this_q =
- vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;
+ vp9_bits_per_mb(INTER_FRAME, Q, err_correction_factor);
- bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *
- (double)bits_per_mb_at_this_q);
-
- // Mode and motion overhead
- // As Q rises in real encode loop rd code will force overhead down
- // We make a crude adjustment for this here as *.98 per Q step.
- // PGW TODO.. This code is broken for the extended Q range
- // for now overhead set to 0.
- overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98);
-
if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
break;
}
@@ -1209,12 +1188,16 @@
mb_sr_err_diff =
(next_frame->sr_coded_error - next_frame->coded_error) /
(cpi->common.MBs);
- second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0);
- second_ref_decay = pow(second_ref_decay, 0.5);
- if (second_ref_decay < 0.85)
+ if (mb_sr_err_diff <= 512.0) {
+ second_ref_decay = 1.0 - (mb_sr_err_diff / 512.0);
+ second_ref_decay = pow(second_ref_decay, 0.5);
+ if (second_ref_decay < 0.85)
+ second_ref_decay = 0.85;
+ else if (second_ref_decay > 1.0)
+ second_ref_decay = 1.0;
+ } else {
second_ref_decay = 0.85;
- else if (second_ref_decay > 1.0)
- second_ref_decay = 1.0;
+ }
if (second_ref_decay < prediction_decay_rate)
prediction_decay_rate = second_ref_decay;
@@ -1459,11 +1442,14 @@
return arf_boost;
}
-static void configure_arnr_filter(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+static void configure_arnr_filter(VP9_COMP *cpi,
+ FIRSTPASS_STATS *this_frame,
+ int group_boost) {
int half_gf_int;
int frames_after_arf;
int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
+ int q;
// Define the arnr filter width for this group of frames:
// We only filter frames that lie within a distance of half
@@ -1508,6 +1494,25 @@
}
cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
+
+ // Adjust the strength based on active max q
+ q = ((int)vp9_convert_qindex_to_q(cpi->active_worst_quality) >> 1);
+ if (q > 8) {
+ cpi->active_arnr_strength = cpi->oxcf.arnr_strength;
+ } else {
+ cpi->active_arnr_strength = cpi->oxcf.arnr_strength - (8 - q);
+ if (cpi->active_arnr_strength < 0)
+ cpi->active_arnr_strength = 0;
+ }
+
+ // Adjust number of frames in filter and strength based on gf boost level.
+ if (cpi->active_arnr_frames > (group_boost / 150)) {
+ cpi->active_arnr_frames = (group_boost / 150);
+ cpi->active_arnr_frames += !(cpi->active_arnr_frames & 1);
+ }
+ if (cpi->active_arnr_strength > (group_boost / 300)) {
+ cpi->active_arnr_strength = (group_boost / 300);
+ }
}
// Analyse and define a gf/arf group .
@@ -1531,7 +1536,7 @@
double this_frame_mv_in_out = 0.0;
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
-
+ double mv_ratio_accumulator_thresh;
int max_bits = frame_max_bits(cpi); // Max for a single frame
unsigned int allow_alt_ref =
@@ -1540,6 +1545,7 @@
int f_boost = 0;
int b_boost = 0;
int flash_detected;
+ int active_max_gf_interval;
cpi->twopass.gf_group_bits = 0;
@@ -1562,11 +1568,22 @@
if (cpi->common.frame_type == KEY_FRAME)
gf_group_err -= gf_first_frame_err;
- // Scan forward to try and work out how many frames the next gf group
- // should contain and what level of boost is appropriate for the GF
- // or ARF that will be coded with the group
- i = 0;
+ // Motion breakout threshold for loop below depends on image size.
+ mv_ratio_accumulator_thresh = (cpi->common.width + cpi->common.height) / 10.0;
+ // Work out a maximum interval for the GF.
+ // If the image appears completely static we can extend beyond this.
+ // The value chosen depends on the active Q range. At low Q we have
+ // bits to spare and are better with a smaller interval and smaller boost.
+ // At high Q when there are few bits to spare we are better with a longer
+ // interval to spread the cost of the GF.
+ active_max_gf_interval =
+ 12 + ((int)vp9_convert_qindex_to_q(cpi->active_worst_quality) >> 5);
+
+ if (active_max_gf_interval > cpi->max_gf_interval)
+ active_max_gf_interval = cpi->max_gf_interval;
+
+ i = 0;
while (((i < cpi->twopass.static_scene_max_gf_interval) ||
((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) &&
(i < cpi->twopass.frames_to_key)) {
@@ -1618,7 +1635,7 @@
// Break out conditions.
if (
// Break at cpi->max_gf_interval unless almost totally static
- (i >= cpi->max_gf_interval && (zero_motion_accumulator < 0.995)) ||
+ (i >= active_max_gf_interval && (zero_motion_accumulator < 0.995)) ||
(
// Dont break out with a very short interval
(i > MIN_GF_INTERVAL) &&
@@ -1626,7 +1643,7 @@
((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
((boost_score > 125.0) || (next_frame.pcnt_inter < 0.75)) &&
(!flash_detected) &&
- ((mv_ratio_accumulator > 100.0) ||
+ ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > 3.0) ||
(mv_in_out_accumulator < -2.0) ||
((boost_score - old_boost_score) < IIFACTOR))
@@ -1673,7 +1690,7 @@
cpi->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, &b_boost);
cpi->source_alt_ref_pending = TRUE;
- configure_arnr_filter(cpi, this_frame);
+ configure_arnr_filter(cpi, this_frame, cpi->gfu_boost);
} else {
cpi->gfu_boost = (int)boost_score;
cpi->source_alt_ref_pending = FALSE;
@@ -1945,7 +1962,8 @@
void vp9_second_pass(VP9_COMP *cpi) {
int tmp_q;
- int frames_left = (int)(cpi->twopass.total_stats->count - cpi->common.current_video_frame);
+ int frames_left = (int)(cpi->twopass.total_stats->count -
+ cpi->common.current_video_frame);
FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS this_frame_copy;
@@ -1953,8 +1971,6 @@
double this_frame_intra_error;
double this_frame_coded_error;
- int overhead_bits;
-
if (!cpi->twopass.stats_in) {
return;
}
@@ -1961,68 +1977,6 @@
vp9_clear_system_state();
- vpx_memset(&this_frame, 0, sizeof(FIRSTPASS_STATS));
-
- if (EOF == input_stats(cpi, &this_frame))
- return;
-
- this_frame_intra_error = this_frame.intra_error;
- this_frame_coded_error = this_frame.coded_error;
-
- // keyframe and section processing !
- if (cpi->twopass.frames_to_key == 0) {
- // Define next KF group and assign bits to it
- vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
- find_next_key_frame(cpi, &this_frame_copy);
- }
-
- // Is this a GF / ARF (Note that a KF is always also a GF)
- if (cpi->frames_till_gf_update_due == 0) {
- // Define next gf group and assign bits to it
- vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
- define_gf_group(cpi, &this_frame_copy);
-
- // If we are going to code an altref frame at the end of the group and the current frame is not a key frame....
- // If the previous group used an arf this frame has already benefited from that arf boost and it should not be given extra bits
- // If the previous group was NOT coded using arf we may want to apply some boost to this GF as well
- if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) {
- // Assign a standard frames worth of bits from those allocated to the GF group
- int bak = cpi->per_frame_bandwidth;
- vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
- assign_std_frame_bits(cpi, &this_frame_copy);
- cpi->per_frame_bandwidth = bak;
- }
- }
-
- // Otherwise this is an ordinary frame
- else {
- // Assign bits from those allocated to the GF group
- vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
- assign_std_frame_bits(cpi, &this_frame_copy);
- }
-
- // Keep a globally available copy of this and the next frame's iiratio.
- cpi->twopass.this_iiratio = (int)(this_frame_intra_error /
- DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
- {
- FIRSTPASS_STATS next_frame;
- if (lookup_next_frame_stats(cpi, &next_frame) != EOF) {
- cpi->twopass.next_iiratio = (int)(next_frame.intra_error /
- DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
- }
- }
-
- // Set nominal per second bandwidth for this frame
- cpi->target_bandwidth = (int)(cpi->per_frame_bandwidth
- * cpi->output_frame_rate);
- if (cpi->target_bandwidth < 0)
- cpi->target_bandwidth = 0;
-
-
- // Account for mv, mode and other overheads.
- overhead_bits = (int)estimate_modemvcost(
- cpi, cpi->twopass.total_left_stats);
-
// Special case code for first frame.
if (cpi->common.current_video_frame == 0) {
cpi->twopass.est_max_qcorrection_factor = 1.0;
@@ -2034,8 +1988,7 @@
est_cq =
estimate_cq(cpi,
cpi->twopass.total_left_stats,
- (int)(cpi->twopass.bits_left / frames_left),
- overhead_bits);
+ (int)(cpi->twopass.bits_left / frames_left));
cpi->cq_target_quality = cpi->oxcf.cq_level;
if (est_cq > cpi->cq_target_quality)
@@ -2049,13 +2002,13 @@
tmp_q = estimate_max_q(
cpi,
cpi->twopass.total_left_stats,
- (int)(cpi->twopass.bits_left / frames_left),
- overhead_bits);
+ (int)(cpi->twopass.bits_left / frames_left));
cpi->active_worst_quality = tmp_q;
cpi->ni_av_qi = tmp_q;
cpi->avg_q = vp9_convert_qindex_to_q(tmp_q);
+#ifndef ONE_SHOT_Q_ESTIMATE
// Limit the maxq value returned subsequently.
// This increases the risk of overspend or underspend if the initial
// estimate for the clip is bad, but helps prevent excessive
@@ -2062,8 +2015,10 @@
// variation in Q, especially near the end of a clip
// where for example a small overspend may cause Q to crash
adjust_maxq_qrange(cpi);
+#endif
}
+#ifndef ONE_SHOT_Q_ESTIMATE
// The last few frames of a clip almost always have to few or too many
// bits and for the sake of over exact rate control we dont want to make
// radical adjustments to the allowed quantizer range just to use up a
@@ -2078,20 +2033,77 @@
tmp_q = estimate_max_q(
cpi,
cpi->twopass.total_left_stats,
- (int)(cpi->twopass.bits_left / frames_left),
- overhead_bits);
+ (int)(cpi->twopass.bits_left / frames_left));
// Make a damped adjustment to active max Q
cpi->active_worst_quality =
adjust_active_maxq(cpi->active_worst_quality, tmp_q);
}
+#endif
+ vpx_memset(&this_frame, 0, sizeof(FIRSTPASS_STATS));
+ if (EOF == input_stats(cpi, &this_frame))
+ return;
+
+ this_frame_intra_error = this_frame.intra_error;
+ this_frame_coded_error = this_frame.coded_error;
+
+ // keyframe and section processing !
+ if (cpi->twopass.frames_to_key == 0) {
+ // Define next KF group and assign bits to it
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ find_next_key_frame(cpi, &this_frame_copy);
+ }
+
+ // Is this a GF / ARF (Note that a KF is always also a GF)
+ if (cpi->frames_till_gf_update_due == 0) {
+ // Define next gf group and assign bits to it
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ define_gf_group(cpi, &this_frame_copy);
+
+ // If we are going to code an altref frame at the end of the group
+ // and the current frame is not a key frame....
+ // If the previous group used an arf this frame has already benefited
+ // from that arf boost and it should not be given extra bits
+ // If the previous group was NOT coded using arf we may want to apply
+ // some boost to this GF as well
+ if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) {
+ // Assign a standard frames worth of bits from those allocated
+ // to the GF group
+ int bak = cpi->per_frame_bandwidth;
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ assign_std_frame_bits(cpi, &this_frame_copy);
+ cpi->per_frame_bandwidth = bak;
+ }
+ } else {
+ // Otherwise this is an ordinary frame
+ // Assign bits from those allocated to the GF group
+ vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame));
+ assign_std_frame_bits(cpi, &this_frame_copy);
+ }
+
+ // Keep a globally available copy of this and the next frame's iiratio.
+ cpi->twopass.this_iiratio = (int)(this_frame_intra_error /
+ DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
+ {
+ FIRSTPASS_STATS next_frame;
+ if (lookup_next_frame_stats(cpi, &next_frame) != EOF) {
+ cpi->twopass.next_iiratio = (int)(next_frame.intra_error /
+ DOUBLE_DIVIDE_CHECK(next_frame.coded_error));
+ }
+ }
+
+ // Set nominal per second bandwidth for this frame
+ cpi->target_bandwidth = (int)(cpi->per_frame_bandwidth
+ * cpi->output_frame_rate);
+ if (cpi->target_bandwidth < 0)
+ cpi->target_bandwidth = 0;
+
cpi->twopass.frames_to_key--;
// Update the total stats remaining sturcture
subtract_stats(cpi->twopass.total_left_stats, &this_frame);
}
-
static int test_candidate_kf(VP9_COMP *cpi,
FIRSTPASS_STATS *last_frame,
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -11,12 +11,12 @@
#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
#define VP9_ENCODER_VP9_FIRSTPASS_H_
-extern void vp9_init_first_pass(VP9_COMP *cpi);
-extern void vp9_first_pass(VP9_COMP *cpi);
-extern void vp9_end_first_pass(VP9_COMP *cpi);
+void vp9_init_first_pass(VP9_COMP *cpi);
+void vp9_first_pass(VP9_COMP *cpi);
+void vp9_end_first_pass(VP9_COMP *cpi);
-extern void vp9_init_second_pass(VP9_COMP *cpi);
-extern void vp9_second_pass(VP9_COMP *cpi);
-extern void vp9_end_second_pass(VP9_COMP *cpi);
+void vp9_init_second_pass(VP9_COMP *cpi);
+void vp9_second_pass(VP9_COMP *cpi);
+void vp9_end_second_pass(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_FIRSTPASS_H_
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -9,7 +9,9 @@
*/
#include <assert.h>
#include <stdlib.h>
+
#include "vpx_config.h"
+#include "vp9/common/vp9_common.h"
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/common/vp9_extend.h"
@@ -25,10 +27,9 @@
/* Return the buffer at the given absolute index and increment the index */
-static struct lookahead_entry *
-pop(struct lookahead_ctx *ctx,
- unsigned int *idx) {
- unsigned int index = *idx;
+static struct lookahead_entry * pop(struct lookahead_ctx *ctx,
+ unsigned int *idx) {
+ unsigned int index = *idx;
struct lookahead_entry *buf = ctx->buf + index;
assert(index < ctx->max_sz);
@@ -39,8 +40,7 @@
}
-void
-vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
+void vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
if (ctx) {
if (ctx->buf) {
unsigned int i;
@@ -54,23 +54,15 @@
}
-struct lookahead_ctx *
-vp9_lookahead_init(unsigned int width,
- unsigned int height,
- unsigned int depth) {
+struct lookahead_ctx * vp9_lookahead_init(unsigned int width,
+ unsigned int height,
+ unsigned int depth) {
struct lookahead_ctx *ctx = NULL;
- /* Clamp the lookahead queue depth */
- if (depth < 1)
- depth = 1;
- else if (depth > MAX_LAG_BUFFERS)
- depth = MAX_LAG_BUFFERS;
+ // Clamp the lookahead queue depth
+ depth = clamp(depth, 1, MAX_LAG_BUFFERS);
- /* Align the buffer dimensions */
- width = (width + 15) &~15;
- height = (height + 15) &~15;
-
- /* Allocate the lookahead structures */
+ // Allocate the lookahead structures
ctx = calloc(1, sizeof(*ctx));
if (ctx) {
unsigned int i;
@@ -90,13 +82,9 @@
}
-int
-vp9_lookahead_push(struct lookahead_ctx *ctx,
- YV12_BUFFER_CONFIG *src,
- int64_t ts_start,
- int64_t ts_end,
- unsigned int flags,
- unsigned char *active_map) {
+int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
+ int64_t ts_start, int64_t ts_end, unsigned int flags,
+ unsigned char *active_map) {
struct lookahead_entry *buf;
int row, col, active_end;
int mb_rows = (src->y_height + 15) >> 4;
@@ -156,9 +144,8 @@
}
-struct lookahead_entry *
-vp9_lookahead_pop(struct lookahead_ctx *ctx,
- int drain) {
+struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx,
+ int drain) {
struct lookahead_entry *buf = NULL;
if (ctx->sz && (drain || ctx->sz == ctx->max_sz)) {
@@ -169,9 +156,8 @@
}
-struct lookahead_entry *
-vp9_lookahead_peek(struct lookahead_ctx *ctx,
- int index) {
+struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx,
+ int index) {
struct lookahead_entry *buf = NULL;
assert(index < (int)ctx->max_sz);
@@ -184,8 +170,6 @@
return buf;
}
-
-unsigned int
-vp9_lookahead_depth(struct lookahead_ctx *ctx) {
+unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx) {
return ctx->sz;
}
--- a/vp9/encoder/vp9_lookahead.h
+++ b/vp9/encoder/vp9_lookahead.h
@@ -28,17 +28,13 @@
*
* The lookahead stage is a queue of frame buffers on which some analysis
* may be done when buffers are enqueued.
- *
- *
*/
struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
unsigned int height,
- unsigned int depth
- );
+ unsigned int depth);
/**\brief Destroys the lookahead stage
- *
*/
void vp9_lookahead_destroy(struct lookahead_ctx *ctx);
@@ -58,13 +54,9 @@
* \param[in] flags Flags set on this frame
* \param[in] active_map Map that specifies which macroblock is active
*/
-int
-vp9_lookahead_push(struct lookahead_ctx *ctx,
- YV12_BUFFER_CONFIG *src,
- int64_t ts_start,
- int64_t ts_end,
- unsigned int flags,
- unsigned char *active_map);
+int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
+ int64_t ts_start, int64_t ts_end, unsigned int flags,
+ unsigned char *active_map);
/**\brief Get the next source buffer to encode
@@ -76,11 +68,9 @@
*
* \retval NULL, if drain set and queue is empty
* \retval NULL, if drain not set and queue not of the configured depth
- *
*/
-struct lookahead_entry *
-vp9_lookahead_pop(struct lookahead_ctx *ctx,
- int drain);
+struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx,
+ int drain);
/**\brief Get a future source buffer to encode
@@ -89,11 +79,9 @@
* \param[in] index Index of the frame to be returned, 0 == next frame
*
* \retval NULL, if no buffer exists at the specified index
- *
*/
-struct lookahead_entry *
-vp9_lookahead_peek(struct lookahead_ctx *ctx,
- int index);
+struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx,
+ int index);
/**\brief Get the number of frames currently in the lookahead queue
@@ -100,7 +88,6 @@
*
* \param[in] ctx Pointer to the lookahead context
*/
-unsigned int
-vp9_lookahead_depth(struct lookahead_ctx *ctx);
+unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx);
#endif // VP9_ENCODER_VP9_LOOKAHEAD_H_
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -20,7 +20,9 @@
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
int_mv *ref_mv,
- int_mv *dst_mv) {
+ int_mv *dst_mv,
+ int mb_row,
+ int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
BLOCK *b = &x->block[0];
@@ -27,8 +29,8 @@
BLOCKD *d = &xd->block[0];
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
unsigned int best_err;
- int step_param;
+
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
int tmp_row_min = x->mv_row_min;
@@ -36,11 +38,8 @@
int_mv ref_full;
// Further step/diamond searches as necessary
- if (cpi->Speed < 8) {
- step_param = cpi->sf.first_step + ((cpi->Speed > 5) ? 1 : 0);
- } else {
- step_param = cpi->sf.first_step + 2;
- }
+ int step_param = cpi->sf.first_step +
+ (cpi->Speed < 8 ? (cpi->Speed > 5 ? 1 : 0) : 2);
vp9_clamp_mv_min_max(x, ref_mv);
@@ -72,7 +71,7 @@
}
vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
- vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col);
best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
xd->predictor, 16, INT_MAX);
@@ -93,8 +92,9 @@
YV12_BUFFER_CONFIG *buf,
int buf_mb_y_offset,
YV12_BUFFER_CONFIG *ref,
- int mb_y_offset
-) {
+ int mb_y_offset,
+ int mb_row,
+ int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
unsigned int err, tmp_err;
@@ -124,7 +124,7 @@
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search
- tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv);
+ tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
if (tmp_err < err) {
err = tmp_err;
dst_mv->as_int = tmp_mv.as_int;
@@ -136,7 +136,8 @@
int_mv zero_ref_mv, tmp_mv;
zero_ref_mv.as_int = 0;
- tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv);
+ tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
+ mb_row, mb_col);
if (tmp_err < err) {
dst_mv->as_int = tmp_mv.as_int;
err = tmp_err;
@@ -229,7 +230,9 @@
int gld_y_offset,
YV12_BUFFER_CONFIG *alt_ref,
int_mv *prev_alt_ref_mv,
- int arf_y_offset
+ int arf_y_offset,
+ int mb_row,
+ int mb_col
) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -249,7 +252,8 @@
int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv,
&stats->ref[GOLDEN_FRAME].m.mv,
buf, mb_y_offset,
- golden_ref, gld_y_offset);
+ golden_ref, gld_y_offset,
+ mb_row, mb_col);
stats->ref[GOLDEN_FRAME].err = g_motion_error;
} else {
stats->ref[GOLDEN_FRAME].err = INT_MAX;
@@ -292,6 +296,9 @@
int_mv arf_top_mv, gld_top_mv;
MODE_INFO mi_local;
+ // Make sure the mi context starts in a consistent state.
+ memset(&mi_local, 0, sizeof(mi_local));
+
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_top_mv.as_int = 0;
gld_top_mv.as_int = 0;
@@ -323,7 +330,8 @@
update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
golden_ref, &gld_left_mv, gld_y_in_offset,
- alt_ref, &arf_left_mv, arf_y_in_offset);
+ alt_ref, &arf_left_mv, arf_y_in_offset,
+ mb_row, mb_col);
arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
if (mb_col == 0) {
@@ -412,7 +420,7 @@
cpi->static_mb_pct = (ncnt[1] * 100) / cm->MBs;
// This error case should not be reachable as this function should
- // never be called with the common data structure unititialized.
+ // never be called with the common data structure uninitialized.
else
cpi->static_mb_pct = 0;
@@ -427,13 +435,11 @@
vpx_free(arf_not_zz);
}
-void vp9_update_mbgraph_stats
-(
- VP9_COMP *cpi
-) {
+void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
int i, n_frames = vp9_lookahead_depth(cpi->lookahead);
- YV12_BUFFER_CONFIG *golden_ref = &cm->yv12_fb[cm->gld_fb_idx];
+ YV12_BUFFER_CONFIG *golden_ref =
+ &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]];
// we need to look ahead beyond where the ARF transitions into
// being a GF - so exit if we don't look ahead beyond that
--- a/vp9/encoder/vp9_mbgraph.h
+++ b/vp9/encoder/vp9_mbgraph.h
@@ -11,6 +11,6 @@
#ifndef VP9_ENCODER_VP9_MBGRAPH_H_
#define VP9_ENCODER_VP9_MBGRAPH_H_
-extern void vp9_update_mbgraph_stats(VP9_COMP *cpi);
+void vp9_update_mbgraph_stats(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_MBGRAPH_H_
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -8,27 +8,22 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <stdio.h>
+#include <limits.h>
+#include <math.h>
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "./vpx_config.h"
-#include <stdio.h>
-#include <limits.h>
-#include <math.h>
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_common.h"
-#ifdef ENTROPY_STATS
-static int mv_ref_ct [31] [4] [2];
-static int mv_mode_cts [4] [2];
-#endif
-
void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL +
- ((ref_mv->as_mv.col & 7) ? 1 : 0);
+ ((ref_mv->as_mv.col & 7) ? 1 : 0);
int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL +
- ((ref_mv->as_mv.row & 7) ? 1 : 0);
+ ((ref_mv->as_mv.row & 7) ? 1 : 0);
int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
@@ -43,14 +38,26 @@
x->mv_row_max = row_max;
}
+int vp9_init_search_range(int width, int height) {
+ int sr = 0;
+ int frm = MIN(width, height);
+
+ while ((frm << sr) < MAX_FULL_PEL_VAL)
+ sr++;
+
+ if (sr)
+ sr--;
+
+ return sr;
+}
+
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
- int Weight, int ishp) {
+ int weight, int ishp) {
MV v;
- v.row = (mv->as_mv.row - ref->as_mv.row);
- v.col = (mv->as_mv.col - ref->as_mv.col);
+ v.row = mv->as_mv.row - ref->as_mv.row;
+ v.col = mv->as_mv.col - ref->as_mv.col;
return ((mvjcost[vp9_get_mv_joint(v)] +
- mvcost[0][v.row] + mvcost[1][v.col]) *
- Weight) >> 7;
+ mvcost[0][v.row] + mvcost[1][v.col]) * weight) >> 7;
}
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
@@ -57,11 +64,11 @@
int error_per_bit, int ishp) {
if (mvcost) {
MV v;
- v.row = (mv->as_mv.row - ref->as_mv.row);
- v.col = (mv->as_mv.col - ref->as_mv.col);
+ v.row = mv->as_mv.row - ref->as_mv.row;
+ v.col = mv->as_mv.col - ref->as_mv.col;
return ((mvjcost[vp9_get_mv_joint(v)] +
mvcost[0][v.row] + mvcost[1][v.col]) *
- error_per_bit + 128) >> 8;
+ error_per_bit + 4096) >> 13;
}
return 0;
}
@@ -68,11 +75,10 @@
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
int *mvsadcost[2], int error_per_bit) {
-
if (mvsadcost) {
MV v;
- v.row = (mv->as_mv.row - ref->as_mv.row);
- v.col = (mv->as_mv.col - ref->as_mv.col);
+ v.row = mv->as_mv.row - ref->as_mv.row;
+ v.col = mv->as_mv.col - ref->as_mv.col;
return ((mvjsadcost[vp9_get_mv_joint(v)] +
mvsadcost[0][v.row] + mvsadcost[1][v.col]) *
error_per_bit + 128) >> 8;
@@ -81,45 +87,39 @@
}
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
- int Len;
+ int len;
int search_site_count = 0;
-
// Generate offsets for 4 search sites per step.
- Len = MAX_FIRST_STEP;
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = 0;
search_site_count++;
- while (Len > 0) {
-
+ for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride;
+ x->ss[search_site_count].mv.row = -len;
+ x->ss[search_site_count].offset = -len * stride;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride;
+ x->ss[search_site_count].mv.row = len;
+ x->ss[search_site_count].offset = len * stride;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.col = -len;
x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = -Len;
+ x->ss[search_site_count].offset = -len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.col = len;
x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = Len;
+ x->ss[search_site_count].offset = len;
search_site_count++;
-
- // Contract.
- Len /= 2;
}
x->ss_count = search_site_count;
@@ -127,68 +127,63 @@
}
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
- int Len;
+ int len;
int search_site_count = 0;
// Generate offsets for 8 search sites per step.
- Len = MAX_FIRST_STEP;
x->ss[search_site_count].mv.col = 0;
x->ss[search_site_count].mv.row = 0;
x->ss[search_site_count].offset = 0;
search_site_count++;
- while (Len > 0) {
-
+ for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride;
+ x->ss[search_site_count].mv.row = -len;
+ x->ss[search_site_count].offset = -len * stride;
search_site_count++;
// Compute offsets for search sites.
x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride;
+ x->ss[search_site_count].mv.row = len;
+ x->ss[search_site_count].offset = len * stride;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
+ x->ss[search_site_count].mv.col = -len;
x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = -Len;
+ x->ss[search_site_count].offset = -len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
+ x->ss[search_site_count].mv.col = len;
x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = Len;
+ x->ss[search_site_count].offset = len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride - Len;
+ x->ss[search_site_count].mv.col = -len;
+ x->ss[search_site_count].mv.row = -len;
+ x->ss[search_site_count].offset = -len * stride - len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
- x->ss[search_site_count].mv.row = -Len;
- x->ss[search_site_count].offset = -Len * stride + Len;
+ x->ss[search_site_count].mv.col = len;
+ x->ss[search_site_count].mv.row = -len;
+ x->ss[search_site_count].offset = -len * stride + len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -Len;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride - Len;
+ x->ss[search_site_count].mv.col = -len;
+ x->ss[search_site_count].mv.row = len;
+ x->ss[search_site_count].offset = len * stride - len;
search_site_count++;
// Compute offsets for search sites.
- x->ss[search_site_count].mv.col = Len;
- x->ss[search_site_count].mv.row = Len;
- x->ss[search_site_count].offset = Len * stride + Len;
+ x->ss[search_site_count].mv.col = len;
+ x->ss[search_site_count].mv.row = len;
+ x->ss[search_site_count].offset = len * stride + len;
search_site_count++;
-
- // Contract.
- Len /= 2;
}
x->ss_count = search_site_count;
@@ -210,8 +205,9 @@
(mvcost ? \
((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
- error_per_bit + 128) >> 8 : 0)
+ error_per_bit + 4096) >> 13 : 0)
+
#define SP(x) (((x) & 7) << 1) // convert motion vector component to offset
// for svf calc
@@ -1546,7 +1542,7 @@
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
int bestsad = INT_MAX;
int r, c;
@@ -1641,7 +1637,7 @@
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
@@ -1770,7 +1766,7 @@
int in_what_stride = d->pre_stride;
int mv_stride = d->pre_stride;
uint8_t *bestaddress;
- int_mv *best_mv = &d->bmi.as_mv.first;
+ int_mv *best_mv = &d->bmi.as_mv[0];
int_mv this_mv;
unsigned int bestsad = INT_MAX;
int r, c;
@@ -1787,7 +1783,7 @@
int col_min = ref_col - distance;
int col_max = ref_col + distance;
- DECLARE_ALIGNED_ARRAY(16, uint16_t, sad_array8, 8);
+ DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
unsigned int sad_array[3];
int_mv fcenter_mv;
@@ -2023,13 +2019,11 @@
for (i = 0; i < search_range; i++) {
int best_site = -1;
- int all_in = 1;
+ int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &
+ ((ref_mv->as_mv.row + 1) < x->mv_row_max) &
+ ((ref_mv->as_mv.col - 1) > x->mv_col_min) &
+ ((ref_mv->as_mv.col + 1) < x->mv_col_max);
- all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
- all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
- all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
- all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
-
if (all_in) {
unsigned int sad_array[4];
unsigned char const *block_offset[4];
@@ -2103,14 +2097,14 @@
#ifdef ENTROPY_STATS
-void print_mode_context(void) {
+void print_mode_context(VP9_COMMON *pc) {
FILE *f = fopen("vp9_modecont.c", "a");
int i, j;
fprintf(f, "#include \"vp9_entropy.h\"\n");
- fprintf(f, "const int vp9_mode_contexts[6][4] =");
+ fprintf(f, "const int vp9_mode_contexts[INTER_MODE_CONTEXTS][4] =");
fprintf(f, "{\n");
- for (j = 0; j < 6; j++) {
+ for (j = 0; j < INTER_MODE_CONTEXTS; j++) {
fprintf(f, " {/* %d */ ", j);
fprintf(f, " ");
for (i = 0; i < 4; i++) {
@@ -2117,7 +2111,8 @@
int this_prob;
// context probs
- this_prob = get_binary_prob(mv_ref_ct[j][i][0], mv_ref_ct[j][i][1]);
+ this_prob = get_binary_prob(pc->fc.mv_ref_ct[j][i][0],
+ pc->fc.mv_ref_ct[j][i][1]);
fprintf(f, "%5d, ", this_prob);
}
@@ -2126,46 +2121,6 @@
fprintf(f, "};\n");
fclose(f);
-}
-
-/* MV ref count ENTROPY_STATS stats code */
-void init_mv_ref_counts() {
- vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
- vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
-}
-
-void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
- if (m == ZEROMV) {
- ++mv_ref_ct [ct[0]] [0] [0];
- ++mv_mode_cts[0][0];
- } else {
- ++mv_ref_ct [ct[0]] [0] [1];
- ++mv_mode_cts[0][1];
-
- if (m == NEARESTMV) {
- ++mv_ref_ct [ct[1]] [1] [0];
- ++mv_mode_cts[1][0];
- } else {
- ++mv_ref_ct [ct[1]] [1] [1];
- ++mv_mode_cts[1][1];
-
- if (m == NEARMV) {
- ++mv_ref_ct [ct[2]] [2] [0];
- ++mv_mode_cts[2][0];
- } else {
- ++mv_ref_ct [ct[2]] [2] [1];
- ++mv_mode_cts[2][1];
-
- if (m == NEWMV) {
- ++mv_ref_ct [ct[3]] [3] [0];
- ++mv_mode_cts[3][0];
- } else {
- ++mv_ref_ct [ct[3]] [3] [1];
- ++mv_mode_cts[3][1];
- }
- }
- }
- }
}
#endif/* END MV ref count ENTROPY_STATS stats code */
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -16,21 +16,25 @@
#include "vp9/encoder/vp9_variance.h"
#ifdef ENTROPY_STATS
-extern void init_mv_ref_counts();
-extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
-void print_mode_context(void);
+void print_mode_context(VP9_COMMON *pc);
#endif
+// The maximum number of steps in a step search given the largest
+// allowed initial step
+#define MAX_MVSEARCH_STEPS 11
+// Max full pel mv specified in 1 pel units
+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)
+// Maximum size of the first step in full pel units
+#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))
-#define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Max full pel mv specified in 1 pel units
-#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
+void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv);
+int vp9_init_search_range(int width, int height);
-extern void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv);
-extern int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
- int *mvcost[2], int Weight, int ishp);
-extern void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
-extern void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
+int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
+ int *mvcost[2], int weight, int ishp);
+void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
+void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
+
// Runs sequence of diamond searches in smaller steps for RD
struct VP9_COMP;
int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK *b,
@@ -39,20 +43,13 @@
vp9_variance_fn_ptr_t *fn_ptr,
int_mv *ref_mv, int_mv *dst_mv);
-extern int vp9_hex_search
-(
- MACROBLOCK *x,
- BLOCK *b,
- BLOCKD *d,
- int_mv *ref_mv,
- int_mv *best_mv,
- int search_param,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vf,
- int *mvjsadcost, int *mvsadcost[2],
- int *mvjcost, int *mvcost[2],
- int_mv *center_mv
-);
+int vp9_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
+ int_mv *ref_mv, int_mv *best_mv,
+ int search_param, int error_per_bit,
+ const vp9_variance_fn_ptr_t *vf,
+ int *mvjsadcost, int *mvsadcost[2],
+ int *mvjcost, int *mvcost[2],
+ int_mv *center_mv);
typedef int (fractional_mv_step_fp) (MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv
*bestmv, int_mv *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -10,7 +10,9 @@
#include "vpx_config.h"
+#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_quantize.h"
@@ -22,6 +24,7 @@
#include "vp9/common/vp9_extend.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_tile_common.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "./vp9_rtcd.h"
#include "./vpx_scale_rtcd.h"
@@ -111,6 +114,13 @@
extern void print_nmvstats();
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+extern void init_nzcstats();
+extern void print_nzcstats();
+#endif
+#endif
+
#ifdef SPEEDSTATS
unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif
@@ -146,31 +156,24 @@
// The formulae were derived from computing a 3rd order polynomial best
// fit to the original data (after plotting real maxq vs minq (not q index))
static int calculate_minq_index(double maxq,
- double x3, double x2, double x, double c) {
+ double x3, double x2, double x1, double c) {
int i;
- double minqtarget;
+ const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c,
+ maxq);
- minqtarget = ((x3 * maxq * maxq * maxq) +
- (x2 * maxq * maxq) +
- (x * maxq) +
- c);
-
- if (minqtarget > maxq)
- minqtarget = maxq;
-
for (i = 0; i < QINDEX_RANGE; i++) {
if (minqtarget <= vp9_convert_qindex_to_q(i))
return i;
}
+
return QINDEX_RANGE - 1;
}
static void init_minq_luts(void) {
int i;
- double maxq;
for (i = 0; i < QINDEX_RANGE; i++) {
- maxq = vp9_convert_qindex_to_q(i);
+ const double maxq = vp9_convert_qindex_to_q(i);
kf_low_motion_minq[i] = calculate_minq_index(maxq,
@@ -206,7 +209,6 @@
if (mb->e_mbd.allow_high_precision_mv) {
mb->mvcost = mb->nmvcost_hp;
mb->mvsadcost = mb->nmvsadcost_hp;
-
} else {
mb->mvcost = mb->nmvcost;
mb->mvsadcost = mb->nmvsadcost;
@@ -214,15 +216,13 @@
}
static void init_base_skip_probs(void) {
int i;
- double q;
- int t;
for (i = 0; i < QINDEX_RANGE; i++) {
- q = vp9_convert_qindex_to_q(i);
+ const double q = vp9_convert_qindex_to_q(i);
// Exponential decay caluclation of baseline skip prob with clamping
// Based on crude best fit of old table.
- t = (int)(564.25 * pow(2.71828, (-0.012 * q)));
+ const int t = (int)(564.25 * pow(2.71828, (-0.012 * q)));
base_skip_false_prob[i][1] = clip_prob(t);
base_skip_false_prob[i][2] = clip_prob(t * 3 / 4);
@@ -236,12 +236,12 @@
if (cm->frame_type != KEY_FRAME) {
vp9_update_skip_probs(cpi);
- if (cm->refresh_alt_ref_frame) {
+ if (cpi->refresh_alt_ref_frame) {
int k;
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
cpi->last_skip_false_probs[2][k] = cm->mbskip_pred_probs[k];
cpi->last_skip_probs_q[2] = cm->base_qindex;
- } else if (cpi->common.refresh_golden_frame) {
+ } else if (cpi->refresh_golden_frame) {
int k;
for (k = 0; k < MBSKIP_CONTEXTS; ++k)
cpi->last_skip_false_probs[1][k] = cm->mbskip_pred_probs[k];
@@ -258,7 +258,6 @@
cm->mbskip_pred_probs[k];
}
}
-
}
void vp9_initialize_enc() {
@@ -299,7 +298,6 @@
vpx_memset(xd->last_mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
set_default_lf_deltas(cpi);
-
}
@@ -332,9 +330,7 @@
vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf);
vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source);
-#if VP9_TEMPORAL_ALT_REF
vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer);
-#endif
vp9_lookahead_destroy(cpi->lookahead);
vpx_free(cpi->tok);
@@ -388,7 +384,7 @@
return target_index - start_index;
}
-static void init_seg_features(VP9_COMP *cpi) {
+static void configure_static_seg_features(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
@@ -408,10 +404,8 @@
// Clear down the segment features.
vp9_clearall_segfeatures(xd);
- }
-
- // If this is an alt ref frame
- else if (cm->refresh_alt_ref_frame) {
+ } else if (cpi->refresh_alt_ref_frame) {
+ // If this is an alt ref frame
// Clear down the global segmentation map
vpx_memset(cpi->segmentation_map, 0, (cm->mb_rows * cm->mb_cols));
xd->update_mb_segmentation_map = 0;
@@ -448,7 +442,7 @@
else if (xd->segmentation_enabled) {
// First normal frame in a valid gf or alt ref group
if (cpi->common.frames_since_golden == 0) {
- // Set up segment features for normal frames in an af group
+ // Set up segment features for normal frames in an arf group
if (cpi->source_alt_ref_active) {
xd->update_mb_segmentation_map = 0;
xd->update_mb_segmentation_data = 1;
@@ -465,16 +459,9 @@
// Segment coding disabled for compred testing
if (high_q || (cpi->static_mb_pct == 100)) {
- // set_segref(xd, 1, LAST_FRAME);
vp9_set_segref(xd, 1, ALTREF_FRAME);
vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
-
- vp9_set_segdata(xd, 1, SEG_LVL_MODE, ZEROMV);
- vp9_enable_segfeature(xd, 1, SEG_LVL_MODE);
-
- // EOB segment coding not fixed for 8x8 yet
- vp9_set_segdata(xd, 1, SEG_LVL_EOB, 0);
- vp9_enable_segfeature(xd, 1, SEG_LVL_EOB);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_SKIP);
}
}
// Disable segmentation and clear down features if alt ref
@@ -493,29 +480,23 @@
}
// Special case where we are coding over the top of a previous
- // alt ref frame
+ // alt ref frame.
// Segment coding disabled for compred testing
else if (cpi->is_src_frame_alt_ref) {
- // Enable mode and ref frame features for segment 0 as well
+ // Enable ref frame features for segment 0 as well
vp9_enable_segfeature(xd, 0, SEG_LVL_REF_FRAME);
- vp9_enable_segfeature(xd, 0, SEG_LVL_MODE);
vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
- vp9_enable_segfeature(xd, 1, SEG_LVL_MODE);
- // All mbs should use ALTREF_FRAME, ZEROMV exclusively
+ // All mbs should use ALTREF_FRAME
vp9_clear_segref(xd, 0);
vp9_set_segref(xd, 0, ALTREF_FRAME);
vp9_clear_segref(xd, 1);
vp9_set_segref(xd, 1, ALTREF_FRAME);
- vp9_set_segdata(xd, 0, SEG_LVL_MODE, ZEROMV);
- vp9_set_segdata(xd, 1, SEG_LVL_MODE, ZEROMV);
- // Skip all MBs if high Q
+ // Skip all MBs if high Q (0,0 mv and skip coeffs)
if (high_q) {
- vp9_enable_segfeature(xd, 0, SEG_LVL_EOB);
- vp9_set_segdata(xd, 0, SEG_LVL_EOB, 0);
- vp9_enable_segfeature(xd, 1, SEG_LVL_EOB);
- vp9_set_segdata(xd, 1, SEG_LVL_EOB, 0);
+ vp9_enable_segfeature(xd, 0, SEG_LVL_SKIP);
+ vp9_enable_segfeature(xd, 1, SEG_LVL_SKIP);
}
// Enable data udpate
xd->update_mb_segmentation_data = 1;
@@ -534,17 +515,13 @@
VP9_COMMON *cm = &cpi->common;
int row, col;
int map_index = 0;
- FILE *statsfile;
+ FILE *statsfile = fopen("segmap.stt", "a");
- statsfile = fopen("segmap.stt", "a");
+ fprintf(statsfile, "%10d\n", cm->current_video_frame);
- fprintf(statsfile, "%10d\n",
- cm->current_video_frame);
-
for (row = 0; row < cpi->common.mb_rows; row++) {
for (col = 0; col < cpi->common.mb_cols; col++) {
- fprintf(statsfile, "%10d",
- cpi->segmentation_map[map_index]);
+ fprintf(statsfile, "%10d", cpi->segmentation_map[map_index]);
map_index++;
}
fprintf(statsfile, "\n");
@@ -590,368 +567,88 @@
cpi->mb.e_mbd.mode_lf_deltas[3] = 4; // Split mv
}
-void vp9_set_speed_features(VP9_COMP *cpi) {
+static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
SPEED_FEATURES *sf = &cpi->sf;
- int Mode = cpi->compressor_speed;
- int Speed = cpi->Speed;
+ int speed_multiplier = speed + 1;
int i;
- VP9_COMMON *cm = &cpi->common;
- // Only modes 0 and 1 supported for now in experimental code basae
- if (Mode > 1)
- Mode = 1;
-
- // Initialise default mode frequency sampling variables
- for (i = 0; i < MAX_MODES; i ++) {
- cpi->mode_check_freq[i] = 0;
- cpi->mode_test_hit_counts[i] = 0;
- cpi->mode_chosen_counts[i] = 0;
+ // Set baseline threshold values
+ for (i = 0; i < MAX_MODES; ++i) {
+ sf->thresh_mult[i] = (mode == 0) ? -500 : 0;
}
- // best quality defaults
- sf->RD = 1;
- sf->search_method = NSTEP;
- sf->improved_dct = 1;
- sf->auto_filter = 1;
- sf->recode_loop = 1;
- sf->quarter_pixel_search = 1;
- sf->half_pixel_search = 1;
- sf->iterative_sub_pixel = 1;
-#if CONFIG_LOSSLESS
- sf->optimize_coefficients = 0;
-#else
- sf->optimize_coefficients = 1;
-#endif
- sf->no_skip_block4x4_search = 1;
+ sf->thresh_mult[THR_ZEROMV ] = 0;
+ sf->thresh_mult[THR_ZEROG ] = 0;
+ sf->thresh_mult[THR_ZEROA ] = 0;
- sf->first_step = 0;
- sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->thresh_mult[THR_NEARESTMV] = 0;
+ sf->thresh_mult[THR_NEARESTG ] = 0;
+ sf->thresh_mult[THR_NEARESTA ] = 0;
- // default thresholds to 0
- for (i = 0; i < MAX_MODES; i++)
- sf->thresh_mult[i] = 0;
+ sf->thresh_mult[THR_NEARMV ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_NEARG ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_NEARA ] += speed_multiplier * 1000;
- switch (Mode) {
- case 0: // best quality mode
- sf->thresh_mult[THR_ZEROMV ] = 0;
- sf->thresh_mult[THR_ZEROG ] = 0;
- sf->thresh_mult[THR_ZEROA ] = 0;
- sf->thresh_mult[THR_NEARESTMV] = 0;
- sf->thresh_mult[THR_NEARESTG ] = 0;
- sf->thresh_mult[THR_NEARESTA ] = 0;
- sf->thresh_mult[THR_NEARMV ] = 0;
- sf->thresh_mult[THR_NEARG ] = 0;
- sf->thresh_mult[THR_NEARA ] = 0;
+ sf->thresh_mult[THR_DC ] = 0;
+ sf->thresh_mult[THR_TM ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_V_PRED ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_H_PRED ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_D45_PRED ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D135_PRED] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D117_PRED] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D153_PRED] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D27_PRED ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_D63_PRED ] += speed_multiplier * 1500;
- sf->thresh_mult[THR_DC ] = 0;
+ sf->thresh_mult[THR_B_PRED ] += speed_multiplier * 2500;
+ sf->thresh_mult[THR_I8X8_PRED] += speed_multiplier * 2500;
- sf->thresh_mult[THR_V_PRED ] = 1000;
- sf->thresh_mult[THR_H_PRED ] = 1000;
- sf->thresh_mult[THR_D45_PRED ] = 1000;
- sf->thresh_mult[THR_D135_PRED] = 1000;
- sf->thresh_mult[THR_D117_PRED] = 1000;
- sf->thresh_mult[THR_D153_PRED] = 1000;
- sf->thresh_mult[THR_D27_PRED ] = 1000;
- sf->thresh_mult[THR_D63_PRED ] = 1000;
- sf->thresh_mult[THR_B_PRED ] = 2000;
- sf->thresh_mult[THR_I8X8_PRED] = 2000;
- sf->thresh_mult[THR_TM ] = 1000;
+ sf->thresh_mult[THR_NEWMV ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_NEWG ] += speed_multiplier * 1000;
+ sf->thresh_mult[THR_NEWA ] += speed_multiplier * 1000;
- sf->thresh_mult[THR_NEWMV ] = 1000;
- sf->thresh_mult[THR_NEWG ] = 1000;
- sf->thresh_mult[THR_NEWA ] = 1000;
+ sf->thresh_mult[THR_SPLITMV ] += speed_multiplier * 2500;
+ sf->thresh_mult[THR_SPLITG ] += speed_multiplier * 2500;
+ sf->thresh_mult[THR_SPLITA ] += speed_multiplier * 2500;
- sf->thresh_mult[THR_SPLITMV ] = 2500;
- sf->thresh_mult[THR_SPLITG ] = 5000;
- sf->thresh_mult[THR_SPLITA ] = 5000;
+ sf->thresh_mult[THR_COMP_ZEROLG ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_ZEROLA ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_ZEROGA ] += speed_multiplier * 1500;
- sf->thresh_mult[THR_COMP_ZEROLG ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTLG] = 0;
- sf->thresh_mult[THR_COMP_NEARLG ] = 0;
- sf->thresh_mult[THR_COMP_ZEROLA ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTLA] = 0;
- sf->thresh_mult[THR_COMP_NEARLA ] = 0;
- sf->thresh_mult[THR_COMP_ZEROGA ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTGA] = 0;
- sf->thresh_mult[THR_COMP_NEARGA ] = 0;
+ sf->thresh_mult[THR_COMP_NEARESTLG] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_NEARESTLA] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_NEARESTGA] += speed_multiplier * 1500;
- sf->thresh_mult[THR_COMP_NEWLG ] = 1000;
- sf->thresh_mult[THR_COMP_NEWLA ] = 1000;
- sf->thresh_mult[THR_COMP_NEWGA ] = 1000;
+ sf->thresh_mult[THR_COMP_NEARLG ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_NEARLA ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_NEARGA ] += speed_multiplier * 1500;
- sf->thresh_mult[THR_COMP_SPLITLA ] = 2500;
- sf->thresh_mult[THR_COMP_SPLITGA ] = 5000;
- sf->thresh_mult[THR_COMP_SPLITLG ] = 5000;
+ sf->thresh_mult[THR_COMP_NEWLG ] += speed_multiplier * 2000;
+ sf->thresh_mult[THR_COMP_NEWLA ] += speed_multiplier * 2000;
+ sf->thresh_mult[THR_COMP_NEWGA ] += speed_multiplier * 2000;
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
-#endif
+ sf->thresh_mult[THR_COMP_SPLITLA ] += speed_multiplier * 4500;
+ sf->thresh_mult[THR_COMP_SPLITGA ] += speed_multiplier * 4500;
+ sf->thresh_mult[THR_COMP_SPLITLG ] += speed_multiplier * 4500;
- sf->first_step = 0;
- sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
- sf->search_best_filter = SEARCH_BEST_FILTER;
- break;
- case 1:
- sf->thresh_mult[THR_NEARESTMV] = 0;
- sf->thresh_mult[THR_ZEROMV ] = 0;
- sf->thresh_mult[THR_DC ] = 0;
- sf->thresh_mult[THR_NEARMV ] = 0;
- sf->thresh_mult[THR_V_PRED ] = 1000;
- sf->thresh_mult[THR_H_PRED ] = 1000;
- sf->thresh_mult[THR_D45_PRED ] = 1000;
- sf->thresh_mult[THR_D135_PRED] = 1000;
- sf->thresh_mult[THR_D117_PRED] = 1000;
- sf->thresh_mult[THR_D153_PRED] = 1000;
- sf->thresh_mult[THR_D27_PRED ] = 1000;
- sf->thresh_mult[THR_D63_PRED ] = 1000;
- sf->thresh_mult[THR_B_PRED ] = 2500;
- sf->thresh_mult[THR_I8X8_PRED] = 2500;
- sf->thresh_mult[THR_TM ] = 1000;
-
- sf->thresh_mult[THR_NEARESTG ] = 1000;
- sf->thresh_mult[THR_NEARESTA ] = 1000;
-
- sf->thresh_mult[THR_ZEROG ] = 1000;
- sf->thresh_mult[THR_ZEROA ] = 1000;
- sf->thresh_mult[THR_NEARG ] = 1000;
- sf->thresh_mult[THR_NEARA ] = 1000;
-
- sf->thresh_mult[THR_ZEROMV ] = 0;
- sf->thresh_mult[THR_ZEROG ] = 0;
- sf->thresh_mult[THR_ZEROA ] = 0;
- sf->thresh_mult[THR_NEARESTMV] = 0;
- sf->thresh_mult[THR_NEARESTG ] = 0;
- sf->thresh_mult[THR_NEARESTA ] = 0;
- sf->thresh_mult[THR_NEARMV ] = 0;
- sf->thresh_mult[THR_NEARG ] = 0;
- sf->thresh_mult[THR_NEARA ] = 0;
-
- sf->thresh_mult[THR_NEWMV ] = 1000;
- sf->thresh_mult[THR_NEWG ] = 1000;
- sf->thresh_mult[THR_NEWA ] = 1000;
-
- sf->thresh_mult[THR_SPLITMV ] = 1700;
- sf->thresh_mult[THR_SPLITG ] = 4500;
- sf->thresh_mult[THR_SPLITA ] = 4500;
-
- sf->thresh_mult[THR_COMP_ZEROLG ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTLG] = 0;
- sf->thresh_mult[THR_COMP_NEARLG ] = 0;
- sf->thresh_mult[THR_COMP_ZEROLA ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTLA] = 0;
- sf->thresh_mult[THR_COMP_NEARLA ] = 0;
- sf->thresh_mult[THR_COMP_ZEROGA ] = 0;
- sf->thresh_mult[THR_COMP_NEARESTGA] = 0;
- sf->thresh_mult[THR_COMP_NEARGA ] = 0;
-
- sf->thresh_mult[THR_COMP_NEWLG ] = 1000;
- sf->thresh_mult[THR_COMP_NEWLA ] = 1000;
- sf->thresh_mult[THR_COMP_NEWGA ] = 1000;
-
- sf->thresh_mult[THR_COMP_SPLITLA ] = 1700;
- sf->thresh_mult[THR_COMP_SPLITGA ] = 4500;
- sf->thresh_mult[THR_COMP_SPLITLG ] = 4500;
#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
-#endif
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] += speed_multiplier * 1500;
- if (Speed > 0) {
- /* Disable coefficient optimization above speed 0 */
- sf->optimize_coefficients = 0;
- sf->no_skip_block4x4_search = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] += speed_multiplier * 1500;
- sf->first_step = 1;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] += speed_multiplier * 1500;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] += speed_multiplier * 1500;
- cpi->mode_check_freq[THR_SPLITG] = 2;
- cpi->mode_check_freq[THR_SPLITA] = 2;
- cpi->mode_check_freq[THR_SPLITMV] = 0;
-
- cpi->mode_check_freq[THR_COMP_SPLITGA] = 2;
- cpi->mode_check_freq[THR_COMP_SPLITLG] = 2;
- cpi->mode_check_freq[THR_COMP_SPLITLA] = 0;
- }
-
- if (Speed > 1) {
- cpi->mode_check_freq[THR_SPLITG] = 4;
- cpi->mode_check_freq[THR_SPLITA] = 4;
- cpi->mode_check_freq[THR_SPLITMV] = 2;
-
- cpi->mode_check_freq[THR_COMP_SPLITGA] = 4;
- cpi->mode_check_freq[THR_COMP_SPLITLG] = 4;
- cpi->mode_check_freq[THR_COMP_SPLITLA] = 2;
-
- sf->thresh_mult[THR_TM ] = 1500;
- sf->thresh_mult[THR_V_PRED ] = 1500;
- sf->thresh_mult[THR_H_PRED ] = 1500;
- sf->thresh_mult[THR_D45_PRED ] = 1500;
- sf->thresh_mult[THR_D135_PRED] = 1500;
- sf->thresh_mult[THR_D117_PRED] = 1500;
- sf->thresh_mult[THR_D153_PRED] = 1500;
- sf->thresh_mult[THR_D27_PRED ] = 1500;
- sf->thresh_mult[THR_D63_PRED ] = 1500;
- sf->thresh_mult[THR_B_PRED ] = 5000;
- sf->thresh_mult[THR_I8X8_PRED] = 5000;
-
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- sf->thresh_mult[THR_NEWMV ] = 2000;
- sf->thresh_mult[THR_SPLITMV ] = 10000;
- sf->thresh_mult[THR_COMP_SPLITLG ] = 20000;
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- sf->thresh_mult[THR_NEARESTG ] = 1500;
- sf->thresh_mult[THR_ZEROG ] = 1500;
- sf->thresh_mult[THR_NEARG ] = 1500;
- sf->thresh_mult[THR_NEWG ] = 2000;
- sf->thresh_mult[THR_SPLITG ] = 20000;
- sf->thresh_mult[THR_COMP_SPLITGA ] = 20000;
- }
-
- if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- sf->thresh_mult[THR_NEARESTA ] = 1500;
- sf->thresh_mult[THR_ZEROA ] = 1500;
- sf->thresh_mult[THR_NEARA ] = 1500;
- sf->thresh_mult[THR_NEWA ] = 2000;
- sf->thresh_mult[THR_SPLITA ] = 20000;
- sf->thresh_mult[THR_COMP_SPLITLA ] = 10000;
- }
-
- sf->thresh_mult[THR_COMP_ZEROLG ] = 1500;
- sf->thresh_mult[THR_COMP_NEARESTLG] = 1500;
- sf->thresh_mult[THR_COMP_NEARLG ] = 1500;
- sf->thresh_mult[THR_COMP_ZEROLA ] = 1500;
- sf->thresh_mult[THR_COMP_NEARESTLA] = 1500;
- sf->thresh_mult[THR_COMP_NEARLA ] = 1500;
- sf->thresh_mult[THR_COMP_ZEROGA ] = 1500;
- sf->thresh_mult[THR_COMP_NEARESTGA] = 1500;
- sf->thresh_mult[THR_COMP_NEARGA ] = 1500;
-
- sf->thresh_mult[THR_COMP_NEWLG ] = 2000;
- sf->thresh_mult[THR_COMP_NEWLA ] = 2000;
- sf->thresh_mult[THR_COMP_NEWGA ] = 2000;
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] += speed_multiplier * 2000;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] += speed_multiplier * 2000;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] += speed_multiplier * 2000;
#endif
- }
- if (Speed > 2) {
- cpi->mode_check_freq[THR_SPLITG] = 15;
- cpi->mode_check_freq[THR_SPLITA] = 15;
- cpi->mode_check_freq[THR_SPLITMV] = 7;
-
- cpi->mode_check_freq[THR_COMP_SPLITGA] = 15;
- cpi->mode_check_freq[THR_COMP_SPLITLG] = 15;
- cpi->mode_check_freq[THR_COMP_SPLITLA] = 7;
-
- sf->thresh_mult[THR_TM ] = 2000;
- sf->thresh_mult[THR_V_PRED ] = 2000;
- sf->thresh_mult[THR_H_PRED ] = 2000;
- sf->thresh_mult[THR_D45_PRED ] = 2000;
- sf->thresh_mult[THR_D135_PRED] = 2000;
- sf->thresh_mult[THR_D117_PRED] = 2000;
- sf->thresh_mult[THR_D153_PRED] = 2000;
- sf->thresh_mult[THR_D27_PRED ] = 2000;
- sf->thresh_mult[THR_D63_PRED ] = 2000;
- sf->thresh_mult[THR_B_PRED ] = 7500;
- sf->thresh_mult[THR_I8X8_PRED] = 7500;
-
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- sf->thresh_mult[THR_NEWMV ] = 2000;
- sf->thresh_mult[THR_SPLITMV ] = 25000;
- sf->thresh_mult[THR_COMP_SPLITLG ] = 50000;
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- sf->thresh_mult[THR_NEARESTG ] = 2000;
- sf->thresh_mult[THR_ZEROG ] = 2000;
- sf->thresh_mult[THR_NEARG ] = 2000;
- sf->thresh_mult[THR_NEWG ] = 2500;
- sf->thresh_mult[THR_SPLITG ] = 50000;
- sf->thresh_mult[THR_COMP_SPLITGA ] = 50000;
- }
-
- if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- sf->thresh_mult[THR_NEARESTA ] = 2000;
- sf->thresh_mult[THR_ZEROA ] = 2000;
- sf->thresh_mult[THR_NEARA ] = 2000;
- sf->thresh_mult[THR_NEWA ] = 2500;
- sf->thresh_mult[THR_SPLITA ] = 50000;
- sf->thresh_mult[THR_COMP_SPLITLA ] = 25000;
- }
-
- sf->thresh_mult[THR_COMP_ZEROLG ] = 2000;
- sf->thresh_mult[THR_COMP_NEARESTLG] = 2000;
- sf->thresh_mult[THR_COMP_NEARLG ] = 2000;
- sf->thresh_mult[THR_COMP_ZEROLA ] = 2000;
- sf->thresh_mult[THR_COMP_NEARESTLA] = 2000;
- sf->thresh_mult[THR_COMP_NEARLA ] = 2000;
- sf->thresh_mult[THR_COMP_ZEROGA ] = 2000;
- sf->thresh_mult[THR_COMP_NEARESTGA] = 2000;
- sf->thresh_mult[THR_COMP_NEARGA ] = 2000;
-
- sf->thresh_mult[THR_COMP_NEWLG ] = 2500;
- sf->thresh_mult[THR_COMP_NEWLA ] = 2500;
- sf->thresh_mult[THR_COMP_NEWGA ] = 2500;
-#if CONFIG_COMP_INTERINTRA_PRED
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = 0;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = 0;
-#endif
-
- sf->improved_dct = 0;
-
- // Only do recode loop on key frames, golden frames and
- // alt ref frames
- sf->recode_loop = 2;
-
- }
-
- break;
-
- }; /* switch */
-
/* disable frame modes if flags not set */
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
@@ -959,13 +656,19 @@
sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
sf->thresh_mult[THR_NEARMV ] = INT_MAX;
sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
+#if CONFIG_COMP_INTERINTRA_PRED
+ sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = INT_MAX;
+ sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = INT_MAX;
+#endif
}
-
if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
sf->thresh_mult[THR_ZEROG ] = INT_MAX;
sf->thresh_mult[THR_NEARG ] = INT_MAX;
sf->thresh_mult[THR_NEWG ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITG ] = INT_MAX;
#if CONFIG_COMP_INTERINTRA_PRED
sf->thresh_mult[THR_COMP_INTERINTRA_ZEROG ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] = INT_MAX;
@@ -972,14 +675,13 @@
sf->thresh_mult[THR_COMP_INTERINTRA_NEARG ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEWG ] = INT_MAX;
#endif
- sf->thresh_mult[THR_SPLITG ] = INT_MAX;
}
-
if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
sf->thresh_mult[THR_ZEROA ] = INT_MAX;
sf->thresh_mult[THR_NEARA ] = INT_MAX;
sf->thresh_mult[THR_NEWA ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITA ] = INT_MAX;
#if CONFIG_COMP_INTERINTRA_PRED
sf->thresh_mult[THR_COMP_INTERINTRA_ZEROA ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] = INT_MAX;
@@ -986,10 +688,10 @@
sf->thresh_mult[THR_COMP_INTERINTRA_NEARA ] = INT_MAX;
sf->thresh_mult[THR_COMP_INTERINTRA_NEWA ] = INT_MAX;
#endif
- sf->thresh_mult[THR_SPLITA ] = INT_MAX;
}
- if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) != (VP9_LAST_FLAG | VP9_GOLD_FLAG)) {
+ if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_GOLD_FLAG)) !=
+ (VP9_LAST_FLAG | VP9_GOLD_FLAG)) {
sf->thresh_mult[THR_COMP_ZEROLG ] = INT_MAX;
sf->thresh_mult[THR_COMP_NEARESTLG] = INT_MAX;
sf->thresh_mult[THR_COMP_NEARLG ] = INT_MAX;
@@ -996,8 +698,8 @@
sf->thresh_mult[THR_COMP_NEWLG ] = INT_MAX;
sf->thresh_mult[THR_COMP_SPLITLG ] = INT_MAX;
}
-
- if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
+ if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
+ (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
@@ -1004,8 +706,8 @@
sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
sf->thresh_mult[THR_COMP_SPLITLA ] = INT_MAX;
}
-
- if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
+ if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
+ (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
@@ -1012,15 +714,105 @@
sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
sf->thresh_mult[THR_COMP_SPLITGA ] = INT_MAX;
}
-#if CONFIG_COMP_INTERINTRA_PRED
- if ((cpi->ref_frame_flags & VP9_LAST_FLAG) != VP9_LAST_FLAG) {
- sf->thresh_mult[THR_COMP_INTERINTRA_ZEROL ] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEARL ] = INT_MAX;
- sf->thresh_mult[THR_COMP_INTERINTRA_NEWL ] = INT_MAX;
+}
+
+void vp9_set_speed_features(VP9_COMP *cpi) {
+ SPEED_FEATURES *sf = &cpi->sf;
+ int mode = cpi->compressor_speed;
+ int speed = cpi->Speed;
+ int i;
+
+ // Only modes 0 and 1 supported for now in experimental code basae
+ if (mode > 1)
+ mode = 1;
+
+ // Initialise default mode frequency sampling variables
+ for (i = 0; i < MAX_MODES; i ++) {
+ cpi->mode_check_freq[i] = 0;
+ cpi->mode_test_hit_counts[i] = 0;
+ cpi->mode_chosen_counts[i] = 0;
}
-#endif
+ // best quality defaults
+ sf->RD = 1;
+ sf->search_method = NSTEP;
+ sf->improved_dct = 1;
+ sf->auto_filter = 1;
+ sf->recode_loop = 1;
+ sf->quarter_pixel_search = 1;
+ sf->half_pixel_search = 1;
+ sf->iterative_sub_pixel = 1;
+ sf->no_skip_block4x4_search = 1;
+ if (cpi->oxcf.lossless)
+ sf->optimize_coefficients = 0;
+ else
+ sf->optimize_coefficients = 1;
+
+ sf->first_step = 0;
+ sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->static_segmentation = 1;
+ sf->splitmode_breakout = 0;
+ sf->mb16_breakout = 0;
+
+ switch (mode) {
+ case 0: // best quality mode
+ sf->search_best_filter = SEARCH_BEST_FILTER;
+ break;
+
+ case 1:
+ sf->static_segmentation = 1;
+ sf->splitmode_breakout = 1;
+ sf->mb16_breakout = 0;
+
+ if (speed > 0) {
+ /* Disable coefficient optimization above speed 0 */
+ sf->optimize_coefficients = 0;
+ sf->no_skip_block4x4_search = 0;
+
+ sf->first_step = 1;
+
+ cpi->mode_check_freq[THR_SPLITG] = 2;
+ cpi->mode_check_freq[THR_SPLITA] = 2;
+ cpi->mode_check_freq[THR_SPLITMV] = 0;
+
+ cpi->mode_check_freq[THR_COMP_SPLITGA] = 2;
+ cpi->mode_check_freq[THR_COMP_SPLITLG] = 2;
+ cpi->mode_check_freq[THR_COMP_SPLITLA] = 0;
+ }
+
+ if (speed > 1) {
+ cpi->mode_check_freq[THR_SPLITG] = 4;
+ cpi->mode_check_freq[THR_SPLITA] = 4;
+ cpi->mode_check_freq[THR_SPLITMV] = 2;
+
+ cpi->mode_check_freq[THR_COMP_SPLITGA] = 4;
+ cpi->mode_check_freq[THR_COMP_SPLITLG] = 4;
+ cpi->mode_check_freq[THR_COMP_SPLITLA] = 2;
+ }
+
+ if (speed > 2) {
+ cpi->mode_check_freq[THR_SPLITG] = 15;
+ cpi->mode_check_freq[THR_SPLITA] = 15;
+ cpi->mode_check_freq[THR_SPLITMV] = 7;
+
+ cpi->mode_check_freq[THR_COMP_SPLITGA] = 15;
+ cpi->mode_check_freq[THR_COMP_SPLITLG] = 15;
+ cpi->mode_check_freq[THR_COMP_SPLITLA] = 7;
+
+ sf->improved_dct = 0;
+
+ // Only do recode loop on key frames, golden frames and
+ // alt ref frames
+ sf->recode_loop = 2;
+ }
+
+ break;
+
+ }; /* switch */
+
+ // Set rd thresholds based on mode and speed setting
+ set_rd_speed_thresholds(cpi, mode, speed);
+
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
if (cpi->pass == 1) {
@@ -1028,36 +820,19 @@
sf->improved_dct = 0;
}
- if (cpi->sf.search_method == NSTEP) {
- vp9_init3smotion_compensation(&cpi->mb,
- cm->yv12_fb[cm->lst_fb_idx].y_stride);
- } else if (cpi->sf.search_method == DIAMOND) {
- vp9_init_dsmotion_compensation(&cpi->mb,
- cm->yv12_fb[cm->lst_fb_idx].y_stride);
+ cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16;
+ cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8;
+ cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
+ if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
+ cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
+ cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
}
- cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16;
- cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8;
- cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4;
- cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
- cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
-
-#if CONFIG_LOSSLESS
- if (cpi->oxcf.lossless) {
- cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8;
- cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
- cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
- cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless;
- }
-#endif
-
cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4;
cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair;
cpi->mb.quantize_b_8x8 = vp9_regular_quantize_b_8x8;
cpi->mb.quantize_b_16x16 = vp9_regular_quantize_b_16x16;
- cpi->mb.quantize_b_2x2 = vp9_regular_quantize_b_2x2;
vp9_init_quantizer(cpi);
@@ -1078,24 +853,19 @@
frames_at_speed[cpi->Speed]++;
#endif
}
-static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
- int width = (cpi->oxcf.Width + 15) & ~15;
- int height = (cpi->oxcf.Height + 15) & ~15;
- cpi->lookahead = vp9_lookahead_init(cpi->oxcf.Width, cpi->oxcf.Height,
+static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
+ cpi->lookahead = vp9_lookahead_init(cpi->oxcf.width, cpi->oxcf.height,
cpi->oxcf.lag_in_frames);
if (!cpi->lookahead)
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate lag buffers");
-#if VP9_TEMPORAL_ALT_REF
-
if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer,
- width, height, VP9BORDERINPIXELS))
+ cpi->oxcf.width, cpi->oxcf.height,
+ VP9BORDERINPIXELS))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
-
-#endif
}
static int alloc_partition_data(VP9_COMP *cpi) {
@@ -1115,10 +885,7 @@
void vp9_alloc_compressor_data(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
- int width = cm->Width;
- int height = cm->Height;
-
- if (vp9_alloc_frame_buffers(cm, width, height))
+ if (vp9_alloc_frame_buffers(cm, cm->width, cm->height))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffers");
@@ -1126,25 +893,16 @@
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate partition data");
-
- if ((width & 0xf) != 0)
- width += 16 - (width & 0xf);
-
- if ((height & 0xf) != 0)
- height += 16 - (height & 0xf);
-
-
if (vp8_yv12_alloc_frame_buffer(&cpi->last_frame_uf,
- width, height, VP9BORDERINPIXELS))
+ cm->width, cm->height, VP9BORDERINPIXELS))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate last frame buffer");
if (vp8_yv12_alloc_frame_buffer(&cpi->scaled_source,
- width, height, VP9BORDERINPIXELS))
+ cm->width, cm->height, VP9BORDERINPIXELS))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled source buffer");
-
vpx_free(cpi->tok);
{
@@ -1199,6 +957,48 @@
}
+static void update_frame_size(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+
+ /* our internal buffers are always multiples of 16 */
+ int aligned_width = (cm->width + 15) & ~15;
+ int aligned_height = (cm->height + 15) & ~15;
+
+ cm->mb_rows = aligned_height >> 4;
+ cm->mb_cols = aligned_width >> 4;
+ cm->MBs = cm->mb_rows * cm->mb_cols;
+ cm->mode_info_stride = cm->mb_cols + 1;
+ memset(cm->mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO));
+ vp9_update_mode_info_border(cm, cm->mip);
+
+ cm->mi = cm->mip + cm->mode_info_stride + 1;
+ cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
+ vp9_update_mode_info_in_image(cm, cm->mi);
+
+ /* Update size of buffers local to this frame */
+ if (vp8_yv12_realloc_frame_buffer(&cpi->last_frame_uf,
+ cm->width, cm->height, VP9BORDERINPIXELS))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to reallocate last frame buffer");
+
+ if (vp8_yv12_realloc_frame_buffer(&cpi->scaled_source,
+ cm->width, cm->height, VP9BORDERINPIXELS))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to reallocate scaled source buffer");
+
+ {
+ int y_stride = cpi->scaled_source.y_stride;
+
+ if (cpi->sf.search_method == NSTEP) {
+ vp9_init3smotion_compensation(&cpi->mb, y_stride);
+ } else if (cpi->sf.search_method == DIAMOND) {
+ vp9_init_dsmotion_compensation(&cpi->mb, y_stride);
+ }
+ }
+}
+
+
// TODO perhaps change number of steps expose to outside world when setting
// max and min limits. Also this will likely want refining for the extended Q
// range.
@@ -1239,15 +1039,12 @@
cpi->min_frame_bandwidth = FRAME_OVERHEAD_BITS;
// Set Maximum gf/arf interval
- cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);
+ cpi->max_gf_interval = 16;
- if (cpi->max_gf_interval < 12)
- cpi->max_gf_interval = 12;
-
// Extended interval for genuinely static scenes
cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
- // Special conditions when altr ref frame enabled in lagged compress mode
+ // Special conditions when alt ref frame enabled in lagged compress mode
if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) {
if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1)
cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1;
@@ -1260,28 +1057,45 @@
cpi->max_gf_interval = cpi->twopass.static_scene_max_gf_interval;
}
-
-static int
-rescale(int val, int num, int denom) {
+static int64_t rescale(int val, int64_t num, int denom) {
int64_t llnum = num;
int64_t llden = denom;
int64_t llval = val;
- return (int)(llval * llnum / llden);
+ return (llval * llnum / llden);
}
+static void set_tile_limits(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ int min_log2_tiles, max_log2_tiles;
+ cm->log2_tile_columns = cpi->oxcf.tile_columns;
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;
+
+ vp9_get_tile_n_bits(cm, &min_log2_tiles, &max_log2_tiles);
+ max_log2_tiles += min_log2_tiles;
+ if (cm->log2_tile_columns < min_log2_tiles)
+ cm->log2_tile_columns = min_log2_tiles;
+ else if (cm->log2_tile_columns > max_log2_tiles)
+ cm->log2_tile_columns = max_log2_tiles;
+ cm->tile_columns = 1 << cm->log2_tile_columns;
+ cm->tile_rows = 1 << cm->log2_tile_rows;
+}
+
static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
- VP9_COMMON *cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
cpi->oxcf = *oxcf;
cpi->goldfreq = 7;
- cm->version = oxcf->Version;
+ cm->version = oxcf->version;
vp9_setup_version(cm);
+ cm->width = oxcf->width;
+ cm->height = oxcf->height;
+
// change includes all joint functionality
vp9_change_config(ptr, oxcf);
@@ -1304,31 +1118,30 @@
cpi->static_mb_pct = 0;
-#if VP9_TEMPORAL_ALT_REF
+ cpi->lst_fb_idx = 0;
+ cpi->gld_fb_idx = 1;
+ cpi->alt_fb_idx = 2;
+
+ set_tile_limits(cpi);
+
{
int i;
-
cpi->fixed_divide[0] = 0;
-
for (i = 1; i < 512; i++)
cpi->fixed_divide[i] = 0x80000 / i;
}
-#endif
}
void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
- VP9_COMMON *cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
- if (!cpi)
+ if (!cpi || !oxcf)
return;
- if (!oxcf)
- return;
-
- if (cm->version != oxcf->Version) {
- cm->version = oxcf->Version;
+ if (cm->version != oxcf->version) {
+ cm->version = oxcf->version;
vp9_setup_version(cm);
}
@@ -1351,7 +1164,6 @@
if (cpi->oxcf.cpu_used > 5)
cpi->oxcf.cpu_used = 5;
-
break;
case MODE_SECONDPASS_BEST:
@@ -1364,20 +1176,14 @@
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
- cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1;
- cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_idct4x4llm;
- cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1;
- cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
-
-#if CONFIG_LOSSLESS
cpi->oxcf.lossless = oxcf->lossless;
if (cpi->oxcf.lossless) {
- cpi->mb.e_mbd.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8;
- cpi->mb.e_mbd.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8;
- cpi->mb.e_mbd.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
- cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_iwalsh4x4_1;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_iwalsh4x4;
+ } else {
+ cpi->mb.e_mbd.inv_txm4x4_1 = vp9_short_idct4x4_1;
+ cpi->mb.e_mbd.inv_txm4x4 = vp9_short_idct4x4;
}
-#endif
cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;
@@ -1385,8 +1191,8 @@
// cpi->use_golden_frame_only = 0;
// cpi->use_last_frame_only = 0;
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 1;
cm->refresh_entropy_probs = 1;
setup_features(cpi);
@@ -1414,31 +1220,28 @@
// Convert target bandwidth from Kbit/s to Bit/s
cpi->oxcf.target_bandwidth *= 1000;
- cpi->oxcf.starting_buffer_level =
- rescale(cpi->oxcf.starting_buffer_level,
- cpi->oxcf.target_bandwidth, 1000);
+ cpi->oxcf.starting_buffer_level = rescale(cpi->oxcf.starting_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
// Set or reset optimal and maximum buffer levels.
if (cpi->oxcf.optimal_buffer_level == 0)
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
else
- cpi->oxcf.optimal_buffer_level =
- rescale(cpi->oxcf.optimal_buffer_level,
- cpi->oxcf.target_bandwidth, 1000);
+ cpi->oxcf.optimal_buffer_level = rescale(cpi->oxcf.optimal_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
if (cpi->oxcf.maximum_buffer_size == 0)
cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
else
- cpi->oxcf.maximum_buffer_size =
- rescale(cpi->oxcf.maximum_buffer_size,
- cpi->oxcf.target_bandwidth, 1000);
+ cpi->oxcf.maximum_buffer_size = rescale(cpi->oxcf.maximum_buffer_size,
+ cpi->oxcf.target_bandwidth, 1000);
// Set up frame rate and related parameters rate control values.
vp9_new_frame_rate(cpi, cpi->oxcf.frame_rate);
// Set absolute upper and lower quality limits
- cpi->worst_quality = cpi->oxcf.worst_allowed_q;
- cpi->best_quality = cpi->oxcf.best_allowed_q;
+ cpi->worst_quality = cpi->oxcf.worst_allowed_q;
+ cpi->best_quality = cpi->oxcf.best_allowed_q;
// active values should only be modified if out of new range
if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) {
@@ -1467,12 +1270,9 @@
cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
- cm->Width = cpi->oxcf.Width;
- cm->Height = cpi->oxcf.Height;
+ cm->display_width = cpi->oxcf.width;
+ cm->display_height = cpi->oxcf.height;
- cm->horiz_scale = cpi->horiz_scale;
- cm->vert_scale = cpi->vert_scale;
-
// VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
if (cpi->oxcf.Sharpness > 7)
cpi->oxcf.Sharpness = 7;
@@ -1479,26 +1279,18 @@
cm->sharpness_level = cpi->oxcf.Sharpness;
- if (cm->horiz_scale != NORMAL || cm->vert_scale != NORMAL) {
- int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs);
- int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs);
-
- Scale2Ratio(cm->horiz_scale, &hr, &hs);
- Scale2Ratio(cm->vert_scale, &vr, &vs);
-
- // always go to the next whole number
- cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs;
- cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
- }
-
- if (((cm->Width + 15) & 0xfffffff0) !=
- cm->yv12_fb[cm->lst_fb_idx].y_width ||
- ((cm->Height + 15) & 0xfffffff0) !=
- cm->yv12_fb[cm->lst_fb_idx].y_height ||
- cm->yv12_fb[cm->lst_fb_idx].y_width == 0) {
+ // Increasing the size of the frame beyond the first seen frame, or some
+ // otherwise signalled maximum size, is not supported.
+ // TODO(jkoleszar): exit gracefully.
+ if (!cpi->initial_width) {
alloc_raw_frame_buffers(cpi);
vp9_alloc_compressor_data(cpi);
+ cpi->initial_width = cm->width;
+ cpi->initial_height = cm->height;
}
+ assert(cm->width <= cpi->initial_width);
+ assert(cm->height <= cpi->initial_height);
+ update_frame_size(cpi);
if (cpi->oxcf.fixed_q >= 0) {
cpi->last_q[0] = cpi->oxcf.fixed_q;
@@ -1526,6 +1318,7 @@
cpi->last_frame_distortion = 0;
#endif
+ set_tile_limits(cpi);
}
#define M_LOG2_E 0.693147180559945309417
@@ -1541,15 +1334,15 @@
static void cal_nmvsadcosts(int *mvsadcost[2]) {
int i = 1;
- mvsadcost [0] [0] = 0;
- mvsadcost [1] [0] = 0;
+ mvsadcost[0][0] = 0;
+ mvsadcost[1][0] = 0;
do {
double z = 256 * (2 * (log2f(8 * i) + .6));
- mvsadcost [0][i] = (int) z;
- mvsadcost [1][i] = (int) z;
- mvsadcost [0][-i] = (int) z;
- mvsadcost [1][-i] = (int) z;
+ mvsadcost[0][i] = (int)z;
+ mvsadcost[1][i] = (int)z;
+ mvsadcost[0][-i] = (int)z;
+ mvsadcost[1][-i] = (int)z;
} while (++i <= MV_MAX);
}
@@ -1556,15 +1349,15 @@
static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
int i = 1;
- mvsadcost [0] [0] = 0;
- mvsadcost [1] [0] = 0;
+ mvsadcost[0][0] = 0;
+ mvsadcost[1][0] = 0;
do {
double z = 256 * (2 * (log2f(8 * i) + .6));
- mvsadcost [0][i] = (int) z;
- mvsadcost [1][i] = (int) z;
- mvsadcost [0][-i] = (int) z;
- mvsadcost [1][-i] = (int) z;
+ mvsadcost[0][i] = (int)z;
+ mvsadcost[1][i] = (int)z;
+ mvsadcost[0][-i] = (int)z;
+ mvsadcost[1][-i] = (int)z;
} while (++i <= MV_MAX);
}
@@ -1682,6 +1475,11 @@
#ifdef NMV_STATS
init_nmvstats();
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+ init_nzcstats();
+#endif
+#endif
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90 << 12;
@@ -1693,7 +1491,7 @@
cpi->source_alt_ref_pending = FALSE;
cpi->source_alt_ref_active = FALSE;
- cpi->common.refresh_alt_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
@@ -1795,10 +1593,6 @@
cpi->rd_thresh_mult[i] = 128;
}
-#ifdef ENTROPY_STATS
- init_mv_ref_counts();
-#endif
-
#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].vf = VF; \
@@ -1838,14 +1632,6 @@
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
-#if ARCH_X86 || ARCH_X86_64
- cpi->fn_ptr[BLOCK_16X16].copymem = vp9_copy32xn;
- cpi->fn_ptr[BLOCK_16X8].copymem = vp9_copy32xn;
- cpi->fn_ptr[BLOCK_8X16].copymem = vp9_copy32xn;
- cpi->fn_ptr[BLOCK_8X8].copymem = vp9_copy32xn;
- cpi->fn_ptr[BLOCK_4X4].copymem = vp9_copy32xn;
-#endif
-
cpi->full_search_sad = vp9_full_search_sad;
cpi->diamond_search_sad = vp9_diamond_search_sad;
cpi->refining_search_sad = vp9_refining_search_sad;
@@ -1865,6 +1651,13 @@
cpi->common.error.setjmp = 0;
vp9_zero(cpi->y_uv_mode_count)
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_zero(cm->fc.nzc_counts_4x4);
+ vp9_zero(cm->fc.nzc_counts_8x8);
+ vp9_zero(cm->fc.nzc_counts_16x16);
+ vp9_zero(cm->fc.nzc_counts_32x32);
+ vp9_zero(cm->fc.nzc_pcat_counts);
+#endif
return (VP9_PTR) cpi;
}
@@ -1885,7 +1678,7 @@
if (cpi->pass != 1) {
print_context_counters();
print_tree_update_probs();
- print_mode_context();
+ print_mode_context(&cpi->common);
}
#endif
#ifdef NMV_STATS
@@ -1892,6 +1685,12 @@
if (cpi->pass != 1)
print_nmvstats();
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+ if (cpi->pass != 1)
+ print_nzcstats();
+#endif
+#endif
#if CONFIG_INTERNAL_STATS
@@ -1908,7 +1707,8 @@
print_mode_contexts(&cpi->common);
#endif
if (cpi->b_calculate_psnr) {
- YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+ YV12_BUFFER_CONFIG *lst_yv12 =
+ &cpi->common.yv12_fb[cpi->common.ref_frame_map[cpi->lst_fb_idx]];
double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height;
double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error);
double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2);
@@ -2176,8 +1976,8 @@
struct vpx_codec_cx_pkt pkt;
uint64_t sse;
int i;
- unsigned int width = cpi->common.Width;
- unsigned int height = cpi->common.Height;
+ unsigned int width = cpi->common.width;
+ unsigned int height = cpi->common.height;
pkt.kind = VPX_CODEC_PSNR_PKT;
sse = calc_plane_error(orig->y_buffer, orig->y_stride,
@@ -2230,34 +2030,34 @@
if (ref_frame_flags > 7)
return -1;
- cpi->common.refresh_golden_frame = 0;
- cpi->common.refresh_alt_ref_frame = 0;
- cpi->common.refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->refresh_last_frame = 0;
if (ref_frame_flags & VP9_LAST_FLAG)
- cpi->common.refresh_last_frame = 1;
+ cpi->refresh_last_frame = 1;
if (ref_frame_flags & VP9_GOLD_FLAG)
- cpi->common.refresh_golden_frame = 1;
+ cpi->refresh_golden_frame = 1;
if (ref_frame_flags & VP9_ALT_FLAG)
- cpi->common.refresh_alt_ref_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
return 0;
}
-int vp9_get_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd) {
+int vp9_copy_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
VP9_COMMON *cm = &cpi->common;
int ref_fb_idx;
if (ref_frame_flag == VP9_LAST_FLAG)
- ref_fb_idx = cm->lst_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx];
else if (ref_frame_flag == VP9_GOLD_FLAG)
- ref_fb_idx = cm->gld_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx];
else if (ref_frame_flag == VP9_ALT_FLAG)
- ref_fb_idx = cm->alt_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx];
else
return -1;
@@ -2266,6 +2066,17 @@
return 0;
}
+int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) {
+ VP9_COMP *cpi = (VP9_COMP *)(ptr);
+ VP9_COMMON *cm = &cpi->common;
+
+ if (index < 0 || index >= NUM_REF_FRAMES)
+ return -1;
+
+ *fb = &cm->yv12_fb[cm->ref_frame_map[index]];
+ return 0;
+}
+
int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
@@ -2274,11 +2085,11 @@
int ref_fb_idx;
if (ref_frame_flag == VP9_LAST_FLAG)
- ref_fb_idx = cm->lst_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->lst_fb_idx];
else if (ref_frame_flag == VP9_GOLD_FLAG)
- ref_fb_idx = cm->gld_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->gld_fb_idx];
else if (ref_frame_flag == VP9_ALT_FLAG)
- ref_fb_idx = cm->alt_fb_idx;
+ ref_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx];
else
return -1;
@@ -2327,7 +2138,7 @@
void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
YV12_BUFFER_CONFIG *s = cm->frame_to_show;
uint8_t *src = s->y_buffer;
- int h = cm->Height;
+ int h = cm->height;
do {
fwrite(src, s->y_width, 1, yuv_rec_file);
@@ -2335,7 +2146,7 @@
} while (--h);
src = s->u_buffer;
- h = (cm->Height + 1) / 2;
+ h = (cm->height + 1) / 2;
do {
fwrite(src, s->uv_width, 1, yuv_rec_file);
@@ -2343,15 +2154,79 @@
} while (--h);
src = s->v_buffer;
- h = (cm->Height + 1) / 2;
+ h = (cm->height + 1) / 2;
do {
fwrite(src, s->uv_width, 1, yuv_rec_file);
src += s->uv_stride;
} while (--h);
+ fflush(yuv_rec_file);
}
#endif
+static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
+ YV12_BUFFER_CONFIG *dst_fb) {
+ const int in_w = src_fb->y_crop_width;
+ const int in_h = src_fb->y_crop_height;
+ const int out_w = dst_fb->y_crop_width;
+ const int out_h = dst_fb->y_crop_height;
+ int x, y;
+
+ for (y = 0; y < out_h; y += 16) {
+ for (x = 0; x < out_w; x += 16) {
+ int x_q4 = x * 16 * in_w / out_w;
+ int y_q4 = y * 16 * in_h / out_h;
+ uint8_t *src, *dst;
+ int src_stride, dst_stride;
+
+
+ src = src_fb->y_buffer +
+ y * in_h / out_h * src_fb->y_stride +
+ x * in_w / out_w;
+ dst = dst_fb->y_buffer +
+ y * dst_fb->y_stride +
+ x;
+ src_stride = src_fb->y_stride;
+ dst_stride = dst_fb->y_stride;
+
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 16, 16);
+
+ x_q4 >>= 1;
+ y_q4 >>= 1;
+ src_stride = src_fb->uv_stride;
+ dst_stride = dst_fb->uv_stride;
+
+ src = src_fb->u_buffer +
+ y / 2 * in_h / out_h * src_fb->uv_stride +
+ x / 2 * in_w / out_w;
+ dst = dst_fb->u_buffer +
+ y / 2 * dst_fb->uv_stride +
+ x / 2;
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 8, 8);
+
+ src = src_fb->v_buffer +
+ y / 2 * in_h / out_h * src_fb->uv_stride +
+ x / 2 * in_w / out_w;
+ dst = dst_fb->v_buffer +
+ y / 2 * dst_fb->uv_stride +
+ x / 2;
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 8, 8);
+ }
+ }
+
+ vp8_yv12_extend_frame_borders(dst_fb);
+}
+
+
static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
@@ -2374,13 +2249,13 @@
VP9_COMMON *cm = &cpi->common;
// Update the Golden frame usage counts.
- if (cm->refresh_golden_frame) {
+ if (cpi->refresh_golden_frame) {
// Update data structure that monitors level of reference to last GF
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
// this frame refreshes means next frames don't unless specified by user
- cm->refresh_golden_frame = 0;
+ cpi->refresh_golden_frame = 0;
cpi->common.frames_since_golden = 0;
// if ( cm->frame_type == KEY_FRAME )
@@ -2402,7 +2277,7 @@
// ******** Fixed Q test code only ************
// If we are going to use the ALT reference for the next group of frames set a flag to say so.
if (cpi->oxcf.fixed_q >= 0 &&
- cpi->oxcf.play_alternate && !cpi->common.refresh_alt_ref_frame) {
+ cpi->oxcf.play_alternate && !cpi->refresh_alt_ref_frame) {
cpi->source_alt_ref_pending = TRUE;
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
}
@@ -2414,7 +2289,7 @@
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
- } else if (!cpi->common.refresh_alt_ref_frame) {
+ } else if (!cpi->refresh_alt_ref_frame) {
// Decrement count down till next gf
if (cpi->frames_till_gf_update_due > 0)
cpi->frames_till_gf_update_due--;
@@ -2535,8 +2410,8 @@
if ((cpi->sf.recode_loop == 1) ||
((cpi->sf.recode_loop == 2) &&
((cm->frame_type == KEY_FRAME) ||
- cm->refresh_golden_frame ||
- cm->refresh_alt_ref_frame))) {
+ cpi->refresh_golden_frame ||
+ cpi->refresh_alt_ref_frame))) {
// General over and under shoot tests
if (((cpi->projected_frame_size > high_limit) && (q < maxq)) ||
((cpi->projected_frame_size < low_limit) && (q > minq))) {
@@ -2563,86 +2438,56 @@
return force_recode;
}
-static void update_reference_frames(VP9_COMMON *cm) {
- YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb;
+static void update_reference_frames(VP9_COMP * const cpi) {
+ VP9_COMMON * const cm = &cpi->common;
// At this point the new frame has been encoded.
// If any buffer copy / swapping is signaled it should be done here.
-
if (cm->frame_type == KEY_FRAME) {
- yv12_fb[cm->new_fb_idx].flags |= VP9_GOLD_FLAG | VP9_ALT_FLAG;
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
+ } else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
+ /* Preserve the previously existing golden frame and update the frame in
+ * the alt ref slot instead. This is highly specific to the current use of
+ * alt-ref as a forward reference, and this needs to be generalized as
+ * other uses are implemented (like RTC/temporal scaling)
+ *
+ * The update to the buffer in the alt ref slot was signalled in
+ * vp9_pack_bitstream(), now swap the buffer pointers so that it's treated
+ * as the golden frame next time.
+ */
+ int tmp;
- yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
- cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx;
- } else { /* For non key frames */
- if (cm->refresh_alt_ref_frame) {
- assert(!cm->copy_buffer_to_arf);
-
- cm->yv12_fb[cm->new_fb_idx].flags |= VP9_ALT_FLAG;
- cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
- cm->alt_fb_idx = cm->new_fb_idx;
- } else if (cm->copy_buffer_to_arf) {
- assert(!(cm->copy_buffer_to_arf & ~0x3));
-
- if (cm->copy_buffer_to_arf == 1) {
- if (cm->alt_fb_idx != cm->lst_fb_idx) {
- yv12_fb[cm->lst_fb_idx].flags |= VP9_ALT_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
- cm->alt_fb_idx = cm->lst_fb_idx;
- }
- } else { /* if (cm->copy_buffer_to_arf == 2) */
- if (cm->alt_fb_idx != cm->gld_fb_idx) {
- yv12_fb[cm->gld_fb_idx].flags |= VP9_ALT_FLAG;
- yv12_fb[cm->alt_fb_idx].flags &= ~VP9_ALT_FLAG;
- cm->alt_fb_idx = cm->gld_fb_idx;
- }
- }
+ tmp = cpi->alt_fb_idx;
+ cpi->alt_fb_idx = cpi->gld_fb_idx;
+ cpi->gld_fb_idx = tmp;
+ } else { /* For non key/golden frames */
+ if (cpi->refresh_alt_ref_frame) {
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
}
- if (cm->refresh_golden_frame) {
- assert(!cm->copy_buffer_to_gf);
-
- cm->yv12_fb[cm->new_fb_idx].flags |= VP9_GOLD_FLAG;
- cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
- cm->gld_fb_idx = cm->new_fb_idx;
- } else if (cm->copy_buffer_to_gf) {
- assert(!(cm->copy_buffer_to_arf & ~0x3));
-
- if (cm->copy_buffer_to_gf == 1) {
- if (cm->gld_fb_idx != cm->lst_fb_idx) {
- yv12_fb[cm->lst_fb_idx].flags |= VP9_GOLD_FLAG;
- yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
- cm->gld_fb_idx = cm->lst_fb_idx;
- }
- } else { /* if (cm->copy_buffer_to_gf == 2) */
- if (cm->alt_fb_idx != cm->gld_fb_idx) {
- yv12_fb[cm->alt_fb_idx].flags |= VP9_GOLD_FLAG;
- yv12_fb[cm->gld_fb_idx].flags &= ~VP9_GOLD_FLAG;
- cm->gld_fb_idx = cm->alt_fb_idx;
- }
- }
+ if (cpi->refresh_golden_frame) {
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
}
}
- if (cm->refresh_last_frame) {
- cm->yv12_fb[cm->new_fb_idx].flags |= VP9_LAST_FLAG;
- cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP9_LAST_FLAG;
- cm->lst_fb_idx = cm->new_fb_idx;
+ if (cpi->refresh_last_frame) {
+ ref_cnt_fb(cm->fb_idx_ref_cnt,
+ &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
}
}
static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
- if (cm->no_lpf) {
+ if (cm->no_lpf || cpi->mb.e_mbd.lossless) {
cm->filter_level = 0;
- }
-#if CONFIG_LOSSLESS
- else if (cpi->oxcf.lossless) {
- cm->filter_level = 0;
- }
-#endif
- else {
+ } else {
struct vpx_usec_timer timer;
vp9_clear_system_state();
@@ -2659,7 +2504,8 @@
if (cm->filter_level > 0) {
vp9_set_alt_lf_level(cpi, cm->filter_level);
- vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level, 0);
+ vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level, 0,
+ cm->dering_enabled);
}
vp8_yv12_extend_frame_borders(cm->frame_to_show);
@@ -2666,7 +2512,7 @@
}
-void select_interp_filter_type(VP9_COMP *cpi) {
+void vp9_select_interp_filter_type(VP9_COMP *cpi) {
int i;
int high_filter_index = 0;
unsigned int thresh;
@@ -2719,6 +2565,38 @@
}
#endif
+static void scale_references(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[i]];
+
+ if (ref->y_crop_width != cm->width ||
+ ref->y_crop_height != cm->height) {
+ int new_fb = get_free_fb(cm);
+
+ vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[new_fb],
+ cm->width, cm->height,
+ VP9BORDERINPIXELS);
+ scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]);
+ cpi->scaled_ref_idx[i] = new_fb;
+ } else {
+ cpi->scaled_ref_idx[i] = cm->ref_frame_map[i];
+ cm->fb_idx_ref_cnt[cm->ref_frame_map[i]]++;
+ }
+ }
+}
+
+static void release_scaled_references(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ cm->fb_idx_ref_cnt[cpi->scaled_ref_idx[i]]--;
+ }
+}
+
static void encode_frame_to_data_rate(VP9_COMP *cpi,
unsigned long *size,
unsigned char *dest,
@@ -2735,8 +2613,6 @@
int q_low;
int q_high;
- int zbin_oq_high;
- int zbin_oq_low = 0;
int top_index;
int bottom_index;
@@ -2749,11 +2625,7 @@
#if RESET_FOREACH_FILTER
int q_low0;
int q_high0;
- int zbin_oq_high0;
- int zbin_oq_low0 = 0;
int Q0;
- int last_zbin_oq;
- int last_zbin_oq0;
int active_best_quality0;
int active_worst_quality0;
double rate_correction_factor0;
@@ -2773,6 +2645,17 @@
int mcomp_filter_index = 0;
int64_t mcomp_filter_cost[4];
+ /* Scale the source buffer, if required */
+ if (cm->mb_cols * 16 != cpi->un_scaled_source->y_width ||
+ cm->mb_rows * 16 != cpi->un_scaled_source->y_height) {
+ scale_and_extend_frame(cpi->un_scaled_source, &cpi->scaled_source);
+ cpi->Source = &cpi->scaled_source;
+ } else {
+ cpi->Source = cpi->un_scaled_source;
+ }
+
+ scale_references(cpi);
+
// Clear down mmx registers to allow floating point in what follows
vp9_clear_system_state();
@@ -2779,7 +2662,7 @@
// For an alt ref frame in 2 pass we skip the call to the second
// pass function that sets the target bandwidth so must set it here
- if (cpi->common.refresh_alt_ref_frame) {
+ if (cpi->refresh_alt_ref_frame) {
cpi->per_frame_bandwidth = cpi->twopass.gf_bits; // Per frame bit target for the alt ref frame
// per second target bitrate
cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
@@ -2786,12 +2669,7 @@
cpi->output_frame_rate);
}
- // Default turn off buffer to buffer copying
- cm->copy_buffer_to_gf = 0;
- cm->copy_buffer_to_arf = 0;
-
// Clear zbin over-quant value and mode boost values.
- cpi->zbin_over_quant = 0;
cpi->zbin_mode_boost = 0;
// Enable or disable mode based tweaking of the zbin
@@ -2798,14 +2676,11 @@
// For 2 Pass Only used where GF/ARF prediction quality
// is above a threshold
cpi->zbin_mode_boost = 0;
-#if CONFIG_LOSSLESS
- cpi->zbin_mode_boost_enabled = FALSE;
-#else
- cpi->zbin_mode_boost_enabled = TRUE;
-#endif
- if (cpi->gfu_boost <= 400) {
+
+ // if (cpi->oxcf.lossless)
cpi->zbin_mode_boost_enabled = FALSE;
- }
+ // else
+ // cpi->zbin_mode_boost_enabled = TRUE;
// Current default encoder behaviour for the altref sign bias
if (cpi->source_alt_ref_active)
@@ -2846,10 +2721,22 @@
for (i = 0; i < MAX_MODES; i++) {
cpi->rd_thresh_mult[i] = 128;
}
+
+ cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0);
+ cm->frame_parallel_decoding_mode =
+ (cpi->oxcf.frame_parallel_decoding_mode != 0);
+ if (cm->error_resilient_mode) {
+ cm->frame_parallel_decoding_mode = 1;
+ cm->refresh_entropy_probs = 0;
+ }
}
- // Test code for new segment features
- init_seg_features(cpi);
+ // Configure use of segmentation for enhanced coding of static regions.
+ // Only allowed for now in second pass of two pass (as requires lagged coding)
+ // and if the relevent speed feature flag is set.
+ if ((cpi->pass == 2) && (cpi->sf.static_segmentation)) {
+ configure_static_seg_features(cpi);
+ }
// Decide how big to make the frame
vp9_pick_frame_size(cpi);
@@ -2896,9 +2783,7 @@
if (cpi->active_best_quality < cpi->best_quality)
cpi->active_best_quality = cpi->best_quality;
}
- }
-
- else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) {
+ } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
int high = 2000;
int low = 400;
@@ -2935,7 +2820,15 @@
cpi->active_best_quality * 15 / 16;
}
} else {
+#ifdef ONE_SHOT_Q_ESTIMATE
+#ifdef STRICT_ONE_SHOT_Q
+ cpi->active_best_quality = Q;
+#else
cpi->active_best_quality = inter_minq[Q];
+#endif
+#else
+ cpi->active_best_quality = inter_minq[Q];
+#endif
// For the constant/constrained quality mode we dont want
// q to fall below the cq level.
@@ -2971,19 +2864,8 @@
// Determine initial Q to try
Q = vp9_regulate_q(cpi, cpi->this_frame_target);
}
-#if RESET_FOREACH_FILTER
- last_zbin_oq = cpi->zbin_over_quant;
-#endif
- // Set highest allowed value for Zbin over quant
- if (cm->frame_type == KEY_FRAME)
- zbin_oq_high = 0; // ZBIN_OQ_MAX/16
- else if (cm->refresh_alt_ref_frame || (cm->refresh_golden_frame && !cpi->source_alt_ref_active))
- zbin_oq_high = 16;
- else
- zbin_oq_high = ZBIN_OQ_MAX;
-
- vp9_compute_frame_size_bounds(cpi, &frame_under_shoot_limit,
+ vp9_compute_frame_size_bounds(cpi, &frame_under_shoot_limit,
&frame_over_shoot_limit);
// Limit Q range for the adaptive loop.
@@ -3016,7 +2898,6 @@
#if CONFIG_POSTPROC
if (cpi->oxcf.noise_sensitivity > 0) {
- uint8_t *src;
int l = 0;
switch (cpi->oxcf.noise_sensitivity) {
@@ -3030,7 +2911,6 @@
l = 60;
break;
case 4:
-
case 5:
l = 100;
break;
@@ -3039,18 +2919,7 @@
break;
}
-
- if (cm->frame_type == KEY_FRAME) {
- vp9_de_noise(cpi->Source, cpi->Source, l, 1, 0);
- } else {
- vp9_de_noise(cpi->Source, cpi->Source, l, 1, 0);
-
- src = cpi->Source->y_buffer;
-
- if (cpi->Source->y_stride < 0) {
- src += cpi->Source->y_stride * (cpi->Source->y_height - 1);
- }
- }
+ vp9_denoise(cpi->Source, cpi->Source, l, 1, 0);
}
#endif
@@ -3064,9 +2933,6 @@
q_low0 = q_low;
q_high0 = q_high;
Q0 = Q;
- zbin_oq_low0 = zbin_oq_low;
- zbin_oq_high0 = zbin_oq_high;
- last_zbin_oq0 = last_zbin_oq;
rate_correction_factor0 = cpi->rate_correction_factor;
gf_rate_correction_factor0 = cpi->gf_rate_correction_factor;
active_best_quality0 = cpi->active_best_quality;
@@ -3087,12 +2953,12 @@
cm->mbskip_pred_probs[k] = cpi->base_skip_false_prob[Q][k];
if (cm->frame_type != KEY_FRAME) {
- if (cpi->common.refresh_alt_ref_frame) {
+ if (cpi->refresh_alt_ref_frame) {
for (k = 0; k < MBSKIP_CONTEXTS; k++) {
if (cpi->last_skip_false_probs[2][k] != 0)
cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[2][k];
}
- } else if (cpi->common.refresh_golden_frame) {
+ } else if (cpi->refresh_golden_frame) {
for (k = 0; k < MBSKIP_CONTEXTS; k++) {
if (cpi->last_skip_false_probs[1][k] != 0)
cm->mbskip_pred_probs[k] = cpi->last_skip_false_probs[1][k];
@@ -3124,13 +2990,28 @@
}
// Set up entropy depending on frame type.
- if (cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME) {
+ /* Choose which entropy context to use. When using a forward reference
+ * frame, it immediately follows the keyframe, and thus benefits from
+ * using the same entropy context established by the keyframe. Otherwise,
+ * use the default context 0.
+ */
+ cm->frame_context_idx = cpi->oxcf.play_alternate;
vp9_setup_key_frame(cpi);
- else
+ } else {
+ /* Choose which entropy context to use. Currently there are only two
+ * contexts used, one for normal frames and one for alt ref frames.
+ */
+ cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
vp9_setup_inter_frame(cpi);
+ }
}
// transform / motion compensation build reconstruction frame
+#if CONFIG_MODELCOEFPROB && ADJUST_KF_COEF_PROBS
+ if (cm->frame_type == KEY_FRAME)
+ vp9_adjust_default_coef_probs(cm);
+#endif
vp9_encode_frame(cpi);
@@ -3214,9 +3095,6 @@
if (cpi->projected_frame_size > cpi->this_frame_target) {
q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value
- if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low
- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
-
if (undershoot_seen || (loop_count > 1)) {
// Update rate_correction_factor unless cpi->active_worst_quality has changed.
if (!active_worst_qchanged)
@@ -3223,14 +3101,6 @@
vp9_update_rate_correction_factors(cpi, 1);
Q = (q_high + q_low + 1) / 2;
-
- // Adjust cpi->zbin_over_quant (only allowed when Q is max)
- if (Q < MAXQ)
- cpi->zbin_over_quant = 0;
- else {
- zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high;
- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;
- }
} else {
// Update rate_correction_factor unless cpi->active_worst_quality has changed.
if (!active_worst_qchanged)
@@ -3238,7 +3108,7 @@
Q = vp9_regulate_q(cpi, cpi->this_frame_target);
- while (((Q < q_low) || (cpi->zbin_over_quant < zbin_oq_low)) && (Retries < 10)) {
+ while ((Q < q_low) && (Retries < 10)) {
vp9_update_rate_correction_factors(cpi, 0);
Q = vp9_regulate_q(cpi, cpi->this_frame_target);
Retries++;
@@ -3249,10 +3119,7 @@
}
// Frame is too small
else {
- if (cpi->zbin_over_quant == 0)
- q_high = (Q > q_low) ? (Q - 1) : q_low; // Lower q_high if not using over quant
- else // else lower zbin_oq_high
- zbin_oq_high = (cpi->zbin_over_quant > zbin_oq_low) ? (cpi->zbin_over_quant - 1) : zbin_oq_low;
+ q_high = (Q > q_low) ? (Q - 1) : q_low;
if (overshoot_seen || (loop_count > 1)) {
// Update rate_correction_factor unless cpi->active_worst_quality has changed.
@@ -3260,12 +3127,6 @@
vp9_update_rate_correction_factors(cpi, 1);
Q = (q_high + q_low) / 2;
-
- // Adjust cpi->zbin_over_quant (only allowed when Q is max)
- if (Q < MAXQ)
- cpi->zbin_over_quant = 0;
- else
- cpi->zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2;
} else {
// Update rate_correction_factor unless cpi->active_worst_quality has changed.
if (!active_worst_qchanged)
@@ -3282,7 +3143,7 @@
q_low = Q;
}
- while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) {
+ while ((Q > q_high) && (Retries < 10)) {
vp9_update_rate_correction_factors(cpi, 0);
Q = vp9_regulate_q(cpi, cpi->this_frame_target);
Retries++;
@@ -3293,21 +3154,9 @@
}
// Clamp Q to upper and lower limits:
- if (Q > q_high)
- Q = q_high;
- else if (Q < q_low)
- Q = q_low;
+ Q = clamp(Q, q_low, q_high);
- // Clamp cpi->zbin_over_quant
- cpi->zbin_over_quant = (cpi->zbin_over_quant < zbin_oq_low) ?
- zbin_oq_low : (cpi->zbin_over_quant > zbin_oq_high) ?
- zbin_oq_high : cpi->zbin_over_quant;
-
- // Loop = ((Q != last_q) || (last_zbin_oq != cpi->zbin_over_quant)) ? TRUE : FALSE;
- Loop = ((Q != last_q)) ? TRUE : FALSE;
-#if RESET_FOREACH_FILTER
- last_zbin_oq = cpi->zbin_over_quant;
-#endif
+ Loop = Q != last_q;
} else
Loop = FALSE;
@@ -3351,12 +3200,9 @@
if (Loop == TRUE) {
overshoot_seen = FALSE;
undershoot_seen = FALSE;
- zbin_oq_low = zbin_oq_low0;
- zbin_oq_high = zbin_oq_high0;
q_low = q_low0;
q_high = q_high0;
Q = Q0;
- cpi->zbin_over_quant = last_zbin_oq = last_zbin_oq0;
cpi->rate_correction_factor = rate_correction_factor0;
cpi->gf_rate_correction_factor = gf_rate_correction_factor0;
cpi->active_best_quality = active_best_quality0;
@@ -3412,12 +3258,18 @@
vp9_update_gf_useage_maps(cpi, cm, &cpi->mb);
if (cm->frame_type == KEY_FRAME)
- cm->refresh_last_frame = 1;
+ cpi->refresh_last_frame = 1;
#if 0
{
FILE *f = fopen("gfactive.stt", "a");
- fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
+ fprintf(f, "%8d %8d %8d %8d %8d\n",
+ cm->current_video_frame,
+ (100 * cpi->gf_active_count)
+ / (cpi->common.mb_rows * cpi->common.mb_cols),
+ cpi->this_iiratio,
+ cpi->next_iiratio,
+ cpi->refresh_golden_frame);
fclose(f);
}
#endif
@@ -3444,18 +3296,19 @@
update_reference_segmentation_map(cpi);
}
- update_reference_frames(cm);
+ release_scaled_references(cpi);
+ update_reference_frames(cpi);
vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4);
- vp9_copy(cpi->common.fc.hybrid_coef_counts_4x4,
- cpi->hybrid_coef_counts_4x4);
vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
- vp9_copy(cpi->common.fc.hybrid_coef_counts_8x8,
- cpi->hybrid_coef_counts_8x8);
vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
- vp9_copy(cpi->common.fc.hybrid_coef_counts_16x16,
- cpi->hybrid_coef_counts_16x16);
vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32);
- vp9_adapt_coef_probs(&cpi->common);
+ if (!cpi->common.error_resilient_mode &&
+ !cpi->common.frame_parallel_decoding_mode) {
+ vp9_adapt_coef_probs(&cpi->common);
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_adapt_nzc_probs(&cpi->common);
+#endif
+ }
if (cpi->common.frame_type != KEY_FRAME) {
vp9_copy(cpi->common.fc.sb_ymode_counts, cpi->sb_ymode_count);
vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
@@ -3467,14 +3320,13 @@
#if CONFIG_COMP_INTERINTRA_PRED
vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count);
#endif
- vp9_adapt_mode_probs(&cpi->common);
-
cpi->common.fc.NMVcount = cpi->NMVcount;
- /*
- printf("2: %d %d %d %d\n", cpi->NMVcount.joints[0], cpi->NMVcount.joints[1],
- cpi->NMVcount.joints[2], cpi->NMVcount.joints[3]);
- */
- vp9_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
+ if (!cpi->common.error_resilient_mode &&
+ !cpi->common.frame_parallel_decoding_mode) {
+ vp9_adapt_mode_probs(&cpi->common);
+ vp9_adapt_mode_context(&cpi->common);
+ vp9_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
+ }
}
#if CONFIG_COMP_INTERINTRA_PRED
if (cm->frame_type != KEY_FRAME)
@@ -3502,8 +3354,8 @@
if ((cm->base_qindex < cpi->last_boosted_qindex) ||
((cpi->static_mb_pct < 100) &&
((cm->frame_type == KEY_FRAME) ||
- cm->refresh_alt_ref_frame ||
- (cm->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) {
+ cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) {
cpi->last_boosted_qindex = cm->base_qindex;
}
@@ -3516,7 +3368,8 @@
cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2;
// Keep a record from which we can calculate the average Q excluding GF updates and key frames
- if ((cm->frame_type != KEY_FRAME) && !cm->refresh_golden_frame && !cm->refresh_alt_ref_frame) {
+ if ((cm->frame_type != KEY_FRAME)
+ && !cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
cpi->ni_frames++;
cpi->tot_q += vp9_convert_qindex_to_q(Q);
cpi->avg_q = cpi->tot_q / (double)cpi->ni_frames;
@@ -3538,11 +3391,19 @@
if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
- // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass.
- cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
- cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4;
- cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32;
- cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32;
+ // Rolling monitors of whether we are over or underspending used to help
+ // regulate min and Max Q in two pass.
+ if (cm->frame_type != KEY_FRAME) {
+ cpi->rolling_target_bits =
+ ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
+ cpi->rolling_actual_bits =
+ ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4;
+ cpi->long_rolling_target_bits =
+ ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32;
+ cpi->long_rolling_actual_bits =
+ ((cpi->long_rolling_actual_bits * 31) +
+ cpi->projected_frame_size + 16) / 32;
+ }
// Actual bits spent
cpi->total_actual_bits += cpi->projected_frame_size;
@@ -3558,7 +3419,7 @@
if (cpi->twopass.kf_group_bits < 0)
cpi->twopass.kf_group_bits = 0;
- } else if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame) {
+ } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
cpi->twopass.gf_group_bits += cpi->this_frame_target - cpi->projected_frame_size;
if (cpi->twopass.gf_group_bits < 0)
@@ -3569,7 +3430,7 @@
// in this frame.
update_base_skip_probs(cpi);
-#if 0// 1 && CONFIG_INTERNAL_STATS
+#if 0 // 1 && CONFIG_INTERNAL_STATS
{
FILE *f = fopen("tmp.stt", "a");
int recon_err;
@@ -3582,13 +3443,13 @@
if (cpi->twopass.total_left_stats->coded_error != 0.0)
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
"%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
- "%6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%6d %6d %5d %5d %5d %8.2f %10d %10.3f"
"%10.3f %8d %10d %10d %10d\n",
cpi->common.current_video_frame, cpi->this_frame_target,
cpi->projected_frame_size, 0, //loop_size_estimate,
(cpi->projected_frame_size - cpi->this_frame_target),
(int)cpi->total_target_vs_actual,
- (cpi->oxcf.starting_buffer_level - cpi->bits_off_target),
+ (int)(cpi->oxcf.starting_buffer_level - cpi->bits_off_target),
(int)cpi->total_actual_bits,
vp9_convert_qindex_to_q(cm->base_qindex),
(double)vp9_dc_quant(cm->base_qindex, 0) / 4.0,
@@ -3597,9 +3458,8 @@
cpi->avg_q,
vp9_convert_qindex_to_q(cpi->ni_av_qi),
vp9_convert_qindex_to_q(cpi->cq_target_quality),
- cpi->zbin_over_quant,
- // cpi->avg_frame_qindex, cpi->zbin_over_quant,
- cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+ cpi->refresh_last_frame,
+ cpi->refresh_golden_frame, cpi->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
(int)cpi->twopass.bits_left,
@@ -3611,7 +3471,7 @@
else
fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
"%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
- "%6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%5d %5d %5d %8d %8d %8.2f %10d %10.3f"
"%8d %10d %10d %10d\n",
cpi->common.current_video_frame,
cpi->this_frame_target, cpi->projected_frame_size,
@@ -3618,7 +3478,7 @@
0, //loop_size_estimate,
(cpi->projected_frame_size - cpi->this_frame_target),
(int)cpi->total_target_vs_actual,
- (cpi->oxcf.starting_buffer_level - cpi->bits_off_target),
+ (int)(cpi->oxcf.starting_buffer_level - cpi->bits_off_target),
(int)cpi->total_actual_bits,
vp9_convert_qindex_to_q(cm->base_qindex),
(double)vp9_dc_quant(cm->base_qindex, 0) / 4.0,
@@ -3627,9 +3487,8 @@
cpi->avg_q,
vp9_convert_qindex_to_q(cpi->ni_av_qi),
vp9_convert_qindex_to_q(cpi->cq_target_quality),
- cpi->zbin_over_quant,
- // cpi->avg_frame_qindex, cpi->zbin_over_quant,
- cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+ cpi->refresh_last_frame,
+ cpi->refresh_golden_frame, cpi->refresh_alt_ref_frame,
cm->frame_type, cpi->gfu_boost,
cpi->twopass.est_max_qcorrection_factor,
(int)cpi->twopass.bits_left,
@@ -3645,8 +3504,8 @@
fprintf(fmodes, "%6d:%1d:%1d:%1d ",
cpi->common.current_video_frame,
- cm->frame_type, cm->refresh_golden_frame,
- cm->refresh_alt_ref_frame);
+ cm->frame_type, cpi->refresh_golden_frame,
+ cpi->refresh_alt_ref_frame);
for (i = 0; i < MAX_MODES; i++)
fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
@@ -3665,33 +3524,34 @@
#endif
// If this was a kf or Gf note the Q
- if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
+ if ((cm->frame_type == KEY_FRAME)
+ || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
cm->last_kf_gf_q = cm->base_qindex;
- if (cm->refresh_golden_frame == 1)
+ if (cpi->refresh_golden_frame == 1)
cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
else
cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN;
- if (cm->refresh_alt_ref_frame == 1)
+ if (cpi->refresh_alt_ref_frame == 1)
cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF;
else
cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF;
- if (cm->refresh_last_frame & cm->refresh_golden_frame) // both refreshed
+ if (cpi->refresh_last_frame & cpi->refresh_golden_frame)
cpi->gold_is_last = 1;
- else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other
+ else if (cpi->refresh_last_frame ^ cpi->refresh_golden_frame)
cpi->gold_is_last = 0;
- if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) // both refreshed
+ if (cpi->refresh_last_frame & cpi->refresh_alt_ref_frame)
cpi->alt_is_last = 1;
- else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) // 1 refreshed but not the other
+ else if (cpi->refresh_last_frame ^ cpi->refresh_alt_ref_frame)
cpi->alt_is_last = 0;
- if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) // both refreshed
+ if (cpi->refresh_alt_ref_frame & cpi->refresh_golden_frame)
cpi->gold_is_alt = 1;
- else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) // 1 refreshed but not the other
+ else if (cpi->refresh_alt_ref_frame ^ cpi->refresh_golden_frame)
cpi->gold_is_alt = 0;
cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
@@ -3705,7 +3565,8 @@
if (cpi->gold_is_alt)
cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
- if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME))
+ if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame
+ && (cm->frame_type != KEY_FRAME))
// Update the alternate reference frame stats as appropriate.
update_alt_ref_frame_stats(cpi);
else
@@ -3727,6 +3588,9 @@
xd->update_mb_segmentation_data = 0;
xd->mode_ref_lf_delta_update = 0;
+ // keep track of the last coded dimensions
+ cm->last_width = cm->width;
+ cm->last_height = cm->height;
// Dont increment frame counters if this was an altref buffer update not a real frame
if (cm->show_frame) {
@@ -3744,8 +3608,9 @@
FILE *recon_file;
sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame);
recon_file = fopen(filename, "wb");
- fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc,
- cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file);
+ fwrite(cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].buffer_alloc,
+ cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]].frame_size,
+ 1, recon_file);
fclose(recon_file);
}
#endif
@@ -3765,13 +3630,18 @@
static void Pass2Encode(VP9_COMP *cpi, unsigned long *size,
unsigned char *dest, unsigned int *frame_flags) {
- if (!cpi->common.refresh_alt_ref_frame)
+ if (!cpi->refresh_alt_ref_frame)
vp9_second_pass(cpi);
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+
+#ifdef DISABLE_RC_LONG_TERM_MEM
+ cpi->twopass.bits_left -= cpi->this_frame_target;
+#else
cpi->twopass.bits_left -= 8 * *size;
+#endif
- if (!cpi->common.refresh_alt_ref_frame) {
+ if (!cpi->refresh_alt_ref_frame) {
double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.frame_rate;
double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
* cpi->oxcf.two_pass_vbrmin_section / 100);
@@ -3808,9 +3678,8 @@
const VP9_COMMON *cm = &cpi->common;
const MACROBLOCKD *xd = &cpi->mb.e_mbd;
- return cm->frame_type == KEY_FRAME || cm->refresh_last_frame
- || cm->refresh_golden_frame || cm->refresh_alt_ref_frame
- || cm->copy_buffer_to_gf || cm->copy_buffer_to_arf
+ return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame
+ || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame
|| cm->refresh_entropy_probs
|| xd->mode_ref_lf_delta_update
|| xd->update_mb_segmentation_map || xd->update_mb_segmentation_data;
@@ -3846,9 +3715,9 @@
force_src_buffer = &cpi->alt_ref_buffer;
}
cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
- cm->refresh_alt_ref_frame = 1;
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 0;
+ cpi->refresh_alt_ref_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 0;
cm->show_frame = 0;
cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag.
cpi->is_src_frame_alt_ref = 0;
@@ -3862,8 +3731,10 @@
cpi->is_src_frame_alt_ref = cpi->alt_ref_source
&& (cpi->source == cpi->alt_ref_source);
- if (cpi->is_src_frame_alt_ref)
+ if (cpi->is_src_frame_alt_ref) {
+ cpi->refresh_last_frame = 0;
cpi->alt_ref_source = NULL;
+ }
}
}
@@ -3889,7 +3760,7 @@
}
// adjust frame rates based on timestamps given
- if (!cm->refresh_alt_ref_frame) {
+ if (!cpi->refresh_alt_ref_frame) {
int64_t this_duration;
int step = 0;
@@ -3945,28 +3816,34 @@
#if 0
- if (cm->refresh_alt_ref_frame) {
- // cm->refresh_golden_frame = 1;
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 0;
+ if (cpi->refresh_alt_ref_frame) {
+ // cpi->refresh_golden_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 0;
} else {
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 1;
}
#endif
- /* find a free buffer for the new frame */
- {
- int i = 0;
- for (; i < NUM_YV12_BUFFERS; i++) {
- if (!cm->yv12_fb[i].flags) {
- cm->new_fb_idx = i;
- break;
- }
- }
- assert(i < NUM_YV12_BUFFERS);
- }
+ /* find a free buffer for the new frame, releasing the reference previously
+ * held.
+ */
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+ cm->new_fb_idx = get_free_fb(cm);
+
+ /* Get the mapping of L/G/A to the reference buffer pool */
+ cm->active_ref_idx[0] = cm->ref_frame_map[cpi->lst_fb_idx];
+ cm->active_ref_idx[1] = cm->ref_frame_map[cpi->gld_fb_idx];
+ cm->active_ref_idx[2] = cm->ref_frame_map[cpi->alt_fb_idx];
+
+ /* Reset the frame pointers to the current frame size */
+ vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx],
+ cm->width, cm->height,
+ VP9BORDERINPIXELS);
+
+ vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm);
if (cpi->pass == 1) {
Pass1Encode(cpi, size, dest, frame_flags);
} else if (cpi->pass == 2) {
@@ -3976,21 +3853,19 @@
}
if (cm->refresh_entropy_probs) {
- if (cm->refresh_alt_ref_frame)
- vpx_memcpy(&cm->lfc_a, &cm->fc, sizeof(cm->fc));
- else
- vpx_memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc));
+ vpx_memcpy(&cm->frame_contexts[cm->frame_context_idx], &cm->fc,
+ sizeof(cm->fc));
}
- // if its a dropped frame honor the requests on subsequent frames
if (*size > 0) {
+ // if its a dropped frame honor the requests on subsequent frames
cpi->droppable = !frame_is_reference(cpi);
// return to normal state
cm->refresh_entropy_probs = 1;
- cm->refresh_alt_ref_frame = 0;
- cm->refresh_golden_frame = 0;
- cm->refresh_last_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 1;
cm->frame_type = INTER_FRAME;
}
@@ -4113,7 +3988,7 @@
vp9_ppflags_t *flags) {
VP9_COMP *cpi = (VP9_COMP *) comp;
- if (cpi->common.refresh_alt_ref_frame)
+ if (!cpi->common.show_frame)
return -1;
else {
int ret;
@@ -4123,9 +3998,9 @@
if (cpi->common.frame_to_show) {
*dest = *cpi->common.frame_to_show;
- dest->y_width = cpi->common.Width;
- dest->y_height = cpi->common.Height;
- dest->uv_height = cpi->common.Height / 2;
+ dest->y_width = cpi->common.width;
+ dest->y_height = cpi->common.height;
+ dest->uv_height = cpi->common.height / 2;
ret = 0;
} else {
ret = -1;
@@ -4217,17 +4092,25 @@
int vp9_set_internal_size(VP9_PTR comp,
VPX_SCALING horiz_mode, VPX_SCALING vert_mode) {
VP9_COMP *cpi = (VP9_COMP *) comp;
+ VP9_COMMON *cm = &cpi->common;
+ int hr = 0, hs = 0, vr = 0, vs = 0;
- if (horiz_mode <= ONETWO)
- cpi->common.horiz_scale = horiz_mode;
- else
+ if (horiz_mode > ONETWO)
return -1;
- if (vert_mode <= ONETWO)
- cpi->common.vert_scale = vert_mode;
- else
+ if (vert_mode > ONETWO)
return -1;
+ Scale2Ratio(horiz_mode, &hr, &hs);
+ Scale2Ratio(vert_mode, &vr, &vs);
+
+ // always go to the next whole number
+ cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
+ cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
+
+ assert(cm->width <= cpi->initial_width);
+ assert(cm->height <= cpi->initial_height);
+ update_frame_size(cpi);
return 0;
}
@@ -4235,16 +4118,17 @@
int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) {
int i, j;
- int Total = 0;
+ int total = 0;
uint8_t *src = source->y_buffer;
uint8_t *dst = dest->y_buffer;
- // Loop through the Y plane raw and reconstruction data summing (square differences)
+ // Loop through the Y plane raw and reconstruction data summing
+ // (square differences)
for (i = 0; i < source->y_height; i += 16) {
for (j = 0; j < source->y_width; j += 16) {
unsigned int sse;
- Total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
+ total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
&sse);
}
@@ -4252,7 +4136,7 @@
dst += 16 * dest->y_stride;
}
- return Total;
+ return total;
}
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -29,6 +29,11 @@
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/encoder/vp9_lookahead.h"
+// Experimental rate control switches
+// #define ONE_SHOT_Q_ESTIMATE 1
+// #define STRICT_ONE_SHOT_Q 1
+// #define DISABLE_RC_LONG_TERM_MEM 1
+
// #define SPEEDSTATS 1
#define MIN_GF_INTERVAL 4
#define DEFAULT_GF_INTERVAL 7
@@ -37,10 +42,6 @@
#define MAX_LAG_BUFFERS 25
-#define AF_THRESH 25
-#define AF_THRESH2 100
-#define ARF_DECAY_THRESH 12
-
#if CONFIG_COMP_INTERINTRA_PRED
#define MAX_MODES 54
#else
@@ -50,13 +51,12 @@
#define MIN_THRESHMULT 32
#define MAX_THRESHMULT 512
-#define GF_ZEROMV_ZBIN_BOOST 12
-#define LF_ZEROMV_ZBIN_BOOST 6
-#define MV_ZBIN_BOOST 4
-#define ZBIN_OQ_MAX 192
+#define GF_ZEROMV_ZBIN_BOOST 0
+#define LF_ZEROMV_ZBIN_BOOST 0
+#define MV_ZBIN_BOOST 0
+#define SPLIT_MV_ZBIN_BOOST 0
+#define INTRA_ZBIN_BOOST 0
-#define VP9_TEMPORAL_ALT_REF 1
-
typedef struct {
nmv_context nmvc;
int nmvjointcost[MV_JOINTS];
@@ -86,13 +86,10 @@
// 0 = BPRED, ZERO_MV, MV, SPLIT
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
- vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs hybrid_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs hybrid_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs hybrid_coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
+ vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
+ vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES];
+ vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES];
vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
@@ -111,6 +108,18 @@
int mv_ref_ct[INTER_MODE_CONTEXTS][4][2];
int vp9_mode_contexts[INTER_MODE_CONTEXTS][4];
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_prob nzc_probs_4x4
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES];
+ vp9_prob nzc_probs_8x8
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES];
+ vp9_prob nzc_probs_16x16
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES];
+ vp9_prob nzc_probs_32x32
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES];
+ vp9_prob nzc_pcat_probs[MAX_NZC_CONTEXTS]
+ [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA];
+#endif
} CODING_CONTEXT;
typedef struct {
@@ -259,7 +268,9 @@
int optimize_coefficients;
int no_skip_block4x4_search;
int search_best_filter;
-
+ int splitmode_breakout;
+ int mb16_breakout;
+ int static_segmentation;
} SPEED_FEATURES;
typedef struct {
@@ -301,11 +312,6 @@
DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);
-
DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
@@ -312,30 +318,8 @@
DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(64, short, Y1zbin_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, Y2zbin_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, UVzbin_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, zrun_zbin_boost_y1_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, zrun_zbin_boost_y2_8x8[QINDEX_RANGE][64]);
- DECLARE_ALIGNED(64, short, zrun_zbin_boost_uv_8x8[QINDEX_RANGE][64]);
-
- DECLARE_ALIGNED(16, short, Y1zbin_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, Y2zbin_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, UVzbin_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_16x16[QINDEX_RANGE][256]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_16x16[QINDEX_RANGE][256]);
-
- DECLARE_ALIGNED(16, short, Y1zbin_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, Y2zbin_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, UVzbin_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2_32x32[QINDEX_RANGE][1024]);
- DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv_32x32[QINDEX_RANGE][1024]);
-
MACROBLOCK mb;
VP9_COMMON common;
VP9_CONFIG oxcf;
@@ -357,11 +341,17 @@
int alt_is_last; // Alt reference frame same as last ( short circuit altref search)
int gold_is_alt; // don't do both alt and gold search ( just do gold).
- // int refresh_alt_ref_frame;
+ int scaled_ref_idx[3];
+ int lst_fb_idx;
+ int gld_fb_idx;
+ int alt_fb_idx;
+ int refresh_last_frame;
+ int refresh_golden_frame;
+ int refresh_alt_ref_frame;
YV12_BUFFER_CONFIG last_frame_uf;
TOKENEXTRA *tok;
- unsigned int tok_count;
+ unsigned int tok_count[1 << 6];
unsigned int frames_since_key;
@@ -396,11 +386,6 @@
CODING_CONTEXT coding_context;
// Rate targetting variables
- int64_t prediction_error;
- int64_t last_prediction_error;
- int64_t intra_error;
- int64_t last_intra_error;
-
int this_frame_target;
int projected_frame_size;
int last_q[2]; // Separate values for Intra/Inter
@@ -422,6 +407,7 @@
int max_gf_interval;
int baseline_gf_interval;
int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
+ int active_arnr_strength; // <= cpi->oxcf.arnr_max_strength
int64_t key_frame_count;
int prior_key_frame_distance[KEY_FRAME_CONTEXT];
@@ -441,7 +427,6 @@
double tot_q;
double avg_q;
- int zbin_over_quant;
int zbin_mode_boost;
int zbin_mode_boost_enabled;
@@ -484,37 +469,47 @@
nmv_context_counts NMVcount;
- vp9_coeff_count coef_counts_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs frame_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_stats frame_branch_ct_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_count hybrid_coef_counts_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_probs frame_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4];
- vp9_coeff_stats frame_hybrid_branch_ct_4x4[BLOCK_TYPES_4X4];
+ vp9_coeff_count coef_counts_4x4[BLOCK_TYPES];
+ vp9_coeff_probs frame_coef_probs_4x4[BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct_4x4[BLOCK_TYPES];
- vp9_coeff_count coef_counts_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs frame_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_stats frame_branch_ct_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_count hybrid_coef_counts_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_probs frame_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8];
- vp9_coeff_stats frame_hybrid_branch_ct_8x8[BLOCK_TYPES_8X8];
+ vp9_coeff_count coef_counts_8x8[BLOCK_TYPES];
+ vp9_coeff_probs frame_coef_probs_8x8[BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct_8x8[BLOCK_TYPES];
- vp9_coeff_count coef_counts_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_count hybrid_coef_counts_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_probs frame_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16];
- vp9_coeff_stats frame_hybrid_branch_ct_16x16[BLOCK_TYPES_16X16];
+ vp9_coeff_count coef_counts_16x16[BLOCK_TYPES];
+ vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES];
- vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32];
- vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES_32X32];
- vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES_32X32];
+ vp9_coeff_count coef_counts_32x32[BLOCK_TYPES];
+ vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES];
+ vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES];
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_prob frame_nzc_probs_4x4
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES];
+ unsigned int frame_nzc_branch_ct_4x4
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES][2];
+ vp9_prob frame_nzc_probs_8x8
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES];
+ unsigned int frame_nzc_branch_ct_8x8
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES][2];
+ vp9_prob frame_nzc_probs_16x16
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES];
+ unsigned int frame_nzc_branch_ct_16x16
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES][2];
+ vp9_prob frame_nzc_probs_32x32
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES];
+ unsigned int frame_nzc_branch_ct_32x32
+ [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES][2];
+#endif
+
int gfu_boost;
int last_boost;
int kf_boost;
int kf_zeromotion_pct;
- int target_bandwidth;
+ int64_t target_bandwidth;
struct vpx_codec_pkt_list *output_pkt_list;
#if 0
@@ -542,8 +537,6 @@
int goldfreq;
int auto_worst_q;
int cpu_used;
- int horiz_scale;
- int vert_scale;
int pass;
vp9_prob last_skip_false_probs[3][MBSKIP_CONTEXTS];
@@ -628,11 +621,9 @@
double est_max_qcorrection_factor;
} twopass;
-#if VP9_TEMPORAL_ALT_REF
YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
int fixed_divide[512];
-#endif
#if CONFIG_INTERNAL_STATS
int count;
@@ -683,9 +674,6 @@
int droppable;
- // TODO Do we still need this??
- int update_context;
-
int dummy_packing; /* flag to indicate if packing is dummy */
unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]
@@ -696,6 +684,8 @@
unsigned int mb_mv_ref_count[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
#endif
+ int initial_width;
+ int initial_height;
} VP9_COMP;
void vp9_encode_frame(VP9_COMP *cpi);
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -8,7 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#include <assert.h>
+#include <limits.h>
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_picklpf.h"
@@ -27,6 +28,7 @@
int yoffset;
int linestocopy;
+ assert(src_ybc->y_stride == dst_ybc->y_stride);
yheight = src_ybc->y_height;
ystride = src_ybc->y_stride;
@@ -246,7 +248,7 @@
int Bias = 0; // Bias against raising loop filter and in favour of lowering it
// Make a copy of the unfiltered / processed recon buffer
- vp8_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
+ vp8_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
if (cm->frame_type == KEY_FRAME)
cm->sharpness_level = 0;
@@ -266,7 +268,7 @@
// Get baseline error score
vp9_set_alt_lf_level(cpi, filt_mid);
- vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_mid, 1);
+ vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_mid, 1, 0);
best_err = vp9_calc_ss_err(sd, cm->frame_to_show);
filt_best = filt_mid;
@@ -291,7 +293,7 @@
if ((filt_direction <= 0) && (filt_low != filt_mid)) {
// Get Low filter error score
vp9_set_alt_lf_level(cpi, filt_low);
- vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_low, 1);
+ vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_low, 1, 0);
filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
@@ -311,7 +313,7 @@
// Now look at filt_high
if ((filt_direction >= 0) && (filt_high != filt_mid)) {
vp9_set_alt_lf_level(cpi, filt_high);
- vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1);
+ vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1, 0);
filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
@@ -336,4 +338,30 @@
}
cm->filter_level = filt_best;
+
+#if CONFIG_LOOP_DERING
+ /* Decide whether to turn on deringing filter */
+ { // NOLINT
+ int best_dering = 0;
+ int this_dering;
+ int last_err_diff = INT_MAX;
+
+ for (this_dering = 1; this_dering <= 16; this_dering++) {
+ vp9_set_alt_lf_level(cpi, filt_best);
+ vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1, this_dering);
+ filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
+ vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+ if (filt_err < best_err) {
+ best_err = filt_err;
+ best_dering = this_dering;
+ last_err_diff = INT_MAX;
+ } else {
+ if (filt_err - best_err > last_err_diff)
+ break;
+ last_err_diff = filt_err - best_err;
+ }
+ }
+ cm->dering_enabled = best_dering;
+ }
+#endif
}
--- a/vp9/encoder/vp9_picklpf.h
+++ b/vp9/encoder/vp9_picklpf.h
@@ -15,12 +15,12 @@
struct yv12_buffer_config;
struct VP9_COMP;
-extern void vp9_pick_filter_level_fast(struct yv12_buffer_config *sd,
- struct VP9_COMP *cpi);
+void vp9_pick_filter_level_fast(struct yv12_buffer_config *sd,
+ struct VP9_COMP *cpi);
-extern void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val);
+void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val);
-extern void vp9_pick_filter_level(struct yv12_buffer_config *sd,
- struct VP9_COMP *cpi);
+void vp9_pick_filter_level(struct yv12_buffer_config *sd,
+ struct VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_PICKLPF_H_
--- a/vp9/encoder/vp9_psnr.c
+++ b/vp9/encoder/vp9_psnr.c
@@ -11,17 +11,16 @@
#include "vpx_scale/yv12config.h"
#include "math.h"
-#include "vp9/common/vp9_systemdependent.h" /* for vp9_clear_system_state() */
#define MAX_PSNR 100
-double vp9_mse2psnr(double Samples, double Peak, double Mse) {
+double vp9_mse2psnr(double samples, double peak, double mse) {
double psnr;
- if ((double)Mse > 0.0)
- psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
+ if (mse > 0.0)
+ psnr = 10.0 * log10(peak * peak * samples / mse);
else
- psnr = MAX_PSNR; // Limit to prevent / 0
+ psnr = MAX_PSNR; // Limit to prevent / 0
if (psnr > MAX_PSNR)
psnr = MAX_PSNR;
--- a/vp9/encoder/vp9_psnr.h
+++ b/vp9/encoder/vp9_psnr.h
@@ -12,6 +12,6 @@
#ifndef VP9_ENCODER_VP9_PSNR_H_
#define VP9_ENCODER_VP9_PSNR_H_
-extern double vp9_mse2psnr(double Samples, double Peak, double Mse);
+double vp9_mse2psnr(double samples, double peak, double mse);
#endif // VP9_ENCODER_VP9_PSNR_H_
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -21,32 +21,46 @@
extern int enc_debug;
#endif
-void vp9_ht_quantize_b_4x4(BLOCK *b, BLOCKD *d, TX_TYPE tx_type) {
+static INLINE int plane_idx(MACROBLOCKD *xd, int b_idx) {
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+ if (b_idx < (16 << (sb_type * 2)))
+ return 0; // Y
+ else if (b_idx < (20 << (sb_type * 2)))
+ return 16; // U
+ assert(b_idx < (24 << (sb_type * 2)));
+ return 20; // V
+}
+
+void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ BLOCK *const b = &mb->block[0];
+ BLOCKD *const d = &xd->block[0];
int i, rc, eob;
int zbin;
int x, y, z, sz;
+ int16_t *coeff_ptr = mb->coeff + b_idx * 16;
+ int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16;
+ int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16;
int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
- int16_t *coeff_ptr = b->coeff;
int16_t *zbin_ptr = b->zbin;
int16_t *round_ptr = b->round;
int16_t *quant_ptr = b->quant;
uint8_t *quant_shift_ptr = b->quant_shift;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
int16_t *dequant_ptr = d->dequant;
int zbin_oq_value = b->zbin_extra;
+ const int *pt_scan;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
- int const *pt_scan ;
-
+ assert(plane_idx(xd, b_idx) == 0);
switch (tx_type) {
case ADST_DCT:
pt_scan = vp9_row_scan_4x4;
break;
-
case DCT_ADST:
pt_scan = vp9_col_scan_4x4;
break;
-
default:
pt_scan = vp9_default_zig_zag1d_4x4;
break;
@@ -57,48 +71,63 @@
eob = -1;
- for (i = 0; i < b->eob_max_offset; i++) {
- rc = pt_scan[i];
- z = coeff_ptr[rc];
+ if (!b->skip_block) {
+ for (i = 0; i < 16; i++) {
+ rc = pt_scan[i];
+ z = coeff_ptr[rc];
- zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
- zbin_boost_ptr ++;
+ zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+ zbin_boost_ptr++;
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
- if (x >= zbin) {
- x += round_ptr[rc];
- y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+ if (x >= zbin) {
+ x += round_ptr[rc];
+ y = (((x * quant_ptr[rc]) >> 16) + x)
+ >> quant_shift_ptr[rc]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ if (y) {
+ eob = i; // last nonzero coeffs
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
+ zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ }
}
}
}
- d->eob = eob + 1;
+ xd->eobs[b_idx] = eob + 1;
+#if CONFIG_CODE_NONZEROCOUNT
+ xd->nzcs[b_idx] = nzc;
+#endif
}
-void vp9_regular_quantize_b_4x4(BLOCK *b, BLOCKD *d) {
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ const int c_idx = plane_idx(xd, b_idx);
+ BLOCK *const b = &mb->block[c_idx];
+ BLOCKD *const d = &xd->block[c_idx];
int i, rc, eob;
int zbin;
int x, y, z, sz;
+ int16_t *coeff_ptr = mb->coeff + b_idx * 16;
+ int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16;
+ int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16;
int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
- int16_t *coeff_ptr = b->coeff;
int16_t *zbin_ptr = b->zbin;
int16_t *round_ptr = b->round;
int16_t *quant_ptr = b->quant;
uint8_t *quant_shift_ptr = b->quant_shift;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
int16_t *dequant_ptr = d->dequant;
int zbin_oq_value = b->zbin_extra;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
vpx_memset(qcoeff_ptr, 0, 32);
vpx_memset(dqcoeff_ptr, 0, 32);
@@ -105,203 +134,203 @@
eob = -1;
- for (i = 0; i < b->eob_max_offset; i++) {
- rc = vp9_default_zig_zag1d_4x4[i];
- z = coeff_ptr[rc];
+ if (!b->skip_block) {
+ for (i = 0; i < 16; i++) {
+ rc = vp9_default_zig_zag1d_4x4[i];
+ z = coeff_ptr[rc];
- zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
- zbin_boost_ptr ++;
+ zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+ zbin_boost_ptr++;
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
- if (x >= zbin) {
- x += round_ptr[rc];
+ if (x >= zbin) {
+ x += round_ptr[rc];
- y = (((x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
+ y = (((x * quant_ptr[rc]) >> 16) + x)
+ >> quant_shift_ptr[rc]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ if (y) {
+ eob = i; // last nonzero coeffs
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
+ zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength
+ }
}
}
}
- d->eob = eob + 1;
+ xd->eobs[b_idx] = eob + 1;
+#if CONFIG_CODE_NONZEROCOUNT
+ xd->nzcs[b_idx] = nzc;
+#endif
}
-void vp9_quantize_mby_4x4_c(MACROBLOCK *x) {
+void vp9_quantize_mby_4x4(MACROBLOCK *x) {
int i;
- int has_2nd_order = get_2nd_order_usage(&x->e_mbd);
for (i = 0; i < 16; i++) {
- TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, &x->e_mbd.block[i]);
+ TX_TYPE tx_type = get_tx_type_4x4(&x->e_mbd, i);
if (tx_type != DCT_DCT) {
- assert(has_2nd_order == 0);
- vp9_ht_quantize_b_4x4(&x->block[i], &x->e_mbd.block[i], tx_type);
+ vp9_ht_quantize_b_4x4(x, i, tx_type);
} else {
- x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_4x4(x, i);
}
}
- if (has_2nd_order) {
- x->quantize_b_4x4(&x->block[24], &x->e_mbd.block[24]);
- } else {
- vpx_memset(x->e_mbd.block[24].qcoeff, 0,
- 16 * sizeof(x->e_mbd.block[24].qcoeff[0]));
- vpx_memset(x->e_mbd.block[24].dqcoeff, 0,
- 16 * sizeof(x->e_mbd.block[24].dqcoeff[0]));
- x->e_mbd.block[24].eob = 0;
- }
}
-void vp9_quantize_mbuv_4x4_c(MACROBLOCK *x) {
+void vp9_quantize_mbuv_4x4(MACROBLOCK *x) {
int i;
for (i = 16; i < 24; i++)
- x->quantize_b_4x4(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_4x4(x, i);
}
-void vp9_quantize_mb_4x4_c(MACROBLOCK *x) {
- vp9_quantize_mby_4x4_c(x);
- vp9_quantize_mbuv_4x4_c(x);
+void vp9_quantize_mb_4x4(MACROBLOCK *x) {
+ vp9_quantize_mby_4x4(x);
+ vp9_quantize_mbuv_4x4(x);
}
-void vp9_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) {
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
- int zbin_zrun_index = 0;
- int16_t *coeff_ptr = b->coeff;
- int16_t *zbin_ptr = b->zbin;
- int16_t *round_ptr = b->round;
- int16_t *quant_ptr = b->quant;
- uint8_t *quant_shift_ptr = b->quant_shift;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
- int16_t *dequant_ptr = d->dequant;
- int zbin_oq_value = b->zbin_extra;
- // double q2nd = 4;
- vpx_memset(qcoeff_ptr, 0, 32);
- vpx_memset(dqcoeff_ptr, 0, 32);
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx;
+ int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx;
+ const int c_idx = plane_idx(xd, b_idx);
+ BLOCK *const b = &mb->block[c_idx];
+ BLOCKD *const d = &xd->block[c_idx];
+ const int *pt_scan;
- eob = -1;
-
- for (i = 0; i < b->eob_max_offset_8x8; i++) {
- rc = vp9_default_zig_zag1d_4x4[i];
- z = coeff_ptr[rc];
-
- zbin_boost_ptr = &b->zrun_zbin_boost[zbin_zrun_index];
- zbin_zrun_index += 4;
- zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value);
-
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- if (x >= zbin) {
- x += (round_ptr[rc]);
- y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x)
- >> quant_shift_ptr[rc]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_zrun_index = 0;
- }
- }
+ switch (tx_type) {
+ case ADST_DCT:
+ pt_scan = vp9_row_scan_8x8;
+ break;
+ case DCT_ADST:
+ pt_scan = vp9_col_scan_8x8;
+ break;
+ default:
+ pt_scan = vp9_default_zig_zag1d_8x8;
+ break;
}
- d->eob = eob + 1;
-}
-
-void vp9_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) {
- int i, rc, eob;
- int zbin;
- int x, y, z, sz;
- int16_t *zbin_boost_ptr = b->zrun_zbin_boost_8x8;
- int16_t *coeff_ptr = b->coeff;
- int16_t *zbin_ptr = b->zbin_8x8;
- int16_t *round_ptr = b->round;
- int16_t *quant_ptr = b->quant;
- uint8_t *quant_shift_ptr = b->quant_shift;
- int16_t *qcoeff_ptr = d->qcoeff;
- int16_t *dqcoeff_ptr = d->dqcoeff;
- int16_t *dequant_ptr = d->dequant;
- int zbin_oq_value = b->zbin_extra;
-
vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));
- eob = -1;
+ if (!b->skip_block) {
+ int i, rc, eob;
+ int zbin;
+ int x, y, z, sz;
+ int zero_run;
+ int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
+ int16_t *coeff_ptr = mb->coeff + 16 * b_idx;
+ int16_t *zbin_ptr = b->zbin;
+ int16_t *round_ptr = b->round;
+ int16_t *quant_ptr = b->quant;
+ uint8_t *quant_shift_ptr = b->quant_shift;
+ int16_t *dequant_ptr = d->dequant;
+ int zbin_oq_value = b->zbin_extra;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
- for (i = 0; i < b->eob_max_offset_8x8; i++) {
- rc = vp9_default_zig_zag1d_8x8[i];
- z = coeff_ptr[rc];
+ eob = -1;
- zbin = (zbin_ptr[rc != 0] + *zbin_boost_ptr + zbin_oq_value);
- zbin_boost_ptr++;
+ // Special case for DC as it is the one triggering access in various
+ // tables: {zbin, quant, quant_shift, dequant}_ptr[rc != 0]
+ {
+ z = coeff_ptr[0];
+ zbin = (zbin_ptr[0] + zbin_boost_ptr[0] + zbin_oq_value);
+ zero_run = 1;
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
- if (x >= zbin) {
- x += (round_ptr[rc != 0]);
- y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x))
- >> quant_shift_ptr[rc != 0]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0]; // dequantized value
+ if (x >= zbin) {
+ x += (round_ptr[0]);
+ y = ((int)(((int)(x * quant_ptr[0]) >> 16) + x))
+ >> quant_shift_ptr[0]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[0] = x; // write to destination
+ dqcoeff_ptr[0] = x * dequant_ptr[0]; // dequantized value
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = b->zrun_zbin_boost_8x8;
+ if (y) {
+ eob = 0; // last nonzero coeffs
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
+ zero_run = 0;
+ }
}
}
- }
+ for (i = 1; i < 64; i++) {
+ rc = pt_scan[i];
+ z = coeff_ptr[rc];
+ zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value);
+ // The original code was incrementing zero_run while keeping it at
+ // maximum 15 by adding "(zero_run < 15)". The same is achieved by
+ // removing the opposite of the sign mask of "(zero_run - 15)".
+ zero_run -= (zero_run - 15) >> 31;
- d->eob = eob + 1;
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
+
+ if (x >= zbin) {
+ x += (round_ptr[rc != 0]);
+ y = ((int)(((int)(x * quant_ptr[1]) >> 16) + x))
+ >> quant_shift_ptr[1]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[1]; // dequantized value
+
+ if (y) {
+ eob = i; // last nonzero coeffs
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
+ zero_run = 0;
+ }
+ }
+ }
+ xd->eobs[b_idx] = eob + 1;
+#if CONFIG_CODE_NONZEROCOUNT
+ xd->nzcs[b_idx] = nzc;
+#endif
+ } else {
+ xd->eobs[b_idx] = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+ xd->nzcs[b_idx] = 0;
+#endif
+ }
}
void vp9_quantize_mby_8x8(MACROBLOCK *x) {
int i;
- int has_2nd_order = get_2nd_order_usage(&x->e_mbd);
+#if CONFIG_CODE_NONZEROCOUNT
for (i = 0; i < 16; i ++) {
- x->e_mbd.block[i].eob = 0;
+ x->e_mbd.nzcs[i] = 0;
}
- x->e_mbd.block[24].eob = 0;
+#endif
for (i = 0; i < 16; i += 4) {
- int ib = (i & 8) + ((i & 4) >> 1);
- TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, &x->e_mbd.block[ib]);
- if (tx_type != DCT_DCT)
- assert(has_2nd_order == 0);
- x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]);
+ TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, (i & 8) + ((i & 4) >> 1));
+ x->quantize_b_8x8(x, i, tx_type);
}
-
- if (has_2nd_order) {
- x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]);
- } else {
- vpx_memset(x->e_mbd.block[24].qcoeff, 0,
- 16 * sizeof(x->e_mbd.block[24].qcoeff[0]));
- vpx_memset(x->e_mbd.block[24].dqcoeff, 0,
- 16 * sizeof(x->e_mbd.block[24].dqcoeff[0]));
- x->e_mbd.block[24].eob = 0;
- }
}
void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
int i;
- for (i = 16; i < 24; i ++)
- x->e_mbd.block[i].eob = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+ for (i = 16; i < 24; i ++) {
+ x->e_mbd.nzcs[i] = 0;
+ }
+#endif
for (i = 16; i < 24; i += 4)
- x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]);
+ x->quantize_b_8x8(x, i, DCT_DCT);
}
void vp9_quantize_mb_8x8(MACROBLOCK *x) {
@@ -310,12 +339,14 @@
}
void vp9_quantize_mby_16x16(MACROBLOCK *x) {
+ TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, 0);
+#if CONFIG_CODE_NONZEROCOUNT
int i;
-
- for (i = 0; i < 16; i++)
- x->e_mbd.block[i].eob = 0;
- x->e_mbd.block[24].eob = 0;
- x->quantize_b_16x16(&x->block[0], &x->e_mbd.block[0]);
+ for (i = 0; i < 16; i++) {
+ x->e_mbd.nzcs[i] = 0;
+ }
+#endif
+ x->quantize_b_16x16(x, 0, tx_type);
}
void vp9_quantize_mb_16x16(MACROBLOCK *x) {
@@ -324,107 +355,256 @@
}
static void quantize(int16_t *zbin_boost_orig_ptr,
- int16_t *coeff_ptr, int n_coeffs, int max_coeffs,
+ int16_t *coeff_ptr, int n_coeffs, int skip_block,
int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr,
uint8_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
int16_t *dequant_ptr, int zbin_oq_value,
- int *eob_ptr, const int *scan, int mul) {
+ uint16_t *eob_ptr,
+#if CONFIG_CODE_NONZEROCOUNT
+ uint16_t *nzc_ptr,
+#endif
+ const int *scan, int mul) {
int i, rc, eob;
int zbin;
int x, y, z, sz;
+ int zero_run = 0;
int16_t *zbin_boost_ptr = zbin_boost_orig_ptr;
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
+#endif
vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
eob = -1;
- for (i = 0; i < max_coeffs; i++) {
- rc = scan[i];
- z = coeff_ptr[rc] * mul;
- zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value);
- zbin_boost_ptr ++;
+ if (!skip_block) {
+ for (i = 0; i < n_coeffs; i++) {
+ rc = scan[i];
+ z = coeff_ptr[rc] * mul;
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
+ zbin = (zbin_ptr[rc != 0] + zbin_boost_ptr[zero_run] + zbin_oq_value);
+ zero_run += (zero_run < 15);
- if (x >= zbin) {
- x += (round_ptr[rc!=0]);
- y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x))
- >> quant_shift_ptr[rc!=0]; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / mul; // dequantized value
+ sz = (z >> 31); // sign of z
+ x = (z ^ sz) - sz; // x = abs(z)
- if (y) {
- eob = i; // last nonzero coeffs
- zbin_boost_ptr = zbin_boost_orig_ptr;
+ if (x >= zbin) {
+ x += (round_ptr[rc != 0]);
+ y = ((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x))
+ >> quant_shift_ptr[rc != 0]; // quantize (x)
+ x = (y ^ sz) - sz; // get the sign back
+ qcoeff_ptr[rc] = x; // write to destination
+ dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / mul; // dequantized value
+
+ if (y) {
+ eob = i; // last nonzero coeffs
+ zero_run = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+ ++nzc; // number of nonzero coeffs
+#endif
+ }
}
}
}
*eob_ptr = eob + 1;
+#if CONFIG_CODE_NONZEROCOUNT
+ *nzc_ptr = nzc;
+#endif
}
-void vp9_regular_quantize_b_16x16(BLOCK *b, BLOCKD *d) {
- quantize(b->zrun_zbin_boost_16x16,
- b->coeff,
- 256, b->eob_max_offset_16x16,
- b->zbin_16x16, b->round, b->quant, b->quant_shift,
- d->qcoeff,
- d->dqcoeff,
+void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ const int c_idx = plane_idx(xd, b_idx);
+ BLOCK *const b = &mb->block[c_idx];
+ BLOCKD *const d = &xd->block[c_idx];
+ const int *pt_scan;
+
+ switch (tx_type) {
+ case ADST_DCT:
+ pt_scan = vp9_row_scan_16x16;
+ break;
+ case DCT_ADST:
+ pt_scan = vp9_col_scan_16x16;
+ break;
+ default:
+ pt_scan = vp9_default_zig_zag1d_16x16;
+ break;
+ }
+
+ quantize(b->zrun_zbin_boost,
+ mb->coeff + 16 * b_idx,
+ 256, b->skip_block,
+ b->zbin, b->round, b->quant, b->quant_shift,
+ xd->qcoeff + 16 * b_idx,
+ xd->dqcoeff + 16 * b_idx,
d->dequant,
b->zbin_extra,
- &d->eob, vp9_default_zig_zag1d_16x16, 1);
+ &xd->eobs[b_idx],
+#if CONFIG_CODE_NONZEROCOUNT
+ &xd->nzcs[b_idx],
+#endif
+ pt_scan, 1);
}
-void vp9_quantize_sby_32x32(MACROBLOCK *x) {
- x->e_mbd.block[0].eob = 0;
- quantize(x->block[0].zrun_zbin_boost_32x32,
- x->sb_coeff_data.coeff,
- 1024, x->block[0].eob_max_offset_32x32,
- x->block[0].zbin_32x32,
- x->block[0].round, x->block[0].quant, x->block[0].quant_shift,
- x->e_mbd.sb_coeff_data.qcoeff,
- x->e_mbd.sb_coeff_data.dqcoeff,
- x->e_mbd.block[0].dequant,
- x->block[0].zbin_extra,
- &x->e_mbd.block[0].eob,
+void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ const int c_idx = plane_idx(xd, b_idx);
+ BLOCK *const b = &mb->block[c_idx];
+ BLOCKD *const d = &xd->block[c_idx];
+
+ quantize(b->zrun_zbin_boost,
+ mb->coeff + b_idx * 16,
+ 1024, b->skip_block,
+ b->zbin,
+ b->round, b->quant, b->quant_shift,
+ xd->qcoeff + b_idx * 16,
+ xd->dqcoeff + b_idx * 16,
+ d->dequant,
+ b->zbin_extra,
+ &xd->eobs[b_idx],
+#if CONFIG_CODE_NONZEROCOUNT
+ &xd->nzcs[b_idx],
+#endif
vp9_default_zig_zag1d_32x32, 2);
}
+void vp9_quantize_sby_32x32(MACROBLOCK *x) {
+ vp9_regular_quantize_b_32x32(x, 0);
+}
+
+void vp9_quantize_sby_16x16(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 4; n++) {
+ TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd,
+ (16 * (n & 2)) + ((n & 1) * 4));
+ x->quantize_b_16x16(x, n * 16, tx_type);
+ }
+}
+
+void vp9_quantize_sby_8x8(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd,
+ (4 * (n & 12)) + ((n & 3) * 2));
+ x->quantize_b_8x8(x, n * 4, tx_type);
+ }
+}
+
+void vp9_quantize_sby_4x4(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
+ if (tx_type != DCT_DCT) {
+ vp9_ht_quantize_b_4x4(x, n, tx_type);
+ } else {
+ x->quantize_b_4x4(x, n);
+ }
+ }
+}
+
void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
+ x->quantize_b_16x16(x, 64, DCT_DCT);
+ x->quantize_b_16x16(x, 80, DCT_DCT);
+}
+
+void vp9_quantize_sbuv_8x8(MACROBLOCK *x) {
int i;
- x->e_mbd.block[16].eob = 0;
- x->e_mbd.block[20].eob = 0;
- for (i = 16; i < 24; i += 4)
- quantize(x->block[i].zrun_zbin_boost_16x16,
- x->sb_coeff_data.coeff + 1024 + (i - 16) * 64,
- 256, x->block[i].eob_max_offset_16x16,
- x->block[i].zbin_16x16,
- x->block[i].round, x->block[0].quant, x->block[i].quant_shift,
- x->e_mbd.sb_coeff_data.qcoeff + 1024 + (i - 16) * 64,
- x->e_mbd.sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64,
- x->e_mbd.block[i].dequant,
- x->block[i].zbin_extra,
- &x->e_mbd.block[i].eob,
- vp9_default_zig_zag1d_16x16, 1);
+ for (i = 64; i < 96; i += 4)
+ x->quantize_b_8x8(x, i, DCT_DCT);
}
+void vp9_quantize_sbuv_4x4(MACROBLOCK *x) {
+ int i;
+
+ for (i = 64; i < 96; i++)
+ x->quantize_b_4x4(x, i);
+}
+
+void vp9_quantize_sb64y_32x32(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 4; n++)
+ vp9_regular_quantize_b_32x32(x, n * 64);
+}
+
+void vp9_quantize_sb64y_16x16(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 16; n++) {
+ TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd,
+ (16 * (n & 12)) + ((n & 3) * 4));
+ x->quantize_b_16x16(x, n * 16, tx_type);
+ }
+}
+
+void vp9_quantize_sb64y_8x8(MACROBLOCK *x) {
+ int n;
+
+ for (n = 0; n < 64; n++) {
+ TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd,
+ (4 * (n & 56)) + ((n & 7) * 2));
+ x->quantize_b_8x8(x, n * 4, tx_type);
+ }
+}
+
+void vp9_quantize_sb64y_4x4(MACROBLOCK *x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int n;
+
+ for (n = 0; n < 256; n++) {
+ const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
+ if (tx_type != DCT_DCT) {
+ vp9_ht_quantize_b_4x4(x, n, tx_type);
+ } else {
+ x->quantize_b_4x4(x, n);
+ }
+ }
+}
+
+void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) {
+ vp9_regular_quantize_b_32x32(x, 256);
+ vp9_regular_quantize_b_32x32(x, 320);
+}
+
+void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) {
+ int i;
+
+ for (i = 256; i < 384; i += 16)
+ x->quantize_b_16x16(x, i, DCT_DCT);
+}
+
+void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) {
+ int i;
+
+ for (i = 256; i < 384; i += 4)
+ x->quantize_b_8x8(x, i, DCT_DCT);
+}
+
+void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) {
+ int i;
+
+ for (i = 256; i < 384; i++)
+ x->quantize_b_4x4(x, i);
+}
+
/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
* these two C functions if corresponding optimized routine is not available.
* NEON optimized version implements currently the fast quantization for pair
* of blocks. */
-void vp9_regular_quantize_b_4x4_pair(BLOCK *b1, BLOCK *b2,
- BLOCKD *d1, BLOCKD *d2) {
- vp9_regular_quantize_b_4x4(b1, d1);
- vp9_regular_quantize_b_4x4(b2, d2);
+void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2) {
+ vp9_regular_quantize_b_4x4(x, b_idx1);
+ vp9_regular_quantize_b_4x4(x, b_idx2);
}
-static void invert_quant(int16_t *quant,
- uint8_t *shift, int d) {
+static void invert_quant(int16_t *quant, uint8_t *shift, int d) {
unsigned t;
int l;
t = d;
@@ -438,248 +618,53 @@
void vp9_init_quantizer(VP9_COMP *cpi) {
int i;
int quant_val;
- int Q;
- static const int zbin_boost[16] = { 0, 0, 8, 10, 12, 14, 16, 20,
- 24, 28, 32, 36, 40, 44, 44, 44
- };
+ int q;
- static const int zbin_boost_8x8[64] = { 0, 0, 0, 8, 8, 8, 10, 12,
- 14, 16, 18, 20, 22, 24, 26, 28,
- 30, 32, 34, 36, 38, 40, 42, 44,
- 46, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48
- };
- static const int zbin_boost_16x16[256] = {
- 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
- 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- };
- static const int zbin_boost_32x32[1024] = {
- 0, 0, 0, 8, 8, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28,
- 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
- };
- int qrounding_factor = 48;
+ static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12,
+ 14, 16, 20, 24, 28, 32, 36, 40 };
-
- for (Q = 0; Q < QINDEX_RANGE; Q++) {
- int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;
-
-#if CONFIG_LOSSLESS
- if (cpi->oxcf.lossless) {
- if (Q == 0) {
- qzbin_factor = 64;
- qrounding_factor = 64;
- }
+ for (q = 0; q < QINDEX_RANGE; q++) {
+ int qzbin_factor = (vp9_dc_quant(q, 0) < 148) ? 84 : 80;
+ int qrounding_factor = 48;
+ if (q == 0) {
+ qzbin_factor = 64;
+ qrounding_factor = 64;
}
-#endif
-
// dc values
- quant_val = vp9_dc_quant(Q, cpi->common.y1dc_delta_q);
- invert_quant(cpi->Y1quant[Q] + 0,
- cpi->Y1quant_shift[Q] + 0, quant_val);
- cpi->Y1zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y1zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y1zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y1round[Q][0] = (qrounding_factor * quant_val) >> 7;
- cpi->common.Y1dequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
- cpi->zrun_zbin_boost_y1_8x8[Q][0] =
- ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_16x16[Q][0] =
- ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
- cpi->Y1zbin_32x32[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_32x32[Q][0] =
- ((quant_val * zbin_boost_32x32[0]) + 64) >> 7;
+ quant_val = vp9_dc_quant(q, cpi->common.y1dc_delta_q);
+ invert_quant(cpi->Y1quant[q] + 0, cpi->Y1quant_shift[q] + 0, quant_val);
+ cpi->Y1zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->Y1round[q][0] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.Y1dequant[q][0] = quant_val;
+ cpi->zrun_zbin_boost_y1[q][0] = (quant_val * zbin_boost[0]) >> 7;
+ quant_val = vp9_dc_uv_quant(q, cpi->common.uvdc_delta_q);
+ invert_quant(cpi->UVquant[q] + 0, cpi->UVquant_shift[q] + 0, quant_val);
+ cpi->UVzbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->UVround[q][0] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.UVdequant[q][0] = quant_val;
+ cpi->zrun_zbin_boost_uv[q][0] = (quant_val * zbin_boost[0]) >> 7;
- quant_val = vp9_dc2quant(Q, cpi->common.y2dc_delta_q);
- invert_quant(cpi->Y2quant[Q] + 0,
- cpi->Y2quant_shift[Q] + 0, quant_val);
- cpi->Y2zbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y2zbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y2zbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y2round[Q][0] = (qrounding_factor * quant_val) >> 7;
- cpi->common.Y2dequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
- cpi->zrun_zbin_boost_y2_8x8[Q][0] =
- ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
- cpi->zrun_zbin_boost_y2_16x16[Q][0] =
- ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
-
- quant_val = vp9_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
- invert_quant(cpi->UVquant[Q] + 0,
- cpi->UVquant_shift[Q] + 0, quant_val);
- cpi->UVzbin[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->UVzbin_8x8[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->UVzbin_16x16[Q][0] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->UVround[Q][0] = (qrounding_factor * quant_val) >> 7;
- cpi->common.UVdequant[Q][0] = quant_val;
- cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
- cpi->zrun_zbin_boost_uv_8x8[Q][0] =
- ((quant_val * zbin_boost_8x8[0]) + 64) >> 7;
- cpi->zrun_zbin_boost_uv_16x16[Q][0] =
- ((quant_val * zbin_boost_16x16[0]) + 64) >> 7;
-
// all the 4x4 ac values =;
for (i = 1; i < 16; i++) {
int rc = vp9_default_zig_zag1d_4x4[i];
- quant_val = vp9_ac_yquant(Q);
- invert_quant(cpi->Y1quant[Q] + rc,
- cpi->Y1quant_shift[Q] + rc, quant_val);
- cpi->Y1zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y1round[Q][rc] = (qrounding_factor * quant_val) >> 7;
- cpi->common.Y1dequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_y1[Q][i] =
- ((quant_val * zbin_boost[i]) + 64) >> 7;
+ quant_val = vp9_ac_yquant(q);
+ invert_quant(cpi->Y1quant[q] + rc, cpi->Y1quant_shift[q] + rc, quant_val);
+ cpi->Y1zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->Y1round[q][rc] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.Y1dequant[q][rc] = quant_val;
+ cpi->zrun_zbin_boost_y1[q][i] =
+ ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7);
- quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
- invert_quant(cpi->Y2quant[Q] + rc,
- cpi->Y2quant_shift[Q] + rc, quant_val);
- cpi->Y2zbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->Y2round[Q][rc] = (qrounding_factor * quant_val) >> 7;
- cpi->common.Y2dequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_y2[Q][i] =
- ((quant_val * zbin_boost[i]) + 64) >> 7;
-
- quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- invert_quant(cpi->UVquant[Q] + rc,
- cpi->UVquant_shift[Q] + rc, quant_val);
- cpi->UVzbin[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->UVround[Q][rc] = (qrounding_factor * quant_val) >> 7;
- cpi->common.UVdequant[Q][rc] = quant_val;
- cpi->zrun_zbin_boost_uv[Q][i] =
- ((quant_val * zbin_boost[i]) + 64) >> 7;
+ quant_val = vp9_ac_uv_quant(q, cpi->common.uvac_delta_q);
+ invert_quant(cpi->UVquant[q] + rc, cpi->UVquant_shift[q] + rc, quant_val);
+ cpi->UVzbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->UVround[q][rc] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.UVdequant[q][rc] = quant_val;
+ cpi->zrun_zbin_boost_uv[q][i] =
+ ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7);
}
-
- // 8x8 structures... only zbin seperated out for now
- // This needs cleaning up for 8x8 especially if we are to add
- // support for non flat Q matices
- for (i = 1; i < 64; i++) {
- int rc = vp9_default_zig_zag1d_8x8[i];
-
- quant_val = vp9_ac_yquant(Q);
- cpi->Y1zbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_8x8[Q][i] =
- ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
-
- quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
- cpi->Y2zbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y2_8x8[Q][i] =
- ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
-
- quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- cpi->UVzbin_8x8[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_uv_8x8[Q][i] =
- ((quant_val * zbin_boost_8x8[i]) + 64) >> 7;
- }
-
- // 16x16 structures. Same comment above applies.
- for (i = 1; i < 256; i++) {
- int rc = vp9_default_zig_zag1d_16x16[i];
-
- quant_val = vp9_ac_yquant(Q);
- cpi->Y1zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_16x16[Q][i] =
- ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
-
- quant_val = vp9_ac2quant(Q, cpi->common.y2ac_delta_q);
- cpi->Y2zbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y2_16x16[Q][i] =
- ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
-
- quant_val = vp9_ac_uv_quant(Q, cpi->common.uvac_delta_q);
- cpi->UVzbin_16x16[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_uv_16x16[Q][i] =
- ((quant_val * zbin_boost_16x16[i]) + 64) >> 7;
- }
- // 32x32 structures. Same comment above applies.
- for (i = 1; i < 1024; i++) {
- int rc = vp9_default_zig_zag1d_32x32[i];
-
- quant_val = vp9_ac_yquant(Q);
- cpi->Y1zbin_32x32[Q][rc] = ((qzbin_factor * quant_val) + 64) >> 7;
- cpi->zrun_zbin_boost_y1_32x32[Q][i] =
- ((quant_val * zbin_boost_32x32[i]) + 64) >> 7;
- }
}
}
@@ -709,8 +694,7 @@
// Y
zbin_extra = (cpi->common.Y1dequant[QIndex][1] *
- (cpi->zbin_over_quant +
- cpi->zbin_mode_boost +
+ (cpi->zbin_mode_boost +
x->act_zbin_adj)) >> 7;
for (i = 0; i < 16; i++) {
@@ -717,39 +701,19 @@
x->block[i].quant = cpi->Y1quant[QIndex];
x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
x->block[i].zbin = cpi->Y1zbin[QIndex];
- x->block[i].zbin_8x8 = cpi->Y1zbin_8x8[QIndex];
- x->block[i].zbin_16x16 = cpi->Y1zbin_16x16[QIndex];
- x->block[i].zbin_32x32 = cpi->Y1zbin_32x32[QIndex];
x->block[i].round = cpi->Y1round[QIndex];
x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
- x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y1_8x8[QIndex];
- x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y1_16x16[QIndex];
- x->block[i].zrun_zbin_boost_32x32 = cpi->zrun_zbin_boost_y1_32x32[QIndex];
x->block[i].zbin_extra = (int16_t)zbin_extra;
- // Segment max eob offset feature.
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
- x->block[i].eob_max_offset =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_8x8 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_16x16 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_32x32 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- } else {
- x->block[i].eob_max_offset = 16;
- x->block[i].eob_max_offset_8x8 = 64;
- x->block[i].eob_max_offset_16x16 = 256;
- x->block[i].eob_max_offset_32x32 = 1024;
- }
+ // Segment skip feature.
+ x->block[i].skip_block =
+ vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
}
// UV
zbin_extra = (cpi->common.UVdequant[QIndex][1] *
- (cpi->zbin_over_quant +
- cpi->zbin_mode_boost +
+ (cpi->zbin_mode_boost +
x->act_zbin_adj)) >> 7;
for (i = 16; i < 24; i++) {
@@ -756,61 +720,16 @@
x->block[i].quant = cpi->UVquant[QIndex];
x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
x->block[i].zbin = cpi->UVzbin[QIndex];
- x->block[i].zbin_8x8 = cpi->UVzbin_8x8[QIndex];
- x->block[i].zbin_16x16 = cpi->UVzbin_16x16[QIndex];
x->block[i].round = cpi->UVround[QIndex];
x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
- x->block[i].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_uv_8x8[QIndex];
- x->block[i].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_uv_16x16[QIndex];
-
x->block[i].zbin_extra = (int16_t)zbin_extra;
- // Segment max eob offset feature.
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
- x->block[i].eob_max_offset =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_8x8 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[i].eob_max_offset_16x16 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- } else {
- x->block[i].eob_max_offset = 16;
- x->block[i].eob_max_offset_8x8 = 64;
- x->block[i].eob_max_offset_16x16 = 256;
- }
+ // Segment skip feature.
+ x->block[i].skip_block =
+ vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
}
- // Y2
- zbin_extra = (cpi->common.Y2dequant[QIndex][1] *
- ((cpi->zbin_over_quant / 2) +
- cpi->zbin_mode_boost +
- x->act_zbin_adj)) >> 7;
-
- x->block[24].quant = cpi->Y2quant[QIndex];
- x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
- x->block[24].zbin = cpi->Y2zbin[QIndex];
- x->block[24].zbin_8x8 = cpi->Y2zbin_8x8[QIndex];
- x->block[24].zbin_16x16 = cpi->Y2zbin_16x16[QIndex];
- x->block[24].round = cpi->Y2round[QIndex];
- x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
- x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
- x->block[24].zrun_zbin_boost_8x8 = cpi->zrun_zbin_boost_y2_8x8[QIndex];
- x->block[24].zrun_zbin_boost_16x16 = cpi->zrun_zbin_boost_y2_16x16[QIndex];
- x->block[24].zbin_extra = (int16_t)zbin_extra;
-
- // TBD perhaps not use for Y2
- // Segment max eob offset feature.
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
- x->block[24].eob_max_offset =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- x->block[24].eob_max_offset_8x8 =
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
- } else {
- x->block[24].eob_max_offset = 16;
- x->block[24].eob_max_offset_8x8 = 4;
- }
-
/* save this macroblock QIndex for vp9_update_zbin_extra() */
x->e_mbd.q_index = QIndex;
}
@@ -822,8 +741,7 @@
// Y
zbin_extra = (cpi->common.Y1dequant[QIndex][1] *
- (cpi->zbin_over_quant +
- cpi->zbin_mode_boost +
+ (cpi->zbin_mode_boost +
x->act_zbin_adj)) >> 7;
for (i = 0; i < 16; i++) {
x->block[i].zbin_extra = (int16_t)zbin_extra;
@@ -831,21 +749,12 @@
// UV
zbin_extra = (cpi->common.UVdequant[QIndex][1] *
- (cpi->zbin_over_quant +
- cpi->zbin_mode_boost +
+ (cpi->zbin_mode_boost +
x->act_zbin_adj)) >> 7;
for (i = 16; i < 24; i++) {
x->block[i].zbin_extra = (int16_t)zbin_extra;
}
-
- // Y2
- zbin_extra = (cpi->common.Y2dequant[QIndex][1] *
- ((cpi->zbin_over_quant / 2) +
- cpi->zbin_mode_boost +
- x->act_zbin_adj)) >> 7;
-
- x->block[24].zbin_extra = (int16_t)zbin_extra;
}
void vp9_frame_init_quantizer(VP9_COMP *cpi) {
@@ -861,13 +770,15 @@
cm->base_qindex = Q;
+ // Set lossless mode
+ if (cm->base_qindex <= 4)
+ cm->base_qindex = 0;
+
// if any of the delta_q values are changing update flag will
// have to be set.
cm->y1dc_delta_q = 0;
- cm->y2ac_delta_q = 0;
cm->uvdc_delta_q = 0;
cm->uvac_delta_q = 0;
- cm->y2dc_delta_q = 0;
// quantizer has to be reinitialized if any delta_q changes.
// As there are not any here for now this is inactive code.
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -14,10 +14,10 @@
#include "vp9/encoder/vp9_block.h"
#define prototype_quantize_block(sym) \
- void (sym)(BLOCK *b,BLOCKD *d)
+ void (sym)(MACROBLOCK *mb, int b_idx)
#define prototype_quantize_block_pair(sym) \
- void (sym)(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2)
+ void (sym)(MACROBLOCK *mb, int b_idx1, int b_idx2)
#define prototype_quantize_mb(sym) \
void (sym)(MACROBLOCK *x)
@@ -26,60 +26,41 @@
#include "x86/vp9_quantize_x86.h"
#endif
-#define prototype_quantize_block_type(sym) \
- void (sym)(BLOCK *b, BLOCKD *d, TX_TYPE type)
-extern prototype_quantize_block_type(vp9_ht_quantize_b_4x4);
+void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type);
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx);
+void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2);
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type);
+void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type);
+void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx);
-#ifndef vp9_quantize_quantb_4x4
-#define vp9_quantize_quantb_4x4 vp9_regular_quantize_b_4x4
-#endif
-extern prototype_quantize_block(vp9_quantize_quantb_4x4);
-
-#ifndef vp9_quantize_quantb_4x4_pair
-#define vp9_quantize_quantb_4x4_pair vp9_regular_quantize_b_4x4_pair
-#endif
-extern prototype_quantize_block_pair(vp9_quantize_quantb_4x4_pair);
-
-#ifndef vp9_quantize_quantb_8x8
-#define vp9_quantize_quantb_8x8 vp9_regular_quantize_b_8x8
-#endif
-extern prototype_quantize_block(vp9_quantize_quantb_8x8);
-
-#ifndef vp9_quantize_quantb_16x16
-#define vp9_quantize_quantb_16x16 vp9_regular_quantize_b_16x16
-#endif
-extern prototype_quantize_block(vp9_quantize_quantb_16x16);
-
-#ifndef vp9_quantize_quantb_2x2
-#define vp9_quantize_quantb_2x2 vp9_regular_quantize_b_2x2
-#endif
-extern prototype_quantize_block(vp9_quantize_quantb_2x2);
-
-#ifndef vp9_quantize_mb_4x4
-#define vp9_quantize_mb_4x4 vp9_quantize_mb_4x4_c
-#endif
-extern prototype_quantize_mb(vp9_quantize_mb_4x4);
+void vp9_quantize_mb_4x4(MACROBLOCK *x);
void vp9_quantize_mb_8x8(MACROBLOCK *x);
-#ifndef vp9_quantize_mbuv_4x4
-#define vp9_quantize_mbuv_4x4 vp9_quantize_mbuv_4x4_c
-#endif
-extern prototype_quantize_mb(vp9_quantize_mbuv_4x4);
+void vp9_quantize_mbuv_4x4(MACROBLOCK *x);
+void vp9_quantize_mby_4x4(MACROBLOCK *x);
-#ifndef vp9_quantize_mby_4x4
-#define vp9_quantize_mby_4x4 vp9_quantize_mby_4x4_c
-#endif
-extern prototype_quantize_mb(vp9_quantize_mby_4x4);
+void vp9_quantize_mby_8x8(MACROBLOCK *x);
+void vp9_quantize_mbuv_8x8(MACROBLOCK *x);
-extern prototype_quantize_mb(vp9_quantize_mby_8x8);
-extern prototype_quantize_mb(vp9_quantize_mbuv_8x8);
-
void vp9_quantize_mb_16x16(MACROBLOCK *x);
-extern prototype_quantize_block(vp9_quantize_quantb_16x16);
-extern prototype_quantize_mb(vp9_quantize_mby_16x16);
+void vp9_quantize_mby_16x16(MACROBLOCK *x);
void vp9_quantize_sby_32x32(MACROBLOCK *x);
+void vp9_quantize_sby_16x16(MACROBLOCK *x);
+void vp9_quantize_sby_8x8(MACROBLOCK *x);
+void vp9_quantize_sby_4x4(MACROBLOCK *x);
void vp9_quantize_sbuv_16x16(MACROBLOCK *x);
+void vp9_quantize_sbuv_8x8(MACROBLOCK *x);
+void vp9_quantize_sbuv_4x4(MACROBLOCK *x);
+
+void vp9_quantize_sb64y_32x32(MACROBLOCK *x);
+void vp9_quantize_sb64y_16x16(MACROBLOCK *x);
+void vp9_quantize_sb64y_8x8(MACROBLOCK *x);
+void vp9_quantize_sb64y_4x4(MACROBLOCK *x);
+void vp9_quantize_sb64uv_32x32(MACROBLOCK *x);
+void vp9_quantize_sb64uv_16x16(MACROBLOCK *x);
+void vp9_quantize_sb64uv_8x8(MACROBLOCK *x);
+void vp9_quantize_sb64uv_4x4(MACROBLOCK *x);
struct VP9_COMP;
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -14,8 +14,8 @@
#include <string.h>
#include <limits.h>
#include <assert.h>
+#include <math.h>
-#include "math.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_modecont.h"
#include "vp9/common/vp9_common.h"
@@ -25,9 +25,10 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_seg_common.h"
-#define MIN_BPB_FACTOR 0.005
-#define MAX_BPB_FACTOR 50
+#define MIN_BPB_FACTOR 0.005
+#define MAX_BPB_FACTOR 50
#ifdef MODE_STATS
extern unsigned int y_modes[VP9_YMODES];
@@ -88,39 +89,34 @@
// tables if and when things settle down in the experimental bitstream
double vp9_convert_qindex_to_q(int qindex) {
// Convert the index to a real Q value (scaled down to match old Q values)
- return (double)vp9_ac_yquant(qindex) / 4.0;
+ return vp9_ac_yquant(qindex) / 4.0;
}
int vp9_gfboost_qadjust(int qindex) {
- int retval;
- double q;
-
- q = vp9_convert_qindex_to_q(qindex);
- retval = (int)((0.00000828 * q * q * q) +
- (-0.0055 * q * q) +
- (1.32 * q) + 79.3);
- return retval;
+ const double q = vp9_convert_qindex_to_q(qindex);
+ return (int)((0.00000828 * q * q * q) +
+ (-0.0055 * q * q) +
+ (1.32 * q) + 79.3);
}
static int kfboost_qadjust(int qindex) {
- int retval;
- double q;
-
- q = vp9_convert_qindex_to_q(qindex);
- retval = (int)((0.00000973 * q * q * q) +
- (-0.00613 * q * q) +
- (1.316 * q) + 121.2);
- return retval;
+ const double q = vp9_convert_qindex_to_q(qindex);
+ return (int)((0.00000973 * q * q * q) +
+ (-0.00613 * q * q) +
+ (1.316 * q) + 121.2);
}
-int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex) {
- if (frame_type == KEY_FRAME)
- return (int)(4500000 / vp9_convert_qindex_to_q(qindex));
- else
- return (int)(2850000 / vp9_convert_qindex_to_q(qindex));
-}
+int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+ double correction_factor) {
+ const double q = vp9_convert_qindex_to_q(qindex);
+ int enumerator = frame_type == KEY_FRAME ? 4000000 : 2500000;
+ // q based adjustment to baseline enumberator
+ enumerator += (int)(enumerator * q) >> 12;
+ return (int)(0.5 + (enumerator * correction_factor / q));
+}
+
void vp9_save_coding_context(VP9_COMP *cpi) {
CODING_CONTEXT *const cc = &cpi->coding_context;
VP9_COMMON *cm = &cpi->common;
@@ -168,16 +164,20 @@
vp9_copy(cc->last_mode_lf_deltas, xd->last_mode_lf_deltas);
vp9_copy(cc->coef_probs_4x4, cm->fc.coef_probs_4x4);
- vp9_copy(cc->hybrid_coef_probs_4x4, cm->fc.hybrid_coef_probs_4x4);
vp9_copy(cc->coef_probs_8x8, cm->fc.coef_probs_8x8);
- vp9_copy(cc->hybrid_coef_probs_8x8, cm->fc.hybrid_coef_probs_8x8);
vp9_copy(cc->coef_probs_16x16, cm->fc.coef_probs_16x16);
- vp9_copy(cc->hybrid_coef_probs_16x16, cm->fc.hybrid_coef_probs_16x16);
vp9_copy(cc->coef_probs_32x32, cm->fc.coef_probs_32x32);
vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
#if CONFIG_COMP_INTERINTRA_PRED
cc->interintra_prob = cm->fc.interintra_prob;
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_copy(cc->nzc_probs_4x4, cm->fc.nzc_probs_4x4);
+ vp9_copy(cc->nzc_probs_8x8, cm->fc.nzc_probs_8x8);
+ vp9_copy(cc->nzc_probs_16x16, cm->fc.nzc_probs_16x16);
+ vp9_copy(cc->nzc_probs_32x32, cm->fc.nzc_probs_32x32);
+ vp9_copy(cc->nzc_pcat_probs, cm->fc.nzc_pcat_probs);
+#endif
}
void vp9_restore_coding_context(VP9_COMP *cpi) {
@@ -226,89 +226,55 @@
vp9_copy(xd->last_mode_lf_deltas, cc->last_mode_lf_deltas);
vp9_copy(cm->fc.coef_probs_4x4, cc->coef_probs_4x4);
- vp9_copy(cm->fc.hybrid_coef_probs_4x4, cc->hybrid_coef_probs_4x4);
vp9_copy(cm->fc.coef_probs_8x8, cc->coef_probs_8x8);
- vp9_copy(cm->fc.hybrid_coef_probs_8x8, cc->hybrid_coef_probs_8x8);
vp9_copy(cm->fc.coef_probs_16x16, cc->coef_probs_16x16);
- vp9_copy(cm->fc.hybrid_coef_probs_16x16, cc->hybrid_coef_probs_16x16);
vp9_copy(cm->fc.coef_probs_32x32, cc->coef_probs_32x32);
vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
#if CONFIG_COMP_INTERINTRA_PRED
cm->fc.interintra_prob = cc->interintra_prob;
#endif
+#if CONFIG_CODE_NONZEROCOUNT
+ vp9_copy(cm->fc.nzc_probs_4x4, cc->nzc_probs_4x4);
+ vp9_copy(cm->fc.nzc_probs_8x8, cc->nzc_probs_8x8);
+ vp9_copy(cm->fc.nzc_probs_16x16, cc->nzc_probs_16x16);
+ vp9_copy(cm->fc.nzc_probs_32x32, cc->nzc_probs_32x32);
+ vp9_copy(cm->fc.nzc_pcat_probs, cc->nzc_pcat_probs);
+#endif
}
-
void vp9_setup_key_frame(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
- // Setup for Key frame:
- vp9_default_coef_probs(& cpi->common);
- vp9_kf_default_bmode_probs(cpi->common.kf_bmode_prob);
- vp9_init_mbmode_probs(& cpi->common);
- vp9_default_bmode_probs(cm->fc.bmode_prob);
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
- if(cm->last_frame_seg_map)
- vpx_memset(cm->last_frame_seg_map, 0, (cm->mb_rows * cm->mb_cols));
+ vp9_setup_past_independence(cm, xd);
- vp9_init_mv_probs(& cpi->common);
-
- // cpi->common.filter_level = 0; // Reset every key frame.
- cpi->common.filter_level = cpi->common.base_qindex * 3 / 8;
-
// interval before next GF
cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
-
- cpi->common.refresh_golden_frame = TRUE;
- cpi->common.refresh_alt_ref_frame = TRUE;
-
- vp9_init_mode_contexts(&cpi->common);
- vpx_memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
- vpx_memcpy(&cpi->common.lfc_a, &cpi->common.fc, sizeof(cpi->common.fc));
-
- vpx_memset(cm->prev_mip, 0,
- (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
- vpx_memset(cm->mip, 0,
- (cm->mb_cols + 1) * (cm->mb_rows + 1)* sizeof(MODE_INFO));
-
- vp9_update_mode_info_border(cm, cm->mip);
- vp9_update_mode_info_in_image(cm, cm->mi);
-
-#if CONFIG_NEW_MVREF
- if (1) {
- MACROBLOCKD *xd = &cpi->mb.e_mbd;
-
- // Defaults probabilities for encoding the MV ref id signal
- vpx_memset(xd->mb_mv_ref_probs, VP9_DEFAULT_MV_REF_PROB,
- sizeof(xd->mb_mv_ref_probs));
- }
-#endif
+ /* All buffers are implicitly updated on key frames. */
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
}
void vp9_setup_inter_frame(VP9_COMP *cpi) {
- if (cpi->common.refresh_alt_ref_frame) {
- vpx_memcpy(&cpi->common.fc,
- &cpi->common.lfc_a,
- sizeof(cpi->common.fc));
- } else {
- vpx_memcpy(&cpi->common.fc,
- &cpi->common.lfc,
- sizeof(cpi->common.fc));
- }
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ if (cm->error_resilient_mode)
+ vp9_setup_past_independence(cm, xd);
+
+ assert(cm->frame_context_idx < NUM_FRAME_CONTEXTS);
+ vpx_memcpy(&cm->fc, &cm->frame_contexts[cm->frame_context_idx],
+ sizeof(cm->fc));
}
-
-static int estimate_bits_at_q(int frame_kind, int Q, int MBs,
+static int estimate_bits_at_q(int frame_kind, int q, int mbs,
double correction_factor) {
- int Bpm = (int)(.5 + correction_factor * vp9_bits_per_mb(frame_kind, Q));
+ const int bpm = (int)(vp9_bits_per_mb(frame_kind, q, correction_factor));
- /* Attempt to retain reasonable accuracy without overflow. The cutoff is
- * chosen such that the maximum product of Bpm and MBs fits 31 bits. The
- * largest Bpm takes 20 bits.
- */
- if (MBs > (1 << 11))
- return (Bpm >> BPER_MB_NORMBITS) * MBs;
- else
- return (Bpm * MBs) >> BPER_MB_NORMBITS;
+ // Attempt to retain reasonable accuracy without overflow. The cutoff is
+ // chosen such that the maximum product of Bpm and MBs fits 31 bits. The
+ // largest Bpm takes 20 bits.
+ return (mbs > (1 << 11)) ? (bpm >> BPER_MB_NORMBITS) * mbs
+ : (bpm * mbs) >> BPER_MB_NORMBITS;
}
@@ -331,7 +297,6 @@
}
cpi->this_frame_target = target;
-
}
@@ -347,25 +312,15 @@
static void calc_pframe_target_size(VP9_COMP *cpi) {
- int min_frame_target;
-
- min_frame_target = 0;
-
- min_frame_target = cpi->min_frame_bandwidth;
-
- if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5))
- min_frame_target = cpi->av_per_frame_bandwidth >> 5;
-
-
- // Special alt reference frame case
- if (cpi->common.refresh_alt_ref_frame) {
+ const int min_frame_target = MAX(cpi->min_frame_bandwidth,
+ cpi->av_per_frame_bandwidth >> 5);
+ if (cpi->refresh_alt_ref_frame) {
+ // Special alt reference frame case
// Per frame bit target for the alt ref frame
cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
cpi->this_frame_target = cpi->per_frame_bandwidth;
- }
-
- // Normal frames (gf,and inter)
- else {
+ } else {
+ // Normal frames (gf,and inter)
cpi->this_frame_target = cpi->per_frame_bandwidth;
}
@@ -377,16 +332,16 @@
if (cpi->this_frame_target < min_frame_target)
cpi->this_frame_target = min_frame_target;
- if (!cpi->common.refresh_alt_ref_frame)
+ if (!cpi->refresh_alt_ref_frame)
// Note the baseline target data rate for this inter frame.
cpi->inter_frame_target = cpi->this_frame_target;
// Adjust target frame size for Golden Frames:
if (cpi->frames_till_gf_update_due == 0) {
- // int Boost = 0;
- int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
+ const int q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME]
+ : cpi->oxcf.fixed_q;
- cpi->common.refresh_golden_frame = TRUE;
+ cpi->refresh_golden_frame = 1;
calc_gf_params(cpi);
@@ -398,17 +353,17 @@
// The spend on the GF is defined in the two pass code
// for two pass encodes
cpi->this_frame_target = cpi->per_frame_bandwidth;
- } else
+ } else {
cpi->this_frame_target =
- (estimate_bits_at_q(1, Q, cpi->common.MBs, 1.0)
+ (estimate_bits_at_q(1, q, cpi->common.MBs, 1.0)
* cpi->last_boost) / 100;
+ }
- }
- // If there is an active ARF at this location use the minimum
- // bits on this frame even if it is a contructed arf.
- // The active maximum quantizer insures that an appropriate
- // number of bits will be spent if needed for contstructed ARFs.
- else {
+ } else {
+ // If there is an active ARF at this location use the minimum
+ // bits on this frame even if it is a contructed arf.
+ // The active maximum quantizer insures that an appropriate
+ // number of bits will be spent if needed for contstructed ARFs.
cpi->this_frame_target = 0;
}
@@ -418,12 +373,12 @@
void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
- int Q = cpi->common.base_qindex;
- int correction_factor = 100;
+ const int q = cpi->common.base_qindex;
+ int correction_factor = 100;
double rate_correction_factor;
double adjustment_limit;
- int projected_size_based_on_q = 0;
+ int projected_size_based_on_q = 0;
// Clear down mmx registers to allow floating point in what follows
vp9_clear_system_state(); // __asm emms;
@@ -431,36 +386,19 @@
if (cpi->common.frame_type == KEY_FRAME) {
rate_correction_factor = cpi->key_frame_rate_correction_factor;
} else {
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
rate_correction_factor = cpi->gf_rate_correction_factor;
else
rate_correction_factor = cpi->rate_correction_factor;
}
- // Work out how big we would have expected the frame to be at this Q given the current correction factor.
+ // Work out how big we would have expected the frame to be at this Q given
+ // the current correction factor.
// Stay in double to avoid int overflow when values are large
- projected_size_based_on_q =
- (int)(((.5 + rate_correction_factor *
- vp9_bits_per_mb(cpi->common.frame_type, Q)) *
- cpi->common.MBs) / (1 << BPER_MB_NORMBITS));
+ projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q,
+ cpi->common.MBs,
+ rate_correction_factor);
- // Make some allowance for cpi->zbin_over_quant
- if (cpi->zbin_over_quant > 0) {
- int Z = cpi->zbin_over_quant;
- double Factor = 0.99;
- double factor_adjustment = 0.01 / 256.0; // (double)ZBIN_OQ_MAX;
-
- while (Z > 0) {
- Z--;
- projected_size_based_on_q =
- (int)(Factor * projected_size_based_on_q);
- Factor += factor_adjustment;
-
- if (Factor >= 0.999)
- Factor = 0.999;
- }
- }
-
// Work out a size correction factor.
// if ( cpi->this_frame_target > 0 )
// correction_factor = (100 * cpi->projected_frame_size) / cpi->this_frame_target;
@@ -505,7 +443,7 @@
if (cpi->common.frame_type == KEY_FRAME)
cpi->key_frame_rate_correction_factor = rate_correction_factor;
else {
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
cpi->gf_rate_correction_factor = rate_correction_factor;
else
cpi->rate_correction_factor = rate_correction_factor;
@@ -514,7 +452,7 @@
int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame) {
- int Q = cpi->active_worst_quality;
+ int q = cpi->active_worst_quality;
int i;
int last_error = INT_MAX;
@@ -522,14 +460,11 @@
int bits_per_mb_at_this_q;
double correction_factor;
- // Reset Zbin OQ value
- cpi->zbin_over_quant = 0;
-
// Select the appropriate correction factor based upon type of frame.
if (cpi->common.frame_type == KEY_FRAME)
correction_factor = cpi->key_frame_rate_correction_factor;
else {
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)
+ if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
correction_factor = cpi->gf_rate_correction_factor;
else
correction_factor = cpi->rate_correction_factor;
@@ -544,61 +479,22 @@
i = cpi->active_best_quality;
do {
- bits_per_mb_at_this_q =
- (int)(.5 + correction_factor *
- vp9_bits_per_mb(cpi->common.frame_type, i));
+ bits_per_mb_at_this_q = (int)vp9_bits_per_mb(cpi->common.frame_type, i,
+ correction_factor);
if (bits_per_mb_at_this_q <= target_bits_per_mb) {
if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
- Q = i;
+ q = i;
else
- Q = i - 1;
+ q = i - 1;
break;
- } else
+ } else {
last_error = bits_per_mb_at_this_q - target_bits_per_mb;
+ }
} while (++i <= cpi->active_worst_quality);
-
- // If we are at MAXQ then enable Q over-run which seeks to claw back additional bits through things like
- // the RD multiplier and zero bin size.
- if (Q >= MAXQ) {
- int zbin_oqmax;
-
- double Factor = 0.99;
- double factor_adjustment = 0.01 / 256.0; // (double)ZBIN_OQ_MAX;
-
- if (cpi->common.frame_type == KEY_FRAME)
- zbin_oqmax = 0; // ZBIN_OQ_MAX/16
- else if (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active))
- zbin_oqmax = 16;
- else
- zbin_oqmax = ZBIN_OQ_MAX;
-
- // Each incrment in the zbin is assumed to have a fixed effect on bitrate. This is not of course true.
- // The effect will be highly clip dependent and may well have sudden steps.
- // The idea here is to acheive higher effective quantizers than the normal maximum by expanding the zero
- // bin and hence decreasing the number of low magnitude non zero coefficients.
- while (cpi->zbin_over_quant < zbin_oqmax) {
- cpi->zbin_over_quant++;
-
- if (cpi->zbin_over_quant > zbin_oqmax)
- cpi->zbin_over_quant = zbin_oqmax;
-
- // Adjust bits_per_mb_at_this_q estimate
- bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q);
- Factor += factor_adjustment;
-
- if (Factor >= 0.999)
- Factor = 0.999;
-
- if (bits_per_mb_at_this_q <= target_bits_per_mb) // Break out if we get down to the target rate
- break;
- }
-
- }
-
- return Q;
+ return q;
}
@@ -643,7 +539,7 @@
total_weight += prior_key_frame_weight[i];
}
- av_key_frame_frequency /= total_weight;
+ av_key_frame_frequency /= total_weight;
}
return av_key_frame_frequency;
@@ -671,7 +567,7 @@
*frame_over_shoot_limit = cpi->this_frame_target * 9 / 8;
*frame_under_shoot_limit = cpi->this_frame_target * 7 / 8;
} else {
- if (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) {
+ if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) {
*frame_over_shoot_limit = cpi->this_frame_target * 9 / 8;
*frame_under_shoot_limit = cpi->this_frame_target * 7 / 8;
} else {
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -16,23 +16,24 @@
#define FRAME_OVERHEAD_BITS 200
-extern void vp9_save_coding_context(VP9_COMP *cpi);
-extern void vp9_restore_coding_context(VP9_COMP *cpi);
+void vp9_save_coding_context(VP9_COMP *cpi);
+void vp9_restore_coding_context(VP9_COMP *cpi);
-extern void vp9_setup_key_frame(VP9_COMP *cpi);
-extern void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
-extern int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame);
-extern void vp9_adjust_key_frame_context(VP9_COMP *cpi);
-extern void vp9_compute_frame_size_bounds(VP9_COMP *cpi,
- int *frame_under_shoot_limit,
- int *frame_over_shoot_limit);
+void vp9_setup_key_frame(VP9_COMP *cpi);
+void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
+int vp9_regulate_q(VP9_COMP *cpi, int target_bits_per_frame);
+void vp9_adjust_key_frame_context(VP9_COMP *cpi);
+void vp9_compute_frame_size_bounds(VP9_COMP *cpi,
+ int *frame_under_shoot_limit,
+ int *frame_over_shoot_limit);
// return of 0 means drop frame
-extern int vp9_pick_frame_size(VP9_COMP *cpi);
+int vp9_pick_frame_size(VP9_COMP *cpi);
-extern double vp9_convert_qindex_to_q(int qindex);
-extern int vp9_gfboost_qadjust(int qindex);
-extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex);
+double vp9_convert_qindex_to_q(int qindex);
+int vp9_gfboost_qadjust(int qindex);
+extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+ double correction_factor);
void vp9_setup_inter_frame(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_RATECTRL_H_
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -23,7 +23,6 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
-#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
@@ -151,21 +150,70 @@
static void fill_token_costs(vp9_coeff_count *c,
vp9_coeff_probs *p,
int block_type_counts) {
- int i, j, k;
+ int i, j, k, l;
for (i = 0; i < block_type_counts; i++)
- for (j = 0; j < COEF_BANDS; j++)
- for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
- if (k == 0 && ((j > 0 && i > 0) || (j > 1 && i == 0)))
- vp9_cost_tokens_skip((int *)(c[i][j][k]),
- p[i][j][k],
+ for (j = 0; j < REF_TYPES; j++)
+ for (k = 0; k < COEF_BANDS; k++)
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
+ p[i][j][k][l],
vp9_coef_tree);
- else
- vp9_cost_tokens((int *)(c[i][j][k]),
- p[i][j][k],
- vp9_coef_tree);
+ }
+}
+
+#if CONFIG_CODE_NONZEROCOUNT
+static void fill_nzc_costs(VP9_COMP *cpi, int block_size) {
+ int nzc_context, r, b, nzc, values;
+ int cost[16];
+ values = block_size * block_size + 1;
+
+ for (nzc_context = 0; nzc_context < MAX_NZC_CONTEXTS; ++nzc_context) {
+ for (r = 0; r < REF_TYPES; ++r) {
+ for (b = 0; b < BLOCK_TYPES; ++b) {
+ unsigned int *nzc_costs;
+ if (block_size == 4) {
+ vp9_cost_tokens(cost,
+ cpi->common.fc.nzc_probs_4x4[nzc_context][r][b],
+ vp9_nzc4x4_tree);
+ nzc_costs = cpi->mb.nzc_costs_4x4[nzc_context][r][b];
+ } else if (block_size == 8) {
+ vp9_cost_tokens(cost,
+ cpi->common.fc.nzc_probs_8x8[nzc_context][r][b],
+ vp9_nzc8x8_tree);
+ nzc_costs = cpi->mb.nzc_costs_8x8[nzc_context][r][b];
+ } else if (block_size == 16) {
+ vp9_cost_tokens(cost,
+ cpi->common.fc.nzc_probs_16x16[nzc_context][r][b],
+ vp9_nzc16x16_tree);
+ nzc_costs = cpi->mb.nzc_costs_16x16[nzc_context][r][b];
+ } else {
+ vp9_cost_tokens(cost,
+ cpi->common.fc.nzc_probs_32x32[nzc_context][r][b],
+ vp9_nzc32x32_tree);
+ nzc_costs = cpi->mb.nzc_costs_32x32[nzc_context][r][b];
+ }
+
+ for (nzc = 0; nzc < values; ++nzc) {
+ int e, c, totalcost = 0;
+ c = codenzc(nzc);
+ totalcost = cost[c];
+ if ((e = vp9_extranzcbits[c])) {
+ int x = nzc - vp9_basenzcvalue[c];
+ while (e--) {
+ totalcost += vp9_cost_bit(
+ cpi->common.fc.nzc_pcat_probs[nzc_context]
+ [c - NZC_TOKENS_NOEXTRA][e],
+ ((x >> e) & 1));
+ }
+ }
+ nzc_costs[nzc] = totalcost;
+ }
}
+ }
+ }
}
+#endif
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
@@ -193,19 +241,17 @@
}
static int compute_rd_mult(int qindex) {
- int q;
-
- q = vp9_dc_quant(qindex, 0);
- return (11 * q * q) >> 6;
+ int q = vp9_dc_quant(qindex, 0);
+ return (11 * q * q) >> 2;
}
-void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
- cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex];
- cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex];
+void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
+ cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
+ cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}
-void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
+void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
int q, i;
vp9_clear_system_state(); // __asm emms;
@@ -214,40 +260,23 @@
// for key frames, golden frames and arf frames.
// if (cpi->common.refresh_golden_frame ||
// cpi->common.refresh_alt_ref_frame)
- QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);
+ qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex);
- cpi->RDMULT = compute_rd_mult(QIndex);
-
- // Extend rate multiplier along side quantizer zbin increases
- if (cpi->zbin_over_quant > 0) {
- double oq_factor;
-
- // Experimental code using the same basic equation as used for Q above
- // The units of cpi->zbin_over_quant are 1/128 of Q bin size
- oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
- cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor);
- }
-
+ cpi->RDMULT = compute_rd_mult(qindex);
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
if (cpi->twopass.next_iiratio > 31)
cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
else
cpi->RDMULT +=
- (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
+ (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
}
-
- if (cpi->RDMULT < 7)
- cpi->RDMULT = 7;
-
- cpi->mb.errorperbit = (cpi->RDMULT / 110);
+ cpi->mb.errorperbit = cpi->RDMULT >> 6;
cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
vp9_set_speed_features(cpi);
- q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
- q = q << 2;
- cpi->RDMULT = cpi->RDMULT << 4;
-
+ q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
+ q <<= 2;
if (q < 8)
q = 8;
@@ -279,22 +308,19 @@
}
fill_token_costs(cpi->mb.token_costs[TX_4X4],
- cpi->common.fc.coef_probs_4x4, BLOCK_TYPES_4X4);
- fill_token_costs(cpi->mb.hybrid_token_costs[TX_4X4],
- cpi->common.fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4);
-
+ cpi->common.fc.coef_probs_4x4, BLOCK_TYPES);
fill_token_costs(cpi->mb.token_costs[TX_8X8],
- cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8);
- fill_token_costs(cpi->mb.hybrid_token_costs[TX_8X8],
- cpi->common.fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8);
-
+ cpi->common.fc.coef_probs_8x8, BLOCK_TYPES);
fill_token_costs(cpi->mb.token_costs[TX_16X16],
- cpi->common.fc.coef_probs_16x16, BLOCK_TYPES_16X16);
- fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16],
- cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16);
-
+ cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
fill_token_costs(cpi->mb.token_costs[TX_32X32],
- cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);
+ cpi->common.fc.coef_probs_32x32, BLOCK_TYPES);
+#if CONFIG_CODE_NONZEROCOUNT
+ fill_nzc_costs(cpi, 4);
+ fill_nzc_costs(cpi, 8);
+ fill_nzc_costs(cpi, 16);
+ fill_nzc_costs(cpi, 32);
+#endif
/*rough estimate for costing*/
cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
@@ -321,36 +347,17 @@
return error;
}
-int vp9_mbblock_error_8x8_c(MACROBLOCK *mb, int dc) {
+int vp9_mbblock_error_c(MACROBLOCK *mb) {
BLOCK *be;
BLOCKD *bd;
int i, j;
int berror, error = 0;
- for (i = 0; i < 16; i+=4) {
- be = &mb->block[i];
- bd = &mb->e_mbd.block[i];
- berror = 0;
- for (j = dc; j < 64; j++) {
- int this_diff = be->coeff[j] - bd->dqcoeff[j];
- berror += this_diff * this_diff;
- }
- error += berror;
- }
- return error;
-}
-
-int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) {
- BLOCK *be;
- BLOCKD *bd;
- int i, j;
- int berror, error = 0;
-
for (i = 0; i < 16; i++) {
be = &mb->block[i];
bd = &mb->e_mbd.block[i];
berror = 0;
- for (j = dc; j < 16; j++) {
+ for (j = 0; j < 16; j++) {
int this_diff = be->coeff[j] - bd->dqcoeff[j];
berror += this_diff * this_diff;
}
@@ -417,75 +424,143 @@
sse2 += sse1;
}
return sse2;
-
}
-#if CONFIG_NEWCOEFCONTEXT
-#define PT pn
-#else
-#define PT pt
-#endif
-static int cost_coeffs(MACROBLOCK *mb,
- BLOCKD *b, PLANE_TYPE type,
- ENTROPY_CONTEXT *a,
- ENTROPY_CONTEXT *l,
- TX_SIZE tx_size) {
+static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
+ int ib, PLANE_TYPE type,
+ ENTROPY_CONTEXT *a,
+ ENTROPY_CONTEXT *l,
+ TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt;
- const int eob = b->eob;
- MACROBLOCKD *xd = &mb->e_mbd;
- const int ib = (int)(b - xd->block);
- int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
- int cost = 0, seg_eob;
+ const int eob = xd->eobs[ib];
+ int c = 0;
+ int cost = 0, pad;
+ const int *scan, *nb;
+ const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
+ const int ref = mbmi->ref_frame != INTRA_FRAME;
+ unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
+ mb->token_costs[tx_size][type][ref];
+ ENTROPY_CONTEXT a_ec, l_ec;
+ ENTROPY_CONTEXT *const a1 = a +
+ sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
+ ENTROPY_CONTEXT *const l1 = l +
+ sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
+
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc_context = vp9_get_nzc_context(cm, xd, ib);
+ unsigned int *nzc_cost;
+#else
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- const int *scan, *band;
- int16_t *qcoeff_ptr = b->qcoeff;
- const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, b) : DCT_DCT;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
- int pn;
+ vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
+ [ENTROPY_NODES];
#endif
+ int seg_eob, default_eob;
+ uint8_t token_cache[1024];
- ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
+ // Check for consistency of tx_size with mode info
+ if (type == PLANE_TYPE_Y_WITH_DC) {
+ assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
+ } else {
+ TX_SIZE tx_size_uv = get_uv_tx_size(xd);
+ assert(tx_size == tx_size_uv);
+ }
switch (tx_size) {
- case TX_4X4:
- scan = vp9_default_zig_zag1d_4x4;
- band = vp9_coef_bands_4x4;
+ case TX_4X4: {
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_4x4(xd, ib) : DCT_DCT;
+ a_ec = *a;
+ l_ec = *l;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type];
+#else
+ coef_probs = cm->fc.coef_probs_4x4;
+#endif
seg_eob = 16;
- if (type == PLANE_TYPE_Y_WITH_DC) {
- if (tx_type == ADST_DCT) {
- scan = vp9_row_scan_4x4;
- } else if (tx_type == DCT_ADST) {
- scan = vp9_col_scan_4x4;
- }
+ if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_4x4;
+ } else if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_4x4;
+ } else {
+ scan = vp9_default_zig_zag1d_4x4;
}
break;
- case TX_8X8:
- if (type == PLANE_TYPE_Y2) {
- scan = vp9_default_zig_zag1d_4x4;
- band = vp9_coef_bands_4x4;
- seg_eob = 4;
+ }
+ case TX_8X8: {
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+ const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
+ a_ec = (a[0] + a[1]) != 0;
+ l_ec = (l[0] + l[1]) != 0;
+ if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_8x8;
+ } else if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_8x8;
} else {
scan = vp9_default_zig_zag1d_8x8;
- band = vp9_coef_bands_8x8;
- seg_eob = 64;
}
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
+#else
+ coef_probs = cm->fc.coef_probs_8x8;
+#endif
+ seg_eob = 64;
break;
- case TX_16X16:
- scan = vp9_default_zig_zag1d_16x16;
- band = vp9_coef_bands_16x16;
+ }
+ case TX_16X16: {
+ const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+ const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
+ if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_16x16;
+ } else if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_16x16;
+ } else {
+ scan = vp9_default_zig_zag1d_16x16;
+ }
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
+#else
+ coef_probs = cm->fc.coef_probs_16x16;
+#endif
seg_eob = 256;
if (type == PLANE_TYPE_UV) {
- const int uv_idx = ib - 16;
- qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
+ a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
+ } else {
+ a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
}
break;
+ }
case TX_32X32:
scan = vp9_default_zig_zag1d_32x32;
- band = vp9_coef_bands_32x32;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type];
+#else
+ coef_probs = cm->fc.coef_probs_32x32;
+#endif
seg_eob = 1024;
- qcoeff_ptr = xd->sb_coeff_data.qcoeff;
+ if (type == PLANE_TYPE_UV) {
+ ENTROPY_CONTEXT *a2, *a3, *l2, *l3;
+ a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a_ec = (a[0] + a[1] + a1[0] + a1[1] +
+ a2[0] + a2[1] + a3[0] + a3[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1] +
+ l2[0] + l2[1] + l3[0] + l3[1]) != 0;
+ } else {
+ a_ec = (a[0] + a[1] + a[2] + a[3] +
+ a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3] +
+ l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ }
break;
default:
abort();
@@ -493,202 +568,152 @@
}
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
- pn = pt;
+ nb = vp9_get_coef_neighbors_handle(scan, &pad);
+ default_eob = seg_eob;
+
+#if CONFIG_CODE_NONZEROCOUNT == 0
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
+ seg_eob = 0;
#endif
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB))
- seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
-
- if (tx_type != DCT_DCT) {
- for (; c < eob; c++) {
- int v = qcoeff_ptr[scan[c]];
- int t = vp9_dct_value_tokens_ptr[v].Token;
- cost += mb->hybrid_token_costs[tx_size][type][band[c]][PT][t];
- cost += vp9_dct_value_cost_ptr[v];
- pt = vp9_prev_token_class[t];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
+ {
+#if CONFIG_CODE_NONZEROCOUNT
+ int nzc = 0;
#endif
- }
- if (c < seg_eob)
- cost += mb->hybrid_token_costs[tx_size][type][band[c]]
- [PT][DCT_EOB_TOKEN];
- } else {
for (; c < eob; c++) {
int v = qcoeff_ptr[scan[c]];
int t = vp9_dct_value_tokens_ptr[v].Token;
- cost += mb->token_costs[tx_size][type][band[c]][pt][t];
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc += (v != 0);
+#endif
+ token_cache[c] = t;
+ cost += token_costs[get_coef_band(scan, tx_size, c)][pt][t];
cost += vp9_dct_value_cost_ptr[v];
- pt = vp9_prev_token_class[t];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(band[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
+#if !CONFIG_CODE_NONZEROCOUNT
+ if (!c || token_cache[c - 1])
+ cost += vp9_cost_bit(coef_probs[type][ref]
+ [get_coef_band(scan, tx_size, c)]
+ [pt][0], 1);
#endif
+ pt = vp9_get_coef_context(scan, nb, pad, token_cache, c + 1, default_eob);
}
+#if CONFIG_CODE_NONZEROCOUNT
+ cost += nzc_cost[nzc];
+#else
if (c < seg_eob)
- cost += mb->token_costs[tx_size][type][band[c]]
- [PT][DCT_EOB_TOKEN];
+ cost += mb->token_costs[tx_size][type][ref]
+ [get_coef_band(scan, tx_size, c)]
+ [pt][DCT_EOB_TOKEN];
+#endif
}
// is eob first coefficient;
- pt = (c > !type);
+ pt = (c > 0);
*a = *l = pt;
+ if (tx_size >= TX_8X8) {
+ a[1] = l[1] = pt;
+ if (tx_size >= TX_16X16) {
+ if (type == PLANE_TYPE_UV) {
+ a1[0] = a1[1] = l1[0] = l1[1] = pt;
+ } else {
+ a[2] = a[3] = l[2] = l[3] = pt;
+ if (tx_size >= TX_32X32) {
+ a1[0] = a1[1] = a1[2] = a1[3] = pt;
+ l1[0] = l1[1] = l1[2] = l1[3] = pt;
+ }
+ }
+ }
+ }
return cost;
}
-static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) {
+static int rdcost_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *mb) {
int cost = 0;
int b;
MACROBLOCKD *xd = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
- ENTROPY_CONTEXT *ta;
- ENTROPY_CONTEXT *tl;
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;
- if (backup) {
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
- } else {
- ta = (ENTROPY_CONTEXT *)xd->above_context;
- tl = (ENTROPY_CONTEXT *)xd->left_context;
- }
-
for (b = 0; b < 16; b++)
- cost += cost_coeffs(mb, xd->block + b,
- (has_2nd_order ?
- PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC),
+ cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b],
TX_4X4);
- if (has_2nd_order)
- cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
- ta + vp9_block2above[TX_4X4][24],
- tl + vp9_block2left[TX_4X4][24],
- TX_4X4);
-
return cost;
}
-static void macro_block_yrd_4x4(MACROBLOCK *mb,
- int *Rate,
- int *Distortion,
- int *skippable, int backup) {
+static void macro_block_yrd_4x4(VP9_COMMON *const cm,
+ MACROBLOCK *mb,
+ int *rate,
+ int *distortion,
+ int *skippable) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *const mb_y2 = mb->block + 24;
- BLOCKD *const x_y2 = xd->block + 24;
- int d, has_2nd_order;
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- has_2nd_order = get_2nd_order_usage(xd);
- // Fdct and building the 2nd order block
vp9_transform_mby_4x4(mb);
vp9_quantize_mby_4x4(mb);
- d = vp9_mbblock_error(mb, has_2nd_order);
- if (has_2nd_order)
- d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);
- *Distortion = (d >> 2);
- // rate
- *Rate = rdcost_mby_4x4(mb, has_2nd_order, backup);
- *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, has_2nd_order);
+ *distortion = vp9_mbblock_error(mb) >> 2;
+ *rate = rdcost_mby_4x4(cm, mb);
+ *skippable = vp9_mby_is_skippable_4x4(xd);
}
-static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) {
+static int rdcost_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *mb) {
int cost = 0;
int b;
MACROBLOCKD *xd = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
- ENTROPY_CONTEXT *ta;
- ENTROPY_CONTEXT *tl;
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;
- if (backup) {
- vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
- } else {
- ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
- tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
- }
-
for (b = 0; b < 16; b += 4)
- cost += cost_coeffs(mb, xd->block + b,
- (has_2nd_order ?
- PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC),
+ cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b],
TX_8X8);
- if (has_2nd_order)
- cost += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
- ta + vp9_block2above[TX_8X8][24],
- tl + vp9_block2left[TX_8X8][24],
- TX_8X8);
return cost;
}
-static void macro_block_yrd_8x8(MACROBLOCK *mb,
- int *Rate,
- int *Distortion,
- int *skippable, int backup) {
+static void macro_block_yrd_8x8(VP9_COMMON *const cm,
+ MACROBLOCK *mb,
+ int *rate,
+ int *distortion,
+ int *skippable) {
MACROBLOCKD *const xd = &mb->e_mbd;
- BLOCK *const mb_y2 = mb->block + 24;
- BLOCKD *const x_y2 = xd->block + 24;
- int d, has_2nd_order;
xd->mode_info_context->mbmi.txfm_size = TX_8X8;
-
vp9_transform_mby_8x8(mb);
vp9_quantize_mby_8x8(mb);
- has_2nd_order = get_2nd_order_usage(xd);
- d = vp9_mbblock_error_8x8_c(mb, has_2nd_order);
- if (has_2nd_order)
- d += vp9_block_error(mb_y2->coeff, x_y2->dqcoeff, 16);
- *Distortion = (d >> 2);
- // rate
- *Rate = rdcost_mby_8x8(mb, has_2nd_order, backup);
- *skippable = vp9_mby_is_skippable_8x8(&mb->e_mbd, has_2nd_order);
+ *distortion = vp9_mbblock_error(mb) >> 2;
+ *rate = rdcost_mby_8x8(cm, mb);
+ *skippable = vp9_mby_is_skippable_8x8(xd);
}
-static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
- int cost;
- MACROBLOCKD *xd = &mb->e_mbd;
+static int rdcost_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *mb) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
- ENTROPY_CONTEXT *ta, *tl;
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)&t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)&t_left;
- if (backup) {
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
- } else {
- ta = (ENTROPY_CONTEXT *)xd->above_context;
- tl = (ENTROPY_CONTEXT *)xd->left_context;
- }
-
- cost = cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
- return cost;
+ return cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
}
-static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
- int *skippable, int backup) {
- int d;
- MACROBLOCKD *xd = &mb->e_mbd;
+static void macro_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *mb,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
xd->mode_info_context->mbmi.txfm_size = TX_16X16;
vp9_transform_mby_16x16(mb);
@@ -696,15 +721,13 @@
// TODO(jingning) is it possible to quickly determine whether to force
// trailing coefficients to be zero, instead of running trellis
// optimization in the rate-distortion optimization loop?
- if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
- vp9_optimize_mby_16x16(mb);
+ if (mb->optimize &&
+ xd->mode_info_context->mbmi.mode < I8X8_PRED)
+ vp9_optimize_mby_16x16(cm, mb);
- d = vp9_mbblock_error(mb, 0);
-
- *Distortion = (d >> 2);
- // rate
- *Rate = rdcost_mby_16x16(mb, backup);
- *skippable = vp9_mby_is_skippable_16x16(&mb->e_mbd);
+ *distortion = vp9_mbblock_error(mb) >> 2;
+ *rate = rdcost_mby_16x16(cm, mb);
+ *skippable = vp9_mby_is_skippable_16x16(xd);
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
@@ -795,6 +818,7 @@
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int *skippable,
int64_t txfm_cache[NB_TXFM_MODES]) {
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB];
@@ -801,9 +825,9 @@
vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
x->block[0].src_stride);
- macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1);
- macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1);
- macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1);
+ macro_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
+ macro_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
+ macro_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
txfm_cache, TX_16X16);
@@ -818,27 +842,8 @@
d[12] = p[12];
}
-static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
- MACROBLOCKD * const xd = &x->e_mbd;
- ENTROPY_CONTEXT_PLANES t_above, t_left;
- ENTROPY_CONTEXT *ta, *tl;
-
- if (backup) {
- ta = (ENTROPY_CONTEXT *) &t_above,
- tl = (ENTROPY_CONTEXT *) &t_left;
-
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
- } else {
- ta = (ENTROPY_CONTEXT *) xd->above_context;
- tl = (ENTROPY_CONTEXT *) xd->left_context;
- }
-
- return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
-}
-
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
- int block_size) {
+ int block_size, int shift) {
int i;
int64_t error = 0;
@@ -846,38 +851,127 @@
unsigned int this_diff = coeff[i] - dqcoeff[i];
error += this_diff * this_diff;
}
+ error >>= shift;
return error > INT_MAX ? INT_MAX : (int)error;
}
-#define DEBUG_ERROR 0
-static void super_block_yrd_32x32(MACROBLOCK *x,
- int *rate, int *distortion, int *skippable,
- int backup) {
- SUPERBLOCK * const x_sb = &x->sb_coeff_data;
+static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 64; b++)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb[TX_4X4][b],
+ tl + vp9_block2left_sb[TX_4X4][b], TX_4X4);
+
+ return cost;
+}
+
+static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ vp9_transform_sby_4x4(x);
+ vp9_quantize_sby_4x4(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+ *rate = rdcost_sby_4x4(cm, x);
+ *skippable = vp9_sby_is_skippable_4x4(xd);
+}
+
+static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 64; b += 4)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb[TX_8X8][b],
+ tl + vp9_block2left_sb[TX_8X8][b], TX_8X8);
+
+ return cost;
+}
+
+static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+ vp9_transform_sby_8x8(x);
+ vp9_quantize_sby_8x8(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+ *rate = rdcost_sby_8x8(cm, x);
+ *skippable = vp9_sby_is_skippable_8x8(xd);
+}
+
+static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 64; b += 16)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb[TX_16X16][b],
+ tl + vp9_block2left_sb[TX_16X16][b], TX_16X16);
+
+ return cost;
+}
+
+static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_16X16;
+ vp9_transform_sby_16x16(x);
+ vp9_quantize_sby_16x16(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+ *rate = rdcost_sby_16x16(cm, x);
+ *skippable = vp9_sby_is_skippable_16x16(xd);
+}
+
+static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD * const xd = &x->e_mbd;
- SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
-#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID
- int16_t out[1024];
-#endif
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
+}
+
+static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_32X32;
vp9_transform_sby_32x32(x);
vp9_quantize_sby_32x32(x);
-#if DEBUG_ERROR || CONFIG_DWTDCTHYBRID
- vp9_short_idct32x32(xd_sb->dqcoeff, out, 64);
-#endif
-#if !CONFIG_DWTDCTHYBRID
- *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024);
-#else
- *distortion = vp9_block_error_c(x_sb->src_diff, out, 1024) << 4;
-#endif
-#if DEBUG_ERROR
- printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
- vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
-#endif
- *rate = rdcost_sby_32x32(x, backup);
- *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd);
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 0);
+ *rate = rdcost_sby_32x32(cm, x);
+ *skippable = vp9_sby_is_skippable_32x32(xd);
}
static void super_block_yrd(VP9_COMP *cpi,
@@ -884,179 +978,166 @@
MACROBLOCK *x, int *rate, int *distortion,
int *skip,
int64_t txfm_cache[NB_TXFM_MODES]) {
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
+ int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
- ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
- *orig_above = xd->above_context;
- ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
- *orig_left = xd->left_context;
- for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
- vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
- vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
- r[n][0] = 0;
- d[n] = 0;
- s[n] = 1;
- }
+ vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);
+ super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
+ super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
+ super_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
+ super_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
- vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
- dst, dst_y_stride);
- super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
+ choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
+ TX_SIZE_MAX_SB - 1);
+}
-#if DEBUG_ERROR
- int err[3] = { 0, 0, 0 };
-#endif
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
- int r_tmp, d_tmp, s_tmp;
+static int rdcost_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
- vp9_subtract_mby_s_c(x->src_diff,
- src + x_idx * 16 + y_idx * 16 * src_y_stride,
- src_y_stride,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
- dst_y_stride);
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
- xd->above_context = &t_above[TX_16X16][x_idx];
- xd->left_context = &t_left[TX_16X16][y_idx];
- macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_16X16] += d_tmp;
- r[TX_16X16][0] += r_tmp;
- s[TX_16X16] = s[TX_16X16] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_16x16(xd);
- err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
+ for (b = 0; b < 256; b++)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb64[TX_4X4][b],
+ tl + vp9_block2left_sb64[TX_4X4][b], TX_4X4);
- xd->above_context = &t_above[TX_4X4][x_idx];
- xd->left_context = &t_left[TX_4X4][y_idx];
- macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_4X4] += d_tmp;
- r[TX_4X4][0] += r_tmp;
- s[TX_4X4] = s[TX_4X4] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_4x4(xd);
- err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
+ return cost;
+}
- xd->above_context = &t_above[TX_8X8][x_idx];
- xd->left_context = &t_left[TX_8X8][y_idx];
- macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_8X8] += d_tmp;
- r[TX_8X8][0] += r_tmp;
- s[TX_8X8] = s[TX_8X8] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_8x8(xd);
- err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
- }
-#if DEBUG_ERROR
- printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
- printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
- printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
-#endif
- choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
- TX_SIZE_MAX_SB - 1);
+static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
- xd->above_context = orig_above;
- xd->left_context = orig_left;
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ vp9_transform_sb64y_4x4(x);
+ vp9_quantize_sb64y_4x4(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+ *rate = rdcost_sb64y_4x4(cm, x);
+ *skippable = vp9_sb64y_is_skippable_4x4(xd);
}
-static void super_block_64_yrd(VP9_COMP *cpi,
- MACROBLOCK *x, int *rate, int *distortion,
- int *skip,
- int64_t txfm_cache[NB_TXFM_MODES]) {
+static int rdcost_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
MACROBLOCKD *const xd = &x->e_mbd;
- int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
- const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
- int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
- ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4],
- *orig_above = xd->above_context;
- ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4],
- *orig_left = xd->left_context;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
- for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) {
- vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
- vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
- r[n][0] = 0;
- d[n] = 0;
- s[n] = 1;
- }
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
- for (n = 0; n < 4; n++) {
- int x_idx = n & 1, y_idx = n >> 1;
- int r_tmp, d_tmp, s_tmp;
+ for (b = 0; b < 256; b += 4)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb64[TX_8X8][b],
+ tl + vp9_block2left_sb64[TX_8X8][b], TX_8X8);
- xd->above_context = &t_above[TX_32X32][x_idx << 1];
- xd->left_context = &t_left[TX_32X32][y_idx << 1];
- vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
- src + 32 * x_idx + 32 * y_idx * src_y_stride,
- src_y_stride,
- dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
- dst_y_stride);
- super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0);
- r[TX_32X32][0] += r_tmp;
- d[TX_32X32] += d_tmp;
- s[TX_32X32] = s[TX_32X32] && s_tmp;
- }
+ return cost;
+}
-#if DEBUG_ERROR
- int err[3] = { 0, 0, 0 };
-#endif
- for (n = 0; n < 16; n++) {
- int x_idx = n & 3, y_idx = n >> 2;
- int r_tmp, d_tmp, s_tmp;
+static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
- vp9_subtract_mby_s_c(x->src_diff,
- src + x_idx * 16 + y_idx * 16 * src_y_stride,
- src_y_stride,
- dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
- dst_y_stride);
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+ vp9_transform_sb64y_8x8(x);
+ vp9_quantize_sb64y_8x8(x);
- xd->above_context = &t_above[TX_16X16][x_idx];
- xd->left_context = &t_left[TX_16X16][y_idx];
- macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_16X16] += d_tmp;
- r[TX_16X16][0] += r_tmp;
- s[TX_16X16] = s[TX_16X16] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_16x16(xd);
- err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+ *rate = rdcost_sb64y_8x8(cm, x);
+ *skippable = vp9_sb64y_is_skippable_8x8(xd);
+}
- xd->above_context = &t_above[TX_4X4][x_idx];
- xd->left_context = &t_left[TX_4X4][y_idx];
- macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_4X4] += d_tmp;
- r[TX_4X4][0] += r_tmp;
- s[TX_4X4] = s[TX_4X4] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_4x4(xd);
- err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
+static int rdcost_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
- xd->above_context = &t_above[TX_8X8][x_idx];
- xd->left_context = &t_left[TX_8X8][y_idx];
- macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
- d[TX_8X8] += d_tmp;
- r[TX_8X8][0] += r_tmp;
- s[TX_8X8] = s[TX_8X8] && s_tmp;
-#if DEBUG_ERROR
- vp9_inverse_transform_mby_8x8(xd);
- err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
-#endif
- }
-#if DEBUG_ERROR
- printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
- printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
- printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
-#endif
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 256; b += 16)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb64[TX_16X16][b],
+ tl + vp9_block2left_sb64[TX_16X16][b], TX_16X16);
+
+ return cost;
+}
+
+static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion,
+ int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_16X16;
+ vp9_transform_sb64y_16x16(x);
+ vp9_quantize_sb64y_16x16(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+ *rate = rdcost_sb64y_16x16(cm, x);
+ *skippable = vp9_sb64y_is_skippable_16x16(xd);
+}
+
+static int rdcost_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
+ int cost = 0, b;
+ MACROBLOCKD * const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above;
+ ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) &t_left;
+
+ vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
+
+ for (b = 0; b < 256; b += 64)
+ cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above_sb64[TX_32X32][b],
+ tl + vp9_block2left_sb64[TX_32X32][b], TX_32X32);
+
+ return cost;
+}
+
+static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion,
+ int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_32X32;
+ vp9_transform_sb64y_32x32(x);
+ vp9_quantize_sb64y_32x32(x);
+
+ *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 0);
+ *rate = rdcost_sb64y_32x32(cm, x);
+ *skippable = vp9_sb64y_is_skippable_32x32(xd);
+}
+
+static void super_block_64_yrd(VP9_COMP *cpi,
+ MACROBLOCK *x, int *rate, int *distortion,
+ int *skip,
+ int64_t txfm_cache[NB_TXFM_MODES]) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
+ const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
+ int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+
+ vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);
+ super_block64_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
+ super_block64_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
+ super_block64_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
+ super_block64_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
+
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
TX_SIZE_MAX_SB - 1);
-
- xd->above_context = orig_above;
- xd->left_context = orig_left;
}
static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
@@ -1091,6 +1172,7 @@
int64_t best_rd = INT64_MAX;
int rate = 0;
int distortion;
+ VP9_COMMON *const cm = &cpi->common;
ENTROPY_CONTEXT ta = *a, tempa = *a;
ENTROPY_CONTEXT tl = *l, templ = *l;
@@ -1105,8 +1187,9 @@
DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
#if CONFIG_NEWBINTRAMODES
- b->bmi.as_mode.context = vp9_find_bpred_context(b);
+ b->bmi.as_mode.context = vp9_find_bpred_context(xd, b);
#endif
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
int64_t this_rd;
int ratey;
@@ -1129,23 +1212,24 @@
rate = bmode_costs[mode];
#endif
- vp9_intra4x4_predict(b, mode, b->predictor);
+ vp9_intra4x4_predict(xd, b, mode, b->predictor);
vp9_subtract_b(be, b, 16);
b->bmi.as_mode.first = mode;
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, be - x->block);
if (tx_type != DCT_DCT) {
- vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
+ vp9_ht_quantize_b_4x4(x, be - x->block, tx_type);
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, be - x->block);
}
tempa = ta;
templ = tl;
- ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
+ ratey = cost_coeffs(cm, x, b - xd->block,
+ PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
rate += ratey;
distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2;
@@ -1168,9 +1252,9 @@
// inverse transform
if (best_tx_type != DCT_DCT)
- vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
+ vp9_short_iht4x4(best_dqcoeff, b->diff, 16, best_tx_type);
else
- xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
+ xd->inv_txm4x4(best_dqcoeff, b->diff, 32);
vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -1179,8 +1263,7 @@
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
int *Rate, int *rate_y,
- int *Distortion, int64_t best_rd,
- int update_contexts) {
+ int *Distortion, int64_t best_rd) {
int i;
MACROBLOCKD *const xd = &mb->e_mbd;
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
@@ -1191,18 +1274,13 @@
ENTROPY_CONTEXT *ta, *tl;
int *bmode_costs;
- if (update_contexts) {
- ta = (ENTROPY_CONTEXT *)xd->above_context;
- tl = (ENTROPY_CONTEXT *)xd->left_context;
- } else {
- vpx_memcpy(&t_above, xd->above_context,
- sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context,
- sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context,
+ sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, xd->left_context,
+ sizeof(ENTROPY_CONTEXT_PLANES));
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
- }
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
xd->mode_info_context->mbmi.mode = B_PRED;
bmode_costs = mb->inter_bmode_costs;
@@ -1220,7 +1298,7 @@
bmode_costs = mb->bmode_costs[A][L];
}
#if CONFIG_NEWBINTRAMODES
- mic->bmi[i].as_mode.context = vp9_find_bpred_context(xd->block + i);
+ mic->bmi[i].as_mode.context = vp9_find_bpred_context(xd, xd->block + i);
#endif
total_rd += rd_pick_intra4x4block(
@@ -1401,6 +1479,7 @@
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int *bestrate, int *bestratey,
int *bestdistortion) {
+ VP9_COMMON *const cm = &cpi->common;
MB_PREDICTION_MODE mode;
MACROBLOCKD *xd = &x->e_mbd;
int64_t best_rd = INT64_MAX;
@@ -1407,8 +1486,9 @@
int distortion = 0, rate = 0;
BLOCK *be = x->block + ib;
BLOCKD *b = xd->block + ib;
- ENTROPY_CONTEXT ta0, ta1, besta0 = 0, besta1 = 0;
- ENTROPY_CONTEXT tl0, tl1, bestl0 = 0, bestl1 = 0;
+ ENTROPY_CONTEXT_PLANES ta, tl;
+ ENTROPY_CONTEXT *ta0, *ta1, besta0 = 0, besta1 = 0;
+ ENTROPY_CONTEXT *tl0, *tl1, bestl0 = 0, bestl1 = 0;
/*
* The predictor buffer is a 2d buffer with a stride of 16. Create
@@ -1430,58 +1510,76 @@
rate = mode_costs[mode];
b->bmi.as_mode.first = mode;
- vp9_intra8x8_predict(b, mode, b->predictor);
+ vp9_intra8x8_predict(xd, b, mode, b->predictor);
vp9_subtract_4b_c(be, b, 16);
- assert(get_2nd_order_usage(xd) == 0);
if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
- TX_TYPE tx_type = get_tx_type_8x8(xd, b);
+ TX_TYPE tx_type = get_tx_type_8x8(xd, ib);
if (tx_type != DCT_DCT)
- vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
+ vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
else
- x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
- x->quantize_b_8x8(x->block + idx, xd->block + idx);
+ x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ x->quantize_b_8x8(x, idx, tx_type);
// compute quantization mse of 8x8 block
distortion = vp9_block_error_c((x->block + idx)->coeff,
(xd->block + idx)->dqcoeff, 64);
- ta0 = a[vp9_block2above[TX_8X8][idx]];
- tl0 = l[vp9_block2left[TX_8X8][idx]];
- rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC,
- &ta0, &tl0, TX_8X8);
+ vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES));
+ ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_8X8][idx];
+ tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_8X8][idx];
+ ta1 = ta0 + 1;
+ tl1 = tl0 + 1;
+
+ rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
+ ta0, tl0, TX_8X8);
+
rate += rate_t;
- ta1 = ta0;
- tl1 = tl0;
} else {
static const int iblock[4] = {0, 1, 4, 5};
TX_TYPE tx_type;
int i;
- ta0 = a[vp9_block2above[TX_4X4][ib]];
- ta1 = a[vp9_block2above[TX_4X4][ib + 1]];
- tl0 = l[vp9_block2left[TX_4X4][ib]];
- tl1 = l[vp9_block2left[TX_4X4][ib + 4]];
+ vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES));
+ ta0 = ((ENTROPY_CONTEXT*)&ta) + vp9_block2above[TX_4X4][ib];
+ tl0 = ((ENTROPY_CONTEXT*)&tl) + vp9_block2left[TX_4X4][ib];
+ ta1 = ta0 + 1;
+ tl1 = tl0 + 1;
distortion = 0;
rate_t = 0;
for (i = 0; i < 4; ++i) {
+ int do_two = 0;
b = &xd->block[ib + iblock[i]];
be = &x->block[ib + iblock[i]];
- tx_type = get_tx_type_4x4(xd, b);
+ tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
if (tx_type != DCT_DCT) {
- vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
- vp9_ht_quantize_b_4x4(be, b, tx_type);
+ vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
+ vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
+ } else if (!(i & 1) &&
+ get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
+ do_two = 1;
} else {
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, b);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, ib + iblock[i]);
}
- distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16);
- rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC,
- // i&1 ? &ta1 : &ta0, i&2 ? &tl1 : &tl0,
- &ta0, &tl0,
+ distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
+ rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
+ i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
TX_4X4);
+ if (do_two) {
+ i++;
+ rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
+ i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
+ TX_4X4);
+ }
}
+ b = &xd->block[ib];
+ be = &x->block[ib];
rate += rate_t;
}
@@ -1491,10 +1589,10 @@
*bestrate = rate;
*bestratey = rate_t;
*bestdistortion = distortion;
- besta0 = ta0;
- besta1 = ta1;
- bestl0 = tl0;
- bestl1 = tl1;
+ besta0 = *ta0;
+ besta1 = *ta1;
+ bestl0 = *tl0;
+ bestl1 = *tl1;
best_rd = this_rd;
*best_mode = mode;
copy_predictor_8x8(best_predictor, b->predictor);
@@ -1563,7 +1661,80 @@
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
-static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) {
+static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_y,
+ int *distortion,
+ int *mode8x8,
+ int64_t best_yrd,
+ int64_t *txfm_cache) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
+ int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
+ int64_t tmp_rd_4x4s, tmp_rd_8x8s;
+ int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
+ int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
+
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
+ &d4x4, best_yrd);
+ mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
+ &d8x8, best_yrd);
+ txfm_cache[ONLY_4X4] = tmp_rd_4x4;
+ txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
+ txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
+ tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
+ tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
+ txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ?
+ tmp_rd_4x4s : tmp_rd_8x8s;
+ if (cm->txfm_mode == TX_MODE_SELECT) {
+ if (tmp_rd_4x4s < tmp_rd_8x8s) {
+ *rate = r4x4 + cost0;
+ *rate_y = tok4x4 + cost0;
+ *distortion = d4x4;
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd = tmp_rd_4x4s;
+ } else {
+ *rate = r8x8 + cost1;
+ *rate_y = tok8x8 + cost1;
+ *distortion = d8x8;
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd = tmp_rd_8x8s;
+
+ mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
+ }
+ } else if (cm->txfm_mode == ONLY_4X4) {
+ *rate = r4x4;
+ *rate_y = tok4x4;
+ *distortion = d4x4;
+ mbmi->txfm_size = TX_4X4;
+ tmp_rd = tmp_rd_4x4;
+ } else {
+ *rate = r8x8;
+ *rate_y = tok8x8;
+ *distortion = d8x8;
+ mbmi->txfm_size = TX_8X8;
+ tmp_rd = tmp_rd_8x8;
+
+ mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
+ mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
+ mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
+ mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
+ }
+
+ return tmp_rd;
+}
+
+static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
int b;
int cost = 0;
MACROBLOCKD *xd = &mb->e_mbd;
@@ -1582,7 +1753,7 @@
}
for (b = 16; b < 24; b++)
- cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
+ cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV,
ta + vp9_block2above[TX_4X4][b],
tl + vp9_block2left[TX_4X4][b],
TX_4X4);
@@ -1597,7 +1768,7 @@
vp9_transform_mbuv_4x4(x);
vp9_quantize_mbuv_4x4(x);
- *rate = rd_cost_mbuv_4x4(x, do_ctx_backup);
+ *rate = rd_cost_mbuv_4x4(&cpi->common, x, do_ctx_backup);
*distortion = vp9_mbuverror(x) / 4;
*skip = vp9_mbuv_is_skippable_4x4(&x->e_mbd);
@@ -1604,7 +1775,7 @@
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) {
+static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
int b;
int cost = 0;
MACROBLOCKD *xd = &mb->e_mbd;
@@ -1623,7 +1794,7 @@
}
for (b = 16; b < 24; b += 4)
- cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
+ cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b], TX_8X8);
@@ -1636,7 +1807,7 @@
vp9_transform_mbuv_8x8(x);
vp9_quantize_mbuv_8x8(x);
- *rate = rd_cost_mbuv_8x8(x, do_ctx_backup);
+ *rate = rd_cost_mbuv_8x8(&cpi->common, x, do_ctx_backup);
*distortion = vp9_mbuverror(x) / 4;
*skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd);
@@ -1643,16 +1814,16 @@
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) {
+static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int backup) {
int b;
int cost = 0;
MACROBLOCKD *const xd = &x->e_mbd;
- ENTROPY_CONTEXT_PLANES t_above, t_left;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
ENTROPY_CONTEXT *ta, *tl;
if (backup) {
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
ta = (ENTROPY_CONTEXT *) &t_above;
tl = (ENTROPY_CONTEXT *) &t_left;
@@ -1662,7 +1833,7 @@
}
for (b = 16; b < 24; b += 4)
- cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV,
+ cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_UV,
ta + vp9_block2above[TX_8X8][b],
tl + vp9_block2left[TX_8X8][b], TX_16X16);
@@ -1669,8 +1840,8 @@
return cost;
}
-static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate,
- int *distortion, int *skip,
+static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skip,
int backup) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1677,9 +1848,9 @@
vp9_transform_sbuv_16x16(x);
vp9_quantize_sbuv_16x16(x);
- *rate = rd_cost_sbuv_16x16(x, backup);
- *distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024,
- xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2;
+ *rate = rd_cost_sbuv_16x16(cm, x, backup);
+ *distortion = vp9_sb_block_error_c(x->coeff + 1024,
+ xd->dqcoeff + 1024, 512, 2);
*skip = vp9_sbuv_is_skippable_16x16(xd);
}
@@ -1691,11 +1862,11 @@
const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
- if (mbmi->txfm_size == TX_32X32) {
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
+ if (mbmi->txfm_size >= TX_16X16) {
+ vp9_subtract_sbuv_s_c(x->src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
- rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1);
+ rd_inter32x32_uv_16x16(&cpi->common, x, rate, distortion, skip, 1);
} else {
int n, r = 0, d = 0;
int skippable = 1;
@@ -1743,22 +1914,14 @@
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static void super_block_64_uvrd(MACROBLOCK *x, int *rate,
+static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, int *rate,
int *distortion, int *skip);
static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int fullpixel, int *skip) {
- super_block_64_uvrd(x, rate, distortion, skip);
+ super_block_64_uvrd(&cpi->common, x, rate, distortion, skip);
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
- int *distortion, int *skip, int fullpixel) {
- vp9_build_inter4x4_predictors_mbuv(&x->e_mbd);
- vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
- x->e_mbd.predictor, x->src.uv_stride);
- return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1);
-}
-
static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi,
MACROBLOCK *x,
int *rate,
@@ -1773,6 +1936,7 @@
int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
int rate_to, UNINITIALIZED_IS_SAFE(skip);
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
int rate;
int distortion;
@@ -1786,7 +1950,7 @@
vp9_transform_mbuv_4x4(x);
vp9_quantize_mbuv_4x4(x);
- rate_to = rd_cost_mbuv_4x4(x, 1);
+ rate_to = rd_cost_mbuv_4x4(&cpi->common, x, 1);
rate = rate_to
+ x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];
@@ -1825,6 +1989,7 @@
int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
int rate_to, UNINITIALIZED_IS_SAFE(skip);
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
int rate;
int distortion;
@@ -1838,7 +2003,7 @@
vp9_quantize_mbuv_8x8(x);
- rate_to = rd_cost_mbuv_8x8(x, 1);
+ rate_to = rd_cost_mbuv_8x8(&cpi->common, x, 1);
rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];
distortion = vp9_mbuverror(x) / 4;
@@ -1860,7 +2025,8 @@
}
// TODO(rbultje) very similar to rd_inter32x32_uv(), merge?
-static void super_block_uvrd(MACROBLOCK *x,
+static void super_block_uvrd(VP9_COMMON *const cm,
+ MACROBLOCK *x,
int *rate,
int *distortion,
int *skippable) {
@@ -1870,11 +2036,11 @@
const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
- if (mbmi->txfm_size == TX_32X32) {
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
+ if (mbmi->txfm_size >= TX_16X16) {
+ vp9_subtract_sbuv_s_c(x->src_diff,
usrc, vsrc, src_uv_stride,
udst, vdst, dst_uv_stride);
- rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1);
+ rd_inter32x32_uv_16x16(cm, x, rate, distortion, skippable, 1);
} else {
int d = 0, r = 0, n, s = 1;
ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
@@ -1908,9 +2074,9 @@
xd->above_context = t_above + x_idx;
xd->left_context = t_left + y_idx;
if (mbmi->txfm_size == TX_4X4) {
- r += rd_cost_mbuv_4x4(x, 0);
+ r += rd_cost_mbuv_4x4(cm, x, 0);
} else {
- r += rd_cost_mbuv_8x8(x, 0);
+ r += rd_cost_mbuv_8x8(cm, x, 0);
}
}
@@ -1923,7 +2089,48 @@
}
}
-static void super_block_64_uvrd(MACROBLOCK *x,
+static int rd_cost_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ int backup) {
+ int b;
+ int cost = 0;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
+ ENTROPY_CONTEXT *ta, *tl;
+
+ if (backup) {
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
+
+ ta = (ENTROPY_CONTEXT *) &t_above;
+ tl = (ENTROPY_CONTEXT *) &t_left;
+ } else {
+ ta = (ENTROPY_CONTEXT *)xd->above_context;
+ tl = (ENTROPY_CONTEXT *)xd->left_context;
+ }
+
+ for (b = 16; b < 24; b += 4)
+ cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_UV,
+ ta + vp9_block2above[TX_8X8][b],
+ tl + vp9_block2left[TX_8X8][b], TX_32X32);
+
+ return cost;
+}
+
+static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skip,
+ int backup) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ vp9_transform_sb64uv_32x32(x);
+ vp9_quantize_sb64uv_32x32(x);
+
+ *rate = rd_cost_sb64uv_32x32(cm, x, backup);
+ *distortion = vp9_sb_block_error_c(x->coeff + 4096,
+ xd->dqcoeff + 4096, 2048, 0);
+ *skip = vp9_sb64uv_is_skippable_32x32(xd);
+}
+
+static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
int *rate,
int *distortion,
int *skippable) {
@@ -1937,10 +2144,15 @@
ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context;
int d = 0, r = 0, n, s = 1;
+ // FIXME not needed if tx=32x32
memcpy(t_above, xd->above_context, sizeof(t_above));
memcpy(t_left, xd->left_context, sizeof(t_left));
if (mbmi->txfm_size == TX_32X32) {
+ vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
+ udst, vdst, dst_uv_stride);
+ rd_inter64x64_uv_32x32(cm, x, &r, &d, &s, 1);
+ } else if (mbmi->txfm_size == TX_16X16) {
int n;
*rate = 0;
@@ -1948,7 +2160,7 @@
int x_idx = n & 1, y_idx = n >> 1;
int r_tmp, d_tmp, s_tmp;
- vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff,
+ vp9_subtract_sbuv_s_c(x->src_diff,
usrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride,
src_uv_stride,
@@ -1957,7 +2169,7 @@
dst_uv_stride);
xd->above_context = t_above + x_idx * 2;
xd->left_context = t_left + y_idx * 2;
- rd_inter32x32_uv_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
+ rd_inter32x32_uv_16x16(cm, x, &r_tmp, &d_tmp, &s_tmp, 0);
r += r_tmp;
d += d_tmp;
s = s && s_tmp;
@@ -1987,9 +2199,9 @@
xd->left_context = t_left + y_idx;
d += vp9_mbuverror(x) >> 2;
if (mbmi->txfm_size == TX_4X4) {
- r += rd_cost_mbuv_4x4(x, 0);
+ r += rd_cost_mbuv_4x4(cm, x, 0);
} else {
- r += rd_cost_mbuv_8x8(x, 0);
+ r += rd_cost_mbuv_8x8(cm, x, 0);
}
}
}
@@ -2018,7 +2230,7 @@
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
- super_block_uvrd(x, &this_rate_tokenonly,
+ super_block_uvrd(&cpi->common, x, &this_rate_tokenonly,
&this_distortion, &s);
this_rate = this_rate_tokenonly +
x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
@@ -2055,7 +2267,7 @@
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
vp9_build_intra_predictors_sb64uv_s(&x->e_mbd);
- super_block_64_uvrd(x, &this_rate_tokenonly,
+ super_block_64_uvrd(&cpi->common, x, &this_rate_tokenonly,
&this_distortion, &s);
this_rate = this_rate_tokenonly +
x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
@@ -2082,12 +2294,8 @@
MACROBLOCKD *xd = &cpi->mb.e_mbd;
int segment_id = xd->mode_info_context->mbmi.segment_id;
- // If the mode coding is done entirely at the segment level
- // we should not account for it at the per mb level in rd code.
- // Note that if the segment level coding is expanded from single mode
- // to multiple mode masks as per reference frame coding we will need
- // to do something different here.
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ // Dont account for mode here if segment skip is enabled.
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
VP9_COMMON *pc = &cpi->common;
vp9_prob p [VP9_MVREFS - 1];
@@ -2156,14 +2364,18 @@
}
break;
case LEFT4X4:
- this_mv->as_int = col ? d[-1].bmi.as_mv.first.as_int : left_block_mv(mic, i);
+ this_mv->as_int = col ? d[-1].bmi.as_mv[0].as_int :
+ left_block_mv(xd, mic, i);
if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int = col ? d[-1].bmi.as_mv.second.as_int : left_block_second_mv(mic, i);
+ this_second_mv->as_int = col ? d[-1].bmi.as_mv[1].as_int :
+ left_block_second_mv(xd, mic, i);
break;
case ABOVE4X4:
- this_mv->as_int = row ? d[-4].bmi.as_mv.first.as_int : above_block_mv(mic, i, mis);
+ this_mv->as_int = row ? d[-4].bmi.as_mv[0].as_int :
+ above_block_mv(mic, i, mis);
if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int = row ? d[-4].bmi.as_mv.second.as_int : above_block_second_mv(mic, i, mis);
+ this_second_mv->as_int = row ? d[-4].bmi.as_mv[1].as_int :
+ above_block_second_mv(mic, i, mis);
break;
case ZERO4X4:
this_mv->as_int = 0;
@@ -2178,11 +2390,11 @@
int_mv left_mv, left_second_mv;
left_second_mv.as_int = 0;
- left_mv.as_int = col ? d[-1].bmi.as_mv.first.as_int :
- left_block_mv(mic, i);
+ left_mv.as_int = col ? d[-1].bmi.as_mv[0].as_int :
+ left_block_mv(xd, mic, i);
if (mbmi->second_ref_frame > 0)
- left_second_mv.as_int = col ? d[-1].bmi.as_mv.second.as_int :
- left_block_second_mv(mic, i);
+ left_second_mv.as_int = col ? d[-1].bmi.as_mv[1].as_int :
+ left_block_second_mv(xd, mic, i);
if (left_mv.as_int == this_mv->as_int &&
(mbmi->second_ref_frame <= 0 ||
@@ -2198,9 +2410,9 @@
#endif
}
- d->bmi.as_mv.first.as_int = this_mv->as_int;
+ d->bmi.as_mv[0].as_int = this_mv->as_int;
if (mbmi->second_ref_frame > 0)
- d->bmi.as_mv.second.as_int = this_second_mv->as_int;
+ d->bmi.as_mv[1].as_int = this_second_mv->as_int;
x->partition_info->bmi[i].mode = m;
x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
@@ -2212,7 +2424,8 @@
return cost;
}
-static int64_t encode_inter_mb_segment(MACROBLOCK *x,
+static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
+ MACROBLOCK *x,
int const *labels,
int which_label,
int *labelyrate,
@@ -2230,15 +2443,30 @@
BLOCK *be = &x->block[i];
int thisdistortion;
- vp9_build_inter_predictors_b(bd, 16, xd->subpixel_predict4x4);
- if (xd->mode_info_context->mbmi.second_ref_frame > 0)
- vp9_build_2nd_inter_predictors_b(bd, 16, xd->subpixel_predict_avg4x4);
+ vp9_build_inter_predictor(*(bd->base_pre) + bd->pre,
+ bd->pre_stride,
+ bd->predictor, 16,
+ &bd->bmi.as_mv[0],
+ &xd->scale_factor[0],
+ 4, 4, 0 /* no avg */, &xd->subpix);
+
+ // TODO(debargha): Make this work properly with the
+ // implicit-compoundinter-weight experiment when implicit
+ // weighting for splitmv modes is turned on.
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+ vp9_build_inter_predictor(
+ *(bd->base_second_pre) + bd->pre, bd->pre_stride, bd->predictor, 16,
+ &bd->bmi.as_mv[1], &xd->scale_factor[1], 4, 4,
+ 1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT) /* avg */,
+ &xd->subpix);
+ }
+
vp9_subtract_b(be, bd, 16);
- x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4(be, bd);
+ x->fwd_txm4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4(x, i);
thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
+ *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_4X4][i],
tl + vp9_block2left[TX_4X4][i], TX_4X4);
}
@@ -2247,7 +2475,8 @@
return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
-static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
+static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
+ MACROBLOCK *x,
int const *labels,
int which_label,
int *labelyrate,
@@ -2274,42 +2503,60 @@
int ib = vp9_i8x8_block[i];
if (labels[ib] == which_label) {
+ const int use_second_ref =
+ xd->mode_info_context->mbmi.second_ref_frame > 0;
+ int which_mv;
int idx = (ib & 8) + ((ib & 2) << 1);
BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
int thisdistortion;
- vp9_build_inter_predictors4b(xd, bd, 16);
- if (xd->mode_info_context->mbmi.second_ref_frame > 0)
- vp9_build_2nd_inter_predictors4b(xd, bd, 16);
+ for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre;
+
+ // TODO(debargha): Make this work properly with the
+ // implicit-compoundinter-weight experiment when implicit
+ // weighting for splitmv modes is turned on.
+ vp9_build_inter_predictor(
+ *base_pre + bd->pre, bd->pre_stride, bd->predictor, 16,
+ &bd->bmi.as_mv[which_mv], &xd->scale_factor[which_mv], 8, 8,
+ which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
+ &xd->subpix);
+ }
+
vp9_subtract_4b_c(be, bd, 16);
if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
if (otherrd) {
- x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
- x->quantize_b_8x8(be2, bd2);
+ x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(x, idx, DCT_DCT);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
otherdist += thisdistortion;
- othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
- tacp + vp9_block2above[TX_8X8][idx],
- tlcp + vp9_block2left[TX_8X8][idx],
- TX_8X8);
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+ othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above[TX_8X8][idx],
+ tlcp + vp9_block2left[TX_8X8][idx],
+ TX_8X8);
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
}
for (j = 0; j < 4; j += 2) {
bd = &xd->block[ib + iblock[j]];
be = &x->block[ib + iblock[j]];
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
- ta + vp9_block2above[TX_4X4][ib + iblock[j]],
- tl + vp9_block2left[TX_4X4][ib + iblock[j]],
- TX_4X4);
- *labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
- ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
- tl + vp9_block2left[TX_4X4][ib + iblock[j]],
- TX_4X4);
+ *labelyrate +=
+ cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above[TX_4X4][ib + iblock[j]],
+ tl + vp9_block2left[TX_4X4][ib + iblock[j]],
+ TX_4X4);
+ *labelyrate +=
+ cost_coeffs(cm, x, ib + iblock[j] + 1,
+ PLANE_TYPE_Y_WITH_DC,
+ ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
+ tl + vp9_block2left[TX_4X4][ib + iblock[j]],
+ TX_4X4);
}
} else /* 8x8 */ {
if (otherrd) {
@@ -2316,25 +2563,30 @@
for (j = 0; j < 4; j += 2) {
BLOCKD *bd = &xd->block[ib + iblock[j]];
BLOCK *be = &x->block[ib + iblock[j]];
- x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
- x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
+ x->fwd_txm8x4(be->src_diff, be->coeff, 32);
+ x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j]);
thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
otherdist += thisdistortion;
- othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC,
- tacp + vp9_block2above[TX_4X4][ib + iblock[j]],
- tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
- TX_4X4);
- othercost += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC,
- tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
- tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
- TX_4X4);
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ othercost +=
+ cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above[TX_4X4][ib + iblock[j]],
+ tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
+ TX_4X4);
+ othercost +=
+ cost_coeffs(cm, x, ib + iblock[j] + 1,
+ PLANE_TYPE_Y_WITH_DC,
+ tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1],
+ tlcp + vp9_block2left[TX_4X4][ib + iblock[j]],
+ TX_4X4);
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
}
}
- x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
- x->quantize_b_8x8(be2, bd2);
+ x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
+ x->quantize_b_8x8(x, idx, DCT_DCT);
thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC,
+ *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
ta + vp9_block2above[TX_8X8][idx],
tl + vp9_block2left[TX_8X8][idx], TX_8X8);
}
@@ -2373,8 +2625,7 @@
} BEST_SEG_INFO;
-static __inline
-int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
+static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
int r = 0;
r |= (mv->as_mv.row >> 3) < x->mv_row_min;
r |= (mv->as_mv.row >> 3) > x->mv_row_max;
@@ -2487,9 +2738,9 @@
// use previous block's result as next block's MV predictor.
if (segmentation == PARTITIONING_4X4 && i > 0) {
- bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv.first.as_int;
+ bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.as_mv[0].as_int;
if (i == 4 || i == 8 || i == 12)
- bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv.first.as_int;
+ bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.as_mv[0].as_int;
step_param = 2;
}
}
@@ -2528,11 +2779,11 @@
if (thissme < bestsme) {
bestsme = thissme;
- mode_mv[NEW4X4].as_int = e->bmi.as_mv.first.as_int;
+ mode_mv[NEW4X4].as_int = e->bmi.as_mv[0].as_int;
} else {
/* The full search result is actually worse so re-instate the
* previous best vector */
- e->bmi.as_mv.first.as_int = mode_mv[NEW4X4].as_int;
+ e->bmi.as_mv[0].as_int = mode_mv[NEW4X4].as_int;
}
}
}
@@ -2575,11 +2826,13 @@
continue;
if (segmentation == PARTITIONING_4X4) {
- this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate,
+ this_rd = encode_inter_mb_segment(&cpi->common,
+ x, labels, i, &labelyrate,
&distortion, ta_s, tl_s);
other_rd = this_rd;
} else {
- this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate,
+ this_rd = encode_inter_mb_segment_8x8(&cpi->common,
+ x, labels, i, &labelyrate,
&distortion, &other_rd,
ta_s, tl_s);
}
@@ -2595,13 +2848,13 @@
if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
for (j = 0; j < 16; j++)
if (labels[j] == i)
- best_eobs[j] = x->e_mbd.block[j].eob;
+ best_eobs[j] = x->e_mbd.eobs[j];
} else {
for (j = 0; j < 4; j++) {
int ib = vp9_i8x8_block[j], idx = j * 4;
if (labels[ib] == i)
- best_eobs[idx] = x->e_mbd.block[idx].eob;
+ best_eobs[idx] = x->e_mbd.eobs[idx];
}
}
if (other_rd < best_other_rd)
@@ -2734,8 +2987,9 @@
if (base_rd < txfm_cache[ONLY_4X4]) {
txfm_cache[ONLY_4X4] = base_rd;
}
- if (base_rd + diff < txfm_cache[1]) {
- txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = base_rd + diff;
+ if (base_rd + diff < txfm_cache[ALLOW_8X8]) {
+ txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] =
+ txfm_cache[ALLOW_32X32] = base_rd + diff;
}
if (diff < 0) {
base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
@@ -2749,7 +3003,7 @@
}
}
-static __inline void cal_step_param(int sr, int *sp) {
+static INLINE void cal_step_param(int sr, int *sp) {
int step = 0;
if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
@@ -2872,10 +3126,10 @@
for (i = 0; i < 16; i++) {
BLOCKD *bd = &x->e_mbd.block[i];
- bd->bmi.as_mv.first.as_int = bsi.mvs[i].as_int;
+ bd->bmi.as_mv[0].as_int = bsi.mvs[i].as_int;
if (mbmi->second_ref_frame > 0)
- bd->bmi.as_mv.second.as_int = bsi.second_mvs[i].as_int;
- bd->eob = bsi.eobs[i];
+ bd->bmi.as_mv[1].as_int = bsi.second_mvs[i].as_int;
+ x->e_mbd.eobs[i] = bsi.eobs[i];
}
*returntotrate = bsi.r;
@@ -2882,8 +3136,8 @@
*returndistortion = bsi.d;
*returnyrate = bsi.segment_yrate;
*skippable = bsi.txfm_size == TX_4X4 ?
- vp9_mby_is_skippable_4x4(&x->e_mbd, 0) :
- vp9_mby_is_skippable_8x8(&x->e_mbd, 0);
+ vp9_mby_is_skippable_4x4(&x->e_mbd) :
+ vp9_mby_is_skippable_8x8(&x->e_mbd);
/* save partitions */
mbmi->txfm_size = bsi.txfm_size;
@@ -3016,7 +3270,8 @@
}
}
-static __inline unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1, int idx, int val, int weight) {
+static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1,
+ int idx, int val, int weight) {
unsigned cost0 = tab0[idx] ? vp9_cost_bit(tab0[idx], val) : 0;
unsigned cost1 = tab1[idx] ? vp9_cost_bit(tab1[idx], val) : 0;
// weight is 16-bit fixed point, so this basically calculates:
@@ -3145,7 +3400,9 @@
// UV cost and distortion
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
x->e_mbd.predictor, x->src.uv_stride);
- if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4)
+ if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4 &&
+ x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED &&
+ x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
rd_inter16x16_uv_8x8(cpi, x, rate_uv, distortion_uv,
cpi->common.full_pixel, &uv_skippable, 1);
else
@@ -3160,41 +3417,104 @@
static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
int idx, MV_REFERENCE_FRAME frame_type,
int block_size,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int_mv frame_nearest_mv[MAX_REF_FRAMES],
int_mv frame_near_mv[MAX_REF_FRAMES],
int frame_mdcounts[4][4],
- uint8_t *y_buffer[4],
- uint8_t *u_buffer[4],
- uint8_t *v_buffer[4]) {
- YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx];
+ YV12_BUFFER_CONFIG yv12_mb[4],
+ struct scale_factors scale[MAX_REF_FRAMES]) {
+ VP9_COMMON *cm = &cpi->common;
+ YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ int use_prev_in_find_mv_refs, use_prev_in_find_best_ref;
- y_buffer[frame_type] = yv12->y_buffer + recon_yoffset;
- u_buffer[frame_type] = yv12->u_buffer + recon_uvoffset;
- v_buffer[frame_type] = yv12->v_buffer + recon_uvoffset;
+ // set up scaling factors
+ scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
+ scale[frame_type].x_offset_q4 =
+ (mb_col * 16 * scale[frame_type].x_num / scale[frame_type].x_den) & 0xf;
+ scale[frame_type].y_offset_q4 =
+ (mb_row * 16 * scale[frame_type].y_num / scale[frame_type].y_den) & 0xf;
+ // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
+ // use the UV scaling factors.
+ setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col,
+ &scale[frame_type], &scale[frame_type]);
+
// Gets an initial list of candidate vectors from neighbours and orders them
- vp9_find_mv_refs(xd, xd->mode_info_context,
- xd->prev_mode_info_context,
+ use_prev_in_find_mv_refs = cm->width == cm->last_width &&
+ cm->height == cm->last_height &&
+ !cpi->common.error_resilient_mode;
+ vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
+ use_prev_in_find_mv_refs ? xd->prev_mode_info_context : NULL,
frame_type,
mbmi->ref_mvs[frame_type],
cpi->common.ref_frame_sign_bias);
// Candidate refinement carried out at encoder and decoder
- vp9_find_best_ref_mvs(xd, y_buffer[frame_type],
+ use_prev_in_find_best_ref =
+ scale[frame_type].x_num == scale[frame_type].x_den &&
+ scale[frame_type].y_num == scale[frame_type].y_den &&
+ !cm->error_resilient_mode &&
+ !cm->frame_parallel_decoding_mode;
+ vp9_find_best_ref_mvs(xd,
+ use_prev_in_find_best_ref ?
+ yv12_mb[frame_type].y_buffer : NULL,
yv12->y_stride,
mbmi->ref_mvs[frame_type],
&frame_nearest_mv[frame_type],
&frame_near_mv[frame_type]);
-
// Further refinement that is encode side only to test the top few candidates
// in full and choose the best as the centre point for subsequent searches.
- mv_pred(cpi, x, y_buffer[frame_type], yv12->y_stride,
- frame_type, block_size);
+ // The current implementation doesn't support scaling.
+ if (scale[frame_type].x_num == scale[frame_type].x_den &&
+ scale[frame_type].y_num == scale[frame_type].y_den)
+ mv_pred(cpi, x, yv12_mb[frame_type].y_buffer, yv12->y_stride,
+ frame_type, block_size);
+}
+static void model_rd_from_var_lapndz(int var, int n, int qstep,
+ int *rate, int *dist) {
+ // This function models the rate and distortion for a Laplacian
+ // source with given variance when quantized with a uniform quantizer
+ // with given stepsize. The closed form expressions are in:
+ // Hang and Chen, "Source Model for transform video coder and its
+ // application - Part I: Fundamental Theory", IEEE Trans. Circ.
+ // Sys. for Video Tech., April 1997.
+ // The function is implemented as piecewise approximation to the
+ // exact computation.
+ // TODO(debargha): Implement the functions by interpolating from a
+ // look-up table
+ vp9_clear_system_state();
+ {
+ double D, R;
+ double s2 = (double) var / n;
+ double s = sqrt(s2);
+ double x = qstep / s;
+ if (x > 1.0) {
+ double y = exp(-x / 2);
+ double y2 = y * y;
+ D = 2.069981728764738 * y2 - 2.764286806516079 * y + 1.003956960819275;
+ R = 0.924056758535089 * y2 + 2.738636469814024 * y - 0.005169662030017;
+ } else {
+ double x2 = x * x;
+ D = 0.075303187668830 * x2 + 0.004296954321112 * x - 0.000413209252807;
+ if (x > 0.125)
+ R = 1 / (-0.03459733614226 * x2 + 0.36561675733603 * x +
+ 0.1626989668625);
+ else
+ R = -1.442252874826093 * log(x) + 1.944647760719664;
+ }
+ if (R < 0) {
+ *rate = 0;
+ *dist = var;
+ } else {
+ *rate = (n * R * 256 + 0.5);
+ *dist = (n * D * s2 + 0.5);
+ }
+ }
+ vp9_clear_system_state();
}
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -3209,9 +3529,12 @@
int *rate_y, int *distortion_y,
int *rate_uv, int *distortion_uv,
int *mode_excluded, int *disable_skip,
- int recon_yoffset, int mode_index,
+ int mode_index,
+ INTERPOLATIONFILTERTYPE *best_filter,
int_mv frame_mv[MB_MODE_COUNT]
- [MAX_REF_FRAMES]) {
+ [MAX_REF_FRAMES],
+ YV12_BUFFER_CONFIG *scaled_ref_frame,
+ int mb_row, int mb_col) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -3229,6 +3552,13 @@
int_mv cur_mv[2];
int_mv ref_mv[2];
int64_t this_rd = 0;
+ unsigned char tmp_ybuf[64 * 64];
+ unsigned char tmp_ubuf[32 * 32];
+ unsigned char tmp_vbuf[32 * 32];
+ int pred_exists = 0;
+ int interpolating_intpel_seen = 0;
+ int intpel_mv;
+ int64_t rd, best_rd = INT64_MAX;
switch (this_mode) {
case NEWMV:
@@ -3248,6 +3578,7 @@
x->nmvjointcost, x->mvcost, 96,
x->e_mbd.allow_high_precision_mv);
} else {
+ YV12_BUFFER_CONFIG backup_yv12 = xd->pre;
int bestsme = INT_MAX;
int further_steps, step_param = cpi->sf.first_step;
int sadpb = x->sadperbit16;
@@ -3259,8 +3590,20 @@
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
+ if (scaled_ref_frame) {
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ xd->pre = *scaled_ref_frame;
+ xd->pre.y_buffer += mb_row * 16 * xd->pre.y_stride + mb_col * 16;
+ xd->pre.u_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8;
+ xd->pre.v_buffer += mb_row * 8 * xd->pre.uv_stride + mb_col * 8;
+ }
+
vp9_clamp_mv_min_max(x, &ref_mv[0]);
+ sr = vp9_init_search_range(cpi->common.width, cpi->common.height);
+
// mvp_full.as_int = ref_mv[0].as_int;
mvp_full.as_int =
mbmi->ref_mvs[refs[0]][x->mv_best_ref_index[refs[0]]].as_int;
@@ -3267,9 +3610,6 @@
mvp_full.as_mv.col >>= 3;
mvp_full.as_mv.row >>= 3;
- if (mvp_full.as_int != mvp_full.as_int) {
- mvp_full.as_int = mvp_full.as_int;
- }
// adjust search range according to sr from mv prediction
step_param = MAX(step_param, sr);
@@ -3297,22 +3637,22 @@
x->nmvjointcost, x->mvcost,
&dis, &sse);
}
- d->bmi.as_mv.first.as_int = tmp_mv.as_int;
- frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv.first.as_int;
+ d->bmi.as_mv[0].as_int = tmp_mv.as_int;
+ frame_mv[NEWMV][refs[0]].as_int = d->bmi.as_mv[0].as_int;
// Add the new motion vector cost to our rolling cost variable
*rate2 += vp9_mv_bit_cost(&tmp_mv, &ref_mv[0],
x->nmvjointcost, x->mvcost,
96, xd->allow_high_precision_mv);
+
+ // restore the predictor, if required
+ if (scaled_ref_frame) {
+ xd->pre = backup_yv12;
+ }
}
break;
- case NEARESTMV:
case NEARMV:
- // Do not bother proceeding if the vector (from newmv, nearest or
- // near) is 0,0 as this should then be coded using the zeromv mode.
- for (i = 0; i < num_refs; ++i)
- if (frame_mv[this_mode][refs[i]].as_int == 0)
- return INT64_MAX;
+ case NEARESTMV:
case ZEROMV:
default:
break;
@@ -3326,11 +3666,6 @@
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
- if (cpi->common.mcomp_filter_type == SWITCHABLE) {
- const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
- const int m = vp9_switchable_interp_map[mbmi->interp_filter];
- *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
- }
/* We don't include the cost of the second reference here, because there
* are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
@@ -3355,36 +3690,332 @@
}
#endif
+ pred_exists = 0;
+ interpolating_intpel_seen = 0;
+ // Are all MVs integer pel for Y and UV
+ intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
+ (mbmi->mv[0].as_mv.col & 15) == 0;
+ if (is_comp_pred)
+ intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
+ (mbmi->mv[1].as_mv.col & 15) == 0;
+ // Search for best switchable filter by checking the variance of
+ // pred error irrespective of whether the filter will be used
if (block_size == BLOCK_64X64) {
- vp9_build_inter64x64_predictors_sb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
+ int switchable_filter_index, newbest;
+ int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
+ int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int rs = 0;
+ mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+ }
+ if (interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
+ tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
+ } else {
+ unsigned int sse, var;
+ int tmp_rate_y, tmp_rate_u, tmp_rate_v;
+ int tmp_dist_y, tmp_dist_u, tmp_dist_v;
+ vp9_build_inter64x64_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ var = vp9_variance64x64(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, &sse);
+ // Note our transform coeffs are 8 times an orthogonal transform.
+ // Hence quantizer step is also 8 times. To get effective quantizer
+ // we need to divide by 8 before sending to modeling function.
+ model_rd_from_var_lapndz(var, 64 * 64, xd->block[0].dequant[1] >> 3,
+ &tmp_rate_y, &tmp_dist_y);
+ var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride, &sse);
+ model_rd_from_var_lapndz(var, 32 * 32, xd->block[16].dequant[1] >> 3,
+ &tmp_rate_u, &tmp_dist_u);
+ var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride, &sse);
+ model_rd_from_var_lapndz(var, 32 * 32, xd->block[20].dequant[1] >> 3,
+ &tmp_rate_v, &tmp_dist_v);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
+ tmp_dist_y + tmp_dist_u + tmp_dist_v);
+ if (!interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ tmp_rate_y_i = tmp_rate_y;
+ tmp_rate_u_i = tmp_rate_u;
+ tmp_rate_v_i = tmp_rate_v;
+ tmp_dist_y_i = tmp_dist_y;
+ tmp_dist_u_i = tmp_dist_u;
+ tmp_dist_v_i = tmp_dist_v;
+ }
+ }
+ newbest = (switchable_filter_index == 0 || rd < best_rd);
+ if (newbest) {
+ best_rd = rd;
+ *best_filter = mbmi->interp_filter;
+ }
+ if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
+ (cm->mcomp_filter_type != SWITCHABLE &&
+ cm->mcomp_filter_type == mbmi->interp_filter)) {
+ int i;
+ for (i = 0; i < 64; ++i)
+ vpx_memcpy(tmp_ybuf + i * 64,
+ xd->dst.y_buffer + i * xd->dst.y_stride,
+ sizeof(unsigned char) * 64);
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(tmp_ubuf + i * 32,
+ xd->dst.u_buffer + i * xd->dst.uv_stride,
+ sizeof(unsigned char) * 32);
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(tmp_vbuf + i * 32,
+ xd->dst.v_buffer + i * xd->dst.uv_stride,
+ sizeof(unsigned char) * 32);
+ pred_exists = 1;
+ }
+ interpolating_intpel_seen |=
+ intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
+ }
} else if (block_size == BLOCK_32X32) {
- vp9_build_inter32x32_predictors_sb(xd,
- xd->dst.y_buffer,
- xd->dst.u_buffer,
- xd->dst.v_buffer,
- xd->dst.y_stride,
- xd->dst.uv_stride);
+ int switchable_filter_index, newbest;
+ int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
+ int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int rs = 0;
+ mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+ }
+ if (interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
+ tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
+ } else {
+ unsigned int sse, var;
+ int tmp_rate_y, tmp_rate_u, tmp_rate_v;
+ int tmp_dist_y, tmp_dist_u, tmp_dist_v;
+ vp9_build_inter32x32_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ var = vp9_variance32x32(*(b->base_src), b->src_stride,
+ xd->dst.y_buffer, xd->dst.y_stride, &sse);
+ // Note our transform coeffs are 8 times an orthogonal transform.
+ // Hence quantizer step is also 8 times. To get effective quantizer
+ // we need to divide by 8 before sending to modeling function.
+ model_rd_from_var_lapndz(var, 32 * 32, xd->block[0].dequant[1] >> 3,
+ &tmp_rate_y, &tmp_dist_y);
+ var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride, &sse);
+ model_rd_from_var_lapndz(var, 16 * 16, xd->block[16].dequant[1] >> 3,
+ &tmp_rate_u, &tmp_dist_u);
+ var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride, &sse);
+ model_rd_from_var_lapndz(var, 16 * 16, xd->block[20].dequant[1] >> 3,
+ &tmp_rate_v, &tmp_dist_v);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
+ tmp_dist_y + tmp_dist_u + tmp_dist_v);
+ if (!interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ tmp_rate_y_i = tmp_rate_y;
+ tmp_rate_u_i = tmp_rate_u;
+ tmp_rate_v_i = tmp_rate_v;
+ tmp_dist_y_i = tmp_dist_y;
+ tmp_dist_u_i = tmp_dist_u;
+ tmp_dist_v_i = tmp_dist_v;
+ }
+ }
+ newbest = (switchable_filter_index == 0 || rd < best_rd);
+ if (newbest) {
+ best_rd = rd;
+ *best_filter = mbmi->interp_filter;
+ }
+ if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
+ (cm->mcomp_filter_type != SWITCHABLE &&
+ cm->mcomp_filter_type == mbmi->interp_filter)) {
+ int i;
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(tmp_ybuf + i * 64,
+ xd->dst.y_buffer + i * xd->dst.y_stride,
+ sizeof(unsigned char) * 32);
+ for (i = 0; i < 16; ++i)
+ vpx_memcpy(tmp_ubuf + i * 32,
+ xd->dst.u_buffer + i * xd->dst.uv_stride,
+ sizeof(unsigned char) * 16);
+ for (i = 0; i < 16; ++i)
+ vpx_memcpy(tmp_vbuf + i * 32,
+ xd->dst.v_buffer + i * xd->dst.uv_stride,
+ sizeof(unsigned char) * 16);
+ pred_exists = 1;
+ }
+ interpolating_intpel_seen |=
+ intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
+ }
} else {
+ int switchable_filter_index, newbest;
+ int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
+ int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
assert(block_size == BLOCK_16X16);
- vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
- if (is_comp_pred)
- vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16);
-#if CONFIG_COMP_INTERINTRA_PRED
- if (is_comp_interintra_pred) {
- vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int rs = 0;
+ mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+ }
+ if (interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
+ tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
+ } else {
+ unsigned int sse, var;
+ int tmp_rate_y, tmp_rate_u, tmp_rate_v;
+ int tmp_dist_y, tmp_dist_u, tmp_dist_v;
+ vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
+ xd->predictor + 256,
+ xd->predictor + 320,
+ 16, 8, mb_row, mb_col);
+ var = vp9_variance16x16(*(b->base_src), b->src_stride,
+ xd->predictor, 16, &sse);
+ // Note our transform coeffs are 8 times an orthogonal transform.
+ // Hence quantizer step is also 8 times. To get effective quantizer
+ // we need to divide by 8 before sending to modeling function.
+ model_rd_from_var_lapndz(var, 16 * 16, xd->block[0].dequant[1] >> 3,
+ &tmp_rate_y, &tmp_dist_y);
+ var = vp9_variance8x8(x->src.u_buffer, x->src.uv_stride,
+ &xd->predictor[256], 8, &sse);
+ model_rd_from_var_lapndz(var, 8 * 8, xd->block[16].dequant[1] >> 3,
+ &tmp_rate_u, &tmp_dist_u);
+ var = vp9_variance8x8(x->src.v_buffer, x->src.uv_stride,
+ &xd->predictor[320], 8, &sse);
+ model_rd_from_var_lapndz(var, 8 * 8, xd->block[20].dequant[1] >> 3,
+ &tmp_rate_v, &tmp_dist_v);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
+ tmp_dist_y + tmp_dist_u + tmp_dist_v);
+ if (!interpolating_intpel_seen && intpel_mv &&
+ vp9_is_interpolating_filter[mbmi->interp_filter]) {
+ tmp_rate_y_i = tmp_rate_y;
+ tmp_rate_u_i = tmp_rate_u;
+ tmp_rate_v_i = tmp_rate_v;
+ tmp_dist_y_i = tmp_dist_y;
+ tmp_dist_u_i = tmp_dist_u;
+ tmp_dist_v_i = tmp_dist_v;
+ }
+ }
+ newbest = (switchable_filter_index == 0 || rd < best_rd);
+ if (newbest) {
+ best_rd = rd;
+ *best_filter = mbmi->interp_filter;
+ }
+ if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
+ (cm->mcomp_filter_type != SWITCHABLE &&
+ cm->mcomp_filter_type == mbmi->interp_filter)) {
+ vpx_memcpy(tmp_ybuf, xd->predictor, sizeof(unsigned char) * 256);
+ vpx_memcpy(tmp_ubuf, xd->predictor + 256, sizeof(unsigned char) * 64);
+ vpx_memcpy(tmp_vbuf, xd->predictor + 320, sizeof(unsigned char) * 64);
+ pred_exists = 1;
+ }
+ interpolating_intpel_seen |=
+ intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
}
-#endif
}
+ // Set the appripriate filter
+ if (cm->mcomp_filter_type != SWITCHABLE)
+ mbmi->interp_filter = cm->mcomp_filter_type;
+ else
+ mbmi->interp_filter = *best_filter;
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ if (pred_exists) {
+ if (block_size == BLOCK_64X64) {
+ for (i = 0; i < 64; ++i)
+ vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64,
+ sizeof(unsigned char) * 64);
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32,
+ sizeof(unsigned char) * 32);
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32,
+ sizeof(unsigned char) * 32);
+ } else if (block_size == BLOCK_32X32) {
+ for (i = 0; i < 32; ++i)
+ vpx_memcpy(xd->dst.y_buffer + i * xd->dst.y_stride, tmp_ybuf + i * 64,
+ sizeof(unsigned char) * 32);
+ for (i = 0; i < 16; ++i)
+ vpx_memcpy(xd->dst.u_buffer + i * xd->dst.uv_stride, tmp_ubuf + i * 32,
+ sizeof(unsigned char) * 16);
+ for (i = 0; i < 16; ++i)
+ vpx_memcpy(xd->dst.v_buffer + i * xd->dst.uv_stride, tmp_vbuf + i * 32,
+ sizeof(unsigned char) * 16);
+ } else {
+ vpx_memcpy(xd->predictor, tmp_ybuf, sizeof(unsigned char) * 256);
+ vpx_memcpy(xd->predictor + 256, tmp_ubuf, sizeof(unsigned char) * 64);
+ vpx_memcpy(xd->predictor + 320, tmp_vbuf, sizeof(unsigned char) * 64);
+ }
+ } else {
+ // Handles the special case when a filter that is not in the
+ // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
+ if (block_size == BLOCK_64X64) {
+ vp9_build_inter64x64_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ } else if (block_size == BLOCK_32X32) {
+ vp9_build_inter32x32_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride,
+ mb_row, mb_col);
+ } else {
+ vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
+ xd->predictor + 256,
+ xd->predictor + 320,
+ 16, 8, mb_row, mb_col);
+ }
+ }
+
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
+ const int m = vp9_switchable_interp_map[mbmi->interp_filter];
+ *rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+ }
+
if (cpi->active_map_enabled && x->active_ptr[0] == 0)
x->skip = 1;
else if (x->encode_breakout) {
- unsigned int sse, var;
+ unsigned int var, sse;
int threshold = (xd->block[0].dequant[1]
* xd->block[0].dequant[1] >> 4);
@@ -3404,9 +4035,9 @@
}
if ((int)sse < threshold) {
- unsigned int q2dc = xd->block[24].dequant[0];
+ unsigned int q2dc = xd->block[0].dequant[0];
/* If there is no codeable 2nd order dc
- or a very small uniform pixel change change */
+ or a very small uniform pixel change change */
if ((sse - var < q2dc * q2dc >> 4) ||
(sse / 2 > var && sse - var < 64)) {
// Check u and v to make sure skip is ok
@@ -3447,17 +4078,6 @@
}
}
- if (!(*mode_excluded)) {
- if (is_comp_pred) {
- *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
- } else {
- *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
- }
-#if CONFIG_COMP_INTERINTRA_PRED
- if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
-#endif
- }
-
if (!x->skip) {
if (block_size == BLOCK_64X64) {
int skippable_y, skippable_uv;
@@ -3491,30 +4111,32 @@
*skippable = skippable_y && skippable_uv;
} else {
assert(block_size == BLOCK_16X16);
-
- vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
- &xd->predictor[320], 8);
- if (is_comp_pred)
- vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
- &xd->predictor[320], 8);
-#if CONFIG_COMP_INTERINTRA_PRED
- if (is_comp_interintra_pred) {
- vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
- &xd->predictor[320], 8);
- }
-#endif
inter_mode_cost(cpi, x, rate2, distortion,
rate_y, distortion_y, rate_uv, distortion_uv,
skippable, txfm_cache);
}
}
+
+ if (!(*mode_excluded)) {
+ if (is_comp_pred) {
+ *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
+ } else {
+ *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
+ }
+#if CONFIG_COMP_INTERINTRA_PRED
+ if (is_comp_interintra_pred && !cm->use_interintra) *mode_excluded = 1;
+#endif
+ }
+
return this_rd; // if 0, this will be re-calculated by caller
}
static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int *returnrate, int *returndistortion,
int64_t *returnintra) {
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
union b_mode_info best_bmodes[16];
@@ -3540,10 +4162,14 @@
#if CONFIG_COMP_INTERINTRA_PRED
int is_best_interintra = 0;
int64_t best_intra16_rd = INT64_MAX;
- int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED;
+ int best_intra16_mode = DC_PRED;
+#if SEPARATE_INTERINTRA_UV
+ int best_intra16_uv_mode = DC_PRED;
#endif
+#endif
int64_t best_overall_rd = INT64_MAX;
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
+ INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
int uv_intra_skippable = 0;
int uv_intra_rate_8x8 = 0, uv_intra_distortion_8x8 = 0, uv_intra_rate_tokenonly_8x8 = 0;
@@ -3551,7 +4177,6 @@
int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
int distortion_uv = INT_MAX;
int64_t best_yrd = INT64_MAX;
- int switchable_filter_index = 0;
MB_PREDICTION_MODE uv_intra_mode;
MB_PREDICTION_MODE uv_intra_mode_8x8 = 0;
@@ -3561,7 +4186,7 @@
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
int frame_mdcounts[4][4];
- uint8_t *y_buffer[4], *u_buffer[4], *v_buffer[4];
+ YV12_BUFFER_CONFIG yv12_mb[4];
unsigned int ref_costs[MAX_REF_FRAMES];
int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1];
@@ -3569,6 +4194,8 @@
int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
cpi->common.y1dc_delta_q);
+ struct scale_factors scale_factor[4];
+
vpx_memset(mode8x8, 0, sizeof(mode8x8));
vpx_memset(&frame_mv, 0, sizeof(frame_mv));
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3592,24 +4219,24 @@
}
if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.lst_fb_idx, LAST_FRAME,
- BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ setup_buffer_inter(cpi, x, cpi->lst_fb_idx,
+ LAST_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, y_buffer, u_buffer, v_buffer);
+ frame_mdcounts, yv12_mb, scale_factor);
}
if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.gld_fb_idx, GOLDEN_FRAME,
- BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ setup_buffer_inter(cpi, x, cpi->gld_fb_idx,
+ GOLDEN_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, y_buffer, u_buffer, v_buffer);
+ frame_mdcounts, yv12_mb, scale_factor);
}
if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.alt_fb_idx, ALTREF_FRAME,
- BLOCK_16X16, recon_yoffset, recon_uvoffset,
+ setup_buffer_inter(cpi, x, cpi->alt_fb_idx,
+ ALTREF_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, y_buffer, u_buffer, v_buffer);
+ frame_mdcounts, yv12_mb, scale_factor);
}
*returnintra = INT64_MAX;
@@ -3620,6 +4247,8 @@
cpi->zbin_mode_boost = 0;
vp9_update_zbin_extra(cpi, x);
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+
rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate,
&uv_intra_rate_tokenonly, &uv_intra_distortion,
&uv_intra_skippable);
@@ -3638,8 +4267,7 @@
// that depend on the current prediction etc.
estimate_ref_frame_costs(cpi, segment_id, ref_costs);
- for (mode_index = 0; mode_index < MAX_MODES;
- mode_index += (!switchable_filter_index)) {
+ for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
int64_t this_rd = INT64_MAX;
int disable_skip = 0, skippable = 0;
int other_cost = 0;
@@ -3649,6 +4277,7 @@
#endif
int mode_excluded = 0;
int64_t txfm_cache[NB_TXFM_MODES] = { 0 };
+ YV12_BUFFER_CONFIG *scaled_ref_frame;
// These variables hold are rolling total cost and distortion for this mode
rate2 = 0;
@@ -3664,24 +4293,38 @@
mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
- // Evaluate all sub-pel filters irrespective of whether we can use
- // them for this frame.
- if (this_mode >= NEARESTMV && this_mode <= SPLITMV) {
- mbmi->interp_filter =
- vp9_switchable_interp[switchable_filter_index++];
- if (switchable_filter_index == VP9_SWITCHABLE_FILTERS)
- switchable_filter_index = 0;
- if ((cm->mcomp_filter_type != SWITCHABLE) &&
- (cm->mcomp_filter_type != mbmi->interp_filter)) {
- mode_excluded = 1;
- }
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
- }
+ mbmi->interp_filter = cm->mcomp_filter_type;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
+
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
// Test best rd so far against threshold for trying this mode.
if (best_rd <= cpi->rd_threshes[mode_index])
continue;
+ // Ensure that the references used by this mode are available.
+ if (mbmi->ref_frame &&
+ !(cpi->ref_frame_flags & flag_list[mbmi->ref_frame]))
+ continue;
+
+ if (mbmi->second_ref_frame > 0 &&
+ !(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))
+ continue;
+
+ // only scale on zeromv.
+ if (mbmi->ref_frame > 0 &&
+ (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+ if (mbmi->second_ref_frame > 0 &&
+ (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+
// current coding mode under rate-distortion optimization test loop
#if CONFIG_COMP_INTERINTRA_PRED
mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
@@ -3693,18 +4336,16 @@
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
!vp9_check_segref(xd, segment_id, mbmi->ref_frame)) {
continue;
- // If the segment mode feature is enabled....
+ // If the segment skip feature is enabled....
// then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
- (this_mode !=
- vp9_get_segdata(xd, segment_id, SEG_LVL_MODE))) {
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
+ (this_mode != ZEROMV)) {
continue;
- // Disable this drop out case if either the mode or ref frame
- // segment level feature is enabled for this segment. This is to
+ // Disable this drop out case if the ref frame segment
+ // level feature is enabled for this segment. This is to
// prevent the possibility that the we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame overlay,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative
if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
@@ -3716,22 +4357,31 @@
}
/* everything but intra */
+ scaled_ref_frame = NULL;
if (mbmi->ref_frame) {
int ref = mbmi->ref_frame;
+ int fb;
- xd->pre.y_buffer = y_buffer[ref];
- xd->pre.u_buffer = u_buffer[ref];
- xd->pre.v_buffer = v_buffer[ref];
+ xd->pre = yv12_mb[ref];
best_ref_mv = mbmi->ref_mvs[ref][0];
vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts));
+
+ if (mbmi->ref_frame == LAST_FRAME) {
+ fb = cpi->lst_fb_idx;
+ } else if (mbmi->ref_frame == GOLDEN_FRAME) {
+ fb = cpi->gld_fb_idx;
+ } else {
+ fb = cpi->alt_fb_idx;
+ }
+
+ if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
+ scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
}
if (mbmi->second_ref_frame > 0) {
int ref = mbmi->second_ref_frame;
- xd->second_pre.y_buffer = y_buffer[ref];
- xd->second_pre.u_buffer = u_buffer[ref];
- xd->second_pre.v_buffer = v_buffer[ref];
+ xd->second_pre = yv12_mb[ref];
second_best_ref_mv = mbmi->ref_mvs[ref][0];
}
@@ -3798,8 +4448,7 @@
// the BPRED mode : x->mbmode_cost[xd->frame_type][BPRED];
mbmi->txfm_size = TX_4X4;
tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
- &distortion, best_yrd,
- cpi->update_context);
+ &distortion, best_yrd);
rate2 += rate;
rate2 += intra_cost_penalty;
distortion2 += distortion;
@@ -3816,65 +4465,11 @@
}
break;
case I8X8_PRED: {
- int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
- int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
- int64_t tmp_rd_4x4s, tmp_rd_8x8s;
- int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
- int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
- mbmi->txfm_size = TX_4X4;
- tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
- &d4x4, best_yrd);
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- mbmi->txfm_size = TX_8X8;
- tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
- &d8x8, best_yrd);
- txfm_cache[ONLY_4X4] = tmp_rd_4x4;
- txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
- txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
- tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
- tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
- txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? tmp_rd_4x4s : tmp_rd_8x8s;
- if (cm->txfm_mode == TX_MODE_SELECT) {
- if (tmp_rd_4x4s < tmp_rd_8x8s) {
- rate = r4x4 + cost0;
- rate_y = tok4x4 + cost0;
- distortion = d4x4;
- mbmi->txfm_size = TX_4X4;
- tmp_rd = tmp_rd_4x4s;
- } else {
- rate = r8x8 + cost1;
- rate_y = tok8x8 + cost1;
- distortion = d8x8;
- mbmi->txfm_size = TX_8X8;
- tmp_rd = tmp_rd_8x8s;
+ int64_t tmp_rd;
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- }
- } else if (cm->txfm_mode == ONLY_4X4) {
- rate = r4x4;
- rate_y = tok4x4;
- distortion = d4x4;
- mbmi->txfm_size = TX_4X4;
- tmp_rd = tmp_rd_4x4;
- } else {
- rate = r8x8;
- rate_y = tok8x8;
- distortion = d8x8;
- mbmi->txfm_size = TX_8X8;
- tmp_rd = tmp_rd_8x8;
-
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- }
-
+ tmp_rd = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate, &rate_y,
+ &distortion, mode8x8,
+ best_yrd, txfm_cache);
rate2 += rate;
rate2 += intra_cost_penalty;
distortion2 += distortion;
@@ -3898,22 +4493,102 @@
// special case it.
else if (this_mode == SPLITMV) {
const int is_comp_pred = mbmi->second_ref_frame > 0;
- int64_t tmp_rd, this_rd_thresh;
+ int64_t this_rd_thresh;
+ int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
+ int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
+ int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
+ int switchable_filter_index;
int_mv *second_ref = is_comp_pred ? &second_best_ref_mv : NULL;
+ union b_mode_info tmp_best_bmodes[16];
+ MB_MODE_INFO tmp_best_mbmode;
+ PARTITION_INFO tmp_best_partition;
+ int pred_exists = 0;
this_rd_thresh =
- (mbmi->ref_frame == LAST_FRAME) ?
+ (mbmi->ref_frame == LAST_FRAME) ?
cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
this_rd_thresh =
- (mbmi->ref_frame == GOLDEN_FRAME) ?
+ (mbmi->ref_frame == GOLDEN_FRAME) ?
cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
- second_ref, best_yrd, mdcounts,
- &rate, &rate_y, &distortion,
- &skippable,
- (int)this_rd_thresh, seg_mvs,
- txfm_cache);
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int newbest;
+ mbmi->interp_filter =
+ vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
+ second_ref, best_yrd, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ (int)this_rd_thresh, seg_mvs,
+ txfm_cache);
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
+ [vp9_get_pred_context(&cpi->common, xd,
+ PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+ tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
+ }
+ newbest = (tmp_rd < tmp_best_rd);
+ if (newbest) {
+ tmp_best_filter = mbmi->interp_filter;
+ tmp_best_rd = tmp_rd;
+ }
+ if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
+ (mbmi->interp_filter == cm->mcomp_filter_type &&
+ cm->mcomp_filter_type != SWITCHABLE)) {
+ tmp_best_rdu = tmp_rd;
+ tmp_best_rate = rate;
+ tmp_best_ratey = rate_y;
+ tmp_best_distortion = distortion;
+ tmp_best_skippable = skippable;
+ vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&tmp_best_partition, x->partition_info,
+ sizeof(PARTITION_INFO));
+ for (i = 0; i < 16; i++) {
+ tmp_best_bmodes[i] = xd->block[i].bmi;
+ }
+ pred_exists = 1;
+ }
+ } // switchable_filter_index loop
+
+ mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
+ tmp_best_filter : cm->mcomp_filter_type);
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ if (!pred_exists) {
+ // Handles the special case when a filter that is not in the
+ // switchable list (bilinear, 6-tap) is indicated at the frame level
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
+ second_ref, best_yrd, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ (int)this_rd_thresh, seg_mvs,
+ txfm_cache);
+ } else {
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
+ [vp9_get_pred_context(&cpi->common, xd,
+ PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+ tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
+ }
+ tmp_rd = tmp_best_rdu;
+ rate = tmp_best_rate;
+ rate_y = tmp_best_ratey;
+ distortion = tmp_best_distortion;
+ skippable = tmp_best_skippable;
+ vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(x->partition_info, &tmp_best_partition,
+ sizeof(PARTITION_INFO));
+ for (i = 0; i < 16; i++) {
+ xd->block[i].bmi = xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
+ }
+ }
+
rate2 += rate;
distortion2 += distortion;
@@ -3920,7 +4595,7 @@
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
[vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
- [vp9_switchable_interp_map[mbmi->interp_filter]];
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
// If even the 'Y' rd value of split is higher than best so far
// then dont bother looking at UV
@@ -3927,8 +4602,11 @@
if (tmp_rd < best_yrd) {
int uv_skippable;
- rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- cpi->common.full_pixel);
+ vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col);
+ vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
+ x->e_mbd.predictor, x->src.uv_stride);
+ rd_inter16x16_uv_4x4(cpi, x, &rate_uv, &distortion_uv,
+ cpi->common.full_pixel, &uv_skippable, 1);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -3969,8 +4647,9 @@
#endif
&rate_y, &distortion,
&rate_uv, &distortion_uv,
- &mode_excluded, &disable_skip, recon_yoffset,
- mode_index, frame_mv);
+ &mode_excluded, &disable_skip,
+ mode_index, &tmp_best_filter, frame_mv,
+ scaled_ref_frame, mb_row, mb_col);
if (this_rd == INT64_MAX)
continue;
}
@@ -3995,10 +4674,8 @@
if (cpi->common.mb_no_coeff_skip) {
int mb_skip_allowed;
- // Is Mb level skip allowed for this mb.
- mb_skip_allowed =
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+ // Is Mb level skip allowed (i.e. not coded at segment level).
+ mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
if (skippable) {
mbmi->mb_skip_coeff = 1;
@@ -4050,8 +4727,10 @@
(this_rd < best_intra16_rd)) {
best_intra16_rd = this_rd;
best_intra16_mode = this_mode;
+#if SEPARATE_INTERINTRA_UV
best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ?
uv_intra_mode_8x8 : uv_intra_mode);
+#endif
}
#endif
@@ -4061,7 +4740,7 @@
if (this_rd < best_overall_rd) {
best_overall_rd = this_rd;
- best_filter = mbmi->interp_filter;
+ best_filter = tmp_best_filter;
best_mode = this_mode;
#if CONFIG_COMP_INTERINTRA_PRED
is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
@@ -4175,7 +4854,7 @@
if (x->skip && !mode_excluded)
break;
- }
+ }
assert((cm->mcomp_filter_type == SWITCHABLE) ||
(cm->mcomp_filter_type == best_mbmode.interp_filter) ||
@@ -4204,12 +4883,11 @@
cpi->rd_thresh_mult[best_mode_index];
}
- // This code force Altref,0,0 and skip for the frame that overlays a
+ // This code forces Altref,0,0 and skip for the frame that overlays a
// an alrtef unless Altref is filtered. However, this is unsafe if
- // segment level coding of ref frame or mode is enabled for this
+ // segment level coding of ref frame is enabled for this
// segment.
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
cpi->is_src_frame_alt_ref &&
(cpi->oxcf.arnr_max_frames == 0) &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
@@ -4224,6 +4902,8 @@
mbmi->mb_skip_coeff =
(cpi->common.mb_no_coeff_skip) ? 1 : 0;
mbmi->partitioning = 0;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
@@ -4244,10 +4924,12 @@
if (best_mbmode.mode == SPLITMV) {
for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mv.first.as_int = best_bmodes[i].as_mv.first.as_int;
+ xd->mode_info_context->bmi[i].as_mv[0].as_int =
+ best_bmodes[i].as_mv[0].as_int;
if (mbmi->second_ref_frame > 0)
for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mv.second.as_int = best_bmodes[i].as_mv.second.as_int;
+ xd->mode_info_context->bmi[i].as_mv[1].as_int =
+ best_bmodes[i].as_mv[1].as_int;
vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
@@ -4265,7 +4947,7 @@
if (!x->skip) {
for (i = 0; i < NB_TXFM_MODES; i++) {
if (best_txfm_rd[i] == INT64_MAX)
- best_txfm_diff[i] = INT_MIN;
+ best_txfm_diff[i] = 0;
else
best_txfm_diff[i] = best_rd - best_txfm_rd[i];
}
@@ -4274,6 +4956,8 @@
}
end:
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
best_mode_index, &best_partition,
&mbmi->ref_mvs[mbmi->ref_frame][0],
@@ -4291,22 +4975,29 @@
int rate_y_tokenonly = 0, rate_uv_tokenonly;
int dist_y = 0, dist_uv;
int y_skip = 0, uv_skip;
- int64_t txfm_cache[NB_TXFM_MODES];
+ int64_t txfm_cache[NB_TXFM_MODES], err;
+ int i;
- rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
- &dist_y, &y_skip, txfm_cache);
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+ err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
+ &dist_y, &y_skip, txfm_cache);
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
- &dist_uv, &uv_skip);
+ &dist_uv, &uv_skip);
if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) {
*returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
*returndist = dist_y + (dist_uv >> 2);
+ memset(x->sb32_context[xd->sb_index].txfm_rd_diff, 0,
+ sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff));
} else {
*returnrate = rate_y + rate_uv;
if (cpi->common.mb_no_coeff_skip)
*returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist_y + (dist_uv >> 2);
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ x->sb32_context[xd->sb_index].txfm_rd_diff[i] = err - txfm_cache[i];
+ }
}
}
@@ -4319,22 +5010,29 @@
int rate_y_tokenonly = 0, rate_uv_tokenonly;
int dist_y = 0, dist_uv;
int y_skip = 0, uv_skip;
- int64_t txfm_cache[NB_TXFM_MODES];
+ int64_t txfm_cache[NB_TXFM_MODES], err;
+ int i;
- rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
- &dist_y, &y_skip, txfm_cache);
+ xd->mode_info_context->mbmi.mode = DC_PRED;
+ err = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
+ &dist_y, &y_skip, txfm_cache);
rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
- &dist_uv, &uv_skip);
+ &dist_uv, &uv_skip);
if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) {
*returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
*returndist = dist_y + (dist_uv >> 2);
+ memset(x->sb64_context.txfm_rd_diff, 0,
+ sizeof(x->sb64_context.txfm_rd_diff));
} else {
*returnrate = rate_y + rate_uv;
if (cm->mb_no_coeff_skip)
*returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist_y + (dist_uv >> 2);
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ x->sb64_context.txfm_rd_diff[i] = err - txfm_cache[i];
+ }
}
}
@@ -4356,13 +5054,14 @@
int mode16x16;
int mode8x8[4];
int dist;
- int modeuv, uv_intra_skippable, uv_intra_skippable_8x8;
+ int modeuv, modeuv8x8, uv_intra_skippable, uv_intra_skippable_8x8;
int y_intra16x16_skippable = 0;
- int64_t txfm_cache[NB_TXFM_MODES];
- TX_SIZE txfm_size_16x16;
+ int64_t txfm_cache[2][NB_TXFM_MODES];
+ TX_SIZE txfm_size_16x16, txfm_size_8x8;
int i;
mbmi->ref_frame = INTRA_FRAME;
+ mbmi->mode = DC_PRED;
rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv,
&uv_intra_skippable);
modeuv = mbmi->uv_mode;
@@ -4369,47 +5068,71 @@
if (cpi->common.txfm_mode != ONLY_4X4) {
rd_pick_intra_mbuv_mode_8x8(cpi, x, &rateuv8x8, &rateuv8x8_tokenonly,
&distuv8x8, &uv_intra_skippable_8x8);
+ modeuv8x8 = mbmi->uv_mode;
} else {
uv_intra_skippable_8x8 = uv_intra_skippable;
rateuv8x8 = rateuv;
distuv8x8 = distuv;
rateuv8x8_tokenonly = rateuv_tokenonly;
+ modeuv8x8 = modeuv;
}
// current macroblock under rate-distortion optimization test loop
error16x16 = rd_pick_intra16x16mby_mode(cpi, x, &rate16x16,
&rate16x16_tokenonly, &dist16x16,
- &y_intra16x16_skippable, txfm_cache);
+ &y_intra16x16_skippable,
+ txfm_cache[1]);
mode16x16 = mbmi->mode;
txfm_size_16x16 = mbmi->txfm_size;
+ if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable &&
+ ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable) ||
+ (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable_8x8))) {
+ error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0);
+ rate16x16 -= rate16x16_tokenonly;
+ }
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ txfm_cache[0][i] = error16x16 - txfm_cache[1][cm->txfm_mode] +
+ txfm_cache[1][i];
+ }
- // FIXME(rbultje) support transform-size selection
- mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8;
- error8x8 = rd_pick_intra8x8mby_modes(cpi, x, &rate8x8, &rate8x8_tokenonly,
- &dist8x8, error16x16);
- mode8x8[0]= xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1]= xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2]= xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3]= xd->mode_info_context->bmi[10].as_mode.first;
+ error8x8 = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate8x8,
+ &rate8x8_tokenonly,
+ &dist8x8, mode8x8,
+ error16x16, txfm_cache[1]);
+ txfm_size_8x8 = mbmi->txfm_size;
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ int64_t tmp_rd = error8x8 - txfm_cache[1][cm->txfm_mode] + txfm_cache[1][i];
+ if (tmp_rd < txfm_cache[0][i])
+ txfm_cache[0][i] = tmp_rd;
+ }
+ mbmi->txfm_size = TX_4X4;
error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
&rate4x4, &rate4x4_tokenonly,
- &dist4x4, error16x16,
- cpi->update_context);
+ &dist4x4, error16x16);
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ if (error4x4 < txfm_cache[0][i])
+ txfm_cache[0][i] = error4x4;
+ }
mbmi->mb_skip_coeff = 0;
- if (cpi->common.mb_no_coeff_skip &&
- y_intra16x16_skippable && uv_intra_skippable_8x8) {
+ if (cpi->common.mb_no_coeff_skip && y_intra16x16_skippable &&
+ ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable) ||
+ (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable_8x8))) {
mbmi->mb_skip_coeff = 1;
mbmi->mode = mode16x16;
- mbmi->uv_mode = modeuv;
- rate = rateuv8x8 + rate16x16 - rateuv8x8_tokenonly - rate16x16_tokenonly +
- vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
- dist = dist16x16 + (distuv8x8 >> 2);
+ mbmi->uv_mode = (cm->txfm_mode == ONLY_4X4) ? modeuv : modeuv8x8;
+ rate = rate16x16 + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
+ dist = dist16x16;
+ if (cm->txfm_mode == ONLY_4X4) {
+ rate += rateuv - rateuv_tokenonly;
+ dist += (distuv >> 2);
+ } else {
+ rate += rateuv8x8 - rateuv8x8_tokenonly;
+ dist += (distuv8x8 >> 2);
+ }
mbmi->txfm_size = txfm_size_16x16;
- memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
- sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
} else if (error8x8 > error16x16) {
if (error4x4 < error16x16) {
rate = rateuv + rate4x4;
@@ -4416,17 +5139,11 @@
mbmi->mode = B_PRED;
mbmi->txfm_size = TX_4X4;
dist = dist4x4 + (distuv >> 2);
- memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
- sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
} else {
mbmi->txfm_size = txfm_size_16x16;
mbmi->mode = mode16x16;
rate = rate16x16 + rateuv8x8;
dist = dist16x16 + (distuv8x8 >> 2);
- for (i = 0; i < NB_TXFM_MODES; i++) {
- x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
- error16x16 - txfm_cache[i];
- }
}
if (cpi->common.mb_no_coeff_skip)
rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
@@ -4436,28 +5153,28 @@
mbmi->mode = B_PRED;
mbmi->txfm_size = TX_4X4;
dist = dist4x4 + (distuv >> 2);
- memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
- sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
} else {
- // FIXME(rbultje) support transform-size selection
mbmi->mode = I8X8_PRED;
- mbmi->txfm_size = (cm->txfm_mode == ONLY_4X4) ? TX_4X4 : TX_8X8;
+ mbmi->txfm_size = txfm_size_8x8;
set_i8x8_block_modes(x, mode8x8);
rate = rate8x8 + rateuv;
dist = dist8x8 + (distuv >> 2);
- memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
- sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
}
if (cpi->common.mb_no_coeff_skip)
rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
}
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
+ txfm_cache[0][cm->txfm_mode] - txfm_cache[0][i];
+ }
+
*returnrate = rate;
*returndist = dist;
}
static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int *returnrate,
int *returndistortion,
int block_size) {
@@ -4471,13 +5188,13 @@
int comp_pred, i;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
int frame_mdcounts[4][4];
- uint8_t *y_buffer[4];
- uint8_t *u_buffer[4];
- uint8_t *v_buffer[4];
+ YV12_BUFFER_CONFIG yv12_mb[4];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
- int idx_list[4] = { 0, cpi->common.lst_fb_idx, cpi->common.gld_fb_idx,
- cpi->common.alt_fb_idx };
+ int idx_list[4] = {0,
+ cpi->lst_fb_idx,
+ cpi->gld_fb_idx,
+ cpi->alt_fb_idx};
int mdcounts[4];
int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
int saddone = 0;
@@ -4492,20 +5209,23 @@
#if CONFIG_COMP_INTERINTRA_PRED
int is_best_interintra = 0;
int64_t best_intra16_rd = INT64_MAX;
- int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED;
+ int best_intra16_mode = DC_PRED;
+#if SEPARATE_INTERINTRA_UV
+ int best_intra16_uv_mode = DC_PRED;
#endif
+#endif
int64_t best_overall_rd = INT64_MAX;
INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
+ INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
int rate_uv_4x4 = 0, rate_uv_8x8 = 0, rate_uv_tokenonly_4x4 = 0,
rate_uv_tokenonly_8x8 = 0;
int dist_uv_4x4 = 0, dist_uv_8x8 = 0, uv_skip_4x4 = 0, uv_skip_8x8 = 0;
MB_PREDICTION_MODE mode_uv_4x4 = NEARESTMV, mode_uv_8x8 = NEARESTMV;
- int switchable_filter_index = 0;
int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0;
int dist_uv_16x16 = 0, uv_skip_16x16 = 0;
MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV;
+ struct scale_factors scale_factor[4];
- x->skip = 0;
xd->mode_info_context->mbmi.segment_id = segment_id;
estimate_ref_frame_costs(cpi, segment_id, ref_costs);
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -4518,9 +5238,9 @@
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
- recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV],
+ mb_row, mb_col, frame_mv[NEARESTMV],
frame_mv[NEARMV], frame_mdcounts,
- y_buffer, u_buffer, v_buffer);
+ yv12_mb, scale_factor);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
@@ -4570,8 +5290,7 @@
}
}
- for (mode_index = 0; mode_index < MAX_MODES;
- mode_index += (!switchable_filter_index)) {
+ for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
int disable_skip = 0;
@@ -4588,10 +5307,10 @@
// Test best rd so far against threshold for trying this mode.
if (best_rd <= cpi->rd_threshes[mode_index] ||
cpi->rd_threshes[mode_index] == INT_MAX) {
- switchable_filter_index = 0;
continue;
}
+ x->skip = 0;
this_mode = vp9_mode_order[mode_index].mode;
ref_frame = vp9_mode_order[mode_index].ref_frame;
if (!(ref_frame == INTRA_FRAME ||
@@ -4600,6 +5319,8 @@
}
mbmi->ref_frame = ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
mbmi->mode = this_mode;
mbmi->uv_mode = DC_PRED;
@@ -4607,19 +5328,11 @@
mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
#endif
+
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
- if (this_mode >= NEARESTMV && this_mode <= SPLITMV) {
- mbmi->interp_filter =
- vp9_switchable_interp[switchable_filter_index++];
- if (switchable_filter_index == VP9_SWITCHABLE_FILTERS)
- switchable_filter_index = 0;
- if ((cm->mcomp_filter_type != SWITCHABLE) &&
- (cm->mcomp_filter_type != mbmi->interp_filter)) {
- mode_excluded = 1;
- }
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
- }
+ mbmi->interp_filter = cm->mcomp_filter_type;
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
// if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
// continue;
@@ -4640,10 +5353,10 @@
if (!(cpi->ref_frame_flags & flag_list[second_ref]))
continue;
mbmi->second_ref_frame = second_ref;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
- xd->second_pre.y_buffer = y_buffer[second_ref];
- xd->second_pre.u_buffer = u_buffer[second_ref];
- xd->second_pre.v_buffer = v_buffer[second_ref];
+ xd->second_pre = yv12_mb[second_ref];
mode_excluded =
mode_excluded ?
mode_excluded : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
@@ -4661,9 +5374,7 @@
}
}
- xd->pre.y_buffer = y_buffer[ref_frame];
- xd->pre.u_buffer = u_buffer[ref_frame];
- xd->pre.v_buffer = v_buffer[ref_frame];
+ xd->pre = yv12_mb[ref_frame];
vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], sizeof(mdcounts));
// If the segment reference frame feature is enabled....
@@ -4671,16 +5382,15 @@
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
!vp9_check_segref(xd, segment_id, ref_frame)) {
continue;
- // If the segment mode feature is enabled....
+ // If the segment skip feature is enabled....
// then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
- (this_mode != vp9_get_segdata(xd, segment_id, SEG_LVL_MODE))) {
+ } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
+ (this_mode != ZEROMV)) {
continue;
- // Disable this drop out case if either the mode or ref frame
+ // Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative
@@ -4722,6 +5432,20 @@
rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv;
distortion2 = distortion_y + distortion_uv;
} else {
+ YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
+ int fb;
+
+ if (mbmi->ref_frame == LAST_FRAME) {
+ fb = cpi->lst_fb_idx;
+ } else if (mbmi->ref_frame == GOLDEN_FRAME) {
+ fb = cpi->gld_fb_idx;
+ } else {
+ fb = cpi->alt_fb_idx;
+ }
+
+ if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
+ scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
+
#if CONFIG_COMP_INTERINTRA_PRED
if (mbmi->second_ref_frame == INTRA_FRAME) {
if (best_intra16_mode == DC_PRED - 1) continue;
@@ -4742,8 +5466,9 @@
#endif
&rate_y, &distortion_y,
&rate_uv, &distortion_uv,
- &mode_excluded, &disable_skip, recon_yoffset,
- mode_index, frame_mv);
+ &mode_excluded, &disable_skip,
+ mode_index, &tmp_best_filter, frame_mv,
+ scaled_ref_frame, mb_row, mb_col);
if (this_rd == INT64_MAX)
continue;
}
@@ -4769,10 +5494,8 @@
if (cpi->common.mb_no_coeff_skip) {
int mb_skip_allowed;
- // Is Mb level skip allowed for this mb.
- mb_skip_allowed =
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+ // Is Mb level skip allowed (i.e. not coded at segment level).
+ mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
if (skippable) {
// Back out the coefficient coding costs
@@ -4821,8 +5544,10 @@
(this_rd < best_intra16_rd)) {
best_intra16_rd = this_rd;
best_intra16_mode = this_mode;
+#if SEPARATE_INTERINTRA_UV
best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ?
mode_uv_8x8 : mode_uv_4x4);
+#endif
}
#endif
@@ -4832,7 +5557,7 @@
if (this_rd < best_overall_rd) {
best_overall_rd = this_rd;
- best_filter = mbmi->interp_filter;
+ best_filter = tmp_best_filter;
best_mode = this_mode;
#if CONFIG_COMP_INTERINTRA_PRED
is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
@@ -4956,10 +5681,8 @@
// This code forces Altref,0,0 and skip for the frame that overlays a
// an alrtef unless Altref is filtered. However, this is unsafe if
- // segment level coding of ref frame or mode is enabled for this
- // segment.
+ // segment level coding of ref frame is enabled for this segment.
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
cpi->is_src_frame_alt_ref &&
(cpi->oxcf.arnr_max_frames == 0) &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
@@ -4971,7 +5694,7 @@
mbmi->mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
mbmi->partitioning = 0;
mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ?
- TX_16X16 : cm->txfm_mode;
+ TX_32X32 : cm->txfm_mode;
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
@@ -4991,7 +5714,7 @@
if (!x->skip) {
for (i = 0; i < NB_TXFM_MODES; i++) {
if (best_txfm_rd[i] == INT64_MAX)
- best_txfm_diff[i] = INT_MIN;
+ best_txfm_diff[i] = 0;
else
best_txfm_diff[i] = best_rd - best_txfm_rd[i];
}
@@ -5000,6 +5723,8 @@
}
end:
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
{
PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ?
&x->sb32_context[xd->sb_index] :
@@ -5015,24 +5740,23 @@
}
int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int *returnrate,
int *returndistortion) {
- return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset,
+ return vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col,
returnrate, returndistortion, BLOCK_32X32);
}
int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset,
+ int mb_row, int mb_col,
int *returnrate,
int *returndistortion) {
- return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset,
+ return vp9_rd_pick_inter_mode_sb(cpi, x, mb_row, mb_col,
returnrate, returndistortion, BLOCK_64X64);
}
void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset,
- int recon_uvoffset,
+ int mb_row, int mb_col,
int *totalrate, int *totaldist) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -5050,7 +5774,7 @@
{
int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
- rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
+ rd_pick_inter_mode(cpi, x, mb_row, mb_col, &rate,
&distortion, &intra_error);
/* restore cpi->zbin_mode_boost_enabled */
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -15,34 +15,34 @@
#define RDCOST(RM,DM,R,D) ( ((128+((int64_t)R)*(RM)) >> 8) + ((int64_t)DM)*(D) )
#define RDCOST_8x8(RM,DM,R,D) ( ((128+((int64_t)R)*(RM)) >> 8) + ((int64_t)DM)*(D) )
-extern void vp9_initialize_rd_consts(VP9_COMP *cpi, int Qvalue);
+void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex);
-extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex);
+void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
-extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
- int *r, int *d);
+void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
+ int *r, int *d);
-extern void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
- int *r, int *d);
+void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
+ int *r, int *d);
-extern void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
- int *r, int *d);
+void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
+ int *r, int *d);
-extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
- int ref_yoffset, int ref_uvoffset,
- int *r, int *d);
+void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
+ int mb_row, int mb_col,
+ int *r, int *d);
-extern int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
- int ref_yoffset, int ref_uvoffset,
- int *r, int *d);
+int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
+ int mb_row, int mb_col,
+ int *r, int *d);
-extern int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
- int ref_yoffset, int ref_uvoffset,
- int *r, int *d);
+int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
+ int mb_row, int mb_col,
+ int *r, int *d);
-extern void vp9_init_me_luts();
+void vp9_init_me_luts();
-extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x,
- MB_PREDICTION_MODE mb, int_mv *mv);
+void vp9_set_mbmode_and_mvs(MACROBLOCK *x,
+ MB_PREDICTION_MODE mb, int_mv *mv);
#endif // VP9_ENCODER_VP9_RDOPT_H_
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -13,12 +13,13 @@
#include "vp9/common/vp9_sadmxn.h"
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "./vp9_rtcd.h"
unsigned int vp9_sad64x64_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64);
}
@@ -26,7 +27,7 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32);
}
@@ -34,7 +35,7 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16);
}
@@ -42,7 +43,7 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 8);
}
@@ -51,7 +52,7 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 8);
}
@@ -59,7 +60,7 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16);
}
@@ -68,7 +69,7 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- int max_sad) {
+ unsigned int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4);
}
@@ -77,12 +78,12 @@
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad64x64(src_ptr, src_stride, ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad64x64(src_ptr, src_stride, ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad64x64(src_ptr, src_stride, ref_ptr + 2, ref_stride,
+ 0x7fffffff);
}
void vp9_sad32x32x3_c(const uint8_t *src_ptr,
@@ -90,12 +91,12 @@
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad64x64x8_c(const uint8_t *src_ptr,
@@ -102,31 +103,31 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad32x32x8_c(const uint8_t *src_ptr,
@@ -133,31 +134,31 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad16x16x3_c(const uint8_t *src_ptr,
@@ -165,12 +166,12 @@
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad16x16x8_c(const uint8_t *src_ptr,
@@ -177,31 +178,31 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad16x8x3_c(const uint8_t *src_ptr,
@@ -209,12 +210,12 @@
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad16x8x8_c(const uint8_t *src_ptr,
@@ -221,31 +222,31 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad8x8x3_c(const uint8_t *src_ptr,
@@ -253,12 +254,12 @@
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad8x8x8_c(const uint8_t *src_ptr,
@@ -265,31 +266,31 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad8x16x3_c(const uint8_t *src_ptr,
@@ -297,12 +298,12 @@
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad8x16x8_c(const uint8_t *src_ptr,
@@ -309,31 +310,31 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad4x4x3_c(const uint8_t *src_ptr,
@@ -341,12 +342,12 @@
const uint8_t *ref_ptr,
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr, ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride, 0x7fffffff);
}
void vp9_sad4x4x8_c(const uint8_t *src_ptr,
@@ -353,192 +354,134 @@
int src_stride,
const uint8_t *ref_ptr,
int ref_stride,
- uint16_t *sad_array) {
- sad_array[0] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr, ref_stride,
- 0x7fffffff);
- sad_array[1] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 1, ref_stride,
- 0x7fffffff);
- sad_array[2] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 2, ref_stride,
- 0x7fffffff);
- sad_array[3] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 3, ref_stride,
- 0x7fffffff);
- sad_array[4] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 4, ref_stride,
- 0x7fffffff);
- sad_array[5] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 5, ref_stride,
- 0x7fffffff);
- sad_array[6] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 6, ref_stride,
- 0x7fffffff);
- sad_array[7] = (uint16_t)vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr + 7, ref_stride,
- 0x7fffffff);
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
}
void vp9_sad64x64x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t* const ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad64x64_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad64x64(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad32x32x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t* const ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad32x32_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad32x32(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad16x16x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t* const ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad16x16_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad16x16(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad16x8x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t* const ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad16x8_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad16x8(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad8x8x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t* const ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad8x8_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad8x8(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad8x16x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t* const ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad8x16_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
+ sad_array[0] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad8x16(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
int src_stride,
- uint8_t *ref_ptr[],
+ const uint8_t* const ref_ptr[],
int ref_stride,
unsigned int *sad_array) {
- sad_array[0] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr[0], ref_stride, 0x7fffffff);
- sad_array[1] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr[1], ref_stride, 0x7fffffff);
- sad_array[2] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr[2], ref_stride, 0x7fffffff);
- sad_array[3] = vp9_sad4x4_c(src_ptr, src_stride,
- ref_ptr[3], ref_stride, 0x7fffffff);
-}
-
-/* Copy 2 macroblocks to a buffer */
-void vp9_copy32xn_c(uint8_t *src_ptr,
- int src_stride,
- uint8_t *dst_ptr,
- int dst_stride,
- int height) {
- int r;
-
- for (r = 0; r < height; r++) {
-#if !(CONFIG_FAST_UNALIGNED)
- dst_ptr[0] = src_ptr[0];
- dst_ptr[1] = src_ptr[1];
- dst_ptr[2] = src_ptr[2];
- dst_ptr[3] = src_ptr[3];
- dst_ptr[4] = src_ptr[4];
- dst_ptr[5] = src_ptr[5];
- dst_ptr[6] = src_ptr[6];
- dst_ptr[7] = src_ptr[7];
- dst_ptr[8] = src_ptr[8];
- dst_ptr[9] = src_ptr[9];
- dst_ptr[10] = src_ptr[10];
- dst_ptr[11] = src_ptr[11];
- dst_ptr[12] = src_ptr[12];
- dst_ptr[13] = src_ptr[13];
- dst_ptr[14] = src_ptr[14];
- dst_ptr[15] = src_ptr[15];
- dst_ptr[16] = src_ptr[16];
- dst_ptr[17] = src_ptr[17];
- dst_ptr[18] = src_ptr[18];
- dst_ptr[19] = src_ptr[19];
- dst_ptr[20] = src_ptr[20];
- dst_ptr[21] = src_ptr[21];
- dst_ptr[22] = src_ptr[22];
- dst_ptr[23] = src_ptr[23];
- dst_ptr[24] = src_ptr[24];
- dst_ptr[25] = src_ptr[25];
- dst_ptr[26] = src_ptr[26];
- dst_ptr[27] = src_ptr[27];
- dst_ptr[28] = src_ptr[28];
- dst_ptr[29] = src_ptr[29];
- dst_ptr[30] = src_ptr[30];
- dst_ptr[31] = src_ptr[31];
-#else
- ((uint32_t *)dst_ptr)[0] = ((uint32_t *)src_ptr)[0];
- ((uint32_t *)dst_ptr)[1] = ((uint32_t *)src_ptr)[1];
- ((uint32_t *)dst_ptr)[2] = ((uint32_t *)src_ptr)[2];
- ((uint32_t *)dst_ptr)[3] = ((uint32_t *)src_ptr)[3];
- ((uint32_t *)dst_ptr)[4] = ((uint32_t *)src_ptr)[4];
- ((uint32_t *)dst_ptr)[5] = ((uint32_t *)src_ptr)[5];
- ((uint32_t *)dst_ptr)[6] = ((uint32_t *)src_ptr)[6];
- ((uint32_t *)dst_ptr)[7] = ((uint32_t *)src_ptr)[7];
-#endif
- src_ptr += src_stride;
- dst_ptr += dst_stride;
-
- }
+ sad_array[0] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad4x4(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
--- a/vp9/encoder/vp9_satd_c.c
+++ /dev/null
@@ -1,48 +1,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-#include "vpx_ports/mem.h"
-#include "./vp9_rtcd.h"
-
-unsigned int vp9_satd16x16_c(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- unsigned int *psatd) {
- int r, c, i;
- unsigned int satd = 0;
- DECLARE_ALIGNED(16, int16_t, diff_in[256]);
- DECLARE_ALIGNED(16, int16_t, diff_out[16]);
- int16_t *in;
-
- for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
- diff_in[r * 16 + c] = src_ptr[c] - ref_ptr[c];
- }
- src_ptr += src_stride;
- ref_ptr += ref_stride;
- }
-
- in = diff_in;
- for (r = 0; r < 16; r += 4) {
- for (c = 0; c < 16; c += 4) {
- vp9_short_walsh4x4_c(in + c, diff_out, 32);
- for (i = 0; i < 16; i++)
- satd += abs(diff_out[i]);
- }
- in += 64;
- }
-
- if (psatd)
- *psatd = satd;
-
- return satd;
-}
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -9,10 +9,11 @@
*/
-#include "limits.h"
+#include <limits.h>
#include "vpx_mem/vpx_mem.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_tile_common.h"
void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x) {
int mb_row, mb_col;
@@ -21,7 +22,7 @@
x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
- if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) {
+ if ((cm->frame_type == KEY_FRAME) || (cpi->refresh_golden_frame)) {
// Reset Gf useage monitors
vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
@@ -143,11 +144,74 @@
return cost;
}
+// Based on set of segment counts calculate a probability tree
+static void calc_segtree_probs_pred(MACROBLOCKD *xd,
+ int (*segcounts)[MAX_MB_SEGMENTS],
+ vp9_prob *segment_tree_probs,
+ vp9_prob *mod_probs) {
+ int count[4];
+
+ assert(!segcounts[0][0] && !segcounts[1][1] &&
+ !segcounts[2][2] && !segcounts[3][3]);
+
+ // Total count for all segments
+ count[0] = segcounts[3][0] + segcounts[1][0] + segcounts[2][0];
+ count[1] = segcounts[2][1] + segcounts[0][1] + segcounts[3][1];
+ count[2] = segcounts[0][2] + segcounts[3][2] + segcounts[1][2];
+ count[3] = segcounts[1][3] + segcounts[2][3] + segcounts[0][3];
+
+ // Work out probabilities of each segment
+ segment_tree_probs[0] = get_binary_prob(count[0] + count[1],
+ count[2] + count[3]);
+ segment_tree_probs[1] = get_binary_prob(count[0], count[1]);
+ segment_tree_probs[2] = get_binary_prob(count[2], count[3]);
+
+ // now work out modified counts that the decoder would have
+ count[0] = segment_tree_probs[0] * segment_tree_probs[1];
+ count[1] = segment_tree_probs[0] * (256 - segment_tree_probs[1]);
+ count[2] = (256 - segment_tree_probs[0]) * segment_tree_probs[2];
+ count[3] = (256 - segment_tree_probs[0]) * (256 - segment_tree_probs[2]);
+
+ // Work out modified probabilties depending on what segment was predicted
+ mod_probs[0] = get_binary_prob(count[1], count[2] + count[3]);
+ mod_probs[1] = get_binary_prob(count[0], count[2] + count[3]);
+ mod_probs[2] = get_binary_prob(count[0] + count[1], count[3]);
+ mod_probs[3] = get_binary_prob(count[0] + count[1], count[2]);
+}
+
+// Based on set of segment counts and probabilities calculate a cost estimate
+static int cost_segmap_pred(MACROBLOCKD *xd,
+ int (*segcounts)[MAX_MB_SEGMENTS],
+ vp9_prob *probs, vp9_prob *mod_probs) {
+ int pred_seg, cost = 0;
+
+ for (pred_seg = 0; pred_seg < MAX_MB_SEGMENTS; pred_seg++) {
+ int count1, count2;
+
+ // Cost the top node of the tree
+ count1 = segcounts[pred_seg][0] + segcounts[pred_seg][1];
+ count2 = segcounts[pred_seg][2] + segcounts[pred_seg][3];
+ cost += count1 * vp9_cost_zero(mod_probs[pred_seg]) +
+ count2 * vp9_cost_one(mod_probs[pred_seg]);
+
+ // Now add the cost of each individual segment branch
+ if (pred_seg >= 2 && count1) {
+ cost += segcounts[pred_seg][0] * vp9_cost_zero(probs[1]) +
+ segcounts[pred_seg][1] * vp9_cost_one(probs[1]);
+ } else if (pred_seg < 2 && count2 > 0) {
+ cost += segcounts[pred_seg][2] * vp9_cost_zero(probs[2]) +
+ segcounts[pred_seg][3] * vp9_cost_one(probs[2]);
+ }
+ }
+
+ return cost;
+}
+
static void count_segs(VP9_COMP *cpi,
MODE_INFO *mi,
int *no_pred_segcounts,
int (*temporal_predictor_count)[2],
- int *t_unpred_seg_counts,
+ int (*t_unpred_seg_counts)[MAX_MB_SEGMENTS],
int mb_size, int mb_row, int mb_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -155,10 +219,8 @@
const int segment_id = mi->mbmi.segment_id;
xd->mode_info_context = mi;
- xd->mb_to_top_edge = -((mb_row * 16) << 3);
- xd->mb_to_left_edge = -((mb_col * 16) << 3);
- xd->mb_to_bottom_edge = ((cm->mb_rows - mb_size - mb_row) * 16) << 3;
- xd->mb_to_right_edge = ((cm->mb_cols - mb_size - mb_col) * 16) << 3;
+ set_mb_row(cm, xd, mb_row, mb_size);
+ set_mb_col(cm, xd, mb_col, mb_size);
// Count the number of hits on each segment with no prediction
no_pred_segcounts[segment_id]++;
@@ -166,8 +228,8 @@
// Temporal prediction not allowed on key frames
if (cm->frame_type != KEY_FRAME) {
// Test to see if the segment id matches the predicted value.
- const int seg_predicted =
- (segment_id == vp9_get_pred_mb_segid(cm, xd, segmap_index));
+ const int pred_seg_id = vp9_get_pred_mb_segid(cm, xd, segmap_index);
+ const int seg_predicted = (segment_id == pred_seg_id);
// Get the segment id prediction context
const int pred_context = vp9_get_pred_context(cm, xd, PRED_SEG_ID);
@@ -179,7 +241,7 @@
if (!seg_predicted)
// Update the "unpredicted" segment count
- t_unpred_seg_counts[segment_id]++;
+ t_unpred_seg_counts[pred_seg_id][segment_id]++;
}
}
@@ -191,18 +253,19 @@
int t_pred_cost = INT_MAX;
int i;
- int mb_row, mb_col;
+ int tile_col, mb_row, mb_col;
int temporal_predictor_count[PREDICTION_PROBS][2];
int no_pred_segcounts[MAX_MB_SEGMENTS];
- int t_unpred_seg_counts[MAX_MB_SEGMENTS];
+ int t_unpred_seg_counts[MAX_MB_SEGMENTS][MAX_MB_SEGMENTS];
vp9_prob no_pred_tree[MB_FEATURE_TREE_PROBS];
vp9_prob t_pred_tree[MB_FEATURE_TREE_PROBS];
+ vp9_prob t_pred_tree_mod[MAX_MB_SEGMENTS];
vp9_prob t_nopred_prob[PREDICTION_PROBS];
const int mis = cm->mode_info_stride;
- MODE_INFO *mi_ptr = cm->mi, *mi;
+ MODE_INFO *mi_ptr, *mi;
// Set default state for the segment tree probabilities and the
// temporal coding probabilities
@@ -218,42 +281,49 @@
// First of all generate stats regarding how well the last segment map
// predicts this one
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
- mi = mi_ptr;
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) {
- if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
- count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, 4, mb_row, mb_col);
- } else {
- for (i = 0; i < 4; i++) {
- int x_idx = (i & 1) << 1, y_idx = i & 2;
- MODE_INFO *sb_mi = mi + y_idx * mis + x_idx;
+ for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
+ vp9_get_tile_col_offsets(cm, tile_col);
+ mi_ptr = cm->mi + cm->cur_tile_mb_col_start;
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
+ mi = mi_ptr;
+ for (mb_col = cm->cur_tile_mb_col_start;
+ mb_col < cm->cur_tile_mb_col_end; mb_col += 4, mi += 4) {
+ if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+ count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, 4, mb_row, mb_col);
+ } else {
+ for (i = 0; i < 4; i++) {
+ int x_idx = (i & 1) << 1, y_idx = i & 2;
+ MODE_INFO *sb_mi = mi + y_idx * mis + x_idx;
- if (mb_col + x_idx >= cm->mb_cols ||
- mb_row + y_idx >= cm->mb_rows) {
- continue;
- }
+ if (mb_col + x_idx >= cm->mb_cols ||
+ mb_row + y_idx >= cm->mb_rows) {
+ continue;
+ }
- if (sb_mi->mbmi.sb_type) {
- assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
- count_segs(cpi, sb_mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, 2, mb_row + y_idx, mb_col + x_idx);
- } else {
- int j;
+ if (sb_mi->mbmi.sb_type) {
+ assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
+ count_segs(cpi, sb_mi, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, 2,
+ mb_row + y_idx, mb_col + x_idx);
+ } else {
+ int j;
- for (j = 0; j < 4; j++) {
- const int x_idx_mb = x_idx + (j & 1), y_idx_mb = y_idx + (j >> 1);
- MODE_INFO *mb_mi = mi + x_idx_mb + y_idx_mb * mis;
+ for (j = 0; j < 4; j++) {
+ const int x_idx_mb = x_idx + (j & 1);
+ const int y_idx_mb = y_idx + (j >> 1);
+ MODE_INFO *mb_mi = mi + x_idx_mb + y_idx_mb * mis;
- if (mb_col + x_idx_mb >= cm->mb_cols ||
- mb_row + y_idx_mb >= cm->mb_rows) {
- continue;
- }
+ if (mb_col + x_idx_mb >= cm->mb_cols ||
+ mb_row + y_idx_mb >= cm->mb_rows) {
+ continue;
+ }
- assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
- count_segs(cpi, mb_mi, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts,
- 1, mb_row + y_idx_mb, mb_col + x_idx_mb);
+ assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
+ count_segs(cpi, mb_mi, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts,
+ 1, mb_row + y_idx_mb, mb_col + x_idx_mb);
+ }
}
}
}
@@ -270,8 +340,10 @@
if (cm->frame_type != KEY_FRAME) {
// Work out probability tree for coding those segments not
// predicted using the temporal method and the cost.
- calc_segtree_probs(xd, t_unpred_seg_counts, t_pred_tree);
- t_pred_cost = cost_segmap(xd, t_unpred_seg_counts, t_pred_tree);
+ calc_segtree_probs_pred(xd, t_unpred_seg_counts, t_pred_tree,
+ t_pred_tree_mod);
+ t_pred_cost = cost_segmap_pred(xd, t_unpred_seg_counts, t_pred_tree,
+ t_pred_tree_mod);
// Add in the cost of the signalling for each prediction context
for (i = 0; i < PREDICTION_PROBS; i++) {
@@ -291,6 +363,8 @@
cm->temporal_update = 1;
vpx_memcpy(xd->mb_segment_tree_probs,
t_pred_tree, sizeof(t_pred_tree));
+ vpx_memcpy(xd->mb_segment_mispred_tree_probs,
+ t_pred_tree_mod, sizeof(t_pred_tree_mod));
vpx_memcpy(&cm->segment_pred_probs,
t_nopred_prob, sizeof(t_nopred_prob));
} else {
--- a/vp9/encoder/vp9_segmentation.h
+++ b/vp9/encoder/vp9_segmentation.h
@@ -9,23 +9,20 @@
*/
-#include "string.h"
-#include "vp9/common/vp9_blockd.h"
-#include "vp9/encoder/vp9_onyx_int.h"
-
#ifndef VP9_ENCODER_VP9_SEGMENTATION_H_
#define VP9_ENCODER_VP9_SEGMENTATION_H_
-extern void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm,
- MACROBLOCK *x);
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_onyx_int.h"
-extern void vp9_enable_segmentation(VP9_PTR ptr);
-extern void vp9_disable_segmentation(VP9_PTR ptr);
+void vp9_update_gf_useage_maps(VP9_COMP *cpi, VP9_COMMON *cm, MACROBLOCK *x);
+void vp9_enable_segmentation(VP9_PTR ptr);
+void vp9_disable_segmentation(VP9_PTR ptr);
+
// Valid values for a segment are 0 to 3
// Segmentation map is arrange as [Rows][Columns]
-extern void vp9_set_segmentation_map(VP9_PTR ptr,
- unsigned char *segmentation_map);
+void vp9_set_segmentation_map(VP9_PTR ptr, unsigned char *segmentation_map);
// The values given for each segment can be either deltas (from the default
// value chosen for the frame) or absolute values.
@@ -37,10 +34,9 @@
//
// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use
// the absolute values given).
-//
-extern void vp9_set_segment_data(VP9_PTR ptr, signed char *feature_data,
- unsigned char abs_delta);
+void vp9_set_segment_data(VP9_PTR ptr, signed char *feature_data,
+ unsigned char abs_delta);
-extern void vp9_choose_segmap_coding_method(VP9_COMP *cpi);
+void vp9_choose_segmap_coding_method(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_SEGMENTATION_H_
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -8,8 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
+#include <limits.h>
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_quantize.h"
@@ -26,15 +29,9 @@
#include "vp9/common/vp9_swapyv12buffer.h"
#include "vpx_ports/vpx_timer.h"
-#include <math.h>
-#include <limits.h>
-
#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
-#if VP9_TEMPORAL_ALT_REF
-
-
static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
uint8_t *y_mb_ptr,
uint8_t *u_mb_ptr,
@@ -43,39 +40,44 @@
int mv_row,
int mv_col,
uint8_t *pred) {
- int offset;
- uint8_t *yptr, *uptr, *vptr;
- int omv_row, omv_col;
+ const int which_mv = 0;
+ int_mv subpel_mv;
+ int_mv fullpel_mv;
- // Y
- yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
+ subpel_mv.as_mv.row = mv_row;
+ subpel_mv.as_mv.col = mv_col;
+ // TODO(jkoleszar): Make this rounding consistent with the rest of the code
+ fullpel_mv.as_mv.row = (mv_row >> 1) & ~7;
+ fullpel_mv.as_mv.col = (mv_col >> 1) & ~7;
- if ((mv_row | mv_col) & 7) {
- xd->subpixel_predict16x16(yptr, stride,
- (mv_col & 7) << 1, (mv_row & 7) << 1, &pred[0], 16);
- } else {
- vp9_copy_mem16x16(yptr, stride, &pred[0], 16);
- }
+ vp9_build_inter_predictor(y_mb_ptr, stride,
+ &pred[0], 16,
+ &subpel_mv,
+ &xd->scale_factor[which_mv],
+ 16, 16,
+ which_mv <<
+ (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
+ &xd->subpix);
- // U & V
- omv_row = mv_row;
- omv_col = mv_col;
- mv_row >>= 1;
- mv_col >>= 1;
stride = (stride + 1) >> 1;
- offset = (mv_row >> 3) * stride + (mv_col >> 3);
- uptr = u_mb_ptr + offset;
- vptr = v_mb_ptr + offset;
- if ((omv_row | omv_col) & 15) {
- xd->subpixel_predict8x8(uptr, stride,
- (omv_col & 15), (omv_row & 15), &pred[256], 8);
- xd->subpixel_predict8x8(vptr, stride,
- (omv_col & 15), (omv_row & 15), &pred[320], 8);
- } else {
- vp9_copy_mem8x8(uptr, stride, &pred[256], 8);
- vp9_copy_mem8x8(vptr, stride, &pred[320], 8);
- }
+ vp9_build_inter_predictor_q4(u_mb_ptr, stride,
+ &pred[256], 8,
+ &fullpel_mv, &subpel_mv,
+ &xd->scale_factor_uv[which_mv],
+ 8, 8,
+ which_mv <<
+ (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
+ &xd->subpix);
+
+ vp9_build_inter_predictor_q4(v_mb_ptr, stride,
+ &pred[320], 8,
+ &fullpel_mv, &subpel_mv,
+ &xd->scale_factor_uv[which_mv],
+ 8, 8,
+ which_mv <<
+ (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT),
+ &xd->subpix);
}
void vp9_temporal_filter_apply_c(uint8_t *frame1,
@@ -170,7 +172,7 @@
/*cpi->sf.search_method == HEX*/
// TODO Check that the 16x16 vf & sdf are selected here
// Ignore mv costing by sending NULL pointer instead of cost arrays
- bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv.first,
+ bestsme = vp9_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv[0],
step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16],
NULL, NULL, NULL, NULL,
&best_ref_mv1);
@@ -182,7 +184,7 @@
int distortion;
unsigned int sse;
// Ignore mv costing by sending NULL pointer instead of cost array
- bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv.first,
+ bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv[0],
&best_ref_mv1,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
@@ -262,8 +264,8 @@
if (cpi->frames[frame] == NULL)
continue;
- mbd->block[0].bmi.as_mv.first.as_mv.row = 0;
- mbd->block[0].bmi.as_mv.first.as_mv.col = 0;
+ mbd->block[0].bmi.as_mv[0].as_mv.row = 0;
+ mbd->block[0].bmi.as_mv[0].as_mv.col = 0;
if (frame == alt_ref_index) {
filter_weight = 2;
@@ -296,8 +298,8 @@
cpi->frames[frame]->u_buffer + mb_uv_offset,
cpi->frames[frame]->v_buffer + mb_uv_offset,
cpi->frames[frame]->y_stride,
- mbd->block[0].bmi.as_mv.first.as_mv.row,
- mbd->block[0].bmi.as_mv.first.as_mv.col,
+ mbd->block[0].bmi.as_mv[0].as_mv.row,
+ mbd->block[0].bmi.as_mv[0].as_mv.col,
predictor);
// Apply the filter (YUV)
@@ -375,11 +377,7 @@
mbd->pre.v_buffer = v_buffer;
}
-void vp9_temporal_filter_prepare
-(
- VP9_COMP *cpi,
- int distance
-) {
+void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
int frame = 0;
int num_frames_backward = 0;
@@ -389,10 +387,8 @@
int frames_to_blur = 0;
int start_frame = 0;
- int strength = cpi->oxcf.arnr_strength;
-
+ int strength = cpi->active_arnr_strength;
int blur_type = cpi->oxcf.arnr_type;
-
int max_frames = cpi->active_arnr_frames;
num_frames_backward = distance;
@@ -464,6 +460,13 @@
, start_frame);
#endif
+ // Setup scaling factors. Scaling on each of the arnr frames is not supported
+ vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0],
+ &cpi->common.yv12_fb[cpi->common.new_fb_idx],
+ cpi->common.width,
+ cpi->common.height);
+ cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0];
+
// Setup frame pointers, NULL indicates frame not included in filter
vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
for (frame = 0; frame < frames_to_blur; frame++) {
@@ -479,4 +482,3 @@
frames_to_blur_backward,
strength);
}
-#endif
--- a/vp9/encoder/vp9_temporal_filter.h
+++ b/vp9/encoder/vp9_temporal_filter.h
@@ -11,6 +11,6 @@
#ifndef VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
#define VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
-extern void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
+void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
#endif // VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -25,23 +25,32 @@
compressions, then generating vp9_context.c = initial stats. */
#ifdef ENTROPY_STATS
-vp9_coeff_accum context_counters_4x4[BLOCK_TYPES_4X4];
-vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4];
-vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8];
-vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8];
-vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16];
-vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16];
-vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32];
+vp9_coeff_accum context_counters_4x4[BLOCK_TYPES];
+vp9_coeff_accum context_counters_8x8[BLOCK_TYPES];
+vp9_coeff_accum context_counters_16x16[BLOCK_TYPES];
+vp9_coeff_accum context_counters_32x32[BLOCK_TYPES];
-extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES_4X4];
-extern vp9_coeff_stats hybrid_tree_update_hist_4x4[BLOCK_TYPES_4X4];
-extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES_8X8];
-extern vp9_coeff_stats hybrid_tree_update_hist_8x8[BLOCK_TYPES_8X8];
-extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES_16X16];
-extern vp9_coeff_stats hybrid_tree_update_hist_16x16[BLOCK_TYPES_16X16];
-extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32];
+extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES];
+extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES];
+extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES];
+extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES];
#endif /* ENTROPY_STATS */
+#if CONFIG_CODE_NONZEROCOUNT
+#ifdef NZC_STATS
+unsigned int nzc_counts_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC4X4_TOKENS];
+unsigned int nzc_counts_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC8X8_TOKENS];
+unsigned int nzc_counts_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC16X16_TOKENS];
+unsigned int nzc_counts_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES]
+ [NZC32X32_TOKENS];
+unsigned int nzc_pcat_counts[MAX_NZC_CONTEXTS][NZC_TOKENS_EXTRA]
+ [NZC_BITS_EXTRA][2];
+#endif
+#endif
+
static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
const TOKENVALUE *vp9_dct_value_tokens_ptr;
static int dct_value_cost[DCT_MAX_VALUE * 2];
@@ -100,11 +109,7 @@
vp9_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
}
-#if CONFIG_NEWCOEFCONTEXT
-#define PT pn
-#else
-#define PT pt
-#endif
+extern const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad);
static void tokenize_b(VP9_COMP *cpi,
MACROBLOCKD *xd,
@@ -113,79 +118,92 @@
PLANE_TYPE type,
TX_SIZE tx_size,
int dry_run) {
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt; /* near block/prev token context index */
- int c = (type == PLANE_TYPE_Y_NO_DC) ? 1 : 0;
- const BLOCKD * const b = xd->block + ib;
- const int eob = b->eob; /* one beyond last nonzero coeff */
+ int c = 0;
+ const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */
TOKENEXTRA *t = *tp; /* store tokens starting here */
- int16_t *qcoeff_ptr = b->qcoeff;
- int seg_eob;
- const int segment_id = xd->mode_info_context->mbmi.segment_id;
- const int *bands, *scan;
+ int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib;
+ int seg_eob, default_eob, pad;
+ const int segment_id = mbmi->segment_id;
+ const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
+ const int *scan, *nb;
vp9_coeff_count *counts;
vp9_coeff_probs *probs;
- const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, b) : DCT_DCT;
-#if CONFIG_NEWCOEFCONTEXT
- const int *neighbors;
- int pn;
+ const int ref = mbmi->ref_frame != INTRA_FRAME;
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
+ uint8_t token_cache[1024];
+#if CONFIG_CODE_NONZEROCOUNT
+ int zerosleft, nzc = 0;
+ if (eob == 0)
+ assert(xd->nzcs[ib] == 0);
#endif
- ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context +
- vp9_block2above[tx_size][ib];
- ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context +
- vp9_block2left[tx_size][ib];
- ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
+ if (sb_type == BLOCK_SIZE_SB64X64) {
+ a = (ENTROPY_CONTEXT *)xd->above_context +
+ vp9_block2above_sb64[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ } else if (sb_type == BLOCK_SIZE_SB32X32) {
+ a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a3 = l2 = l3 = NULL;
+ } else {
+ a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib];
+ a1 = l1 = a2 = l2 = a3 = l3 = NULL;
+ }
- ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) +
- vp9_block2above[tx_size][ib];
- ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) +
- vp9_block2left[tx_size][ib];
-
-
switch (tx_size) {
default:
- case TX_4X4:
+ case TX_4X4: {
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_4x4(xd, ib) : DCT_DCT;
+ a_ec = *a;
+ l_ec = *l;
seg_eob = 16;
- bands = vp9_coef_bands_4x4;
scan = vp9_default_zig_zag1d_4x4;
if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_4x4;
- probs = cpi->common.fc.hybrid_coef_probs_4x4;
if (tx_type == ADST_DCT) {
scan = vp9_row_scan_4x4;
} else if (tx_type == DCT_ADST) {
scan = vp9_col_scan_4x4;
}
- } else {
- counts = cpi->coef_counts_4x4;
- probs = cpi->common.fc.coef_probs_4x4;
}
+ counts = cpi->coef_counts_4x4;
+ probs = cpi->common.fc.coef_probs_4x4;
break;
- case TX_8X8:
- if (type == PLANE_TYPE_Y2) {
- seg_eob = 4;
- bands = vp9_coef_bands_4x4;
- scan = vp9_default_zig_zag1d_4x4;
- } else {
-#if CONFIG_CNVCONTEXT
- a_ec = (a[0] + a[1]) != 0;
- l_ec = (l[0] + l[1]) != 0;
-#endif
- seg_eob = 64;
- bands = vp9_coef_bands_8x8;
- scan = vp9_default_zig_zag1d_8x8;
- }
+ }
+ case TX_8X8: {
+ const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
+ a_ec = (a[0] + a[1]) != 0;
+ l_ec = (l[0] + l[1]) != 0;
+ seg_eob = 64;
+ scan = vp9_default_zig_zag1d_8x8;
if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_8x8;
- probs = cpi->common.fc.hybrid_coef_probs_8x8;
- } else {
- counts = cpi->coef_counts_8x8;
- probs = cpi->common.fc.coef_probs_8x8;
+ if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_8x8;
+ } else if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_8x8;
+ }
}
+ counts = cpi->coef_counts_8x8;
+ probs = cpi->common.fc.coef_probs_8x8;
break;
- case TX_16X16:
-#if CONFIG_CNVCONTEXT
+ }
+ case TX_16X16: {
+ const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+ const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+ get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
if (type != PLANE_TYPE_UV) {
a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
@@ -193,89 +211,99 @@
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
}
-#endif
seg_eob = 256;
- bands = vp9_coef_bands_16x16;
scan = vp9_default_zig_zag1d_16x16;
if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_16x16;
- probs = cpi->common.fc.hybrid_coef_probs_16x16;
- } else {
- counts = cpi->coef_counts_16x16;
- probs = cpi->common.fc.coef_probs_16x16;
+ if (tx_type == ADST_DCT) {
+ scan = vp9_row_scan_16x16;
+ } else if (tx_type == DCT_ADST) {
+ scan = vp9_col_scan_16x16;
+ }
}
- if (type == PLANE_TYPE_UV) {
- int uv_idx = (ib - 16) >> 2;
- qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx;
- }
+ counts = cpi->coef_counts_16x16;
+ probs = cpi->common.fc.coef_probs_16x16;
break;
+ }
case TX_32X32:
-#if CONFIG_CNVCONTEXT
- a_ec = a[0] + a[1] + a[2] + a[3] +
- a1[0] + a1[1] + a1[2] + a1[3];
- l_ec = l[0] + l[1] + l[2] + l[3] +
- l1[0] + l1[1] + l1[2] + l1[3];
- a_ec = a_ec != 0;
- l_ec = l_ec != 0;
-#endif
+ if (type != PLANE_TYPE_UV) {
+ a_ec = (a[0] + a[1] + a[2] + a[3] +
+ a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3] +
+ l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ } else {
+ a_ec = (a[0] + a[1] + a1[0] + a1[1] +
+ a2[0] + a2[1] + a3[0] + a3[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1] +
+ l2[0] + l2[1] + l3[0] + l3[1]) != 0;
+ }
seg_eob = 1024;
- bands = vp9_coef_bands_32x32;
scan = vp9_default_zig_zag1d_32x32;
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
- qcoeff_ptr = xd->sb_coeff_data.qcoeff;
break;
}
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-#if CONFIG_NEWCOEFCONTEXT
- neighbors = vp9_get_coef_neighbors_handle(scan);
- pn = pt;
-#endif
+ nb = vp9_get_coef_neighbors_handle(scan, &pad);
+ default_eob = seg_eob;
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB))
- seg_eob = vp9_get_segdata(xd, segment_id, SEG_LVL_EOB);
+ if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
+ seg_eob = 0;
do {
- const int band = bands[c];
+ const int band = get_coef_band(scan, tx_size, c);
int token;
-
+ int v = 0;
+#if CONFIG_CODE_NONZEROCOUNT
+ zerosleft = seg_eob - xd->nzcs[ib] - c + nzc;
+#endif
if (c < eob) {
const int rc = scan[c];
- const int v = qcoeff_ptr[rc];
+ v = qcoeff_ptr[rc];
assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE);
t->Extra = vp9_dct_value_tokens_ptr[v].Extra;
token = vp9_dct_value_tokens_ptr[v].Token;
} else {
+#if CONFIG_CODE_NONZEROCOUNT
+ break;
+#else
token = DCT_EOB_TOKEN;
+#endif
}
t->Token = token;
- t->context_tree = probs[type][band][PT];
- t->skip_eob_node = (pt == 0) && ((band > 0 && type != PLANE_TYPE_Y_NO_DC) ||
- (band > 1 && type == PLANE_TYPE_Y_NO_DC));
+ t->context_tree = probs[type][ref][band][pt];
+#if CONFIG_CODE_NONZEROCOUNT
+ // Skip zero node if there are no zeros left
+ t->skip_eob_node = 1 + (zerosleft == 0);
+#else
+ t->skip_eob_node = (c > 0) && (token_cache[c - 1] == 0);
+#endif
assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0);
if (!dry_run) {
- ++counts[type][band][PT][token];
+ ++counts[type][ref][band][pt][token];
+ if (!t->skip_eob_node)
+ ++cpi->common.fc.eob_branch_counts[tx_size][type][ref][band][pt];
}
- pt = vp9_prev_token_class[token];
-#if CONFIG_NEWCOEFCONTEXT
- if (c < seg_eob - 1 && NEWCOEFCONTEXT_BAND_COND(bands[c + 1]))
- pn = vp9_get_coef_neighbor_context(
- qcoeff_ptr, (type == PLANE_TYPE_Y_NO_DC), neighbors, scan[c + 1]);
- else
- pn = pt;
+#if CONFIG_CODE_NONZEROCOUNT
+ nzc += (v != 0);
#endif
+ token_cache[c] = token;
+
+ pt = vp9_get_coef_context(scan, nb, pad, token_cache, c + 1, default_eob);
++t;
} while (c < eob && ++c < seg_eob);
+#if CONFIG_CODE_NONZEROCOUNT
+ assert(nzc == xd->nzcs[ib]);
+#endif
*tp = t;
- a_ec = l_ec = (c > !type); /* 0 <-> all coeff data is zero */
+ a_ec = l_ec = (c > 0); /* 0 <-> all coeff data is zero */
a[0] = a_ec;
l[0] = l_ec;
- if (tx_size == TX_8X8 && type != PLANE_TYPE_Y2) {
+ if (tx_size == TX_8X8) {
a[1] = a_ec;
l[1] = l_ec;
} else if (tx_size == TX_16X16) {
@@ -287,25 +315,27 @@
l1[0] = l1[1] = l[1] = l_ec;
}
} else if (tx_size == TX_32X32) {
- a[1] = a[2] = a[3] = a_ec;
- l[1] = l[2] = l[3] = l_ec;
- a1[0] = a1[1] = a1[2] = a1[3] = a_ec;
- l1[0] = l1[1] = l1[2] = l1[3] = l_ec;
+ if (type != PLANE_TYPE_UV) {
+ a[1] = a[2] = a[3] = a_ec;
+ l[1] = l[2] = l[3] = l_ec;
+ a1[0] = a1[1] = a1[2] = a1[3] = a_ec;
+ l1[0] = l1[1] = l1[2] = l1[3] = l_ec;
+ } else {
+ a[1] = a1[0] = a1[1] = a_ec;
+ l[1] = l1[0] = l1[1] = l_ec;
+ a2[0] = a2[1] = a3[0] = a3[1] = a_ec;
+ l2[0] = l2[1] = l3[0] = l3[1] = l_ec;
+ }
}
}
-int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) {
+int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
- if (has_2nd_order) {
- for (i = 0; i < 16; i++)
- skip &= (xd->block[i].eob < 2);
- skip &= (!xd->block[24].eob);
- } else {
- for (i = 0; i < 16; i++)
- skip &= (!xd->block[i].eob);
- }
+ for (i = 0; i < 16; i++)
+ skip &= (!xd->eobs[i]);
+
return skip;
}
@@ -314,48 +344,41 @@
int i;
for (i = 16; i < 24; i++)
- skip &= (!xd->block[i].eob);
+ skip &= (!xd->eobs[i]);
return skip;
}
-static int mb_is_skippable_4x4(MACROBLOCKD *xd, int has_2nd_order) {
- return (vp9_mby_is_skippable_4x4(xd, has_2nd_order) &
+static int mb_is_skippable_4x4(MACROBLOCKD *xd) {
+ return (vp9_mby_is_skippable_4x4(xd) &
vp9_mbuv_is_skippable_4x4(xd));
}
-int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) {
+int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd) {
int skip = 1;
int i = 0;
- if (has_2nd_order) {
- for (i = 0; i < 16; i += 4)
- skip &= (xd->block[i].eob < 2);
- skip &= (!xd->block[24].eob);
- } else {
- for (i = 0; i < 16; i += 4)
- skip &= (!xd->block[i].eob);
- }
+ for (i = 0; i < 16; i += 4)
+ skip &= (!xd->eobs[i]);
+
return skip;
}
int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd) {
- return (!xd->block[16].eob) & (!xd->block[20].eob);
+ return (!xd->eobs[16]) & (!xd->eobs[20]);
}
-static int mb_is_skippable_8x8(MACROBLOCKD *xd, int has_2nd_order) {
- return (vp9_mby_is_skippable_8x8(xd, has_2nd_order) &
+static int mb_is_skippable_8x8(MACROBLOCKD *xd) {
+ return (vp9_mby_is_skippable_8x8(xd) &
vp9_mbuv_is_skippable_8x8(xd));
}
-static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd, int has_2nd_order) {
- return (vp9_mby_is_skippable_8x8(xd, has_2nd_order) &
+static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) {
+ return (vp9_mby_is_skippable_8x8(xd) &
vp9_mbuv_is_skippable_4x4(xd));
}
int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) {
- int skip = 1;
- skip &= !xd->block[0].eob;
- return skip;
+ return (!xd->eobs[0]);
}
static int mb_is_skippable_16x16(MACROBLOCKD *xd) {
@@ -363,13 +386,11 @@
}
int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) {
- int skip = 1;
- skip &= !xd->block[0].eob;
- return skip;
+ return (!xd->eobs[0]);
}
int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) {
- return (!xd->block[16].eob) & (!xd->block[20].eob);
+ return (!xd->eobs[64]) & (!xd->eobs[80]);
}
static int sb_is_skippable_32x32(MACROBLOCKD *xd) {
@@ -377,6 +398,68 @@
vp9_sbuv_is_skippable_16x16(xd);
}
+int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 64; i += 16)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb_is_skippable_16x16(MACROBLOCKD *xd) {
+ return vp9_sby_is_skippable_16x16(xd) & vp9_sbuv_is_skippable_16x16(xd);
+}
+
+int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 64; i += 4)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 64; i < 96; i += 4)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb_is_skippable_8x8(MACROBLOCKD *xd) {
+ return vp9_sby_is_skippable_8x8(xd) & vp9_sbuv_is_skippable_8x8(xd);
+}
+
+int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 64; i++)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 64; i < 96; i++)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb_is_skippable_4x4(MACROBLOCKD *xd) {
+ return vp9_sby_is_skippable_4x4(xd) & vp9_sbuv_is_skippable_4x4(xd);
+}
+
void vp9_tokenize_sb(VP9_COMP *cpi,
MACROBLOCKD *xd,
TOKENEXTRA **t,
@@ -384,17 +467,26 @@
VP9_COMMON * const cm = &cpi->common;
MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi;
TOKENEXTRA *t_backup = *t;
- ENTROPY_CONTEXT *A[2] = { (ENTROPY_CONTEXT *) (xd->above_context + 0),
- (ENTROPY_CONTEXT *) (xd->above_context + 1), };
- ENTROPY_CONTEXT *L[2] = { (ENTROPY_CONTEXT *) (xd->left_context + 0),
- (ENTROPY_CONTEXT *) (xd->left_context + 1), };
const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP);
const int segment_id = mbmi->segment_id;
- const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0);
+ const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
int b;
- mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd);
+ switch (mbmi->txfm_size) {
+ case TX_32X32:
+ mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd);
+ break;
+ case TX_16X16:
+ mbmi->mb_skip_coeff = sb_is_skippable_16x16(xd);
+ break;
+ case TX_8X8:
+ mbmi->mb_skip_coeff = sb_is_skippable_8x8(xd);
+ break;
+ case TX_4X4:
+ mbmi->mb_skip_coeff = sb_is_skippable_4x4(xd);
+ break;
+ default: assert(0);
+ }
if (mbmi->mb_skip_coeff) {
if (!dry_run)
@@ -402,7 +494,7 @@
if (!cm->mb_no_coeff_skip) {
vp9_stuff_sb(cpi, xd, t, dry_run);
} else {
- vp9_fix_contexts_sb(xd);
+ vp9_reset_sb_tokens_context(xd);
}
if (dry_run)
*t = t_backup;
@@ -412,14 +504,215 @@
if (!dry_run)
cpi->skip_false_count[mb_skip_context] += skip_inc;
- tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC,
- TX_32X32, dry_run);
+ switch (mbmi->txfm_size) {
+ case TX_32X32:
+ tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC,
+ TX_32X32, dry_run);
+ for (b = 64; b < 96; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_16X16, dry_run);
+ break;
+ case TX_16X16:
+ for (b = 0; b < 64; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_16X16, dry_run);
+ for (b = 64; b < 96; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_16X16, dry_run);
+ break;
+ case TX_8X8:
+ for (b = 0; b < 64; b += 4)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_8X8, dry_run);
+ for (b = 64; b < 96; b += 4)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_8X8, dry_run);
+ break;
+ case TX_4X4:
+ for (b = 0; b < 64; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_4X4, dry_run);
+ for (b = 64; b < 96; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_4X4, dry_run);
+ break;
+ default: assert(0);
+ }
- for (b = 16; b < 24; b += 4) {
- tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
- TX_16X16, dry_run);
+ if (dry_run)
+ *t = t_backup;
+}
+
+int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 256; i += 64)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd) {
+ return (!xd->eobs[256]) & (!xd->eobs[320]);
+}
+
+static int sb64_is_skippable_32x32(MACROBLOCKD *xd) {
+ return vp9_sb64y_is_skippable_32x32(xd) & vp9_sb64uv_is_skippable_32x32(xd);
+}
+
+int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 256; i += 16)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 256; i < 384; i += 16)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64_is_skippable_16x16(MACROBLOCKD *xd) {
+ return vp9_sb64y_is_skippable_16x16(xd) & vp9_sb64uv_is_skippable_16x16(xd);
+}
+
+int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 256; i += 4)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 256; i < 384; i += 4)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64_is_skippable_8x8(MACROBLOCKD *xd) {
+ return vp9_sb64y_is_skippable_8x8(xd) & vp9_sb64uv_is_skippable_8x8(xd);
+}
+
+int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 0; i < 256; i++)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd) {
+ int skip = 1;
+ int i = 0;
+
+ for (i = 256; i < 384; i++)
+ skip &= (!xd->eobs[i]);
+
+ return skip;
+}
+
+static int sb64_is_skippable_4x4(MACROBLOCKD *xd) {
+ return vp9_sb64y_is_skippable_4x4(xd) & vp9_sb64uv_is_skippable_4x4(xd);
+}
+
+void vp9_tokenize_sb64(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ TOKENEXTRA **t,
+ int dry_run) {
+ VP9_COMMON * const cm = &cpi->common;
+ MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi;
+ TOKENEXTRA *t_backup = *t;
+ const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP);
+ const int segment_id = mbmi->segment_id;
+ const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
+ int b;
+
+ switch (mbmi->txfm_size) {
+ case TX_32X32:
+ mbmi->mb_skip_coeff = sb64_is_skippable_32x32(xd);
+ break;
+ case TX_16X16:
+ mbmi->mb_skip_coeff = sb64_is_skippable_16x16(xd);
+ break;
+ case TX_8X8:
+ mbmi->mb_skip_coeff = sb64_is_skippable_8x8(xd);
+ break;
+ case TX_4X4:
+ mbmi->mb_skip_coeff = sb64_is_skippable_4x4(xd);
+ break;
+ default: assert(0);
}
- A[0][8] = L[0][8] = A[1][8] = L[1][8] = 0;
+
+ if (mbmi->mb_skip_coeff) {
+ if (!dry_run)
+ cpi->skip_true_count[mb_skip_context] += skip_inc;
+ if (!cm->mb_no_coeff_skip) {
+ vp9_stuff_sb64(cpi, xd, t, dry_run);
+ } else {
+ vp9_reset_sb64_tokens_context(xd);
+ }
+ if (dry_run)
+ *t = t_backup;
+ return;
+ }
+
+ if (!dry_run)
+ cpi->skip_false_count[mb_skip_context] += skip_inc;
+
+ switch (mbmi->txfm_size) {
+ case TX_32X32:
+ for (b = 0; b < 256; b += 64)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_32X32, dry_run);
+ for (b = 256; b < 384; b += 64)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_32X32, dry_run);
+ break;
+ case TX_16X16:
+ for (b = 0; b < 256; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_16X16, dry_run);
+ for (b = 256; b < 384; b += 16)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_16X16, dry_run);
+ break;
+ case TX_8X8:
+ for (b = 0; b < 256; b += 4)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_8X8, dry_run);
+ for (b = 256; b < 384; b += 4)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_8X8, dry_run);
+ break;
+ case TX_4X4:
+ for (b = 0; b < 256; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC,
+ TX_4X4, dry_run);
+ for (b = 256; b < 384; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV,
+ TX_4X4, dry_run);
+ break;
+ default: assert(0);
+ }
+
if (dry_run)
*t = t_backup;
}
@@ -428,8 +721,6 @@
MACROBLOCKD *xd,
TOKENEXTRA **t,
int dry_run) {
- PLANE_TYPE plane_type;
- int has_2nd_order;
int b;
int tx_size = xd->mode_info_context->mbmi.txfm_size;
int mb_skip_context = vp9_get_pred_context(&cpi->common, xd, PRED_MBSKIP);
@@ -441,14 +732,11 @@
int skip_inc;
int segment_id = xd->mode_info_context->mbmi.segment_id;
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
- (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0)) {
+ if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
skip_inc = 1;
} else
skip_inc = 0;
- has_2nd_order = get_2nd_order_usage(xd);
-
switch (tx_size) {
case TX_16X16:
@@ -458,15 +746,15 @@
if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
xd->mode_info_context->mbmi.mode == SPLITMV)
xd->mode_info_context->mbmi.mb_skip_coeff =
- mb_is_skippable_8x8_4x4uv(xd, 0);
+ mb_is_skippable_8x8_4x4uv(xd);
else
xd->mode_info_context->mbmi.mb_skip_coeff =
- mb_is_skippable_8x8(xd, has_2nd_order);
+ mb_is_skippable_8x8(xd);
break;
default:
xd->mode_info_context->mbmi.mb_skip_coeff =
- mb_is_skippable_4x4(xd, has_2nd_order);
+ mb_is_skippable_4x4(xd);
break;
}
@@ -487,15 +775,6 @@
if (!dry_run)
cpi->skip_false_count[mb_skip_context] += skip_inc;
- if (has_2nd_order) {
- tokenize_b(cpi, xd, 24, t, PLANE_TYPE_Y2, tx_size, dry_run);
- plane_type = PLANE_TYPE_Y_NO_DC;
- } else {
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
- plane_type = PLANE_TYPE_Y_WITH_DC;
- }
-
if (tx_size == TX_16X16) {
tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
for (b = 16; b < 24; b += 4) {
@@ -503,7 +782,7 @@
}
} else if (tx_size == TX_8X8) {
for (b = 0; b < 16; b += 4) {
- tokenize_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run);
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
}
if (xd->mode_info_context->mbmi.mode == I8X8_PRED ||
xd->mode_info_context->mbmi.mode == SPLITMV) {
@@ -516,11 +795,10 @@
}
}
} else {
- for (b = 0; b < 24; b++) {
- if (b >= 16)
- plane_type = PLANE_TYPE_UV;
- tokenize_b(cpi, xd, b, t, plane_type, TX_4X4, dry_run);
- }
+ for (b = 0; b < 16; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
+ for (b = 16; b < 24; b++)
+ tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
}
if (dry_run)
*t = t_backup;
@@ -531,25 +809,13 @@
FILE *f = fopen("context.bin", "rb");
if (!f) {
vpx_memset(context_counters_4x4, 0, sizeof(context_counters_4x4));
- vpx_memset(hybrid_context_counters_4x4, 0,
- sizeof(hybrid_context_counters_4x4));
vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8));
- vpx_memset(hybrid_context_counters_8x8, 0,
- sizeof(hybrid_context_counters_8x8));
vpx_memset(context_counters_16x16, 0, sizeof(context_counters_16x16));
- vpx_memset(hybrid_context_counters_16x16, 0,
- sizeof(hybrid_context_counters_16x16));
vpx_memset(context_counters_32x32, 0, sizeof(context_counters_32x32));
} else {
fread(context_counters_4x4, sizeof(context_counters_4x4), 1, f);
- fread(hybrid_context_counters_4x4,
- sizeof(hybrid_context_counters_4x4), 1, f);
fread(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
- fread(hybrid_context_counters_8x8,
- sizeof(hybrid_context_counters_8x8), 1, f);
fread(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
- fread(hybrid_context_counters_16x16,
- sizeof(hybrid_context_counters_16x16), 1, f);
fread(context_counters_32x32, sizeof(context_counters_32x32), 1, f);
fclose(f);
}
@@ -557,25 +823,13 @@
f = fopen("treeupdate.bin", "rb");
if (!f) {
vpx_memset(tree_update_hist_4x4, 0, sizeof(tree_update_hist_4x4));
- vpx_memset(hybrid_tree_update_hist_4x4, 0,
- sizeof(hybrid_tree_update_hist_4x4));
vpx_memset(tree_update_hist_8x8, 0, sizeof(tree_update_hist_8x8));
- vpx_memset(hybrid_tree_update_hist_8x8, 0,
- sizeof(hybrid_tree_update_hist_8x8));
vpx_memset(tree_update_hist_16x16, 0, sizeof(tree_update_hist_16x16));
- vpx_memset(hybrid_tree_update_hist_16x16, 0,
- sizeof(hybrid_tree_update_hist_16x16));
vpx_memset(tree_update_hist_32x32, 0, sizeof(tree_update_hist_32x32));
} else {
fread(tree_update_hist_4x4, sizeof(tree_update_hist_4x4), 1, f);
- fread(hybrid_tree_update_hist_4x4,
- sizeof(hybrid_tree_update_hist_4x4), 1, f);
fread(tree_update_hist_8x8, sizeof(tree_update_hist_8x8), 1, f);
- fread(hybrid_tree_update_hist_8x8,
- sizeof(hybrid_tree_update_hist_8x8), 1, f);
fread(tree_update_hist_16x16, sizeof(tree_update_hist_16x16), 1, f);
- fread(hybrid_tree_update_hist_16x16,
- sizeof(hybrid_tree_update_hist_16x16), 1, f);
fread(tree_update_hist_32x32, sizeof(tree_update_hist_32x32), 1, f);
fclose(f);
}
@@ -583,7 +837,7 @@
static void print_counter(FILE *f, vp9_coeff_accum *context_counters,
int block_types, const char *header) {
- int type, band, pt, t;
+ int type, ref, band, pt, t;
fprintf(f, "static const vp9_coeff_count %s = {\n", header);
@@ -590,26 +844,31 @@
#define Comma(X) (X ? "," : "")
type = 0;
do {
+ ref = 0;
fprintf(f, "%s\n { /* block Type %d */", Comma(type), type);
- band = 0;
do {
- fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
- pt = 0;
+ fprintf(f, "%s\n { /* %s */", Comma(type), ref ? "Inter" : "Intra");
+ band = 0;
do {
- fprintf(f, "%s\n {", Comma(pt));
-
- t = 0;
+ fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band);
+ pt = 0;
do {
- const int64_t x = context_counters[type][band][pt][t];
- const int y = (int) x;
+ fprintf(f, "%s\n {", Comma(pt));
- assert(x == (int64_t) y); /* no overflow handling yet */
- fprintf(f, "%s %d", Comma(t), y);
- } while (++t < MAX_ENTROPY_TOKENS);
- fprintf(f, "}");
- } while (++pt < PREV_COEF_CONTEXTS);
+ t = 0;
+ do {
+ const int64_t x = context_counters[type][ref][band][pt][t];
+ const int y = (int) x;
+
+ assert(x == (int64_t) y); /* no overflow handling yet */
+ fprintf(f, "%s %d", Comma(t), y);
+ } while (++t < 1 + MAX_ENTROPY_TOKENS);
+ fprintf(f, "}");
+ } while (++pt < PREV_COEF_CONTEXTS);
+ fprintf(f, "\n }");
+ } while (++band < COEF_BANDS);
fprintf(f, "\n }");
- } while (++band < COEF_BANDS);
+ } while (++ref < REF_TYPES);
fprintf(f, "\n }");
} while (++type < block_types);
fprintf(f, "\n};\n");
@@ -617,7 +876,7 @@
static void print_probs(FILE *f, vp9_coeff_accum *context_counters,
int block_types, const char *header) {
- int type, band, pt, t;
+ int type, ref, band, pt, t;
fprintf(f, "static const vp9_coeff_probs %s = {", header);
@@ -626,32 +885,41 @@
do {
fprintf(f, "%s%s{ /* block Type %d */",
Comma(type), Newline(type, " "), type);
- band = 0;
+ ref = 0;
do {
- fprintf(f, "%s%s{ /* Coeff Band %d */",
- Comma(band), Newline(band, " "), band);
- pt = 0;
+ fprintf(f, "%s%s{ /* %s */",
+ Comma(band), Newline(band, " "), ref ? "Inter" : "Intra");
+ band = 0;
do {
- unsigned int branch_ct[ENTROPY_NODES][2];
- unsigned int coef_counts[MAX_ENTROPY_TOKENS];
- vp9_prob coef_probs[ENTROPY_NODES];
+ fprintf(f, "%s%s{ /* Coeff Band %d */",
+ Comma(band), Newline(band, " "), band);
+ pt = 0;
+ do {
+ unsigned int branch_ct[ENTROPY_NODES][2];
+ unsigned int coef_counts[MAX_ENTROPY_TOKENS + 1];
+ vp9_prob coef_probs[ENTROPY_NODES];
- for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
- coef_counts[t] = context_counters[type][band][pt][t];
- vp9_tree_probs_from_distribution(MAX_ENTROPY_TOKENS,
- vp9_coef_encodings, vp9_coef_tree,
- coef_probs, branch_ct, coef_counts);
- fprintf(f, "%s\n {", Comma(pt));
+ if (pt >= 3 && band == 0)
+ break;
+ for (t = 0; t < MAX_ENTROPY_TOKENS + 1; ++t)
+ coef_counts[t] = context_counters[type][ref][band][pt][t];
+ vp9_tree_probs_from_distribution(vp9_coef_tree, coef_probs,
+ branch_ct, coef_counts, 0);
+ branch_ct[0][1] = coef_counts[MAX_ENTROPY_TOKENS] - branch_ct[0][0];
+ coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
+ fprintf(f, "%s\n {", Comma(pt));
- t = 0;
- do {
- fprintf(f, "%s %3d", Comma(t), coef_probs[t]);
- } while (++t < ENTROPY_NODES);
+ t = 0;
+ do {
+ fprintf(f, "%s %3d", Comma(t), coef_probs[t]);
+ } while (++t < ENTROPY_NODES);
- fprintf(f, " }");
- } while (++pt < PREV_COEF_CONTEXTS);
+ fprintf(f, " }");
+ } while (++pt < PREV_COEF_CONTEXTS);
+ fprintf(f, "\n }");
+ } while (++band < COEF_BANDS);
fprintf(f, "\n }");
- } while (++band < COEF_BANDS);
+ } while (++ref < REF_TYPES);
fprintf(f, "\n }");
} while (++type < block_types);
fprintf(f, "\n};\n");
@@ -664,49 +932,31 @@
fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n");
/* print counts */
- print_counter(f, context_counters_4x4, BLOCK_TYPES_4X4,
- "vp9_default_coef_counts_4x4[BLOCK_TYPES_4X4]");
- print_counter(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4,
- "vp9_default_hybrid_coef_counts_4x4[BLOCK_TYPES_4X4]");
- print_counter(f, context_counters_8x8, BLOCK_TYPES_8X8,
- "vp9_default_coef_counts_8x8[BLOCK_TYPES_8X8]");
- print_counter(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8,
- "vp9_default_hybrid_coef_counts_8x8[BLOCK_TYPES_8X8]");
- print_counter(f, context_counters_16x16, BLOCK_TYPES_16X16,
- "vp9_default_coef_counts_16x16[BLOCK_TYPES_16X16]");
- print_counter(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16,
- "vp9_default_hybrid_coef_counts_16x16[BLOCK_TYPES_16X16]");
- print_counter(f, context_counters_32x32, BLOCK_TYPES_32X32,
- "vp9_default_coef_counts_32x32[BLOCK_TYPES_32X32]");
+ print_counter(f, context_counters_4x4, BLOCK_TYPES,
+ "vp9_default_coef_counts_4x4[BLOCK_TYPES]");
+ print_counter(f, context_counters_8x8, BLOCK_TYPES,
+ "vp9_default_coef_counts_8x8[BLOCK_TYPES]");
+ print_counter(f, context_counters_16x16, BLOCK_TYPES,
+ "vp9_default_coef_counts_16x16[BLOCK_TYPES]");
+ print_counter(f, context_counters_32x32, BLOCK_TYPES,
+ "vp9_default_coef_counts_32x32[BLOCK_TYPES]");
/* print coefficient probabilities */
- print_probs(f, context_counters_4x4, BLOCK_TYPES_4X4,
- "default_coef_probs_4x4[BLOCK_TYPES_4X4]");
- print_probs(f, hybrid_context_counters_4x4, BLOCK_TYPES_4X4,
- "default_hybrid_coef_probs_4x4[BLOCK_TYPES_4X4]");
- print_probs(f, context_counters_8x8, BLOCK_TYPES_8X8,
- "default_coef_probs_8x8[BLOCK_TYPES_8X8]");
- print_probs(f, hybrid_context_counters_8x8, BLOCK_TYPES_8X8,
- "default_hybrid_coef_probs_8x8[BLOCK_TYPES_8X8]");
- print_probs(f, context_counters_16x16, BLOCK_TYPES_16X16,
- "default_coef_probs_16x16[BLOCK_TYPES_16X16]");
- print_probs(f, hybrid_context_counters_16x16, BLOCK_TYPES_16X16,
- "default_hybrid_coef_probs_16x16[BLOCK_TYPES_16X16]");
- print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32,
- "default_coef_probs_32x32[BLOCK_TYPES_32X32]");
+ print_probs(f, context_counters_4x4, BLOCK_TYPES,
+ "default_coef_probs_4x4[BLOCK_TYPES]");
+ print_probs(f, context_counters_8x8, BLOCK_TYPES,
+ "default_coef_probs_8x8[BLOCK_TYPES]");
+ print_probs(f, context_counters_16x16, BLOCK_TYPES,
+ "default_coef_probs_16x16[BLOCK_TYPES]");
+ print_probs(f, context_counters_32x32, BLOCK_TYPES,
+ "default_coef_probs_32x32[BLOCK_TYPES]");
fclose(f);
f = fopen("context.bin", "wb");
fwrite(context_counters_4x4, sizeof(context_counters_4x4), 1, f);
- fwrite(hybrid_context_counters_4x4,
- sizeof(hybrid_context_counters_4x4), 1, f);
fwrite(context_counters_8x8, sizeof(context_counters_8x8), 1, f);
- fwrite(hybrid_context_counters_8x8,
- sizeof(hybrid_context_counters_8x8), 1, f);
fwrite(context_counters_16x16, sizeof(context_counters_16x16), 1, f);
- fwrite(hybrid_context_counters_16x16,
- sizeof(hybrid_context_counters_16x16), 1, f);
fwrite(context_counters_32x32, sizeof(context_counters_32x32), 1, f);
fclose(f);
}
@@ -716,61 +966,65 @@
fill_value_tokens();
}
-static __inline void stuff_b(VP9_COMP *cpi,
- MACROBLOCKD *xd,
- const int ib,
- TOKENEXTRA **tp,
- PLANE_TYPE type,
- TX_SIZE tx_size,
- int dry_run) {
- const BLOCKD * const b = xd->block + ib;
- const int *bands;
+static void stuff_b(VP9_COMP *cpi,
+ MACROBLOCKD *xd,
+ const int ib,
+ TOKENEXTRA **tp,
+ PLANE_TYPE type,
+ TX_SIZE tx_size,
+ int dry_run) {
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
+#if CONFIG_CODE_NONZEROCOUNT == 0
vp9_coeff_count *counts;
vp9_coeff_probs *probs;
int pt, band;
TOKENEXTRA *t = *tp;
- const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type(xd, b) : DCT_DCT;
- ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context +
- vp9_block2above[tx_size][ib];
- ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context +
- vp9_block2left[tx_size][ib];
- ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
- ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) +
- vp9_block2above[tx_size][ib];
- ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) +
- vp9_block2left[tx_size][ib];
+ const int ref = mbmi->ref_frame != INTRA_FRAME;
+#endif
+ ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec;
+ if (sb_type == BLOCK_SIZE_SB32X32) {
+ a = (ENTROPY_CONTEXT *)xd->above_context +
+ vp9_block2above_sb64[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ } else if (sb_type == BLOCK_SIZE_SB32X32) {
+ a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib];
+ a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
+ a2 = l2 = a3 = l3 = NULL;
+ } else {
+ a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib];
+ l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib];
+ a1 = l1 = a2 = l2 = a3 = l3 = NULL;
+ }
+
switch (tx_size) {
default:
case TX_4X4:
- bands = vp9_coef_bands_4x4;
- if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_4x4;
- probs = cpi->common.fc.hybrid_coef_probs_4x4;
- } else {
- counts = cpi->coef_counts_4x4;
- probs = cpi->common.fc.coef_probs_4x4;
- }
+ a_ec = a[0];
+ l_ec = l[0];
+#if CONFIG_CODE_NONZEROCOUNT == 0
+ counts = cpi->coef_counts_4x4;
+ probs = cpi->common.fc.coef_probs_4x4;
+#endif
break;
case TX_8X8:
-#if CONFIG_CNVCONTEXT
- if (type != PLANE_TYPE_Y2) {
- a_ec = (a[0] + a[1]) != 0;
- l_ec = (l[0] + l[1]) != 0;
- }
+ a_ec = (a[0] + a[1]) != 0;
+ l_ec = (l[0] + l[1]) != 0;
+#if CONFIG_CODE_NONZEROCOUNT == 0
+ counts = cpi->coef_counts_8x8;
+ probs = cpi->common.fc.coef_probs_8x8;
#endif
- bands = vp9_coef_bands_8x8;
- if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_8x8;
- probs = cpi->common.fc.hybrid_coef_probs_8x8;
- } else {
- counts = cpi->coef_counts_8x8;
- probs = cpi->common.fc.coef_probs_8x8;
- }
break;
case TX_16X16:
-#if CONFIG_CNVCONTEXT
if (type != PLANE_TYPE_UV) {
a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
@@ -778,41 +1032,44 @@
a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
}
+#if CONFIG_CODE_NONZEROCOUNT == 0
+ counts = cpi->coef_counts_16x16;
+ probs = cpi->common.fc.coef_probs_16x16;
#endif
- bands = vp9_coef_bands_16x16;
- if (tx_type != DCT_DCT) {
- counts = cpi->hybrid_coef_counts_16x16;
- probs = cpi->common.fc.hybrid_coef_probs_16x16;
- } else {
- counts = cpi->coef_counts_16x16;
- probs = cpi->common.fc.coef_probs_16x16;
- }
break;
case TX_32X32:
-#if CONFIG_CNVCONTEXT
- a_ec = a[0] + a[1] + a[2] + a[3] +
- a1[0] + a1[1] + a1[2] + a1[3];
- l_ec = l[0] + l[1] + l[2] + l[3] +
- l1[0] + l1[1] + l1[2] + l1[3];
- a_ec = a_ec != 0;
- l_ec = l_ec != 0;
-#endif
- bands = vp9_coef_bands_32x32;
+ if (type != PLANE_TYPE_UV) {
+ a_ec = (a[0] + a[1] + a[2] + a[3] +
+ a1[0] + a1[1] + a1[2] + a1[3]) != 0;
+ l_ec = (l[0] + l[1] + l[2] + l[3] +
+ l1[0] + l1[1] + l1[2] + l1[3]) != 0;
+ } else {
+ a_ec = (a[0] + a[1] + a1[0] + a1[1] +
+ a2[0] + a2[1] + a3[0] + a3[1]) != 0;
+ l_ec = (l[0] + l[1] + l1[0] + l1[1] +
+ l2[0] + l2[1] + l3[0] + l3[1]) != 0;
+ }
+#if CONFIG_CODE_NONZEROCOUNT == 0
counts = cpi->coef_counts_32x32;
probs = cpi->common.fc.coef_probs_32x32;
+#endif
break;
}
+#if CONFIG_CODE_NONZEROCOUNT == 0
VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);
-
- band = bands[(type == PLANE_TYPE_Y_NO_DC) ? 1 : 0];
+ band = 0;
t->Token = DCT_EOB_TOKEN;
- t->context_tree = probs[type][band][pt];
+ t->context_tree = probs[type][ref][band][pt];
t->skip_eob_node = 0;
++t;
*tp = t;
+ if (!dry_run) {
+ ++counts[type][ref][band][pt][DCT_EOB_TOKEN];
+ }
+#endif
*a = *l = 0;
- if (tx_size == TX_8X8 && type != PLANE_TYPE_Y2) {
+ if (tx_size == TX_8X8) {
a[1] = 0;
l[1] = 0;
} else if (tx_size == TX_16X16) {
@@ -824,39 +1081,28 @@
l1[0] = l1[1] = l[1] = l_ec;
}
} else if (tx_size == TX_32X32) {
- a[1] = a[2] = a[3] = a_ec;
- l[1] = l[2] = l[3] = l_ec;
- a1[0] = a1[1] = a1[2] = a1[3] = a_ec;
- l1[0] = l1[1] = l1[2] = l1[3] = l_ec;
+ if (type != PLANE_TYPE_Y_WITH_DC) {
+ a[1] = a[2] = a[3] = a_ec;
+ l[1] = l[2] = l[3] = l_ec;
+ a1[0] = a1[1] = a1[2] = a1[3] = a_ec;
+ l1[0] = l1[1] = l1[2] = l1[3] = l_ec;
+ } else {
+ a[1] = a1[0] = a1[1] = a_ec;
+ l[1] = l1[0] = l1[1] = l_ec;
+ a2[0] = a2[1] = a3[0] = a3[1] = a_ec;
+ l2[0] = l2[1] = l3[0] = l3[1] = l_ec;
+ }
}
-
- if (!dry_run) {
- ++counts[type][band][pt][DCT_EOB_TOKEN];
- }
}
static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run) {
- PLANE_TYPE plane_type;
int b;
- int has_2nd_order = get_2nd_order_usage(xd);
- if (has_2nd_order) {
- stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_8X8, dry_run);
- plane_type = PLANE_TYPE_Y_NO_DC;
- } else {
-#if CONFIG_CNVCONTEXT
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
-#endif
- plane_type = PLANE_TYPE_Y_WITH_DC;
- }
-
- for (b = 0; b < 24; b += 4) {
- if (b >= 16)
- plane_type = PLANE_TYPE_UV;
- stuff_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run);
- }
+ for (b = 0; b < 16; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
+ for (b = 16; b < 24; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
}
static void stuff_mb_16x16(VP9_COMP *cpi, MACROBLOCKD *xd,
@@ -867,56 +1113,26 @@
for (b = 16; b < 24; b += 4) {
stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
}
-#if CONFIG_CNVCONTEXT
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
-#endif
}
static void stuff_mb_4x4(VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run) {
int b;
- PLANE_TYPE plane_type;
- int has_2nd_order = get_2nd_order_usage(xd);
- if (has_2nd_order) {
- stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_4X4, dry_run);
- plane_type = PLANE_TYPE_Y_NO_DC;
- } else {
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
- plane_type = PLANE_TYPE_Y_WITH_DC;
- }
-
- for (b = 0; b < 24; b++) {
- if (b >= 16)
- plane_type = PLANE_TYPE_UV;
- stuff_b(cpi, xd, b, t, plane_type, TX_4X4, dry_run);
- }
+ for (b = 0; b < 16; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
+ for (b = 16; b < 24; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
}
static void stuff_mb_8x8_4x4uv(VP9_COMP *cpi, MACROBLOCKD *xd,
TOKENEXTRA **t, int dry_run) {
- PLANE_TYPE plane_type;
int b;
- int has_2nd_order = get_2nd_order_usage(xd);
- if (has_2nd_order) {
- stuff_b(cpi, xd, 24, t, PLANE_TYPE_Y2, TX_8X8, dry_run);
- plane_type = PLANE_TYPE_Y_NO_DC;
- } else {
- xd->above_context->y2 = 0;
- xd->left_context->y2 = 0;
- plane_type = PLANE_TYPE_Y_WITH_DC;
- }
-
- for (b = 0; b < 16; b += 4) {
- stuff_b(cpi, xd, b, t, plane_type, TX_8X8, dry_run);
- }
-
- for (b = 16; b < 24; b++) {
+ for (b = 0; b < 16; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
+ for (b = 16; b < 24; b++)
stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
- }
}
void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
@@ -941,27 +1157,76 @@
}
}
-static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run) {
+void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
+ TOKENEXTRA * const t_backup = *t;
int b;
- stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run);
- for (b = 16; b < 24; b += 4) {
- stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run);
+ for (b = 64; b < 96; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+ break;
+ case TX_16X16:
+ for (b = 0; b < 64; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
+ for (b = 64; b < 96; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+ break;
+ case TX_8X8:
+ for (b = 0; b < 64; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
+ for (b = 64; b < 96; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
+ break;
+ case TX_4X4:
+ for (b = 0; b < 64; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
+ for (b = 64; b < 96; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
+ break;
+ default: assert(0);
}
+
+ if (dry_run) {
+ *t = t_backup;
+ }
}
-void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
+void vp9_stuff_sb64(VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run) {
TOKENEXTRA * const t_backup = *t;
+ int b;
- stuff_sb_32x32(cpi, xd, t, dry_run);
+ switch (xd->mode_info_context->mbmi.txfm_size) {
+ case TX_32X32:
+ for (b = 0; b < 256; b += 64)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run);
+ for (b = 256; b < 384; b += 64)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_32X32, dry_run);
+ break;
+ case TX_16X16:
+ for (b = 0; b < 256; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
+ for (b = 256; b < 384; b += 16)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+ break;
+ case TX_8X8:
+ for (b = 0; b < 256; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
+ for (b = 256; b < 384; b += 4)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
+ break;
+ case TX_4X4:
+ for (b = 0; b < 256; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
+ for (b = 256; b < 384; b++)
+ stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
+ break;
+ default: assert(0);
+ }
if (dry_run) {
*t = t_backup;
}
-}
-
-void vp9_fix_contexts_sb(MACROBLOCKD *xd) {
- vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
- vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
}
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -28,42 +28,54 @@
uint8_t skip_eob_node;
} TOKENEXTRA;
-typedef int64_t vp9_coeff_accum[COEF_BANDS][PREV_COEF_CONTEXTS]
- [MAX_ENTROPY_TOKENS];
+typedef int64_t vp9_coeff_accum[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
+ [MAX_ENTROPY_TOKENS + 1];
-extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block);
-extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);
-extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block);
-extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);
-extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
-extern int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd);
-extern int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd);
+int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);
+int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd);
+int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd);
+int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd);
+int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd);
+int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd);
+int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd);
+int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd);
+int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd);
+int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd);
struct VP9_COMP;
-extern void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run);
-extern void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run);
+void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+void vp9_tokenize_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
-extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run);
-extern void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
- TOKENEXTRA **t, int dry_run);
+void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
+void vp9_stuff_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd,
+ TOKENEXTRA **t, int dry_run);
-extern void vp9_fix_contexts_sb(MACROBLOCKD *xd);
#ifdef ENTROPY_STATS
void init_context_counters();
void print_context_counters();
-extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES_4X4];
-extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES_8X8];
-extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES_16X16];
-extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32];
-
-extern vp9_coeff_accum hybrid_context_counters_4x4[BLOCK_TYPES_4X4];
-extern vp9_coeff_accum hybrid_context_counters_8x8[BLOCK_TYPES_8X8];
-extern vp9_coeff_accum hybrid_context_counters_16x16[BLOCK_TYPES_16X16];
+extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES];
+extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES];
+extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES];
+extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES];
#endif
extern const int *vp9_dct_value_cost_ptr;
--- a/vp9/encoder/vp9_treewriter.c
+++ b/vp9/encoder/vp9_treewriter.c
@@ -10,6 +10,7 @@
#include "vp9/encoder/vp9_treewriter.h"
+#include "vp9/common/vp9_common.h"
static void cost(
int *const C,
@@ -35,5 +36,7 @@
}
void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t) {
+ assert(t[1] > 0 && t[0] <= 0);
+ c[-t[0]] = vp9_cost_bit(p[0], 0);
cost(c, t, p, 2, 0);
}
--- a/vp9/encoder/vp9_treewriter.h
+++ b/vp9/encoder/vp9_treewriter.h
@@ -36,30 +36,28 @@
/* Both of these return bits, not scaled bits. */
-
-static __inline unsigned int cost_branch(const unsigned int ct[2],
- vp9_prob p) {
+static INLINE unsigned int cost_branch256(const unsigned int ct[2],
+ vp9_prob p) {
/* Imitate existing calculation */
- return ((ct[0] * vp9_cost_zero(p))
- + (ct[1] * vp9_cost_one(p))) >> 8;
+ return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p);
}
-static __inline unsigned int cost_branch256(const unsigned int ct[2],
- vp9_prob p) {
+static INLINE unsigned int cost_branch(const unsigned int ct[2],
+ vp9_prob p) {
/* Imitate existing calculation */
- return ((ct[0] * vp9_cost_zero(p))
- + (ct[1] * vp9_cost_one(p)));
+ return cost_branch256(ct, p) >> 8;
}
+
/* Small functions to write explicit values and tokens, as well as
estimate their lengths. */
-static __inline void treed_write(vp9_writer *const w,
- vp9_tree t,
- const vp9_prob *const p,
- int v,
- /* number of bits in v, assumed nonzero */
- int n) {
+static INLINE void treed_write(vp9_writer *const w,
+ vp9_tree t,
+ const vp9_prob *const p,
+ int v,
+ /* number of bits in v, assumed nonzero */
+ int n) {
vp9_tree_index i = 0;
do {
@@ -69,18 +67,18 @@
} while (n);
}
-static __inline void write_token(vp9_writer *const w,
- vp9_tree t,
- const vp9_prob *const p,
- vp9_token *const x) {
+static INLINE void write_token(vp9_writer *const w,
+ vp9_tree t,
+ const vp9_prob *const p,
+ vp9_token *const x) {
treed_write(w, t, p, x->value, x->Len);
}
-static __inline int treed_cost(vp9_tree t,
- const vp9_prob *const p,
- int v,
- /* number of bits in v, assumed nonzero */
- int n) {
+static INLINE int treed_cost(vp9_tree t,
+ const vp9_prob *const p,
+ int v,
+ /* number of bits in v, assumed nonzero */
+ int n) {
int c = 0;
vp9_tree_index i = 0;
@@ -93,9 +91,9 @@
return c;
}
-static __inline int cost_token(vp9_tree t,
- const vp9_prob *const p,
- vp9_token *const x) {
+static INLINE int cost_token(vp9_tree t,
+ const vp9_prob *const p,
+ vp9_token *const x) {
return treed_cost(t, p, x->value, x->Len);
}
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -19,12 +19,6 @@
int ref_stride,
unsigned int max_sad);
-typedef void (*vp9_copy32xn_fn_t)(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- int n);
-
typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -35,11 +29,11 @@
int source_stride,
const uint8_t *ref_ptr,
int ref_stride,
- unsigned short *sad_array);
+ unsigned int *sad_array);
typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr,
int source_stride,
- const uint8_t ** ref_ptr,
+ const uint8_t* const ref_ptr[],
int ref_stride, unsigned int *sad_array);
typedef unsigned int (*vp9_variance_fn_t)(const uint8_t *src_ptr,
@@ -79,7 +73,6 @@
vp9_sad_multi_fn_t sdx3f;
vp9_sad_multi1_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
- vp9_copy32xn_fn_t copymem;
} vp9_variance_fn_ptr_t;
#endif // VP9_ENCODER_VP9_VARIANCE_H_
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -142,8 +142,8 @@
const int16_t *HFilter, *VFilter;
uint16_t FData3[5 * 4]; // Temp data bufffer used in filtering
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
// First filter 1d Horizontal
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
@@ -166,8 +166,8 @@
uint8_t temp2[20 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
@@ -186,8 +186,8 @@
uint8_t temp2[20 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
@@ -206,8 +206,8 @@
uint8_t temp2[68 * 64];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
1, 65, 64, HFilter);
@@ -227,8 +227,8 @@
uint8_t temp2[36 * 32];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 33, 32, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter);
@@ -367,8 +367,8 @@
uint8_t temp2[20 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
@@ -387,8 +387,8 @@
uint8_t temp2[20 * 16];
const int16_t *HFilter, *VFilter;
- HFilter = vp9_bilinear_filters[xoffset];
- VFilter = vp9_bilinear_filters[yoffset];
+ HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
1, 17, 8, HFilter);
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_sse2_intrinsics.c
@@ -1,0 +1,895 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+#include "vp9/common/vp9_idct.h" // for cospi constants
+
+void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int pitch) {
+ const int stride = pitch >> 1;
+ int pass;
+ // Constants
+ // When we use them, in one case, they are all the same. In all others
+ // it's a pair of them that we need to repeat four times. This is done
+ // by constructing the 32 bit constant corresponding to that pair.
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ // Load input
+ __m128i in0 = _mm_loadu_si128((const __m128i *)(input + 0 * stride));
+ __m128i in1 = _mm_loadu_si128((const __m128i *)(input + 1 * stride));
+ __m128i in2 = _mm_loadu_si128((const __m128i *)(input + 2 * stride));
+ __m128i in3 = _mm_loadu_si128((const __m128i *)(input + 3 * stride));
+ __m128i in4 = _mm_loadu_si128((const __m128i *)(input + 4 * stride));
+ __m128i in5 = _mm_loadu_si128((const __m128i *)(input + 5 * stride));
+ __m128i in6 = _mm_loadu_si128((const __m128i *)(input + 6 * stride));
+ __m128i in7 = _mm_loadu_si128((const __m128i *)(input + 7 * stride));
+ // Pre-condition input (shift by two)
+ in0 = _mm_slli_epi16(in0, 2);
+ in1 = _mm_slli_epi16(in1, 2);
+ in2 = _mm_slli_epi16(in2, 2);
+ in3 = _mm_slli_epi16(in3, 2);
+ in4 = _mm_slli_epi16(in4, 2);
+ in5 = _mm_slli_epi16(in5, 2);
+ in6 = _mm_slli_epi16(in6, 2);
+ in7 = _mm_slli_epi16(in7, 2);
+
+ // We do two passes, first the columns, then the rows. The results of the
+ // first pass are transposed so that the same column code can be reused. The
+ // results of the second pass are also transposed so that the rows (processed
+ // as columns) are put back in row positions.
+ for (pass = 0; pass < 2; pass++) {
+ // To store results of each pass before the transpose.
+ __m128i res0, res1, res2, res3, res4, res5, res6, res7;
+ // Add/substract
+ const __m128i q0 = _mm_add_epi16(in0, in7);
+ const __m128i q1 = _mm_add_epi16(in1, in6);
+ const __m128i q2 = _mm_add_epi16(in2, in5);
+ const __m128i q3 = _mm_add_epi16(in3, in4);
+ const __m128i q4 = _mm_sub_epi16(in3, in4);
+ const __m128i q5 = _mm_sub_epi16(in2, in5);
+ const __m128i q6 = _mm_sub_epi16(in1, in6);
+ const __m128i q7 = _mm_sub_epi16(in0, in7);
+ // Work on first four results
+ {
+ // Add/substract
+ const __m128i r0 = _mm_add_epi16(q0, q3);
+ const __m128i r1 = _mm_add_epi16(q1, q2);
+ const __m128i r2 = _mm_sub_epi16(q1, q2);
+ const __m128i r3 = _mm_sub_epi16(q0, q3);
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+ const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+ const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res0 = _mm_packs_epi32(w0, w1);
+ res4 = _mm_packs_epi32(w2, w3);
+ res2 = _mm_packs_epi32(w4, w5);
+ res6 = _mm_packs_epi32(w6, w7);
+ }
+ // Work on next four results
+ {
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
+ const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
+ const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
+ const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
+ const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
+ const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
+ const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
+ const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
+ const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
+ const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
+ const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
+ const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
+ const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
+ // Combine
+ const __m128i r0 = _mm_packs_epi32(s0, s1);
+ const __m128i r1 = _mm_packs_epi32(s2, s3);
+ // Add/substract
+ const __m128i x0 = _mm_add_epi16(q4, r0);
+ const __m128i x1 = _mm_sub_epi16(q4, r0);
+ const __m128i x2 = _mm_sub_epi16(q7, r1);
+ const __m128i x3 = _mm_add_epi16(q7, r1);
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+ const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+ const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+ const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res1 = _mm_packs_epi32(w0, w1);
+ res7 = _mm_packs_epi32(w2, w3);
+ res5 = _mm_packs_epi32(w4, w5);
+ res3 = _mm_packs_epi32(w6, w7);
+ }
+ // Transpose the 8x8.
+ {
+ // 00 01 02 03 04 05 06 07
+ // 10 11 12 13 14 15 16 17
+ // 20 21 22 23 24 25 26 27
+ // 30 31 32 33 34 35 36 37
+ // 40 41 42 43 44 45 46 47
+ // 50 51 52 53 54 55 56 57
+ // 60 61 62 63 64 65 66 67
+ // 70 71 72 73 74 75 76 77
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+ // 54 54 55 55 56 56 57 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+ // 04 14 24 34 05 15 21 36
+ // 44 54 64 74 45 55 61 76
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+ in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+ in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+ in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+ in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ }
+ }
+ // Post-condition output and store it
+ {
+ // Post-condition (division by two)
+ // division of two 16 bits signed numbers using shifts
+ // n / 2 = (n - (n >> 15)) >> 1
+ const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
+ const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
+ const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
+ const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
+ const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
+ const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
+ const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
+ const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
+ in0 = _mm_sub_epi16(in0, sign_in0);
+ in1 = _mm_sub_epi16(in1, sign_in1);
+ in2 = _mm_sub_epi16(in2, sign_in2);
+ in3 = _mm_sub_epi16(in3, sign_in3);
+ in4 = _mm_sub_epi16(in4, sign_in4);
+ in5 = _mm_sub_epi16(in5, sign_in5);
+ in6 = _mm_sub_epi16(in6, sign_in6);
+ in7 = _mm_sub_epi16(in7, sign_in7);
+ in0 = _mm_srai_epi16(in0, 1);
+ in1 = _mm_srai_epi16(in1, 1);
+ in2 = _mm_srai_epi16(in2, 1);
+ in3 = _mm_srai_epi16(in3, 1);
+ in4 = _mm_srai_epi16(in4, 1);
+ in5 = _mm_srai_epi16(in5, 1);
+ in6 = _mm_srai_epi16(in6, 1);
+ in7 = _mm_srai_epi16(in7, 1);
+ // store results
+ _mm_storeu_si128 ((__m128i *)(output + 0 * 8), in0);
+ _mm_storeu_si128 ((__m128i *)(output + 1 * 8), in1);
+ _mm_storeu_si128 ((__m128i *)(output + 2 * 8), in2);
+ _mm_storeu_si128 ((__m128i *)(output + 3 * 8), in3);
+ _mm_storeu_si128 ((__m128i *)(output + 4 * 8), in4);
+ _mm_storeu_si128 ((__m128i *)(output + 5 * 8), in5);
+ _mm_storeu_si128 ((__m128i *)(output + 6 * 8), in6);
+ _mm_storeu_si128 ((__m128i *)(output + 7 * 8), in7);
+ }
+}
+
+void vp9_short_fdct16x16_sse2(int16_t *input, int16_t *output, int pitch) {
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we tranpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ const int stride = pitch >> 1;
+ int pass;
+ // We need an intermediate buffer between passes.
+ int16_t intermediate[256];
+ int16_t *in = input;
+ int16_t *out = intermediate;
+ // Constants
+ // When we use them, in one case, they are all the same. In all others
+ // it's a pair of them that we need to repeat four times. This is done
+ // by constructing the 32 bit constant corresponding to that pair.
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__cospi_p30_p02 = pair_set_epi16(cospi_30_64, cospi_2_64);
+ const __m128i k__cospi_p14_p18 = pair_set_epi16(cospi_14_64, cospi_18_64);
+ const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_p22_p10 = pair_set_epi16(cospi_22_64, cospi_10_64);
+ const __m128i k__cospi_p06_p26 = pair_set_epi16(cospi_6_64, cospi_26_64);
+ const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i kOne = _mm_set1_epi16(1);
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ // We process eight columns (transposed rows in second pass) at a time.
+ int column_start;
+ for (column_start = 0; column_start < 16; column_start += 8) {
+ __m128i in00, in01, in02, in03, in04, in05, in06, in07;
+ __m128i in08, in09, in10, in11, in12, in13, in14, in15;
+ __m128i input0, input1, input2, input3, input4, input5, input6, input7;
+ __m128i step1_0, step1_1, step1_2, step1_3;
+ __m128i step1_4, step1_5, step1_6, step1_7;
+ __m128i step2_1, step2_2, step2_3, step2_4, step2_5, step2_6;
+ __m128i step3_0, step3_1, step3_2, step3_3;
+ __m128i step3_4, step3_5, step3_6, step3_7;
+ __m128i res00, res01, res02, res03, res04, res05, res06, res07;
+ __m128i res08, res09, res10, res11, res12, res13, res14, res15;
+ // Load and pre-condition input.
+ if (0 == pass) {
+ in00 = _mm_loadu_si128((const __m128i *)(in + 0 * stride));
+ in01 = _mm_loadu_si128((const __m128i *)(in + 1 * stride));
+ in02 = _mm_loadu_si128((const __m128i *)(in + 2 * stride));
+ in03 = _mm_loadu_si128((const __m128i *)(in + 3 * stride));
+ in04 = _mm_loadu_si128((const __m128i *)(in + 4 * stride));
+ in05 = _mm_loadu_si128((const __m128i *)(in + 5 * stride));
+ in06 = _mm_loadu_si128((const __m128i *)(in + 6 * stride));
+ in07 = _mm_loadu_si128((const __m128i *)(in + 7 * stride));
+ in08 = _mm_loadu_si128((const __m128i *)(in + 8 * stride));
+ in09 = _mm_loadu_si128((const __m128i *)(in + 9 * stride));
+ in10 = _mm_loadu_si128((const __m128i *)(in + 10 * stride));
+ in11 = _mm_loadu_si128((const __m128i *)(in + 11 * stride));
+ in12 = _mm_loadu_si128((const __m128i *)(in + 12 * stride));
+ in13 = _mm_loadu_si128((const __m128i *)(in + 13 * stride));
+ in14 = _mm_loadu_si128((const __m128i *)(in + 14 * stride));
+ in15 = _mm_loadu_si128((const __m128i *)(in + 15 * stride));
+ // x = x << 2
+ in00 = _mm_slli_epi16(in00, 2);
+ in01 = _mm_slli_epi16(in01, 2);
+ in02 = _mm_slli_epi16(in02, 2);
+ in03 = _mm_slli_epi16(in03, 2);
+ in04 = _mm_slli_epi16(in04, 2);
+ in05 = _mm_slli_epi16(in05, 2);
+ in06 = _mm_slli_epi16(in06, 2);
+ in07 = _mm_slli_epi16(in07, 2);
+ in08 = _mm_slli_epi16(in08, 2);
+ in09 = _mm_slli_epi16(in09, 2);
+ in10 = _mm_slli_epi16(in10, 2);
+ in11 = _mm_slli_epi16(in11, 2);
+ in12 = _mm_slli_epi16(in12, 2);
+ in13 = _mm_slli_epi16(in13, 2);
+ in14 = _mm_slli_epi16(in14, 2);
+ in15 = _mm_slli_epi16(in15, 2);
+ } else {
+ in00 = _mm_loadu_si128((const __m128i *)(in + 0 * 16));
+ in01 = _mm_loadu_si128((const __m128i *)(in + 1 * 16));
+ in02 = _mm_loadu_si128((const __m128i *)(in + 2 * 16));
+ in03 = _mm_loadu_si128((const __m128i *)(in + 3 * 16));
+ in04 = _mm_loadu_si128((const __m128i *)(in + 4 * 16));
+ in05 = _mm_loadu_si128((const __m128i *)(in + 5 * 16));
+ in06 = _mm_loadu_si128((const __m128i *)(in + 6 * 16));
+ in07 = _mm_loadu_si128((const __m128i *)(in + 7 * 16));
+ in08 = _mm_loadu_si128((const __m128i *)(in + 8 * 16));
+ in09 = _mm_loadu_si128((const __m128i *)(in + 9 * 16));
+ in10 = _mm_loadu_si128((const __m128i *)(in + 10 * 16));
+ in11 = _mm_loadu_si128((const __m128i *)(in + 11 * 16));
+ in12 = _mm_loadu_si128((const __m128i *)(in + 12 * 16));
+ in13 = _mm_loadu_si128((const __m128i *)(in + 13 * 16));
+ in14 = _mm_loadu_si128((const __m128i *)(in + 14 * 16));
+ in15 = _mm_loadu_si128((const __m128i *)(in + 15 * 16));
+ // x = (x + 1) >> 2
+ in00 = _mm_add_epi16(in00, kOne);
+ in01 = _mm_add_epi16(in01, kOne);
+ in02 = _mm_add_epi16(in02, kOne);
+ in03 = _mm_add_epi16(in03, kOne);
+ in04 = _mm_add_epi16(in04, kOne);
+ in05 = _mm_add_epi16(in05, kOne);
+ in06 = _mm_add_epi16(in06, kOne);
+ in07 = _mm_add_epi16(in07, kOne);
+ in08 = _mm_add_epi16(in08, kOne);
+ in09 = _mm_add_epi16(in09, kOne);
+ in10 = _mm_add_epi16(in10, kOne);
+ in11 = _mm_add_epi16(in11, kOne);
+ in12 = _mm_add_epi16(in12, kOne);
+ in13 = _mm_add_epi16(in13, kOne);
+ in14 = _mm_add_epi16(in14, kOne);
+ in15 = _mm_add_epi16(in15, kOne);
+ in00 = _mm_srai_epi16(in00, 2);
+ in01 = _mm_srai_epi16(in01, 2);
+ in02 = _mm_srai_epi16(in02, 2);
+ in03 = _mm_srai_epi16(in03, 2);
+ in04 = _mm_srai_epi16(in04, 2);
+ in05 = _mm_srai_epi16(in05, 2);
+ in06 = _mm_srai_epi16(in06, 2);
+ in07 = _mm_srai_epi16(in07, 2);
+ in08 = _mm_srai_epi16(in08, 2);
+ in09 = _mm_srai_epi16(in09, 2);
+ in10 = _mm_srai_epi16(in10, 2);
+ in11 = _mm_srai_epi16(in11, 2);
+ in12 = _mm_srai_epi16(in12, 2);
+ in13 = _mm_srai_epi16(in13, 2);
+ in14 = _mm_srai_epi16(in14, 2);
+ in15 = _mm_srai_epi16(in15, 2);
+ }
+ in += 8;
+ // Calculate input for the first 8 results.
+ {
+ input0 = _mm_add_epi16(in00, in15);
+ input1 = _mm_add_epi16(in01, in14);
+ input2 = _mm_add_epi16(in02, in13);
+ input3 = _mm_add_epi16(in03, in12);
+ input4 = _mm_add_epi16(in04, in11);
+ input5 = _mm_add_epi16(in05, in10);
+ input6 = _mm_add_epi16(in06, in09);
+ input7 = _mm_add_epi16(in07, in08);
+ }
+ // Calculate input for the next 8 results.
+ {
+ step1_0 = _mm_sub_epi16(in07, in08);
+ step1_1 = _mm_sub_epi16(in06, in09);
+ step1_2 = _mm_sub_epi16(in05, in10);
+ step1_3 = _mm_sub_epi16(in04, in11);
+ step1_4 = _mm_sub_epi16(in03, in12);
+ step1_5 = _mm_sub_epi16(in02, in13);
+ step1_6 = _mm_sub_epi16(in01, in14);
+ step1_7 = _mm_sub_epi16(in00, in15);
+ }
+ // Work on the first eight values; fdct8_1d(input, even_results);
+ {
+ // Add/substract
+ const __m128i q0 = _mm_add_epi16(input0, input7);
+ const __m128i q1 = _mm_add_epi16(input1, input6);
+ const __m128i q2 = _mm_add_epi16(input2, input5);
+ const __m128i q3 = _mm_add_epi16(input3, input4);
+ const __m128i q4 = _mm_sub_epi16(input3, input4);
+ const __m128i q5 = _mm_sub_epi16(input2, input5);
+ const __m128i q6 = _mm_sub_epi16(input1, input6);
+ const __m128i q7 = _mm_sub_epi16(input0, input7);
+ // Work on first four results
+ {
+ // Add/substract
+ const __m128i r0 = _mm_add_epi16(q0, q3);
+ const __m128i r1 = _mm_add_epi16(q1, q2);
+ const __m128i r2 = _mm_sub_epi16(q1, q2);
+ const __m128i r3 = _mm_sub_epi16(q0, q3);
+ // Interleave to do the multiply by constants which gets us
+ // into 32 bits.
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+ const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+ const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res00 = _mm_packs_epi32(w0, w1);
+ res08 = _mm_packs_epi32(w2, w3);
+ res04 = _mm_packs_epi32(w4, w5);
+ res12 = _mm_packs_epi32(w6, w7);
+ }
+ // Work on next four results
+ {
+ // Interleave to do the multiply by constants which gets us
+ // into 32 bits.
+ const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
+ const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
+ const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
+ const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
+ const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
+ const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
+ const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
+ const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
+ const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
+ const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
+ const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
+ const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
+ const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
+ // Combine
+ const __m128i r0 = _mm_packs_epi32(s0, s1);
+ const __m128i r1 = _mm_packs_epi32(s2, s3);
+ // Add/substract
+ const __m128i x0 = _mm_add_epi16(q4, r0);
+ const __m128i x1 = _mm_sub_epi16(q4, r0);
+ const __m128i x2 = _mm_sub_epi16(q7, r1);
+ const __m128i x3 = _mm_add_epi16(q7, r1);
+ // Interleave to do the multiply by constants which gets us
+ // into 32 bits.
+ const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+ const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+ const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+ const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res02 = _mm_packs_epi32(w0, w1);
+ res14 = _mm_packs_epi32(w2, w3);
+ res10 = _mm_packs_epi32(w4, w5);
+ res06 = _mm_packs_epi32(w6, w7);
+ }
+ }
+ // Work on the next eight values; step1 -> odd_results
+ {
+ // step 2
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_5, step1_2);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_5, step1_2);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_4, step1_3);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_4, step1_3);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+ const __m128i u2 = _mm_madd_epi16(t2, k__cospi_p16_m16);
+ const __m128i u3 = _mm_madd_epi16(t3, k__cospi_p16_m16);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // Combine
+ step2_2 = _mm_packs_epi32(w0, w1);
+ step2_3 = _mm_packs_epi32(w2, w3);
+ }
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_5, step1_2);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_5, step1_2);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_4, step1_3);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_4, step1_3);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t2, k__cospi_p16_p16);
+ const __m128i u3 = _mm_madd_epi16(t3, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // Combine
+ step2_5 = _mm_packs_epi32(w0, w1);
+ step2_4 = _mm_packs_epi32(w2, w3);
+ }
+ // step 3
+ {
+ step3_0 = _mm_add_epi16(step1_0, step2_3);
+ step3_1 = _mm_add_epi16(step1_1, step2_2);
+ step3_2 = _mm_sub_epi16(step1_1, step2_2);
+ step3_3 = _mm_sub_epi16(step1_0, step2_3);
+ step3_4 = _mm_sub_epi16(step1_7, step2_4);
+ step3_5 = _mm_sub_epi16(step1_6, step2_5);
+ step3_6 = _mm_add_epi16(step1_6, step2_5);
+ step3_7 = _mm_add_epi16(step1_7, step2_4);
+ }
+ // step 4
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step3_1, step3_6);
+ const __m128i t1 = _mm_unpackhi_epi16(step3_1, step3_6);
+ const __m128i t2 = _mm_unpacklo_epi16(step3_2, step3_5);
+ const __m128i t3 = _mm_unpackhi_epi16(step3_2, step3_5);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_m08_p24);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_m08_p24);
+ const __m128i u2 = _mm_madd_epi16(t2, k__cospi_m24_m08);
+ const __m128i u3 = _mm_madd_epi16(t3, k__cospi_m24_m08);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // Combine
+ step2_1 = _mm_packs_epi32(w0, w1);
+ step2_2 = _mm_packs_epi32(w2, w3);
+ }
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step3_1, step3_6);
+ const __m128i t1 = _mm_unpackhi_epi16(step3_1, step3_6);
+ const __m128i t2 = _mm_unpacklo_epi16(step3_2, step3_5);
+ const __m128i t3 = _mm_unpackhi_epi16(step3_2, step3_5);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p24_p08);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p24_p08);
+ const __m128i u2 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+ const __m128i u3 = _mm_madd_epi16(t3, k__cospi_m08_p24);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // Combine
+ step2_6 = _mm_packs_epi32(w0, w1);
+ step2_5 = _mm_packs_epi32(w2, w3);
+ }
+ // step 5
+ {
+ step1_0 = _mm_add_epi16(step3_0, step2_1);
+ step1_1 = _mm_sub_epi16(step3_0, step2_1);
+ step1_2 = _mm_sub_epi16(step3_3, step2_2);
+ step1_3 = _mm_add_epi16(step3_3, step2_2);
+ step1_4 = _mm_add_epi16(step3_4, step2_5);
+ step1_5 = _mm_sub_epi16(step3_4, step2_5);
+ step1_6 = _mm_sub_epi16(step3_7, step2_6);
+ step1_7 = _mm_add_epi16(step3_7, step2_6);
+ }
+ // step 6
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_0, step1_7);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_0, step1_7);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_1, step1_6);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_1, step1_6);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p30_p02);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p30_p02);
+ const __m128i u2 = _mm_madd_epi16(t2, k__cospi_p14_p18);
+ const __m128i u3 = _mm_madd_epi16(t3, k__cospi_p14_p18);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // Combine
+ res01 = _mm_packs_epi32(w0, w1);
+ res09 = _mm_packs_epi32(w2, w3);
+ }
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_2, step1_5);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_2, step1_5);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_3, step1_4);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_3, step1_4);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p22_p10);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p22_p10);
+ const __m128i u2 = _mm_madd_epi16(t2, k__cospi_p06_p26);
+ const __m128i u3 = _mm_madd_epi16(t3, k__cospi_p06_p26);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // Combine
+ res05 = _mm_packs_epi32(w0, w1);
+ res13 = _mm_packs_epi32(w2, w3);
+ }
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_2, step1_5);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_2, step1_5);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_3, step1_4);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_3, step1_4);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_m10_p22);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_m10_p22);
+ const __m128i u2 = _mm_madd_epi16(t2, k__cospi_m26_p06);
+ const __m128i u3 = _mm_madd_epi16(t3, k__cospi_m26_p06);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // Combine
+ res11 = _mm_packs_epi32(w0, w1);
+ res03 = _mm_packs_epi32(w2, w3);
+ }
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_0, step1_7);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_0, step1_7);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_1, step1_6);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_1, step1_6);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_m02_p30);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_m02_p30);
+ const __m128i u2 = _mm_madd_epi16(t2, k__cospi_m18_p14);
+ const __m128i u3 = _mm_madd_epi16(t3, k__cospi_m18_p14);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // Combine
+ res15 = _mm_packs_epi32(w0, w1);
+ res07 = _mm_packs_epi32(w2, w3);
+ }
+ }
+ // Transpose the results, do it as two 8x8 transposes.
+ {
+ // 00 01 02 03 04 05 06 07
+ // 10 11 12 13 14 15 16 17
+ // 20 21 22 23 24 25 26 27
+ // 30 31 32 33 34 35 36 37
+ // 40 41 42 43 44 45 46 47
+ // 50 51 52 53 54 55 56 57
+ // 60 61 62 63 64 65 66 67
+ // 70 71 72 73 74 75 76 77
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res00, res01);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(res02, res03);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(res00, res01);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(res02, res03);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(res04, res05);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(res06, res07);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(res04, res05);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(res06, res07);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+ // 54 54 55 55 56 56 57 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+ // 04 14 24 34 05 15 21 36
+ // 44 54 64 74 45 55 61 76
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ const __m128i tr2_0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ const __m128i tr2_1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ const __m128i tr2_2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ const __m128i tr2_3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+ const __m128i tr2_4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+ const __m128i tr2_5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+ const __m128i tr2_6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+ const __m128i tr2_7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ _mm_storeu_si128 ((__m128i *)(out + 0 * 16), tr2_0);
+ _mm_storeu_si128 ((__m128i *)(out + 1 * 16), tr2_1);
+ _mm_storeu_si128 ((__m128i *)(out + 2 * 16), tr2_2);
+ _mm_storeu_si128 ((__m128i *)(out + 3 * 16), tr2_3);
+ _mm_storeu_si128 ((__m128i *)(out + 4 * 16), tr2_4);
+ _mm_storeu_si128 ((__m128i *)(out + 5 * 16), tr2_5);
+ _mm_storeu_si128 ((__m128i *)(out + 6 * 16), tr2_6);
+ _mm_storeu_si128 ((__m128i *)(out + 7 * 16), tr2_7);
+ }
+ {
+ // 00 01 02 03 04 05 06 07
+ // 10 11 12 13 14 15 16 17
+ // 20 21 22 23 24 25 26 27
+ // 30 31 32 33 34 35 36 37
+ // 40 41 42 43 44 45 46 47
+ // 50 51 52 53 54 55 56 57
+ // 60 61 62 63 64 65 66 67
+ // 70 71 72 73 74 75 76 77
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res08, res09);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(res10, res11);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(res08, res09);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(res10, res11);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(res12, res13);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(res14, res15);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(res12, res13);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(res14, res15);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+ // 54 54 55 55 56 56 57 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+ // 04 14 24 34 05 15 21 36
+ // 44 54 64 74 45 55 61 76
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ const __m128i tr2_0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ const __m128i tr2_1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ const __m128i tr2_2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ const __m128i tr2_3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+ const __m128i tr2_4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+ const __m128i tr2_5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+ const __m128i tr2_6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+ const __m128i tr2_7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ // Store results
+ _mm_storeu_si128 ((__m128i *)(out + 8 + 0 * 16), tr2_0);
+ _mm_storeu_si128 ((__m128i *)(out + 8 + 1 * 16), tr2_1);
+ _mm_storeu_si128 ((__m128i *)(out + 8 + 2 * 16), tr2_2);
+ _mm_storeu_si128 ((__m128i *)(out + 8 + 3 * 16), tr2_3);
+ _mm_storeu_si128 ((__m128i *)(out + 8 + 4 * 16), tr2_4);
+ _mm_storeu_si128 ((__m128i *)(out + 8 + 5 * 16), tr2_5);
+ _mm_storeu_si128 ((__m128i *)(out + 8 + 6 * 16), tr2_6);
+ _mm_storeu_si128 ((__m128i *)(out + 8 + 7 * 16), tr2_7);
+ }
+ out += 8*16;
+ }
+ // Setup in/out for next pass.
+ in = intermediate;
+ out = output;
+ }
+}
--- a/vp9/encoder/x86/vp9_encodeopt.asm
+++ b/vp9/encoder/x86/vp9_encodeopt.asm
@@ -125,7 +125,7 @@
ret
-;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
+;int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
global sym(vp9_mbblock_error_mmx_impl) PRIVATE
sym(vp9_mbblock_error_mmx_impl):
push rbp
@@ -142,10 +142,6 @@
mov rdi, arg(1) ;dcoef_ptr
pxor mm2, mm2
- movd mm1, dword ptr arg(2) ;dc
- por mm1, mm2
-
- pcmpeqw mm1, mm7
mov rcx, 16
.mberror_loop_mmx:
@@ -160,7 +156,6 @@
pmaddwd mm5, mm5
psubw mm3, mm4
- pand mm3, mm1
pmaddwd mm3, mm3
paddd mm2, mm5
@@ -202,13 +197,13 @@
ret
-;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
+;int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
global sym(vp9_mbblock_error_xmm_impl) PRIVATE
sym(vp9_mbblock_error_xmm_impl):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
- SAVE_XMM 6
+ SAVE_XMM 5
push rsi
push rdi
; end prolog
@@ -215,15 +210,11 @@
mov rsi, arg(0) ;coeff_ptr
- pxor xmm6, xmm6
+ pxor xmm5, xmm5
mov rdi, arg(1) ;dcoef_ptr
pxor xmm4, xmm4
- movd xmm5, dword ptr arg(2) ;dc
- por xmm5, xmm4
-
- pcmpeqw xmm5, xmm6
mov rcx, 16
.mberror_loop:
@@ -238,7 +229,6 @@
pmaddwd xmm2, xmm2
psubw xmm0, xmm1
- pand xmm0, xmm5
pmaddwd xmm0, xmm0
add rsi, 32
@@ -252,9 +242,9 @@
jnz .mberror_loop
movdqa xmm0, xmm4
- punpckldq xmm0, xmm6
+ punpckldq xmm0, xmm5
- punpckhdq xmm4, xmm6
+ punpckhdq xmm4, xmm5
paddd xmm0, xmm4
movdqa xmm1, xmm0
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad4d_sse2.asm
@@ -1,0 +1,225 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_4x2x4 5-6 0
+ movd m0, [srcq +%2]
+%if %1 == 1
+ movd m6, [ref1q+%3]
+ movd m4, [ref2q+%3]
+ movd m7, [ref3q+%3]
+ movd m5, [ref4q+%3]
+ punpckldq m0, [srcq +%4]
+ punpckldq m6, [ref1q+%5]
+ punpckldq m4, [ref2q+%5]
+ punpckldq m7, [ref3q+%5]
+ punpckldq m5, [ref4q+%5]
+ psadbw m6, m0
+ psadbw m4, m0
+ psadbw m7, m0
+ psadbw m5, m0
+ punpckldq m6, m4
+ punpckldq m7, m5
+%else
+ movd m1, [ref1q+%3]
+ movd m2, [ref2q+%3]
+ movd m3, [ref3q+%3]
+ movd m4, [ref4q+%3]
+ punpckldq m0, [srcq +%4]
+ punpckldq m1, [ref1q+%5]
+ punpckldq m2, [ref2q+%5]
+ punpckldq m3, [ref3q+%5]
+ punpckldq m4, [ref4q+%5]
+ psadbw m1, m0
+ psadbw m2, m0
+ psadbw m3, m0
+ psadbw m4, m0
+ punpckldq m1, m2
+ punpckldq m3, m4
+ paddd m6, m1
+ paddd m7, m3
+%endif
+%if %6 == 1
+ lea srcq, [srcq +src_strideq*2]
+ lea ref1q, [ref1q+ref_strideq*2]
+ lea ref2q, [ref2q+ref_strideq*2]
+ lea ref3q, [ref3q+ref_strideq*2]
+ lea ref4q, [ref4q+ref_strideq*2]
+%endif
+%endmacro
+
+; PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_8x2x4 5-6 0
+ movh m0, [srcq +%2]
+%if %1 == 1
+ movh m4, [ref1q+%3]
+ movh m5, [ref2q+%3]
+ movh m6, [ref3q+%3]
+ movh m7, [ref4q+%3]
+ movhps m0, [srcq +%4]
+ movhps m4, [ref1q+%5]
+ movhps m5, [ref2q+%5]
+ movhps m6, [ref3q+%5]
+ movhps m7, [ref4q+%5]
+ psadbw m4, m0
+ psadbw m5, m0
+ psadbw m6, m0
+ psadbw m7, m0
+%else
+ movh m1, [ref1q+%3]
+ movh m2, [ref2q+%3]
+ movh m3, [ref3q+%3]
+ movhps m0, [srcq +%4]
+ movhps m1, [ref1q+%5]
+ movhps m2, [ref2q+%5]
+ movhps m3, [ref3q+%5]
+ psadbw m1, m0
+ psadbw m2, m0
+ psadbw m3, m0
+ paddd m4, m1
+ movh m1, [ref4q+%3]
+ movhps m1, [ref4q+%5]
+ paddd m5, m2
+ paddd m6, m3
+ psadbw m1, m0
+ paddd m7, m1
+%endif
+%if %6 == 1
+ lea srcq, [srcq +src_strideq*2]
+ lea ref1q, [ref1q+ref_strideq*2]
+ lea ref2q, [ref2q+ref_strideq*2]
+ lea ref3q, [ref3q+ref_strideq*2]
+ lea ref4q, [ref4q+ref_strideq*2]
+%endif
+%endmacro
+
+; PROCESS_16x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_16x2x4 5-6 0
+ ; 1st 16 px
+ mova m0, [srcq +%2]
+%if %1 == 1
+ movu m4, [ref1q+%3]
+ movu m5, [ref2q+%3]
+ movu m6, [ref3q+%3]
+ movu m7, [ref4q+%3]
+ psadbw m4, m0
+ psadbw m5, m0
+ psadbw m6, m0
+ psadbw m7, m0
+%else
+ movu m1, [ref1q+%3]
+ movu m2, [ref2q+%3]
+ movu m3, [ref3q+%3]
+ psadbw m1, m0
+ psadbw m2, m0
+ psadbw m3, m0
+ paddd m4, m1
+ movu m1, [ref4q+%3]
+ paddd m5, m2
+ paddd m6, m3
+ psadbw m1, m0
+ paddd m7, m1
+%endif
+
+ ; 2nd 16 px
+ mova m0, [srcq +%4]
+ movu m1, [ref1q+%5]
+ movu m2, [ref2q+%5]
+ movu m3, [ref3q+%5]
+ psadbw m1, m0
+ psadbw m2, m0
+ psadbw m3, m0
+ paddd m4, m1
+ movu m1, [ref4q+%5]
+ paddd m5, m2
+ paddd m6, m3
+%if %6 == 1
+ lea srcq, [srcq +src_strideq*2]
+ lea ref1q, [ref1q+ref_strideq*2]
+ lea ref2q, [ref2q+ref_strideq*2]
+ lea ref3q, [ref3q+ref_strideq*2]
+ lea ref4q, [ref4q+ref_strideq*2]
+%endif
+ psadbw m1, m0
+ paddd m7, m1
+%endmacro
+
+; PROCESS_32x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_32x2x4 5-6 0
+ PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16
+ PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6
+%endmacro
+
+; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_64x2x4 5-6 0
+ PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32
+ PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6
+%endmacro
+
+; void vp9_sadNxNx4d_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref[4], int ref_stride,
+; unsigned int res[4]);
+; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
+%macro SADNXN4D 2
+%if UNIX64
+cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
+ res, ref2, ref3, ref4
+%else
+cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
+ ref2, ref3, ref4
+%endif
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ mov ref2q, [ref1q+gprsize*1]
+ mov ref3q, [ref1q+gprsize*2]
+ mov ref4q, [ref1q+gprsize*3]
+ mov ref1q, [ref1q+gprsize*0]
+
+ PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
+%rep (%2-4)/2
+ PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
+%endrep
+ PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
+
+%if mmsize == 16
+ pslldq m5, 4
+ pslldq m7, 4
+ por m4, m5
+ por m6, m7
+ mova m5, m4
+ mova m7, m6
+ punpcklqdq m4, m6
+ punpckhqdq m5, m7
+ movifnidn r4, r4mp
+ paddd m4, m5
+ movu [r4], m4
+ RET
+%else
+ movifnidn r4, r4mp
+ movq [r4+0], m6
+ movq [r4+8], m7
+ RET
+%endif
+%endmacro
+
+INIT_XMM sse2
+SADNXN4D 64, 64
+SADNXN4D 32, 32
+SADNXN4D 16, 16
+SADNXN4D 16, 8
+SADNXN4D 8, 16
+SADNXN4D 8, 8
+
+INIT_MMX sse
+SADNXN4D 4, 4
--- a/vp9/encoder/x86/vp9_sad_sse2.asm
+++ b/vp9/encoder/x86/vp9_sad_sse2.asm
@@ -8,403 +8,175 @@
; be found in the AUTHORS file in the root of the source tree.
;
+%include "third_party/x86inc/x86inc.asm"
-%include "vpx_ports/x86_abi_support.asm"
+SECTION .text
-;unsigned int vp9_sad16x16_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp9_sad16x16_wmt) PRIVATE
-sym(vp9_sad16x16_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- SAVE_XMM 6
- push rsi
- push rdi
- ; end prolog
+; unsigned int vp9_sad64x64_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+INIT_XMM sse2
+cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ mov n_rowsd, 64
+ pxor m0, m0
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+16]
+ movu m3, [refq+32]
+ movu m4, [refq+48]
+ psadbw m1, [srcq]
+ psadbw m2, [srcq+16]
+ psadbw m3, [srcq+32]
+ psadbw m4, [srcq+48]
+ paddd m1, m2
+ paddd m3, m4
+ add refq, ref_strideq
+ paddd m0, m1
+ add srcq, src_strideq
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+; unsigned int vp9_sad32x32_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+INIT_XMM sse2
+cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ mov n_rowsd, 16
+ pxor m0, m0
- lea rcx, [rsi+rax*8]
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+16]
+ movu m3, [refq+ref_strideq]
+ movu m4, [refq+ref_strideq+16]
+ psadbw m1, [srcq]
+ psadbw m2, [srcq+16]
+ psadbw m3, [srcq+src_strideq]
+ psadbw m4, [srcq+src_strideq+16]
+ paddd m1, m2
+ paddd m3, m4
+ lea refq, [refq+ref_strideq*2]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*2]
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
- lea rcx, [rcx+rax*8]
- pxor xmm6, xmm6
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
-.x16x16sad_wmt_loop:
+; unsigned int vp9_sad16x{8,16}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+%macro SAD16XN 1
+cglobal sad16x%1, 4, 7, 5, src, src_stride, ref, ref_stride, \
+ src_stride3, ref_stride3, n_rows
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ lea src_stride3q, [src_strideq*3]
+ lea ref_stride3q, [ref_strideq*3]
+ mov n_rowsd, %1/4
+ pxor m0, m0
- movq xmm0, QWORD PTR [rsi]
- movq xmm2, QWORD PTR [rsi+8]
+.loop:
+ movu m1, [refq]
+ movu m2, [refq+ref_strideq]
+ movu m3, [refq+ref_strideq*2]
+ movu m4, [refq+ref_stride3q]
+ psadbw m1, [srcq]
+ psadbw m2, [srcq+src_strideq]
+ psadbw m3, [srcq+src_strideq*2]
+ psadbw m4, [srcq+src_stride3q]
+ paddd m1, m2
+ paddd m3, m4
+ lea refq, [refq+ref_strideq*4]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*4]
+ paddd m0, m3
+ dec n_rowsd
+ jg .loop
- movq xmm1, QWORD PTR [rdi]
- movq xmm3, QWORD PTR [rdi+8]
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+%endmacro
- movq xmm4, QWORD PTR [rsi+rax]
- movq xmm5, QWORD PTR [rdi+rdx]
+INIT_XMM sse2
+SAD16XN 16 ; sad16x16_sse2
+SAD16XN 8 ; sad16x8_sse2
+; unsigned int vp9_sad8x{8,16}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+%macro SAD8XN 1
+cglobal sad8x%1, 4, 7, 5, src, src_stride, ref, ref_stride, \
+ src_stride3, ref_stride3, n_rows
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ lea src_stride3q, [src_strideq*3]
+ lea ref_stride3q, [ref_strideq*3]
+ mov n_rowsd, %1/4
+ pxor m0, m0
- punpcklbw xmm0, xmm2
- punpcklbw xmm1, xmm3
+.loop:
+ movh m1, [refq]
+ movhps m1, [refq+ref_strideq]
+ movh m2, [refq+ref_strideq*2]
+ movhps m2, [refq+ref_stride3q]
+ movh m3, [srcq]
+ movhps m3, [srcq+src_strideq]
+ movh m4, [srcq+src_strideq*2]
+ movhps m4, [srcq+src_stride3q]
+ psadbw m1, m3
+ psadbw m2, m4
+ lea refq, [refq+ref_strideq*4]
+ paddd m0, m1
+ lea srcq, [srcq+src_strideq*4]
+ paddd m0, m2
+ dec n_rowsd
+ jg .loop
- psadbw xmm0, xmm1
- movq xmm2, QWORD PTR [rsi+rax+8]
+ movhlps m1, m0
+ paddd m0, m1
+ movd eax, m0
+ RET
+%endmacro
- movq xmm3, QWORD PTR [rdi+rdx+8]
- lea rsi, [rsi+rax*2]
+INIT_XMM sse2
+SAD8XN 16 ; sad8x16_sse2
+SAD8XN 8 ; sad8x8_sse2
- lea rdi, [rdi+rdx*2]
- punpcklbw xmm4, xmm2
-
- punpcklbw xmm5, xmm3
- psadbw xmm4, xmm5
-
- paddw xmm6, xmm0
- paddw xmm6, xmm4
-
- cmp rsi, rcx
- jne .x16x16sad_wmt_loop
-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movq rax, xmm0
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;unsigned int vp9_sad8x16_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int max_err)
-global sym(vp9_sad8x16_wmt) PRIVATE
-sym(vp9_sad8x16_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
-
- lea rcx, [rcx+rbx*8]
- pxor mm7, mm7
-
-.x8x16sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- jg .x8x16sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- movq mm2, QWORD PTR [rsi+rbx]
- movq mm3, QWORD PTR [rdi+rdx]
-
- psadbw mm0, mm1
- psadbw mm2, mm3
-
- lea rsi, [rsi+rbx*2]
- lea rdi, [rdi+rdx*2]
-
- paddw mm7, mm0
- paddw mm7, mm2
-
- cmp rsi, rcx
- jne .x8x16sad_wmt_loop
-
- movq rax, mm7
-
-.x8x16sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vp9_sad8x8_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp9_sad8x8_wmt) PRIVATE
-sym(vp9_sad8x8_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
- pxor mm7, mm7
-
-.x8x8sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- jg .x8x8sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- psadbw mm0, mm1
- lea rsi, [rsi+rbx]
-
- add rdi, rdx
- paddw mm7, mm0
-
- cmp rsi, rcx
- jne .x8x8sad_wmt_loop
-
- movq rax, mm7
-.x8x8sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;unsigned int vp9_sad4x4_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp9_sad4x4_wmt) PRIVATE
-sym(vp9_sad4x4_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- movd mm0, DWORD PTR [rsi]
- movd mm1, DWORD PTR [rdi]
-
- movd mm2, DWORD PTR [rsi+rax]
- movd mm3, DWORD PTR [rdi+rdx]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- psadbw mm0, mm1
- lea rsi, [rsi+rax*2]
-
- lea rdi, [rdi+rdx*2]
- movd mm4, DWORD PTR [rsi]
-
- movd mm5, DWORD PTR [rdi]
- movd mm6, DWORD PTR [rsi+rax]
-
- movd mm7, DWORD PTR [rdi+rdx]
- punpcklbw mm4, mm6
-
- punpcklbw mm5, mm7
- psadbw mm4, mm5
-
- paddw mm0, mm4
- movq rax, mm0
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vp9_sad16x8_wmt(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-global sym(vp9_sad16x8_wmt) PRIVATE
-sym(vp9_sad16x8_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rbx
- push rsi
- push rdi
- ; end prolog
-
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rbx, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rbx*8]
- pxor mm7, mm7
-
-.x16x8sad_wmt_loop:
-
- movq rax, mm7
- cmp eax, arg(4)
- jg .x16x8sad_wmt_early_exit
-
- movq mm0, QWORD PTR [rsi]
- movq mm2, QWORD PTR [rsi+8]
-
- movq mm1, QWORD PTR [rdi]
- movq mm3, QWORD PTR [rdi+8]
-
- movq mm4, QWORD PTR [rsi+rbx]
- movq mm5, QWORD PTR [rdi+rdx]
-
- psadbw mm0, mm1
- psadbw mm2, mm3
-
- movq mm1, QWORD PTR [rsi+rbx+8]
- movq mm3, QWORD PTR [rdi+rdx+8]
-
- psadbw mm4, mm5
- psadbw mm1, mm3
-
- lea rsi, [rsi+rbx*2]
- lea rdi, [rdi+rdx*2]
-
- paddw mm0, mm2
- paddw mm4, mm1
-
- paddw mm7, mm0
- paddw mm7, mm4
-
- cmp rsi, rcx
- jne .x16x8sad_wmt_loop
-
- movq rax, mm7
-
-.x16x8sad_wmt_early_exit:
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_copy32xn_sse2(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *dst_ptr,
-; int dst_stride,
-; int height);
-global sym(vp9_copy32xn_sse2) PRIVATE
-sym(vp9_copy32xn_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;dst_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;dst_stride
- movsxd rcx, dword ptr arg(4) ;height
-
-.block_copy_sse2_loopx4:
- movdqu xmm0, XMMWORD PTR [rsi]
- movdqu xmm1, XMMWORD PTR [rsi + 16]
- movdqu xmm2, XMMWORD PTR [rsi + rax]
- movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
-
- lea rsi, [rsi+rax*2]
-
- movdqu xmm4, XMMWORD PTR [rsi]
- movdqu xmm5, XMMWORD PTR [rsi + 16]
- movdqu xmm6, XMMWORD PTR [rsi + rax]
- movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
-
- lea rsi, [rsi+rax*2]
-
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi + 16], xmm1
- movdqa XMMWORD PTR [rdi + rdx], xmm2
- movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
-
- lea rdi, [rdi+rdx*2]
-
- movdqa XMMWORD PTR [rdi], xmm4
- movdqa XMMWORD PTR [rdi + 16], xmm5
- movdqa XMMWORD PTR [rdi + rdx], xmm6
- movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
-
- lea rdi, [rdi+rdx*2]
-
- sub rcx, 4
- cmp rcx, 4
- jge .block_copy_sse2_loopx4
-
- cmp rcx, 0
- je .copy_is_done
-
-.block_copy_sse2_loop:
- movdqu xmm0, XMMWORD PTR [rsi]
- movdqu xmm1, XMMWORD PTR [rsi + 16]
- lea rsi, [rsi+rax]
-
- movdqa XMMWORD PTR [rdi], xmm0
- movdqa XMMWORD PTR [rdi + 16], xmm1
- lea rdi, [rdi+rdx]
-
- sub rcx, 1
- jne .block_copy_sse2_loop
-
-.copy_is_done:
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
+; unsigned int vp9_sad4x4_sse(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
+INIT_MMX sse
+cglobal sad4x4, 4, 4, 8, src, src_stride, ref, ref_stride
+ movsxdifnidn src_strideq, src_strided
+ movsxdifnidn ref_strideq, ref_strided
+ movd m0, [refq]
+ movd m1, [refq+ref_strideq]
+ movd m2, [srcq]
+ movd m3, [srcq+src_strideq]
+ lea refq, [refq+ref_strideq*2]
+ lea srcq, [srcq+src_strideq*2]
+ movd m4, [refq]
+ movd m5, [refq+ref_strideq]
+ movd m6, [srcq]
+ movd m7, [srcq+src_strideq]
+ punpckldq m0, m1
+ punpckldq m2, m3
+ punpckldq m4, m5
+ punpckldq m6, m7
+ psadbw m0, m2
+ psadbw m4, m6
+ paddd m0, m4
+ movd eax, m0
+ RET
--- a/vp9/encoder/x86/vp9_sad_sse3.asm
+++ b/vp9/encoder/x86/vp9_sad_sse3.asm
@@ -83,87 +83,6 @@
ret
%endmacro
-%macro STACK_FRAME_CREATE_X4 0
-%if ABI_IS_32BIT
- %define src_ptr rsi
- %define src_stride rax
- %define r0_ptr rcx
- %define r1_ptr rdx
- %define r2_ptr rbx
- %define r3_ptr rdi
- %define ref_stride rbp
- %define result_ptr arg(4)
- push rbp
- mov rbp, rsp
- push rsi
- push rdi
- push rbx
-
- push rbp
- mov rdi, arg(2) ; ref_ptr_base
-
- LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
- mov rsi, arg(0) ; src_ptr
-
- movsxd rbx, dword ptr arg(1) ; src_stride
- movsxd rbp, dword ptr arg(3) ; ref_stride
-
- xchg rbx, rax
-%else
- %if LIBVPX_YASM_WIN64
- SAVE_XMM 7, u
- %define src_ptr rcx
- %define src_stride rdx
- %define r0_ptr rsi
- %define r1_ptr r10
- %define r2_ptr r11
- %define r3_ptr r8
- %define ref_stride r9
- %define result_ptr [rsp+xmm_stack_space+16+4*8]
- push rsi
-
- LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
- %else
- %define src_ptr rdi
- %define src_stride rsi
- %define r0_ptr r9
- %define r1_ptr r10
- %define r2_ptr r11
- %define r3_ptr rdx
- %define ref_stride rcx
- %define result_ptr r8
-
- LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
-
- %endif
-%endif
-%endmacro
-
-%macro STACK_FRAME_DESTROY_X4 0
- %define src_ptr
- %define src_stride
- %define r0_ptr
- %define r1_ptr
- %define r2_ptr
- %define r3_ptr
- %define ref_stride
- %define result_ptr
-
-%if ABI_IS_32BIT
- pop rbx
- pop rdi
- pop rsi
- pop rbp
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- RESTORE_XMM
- %endif
-%endif
- ret
-%endmacro
-
%macro PROCESS_16X2X3 5
%if %1==0
movdqa xmm0, XMMWORD PTR [%2]
@@ -250,130 +169,6 @@
paddw mm7, mm3
%endmacro
-%macro LOAD_X4_ADDRESSES 5
- mov %2, [%1+REG_SZ_BYTES*0]
- mov %3, [%1+REG_SZ_BYTES*1]
-
- mov %4, [%1+REG_SZ_BYTES*2]
- mov %5, [%1+REG_SZ_BYTES*3]
-%endmacro
-
-%macro PROCESS_16X2X4 8
-%if %1==0
- movdqa xmm0, XMMWORD PTR [%2]
- lddqu xmm4, XMMWORD PTR [%3]
- lddqu xmm5, XMMWORD PTR [%4]
- lddqu xmm6, XMMWORD PTR [%5]
- lddqu xmm7, XMMWORD PTR [%6]
-
- psadbw xmm4, xmm0
- psadbw xmm5, xmm0
- psadbw xmm6, xmm0
- psadbw xmm7, xmm0
-%else
- movdqa xmm0, XMMWORD PTR [%2]
- lddqu xmm1, XMMWORD PTR [%3]
- lddqu xmm2, XMMWORD PTR [%4]
- lddqu xmm3, XMMWORD PTR [%5]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm4, xmm1
- lddqu xmm1, XMMWORD PTR [%6]
- paddw xmm5, xmm2
- paddw xmm6, xmm3
-
- psadbw xmm1, xmm0
- paddw xmm7, xmm1
-%endif
- movdqa xmm0, XMMWORD PTR [%2+%7]
- lddqu xmm1, XMMWORD PTR [%3+%8]
- lddqu xmm2, XMMWORD PTR [%4+%8]
- lddqu xmm3, XMMWORD PTR [%5+%8]
-
- psadbw xmm1, xmm0
- psadbw xmm2, xmm0
- psadbw xmm3, xmm0
-
- paddw xmm4, xmm1
- lddqu xmm1, XMMWORD PTR [%6+%8]
- paddw xmm5, xmm2
- paddw xmm6, xmm3
-
-%if %1==0 || %1==1
- lea %2, [%2+%7*2]
- lea %3, [%3+%8*2]
-
- lea %4, [%4+%8*2]
- lea %5, [%5+%8*2]
-
- lea %6, [%6+%8*2]
-%endif
- psadbw xmm1, xmm0
- paddw xmm7, xmm1
-
-%endmacro
-
-%macro PROCESS_8X2X4 8
-%if %1==0
- movq mm0, QWORD PTR [%2]
- movq mm4, QWORD PTR [%3]
- movq mm5, QWORD PTR [%4]
- movq mm6, QWORD PTR [%5]
- movq mm7, QWORD PTR [%6]
-
- psadbw mm4, mm0
- psadbw mm5, mm0
- psadbw mm6, mm0
- psadbw mm7, mm0
-%else
- movq mm0, QWORD PTR [%2]
- movq mm1, QWORD PTR [%3]
- movq mm2, QWORD PTR [%4]
- movq mm3, QWORD PTR [%5]
-
- psadbw mm1, mm0
- psadbw mm2, mm0
- psadbw mm3, mm0
-
- paddw mm4, mm1
- movq mm1, QWORD PTR [%6]
- paddw mm5, mm2
- paddw mm6, mm3
-
- psadbw mm1, mm0
- paddw mm7, mm1
-%endif
- movq mm0, QWORD PTR [%2+%7]
- movq mm1, QWORD PTR [%3+%8]
- movq mm2, QWORD PTR [%4+%8]
- movq mm3, QWORD PTR [%5+%8]
-
- psadbw mm1, mm0
- psadbw mm2, mm0
- psadbw mm3, mm0
-
- paddw mm4, mm1
- movq mm1, QWORD PTR [%6+%8]
- paddw mm5, mm2
- paddw mm6, mm3
-
-%if %1==0 || %1==1
- lea %2, [%2+%7*2]
- lea %3, [%3+%8*2]
-
- lea %4, [%4+%8*2]
- lea %5, [%5+%8*2]
-
- lea %6, [%6+%8*2]
-%endif
- psadbw mm1, mm0
- paddw mm7, mm1
-
-%endmacro
-
;void int vp9_sad16x16x3_sse3(
; unsigned char *src_ptr,
; int src_stride,
@@ -581,380 +376,3 @@
movd [rcx+8], mm7
STACK_FRAME_DESTROY_X3
-
-;unsigned int vp9_sad16x16_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int max_err)
-;%define lddqu movdqu
-global sym(vp9_sad16x16_sse3) PRIVATE
-sym(vp9_sad16x16_sse3):
-
- STACK_FRAME_CREATE_X3
-
- mov end_ptr, 4
- pxor xmm7, xmm7
-
-.vp9_sad16x16_sse3_loop:
- movdqa xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [ref_ptr]
- movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
- movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea ref_ptr, [ref_ptr+ref_stride*2]
-
- movdqa xmm4, XMMWORD PTR [src_ptr]
- movdqu xmm5, XMMWORD PTR [ref_ptr]
- movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
-
- psadbw xmm0, xmm1
-
- movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
-
- psadbw xmm2, xmm3
- psadbw xmm4, xmm5
- psadbw xmm6, xmm1
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea ref_ptr, [ref_ptr+ref_stride*2]
-
- paddw xmm7, xmm0
- paddw xmm7, xmm2
- paddw xmm7, xmm4
- paddw xmm7, xmm6
-
- sub end_ptr, 1
- jne .vp9_sad16x16_sse3_loop
-
- movq xmm0, xmm7
- psrldq xmm7, 8
- paddw xmm0, xmm7
- movq rax, xmm0
-
- STACK_FRAME_DESTROY_X3
-
-;void vp9_copy32xn_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *dst_ptr,
-; int dst_stride,
-; int height);
-global sym(vp9_copy32xn_sse3) PRIVATE
-sym(vp9_copy32xn_sse3):
-
- STACK_FRAME_CREATE_X3
-
-.block_copy_sse3_loopx4:
- lea end_ptr, [src_ptr+src_stride*2]
-
- movdqu xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [src_ptr + 16]
- movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
- movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
- movdqu xmm4, XMMWORD PTR [end_ptr]
- movdqu xmm5, XMMWORD PTR [end_ptr + 16]
- movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
- movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
-
- lea src_ptr, [src_ptr+src_stride*4]
-
- lea end_ptr, [ref_ptr+ref_stride*2]
-
- movdqa XMMWORD PTR [ref_ptr], xmm0
- movdqa XMMWORD PTR [ref_ptr + 16], xmm1
- movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
- movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
- movdqa XMMWORD PTR [end_ptr], xmm4
- movdqa XMMWORD PTR [end_ptr + 16], xmm5
- movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
- movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
-
- lea ref_ptr, [ref_ptr+ref_stride*4]
-
- sub height, 4
- cmp height, 4
- jge .block_copy_sse3_loopx4
-
- ;Check to see if there is more rows need to be copied.
- cmp height, 0
- je .copy_is_done
-
-.block_copy_sse3_loop:
- movdqu xmm0, XMMWORD PTR [src_ptr]
- movdqu xmm1, XMMWORD PTR [src_ptr + 16]
- lea src_ptr, [src_ptr+src_stride]
-
- movdqa XMMWORD PTR [ref_ptr], xmm0
- movdqa XMMWORD PTR [ref_ptr + 16], xmm1
- lea ref_ptr, [ref_ptr+ref_stride]
-
- sub height, 1
- jne .block_copy_sse3_loop
-
-.copy_is_done:
- STACK_FRAME_DESTROY_X3
-
-;void vp9_sad16x16x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr_base,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad16x16x4d_sse3) PRIVATE
-sym(vp9_sad16x16x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- movq xmm0, xmm4
- psrldq xmm4, 8
-
- paddw xmm0, xmm4
- movd [rcx], xmm0
-;-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rcx+4], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rcx+8], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rcx+12], xmm0
-
- STACK_FRAME_DESTROY_X4
-
-;void vp9_sad16x8x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr_base,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad16x8x4d_sse3) PRIVATE
-sym(vp9_sad16x8x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- movq xmm0, xmm4
- psrldq xmm4, 8
-
- paddw xmm0, xmm4
- movd [rcx], xmm0
-;-
- movq xmm0, xmm5
- psrldq xmm5, 8
-
- paddw xmm0, xmm5
- movd [rcx+4], xmm0
-;-
- movq xmm0, xmm6
- psrldq xmm6, 8
-
- paddw xmm0, xmm6
- movd [rcx+8], xmm0
-;-
- movq xmm0, xmm7
- psrldq xmm7, 8
-
- paddw xmm0, xmm7
- movd [rcx+12], xmm0
-
- STACK_FRAME_DESTROY_X4
-
-;void int vp9_sad8x16x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad8x16x4d_sse3) PRIVATE
-sym(vp9_sad8x16x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- punpckldq mm4, mm5
- punpckldq mm6, mm7
-
- movq [rcx], mm4
- movq [rcx+8], mm6
-
- STACK_FRAME_DESTROY_X4
-
-;void int vp9_sad8x8x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad8x8x4d_sse3) PRIVATE
-sym(vp9_sad8x8x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
- PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-
-%if ABI_IS_32BIT
- pop rbp
-%endif
- mov rcx, result_ptr
-
- punpckldq mm4, mm5
- punpckldq mm6, mm7
-
- movq [rcx], mm4
- movq [rcx+8], mm6
-
- STACK_FRAME_DESTROY_X4
-
-;void int vp9_sad4x4x4d_sse3(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride,
-; int *results)
-global sym(vp9_sad4x4x4d_sse3) PRIVATE
-sym(vp9_sad4x4x4d_sse3):
-
- STACK_FRAME_CREATE_X4
-
- movd mm0, DWORD PTR [src_ptr]
- movd mm1, DWORD PTR [r0_ptr]
-
- movd mm2, DWORD PTR [src_ptr+src_stride]
- movd mm3, DWORD PTR [r0_ptr+ref_stride]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- movd mm4, DWORD PTR [r1_ptr]
- movd mm5, DWORD PTR [r2_ptr]
-
- movd mm6, DWORD PTR [r3_ptr]
- movd mm2, DWORD PTR [r1_ptr+ref_stride]
-
- movd mm3, DWORD PTR [r2_ptr+ref_stride]
- movd mm7, DWORD PTR [r3_ptr+ref_stride]
-
- psadbw mm1, mm0
-
- punpcklbw mm4, mm2
- punpcklbw mm5, mm3
-
- punpcklbw mm6, mm7
- psadbw mm4, mm0
-
- psadbw mm5, mm0
- psadbw mm6, mm0
-
-
-
- lea src_ptr, [src_ptr+src_stride*2]
- lea r0_ptr, [r0_ptr+ref_stride*2]
-
- lea r1_ptr, [r1_ptr+ref_stride*2]
- lea r2_ptr, [r2_ptr+ref_stride*2]
-
- lea r3_ptr, [r3_ptr+ref_stride*2]
-
- movd mm0, DWORD PTR [src_ptr]
- movd mm2, DWORD PTR [r0_ptr]
-
- movd mm3, DWORD PTR [src_ptr+src_stride]
- movd mm7, DWORD PTR [r0_ptr+ref_stride]
-
- punpcklbw mm0, mm3
- punpcklbw mm2, mm7
-
- movd mm3, DWORD PTR [r1_ptr]
- movd mm7, DWORD PTR [r2_ptr]
-
- psadbw mm2, mm0
-%if ABI_IS_32BIT
- mov rax, rbp
-
- pop rbp
-%define ref_stride rax
-%endif
- mov rsi, result_ptr
-
- paddw mm1, mm2
- movd [rsi], mm1
-
- movd mm2, DWORD PTR [r1_ptr+ref_stride]
- movd mm1, DWORD PTR [r2_ptr+ref_stride]
-
- punpcklbw mm3, mm2
- punpcklbw mm7, mm1
-
- psadbw mm3, mm0
- psadbw mm7, mm0
-
- movd mm2, DWORD PTR [r3_ptr]
- movd mm1, DWORD PTR [r3_ptr+ref_stride]
-
- paddw mm3, mm4
- paddw mm7, mm5
-
- movd [rsi+4], mm3
- punpcklbw mm2, mm1
-
- movd [rsi+8], mm7
- psadbw mm2, mm0
-
- paddw mm2, mm6
- movd [rsi+12], mm2
-
-
- STACK_FRAME_DESTROY_X4
-
--- a/vp9/encoder/x86/vp9_sad_sse4.asm
+++ b/vp9/encoder/x86/vp9_sad_sse4.asm
@@ -154,7 +154,17 @@
paddw xmm1, xmm5
%endmacro
+%macro WRITE_AS_INTS 0
+ mov rdi, arg(4) ;Results
+ pxor xmm0, xmm0
+ movdqa xmm2, xmm1
+ punpcklwd xmm1, xmm0
+ punpckhwd xmm2, xmm0
+ movdqa [rdi], xmm1
+ movdqa [rdi + 16], xmm2
+%endmacro
+
;void vp9_sad16x16x8_sse4(
; const unsigned char *src_ptr,
; int src_stride,
@@ -170,23 +180,22 @@
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_16X2X8 1
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
+ PROCESS_16X2X8 1
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
@@ -212,19 +221,18 @@
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_16X2X8 1
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
- PROCESS_16X2X8 0
+ PROCESS_16X2X8 1
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
+ PROCESS_16X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
@@ -250,19 +258,18 @@
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_8X2X8 1
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
+ PROCESS_8X2X8 1
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
@@ -288,23 +295,23 @@
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_8X2X8 1
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- PROCESS_8X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ PROCESS_8X2X8 1
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ PROCESS_8X2X8 0
+ WRITE_AS_INTS
+
; begin epilog
pop rdi
pop rsi
@@ -329,17 +336,16 @@
push rdi
; end prolog
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;ref_ptr
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
+ movsxd rax, dword ptr arg(1) ;src_stride
+ movsxd rdx, dword ptr arg(3) ;ref_stride
- PROCESS_4X2X8 1
- PROCESS_4X2X8 0
+ PROCESS_4X2X8 1
+ PROCESS_4X2X8 0
- mov rdi, arg(4) ;Results
- movdqa XMMWORD PTR [rdi], xmm1
+ WRITE_AS_INTS
; begin epilog
pop rdi
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -186,6 +186,7 @@
*sse = sse0;
return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
}
+
unsigned int vp9_mse16x16_wmt(
const unsigned char *src_ptr,
int source_stride,
@@ -305,20 +306,16 @@
return (xxsum - (((unsigned int)xsum * xsum) >> 6));
}
-unsigned int vp9_sub_pixel_variance16x16_wmt
-(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-) {
+static void sub_pixel_variance16x16_sse2(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse, int *avg) {
int xsum0, xsum1;
unsigned int xxsum0, xxsum1;
-
// note we could avoid these if statements if the calling function
// just called the appropriate functions inside.
if (xoffset == HALFNDX && yoffset == 0) {
@@ -355,10 +352,136 @@
}
*sse = xxsum0;
- return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
+ *avg = xsum0;
}
-unsigned int vp9_sub_pixel_mse16x16_wmt(
+unsigned int vp9_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse_ptr) {
+ int avg;
+ unsigned int sse;
+
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse, &avg);
+ *sse_ptr = sse;
+
+ return (sse - (((unsigned int) avg * avg) >> 8));
+}
+
+unsigned int vp9_sub_pixel_variance32x32_sse2(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse_ptr) {
+ int avg0, avg1, avg2, avg3;
+ unsigned int sse0, sse1, sse2, sse3;
+
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse0, &avg0);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse1, &avg1);
+ src_ptr += 16 * src_pixels_per_line;
+ dst_ptr += 16 * dst_pixels_per_line;
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse3, &avg3);
+ sse0 += sse1 + sse2 + sse3;
+ avg0 += avg1 + avg2 + avg3;
+ *sse_ptr = sse0;
+
+ return (sse0 - (((unsigned int) avg0 * avg0) >> 10));
+}
+
+unsigned int vp9_sub_pixel_variance64x64_sse2(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse_ptr) {
+ int avg0, avg1, avg2, avg3, avg4;
+ unsigned int sse0, sse1, sse2, sse3, sse4;
+
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse0, &avg0);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse1, &avg1);
+ sub_pixel_variance16x16_sse2(src_ptr + 32, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 32, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 48, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 48, dst_pixels_per_line,
+ &sse3, &avg3);
+ src_ptr += 16 * src_pixels_per_line;
+ dst_ptr += 16 * dst_pixels_per_line;
+ avg0 += avg1 + avg2 + avg3;
+ sse0 += sse1 + sse2 + sse3;
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse1, &avg1);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 32, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 32, dst_pixels_per_line,
+ &sse3, &avg3);
+ sub_pixel_variance16x16_sse2(src_ptr + 48, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 48, dst_pixels_per_line,
+ &sse4, &avg4);
+ src_ptr += 16 * src_pixels_per_line;
+ dst_ptr += 16 * dst_pixels_per_line;
+ avg0 += avg1 + avg2 + avg3 + avg4;
+ sse0 += sse1 + sse2 + sse3 + sse4;
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse1, &avg1);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 32, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 32, dst_pixels_per_line,
+ &sse3, &avg3);
+ sub_pixel_variance16x16_sse2(src_ptr + 48, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 48, dst_pixels_per_line,
+ &sse4, &avg4);
+ src_ptr += 16 * src_pixels_per_line;
+ dst_ptr += 16 * dst_pixels_per_line;
+ avg0 += avg1 + avg2 + avg3 + avg4;
+ sse0 += sse1 + sse2 + sse3 + sse4;
+ sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line,
+ &sse1, &avg1);
+ sub_pixel_variance16x16_sse2(src_ptr + 16, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 16, dst_pixels_per_line,
+ &sse2, &avg2);
+ sub_pixel_variance16x16_sse2(src_ptr + 32, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 32, dst_pixels_per_line,
+ &sse3, &avg3);
+ sub_pixel_variance16x16_sse2(src_ptr + 48, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr + 48, dst_pixels_per_line,
+ &sse4, &avg4);
+ avg0 += avg1 + avg2 + avg3 + avg4;
+ sse0 += sse1 + sse2 + sse3 + sse4;
+ *sse_ptr = sse0;
+
+ return (sse0 - (((unsigned int) avg0 * avg0) >> 12));
+}
+
+unsigned int vp9_sub_pixel_mse16x16_sse2(
const unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
@@ -367,7 +490,8 @@
int dst_pixels_per_line,
unsigned int *sse
) {
- vp9_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
+ vp9_sub_pixel_variance16x16_sse2(src_ptr, src_pixels_per_line, xoffset,
+ yoffset, dst_ptr, dst_pixels_per_line, sse);
return *sse;
}
--- a/vp9/encoder/x86/vp9_x86_csystemdependent.c
+++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c
@@ -23,11 +23,11 @@
vp9_short_fdct4x4_mmx(input + 4, output + 16, pitch);
}
-int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-int vp9_mbblock_error_mmx(MACROBLOCK *mb, int dc) {
+int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
+int vp9_mbblock_error_mmx(MACROBLOCK *mb) {
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
- return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc);
+ return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr);
}
int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
@@ -51,11 +51,11 @@
#endif
#if HAVE_SSE2
-int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
-int vp9_mbblock_error_xmm(MACROBLOCK *mb, int dc) {
+int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
+int vp9_mbblock_error_xmm(MACROBLOCK *mb) {
short *coeff_ptr = mb->block[0].coeff;
short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff;
- return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr, dc);
+ return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr);
}
int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -9,6 +9,7 @@
##
VP9_COMMON_SRCS-yes += vp9_common.mk
+VP9_COMMON_SRCS-yes += vp9_iface_common.h
VP9_COMMON_SRCS-yes += common/vp9_pragmas.h
VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
VP9_COMMON_SRCS-yes += common/vp9_onyx.h
@@ -16,6 +17,8 @@
VP9_COMMON_SRCS-yes += common/vp9_asm_com_offsets.c
VP9_COMMON_SRCS-yes += common/vp9_blockd.c
VP9_COMMON_SRCS-yes += common/vp9_coefupdateprobs.h
+VP9_COMMON_SRCS-yes += common/vp9_convolve.c
+VP9_COMMON_SRCS-yes += common/vp9_convolve.h
VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c
VP9_COMMON_SRCS-yes += common/vp9_default_coef_probs.h
VP9_COMMON_SRCS-yes += common/vp9_entropy.c
@@ -26,7 +29,7 @@
VP9_COMMON_SRCS-yes += common/vp9_filter.h
VP9_COMMON_SRCS-yes += common/vp9_findnearmv.c
VP9_COMMON_SRCS-yes += common/generic/vp9_systemdependent.c
-VP9_COMMON_SRCS-yes += common/vp9_idctllm.c
+VP9_COMMON_SRCS-yes += common/vp9_idct.c
VP9_COMMON_SRCS-yes += common/vp9_alloccommon.h
VP9_COMMON_SRCS-yes += common/vp9_blockd.h
VP9_COMMON_SRCS-yes += common/vp9_common.h
@@ -36,6 +39,7 @@
VP9_COMMON_SRCS-yes += common/vp9_extend.h
VP9_COMMON_SRCS-yes += common/vp9_findnearmv.h
VP9_COMMON_SRCS-yes += common/vp9_header.h
+VP9_COMMON_SRCS-yes += common/vp9_idct.h
VP9_COMMON_SRCS-yes += common/vp9_invtrans.h
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h
VP9_COMMON_SRCS-yes += common/vp9_modecont.h
@@ -46,7 +50,6 @@
VP9_COMMON_SRCS-yes += common/vp9_quant_common.h
VP9_COMMON_SRCS-yes += common/vp9_reconinter.h
VP9_COMMON_SRCS-yes += common/vp9_reconintra.h
-VP9_COMMON_SRCS-yes += common/vp9_reconintra4x4.h
VP9_COMMON_SRCS-yes += common/vp9_rtcd.c
VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh
VP9_COMMON_SRCS-yes += common/vp9_sadmxn.h
@@ -54,10 +57,11 @@
VP9_COMMON_SRCS-yes += common/vp9_seg_common.h
VP9_COMMON_SRCS-yes += common/vp9_seg_common.c
VP9_COMMON_SRCS-yes += common/vp9_setupintrarecon.h
-VP9_COMMON_SRCS-yes += common/vp9_subpixel.h
VP9_COMMON_SRCS-yes += common/vp9_swapyv12buffer.h
VP9_COMMON_SRCS-yes += common/vp9_systemdependent.h
VP9_COMMON_SRCS-yes += common/vp9_textblit.h
+VP9_COMMON_SRCS-yes += common/vp9_tile_common.h
+VP9_COMMON_SRCS-yes += common/vp9_tile_common.c
VP9_COMMON_SRCS-yes += common/vp9_treecoder.h
VP9_COMMON_SRCS-yes += common/vp9_invtrans.c
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c
@@ -79,7 +83,6 @@
VP9_COMMON_SRCS-$(CONFIG_IMPLICIT_SEGMENTATION) += common/vp9_implicit_segmentation.c
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idct_x86.h
-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_subpixel_x86.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_x86.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_postproc_x86.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
@@ -89,18 +92,15 @@
VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_iwalsh_mmx.asm
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_recon_mmx.asm
-VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_subpixel_mmx.asm
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idctllm_sse2.asm
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_iwalsh_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_recon_wrapper_sse2.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpel_variance_impl_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_variance_sse2.c
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
-VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_ssse3.asm
ifeq ($(CONFIG_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
@@ -112,19 +112,13 @@
VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/vp9_mask_sse3.asm
endif
-VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_filter_sse4.c
-ifeq ($(HAVE_SSE4_1),yes)
-vp9/common/x86/vp9_filter_sse4.c.o: CFLAGS += -msse4
-vp9/common/x86/vp9_filter_sse4.c.d: CFLAGS += -msse4
-endif
-
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_filter_sse2.c
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_idct_x86.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_sadmxn_sse2.c
ifeq ($(HAVE_SSE2),yes)
-vp9/common/x86/vp9_filter_sse2.c.o: CFLAGS += -msse2
+vp9/common/x86/vp9_idct_x86.c.o: CFLAGS += -msse2
vp9/common/x86/vp9_loopfilter_intrin_sse2.c.o: CFLAGS += -msse2
vp9/common/x86/vp9_sadmxn_sse2.c.o: CFLAGS += -msse2
-vp9/common/x86/vp9_filter_sse2.c.d: CFLAGS += -msse2
+vp9/common/x86/vp9_idct_x86.c.d: CFLAGS += -msse2
vp9/common/x86/vp9_loopfilter_intrin_sse2.c.d: CFLAGS += -msse2
vp9/common/x86/vp9_sadmxn_sse2.c.d: CFLAGS += -msse2
endif
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -16,6 +16,7 @@
#include "vpx/vp8cx.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/common/vp9_onyx.h"
+#include "vp9/vp9_iface_common.h"
#include <stdlib.h>
#include <string.h>
@@ -26,7 +27,8 @@
unsigned int noise_sensitivity;
unsigned int Sharpness;
unsigned int static_thresh;
- unsigned int token_partitions;
+ unsigned int tile_columns;
+ unsigned int tile_rows;
unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */
unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */
unsigned int arnr_type; /* alt_ref filter type */
@@ -34,9 +36,8 @@
vp8e_tuning tuning;
unsigned int cq_level; /* constrained quality level */
unsigned int rc_max_intra_bitrate_pct;
-#if CONFIG_LOSSLESS
unsigned int lossless;
-#endif
+ unsigned int frame_parallel_decoding_mode;
};
struct extraconfig_map {
@@ -54,7 +55,8 @@
0, /* noise_sensitivity */
0, /* Sharpness */
0, /* static_thresh */
- VP8_ONE_TOKENPARTITION, /* token_partitions */
+ 0, /* tile_columns */
+ 0, /* tile_rows */
0, /* arnr_max_frames */
3, /* arnr_strength */
3, /* arnr_type*/
@@ -62,9 +64,8 @@
0, /* tuning*/
10, /* cq_level */
0, /* rc_max_intra_bitrate_pct */
-#if CONFIG_LOSSLESS
0, /* lossless */
-#endif
+ 0, /* frame_parallel_decoding_mode */
}
}
};
@@ -79,8 +80,10 @@
unsigned int cx_data_sz;
unsigned char *pending_cx_data;
unsigned int pending_cx_data_sz;
+ int pending_frame_count;
+ uint32_t pending_frame_sizes[8];
+ uint32_t pending_frame_magnitude;
vpx_image_t preview_img;
- unsigned int next_frame_flag;
vp8_postproc_cfg_t preview_ppcfg;
vpx_codec_pkt_list_decl(64) pkt_list; // changed to accomendate the maximum number of lagged frames allowed
unsigned int fixed_kf_cntr;
@@ -129,8 +132,8 @@
static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
const vpx_codec_enc_cfg_t *cfg,
const struct vp8_extracfg *vp8_cfg) {
- RANGE_CHECK(cfg, g_w, 1, 16383); /* 14 bits available */
- RANGE_CHECK(cfg, g_h, 1, 16383); /* 14 bits available */
+ RANGE_CHECK(cfg, g_w, 1, 65535); /* 16 bits available */
+ RANGE_CHECK(cfg, g_h, 1, 65535); /* 16 bits available */
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
RANGE_CHECK_HI(cfg, g_profile, 3);
@@ -137,13 +140,11 @@
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
-#if CONFIG_LOSSLESS
RANGE_CHECK_BOOL(vp8_cfg, lossless);
if (vp8_cfg->lossless) {
RANGE_CHECK_HI(cfg, rc_max_quantizer, 0);
RANGE_CHECK_HI(cfg, rc_min_quantizer, 0);
}
-#endif
RANGE_CHECK_HI(cfg, g_threads, 64);
RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS);
@@ -172,7 +173,8 @@
RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6);
- RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
+ RANGE_CHECK(vp8_cfg, tile_columns, 0, 6);
+ RANGE_CHECK(vp8_cfg, tile_rows, 0, 2);
RANGE_CHECK_HI(vp8_cfg, Sharpness, 7);
RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
@@ -226,11 +228,9 @@
static vpx_codec_err_t set_vp8e_config(VP9_CONFIG *oxcf,
vpx_codec_enc_cfg_t cfg,
struct vp8_extracfg vp8_cfg) {
- oxcf->Version = cfg.g_profile;
- oxcf->Version |= vp8_cfg.experimental ? 0x4 : 0;
-
- oxcf->Width = cfg.g_w;
- oxcf->Height = cfg.g_h;
+ oxcf->version = cfg.g_profile | (vp8_cfg.experimental ? 0x4 : 0);
+ oxcf->width = cfg.g_w;
+ oxcf->height = cfg.g_h;
/* guess a frame rate if out of whack, use 30 */
oxcf->frame_rate = (double)(cfg.g_timebase.den) / (double)(cfg.g_timebase.num);
@@ -309,37 +309,43 @@
oxcf->tuning = vp8_cfg.tuning;
-#if CONFIG_LOSSLESS
+ oxcf->tile_columns = vp8_cfg.tile_columns;
+ oxcf->tile_rows = vp8_cfg.tile_rows;
+
oxcf->lossless = vp8_cfg.lossless;
-#endif
+ oxcf->error_resilient_mode = cfg.g_error_resilient;
+ oxcf->frame_parallel_decoding_mode = vp8_cfg.frame_parallel_decoding_mode;
/*
- printf("Current VP8 Settings: \n");
- printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
- printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
- printf("Sharpness: %d\n", oxcf->Sharpness);
- printf("cpu_used: %d\n", oxcf->cpu_used);
- printf("Mode: %d\n", oxcf->Mode);
- printf("delete_first_pass_file: %d\n", oxcf->delete_first_pass_file);
- printf("auto_key: %d\n", oxcf->auto_key);
- printf("key_freq: %d\n", oxcf->key_freq);
- printf("end_usage: %d\n", oxcf->end_usage);
- printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct);
- printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct);
- printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level);
- printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level);
- printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size);
- printf("fixed_q: %d\n", oxcf->fixed_q);
- printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
- printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
- printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias);
- printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
- printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
- printf("allow_lag: %d\n", oxcf->allow_lag);
- printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
- printf("play_alternate: %d\n", oxcf->play_alternate);
- printf("Version: %d\n", oxcf->Version);
- printf("encode_breakout: %d\n", oxcf->encode_breakout);
+ printf("Current VP9 Settings: \n");
+ printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
+ printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
+ printf("Sharpness: %d\n", oxcf->Sharpness);
+ printf("cpu_used: %d\n", oxcf->cpu_used);
+ printf("Mode: %d\n", oxcf->Mode);
+ // printf("delete_first_pass_file: %d\n", oxcf->delete_first_pass_file);
+ printf("auto_key: %d\n", oxcf->auto_key);
+ printf("key_freq: %d\n", oxcf->key_freq);
+ printf("end_usage: %d\n", oxcf->end_usage);
+ printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct);
+ printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct);
+ printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level);
+ printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level);
+ printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size);
+ printf("fixed_q: %d\n", oxcf->fixed_q);
+ printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
+ printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
+ printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias);
+ printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
+ printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
+ printf("allow_lag: %d\n", oxcf->allow_lag);
+ printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
+ printf("play_alternate: %d\n", oxcf->play_alternate);
+ printf("Version: %d\n", oxcf->Version);
+ printf("encode_breakout: %d\n", oxcf->encode_breakout);
+ printf("error resilient: %d\n", oxcf->error_resilient_mode);
+ printf("frame parallel detokenization: %d\n",
+ oxcf->frame_parallel_decoding_mode);
*/
return VPX_CODEC_OK;
}
@@ -409,7 +415,8 @@
MAP(VP8E_SET_NOISE_SENSITIVITY, xcfg.noise_sensitivity);
MAP(VP8E_SET_SHARPNESS, xcfg.Sharpness);
MAP(VP8E_SET_STATIC_THRESHOLD, xcfg.static_thresh);
- MAP(VP8E_SET_TOKEN_PARTITIONS, xcfg.token_partitions);
+ MAP(VP9E_SET_TILE_COLUMNS, xcfg.tile_columns);
+ MAP(VP9E_SET_TILE_ROWS, xcfg.tile_rows);
MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames);
MAP(VP8E_SET_ARNR_STRENGTH, xcfg.arnr_strength);
@@ -417,9 +424,8 @@
MAP(VP8E_SET_TUNING, xcfg.tuning);
MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level);
MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct);
-#if CONFIG_LOSSLESS
MAP(VP9E_SET_LOSSLESS, xcfg.lossless);
-#endif
+ MAP(VP9E_SET_FRAME_PARALLEL_DECODING, xcfg.frame_parallel_decoding_mode);
}
res = validate_config(ctx, &ctx->cfg, &xcfg);
@@ -540,6 +546,8 @@
yv12->u_buffer = img->planes[VPX_PLANE_U];
yv12->v_buffer = img->planes[VPX_PLANE_V];
+ yv12->y_crop_width = img->d_w;
+ yv12->y_crop_height = img->d_h;
yv12->y_width = img->d_w;
yv12->y_height = img->d_h;
yv12->uv_width = (1 + yv12->y_width) / 2;
@@ -578,6 +586,46 @@
}
+static int write_superframe_index(vpx_codec_alg_priv_t *ctx) {
+ uint8_t marker = 0xc0;
+ int mag, mask, index_sz;
+
+ assert(ctx->pending_frame_count);
+ assert(ctx->pending_frame_count <= 8);
+
+ /* Add the number of frames to the marker byte */
+ marker |= ctx->pending_frame_count - 1;
+
+ /* Choose the magnitude */
+ for (mag = 0, mask = 0xff; mag < 4; mag++) {
+ if (ctx->pending_frame_magnitude < mask)
+ break;
+ mask <<= 8;
+ mask |= 0xff;
+ }
+ marker |= mag << 3;
+
+ /* Write the index */
+ index_sz = 2 + (mag + 1) * ctx->pending_frame_count;
+ if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) {
+ uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz;
+ int i, j;
+
+ *x++ = marker;
+ for (i = 0; i < ctx->pending_frame_count; i++) {
+ int this_sz = ctx->pending_frame_sizes[i];
+
+ for (j = 0; j <= mag; j++) {
+ *x++ = this_sz & 0xff;
+ this_sz >>= 8;
+ }
+ }
+ *x++ = marker;
+ ctx->pending_cx_data_sz += index_sz;
+ }
+ return index_sz;
+}
+
static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
const vpx_image_t *img,
vpx_codec_pts_t pts,
@@ -670,14 +718,11 @@
if (img != NULL) {
res = image2yuvconfig(img, &sd);
- if (vp9_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags,
+ if (vp9_receive_raw_frame(ctx->cpi, lib_flags,
&sd, dst_time_stamp, dst_end_time_stamp)) {
VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
res = update_error_state(ctx, &cpi->common.error);
}
-
- /* reset for next frame */
- ctx->next_frame_flag = 0;
}
cx_data = ctx->cx_data;
@@ -714,6 +759,8 @@
if (!ctx->pending_cx_data)
ctx->pending_cx_data = cx_data;
ctx->pending_cx_data_sz += size;
+ ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+ ctx->pending_frame_magnitude |= size;
cx_data += size;
cx_data_sz -= size;
continue;
@@ -773,10 +820,16 @@
else*/
{
if (ctx->pending_cx_data) {
+ ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+ ctx->pending_frame_magnitude |= size;
+ ctx->pending_cx_data_sz += size;
+ size += write_superframe_index(ctx);
pkt.data.frame.buf = ctx->pending_cx_data;
- pkt.data.frame.sz = ctx->pending_cx_data_sz + size;
+ pkt.data.frame.sz = ctx->pending_cx_data_sz;
ctx->pending_cx_data = NULL;
ctx->pending_cx_data_sz = 0;
+ ctx->pending_frame_count = 0;
+ ctx->pending_frame_magnitude = 0;
} else {
pkt.data.frame.buf = cx_data;
pkt.data.frame.sz = size;
@@ -818,9 +871,9 @@
}
-static vpx_codec_err_t vp8e_get_reference(vpx_codec_alg_priv_t *ctx,
- int ctr_id,
- va_list args) {
+static vpx_codec_err_t vp8e_copy_reference(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
@@ -829,12 +882,28 @@
YV12_BUFFER_CONFIG sd;
image2yuvconfig(&frame->img, &sd);
- vp9_get_reference_enc(ctx->cpi, frame->frame_type, &sd);
+ vp9_copy_reference_enc(ctx->cpi, frame->frame_type, &sd);
return VPX_CODEC_OK;
} else
return VPX_CODEC_INVALID_PARAM;
}
+static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
+ vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *);
+
+ if (data) {
+ YV12_BUFFER_CONFIG* fb;
+
+ vp9_get_reference_enc(ctx->cpi, data->idx, &fb);
+ yuvconfig2image(&data->img, fb, NULL);
+ return VPX_CODEC_OK;
+ } else {
+ return VPX_CODEC_INVALID_PARAM;
+ }
+}
+
static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
@@ -979,8 +1048,6 @@
scalemode.v_scaling_mode);
if (!res) {
- /*force next frame a key frame to effect scaling mode */
- ctx->next_frame_flag |= FRAMEFLAGS_KEY;
return VPX_CODEC_OK;
} else
return VPX_CODEC_INVALID_PARAM;
@@ -991,7 +1058,7 @@
static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = {
{VP8_SET_REFERENCE, vp8e_set_reference},
- {VP8_COPY_REFERENCE, vp8e_get_reference},
+ {VP8_COPY_REFERENCE, vp8e_copy_reference},
{VP8_SET_POSTPROC, vp8e_set_previewpp},
{VP8E_UPD_ENTROPY, vp8e_update_entropy},
{VP8E_UPD_REFERENCE, vp8e_update_reference},
@@ -1004,7 +1071,8 @@
{VP8E_SET_ENABLEAUTOALTREF, set_param},
{VP8E_SET_SHARPNESS, set_param},
{VP8E_SET_STATIC_THRESHOLD, set_param},
- {VP8E_SET_TOKEN_PARTITIONS, set_param},
+ {VP9E_SET_TILE_COLUMNS, set_param},
+ {VP9E_SET_TILE_ROWS, set_param},
{VP8E_GET_LAST_QUANTIZER, get_param},
{VP8E_GET_LAST_QUANTIZER_64, get_param},
{VP8E_SET_ARNR_MAXFRAMES, set_param},
@@ -1013,9 +1081,8 @@
{VP8E_SET_TUNING, set_param},
{VP8E_SET_CQ_LEVEL, set_param},
{VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param},
-#if CONFIG_LOSSLESS
{VP9E_SET_LOSSLESS, set_param},
-#endif
+ {VP9_GET_REFERENCE, get_reference},
{ -1, NULL},
};
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -17,6 +17,7 @@
#include "vpx_version.h"
#include "decoder/vp9_onyxd.h"
#include "decoder/vp9_onyxd_int.h"
+#include "vp9/vp9_iface_common.h"
#define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
typedef vpx_codec_stream_info_t vp8_stream_info_t;
@@ -63,6 +64,7 @@
vpx_image_t img;
int img_setup;
int img_avail;
+ int invert_tile_order;
};
static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si,
@@ -229,8 +231,8 @@
if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a)
res = VPX_CODEC_UNSUP_BITSTREAM;
- si->w = (c[3] | (c[4] << 8)) & 0x3fff;
- si->h = (c[5] | (c[6] << 8)) & 0x3fff;
+ si->w = (c[3] | (c[4] << 8));
+ si->h = (c[5] | (c[6] << 8));
/*printf("w=%d, h=%d\n", si->w, si->h);*/
if (!(si->h | si->w))
@@ -273,36 +275,6 @@
return res;
}
-static void yuvconfig2image(vpx_image_t *img,
- const YV12_BUFFER_CONFIG *yv12,
- void *user_priv) {
- /** vpx_img_wrap() doesn't allow specifying independent strides for
- * the Y, U, and V planes, nor other alignment adjustments that
- * might be representable by a YV12_BUFFER_CONFIG, so we just
- * initialize all the fields.*/
- img->fmt = yv12->clrtype == REG_YUV ?
- VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420;
- img->w = yv12->y_stride;
- img->h = (yv12->y_height + 2 * VP9BORDERINPIXELS + 15) & ~15;
- img->d_w = yv12->y_width;
- img->d_h = yv12->y_height;
- img->x_chroma_shift = 1;
- img->y_chroma_shift = 1;
- img->planes[VPX_PLANE_Y] = yv12->y_buffer;
- img->planes[VPX_PLANE_U] = yv12->u_buffer;
- img->planes[VPX_PLANE_V] = yv12->v_buffer;
- img->planes[VPX_PLANE_ALPHA] = NULL;
- img->stride[VPX_PLANE_Y] = yv12->y_stride;
- img->stride[VPX_PLANE_U] = yv12->uv_stride;
- img->stride[VPX_PLANE_V] = yv12->uv_stride;
- img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
- img->bps = 12;
- img->user_priv = user_priv;
- img->img_data = yv12->buffer_alloc;
- img->img_data_owner = 0;
- img->self_allocd = 0;
-}
-
static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
const uint8_t **data,
unsigned int data_sz,
@@ -362,6 +334,7 @@
oxcf.Version = 9;
oxcf.postprocess = 0;
oxcf.max_threads = ctx->cfg.threads;
+ oxcf.inv_tile_order = ctx->invert_tile_order;
optr = vp9_create_decompressor(&oxcf);
/* If postprocessing was enabled by the application and a
@@ -424,6 +397,39 @@
return res;
}
+static void parse_superframe_index(const uint8_t *data,
+ size_t data_sz,
+ uint32_t sizes[8],
+ int *count) {
+ uint8_t marker;
+
+ assert(data_sz);
+ marker = data[data_sz - 1];
+ *count = 0;
+
+ if ((marker & 0xe0) == 0xc0) {
+ const int frames = (marker & 0x7) + 1;
+ const int mag = ((marker >> 3) & 3) + 1;
+ const int index_sz = 2 + mag * frames;
+
+ if (data_sz >= index_sz && data[data_sz - index_sz] == marker) {
+ // found a valid superframe index
+ int i, j;
+ const uint8_t *x = data + data_sz - index_sz + 1;
+
+ for (i = 0; i < frames; i++) {
+ int this_sz = 0;
+
+ for (j = 0; j < mag; j++)
+ this_sz |= (*x++) << (j * 8);
+ sizes[i] = this_sz;
+ }
+
+ *count = frames;
+ }
+ }
+}
+
static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx,
const uint8_t *data,
unsigned int data_sz,
@@ -431,9 +437,43 @@
long deadline) {
const uint8_t *data_start = data;
const uint8_t *data_end = data + data_sz;
- vpx_codec_err_t res;
+ vpx_codec_err_t res = 0;
+ uint32_t sizes[8];
+ int frames_this_pts, frame_count = 0;
+ parse_superframe_index(data, data_sz, sizes, &frames_this_pts);
+
do {
+ // Skip over the superframe index, if present
+ if (data_sz && (*data_start & 0xe0) == 0xc0) {
+ const uint8_t marker = *data_start;
+ const int frames = (marker & 0x7) + 1;
+ const int mag = ((marker >> 3) & 3) + 1;
+ const int index_sz = 2 + mag * frames;
+
+ if (data_sz >= index_sz && data_start[index_sz - 1] == marker) {
+ data_start += index_sz;
+ data_sz -= index_sz;
+ if (data_start < data_end)
+ continue;
+ else
+ break;
+ }
+ }
+
+ // Use the correct size for this frame, if an index is present.
+ if (frames_this_pts) {
+ uint32_t this_sz = sizes[frame_count];
+
+ if (data_sz < this_sz) {
+ ctx->base.err_detail = "Invalid frame size in index";
+ return VPX_CODEC_CORRUPT_FRAME;
+ }
+
+ data_sz = this_sz;
+ frame_count++;
+ }
+
res = decode_one(ctx, &data_start, data_sz, user_priv, deadline);
assert(data_start >= data);
assert(data_start <= data_end);
@@ -545,6 +585,8 @@
yv12->u_buffer = img->planes[VPX_PLANE_U];
yv12->v_buffer = img->planes[VPX_PLANE_V];
+ yv12->y_crop_width = img->d_w;
+ yv12->y_crop_height = img->d_h;
yv12->y_width = img->d_w;
yv12->y_height = img->d_h;
yv12->uv_width = yv12->y_width / 2;
@@ -580,9 +622,9 @@
}
-static vpx_codec_err_t vp9_get_reference(vpx_codec_alg_priv_t *ctx,
- int ctr_id,
- va_list args) {
+static vpx_codec_err_t vp9_copy_reference(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
@@ -592,13 +634,29 @@
image2yuvconfig(&frame->img, &sd);
- return vp9_get_reference_dec(ctx->pbi,
- (VP9_REFFRAME)frame->frame_type, &sd);
+ return vp9_copy_reference_dec(ctx->pbi,
+ (VP9_REFFRAME)frame->frame_type, &sd);
} else
return VPX_CODEC_INVALID_PARAM;
}
+static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
+ vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *);
+
+ if (data) {
+ YV12_BUFFER_CONFIG* fb;
+
+ vp9_get_reference_dec(ctx->pbi, data->idx, &fb);
+ yuvconfig2image(&data->img, fb, NULL);
+ return VPX_CODEC_OK;
+ } else {
+ return VPX_CODEC_INVALID_PARAM;
+ }
+}
+
static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
@@ -645,9 +703,7 @@
VP9D_COMP *pbi = (VP9D_COMP *)ctx->pbi;
if (update_info) {
- *update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME
- + pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME
- + pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME;
+ *update_info = pbi->refresh_frame_flags;
return VPX_CODEC_OK;
} else
@@ -671,9 +727,16 @@
}
+static vpx_codec_err_t set_invert_tile_order(vpx_codec_alg_priv_t *ctx,
+ int ctr_id,
+ va_list args) {
+ ctx->invert_tile_order = va_arg(args, int);
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t ctf_maps[] = {
{VP8_SET_REFERENCE, vp9_set_reference},
- {VP8_COPY_REFERENCE, vp9_get_reference},
+ {VP8_COPY_REFERENCE, vp9_copy_reference},
{VP8_SET_POSTPROC, vp8_set_postproc},
{VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options},
{VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options},
@@ -681,6 +744,8 @@
{VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options},
{VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates},
{VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted},
+ {VP9_GET_REFERENCE, get_reference},
+ {VP9_INVERT_TILE_DECODE_ORDER, set_invert_tile_order},
{ -1, NULL},
};
--- /dev/null
+++ b/vp9/vp9_iface_common.h
@@ -1,0 +1,43 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP9_VP9_IFACE_COMMON_H_
+#define VP9_VP9_IFACE_COMMON_H_
+
+static void yuvconfig2image(vpx_image_t *img,
+ const YV12_BUFFER_CONFIG *yv12,
+ void *user_priv) {
+ /** vpx_img_wrap() doesn't allow specifying independent strides for
+ * the Y, U, and V planes, nor other alignment adjustments that
+ * might be representable by a YV12_BUFFER_CONFIG, so we just
+ * initialize all the fields.*/
+ img->fmt = yv12->clrtype == REG_YUV ?
+ VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420;
+ img->w = yv12->y_stride;
+ img->h = (yv12->y_height + 2 * VP9BORDERINPIXELS + 15) & ~15;
+ img->d_w = yv12->y_width;
+ img->d_h = yv12->y_height;
+ img->x_chroma_shift = 1;
+ img->y_chroma_shift = 1;
+ img->planes[VPX_PLANE_Y] = yv12->y_buffer;
+ img->planes[VPX_PLANE_U] = yv12->u_buffer;
+ img->planes[VPX_PLANE_V] = yv12->v_buffer;
+ img->planes[VPX_PLANE_ALPHA] = NULL;
+ img->stride[VPX_PLANE_Y] = yv12->y_stride;
+ img->stride[VPX_PLANE_U] = yv12->uv_stride;
+ img->stride[VPX_PLANE_V] = yv12->uv_stride;
+ img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
+ img->bps = 12;
+ img->user_priv = user_priv;
+ img->img_data = yv12->buffer_alloc;
+ img->img_data_owner = 0;
+ img->self_allocd = 0;
+}
+
+#endif
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -65,7 +65,6 @@
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
VP9_CX_SRCS-yes += encoder/vp9_rdopt.c
VP9_CX_SRCS-yes += encoder/vp9_sad_c.c
-VP9_CX_SRCS-yes += encoder/vp9_satd_c.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
@@ -95,8 +94,9 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_fwalsh_sse2.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.asm
+#VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
@@ -103,12 +103,18 @@
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_ssse3.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_variance_impl_ssse3.asm
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
+#VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
-VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_quantize_sse4.asm
+#VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_quantize_sse4.asm
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_quantize_mmx.asm
VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_encodeopt.asm
VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt.asm
+
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2_intrinsics.c
+ifeq ($(HAVE_SSE2),yes)
+vp9/encoder/x86/vp9_dct_sse2_intrinsics.c.d: CFLAGS += -msse2
+vp9/encoder/x86/vp9_dct_sse2_intrinsics.c.o: CFLAGS += -msse2
+endif
VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -38,5 +38,11 @@
VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_idct_blk_sse2.c
+VP9_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/vp9_dequantize_x86.c
+ifeq ($(HAVE_SSE2),yes)
+vp9/decoder/x86/vp9_dequantize_x86.c.o: CFLAGS += -msse2
+vp9/decoder/x86/vp9_dequantize_x86.c.d: CFLAGS += -msse2
+endif
+
$(eval $(call asm_offsets_template,\
vp9_asm_dec_offsets.asm, $(VP9_PREFIX)decoder/vp9_asm_dec_offsets.c))
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -44,6 +44,12 @@
VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
+
+ /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+)
+ * for its control ids. These should be migrated to something like the
+ * VP8_DECODER_CTRL_ID_START range next time we're ready to break the ABI.
+ */
+ VP9_GET_REFERENCE = 128, /**< get a pointer to a reference frame */
VP8_COMMON_CTRL_ID_MAX,
VP8_DECODER_CTRL_ID_START = 256
};
@@ -97,6 +103,10 @@
vpx_image_t img; /**< reference frame data in image format */
} vpx_ref_frame_t;
+typedef struct vp9_ref_frame {
+ int idx; /**< frame index to get (input) */
+ vpx_image_t img; /**< img structure to populate (output) */
+} vp9_ref_frame_t;
/*!\brief vp8 decoder control function parameter type
*
@@ -110,6 +120,7 @@
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int)
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int)
VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int)
+VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *)
/*! @} - end defgroup vp8 */
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -187,7 +187,10 @@
/* TODO(jkoleszar): Move to vp9cx.h */
- VP9E_SET_LOSSLESS
+ VP9E_SET_LOSSLESS,
+ VP9E_SET_TILE_COLUMNS,
+ VP9E_SET_TILE_ROWS,
+ VP9E_SET_FRAME_PARALLEL_DECODING
};
/*!\brief vpx 1-D scaling mode
@@ -298,6 +301,9 @@
VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */
VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL, unsigned int)
+VPX_CTRL_USE_TYPE(VP9E_SET_TILE_COLUMNS, int)
+VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int)
+
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
@@ -305,6 +311,7 @@
VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int)
+VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int)
/*! @} - end defgroup vp8_encoder */
#include "vpx_codec_impl_bottom.h"
#endif
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -69,6 +69,9 @@
*/
VP8_SET_DECRYPT_KEY,
+ /** For testing. */
+ VP9_INVERT_TILE_DECODE_ORDER,
+
VP8_DECODER_CTRL_ID_MAX
};
@@ -85,6 +88,7 @@
VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *)
VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *)
VPX_CTRL_USE_TYPE(VP8_SET_DECRYPT_KEY, const unsigned char *)
+VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int)
/*! @} - end defgroup vp8_decoder */
--- a/vpx/vpx_integer.h
+++ b/vpx/vpx_integer.h
@@ -28,6 +28,8 @@
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
#define INT64_MAX _I64_MAX
+#define INT16_MAX _I16_MAX
+#define INT16_MIN _I16_MIN
#endif
#ifndef _UINTPTR_T_DEFINED
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -11,6 +11,7 @@
#ifndef VPX_PORTS_MEM_H
#define VPX_PORTS_MEM_H
+
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -35,36 +35,41 @@
return 0;
}
-/****************************************************************************
- *
- ****************************************************************************/
-int
-vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) {
- /*NOTE:*/
-
+int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
+ int width, int height, int border) {
if (ybf) {
- int y_stride = ((width + 2 * border) + 31) & ~31;
- int yplane_size = (height + 2 * border) * y_stride;
- int uv_width = width >> 1;
- int uv_height = height >> 1;
+ int aligned_width = (width + 15) & ~15;
+ int aligned_height = (height + 15) & ~15;
+ int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
+ int yplane_size = (aligned_height + 2 * border) * y_stride;
+ int uv_width = aligned_width >> 1;
+ int uv_height = aligned_height >> 1;
/** There is currently a bunch of code which assumes
* uv_stride == y_stride/2, so enforce this here. */
int uv_stride = y_stride >> 1;
int uvplane_size = (uv_height + border) * uv_stride;
+ const int frame_size = yplane_size + 2 * uvplane_size;
- vp8_yv12_de_alloc_frame_buffer(ybf);
+ if (!ybf->buffer_alloc) {
+ ybf->buffer_alloc = vpx_memalign(32, frame_size);
+ ybf->buffer_alloc_sz = frame_size;
+ }
- /** Only support allocating buffers that have a height and width that
- * are multiples of 16, and a border that's a multiple of 32.
- * The border restriction is required to get 16-byte alignment of the
- * start of the chroma rows without intoducing an arbitrary gap
- * between planes, which would break the semantics of things like
- * vpx_img_set_rect(). */
- if ((width & 0xf) | (height & 0xf) | (border & 0x1f))
+ if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size)
+ return -1;
+
+ /* Only support allocating buffers that have a border that's a multiple
+ * of 32. The border restriction is required to get 16-byte alignment of
+ * the start of the chroma rows without intoducing an arbitrary gap
+ * between planes, which would break the semantics of things like
+ * vpx_img_set_rect(). */
+ if (border & 0x1f)
return -3;
- ybf->y_width = width;
- ybf->y_height = height;
+ ybf->y_crop_width = width;
+ ybf->y_crop_height = height;
+ ybf->y_width = aligned_width;
+ ybf->y_height = aligned_height;
ybf->y_stride = y_stride;
ybf->uv_width = uv_width;
@@ -72,21 +77,23 @@
ybf->uv_stride = uv_stride;
ybf->border = border;
- ybf->frame_size = yplane_size + 2 * uvplane_size;
+ ybf->frame_size = frame_size;
- ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size);
-
- if (ybf->buffer_alloc == NULL)
- return -1;
-
ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2;
ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2;
ybf->corrupted = 0; /* assume not currupted by errors */
- } else {
- return -2;
+ return 0;
}
+ return -2;
+}
- return 0;
+int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
+ int width, int height, int border) {
+ if (ybf) {
+ vp8_yv12_de_alloc_frame_buffer(ybf);
+ return vp8_yv12_realloc_frame_buffer(ybf, width, height, border);
+ }
+ return -2;
}
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#include <assert.h>
#include "vpx_scale/yv12config.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_scale/vpx_scale.h"
@@ -20,180 +20,81 @@
/****************************************************************************
*
****************************************************************************/
-void
-vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
+static void extend_plane(uint8_t *s, /* source */
+ int sp, /* source pitch */
+ int w, /* width */
+ int h, /* height */
+ int et, /* extend top border */
+ int el, /* extend left border */
+ int eb, /* extend bottom border */
+ int er) { /* extend right border */
int i;
- unsigned char *src_ptr1, *src_ptr2;
- unsigned char *dest_ptr1, *dest_ptr2;
+ uint8_t *src_ptr1, *src_ptr2;
+ uint8_t *dest_ptr1, *dest_ptr2;
+ int linesize;
- unsigned int Border;
- int plane_stride;
- int plane_height;
- int plane_width;
-
- /***********/
- /* Y Plane */
- /***********/
- Border = ybf->border;
- plane_stride = ybf->y_stride;
- plane_height = ybf->y_height;
- plane_width = ybf->y_width;
-
/* copy the left and right most columns out */
- src_ptr1 = ybf->y_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
+ src_ptr1 = s;
+ src_ptr2 = s + w - 1;
+ dest_ptr1 = s - el;
+ dest_ptr2 = s + w;
- for (i = 0; i < plane_height; i++) {
- vpx_memset(dest_ptr1, src_ptr1[0], Border);
- vpx_memset(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
+ for (i = 0; i < h; i++) {
+ vpx_memset(dest_ptr1, src_ptr1[0], el);
+ vpx_memset(dest_ptr2, src_ptr2[0], er);
+ src_ptr1 += sp;
+ src_ptr2 += sp;
+ dest_ptr1 += sp;
+ dest_ptr2 += sp;
}
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = ybf->y_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
+ /* Now copy the top and bottom lines into each line of the respective
+ * borders
+ */
+ src_ptr1 = s - el;
+ src_ptr2 = s + sp * (h - 1) - el;
+ dest_ptr1 = s + sp * (-et) - el;
+ dest_ptr2 = s + sp * (h) - el;
+ linesize = el + er + w;
- for (i = 0; i < (int)Border; i++) {
- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
+ for (i = 0; i < et; i++) {
+ vpx_memcpy(dest_ptr1, src_ptr1, linesize);
+ dest_ptr1 += sp;
}
-
- /***********/
- /* U Plane */
- /***********/
- plane_stride = ybf->uv_stride;
- plane_height = ybf->uv_height;
- plane_width = ybf->uv_width;
- Border /= 2;
-
- /* copy the left and right most columns out */
- src_ptr1 = ybf->u_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++) {
- vpx_memset(dest_ptr1, src_ptr1[0], Border);
- vpx_memset(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
+ for (i = 0; i < eb; i++) {
+ vpx_memcpy(dest_ptr2, src_ptr2, linesize);
+ dest_ptr2 += sp;
}
-
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = ybf->u_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)(Border); i++) {
- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /***********/
- /* V Plane */
- /***********/
-
- /* copy the left and right most columns out */
- src_ptr1 = ybf->v_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++) {
- vpx_memset(dest_ptr1, src_ptr1[0], Border);
- vpx_memset(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = ybf->v_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)(Border); i++) {
- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
}
+void
+vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
+ assert(ybf->y_height - ybf->y_crop_height < 16);
+ assert(ybf->y_width - ybf->y_crop_width < 16);
+ assert(ybf->y_height - ybf->y_crop_height >= 0);
+ assert(ybf->y_width - ybf->y_crop_width >= 0);
-static void
-extend_frame_borders_yonly_c(YV12_BUFFER_CONFIG *ybf) {
- int i;
- unsigned char *src_ptr1, *src_ptr2;
- unsigned char *dest_ptr1, *dest_ptr2;
+ extend_plane(ybf->y_buffer, ybf->y_stride,
+ ybf->y_crop_width, ybf->y_crop_height,
+ ybf->border, ybf->border,
+ ybf->border + ybf->y_height - ybf->y_crop_height,
+ ybf->border + ybf->y_width - ybf->y_crop_width);
- unsigned int Border;
- int plane_stride;
- int plane_height;
- int plane_width;
+ extend_plane(ybf->u_buffer, ybf->uv_stride,
+ (ybf->y_crop_width + 1) / 2, (ybf->y_crop_height + 1) / 2,
+ ybf->border / 2, ybf->border / 2,
+ (ybf->border + ybf->y_height - ybf->y_crop_height + 1) / 2,
+ (ybf->border + ybf->y_width - ybf->y_crop_width + 1) / 2);
- /***********/
- /* Y Plane */
- /***********/
- Border = ybf->border;
- plane_stride = ybf->y_stride;
- plane_height = ybf->y_height;
- plane_width = ybf->y_width;
-
- /* copy the left and right most columns out */
- src_ptr1 = ybf->y_buffer;
- src_ptr2 = src_ptr1 + plane_width - 1;
- dest_ptr1 = src_ptr1 - Border;
- dest_ptr2 = src_ptr2 + 1;
-
- for (i = 0; i < plane_height; i++) {
- vpx_memset(dest_ptr1, src_ptr1[0], Border);
- vpx_memset(dest_ptr2, src_ptr2[0], Border);
- src_ptr1 += plane_stride;
- src_ptr2 += plane_stride;
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = ybf->y_buffer - Border;
- src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
- dest_ptr1 = src_ptr1 - (Border * plane_stride);
- dest_ptr2 = src_ptr2 + plane_stride;
-
- for (i = 0; i < (int)Border; i++) {
- vpx_memcpy(dest_ptr1, src_ptr1, plane_stride);
- vpx_memcpy(dest_ptr2, src_ptr2, plane_stride);
- dest_ptr1 += plane_stride;
- dest_ptr2 += plane_stride;
- }
-
- plane_stride /= 2;
- plane_height /= 2;
- plane_width /= 2;
- Border /= 2;
-
+ extend_plane(ybf->v_buffer, ybf->uv_stride,
+ (ybf->y_crop_width + 1) / 2, (ybf->y_crop_height + 1) / 2,
+ ybf->border / 2, ybf->border / 2,
+ (ybf->border + ybf->y_height - ybf->y_crop_height + 1) / 2,
+ (ybf->border + ybf->y_width - ybf->y_crop_width + 1) / 2);
}
-
/****************************************************************************
*
* ROUTINE : vp8_yv12_copy_frame
@@ -215,6 +116,14 @@
YV12_BUFFER_CONFIG *dst_ybc) {
int row;
unsigned char *source, *dest;
+
+#if 0
+ /* These assertions are valid in the codec, but the libvpx-tester uses
+ * this code slightly differently.
+ */
+ assert(src_ybc->y_width == dst_ybc->y_width);
+ assert(src_ybc->y_height == dst_ybc->y_height);
+#endif
source = src_ybc->y_buffer;
dest = dst_ybc->y_buffer;
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -42,6 +42,8 @@
typedef struct yv12_buffer_config {
int y_width;
int y_height;
+ int y_crop_width;
+ int y_crop_height;
int y_stride;
/* int yinternal_width; */
@@ -55,6 +57,7 @@
uint8_t *v_buffer;
uint8_t *buffer_alloc;
+ int buffer_alloc_sz;
int border;
int frame_size;
YUV_TYPE clrtype;
@@ -65,6 +68,8 @@
int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
int width, int height, int border);
+ int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
+ int width, int height, int border);
int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf);
#ifdef __cplusplus
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1028,7 +1028,8 @@
static const arg_def_t *global_args[] = {
&use_yv12, &use_i420, &usage, &threads, &profile,
- &width, &height, &stereo_mode, &timebase, &framerate, &error_resilient,
+ &width, &height, &stereo_mode, &timebase, &framerate,
+ &error_resilient,
&lag_in_frames, NULL
};
@@ -1103,7 +1104,11 @@
static const arg_def_t cpu_used = ARG_DEF(NULL, "cpu-used", 1,
"CPU Used (-16..16)");
static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1,
- "Number of token partitions to use, log2");
+ "Number of token partitions to use, log2");
+static const arg_def_t tile_cols = ARG_DEF(NULL, "tile-columns", 1,
+ "Number of tile columns to use, log2");
+static const arg_def_t tile_rows = ARG_DEF(NULL, "tile-rows", 1,
+ "Number of tile rows to use, log2");
static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
"Enable automatic alt reference frames");
static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1,
@@ -1123,8 +1128,10 @@
"Constrained Quality Level");
static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1,
"Max I-frame bitrate (pct)");
-#if CONFIG_LOSSLESS
static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode");
+#if CONFIG_VP9_ENCODER
+static const arg_def_t frame_parallel_decoding = ARG_DEF(
+ NULL, "frame-parallel", 1, "Enable frame parallel decodability features");
#endif
#if CONFIG_VP8_ENCODER
@@ -1147,22 +1154,18 @@
#if CONFIG_VP9_ENCODER
static const arg_def_t *vp9_args[] = {
&cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
- &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
- &tune_ssim, &cq_level, &max_intra_rate_pct,
-#if CONFIG_LOSSLESS
- &lossless,
-#endif
+ &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type,
+ &tune_ssim, &cq_level, &max_intra_rate_pct, &lossless,
+ &frame_parallel_decoding,
NULL
};
static const int vp9_arg_ctrl_map[] = {
VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
- VP8E_SET_TOKEN_PARTITIONS,
+ VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS,
VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
-#if CONFIG_LOSSLESS
- VP9E_SET_LOSSLESS,
-#endif
+ VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING,
0
};
#endif
@@ -1479,14 +1482,16 @@
#define mmin(a, b) ((a) < (b) ? (a) : (b))
static void find_mismatch(vpx_image_t *img1, vpx_image_t *img2,
int yloc[2], int uloc[2], int vloc[2]) {
- int match = 1;
- int i, j;
- yloc[0] = yloc[1] = -1;
- for (i = 0, match = 1; match && i < img1->d_h; i+=32) {
- for (j = 0; match && j < img1->d_w; j+=32) {
+ const unsigned int bsize = 64;
+ const unsigned int bsize2 = bsize >> 1;
+ unsigned int match = 1;
+ unsigned int i, j;
+ yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1;
+ for (i = 0, match = 1; match && i < img1->d_h; i += bsize) {
+ for (j = 0; match && j < img1->d_w; j += bsize) {
int k, l;
- int si = mmin(i + 32, img1->d_h) - i;
- int sj = mmin(j + 32, img1->d_w) - j;
+ int si = mmin(i + bsize, img1->d_h) - i;
+ int sj = mmin(j + bsize, img1->d_w) - j;
for (k = 0; match && k < si; k++)
for (l = 0; match && l < sj; l++) {
if (*(img1->planes[VPX_PLANE_Y] +
@@ -1495,6 +1500,10 @@
(i + k) * img2->stride[VPX_PLANE_Y] + j + l)) {
yloc[0] = i + k;
yloc[1] = j + l;
+ yloc[2] = *(img1->planes[VPX_PLANE_Y] +
+ (i + k) * img1->stride[VPX_PLANE_Y] + j + l);
+ yloc[3] = *(img2->planes[VPX_PLANE_Y] +
+ (i + k) * img2->stride[VPX_PLANE_Y] + j + l);
match = 0;
break;
}
@@ -1501,12 +1510,12 @@
}
}
}
- uloc[0] = uloc[1] = -1;
- for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i+=16) {
- for (j = 0; j < match && (img1->d_w + 1) / 2; j+=16) {
+ uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1;
+ for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) {
+ for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) {
int k, l;
- int si = mmin(i + 16, (img1->d_h + 1) / 2) - i;
- int sj = mmin(j + 16, (img1->d_w + 1) / 2) - j;
+ int si = mmin(i + bsize2, (img1->d_h + 1) / 2) - i;
+ int sj = mmin(j + bsize2, (img1->d_w + 1) / 2) - j;
for (k = 0; match && k < si; k++)
for (l = 0; match && l < sj; l++) {
if (*(img1->planes[VPX_PLANE_U] +
@@ -1515,6 +1524,10 @@
(i + k) * img2->stride[VPX_PLANE_U] + j + l)) {
uloc[0] = i + k;
uloc[1] = j + l;
+ uloc[2] = *(img1->planes[VPX_PLANE_U] +
+ (i + k) * img1->stride[VPX_PLANE_U] + j + l);
+ uloc[3] = *(img2->planes[VPX_PLANE_U] +
+ (i + k) * img2->stride[VPX_PLANE_V] + j + l);
match = 0;
break;
}
@@ -1521,12 +1534,12 @@
}
}
}
- vloc[0] = vloc[1] = -1;
- for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i+=16) {
- for (j = 0; j < match && (img1->d_w + 1) / 2; j+=16) {
+ vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1;
+ for (i = 0, match = 1; match && i < (img1->d_h + 1) / 2; i += bsize2) {
+ for (j = 0; j < match && (img1->d_w + 1) / 2; j += bsize2) {
int k, l;
- int si = mmin(i + 16, (img1->d_h + 1) / 2) - i;
- int sj = mmin(j + 16, (img1->d_w + 1) / 2) - j;
+ int si = mmin(i + bsize2, (img1->d_h + 1) / 2) - i;
+ int sj = mmin(j + bsize2, (img1->d_w + 1) / 2) - j;
for (k = 0; match && k < si; k++)
for (l = 0; match && l < sj; l++) {
if (*(img1->planes[VPX_PLANE_V] +
@@ -1535,6 +1548,10 @@
(i + k) * img2->stride[VPX_PLANE_V] + j + l)) {
vloc[0] = i + k;
vloc[1] = j + l;
+ vloc[2] = *(img1->planes[VPX_PLANE_V] +
+ (i + k) * img1->stride[VPX_PLANE_V] + j + l);
+ vloc[3] = *(img2->planes[VPX_PLANE_V] +
+ (i + k) * img2->stride[VPX_PLANE_V] + j + l);
match = 0;
break;
}
@@ -1546,7 +1563,7 @@
static int compare_img(vpx_image_t *img1, vpx_image_t *img2)
{
int match = 1;
- int i;
+ unsigned int i;
match &= (img1->fmt == img2->fmt);
match &= (img1->w == img2->w);
@@ -1638,8 +1655,6 @@
stats_io_t stats;
struct vpx_image *img;
vpx_codec_ctx_t decoder;
- vpx_ref_frame_t ref_enc;
- vpx_ref_frame_t ref_dec;
int mismatch_seen;
};
@@ -2221,16 +2236,7 @@
#if CONFIG_DECODERS
if (global->test_decode != TEST_DECODE_OFF) {
- int width, height;
-
vpx_codec_dec_init(&stream->decoder, global->codec->dx_iface(), NULL, 0);
-
- width = (stream->config.cfg.g_w + 15) & ~15;
- height = (stream->config.cfg.g_h + 15) & ~15;
- vpx_img_alloc(&stream->ref_enc.img, VPX_IMG_FMT_I420, width, height, 1);
- vpx_img_alloc(&stream->ref_dec.img, VPX_IMG_FMT_I420, width, height, 1);
- stream->ref_enc.frame_type = VP8_LAST_FRAME;
- stream->ref_dec.frame_type = VP8_LAST_FRAME;
}
#endif
}
@@ -2311,6 +2317,8 @@
if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) {
stream->frames_out++;
}
+ if (!global->quiet)
+ fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz);
update_rate_histogram(&stream->rate_hist, cfg, pkt);
if (stream->config.write_webm) {
@@ -2373,6 +2381,8 @@
stream->psnr_sse_total += pkt->data.psnr.sse[0];
stream->psnr_samples_total += pkt->data.psnr.samples[0];
for (i = 0; i < 4; i++) {
+ if (!global->quiet)
+ fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]);
stream->psnr_totals[i] += pkt->data.psnr.psnr[i];
}
stream->psnr_count++;
@@ -2411,26 +2421,59 @@
static void test_decode(struct stream_state *stream,
- enum TestDecodeFatality fatal) {
+ enum TestDecodeFatality fatal,
+ const struct codec_item *codec) {
+ vpx_image_t enc_img, dec_img;
+
if (stream->mismatch_seen)
return;
- vpx_codec_control(&stream->encoder, VP8_COPY_REFERENCE, &stream->ref_enc);
+ /* Get the internal reference frame */
+ if (codec->fourcc == VP8_FOURCC) {
+ struct vpx_ref_frame ref_enc, ref_dec;
+ int width, height;
+
+ width = (stream->config.cfg.g_w + 15) & ~15;
+ height = (stream->config.cfg.g_h + 15) & ~15;
+ vpx_img_alloc(&ref_enc.img, VPX_IMG_FMT_I420, width, height, 1);
+ enc_img = ref_enc.img;
+ vpx_img_alloc(&ref_dec.img, VPX_IMG_FMT_I420, width, height, 1);
+ dec_img = ref_dec.img;
+
+ ref_enc.frame_type = VP8_LAST_FRAME;
+ ref_dec.frame_type = VP8_LAST_FRAME;
+ vpx_codec_control(&stream->encoder, VP8_COPY_REFERENCE, &ref_enc);
+ vpx_codec_control(&stream->decoder, VP8_COPY_REFERENCE, &ref_dec);
+ } else {
+ struct vp9_ref_frame ref;
+
+ ref.idx = 0;
+ vpx_codec_control(&stream->encoder, VP9_GET_REFERENCE, &ref);
+ enc_img = ref.img;
+ vpx_codec_control(&stream->decoder, VP9_GET_REFERENCE, &ref);
+ dec_img = ref.img;
+ }
ctx_exit_on_error(&stream->encoder, "Failed to get encoder reference frame");
- vpx_codec_control(&stream->decoder, VP8_COPY_REFERENCE, &stream->ref_dec);
ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame");
- if (!compare_img(&stream->ref_enc.img, &stream->ref_dec.img)) {
- int y[2], u[2], v[2];
- find_mismatch(&stream->ref_enc.img, &stream->ref_dec.img,
- y, u, v);
+ if (!compare_img(&enc_img, &dec_img)) {
+ int y[4], u[4], v[4];
+ find_mismatch(&enc_img, &dec_img, y, u, v);
+ stream->decoder.err = 1;
warn_or_exit_on_error(&stream->decoder, fatal == TEST_DECODE_FATAL,
- "Stream %d: Encode/decode mismatch on frame %d"
- " at Y[%d, %d], U[%d, %d], V[%d, %d]",
+ "Stream %d: Encode/decode mismatch on frame %d at"
+ " Y[%d, %d] {%d/%d},"
+ " U[%d, %d] {%d/%d},"
+ " V[%d, %d] {%d/%d}",
stream->index, stream->frames_out,
- y[0], y[1], u[0], u[1], v[0], v[1]);
+ y[0], y[1], y[2], y[3],
+ u[0], u[1], u[2], u[3],
+ v[0], v[1], v[2], v[3]);
stream->mismatch_seen = stream->frames_out;
}
+
+ vpx_img_free(&enc_img);
+ vpx_img_free(&dec_img);
}
@@ -2544,7 +2587,6 @@
" and --passes=2\n", stream->index, global.pass);
});
-
/* Use the frame rate from the file only if none was specified
* on the command-line.
*/
@@ -2656,7 +2698,7 @@
}
if (got_data && global.test_decode != TEST_DECODE_OFF)
- FOREACH_STREAM(test_decode(stream, global.test_decode));
+ FOREACH_STREAM(test_decode(stream, global.test_decode, global.codec));
}
fflush(stdout);
@@ -2688,8 +2730,6 @@
if (global.test_decode != TEST_DECODE_OFF) {
FOREACH_STREAM(vpx_codec_destroy(&stream->decoder));
- FOREACH_STREAM(vpx_img_free(&stream->ref_enc.img));
- FOREACH_STREAM(vpx_img_free(&stream->ref_dec.img));
}
close_input_file(&input);
--
⑨