ref: 66c0d1100babdb1ec17f2e9243666eeb713ce67b
parent: a8d903e53996a95bed90130737fe1471f539d278
parent: a9ebbcc33836d8666f98de7a6d416a6dcd0cae9a
author: John Koleszar <jkoleszar@google.com>
date: Thu Apr 18 10:27:16 EDT 2013
Merge "convolve: support larger blocks, fix asm saturation bug" into experimental
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -35,6 +35,13 @@
return (rand() >> 8) & 0xff;
}
+ uint8_t Rand8Extremes(void) {
+ // Returns a random value in [0, 15] or [240, 255], i.e. near 0 or near
+ // 255, to better exercise saturation behavior.
+ //
+ // The top bit of the random byte selects which extreme is used and the
+ // low nibble is the distance from that extreme. (Shifting the byte left
+ // by 4 instead would wrap modulo 256 when narrowed back to uint8_t and
+ // yield multiples of 16 spread over the whole range.)
+ const uint8_t r = Rand8();
+ const uint8_t distance = r & 0x0f;
+ return static_cast<uint8_t>(r < 128 ? distance : 255 - distance);
+ }
+
int PseudoUniform(int range) {
return (rand() >> 8) % range;
}
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -66,7 +66,7 @@
// support.
const int kInterp_Extend = 4;
const unsigned int intermediate_height =
- (kInterp_Extend - 1) + output_height + kInterp_Extend;
+ (kInterp_Extend - 1) + output_height + kInterp_Extend;
/* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
* where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
@@ -75,7 +75,7 @@
* = 23
* and filter_max_width = 16
*/
- uint8_t intermediate_buffer[23 * 16];
+ uint8_t intermediate_buffer[71 * 64];
const int intermediate_next_stride = 1 - intermediate_height * output_width;
// Horizontal pass (src -> transposed intermediate).
@@ -158,13 +158,13 @@
unsigned int dst_stride,
unsigned int output_width,
unsigned int output_height) {
- uint8_t tmp[16*16];
+ uint8_t tmp[64*64];
- assert(output_width <= 16);
- assert(output_height <= 16);
- filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 16,
+ assert(output_width <= 64);
+ assert(output_height <= 64);
+ filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
output_width, output_height);
- block2d_average_c(tmp, 16, dst_ptr, dst_stride,
+ block2d_average_c(tmp, 64, dst_ptr, dst_stride,
output_width, output_height);
}
@@ -188,10 +188,10 @@
protected:
static const int kDataAlignment = 16;
- static const int kOuterBlockSize = 32;
+ static const int kOuterBlockSize = 128;
static const int kInputStride = kOuterBlockSize;
static const int kOutputStride = kOuterBlockSize;
- static const int kMaxDimension = 16;
+ static const int kMaxDimension = 64;
int Width() const { return GET_PARAM(0); }
int Height() const { return GET_PARAM(1); }
@@ -221,7 +221,7 @@
::libvpx_test::ACMRandom prng;
for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i)
- input_[i] = prng.Rand8();
+ input_[i] = prng.Rand8Extremes();
}
void CheckGuardBlocks() {
@@ -308,7 +308,30 @@
vp9_sub_pel_filters_8s,
vp9_sub_pel_filters_8lp
};
+// Number of filter banks in kTestFilterList, and the number of 8-tap kernels
+// visited in each bank by the tests below.
+const int kNumFilterBanks = sizeof(kTestFilterList) /
+ sizeof(*kTestFilterList);
+const int kNumFilters = 16;
+// For every kernel, adds the taps two at a time (p0..p3) and checks that no
+// pair sum and no running total exceeds 128, and that all eight taps add up
+// to exactly 128. The running totals are accumulated in the order
+// p0, p3, p1, p2 (presumably mirroring the order in which the SSSE3 kernel
+// adds its partial products -- confirm against vp9_subpixel_8t_ssse3.asm).
+TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
+ for (int bank = 0; bank < kNumFilterBanks; ++bank) {
+ const int16_t (*const kernels)[8] = kTestFilterList[bank];
+ for (int k = 0; k < kNumFilters; ++k) {
+ const int16_t *const taps = kernels[k];
+ const int p0 = taps[0] + taps[1];
+ const int p1 = taps[2] + taps[3];
+ const int p2 = taps[4] + taps[5];
+ const int p3 = taps[6] + taps[7];
+ // Each pair on its own.
+ EXPECT_LE(p0, 128);
+ EXPECT_LE(p1, 128);
+ EXPECT_LE(p2, 128);
+ EXPECT_LE(p3, 128);
+ // Running totals, then the full sum.
+ EXPECT_LE(p0 + p3, 128);
+ EXPECT_LE(p0 + p3 + p1, 128);
+ EXPECT_LE(p0 + p3 + p1 + p2, 128);
+ EXPECT_EQ(p0 + p1 + p2 + p3, 128);
+ }
+ }
+}
+
const int16_t kInvalidFilter[8] = { 0 };
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
@@ -316,12 +339,9 @@
uint8_t* const out = output();
uint8_t ref[kOutputStride * kMaxDimension];
- const int kNumFilterBanks = sizeof(kTestFilterList) /
- sizeof(kTestFilterList[0]);
for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
const int16_t (*filters)[8] = kTestFilterList[filter_bank];
- const int kNumFilters = 16;
for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
@@ -368,7 +388,7 @@
::libvpx_test::ACMRandom prng;
for (int y = 0; y < Height(); ++y) {
for (int x = 0; x < Width(); ++x) {
- const uint8_t r = prng.Rand8();
+ const uint8_t r = prng.Rand8Extremes();
out[y * kOutputStride + x] = r;
ref[y * kOutputStride + x] = r;
@@ -440,6 +460,7 @@
TEST_P(ConvolveTest, ChangeFilterWorks) {
uint8_t* const in = input();
uint8_t* const out = output();
+ const int kPixelSelected = 4;
REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
kChangeFilters[8], 17, kChangeFilters[4], 16,
@@ -446,10 +467,10 @@
Width(), Height()));
for (int x = 0; x < Width(); ++x) {
- if (x < 8)
- ASSERT_EQ(in[4], out[x]) << "x == " << x;
- else
- ASSERT_EQ(in[12], out[x]) << "x == " << x;
+ const int kQ4StepAdjust = x >> 4;
+ const int kFilterPeriodAdjust = (x >> 3) << 3;
+ const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
+ ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;
}
REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
@@ -457,10 +478,10 @@
Width(), Height()));
for (int y = 0; y < Height(); ++y) {
- if (y < 8)
- ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;
- else
- ASSERT_EQ(in[12 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+ // Expected source row for output row y: one extra row for every 16
+ // output rows, plus the first row of the current 8-row group, plus
+ // kPixelSelected.
+ const int kQ4StepAdjust = y >> 4;
+ const int kFilterPeriodAdjust = (y >> 3) << 3;
+ const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
+ // out was written with kOutputStride, so index it with that stride rather
+ // than kInputStride (the two are equal here, both kOuterBlockSize).
+ ASSERT_EQ(in[ref_y * kInputStride], out[y * kOutputStride])
+ << "y == " << y;
}
REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
@@ -468,9 +489,13 @@
Width(), Height()));
for (int y = 0; y < Height(); ++y) {
+ const int kQ4StepAdjustY = y >> 4;
+ const int kFilterPeriodAdjustY = (y >> 3) << 3;
+ const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;
for (int x = 0; x < Width(); ++x) {
- const int ref_x = x < 8 ? 4 : 12;
- const int ref_y = y < 8 ? 4 : 12;
+ const int kQ4StepAdjustX = x >> 4;
+ const int kFilterPeriodAdjustX = (x >> 3) << 3;
+ const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;
ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
<< "x == " << x << ", y == " << y;
@@ -489,9 +514,17 @@
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_c),
make_tuple(8, 4, &convolve8_c),
+ make_tuple(4, 8, &convolve8_c),
make_tuple(8, 8, &convolve8_c),
make_tuple(16, 8, &convolve8_c),
- make_tuple(16, 16, &convolve8_c)));
+ make_tuple(8, 16, &convolve8_c),
+ make_tuple(16, 16, &convolve8_c),
+ make_tuple(32, 16, &convolve8_c),
+ make_tuple(16, 32, &convolve8_c),
+ make_tuple(32, 32, &convolve8_c),
+ make_tuple(64, 32, &convolve8_c),
+ make_tuple(32, 64, &convolve8_c),
+ make_tuple(64, 64, &convolve8_c)));
}
#if HAVE_SSSE3
@@ -503,7 +536,15 @@
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_ssse3),
make_tuple(8, 4, &convolve8_ssse3),
+ make_tuple(4, 8, &convolve8_ssse3),
make_tuple(8, 8, &convolve8_ssse3),
make_tuple(16, 8, &convolve8_ssse3),
- make_tuple(16, 16, &convolve8_ssse3)));
+ make_tuple(8, 16, &convolve8_ssse3),
+ make_tuple(16, 16, &convolve8_ssse3),
+ make_tuple(32, 16, &convolve8_ssse3),
+ make_tuple(16, 32, &convolve8_ssse3),
+ make_tuple(32, 32, &convolve8_ssse3),
+ make_tuple(64, 32, &convolve8_ssse3),
+ make_tuple(32, 64, &convolve8_ssse3),
+ make_tuple(64, 64, &convolve8_ssse3)));
#endif
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -331,14 +331,14 @@
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
/* Fixed size intermediate buffer places limits on parameters.
- * Maximum intermediate_height is 39, for y_step_q4 == 32,
- * h == 16, taps == 8.
+ * Maximum intermediate_height is 135, for y_step_q4 == 32,
+ * h == 64, taps == 8.
*/
- uint8_t temp[16 * 39];
+ uint8_t temp[64 * 135];
int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
- assert(w <= 16);
- assert(h <= 16);
+ assert(w <= 64);
+ assert(h <= 64);
assert(taps <= 8);
assert(y_step_q4 <= 32);
@@ -346,10 +346,10 @@
intermediate_height = h;
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
- temp, 16,
+ temp, 64,
filter_x, x_step_q4, filter_y, y_step_q4,
w, intermediate_height, taps);
- convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+ convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h, taps);
}
@@ -360,14 +360,14 @@
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
/* Fixed size intermediate buffer places limits on parameters.
- * Maximum intermediate_height is 39, for y_step_q4 == 32,
- * h == 16, taps == 8.
+ * Maximum intermediate_height is 135, for y_step_q4 == 32,
+ * h == 64, taps == 8.
*/
- uint8_t temp[16 * 39];
+ uint8_t temp[64 * 135];
int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
- assert(w <= 16);
- assert(h <= 16);
+ assert(w <= 64);
+ assert(h <= 64);
assert(taps <= 8);
assert(y_step_q4 <= 32);
@@ -375,10 +375,10 @@
intermediate_height = h;
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
- temp, 16,
+ temp, 64,
filter_x, x_step_q4, filter_y, y_step_q4,
w, intermediate_height, taps);
- convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+ convolve_avg_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h, taps);
}
@@ -563,16 +563,16 @@
const int16_t *filter_y, int y_step_q4,
int w, int h) {
/* Fixed size intermediate buffer places limits on parameters. */
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
- assert(w <= 16);
- assert(h <= 16);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
+ assert(w <= 64);
+ assert(h <= 64);
vp9_convolve8(src, src_stride,
- temp, 16,
+ temp, 64,
filter_x, x_step_q4,
filter_y, y_step_q4,
w, h);
- vp9_convolve_avg(temp, 16,
+ vp9_convolve_avg(temp, 64,
dst, dst_stride,
NULL, 0, /* These unused parameter should be removed! */
NULL, 0, /* These unused parameter should be removed! */
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -278,11 +278,9 @@
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
- // check w/h due to fixed size fdata2 array
- assert(w <= 16);
- assert(h <= 16);
+ assert(h <= 64);
if (x_step_q4 == 16 && y_step_q4 == 16 &&
filter_x[3] != 128 && filter_y[3] != 128) {
@@ -324,11 +322,9 @@
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h) {
- DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
- // check w/h due to fixed size fdata2 array
- assert(w <= 16);
- assert(h <= 16);
+ assert(h <= 64);
if (x_step_q4 == 16 && y_step_q4 == 16 &&
filter_x[3] != 128 && filter_y[3] != 128) {
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -81,10 +81,10 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ paddsw xmm0, xmm6
paddsw xmm0, xmm2
- paddsw xmm0, krd
- paddsw xmm4, xmm6
paddsw xmm0, xmm4
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -165,10 +165,10 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ paddsw xmm0, xmm6
paddsw xmm0, xmm2
- paddsw xmm0, krd
- paddsw xmm4, xmm6
paddsw xmm0, xmm4
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -250,10 +250,10 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ paddsw xmm0, xmm6
paddsw xmm0, xmm2
- paddsw xmm0, krd
- paddsw xmm4, xmm6
paddsw xmm0, xmm4
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
@@ -285,10 +285,10 @@
pmaddubsw xmm4, k4k5
pmaddubsw xmm6, k6k7
+ paddsw xmm0, xmm6
paddsw xmm0, xmm2
- paddsw xmm4, xmm6
- paddsw xmm0, krd
paddsw xmm0, xmm4
+ paddsw xmm0, krd
psraw xmm0, 7
packuswb xmm0, xmm0
--
⑨