shithub: libvpx

Download patch

ref: ef41c6286d1151dd6eeabea4e9160364f5aeee8e
parent: 71b38a144ebc50cb8bb043366ae959267acf4206
author: Linfeng Zhang <linfengz@google.com>
date: Wed Sep 6 08:01:07 EDT 2017

Update convolve functions' assertions

So that 4 to 1 frame scaling can call them.

Change-Id: I9ec438aa63b923ba164ad3c59d7ecfa12789eab5

--- a/test/vp9_scale_test.cc
+++ b/test/vp9_scale_test.cc
@@ -49,10 +49,10 @@
 
   void RunTest() {
     static const int kNumSizesToTest = 4;
-    static const int kNumScaleFactorsToTest = 2;
+    static const int kNumScaleFactorsToTest = 4;
     static const int kWidthsToTest[] = { 16, 32, 48, 64 };
     static const int kHeightsToTest[] = { 16, 20, 24, 28 };
-    static const int kScaleFactors[] = { 1, 2 };
+    static const int kScaleFactors[] = { 1, 2, 3, 4 };
     for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
       for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
         for (int h = 0; h < kNumSizesToTest; ++h) {
@@ -132,8 +132,8 @@
 
 TEST_P(ScaleTest, DISABLED_Speed) {
   static const int kCountSpeedTestBlock = 100;
-  static const int kNumScaleFactorsToTest = 2;
-  static const int kScaleFactors[] = { 1, 2 };
+  static const int kNumScaleFactorsToTest = 4;
+  static const int kScaleFactors[] = { 1, 2, 3, 4 };
   const int src_height = 1280;
   const int src_width = 720;
   for (INTERP_FILTER filter_type = 2; filter_type < 4; ++filter_type) {
--- a/vpx_dsp/vpx_convolve.c
+++ b/vpx_dsp/vpx_convolve.c
@@ -129,6 +129,9 @@
   // --Must round-up because block may be located at sub-pixel position.
   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
+  // When called from the frame scaling function, the smallest scaling factor
+  // is x1/4 ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer
+  // is still big enough.
   uint8_t temp[64 * 135];
   const int intermediate_height =
       (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
@@ -135,8 +138,8 @@
 
   assert(w <= 64);
   assert(h <= 64);
-  assert(y_step_q4 <= 32);
-  assert(x_step_q4 <= 32);
+  assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));
+  assert(x_step_q4 <= 64);
 
   convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
                  filter, x0_q4, x_step_q4, w, intermediate_height);
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -828,6 +828,9 @@
   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
   // --Require an additional 8 rows for the horiz_w8 transpose tail.
+  // When called from the frame scaling function, the smallest scaling factor
+  // is x1/4 ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer
+  // is still big enough.
   DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]);
   const int intermediate_height =
       (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
@@ -834,8 +837,8 @@
 
   assert(w <= 64);
   assert(h <= 64);
-  assert(y_step_q4 <= 32);
-  assert(x_step_q4 <= 32);
+  assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));
+  assert(x_step_q4 <= 64);
 
   if (w >= 8) {
     scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),