shithub: libvpx

--- a/test/vp9_scale_test.cc

+++ b/test/vp9_scale_test.cc

@@ -49,10 +49,10 @@

   void RunTest() {

     static const int kNumSizesToTest = 4;

-    static const int kNumScaleFactorsToTest = 2;

+    static const int kNumScaleFactorsToTest = 4;

     static const int kWidthsToTest[] = { 16, 32, 48, 64 };

     static const int kHeightsToTest[] = { 16, 20, 24, 28 };

-    static const int kScaleFactors[] = { 1, 2 };

+    static const int kScaleFactors[] = { 1, 2, 3, 4 };

     for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {

       for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {

         for (int h = 0; h < kNumSizesToTest; ++h) {

@@ -132,8 +132,8 @@

 TEST_P(ScaleTest, DISABLED_Speed) {

   static const int kCountSpeedTestBlock = 100;

-  static const int kNumScaleFactorsToTest = 2;

-  static const int kScaleFactors[] = { 1, 2 };

+  static const int kNumScaleFactorsToTest = 4;

+  static const int kScaleFactors[] = { 1, 2, 3, 4 };

   const int src_height = 1280;

   const int src_width = 720;

   for (INTERP_FILTER filter_type = 2; filter_type < 4; ++filter_type) {

--- a/vpx_dsp/vpx_convolve.c

+++ b/vpx_dsp/vpx_convolve.c

@@ -129,6 +129,9 @@

   // --Must round-up because block may be located at sub-pixel position.

   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

+  // When called from the frame scaling function, the smallest scaling factor

+  // is x1/4 ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer

+  // is still big enough.

   uint8_t temp[64 * 135];

   const int intermediate_height =

       (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

@@ -135,8 +138,8 @@

   assert(w <= 64);

   assert(h <= 64);

-  assert(y_step_q4 <= 32);

-  assert(x_step_q4 <= 32);

+  assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));

+  assert(x_step_q4 <= 64);

   convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,

                  filter, x0_q4, x_step_q4, w, intermediate_height);

--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c

+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c

@@ -828,6 +828,9 @@

   // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

   // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

   // --Require an additional 8 rows for the horiz_w8 transpose tail.

+  // When called from the frame scaling function, the smallest scaling factor

+  // is x1/4 ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer

+  // is still big enough.

   DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]);

   const int intermediate_height =

       (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

@@ -834,8 +837,8 @@

   assert(w <= 64);

   assert(h <= 64);

-  assert(y_step_q4 <= 32);

-  assert(x_step_q4 <= 32);

+  assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));

+  assert(x_step_q4 <= 64);

   if (w >= 8) {

     scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),

--

⑨