ref: eb939f45b8ffde47e160d545114d68ddd3606b90
parent: c7805395fd4aab2abadce9d34aaf07853cfc5e6d
author: John Koleszar <jkoleszar@google.com>
date: Sun Feb 24 15:55:14 EST 2013
Spatial resampling of ZEROMV predictors

This patch allows coding frames using references of different resolution,
in ZEROMV mode. For compound prediction, either reference may be scaled.

To test, I use the resize_test and enable WRITE_RECON_BUFFER in
vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:

--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {

   virtual void FillFrame() {
     // Read a frame from input_file.
+    if (frame_ != 3)
     if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
       limit_ = frame_;
     }

This forces the frame that the resolution changes on to be coded with no
motion, only scaling, and improves the quality of the result.

Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
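
For orientation, a scale factor below is a plain ratio (reference size over
coded size) plus a q4 step per output pixel. A minimal sketch of the
arithmetic, using the same field names as the patch (the sizes here are
illustrative only):

    /* A 1280-wide reference predicting a 640-wide frame gives
     * x_step_q4 = 16 * 1280 / 640 = 32, the largest step the convolve
     * code asserts on (2x downscaling per axis); an equal-size
     * reference gives 16, the unscaled step. */
    struct scale {
      int x_num, x_den;  /* reference width : coded width */
      int x_step_q4;     /* q4 source increment per output pixel */
    };

    static void setup_x_scale(struct scale *s, int ref_w, int cur_w) {
      s->x_num = ref_w;
      s->x_den = cur_w;
      s->x_step_q4 = 16 * ref_w / cur_w;  /* 16 == one full pel in q4 */
    }
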
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -114,7 +114,8 @@
TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 5);
+ 30, 1, 0, 6);
+ cfg_.rc_target_bitrate = 5000;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -292,9 +292,12 @@
int x_num;
int x_den;
int x_offset_q4;
+ int x_step_q4;
int y_num;
int y_den;
int y_offset_q4;
+ int y_step_q4;
+ convolve_fn_t predict[2][2][2]; // horiz, vert, avg
};
typedef struct macroblockd {
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -206,16 +206,25 @@
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
- /* Fixed size intermediate buffer places limits on parameters. */
- uint8_t temp[16 * 23];
+ /* Fixed size intermediate buffer places limits on parameters.
+ * Maximum intermediate_height is 39, for y_step_q4 == 32,
+ * h == 16, taps == 8.
+ */
+ uint8_t temp[16 * 39];
+ int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
+
assert(w <= 16);
assert(h <= 16);
assert(taps <= 8);
+ assert(y_step_q4 <= 32);
+ if (intermediate_height < h)
+ intermediate_height = h;
+
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
temp, 16,
filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + taps - 1, taps);
+ w, intermediate_height, taps);
convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h, taps);
@@ -226,16 +235,25 @@
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int taps) {
- /* Fixed size intermediate buffer places limits on parameters. */
- uint8_t temp[16 * 23];
+ /* Fixed size intermediate buffer places limits on parameters.
+ * Maximum intermediate_height is 39, for y_step_q4 == 32,
+ * h == 16, taps == 8.
+ */
+ uint8_t temp[16 * 39];
+ int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
+
assert(w <= 16);
assert(h <= 16);
assert(taps <= 8);
+ assert(y_step_q4 <= 32);
+ if (intermediate_height < h)
+ intermediate_height = h;
+
convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
temp, 16,
filter_x, x_step_q4, filter_y, y_step_q4,
- w, h + taps - 1, taps);
+ w, intermediate_height, taps);
convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
filter_x, x_step_q4, filter_y, y_step_q4,
w, h, taps);
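
The 16 * 39 bound on temp[] follows directly from the vertical stepping: the
second pass consumes y_step_q4 source rows per 16 output rows, plus taps - 1
rows of filter support. A worked check of the comment's numbers (sketch, not
part of the patch):

    static int intermediate_height(int h, int y_step_q4, int taps) {
      const int ih = ((h * y_step_q4) >> 4) + taps - 1;
      return ih < h ? h : ih;  /* the first pass writes at least h rows */
    }
    /* intermediate_height(16, 32, 8) == (512 >> 4) + 7 == 39, matching
     * temp[16 * 39] and the new y_step_q4 <= 32 assert. */
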
--- a/vp9/common/vp9_convolve.h
+++ b/vp9/common/vp9_convolve.h
@@ -33,11 +33,8 @@
int w, int h);
struct subpix_fn_table {
- convolve_fn_t predict[2][2][2]; // horiz, vert, avg
const int16_t (*filter_x)[8];
const int16_t (*filter_y)[8];
- int x_step_q4;
- int y_step_q4;
};
#endif // VP9_COMMON_CONVOLVE_H_
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -15,7 +15,10 @@
#include "vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
-DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS][8]) = {
+/* TODO(jkoleszar): We can avoid duplicating these tables 2X by forcing 256
+ * byte alignment of the table's base address.
+ */
+DECLARE_ALIGNED(16, const int16_t, vp9_bilinear_filters[SUBPEL_SHIFTS*2][8]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
{ 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 },
@@ -31,12 +34,29 @@
{ 0, 0, 0, 32, 96, 0, 0, 0 },
{ 0, 0, 0, 24, 104, 0, 0, 0 },
{ 0, 0, 0, 16, 112, 0, 0, 0 },
+ { 0, 0, 0, 8, 120, 0, 0, 0 },
+ { 0, 0, 0, 128, 0, 0, 0, 0 },
+ { 0, 0, 0, 120, 8, 0, 0, 0 },
+ { 0, 0, 0, 112, 16, 0, 0, 0 },
+ { 0, 0, 0, 104, 24, 0, 0, 0 },
+ { 0, 0, 0, 96, 32, 0, 0, 0 },
+ { 0, 0, 0, 88, 40, 0, 0, 0 },
+ { 0, 0, 0, 80, 48, 0, 0, 0 },
+ { 0, 0, 0, 72, 56, 0, 0, 0 },
+ { 0, 0, 0, 64, 64, 0, 0, 0 },
+ { 0, 0, 0, 56, 72, 0, 0, 0 },
+ { 0, 0, 0, 48, 80, 0, 0, 0 },
+ { 0, 0, 0, 40, 88, 0, 0, 0 },
+ { 0, 0, 0, 32, 96, 0, 0, 0 },
+ { 0, 0, 0, 24, 104, 0, 0, 0 },
+ { 0, 0, 0, 16, 112, 0, 0, 0 },
{ 0, 0, 0, 8, 120, 0, 0, 0 }
};
#define FILTER_ALPHA 0
#define FILTER_ALPHA_SHARP 1
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8]) = {
+DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8[SUBPEL_SHIFTS*2][8])
+ = {
#if FILTER_ALPHA == 0
/* Lagrangian interpolation filter */
{ 0, 0, 0, 128, 0, 0, 0, 0},
@@ -54,6 +74,22 @@
{ -1, 4, -11, 37, 112, -16, 4, -1},
{ -1, 3, -9, 27, 118, -13, 4, -1},
{ 0, 2, -6, 18, 122, -10, 3, -1},
+ { 0, 1, -3, 8, 126, -5, 1, 0},
+ { 0, 0, 0, 128, 0, 0, 0, 0},
+ { 0, 1, -5, 126, 8, -3, 1, 0},
+ { -1, 3, -10, 122, 18, -6, 2, 0},
+ { -1, 4, -13, 118, 27, -9, 3, -1},
+ { -1, 4, -16, 112, 37, -11, 4, -1},
+ { -1, 5, -18, 105, 48, -14, 4, -1},
+ { -1, 5, -19, 97, 58, -16, 5, -1},
+ { -1, 6, -19, 88, 68, -18, 5, -1},
+ { -1, 6, -19, 78, 78, -19, 6, -1},
+ { -1, 5, -18, 68, 88, -19, 6, -1},
+ { -1, 5, -16, 58, 97, -19, 5, -1},
+ { -1, 4, -14, 48, 105, -18, 5, -1},
+ { -1, 4, -11, 37, 112, -16, 4, -1},
+ { -1, 3, -9, 27, 118, -13, 4, -1},
+ { 0, 2, -6, 18, 122, -10, 3, -1},
{ 0, 1, -3, 8, 126, -5, 1, 0}
#elif FILTER_ALPHA == 50
/* Generated using MATLAB:
@@ -82,7 +118,8 @@
#endif /* FILTER_ALPHA */
};
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8]) = {
+DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_8s[SUBPEL_SHIFTS*2][8])
+ = {
#if FILTER_ALPHA_SHARP == 1
/* dct based filter */
{0, 0, 0, 128, 0, 0, 0, 0},
@@ -100,6 +137,22 @@
{-2, 6, -13, 37, 115, -20, 9, -4},
{-2, 5, -10, 27, 121, -17, 7, -3},
{-1, 3, -6, 17, 125, -13, 5, -2},
+ {0, 1, -3, 8, 127, -7, 3, -1},
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {-1, 3, -7, 127, 8, -3, 1, 0},
+ {-2, 5, -13, 125, 17, -6, 3, -1},
+ {-3, 7, -17, 121, 27, -10, 5, -2},
+ {-4, 9, -20, 115, 37, -13, 6, -2},
+ {-4, 10, -23, 108, 48, -16, 8, -3},
+ {-4, 10, -24, 100, 59, -19, 9, -3},
+ {-4, 11, -24, 90, 70, -21, 10, -4},
+ {-4, 11, -23, 80, 80, -23, 11, -4},
+ {-4, 10, -21, 70, 90, -24, 11, -4},
+ {-3, 9, -19, 59, 100, -24, 10, -4},
+ {-3, 8, -16, 48, 108, -23, 10, -4},
+ {-2, 6, -13, 37, 115, -20, 9, -4},
+ {-2, 5, -10, 27, 121, -17, 7, -3},
+ {-1, 3, -6, 17, 125, -13, 5, -2},
{0, 1, -3, 8, 127, -7, 3, -1}
#elif FILTER_ALPHA_SHARP == 75
/* alpha = 0.75 */
@@ -123,7 +176,7 @@
};
DECLARE_ALIGNED(16, const int16_t,
- vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8]) = {
+ vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS*2][8]) = {
/* 8-tap lowpass filter */
/* Hamming window */
{-1, -7, 32, 80, 32, -7, -1, 0},
@@ -141,10 +194,43 @@
{ 1, -3, -4, 50, 76, 16, -8, 0},
{ 1, -3, -5, 45, 78, 20, -8, 0},
{ 1, -2, -7, 41, 79, 24, -8, 0},
+ { 1, -2, -7, 37, 80, 28, -8, -1},
+ {-1, -7, 32, 80, 32, -7, -1, 0},
+ {-1, -8, 28, 80, 37, -7, -2, 1},
+ { 0, -8, 24, 79, 41, -7, -2, 1},
+ { 0, -8, 20, 78, 45, -5, -3, 1},
+ { 0, -8, 16, 76, 50, -4, -3, 1},
+ { 0, -7, 13, 74, 54, -3, -4, 1},
+ { 1, -7, 9, 71, 58, -1, -4, 1},
+ { 1, -6, 6, 68, 62, 1, -5, 1},
+ { 1, -6, 4, 65, 65, 4, -6, 1},
+ { 1, -5, 1, 62, 68, 6, -6, 1},
+ { 1, -4, -1, 58, 71, 9, -7, 1},
+ { 1, -4, -3, 54, 74, 13, -7, 0},
+ { 1, -3, -4, 50, 76, 16, -8, 0},
+ { 1, -3, -5, 45, 78, 20, -8, 0},
+ { 1, -2, -7, 41, 79, 24, -8, 0},
{ 1, -2, -7, 37, 80, 28, -8, -1}
};
-DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8]) = {
+DECLARE_ALIGNED(16, const int16_t, vp9_sub_pel_filters_6[SUBPEL_SHIFTS*2][8])
+ = {
+ {0, 0, 0, 128, 0, 0, 0, 0},
+ {0, 1, -5, 125, 8, -2, 1, 0},
+ {0, 1, -8, 122, 17, -5, 1, 0},
+ {0, 2, -11, 116, 27, -8, 2, 0},
+ {0, 3, -14, 110, 37, -10, 2, 0},
+ {0, 3, -15, 103, 47, -12, 2, 0},
+ {0, 3, -16, 95, 57, -14, 3, 0},
+ {0, 3, -16, 86, 67, -15, 3, 0},
+ {0, 3, -16, 77, 77, -16, 3, 0},
+ {0, 3, -15, 67, 86, -16, 3, 0},
+ {0, 3, -14, 57, 95, -16, 3, 0},
+ {0, 2, -12, 47, 103, -15, 3, 0},
+ {0, 2, -10, 37, 110, -14, 3, 0},
+ {0, 2, -8, 27, 116, -11, 2, 0},
+ {0, 1, -5, 17, 122, -8, 1, 0},
+ {0, 1, -2, 8, 125, -5, 1, 0},
{0, 0, 0, 128, 0, 0, 0, 0},
{0, 1, -5, 125, 8, -2, 1, 0},
{0, 1, -8, 122, 17, -5, 1, 0},
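
Each filter bank now carries 2 * SUBPEL_SHIFTS rows (see the TODO above
vp9_bilinear_filters), and its consumers index in q4 units where they
previously doubled a q3 phase. A sketch of the lookup, assuming vp9_filter.h
is included and mv_q3 is a hypothetical q3 mv component:

    static const int16_t *pick_filter(int mv_q3) {
      const int mv_q4 = mv_q3 << 1;              /* q3 -> q4 */
      return vp9_sub_pel_filters_8[mv_q4 & 15];  /* was [(mv_q3 & 7) << 1] */
    }

Both index expressions select the same row for any q3 input; the q4 form also
reaches the odd phases that only scaled motion vectors can produce.
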
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -21,11 +21,11 @@
#define SUBPEL_SHIFTS 16
-extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS][8];
-extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS][8];
-extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8];
-extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS][8];
-extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS][8];
+extern const int16_t vp9_bilinear_filters[SUBPEL_SHIFTS*2][8];
+extern const int16_t vp9_sub_pel_filters_6[SUBPEL_SHIFTS*2][8];
+extern const int16_t vp9_sub_pel_filters_8[SUBPEL_SHIFTS*2][8];
+extern const int16_t vp9_sub_pel_filters_8s[SUBPEL_SHIFTS*2][8];
+extern const int16_t vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS*2][8];
// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
// filter kernel as a 2 tap filter.
--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@@ -71,17 +71,6 @@
setup_block(&blockd[block + 4], stride, v, v2, stride,
((block - 16) >> 1) * 4 * stride + (block & 1) * 4, bs);
}
-
- // TODO(jkoleszar): this will move once we're actually scaling.
- xd->scale_factor[0].x_num = 1;
- xd->scale_factor[0].x_den = 1;
- xd->scale_factor[0].y_num = 1;
- xd->scale_factor[0].y_den = 1;
- xd->scale_factor[0].x_offset_q4 = 0;
- xd->scale_factor[0].y_offset_q4 = 0;
- xd->scale_factor[1]= xd->scale_factor[0];
- xd->scale_factor_uv[0] = xd->scale_factor[0];
- xd->scale_factor_uv[1] = xd->scale_factor[1];
}
void vp9_setup_block_dptrs(MACROBLOCKD *xd) {
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -145,6 +145,7 @@
*/
int active_ref_idx[3]; /* each frame can reference 3 buffers */
int new_fb_idx;
+ struct scale_factors active_ref_scale[3];
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -17,9 +17,22 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
-void vp9_setup_interp_filters(MACROBLOCKD *xd,
- INTERPOLATIONFILTERTYPE mcomp_filter_type,
- VP9_COMMON *cm) {
+void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
+ YV12_BUFFER_CONFIG *other,
+ int this_w, int this_h) {
+ int other_w, other_h;
+
+ other_h = other->y_height;
+ other_w = other->y_width;
+ scale->x_num = other_w;
+ scale->x_den = this_w;
+ scale->x_offset_q4 = 0; // calculated per-mb
+ scale->x_step_q4 = 16 * other_w / this_w;
+ scale->y_num = other_h;
+ scale->y_den = this_h;
+ scale->y_offset_q4 = 0; // calculated per-mb
+ scale->y_step_q4 = 16 * other_h / this_h;
+
// TODO(agrange): Investigate the best choice of functions to use here
// for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what
// to do at full-pel offsets. The current selection, where the filter is
@@ -26,17 +39,75 @@
// applied in one direction only, and not at all for 0,0, seems to give the
// best quality, but it may be worth trying an additional mode that does
// do the filtering on full-pel.
- xd->subpix.predict[0][0][0] = vp9_convolve_copy;
- xd->subpix.predict[0][0][1] = vp9_convolve_avg;
- xd->subpix.predict[0][1][0] = vp9_convolve8_vert;
- xd->subpix.predict[0][1][1] = vp9_convolve8_avg_vert;
- xd->subpix.predict[1][0][0] = vp9_convolve8_horiz;
- xd->subpix.predict[1][0][1] = vp9_convolve8_avg_horiz;
- xd->subpix.predict[1][1][0] = vp9_convolve8;
- xd->subpix.predict[1][1][1] = vp9_convolve8_avg;
+ if (scale->x_step_q4 == 16) {
+ if (scale->y_step_q4 == 16) {
+ // No scaling in either direction.
+ scale->predict[0][0][0] = vp9_convolve_copy;
+ scale->predict[0][0][1] = vp9_convolve_avg;
+ scale->predict[0][1][0] = vp9_convolve8_vert;
+ scale->predict[0][1][1] = vp9_convolve8_avg_vert;
+ scale->predict[1][0][0] = vp9_convolve8_horiz;
+ scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
+ } else {
+ // No scaling in x direction. Must always scale in the y direction.
+ scale->predict[0][0][0] = vp9_convolve8_vert;
+ scale->predict[0][0][1] = vp9_convolve8_avg_vert;
+ scale->predict[0][1][0] = vp9_convolve8_vert;
+ scale->predict[0][1][1] = vp9_convolve8_avg_vert;
+ scale->predict[1][0][0] = vp9_convolve8;
+ scale->predict[1][0][1] = vp9_convolve8_avg;
+ }
+ } else {
+ if (scale->y_step_q4 == 16) {
+ // No scaling in the y direction. Must always scale in the x direction.
+ scale->predict[0][0][0] = vp9_convolve8_horiz;
+ scale->predict[0][0][1] = vp9_convolve8_avg_horiz;
+ scale->predict[0][1][0] = vp9_convolve8;
+ scale->predict[0][1][1] = vp9_convolve8_avg;
+ scale->predict[1][0][0] = vp9_convolve8_horiz;
+ scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
+ } else {
+ // Must always scale in both directions.
+ scale->predict[0][0][0] = vp9_convolve8;
+ scale->predict[0][0][1] = vp9_convolve8_avg;
+ scale->predict[0][1][0] = vp9_convolve8;
+ scale->predict[0][1][1] = vp9_convolve8_avg;
+ scale->predict[1][0][0] = vp9_convolve8;
+ scale->predict[1][0][1] = vp9_convolve8_avg;
+ }
+ }
+ // 2D subpel motion always gets filtered in both directions
+ scale->predict[1][1][0] = vp9_convolve8;
+ scale->predict[1][1][1] = vp9_convolve8_avg;
+}
- xd->subpix.x_step_q4 = 16;
- xd->subpix.y_step_q4 = 16;
+void vp9_setup_interp_filters(MACROBLOCKD *xd,
+ INTERPOLATIONFILTERTYPE mcomp_filter_type,
+ VP9_COMMON *cm) {
+ int i;
+
+ /* Calculate scaling factors for each of the 3 available references */
+ for (i = 0; i < 3; ++i) {
+ if (cm->active_ref_idx[i] >= NUM_YV12_BUFFERS) {
+ memset(&cm->active_ref_scale[i], 0, sizeof(cm->active_ref_scale[i]));
+ continue;
+ }
+
+ vp9_setup_scale_factors_for_frame(&cm->active_ref_scale[i],
+ &cm->yv12_fb[cm->active_ref_idx[i]],
+ cm->mb_cols * 16, cm->mb_rows * 16);
+ }
+
+ if (xd->mode_info_context) {
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+
+ set_scale_factors(xd,
+ mbmi->ref_frame - 1,
+ mbmi->second_ref_frame - 1,
+ cm->active_ref_scale);
+ }
+
+
switch (mcomp_filter_type) {
case EIGHTTAP:
case SWITCHABLE:
@@ -146,30 +217,50 @@
}
}
-static int32_t scale_motion_vector_component(int mv,
- int num,
- int den,
- int offset_q4) {
+static void set_scaled_offsets(struct scale_factors *scale,
+ int row, int col) {
+ const int x_q4 = 16 * col;
+ const int y_q4 = 16 * row;
+
+ scale->x_offset_q4 = (x_q4 * scale->x_num / scale->x_den) & 0xf;
+ scale->y_offset_q4 = (y_q4 * scale->y_num / scale->y_den) & 0xf;
+}
+
+static int32_t scale_motion_vector_component_q3(int mv_q3,
+ int num,
+ int den,
+ int offset_q4) {
// returns the scaled and offset value of the mv component.
- // input and output mv have the same units -- this would work with either q3
- // or q4 motion vectors. Offset is given as a q4 fractional number.
- const int32_t mv_q4 = mv * 16;
+ const int32_t mv_q4 = mv_q3 << 1;
/* TODO(jkoleszar): make fixed point, or as a second multiply? */
- return (mv_q4 * num / den + offset_q4 + 8) >> 4;
+ return mv_q4 * num / den + offset_q4;
}
-static int_mv32 scale_motion_vector(const int_mv *src_mv,
- const struct scale_factors *scale) {
+static int32_t scale_motion_vector_component_q4(int mv_q4,
+ int num,
+ int den,
+ int offset_q4) {
+ // returns the scaled and offset value of the mv component.
+
+ /* TODO(jkoleszar): make fixed point, or as a second multiply? */
+ return mv_q4 * num / den + offset_q4;
+}
+
+static int_mv32 scale_motion_vector_q3_to_q4(
+ const int_mv *src_mv,
+ const struct scale_factors *scale) {
// returns mv * scale + offset
int_mv32 result;
- result.as_mv.row = scale_motion_vector_component(src_mv->as_mv.row,
- scale->y_num, scale->y_den,
- scale->y_offset_q4);
- result.as_mv.col = scale_motion_vector_component(src_mv->as_mv.col,
- scale->x_num, scale->x_den,
- scale->x_offset_q4);
+ result.as_mv.row = scale_motion_vector_component_q3(src_mv->as_mv.row,
+ scale->y_num,
+ scale->y_den,
+ scale->y_offset_q4);
+ result.as_mv.col = scale_motion_vector_component_q3(src_mv->as_mv.col,
+ scale->x_num,
+ scale->x_den,
+ scale->x_offset_q4);
return result;
}
@@ -181,12 +272,13 @@
const struct subpix_fn_table *subpix) {
int_mv32 mv;
- mv = scale_motion_vector(mv_q3, scale);
- src = src + (mv.as_mv.row >> 3) * src_stride + (mv.as_mv.col >> 3);
- subpix->predict[!!(mv.as_mv.col & 7)][!!(mv.as_mv.row & 7)][do_avg](
+ mv = scale_motion_vector_q3_to_q4(mv_q3, scale);
+ src = src + (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4);
+
+ scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][do_avg](
src, src_stride, dst, dst_stride,
- subpix->filter_x[(mv.as_mv.col & 7) << 1], subpix->x_step_q4,
- subpix->filter_y[(mv.as_mv.row & 7) << 1], subpix->y_step_q4,
+ subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4,
+ subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4,
w, h);
}
@@ -205,19 +297,19 @@
const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4)
+ (frac_mv_q4->as_mv.col & 0xf);
const int scaled_mv_row_q4 =
- scale_motion_vector_component(mv_row_q4, scale->y_num, scale->y_den,
- scale->y_offset_q4);
+ scale_motion_vector_component_q4(mv_row_q4, scale->y_num, scale->y_den,
+ scale->y_offset_q4);
const int scaled_mv_col_q4 =
- scale_motion_vector_component(mv_col_q4, scale->x_num, scale->x_den,
- scale->x_offset_q4);
+ scale_motion_vector_component_q4(mv_col_q4, scale->x_num, scale->x_den,
+ scale->x_offset_q4);
const int subpel_x = scaled_mv_col_q4 & 15;
const int subpel_y = scaled_mv_row_q4 & 15;
src = src + (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4);
- subpix->predict[!!subpel_x][!!subpel_y][do_avg](
+ scale->predict[!!subpel_x][!!subpel_y][do_avg](
src, src_stride, dst, dst_stride,
- subpix->filter_x[subpel_x], subpix->x_step_q4,
- subpix->filter_y[subpel_y], subpix->y_step_q4,
+ subpix->filter_x[subpel_x], scale->x_step_q4,
+ subpix->filter_y[subpel_y], scale->y_step_q4,
w, h);
}
@@ -261,7 +353,9 @@
}
/*encoder only*/
-void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd) {
+void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
int i, j;
BLOCKD *blockd = xd->block;
@@ -339,11 +433,17 @@
for (i = 16; i < 24; i += 2) {
const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
+ const int x = 4 * (i & 1);
+ const int y = ((i - 16) >> 1) * 4;
+
int which_mv;
BLOCKD *d0 = &blockd[i];
BLOCKD *d1 = &blockd[i + 1];
for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
+ set_scaled_offsets(&xd->scale_factor_uv[which_mv],
+ mb_row * 8 + y, mb_col * 8 + x);
+
build_2x1_inter_predictor(d0, d1, xd->scale_factor_uv, 4, 8, which_mv,
&xd->subpix);
}
@@ -389,7 +489,9 @@
/*encoder only*/
void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
uint8_t *dst_y,
- int dst_ystride) {
+ int dst_ystride,
+ int mb_row,
+ int mb_col) {
const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
int which_mv;
@@ -399,14 +501,19 @@
: xd->mode_info_context->mbmi.need_to_clamp_mvs;
uint8_t *base_pre;
int_mv ymv;
+ int pre_stride;
ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
base_pre = which_mv ? xd->second_pre.y_buffer
: xd->pre.y_buffer;
+ pre_stride = which_mv ? xd->second_pre.y_stride
+ : xd->pre.y_stride;
if (clamp_mvs)
clamp_mv_to_umv_border(&ymv.as_mv, xd);
- vp9_build_inter_predictor(base_pre, xd->block[0].pre_stride,
+ set_scaled_offsets(&xd->scale_factor[which_mv], mb_row * 16, mb_col * 16);
+
+ vp9_build_inter_predictor(base_pre, pre_stride,
dst_y, dst_ystride,
&ymv, &xd->scale_factor[which_mv],
16, 16, which_mv, &xd->subpix);
@@ -416,7 +523,9 @@
void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
uint8_t *dst_u,
uint8_t *dst_v,
- int dst_uvstride) {
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
int which_mv;
@@ -425,7 +534,8 @@
which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
: xd->mode_info_context->mbmi.need_to_clamp_mvs;
uint8_t *uptr, *vptr;
- int pre_stride = xd->block[0].pre_stride;
+ int pre_stride = which_mv ? xd->second_pre.y_stride
+ : xd->pre.y_stride;
int_mv _o16x16mv;
int_mv _16x16mv;
@@ -456,6 +566,9 @@
uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
+ set_scaled_offsets(&xd->scale_factor_uv[which_mv],
+ mb_row * 16, mb_col * 16);
+
vp9_build_inter_predictor_q4(uptr, pre_stride,
dst_u, dst_uvstride,
&_16x16mv, &_o16x16mv,
@@ -475,7 +588,9 @@
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride) {
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
*v2 = x->second_pre.v_buffer;
@@ -488,6 +603,7 @@
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
+ int scaled_uv_offset;
x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3);
x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 16) << 3);
@@ -494,14 +610,29 @@
x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3);
x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 16) << 3);
- x->pre.y_buffer = y1 + y_idx * 16 * x->pre.y_stride + x_idx * 16;
- x->pre.u_buffer = u1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
- x->pre.v_buffer = v1 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+ x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->pre.y_stride,
+ &x->scale_factor[0]);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+ y_idx * 8,
+ x->pre.uv_stride,
+ &x->scale_factor_uv[0]);
+ x->pre.u_buffer = u1 + scaled_uv_offset;
+ x->pre.v_buffer = v1 + scaled_uv_offset;
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- x->second_pre.y_buffer = y2 + y_idx * 16 * x->pre.y_stride + x_idx * 16;
- x->second_pre.u_buffer = u2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
- x->second_pre.v_buffer = v2 + y_idx * 8 * x->pre.uv_stride + x_idx * 8;
+ x->second_pre.y_buffer = y2 +
+ scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->second_pre.y_stride,
+ &x->scale_factor[1]);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 8,
+ y_idx * 8,
+ x->second_pre.uv_stride,
+ &x->scale_factor_uv[1]);
+ x->second_pre.u_buffer = u2 + scaled_uv_offset;
+ x->second_pre.v_buffer = v2 + scaled_uv_offset;
}
vp9_build_inter16x16_predictors_mb(x,
@@ -508,7 +639,7 @@
dst_y + y_idx * 16 * dst_ystride + x_idx * 16,
dst_u + y_idx * 8 * dst_uvstride + x_idx * 8,
dst_v + y_idx * 8 * dst_uvstride + x_idx * 8,
- dst_ystride, dst_uvstride);
+ dst_ystride, dst_uvstride, mb_row + y_idx, mb_col + x_idx);
}
x->mb_to_top_edge = edge[0];
@@ -539,7 +670,9 @@
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride) {
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer;
uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer,
*v2 = x->second_pre.v_buffer;
@@ -552,6 +685,7 @@
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
+ int scaled_uv_offset;
x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3);
x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3);
@@ -558,14 +692,29 @@
x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3);
x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3);
- x->pre.y_buffer = y1 + y_idx * 32 * x->pre.y_stride + x_idx * 32;
- x->pre.u_buffer = u1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
- x->pre.v_buffer = v1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
+ x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 32,
+ y_idx * 32,
+ x->pre.y_stride,
+ &x->scale_factor[0]);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->pre.uv_stride,
+ &x->scale_factor_uv[0]);
+ x->pre.u_buffer = u1 + scaled_uv_offset;
+ x->pre.v_buffer = v1 + scaled_uv_offset;
if (x->mode_info_context->mbmi.second_ref_frame > 0) {
- x->second_pre.y_buffer = y2 + y_idx * 32 * x->pre.y_stride + x_idx * 32;
- x->second_pre.u_buffer = u2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
- x->second_pre.v_buffer = v2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16;
+ x->second_pre.y_buffer = y2 +
+ scaled_buffer_offset(x_idx * 32,
+ y_idx * 32,
+ x->second_pre.y_stride,
+ &x->scale_factor[1]);
+ scaled_uv_offset = scaled_buffer_offset(x_idx * 16,
+ y_idx * 16,
+ x->second_pre.uv_stride,
+ &x->scale_factor_uv[1]);
+ x->second_pre.u_buffer = u2 + scaled_uv_offset;
+ x->second_pre.v_buffer = v2 + scaled_uv_offset;
}
vp9_build_inter32x32_predictors_sb(x,
@@ -572,7 +721,7 @@
dst_y + y_idx * 32 * dst_ystride + x_idx * 32,
dst_u + y_idx * 16 * dst_uvstride + x_idx * 16,
dst_v + y_idx * 16 * dst_uvstride + x_idx * 16,
- dst_ystride, dst_uvstride);
+ dst_ystride, dst_uvstride, mb_row + y_idx * 2, mb_col + x_idx * 2);
}
x->mb_to_top_edge = edge[0];
@@ -769,17 +918,23 @@
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride) {
- vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride);
- vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride);
+ int dst_uvstride,
+ int mb_row,
+ int mb_col) {
+ vp9_build_inter16x16_predictors_mby(xd, dst_y, dst_ystride, mb_row, mb_col);
+ vp9_build_inter16x16_predictors_mbuv(xd, dst_u, dst_v, dst_uvstride,
+ mb_row, mb_col);
}
-void vp9_build_inter_predictors_mb(MACROBLOCKD *xd) {
+void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col) {
if (xd->mode_info_context->mbmi.mode != SPLITMV) {
vp9_build_inter16x16_predictors_mb(xd, xd->predictor,
&xd->predictor[256],
- &xd->predictor[320], 16, 8);
+ &xd->predictor[320], 16, 8,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
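
A worked example of the q3 -> q4 scaling path above (sketch; the values are
illustrative): a 1.5-pel component against a reference twice the coded size.

    static int32_t scale_q3_to_q4(int mv_q3, int num, int den, int offset_q4) {
      const int32_t mv_q4 = mv_q3 << 1;      /* q3 -> q4 */
      return mv_q4 * num / den + offset_q4;  /* scale, then per-mb bias */
    }
    /* scale_q3_to_q4(12, 2, 1, 0) == 48: the predictor advances
     * 48 >> 4 == 3 full pels and uses subpel phase 48 & 15 == 0. */
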
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -18,12 +18,16 @@
void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
uint8_t *dst_y,
- int dst_ystride);
+ int dst_ystride,
+ int mb_row,
+ int mb_col);
void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
uint8_t *dst_u,
uint8_t *dst_v,
- int dst_uvstride);
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
void vp9_build_inter16x16_predictors_mb(MACROBLOCKD *xd,
uint8_t *dst_y,
@@ -30,7 +34,9 @@
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride);
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x,
uint8_t *dst_y,
@@ -37,7 +43,9 @@
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride);
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x,
uint8_t *dst_y,
@@ -44,16 +52,26 @@
uint8_t *dst_u,
uint8_t *dst_v,
int dst_ystride,
- int dst_uvstride);
+ int dst_uvstride,
+ int mb_row,
+ int mb_col);
-void vp9_build_inter_predictors_mb(MACROBLOCKD *xd);
+void vp9_build_inter_predictors_mb(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col);
-void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd);
+void vp9_build_inter4x4_predictors_mbuv(MACROBLOCKD *xd,
+ int mb_row,
+ int mb_col);
void vp9_setup_interp_filters(MACROBLOCKD *xd,
INTERPOLATIONFILTERTYPE filter,
VP9_COMMON *cm);
+void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
+ YV12_BUFFER_CONFIG *other,
+ int this_w, int this_h);
+
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int_mv *mv_q3,
@@ -68,5 +86,56 @@
const struct scale_factors *scale,
int w, int h, int do_avg,
const struct subpix_fn_table *subpix);
+
+static int scale_value_x(int val, const struct scale_factors *scale) {
+ return val * scale->x_num / scale->x_den;
+}
+
+static int scale_value_y(int val, const struct scale_factors *scale) {
+ return val * scale->y_num / scale->y_den;
+}
+
+static int scaled_buffer_offset(int x_offset,
+ int y_offset,
+ int stride,
+ const struct scale_factors *scale) {
+ return scale_value_y(y_offset, scale) * stride +
+ scale_value_x(x_offset, scale);
+}
+
+static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
+ const YV12_BUFFER_CONFIG *src,
+ int mb_row, int mb_col,
+ const struct scale_factors *scale,
+ const struct scale_factors *scale_uv) {
+ const int recon_y_stride = src->y_stride;
+ const int recon_uv_stride = src->uv_stride;
+ int recon_yoffset;
+ int recon_uvoffset;
+
+ if (scale) {
+ recon_yoffset = scaled_buffer_offset(16 * mb_col, 16 * mb_row,
+ recon_y_stride, scale);
+ recon_uvoffset = scaled_buffer_offset(8 * mb_col, 8 * mb_row,
+ recon_uv_stride, scale_uv);
+ } else {
+ recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
+ recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
+ }
+ *dst = *src;
+ dst->y_buffer += recon_yoffset;
+ dst->u_buffer += recon_uvoffset;
+ dst->v_buffer += recon_uvoffset;
+}
+
+static void set_scale_factors(MACROBLOCKD *xd,
+ int ref0, int ref1,
+ struct scale_factors scale_factor[MAX_REF_FRAMES]) {
+
+ xd->scale_factor[0] = scale_factor[ref0 >= 0 ? ref0 : 0];
+ xd->scale_factor[1] = scale_factor[ref1 >= 0 ? ref1 : 0];
+ xd->scale_factor_uv[0] = xd->scale_factor[0];
+ xd->scale_factor_uv[1] = xd->scale_factor[1];
+}
#endif // VP9_COMMON_VP9_RECONINTER_H_
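
Usage sketch for the new static helpers (hypothetical numbers: a
half-resolution reference, so x_num/x_den == y_num/y_den == 1/2, with a
y_stride of 640):

    static int mb_offset_half_res(int mb_row, int mb_col, int stride) {
      const int x = 16 * mb_col / 2;  /* scale_value_x() */
      const int y = 16 * mb_row / 2;  /* scale_value_y() */
      return y * stride + x;          /* scaled_buffer_offset() */
    }
    /* mb_offset_half_res(2, 3, 640) == 16 * 640 + 24 == 10264, versus
     * 32 * 640 + 48 == 20528 for an unscaled reference. */
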
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -12,6 +12,7 @@
#include "vp9/decoder/vp9_treereader.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_common.h"
@@ -749,21 +750,25 @@
int_mv nearest_second, nearby_second, best_mv_second;
vp9_prob mv_ref_p [VP9_MVREFS - 1];
- int recon_y_stride, recon_yoffset;
- int recon_uv_stride, recon_uvoffset;
MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame;
{
int ref_fb_idx;
+ int recon_y_stride, recon_yoffset;
+ int recon_uv_stride, recon_uvoffset;
/* Select the appropriate reference frame for this MB */
ref_fb_idx = cm->active_ref_idx[ref_frame - 1];
- recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride ;
+ recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
- recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
- recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+ recon_yoffset = scaled_buffer_offset(mb_col * 16, mb_row * 16,
+ recon_y_stride,
+ &xd->scale_factor[0]);
+ recon_uvoffset = scaled_buffer_offset(mb_col * 8, mb_row * 8,
+ recon_uv_stride,
+ &xd->scale_factor_uv[0]);
xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
@@ -853,9 +858,21 @@
mbmi->second_ref_frame = 1;
if (mbmi->second_ref_frame > 0) {
int second_ref_fb_idx;
+ int recon_y_stride, recon_yoffset;
+ int recon_uv_stride, recon_uvoffset;
+
/* Select the appropriate reference frame for this MB */
second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
+ recon_y_stride = cm->yv12_fb[second_ref_fb_idx].y_stride;
+ recon_uv_stride = cm->yv12_fb[second_ref_fb_idx].uv_stride;
+
+ recon_yoffset = scaled_buffer_offset(mb_col * 16, mb_row * 16,
+ recon_y_stride,
+ &xd->scale_factor[1]);
+ recon_uvoffset = scaled_buffer_offset(mb_col * 8, mb_row * 8,
+ recon_uv_stride,
+ &xd->scale_factor_uv[1]);
xd->second_pre.y_buffer =
cm->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset;
xd->second_pre.u_buffer =
@@ -1089,7 +1106,6 @@
break;
case NEWMV:
-
read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc);
read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc,
xd->allow_high_precision_mv);
@@ -1230,8 +1246,12 @@
MODE_INFO *mi = xd->mode_info_context;
MODE_INFO *prev_mi = xd->prev_mode_info_context;
- if (pbi->common.frame_type == KEY_FRAME)
+ if (pbi->common.frame_type == KEY_FRAME) {
kfread_modes(pbi, mi, mb_row, mb_col, bc);
- else
+ } else {
read_mb_modes_mv(pbi, mi, &mi->mbmi, prev_mi, mb_row, mb_col, bc);
+ set_scale_factors(xd,
+ mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1,
+ pbi->common.active_ref_scale);
+ }
}
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -147,7 +147,8 @@
/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
* to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
*/
-static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) {
+static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
+ int mb_row, int mb_col) {
BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
@@ -168,7 +169,8 @@
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else if (sb_type == BLOCK_SIZE_SB32X32) {
vp9_build_inter32x32_predictors_sb(xd,
xd->dst.y_buffer,
@@ -175,7 +177,8 @@
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else {
vp9_build_inter16x16_predictors_mb(xd,
xd->dst.y_buffer,
@@ -182,7 +185,8 @@
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd,
@@ -599,7 +603,7 @@
/* Special case: Force the loopfilter to skip when eobtotal and
* mb_skip_coeff are zero.
*/
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
@@ -610,7 +614,8 @@
} else {
vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
/* dequantization and idct */
@@ -720,7 +725,7 @@
/* Special case: Force the loopfilter to skip when eobtotal and
* mb_skip_coeff are zero.
*/
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
@@ -731,7 +736,8 @@
} else {
vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
/* dequantization and idct */
@@ -832,7 +838,7 @@
/* Special case: Force the loopfilter to skip when eobtotal and
mb_skip_coeff are zero. */
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
- skip_recon_mb(pbi, xd);
+ skip_recon_mb(pbi, xd, mb_row, mb_col);
return;
}
#ifdef DEC_DEBUG
@@ -859,7 +865,7 @@
xd->mode_info_context->mbmi.mode, tx_size,
xd->mode_info_context->mbmi.interp_filter);
#endif
- vp9_build_inter_predictors_mb(xd);
+ vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
}
if (tx_size == TX_16X16) {
@@ -966,19 +972,15 @@
MB_MODE_INFO *const mbmi = &mi->mbmi;
if (mbmi->ref_frame > INTRA_FRAME) {
- int ref_fb_idx, ref_yoffset, ref_uvoffset, ref_y_stride, ref_uv_stride;
+ int ref_fb_idx;
/* Select the appropriate reference frame for this MB */
ref_fb_idx = cm->active_ref_idx[mbmi->ref_frame - 1];
+ xd->scale_factor[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
+ xd->scale_factor_uv[0] = cm->active_ref_scale[mbmi->ref_frame - 1];
+ setup_pred_block(&xd->pre, &cm->yv12_fb[ref_fb_idx], mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
- ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
- ref_yoffset = mb_row * 16 * ref_y_stride + 16 * mb_col;
- xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + ref_yoffset;
- ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
- ref_uvoffset = mb_row * 8 * ref_uv_stride + 8 * mb_col;
- xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + ref_uvoffset;
- xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + ref_uvoffset;
-
/* propagate errors from reference frames */
xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted;
@@ -988,12 +990,9 @@
/* Select the appropriate reference frame for this MB */
second_ref_fb_idx = cm->active_ref_idx[mbmi->second_ref_frame - 1];
- xd->second_pre.y_buffer =
- cm->yv12_fb[second_ref_fb_idx].y_buffer + ref_yoffset;
- xd->second_pre.u_buffer =
- cm->yv12_fb[second_ref_fb_idx].u_buffer + ref_uvoffset;
- xd->second_pre.v_buffer =
- cm->yv12_fb[second_ref_fb_idx].v_buffer + ref_uvoffset;
+ setup_pred_block(&xd->second_pre, &cm->yv12_fb[second_ref_fb_idx],
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
/* propagate errors from reference frames */
xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted;
@@ -1204,6 +1203,26 @@
}
}
+static void update_frame_size(VP9D_COMP *pbi) {
+ VP9_COMMON *cm = &pbi->common;
+
+ /* our internal buffers are always multiples of 16 */
+ int width = (cm->Width + 15) & ~15;
+ int height = (cm->Height + 15) & ~15;
+
+ cm->mb_rows = height >> 4;
+ cm->mb_cols = width >> 4;
+ cm->MBs = cm->mb_rows * cm->mb_cols;
+ cm->mode_info_stride = cm->mb_cols + 1;
+ memset(cm->mip, 0,
+ (cm->mb_cols + 1) * (cm->mb_rows + 1) * sizeof(MODE_INFO));
+ vp9_update_mode_info_border(cm, cm->mip);
+
+ cm->mi = cm->mip + cm->mode_info_stride + 1;
+ cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
+ vp9_update_mode_info_in_image(cm, cm->mi);
+}
+
int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
BOOL_DECODER header_bc, residual_bc;
VP9_COMMON *const pc = &pbi->common;
@@ -1281,9 +1300,25 @@
"Invalid frame height");
}
- if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height))
- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate frame buffers");
+ if (!pbi->initial_width || !pbi->initial_height) {
+ if (vp9_alloc_frame_buffers(pc, pc->Width, pc->Height))
+ vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffers");
+ pbi->initial_width = pc->Width;
+ pbi->initial_height = pc->Height;
+ }
+
+ if (pc->Width > pbi->initial_width) {
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Frame width too large");
+ }
+
+ if (pc->Height > pbi->initial_height) {
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Frame height too large");
+ }
+
+ update_frame_size(pbi);
}
}
}
@@ -1294,6 +1329,11 @@
}
init_frame(pbi);
+
+ /* Reset the frame pointers to the current frame size */
+ vp8_yv12_realloc_frame_buffer(&pc->yv12_fb[pc->new_fb_idx],
+ pc->mb_cols * 16, pc->mb_rows * 16,
+ VP9BORDERINPIXELS);
if (vp9_start_decode(&header_bc, data,
(unsigned int)first_partition_length_in_bytes))
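
The decoder now allocates its buffers once, at the first frame's size, rejects
any frame larger than that, and re-derives the macroblock grid per frame.
update_frame_size() rounds the coded size up to whole macroblocks, e.g.
(sketch; 354x288 is an arbitrary example size):

    const int width   = (354 + 15) & ~15;  /* 368 */
    const int height  = (288 + 15) & ~15;  /* 288 */
    const int mb_cols = width >> 4;        /* 23 */
    const int mb_rows = height >> 4;       /* 18 */
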
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -38,6 +38,8 @@
int decoded_key_frame;
+ int initial_width;
+ int initial_height;
} VP9D_COMP;
int vp9_decode_frame(VP9D_COMP *cpi, const unsigned char **p_data_end);
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -654,7 +654,7 @@
// Set up destination pointers
setup_pred_block(&xd->dst,
&cm->yv12_fb[dst_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col, NULL, NULL);
/* Set up limit values for MV components to prevent them from
* extending beyond the UMV borders assuming 16x16 block size */
@@ -679,7 +679,7 @@
xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end);
/* set up source buffers */
- setup_pred_block(&x->src, cpi->Source, mb_row, mb_col);
+ setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL);
/* R/D setup */
x->rddiv = cpi->RDDIV;
@@ -1272,9 +1272,6 @@
totalrate = 0;
- // Functions setup for all frame types so we can use MC in AltRef
- vp9_setup_interp_filters(xd, cm->mcomp_filter_type, cm);
-
// Reset frame count of inter 0,0 motion vector usage.
cpi->inter_zz_count = 0;
@@ -2100,7 +2097,8 @@
setup_pred_block(&xd->pre,
&cpi->common.yv12_fb[ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (mbmi->second_ref_frame > 0) {
int second_ref_fb_idx;
@@ -2114,11 +2112,12 @@
setup_pred_block(&xd->second_pre,
&cpi->common.yv12_fb[second_ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
if (!x->skip) {
- vp9_encode_inter16x16(x);
+ vp9_encode_inter16x16(x, mb_row, mb_col);
// Clear mb_skip_coeff if mb_no_coeff_skip is not set
if (!cpi->common.mb_no_coeff_skip)
@@ -2130,7 +2129,8 @@
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
vp9_build_interintra_16x16_predictors_mb(xd,
@@ -2327,7 +2327,8 @@
setup_pred_block(&xd->pre,
&cpi->common.yv12_fb[ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
int second_ref_fb_idx;
@@ -2341,12 +2342,14 @@
setup_pred_block(&xd->second_pre,
&cpi->common.yv12_fb[second_ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
@@ -2553,7 +2556,8 @@
setup_pred_block(&xd->pre,
&cpi->common.yv12_fb[ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[0], &xd->scale_factor_uv[0]);
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
int second_ref_fb_idx;
@@ -2567,12 +2571,14 @@
setup_pred_block(&xd->second_pre,
&cpi->common.yv12_fb[second_ref_fb_idx],
- mb_row, mb_col);
+ mb_row, mb_col,
+ &xd->scale_factor[1], &xd->scale_factor_uv[1]);
}
vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer,
xd->dst.u_buffer, xd->dst.v_buffer,
- xd->dst.y_stride, xd->dst.uv_stride);
+ xd->dst.y_stride, xd->dst.uv_stride,
+ mb_row, mb_col);
}
if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) {
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -684,10 +684,10 @@
}
}
-void vp9_encode_inter16x16(MACROBLOCK *x) {
+void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col) {
MACROBLOCKD *const xd = &x->e_mbd;
- vp9_build_inter_predictors_mb(xd);
+ vp9_build_inter_predictors_mb(xd, mb_row, mb_col);
subtract_mb(x);
vp9_fidct_mb(x);
vp9_recon_mb(xd);
@@ -694,11 +694,11 @@
}
/* this function is used by first pass only */
-void vp9_encode_inter16x16y(MACROBLOCK *x) {
+void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col) {
MACROBLOCKD *xd = &x->e_mbd;
BLOCK *b = &x->block[0];
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col);
vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -23,7 +23,7 @@
#include "vp9/encoder/vp9_onyx_int.h"
struct VP9_ENCODER_RTCD;
-void vp9_encode_inter16x16(MACROBLOCK *x);
+void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col);
void vp9_transform_mbuv_4x4(MACROBLOCK *x);
void vp9_transform_mby_4x4(MACROBLOCK *x);
@@ -30,7 +30,7 @@
void vp9_optimize_mby_4x4(MACROBLOCK *x);
void vp9_optimize_mbuv_4x4(MACROBLOCK *x);
-void vp9_encode_inter16x16y(MACROBLOCK *x);
+void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col);
void vp9_transform_mb_8x8(MACROBLOCK *mb);
void vp9_transform_mby_8x8(MACROBLOCK *x);
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -613,7 +613,7 @@
this_error = motion_error;
vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- vp9_encode_inter16x16y(x);
+ vp9_encode_inter16x16y(x, mb_row, mb_col);
sum_mvr += mv.as_mv.row;
sum_mvr_abs += abs(mv.as_mv.row);
sum_mvc += mv.as_mv.col;
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -20,7 +20,9 @@
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
int_mv *ref_mv,
- int_mv *dst_mv) {
+ int_mv *dst_mv,
+ int mb_row,
+ int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
BLOCK *b = &x->block[0];
@@ -72,7 +74,7 @@
}
vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16, mb_row, mb_col);
best_err = vp9_sad16x16(xd->dst.y_buffer, xd->dst.y_stride,
xd->predictor, 16, INT_MAX);
@@ -93,8 +95,9 @@
YV12_BUFFER_CONFIG *buf,
int buf_mb_y_offset,
YV12_BUFFER_CONFIG *ref,
- int mb_y_offset
-) {
+ int mb_y_offset,
+ int mb_row,
+ int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
unsigned int err, tmp_err;
@@ -124,7 +127,7 @@
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search
- tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv);
+ tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
if (tmp_err < err) {
err = tmp_err;
dst_mv->as_int = tmp_mv.as_int;
@@ -136,7 +139,8 @@
int_mv zero_ref_mv, tmp_mv;
zero_ref_mv.as_int = 0;
- tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv);
+ tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
+ mb_row, mb_col);
if (tmp_err < err) {
dst_mv->as_int = tmp_mv.as_int;
err = tmp_err;
@@ -229,7 +233,9 @@
int gld_y_offset,
YV12_BUFFER_CONFIG *alt_ref,
int_mv *prev_alt_ref_mv,
- int arf_y_offset
+ int arf_y_offset,
+ int mb_row,
+ int mb_col
) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -249,7 +255,8 @@
int g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv,
&stats->ref[GOLDEN_FRAME].m.mv,
buf, mb_y_offset,
- golden_ref, gld_y_offset);
+ golden_ref, gld_y_offset,
+ mb_row, mb_col);
stats->ref[GOLDEN_FRAME].err = g_motion_error;
} else {
stats->ref[GOLDEN_FRAME].err = INT_MAX;
@@ -326,7 +333,8 @@
update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
golden_ref, &gld_left_mv, gld_y_in_offset,
- alt_ref, &arf_left_mv, arf_y_in_offset);
+ alt_ref, &arf_left_mv, arf_y_in_offset,
+ mb_row, mb_col);
arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
if (mb_col == 0) {
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -10,6 +10,7 @@
#include "vpx_config.h"
+#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_onyx_int.h"
@@ -2198,6 +2199,69 @@
}
#endif
+static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
+ YV12_BUFFER_CONFIG *dst_fb) {
+ const int in_w = src_fb->y_width;
+ const int in_h = src_fb->y_height;
+ const int out_w = dst_fb->y_width;
+ const int out_h = dst_fb->y_height;
+ int x, y;
+
+ for (y = 0; y < out_h; y += 16) {
+ for (x = 0; x < out_w; x += 16) {
+ int x_q4 = x * 16 * in_w / out_w;
+ int y_q4 = y * 16 * in_h / out_h;
+ uint8_t *src, *dst;
+ int src_stride, dst_stride;
+
+
+ src = src_fb->y_buffer +
+ y * in_h / out_h * src_fb->y_stride +
+ x * in_w / out_w;
+ dst = dst_fb->y_buffer +
+ y * dst_fb->y_stride +
+ x;
+ src_stride = src_fb->y_stride;
+ dst_stride = dst_fb->y_stride;
+
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 16, 16);
+
+ x_q4 >>= 1;
+ y_q4 >>= 1;
+ src_stride = src_fb->uv_stride;
+ dst_stride = dst_fb->uv_stride;
+
+ src = src_fb->u_buffer +
+ y / 2 * in_h / out_h * src_fb->uv_stride +
+ x / 2 * in_w / out_w;
+ dst = dst_fb->u_buffer +
+ y / 2 * dst_fb->uv_stride +
+ x / 2;
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 8, 8);
+
+ src = src_fb->v_buffer +
+ y / 2 * in_h / out_h * src_fb->uv_stride +
+ x / 2 * in_w / out_w;
+ dst = dst_fb->v_buffer +
+ y / 2 * dst_fb->uv_stride +
+ x / 2;
+ vp9_convolve8(src, src_stride, dst, dst_stride,
+ vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
+ vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+ 8, 8);
+ }
+ }
+
+ vp8_yv12_extend_frame_borders(dst_fb);
+}
+
+
static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
@@ -2583,6 +2647,15 @@
int mcomp_filter_index = 0;
int64_t mcomp_filter_cost[4];
+ /* Scale the source buffer, if required */
+ if (cm->Width != cpi->un_scaled_source->y_width ||
+ cm->Height != cpi->un_scaled_source->y_height) {
+ scale_and_extend_frame(cpi->un_scaled_source, &cpi->scaled_source);
+ cpi->Source = &cpi->scaled_source;
+ } else {
+ cpi->Source = cpi->un_scaled_source;
+ }
+
// Clear down mmx registers to allow floating point in what follows
vp9_clear_system_state();
@@ -3760,23 +3833,6 @@
vp8_yv12_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx],
cm->mb_cols * 16, cm->mb_rows * 16,
VP9BORDERINPIXELS);
-
- /* Disable any references that have different size */
- if ((cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_width !=
- cm->yv12_fb[cm->new_fb_idx].y_width) ||
- (cm->yv12_fb[cm->active_ref_idx[cpi->lst_fb_idx]].y_height !=
- cm->yv12_fb[cm->new_fb_idx].y_height))
- cpi->ref_frame_flags &= ~VP9_LAST_FLAG;
- if ((cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]].y_width !=
- cm->yv12_fb[cm->new_fb_idx].y_width) ||
- (cm->yv12_fb[cm->active_ref_idx[cpi->gld_fb_idx]].y_height !=
- cm->yv12_fb[cm->new_fb_idx].y_height))
- cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
- if ((cm->yv12_fb[cm->active_ref_idx[cpi->alt_fb_idx]].y_width !=
- cm->yv12_fb[cm->new_fb_idx].y_width) ||
- (cm->yv12_fb[cm->active_ref_idx[cpi->alt_fb_idx]].y_height !=
- cm->yv12_fb[cm->new_fb_idx].y_height))
- cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm);
if (cpi->pass == 1) {
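
scale_and_extend_frame() above walks the output in 16x16 luma blocks and
restarts the convolution at whatever q4 phase each block's left edge lands on
in the source. A worked example (sketch; assumes a 2x upscale with
in_w == 320, out_w == 640):

    static void block_params(int x, int in_w, int out_w,
                             int *src_x, int *phase_q4, int *step_q4) {
      *step_q4  = 16 * in_w / out_w;  /* 8: half a source pel per output pel */
      *phase_q4 = (x * 16 * in_w / out_w) & 0xf;
      *src_x    = x * in_w / out_w;   /* block start column in the source */
    }
    /* block_params(16, 320, 640, ...) yields src_x == 8, phase_q4 == 0,
     * step_q4 == 8: the block at output x == 16 starts 8 source pels in,
     * on an integer phase. */
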
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1671,8 +1671,9 @@
}
static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
- int *distortion, int *skip, int fullpixel) {
- vp9_build_inter4x4_predictors_mbuv(&x->e_mbd);
+ int *distortion, int *skip, int fullpixel,
+ int mb_row, int mb_col) {
+ vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col);
vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
x->e_mbd.predictor, x->src.uv_stride);
return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1);
@@ -3108,13 +3109,25 @@
int_mv frame_nearest_mv[MAX_REF_FRAMES],
int_mv frame_near_mv[MAX_REF_FRAMES],
int frame_mdcounts[4][4],
- YV12_BUFFER_CONFIG yv12_mb[4]) {
- YV12_BUFFER_CONFIG *yv12 = &cpi->common.yv12_fb[idx];
+ YV12_BUFFER_CONFIG yv12_mb[4],
+ struct scale_factors scale[MAX_REF_FRAMES]) {
+ VP9_COMMON *cm = &cpi->common;
+ YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.active_ref_idx[idx]];
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
- setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col);
+ // set up scaling factors
+ scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
+ scale[frame_type].x_offset_q4 =
+ (mb_col * 16 * scale[frame_type].x_num / scale[frame_type].x_den) & 0xf;
+ scale[frame_type].y_offset_q4 =
+ (mb_row * 16 * scale[frame_type].y_num / scale[frame_type].y_den) & 0xf;
+ // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
+ // use the UV scaling factors.
+ setup_pred_block(&yv12_mb[frame_type], yv12, mb_row, mb_col,
+ &scale[frame_type], &scale[frame_type]);
+
// Gets an initial list of candidate vectors from neighbours and orders them
vp9_find_mv_refs(&cpi->common, xd, xd->mode_info_context,
cpi->common.error_resilient_mode ?
@@ -3198,7 +3211,8 @@
int mode_index,
INTERPOLATIONFILTERTYPE *best_filter,
int_mv frame_mv[MB_MODE_COUNT]
- [MAX_REF_FRAMES]) {
+ [MAX_REF_FRAMES],
+ int mb_row, int mb_col) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -3376,7 +3390,8 @@
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
var = vp9_variance64x64(*(b->base_src), b->src_stride,
xd->dst.y_buffer, xd->dst.y_stride, &sse);
// Note our transform coeffs are 8 times an orthogonal transform.
@@ -3460,7 +3475,8 @@
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
var = vp9_variance32x32(*(b->base_src), b->src_stride,
xd->dst.y_buffer, xd->dst.y_stride, &sse);
// Note our transform coeffs are 8 times an orthogonal transform.
@@ -3542,7 +3558,8 @@
int tmp_dist_y, tmp_dist_u, tmp_dist_v;
// TODO(jkoleszar): these 2 y/uv should be replaced with one call to
// vp9_build_interintra_16x16_predictors_mb().
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
@@ -3551,7 +3568,8 @@
#endif
vp9_build_inter16x16_predictors_mbuv(xd, xd->predictor + 256,
- xd->predictor + 320, 8);
+ xd->predictor + 320, 8,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
@@ -3647,7 +3665,8 @@
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else if (block_size == BLOCK_32X32) {
vp9_build_inter32x32_predictors_sb(xd,
xd->dst.y_buffer,
@@ -3654,11 +3673,13 @@
xd->dst.u_buffer,
xd->dst.v_buffer,
xd->dst.y_stride,
- xd->dst.uv_stride);
+ xd->dst.uv_stride,
+ mb_row, mb_col);
} else {
// TODO(jkoleszar): These y/uv fns can be replaced with their mb
// equivalent
- vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16);
+ vp9_build_inter16x16_predictors_mby(xd, xd->predictor, 16,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16);
@@ -3665,7 +3686,8 @@
}
#endif
vp9_build_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
- &xd->predictor[320], 8);
+ &xd->predictor[320], 8,
+ mb_row, mb_col);
#if CONFIG_COMP_INTERINTRA_PRED
if (is_comp_interintra_pred) {
vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
@@ -3860,6 +3882,8 @@
int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
cpi->common.y1dc_delta_q);
+ struct scale_factors scale_factor[4];
+
vpx_memset(mode8x8, 0, sizeof(mode8x8));
vpx_memset(&frame_mv, 0, sizeof(frame_mv));
vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
@@ -3883,24 +3907,24 @@
}
if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->lst_fb_idx],
+ setup_buffer_inter(cpi, x, cpi->lst_fb_idx,
LAST_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb);
+ frame_mdcounts, yv12_mb, scale_factor);
}
if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->gld_fb_idx],
+ setup_buffer_inter(cpi, x, cpi->gld_fb_idx,
GOLDEN_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb);
+ frame_mdcounts, yv12_mb, scale_factor);
}
if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- setup_buffer_inter(cpi, x, cpi->common.active_ref_idx[cpi->alt_fb_idx],
+ setup_buffer_inter(cpi, x, cpi->alt_fb_idx,
ALTREF_FRAME, BLOCK_16X16, mb_row, mb_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb);
+ frame_mdcounts, yv12_mb, scale_factor);
}
*returnintra = INT64_MAX;
@@ -3955,6 +3979,10 @@
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
mbmi->interp_filter = cm->mcomp_filter_type;
+
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
+
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
// Test best rd so far against threshold for trying this mode.
@@ -3970,6 +3998,18 @@
!(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))
continue;
+ // only scale on zeromv.
+ if (mbmi->ref_frame > 0 &&
+ (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+ if (mbmi->second_ref_frame > 0 &&
+ (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+
// current coding mode under rate-distortion optimization test loop
#if CONFIG_COMP_INTERINTRA_PRED
mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
@@ -4288,7 +4328,7 @@
int uv_skippable;
rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- cpi->common.full_pixel);
+ cpi->common.full_pixel, mb_row, mb_col);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
@@ -4330,7 +4370,8 @@
&rate_y, &distortion,
&rate_uv, &distortion_uv,
&mode_excluded, &disable_skip,
- mode_index, &tmp_best_filter, frame_mv);
+ mode_index, &tmp_best_filter, frame_mv,
+ mb_row, mb_col);
if (this_rd == INT64_MAX)
continue;
}
@@ -4581,6 +4622,8 @@
mbmi->mb_skip_coeff =
(cpi->common.mb_no_coeff_skip) ? 1 : 0;
mbmi->partitioning = 0;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
@@ -4633,6 +4676,8 @@
}
end:
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
best_mode_index, &best_partition,
&mbmi->ref_mvs[mbmi->ref_frame][0],
@@ -4846,9 +4891,9 @@
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
int idx_list[4] = {0,
- cpi->common.active_ref_idx[cpi->lst_fb_idx],
- cpi->common.active_ref_idx[cpi->gld_fb_idx],
- cpi->common.active_ref_idx[cpi->alt_fb_idx]};
+ cpi->lst_fb_idx,
+ cpi->gld_fb_idx,
+ cpi->alt_fb_idx};
int mdcounts[4];
int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
int saddone = 0;
@@ -4875,6 +4920,7 @@
int rate_uv_16x16 = 0, rate_uv_tokenonly_16x16 = 0;
int dist_uv_16x16 = 0, uv_skip_16x16 = 0;
MB_PREDICTION_MODE mode_uv_16x16 = NEARESTMV;
+ struct scale_factors scale_factor[4];
xd->mode_info_context->mbmi.segment_id = segment_id;
estimate_ref_frame_costs(cpi, segment_id, ref_costs);
@@ -4890,7 +4936,7 @@
setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
mb_row, mb_col, frame_mv[NEARESTMV],
frame_mv[NEARMV], frame_mdcounts,
- yv12_mb);
+ yv12_mb, scale_factor);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
@@ -4969,6 +5015,8 @@
}
mbmi->ref_frame = ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
mbmi->mode = this_mode;
mbmi->uv_mode = DC_PRED;
@@ -4976,6 +5024,18 @@
mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
#endif
+
+ if (mbmi->ref_frame > 0 &&
+ (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+ if (mbmi->second_ref_frame > 0 &&
+ (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode != ZEROMV)
+ continue;
+
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
mbmi->interp_filter = cm->mcomp_filter_type;
@@ -5000,6 +5060,8 @@
if (!(cpi->ref_frame_flags & flag_list[second_ref]))
continue;
mbmi->second_ref_frame = second_ref;
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
xd->second_pre = yv12_mb[second_ref];
mode_excluded =
@@ -5098,7 +5160,8 @@
&rate_y, &distortion_y,
&rate_uv, &distortion_uv,
&mode_excluded, &disable_skip,
- mode_index, &tmp_best_filter, frame_mv);
+ mode_index, &tmp_best_filter, frame_mv,
+ mb_row, mb_col);
if (this_rd == INT64_MAX)
continue;
}
@@ -5351,6 +5414,8 @@
}
end:
+ set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
+ scale_factor);
{
PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ?
&x->sb32_context[xd->sb_index] :
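
The two mode-loop guards above reduce to a single rule: a reference whose
stored size differs from the coded size is usable only by ZEROMV, where the
prediction is pure scaling with no motion. As a predicate (sketch):

    static int ref_usable(int this_mode, int ref_w, int ref_h,
                          int cur_w, int cur_h) {
      const int scaled = ref_w != cur_w || ref_h != cur_h;
      return !scaled || this_mode == ZEROMV;
    }
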
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -45,18 +45,4 @@
extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x,
MB_PREDICTION_MODE mb, int_mv *mv);
-static void setup_pred_block(YV12_BUFFER_CONFIG *dst,
- const YV12_BUFFER_CONFIG *src,
- int mb_row, int mb_col) {
- const int recon_y_stride = src->y_stride;
- const int recon_uv_stride = src->uv_stride;
- const int recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col;
- const int recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col;
-
- *dst = *src;
- dst->y_buffer += recon_yoffset;
- dst->u_buffer += recon_uvoffset;
- dst->v_buffer += recon_uvoffset;
-}
-
#endif // VP9_ENCODER_VP9_RDOPT_H_
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -456,6 +456,13 @@
, start_frame);
#endif
+ // Setup scaling factors. Scaling on each of the arnr frames is not supported
+ vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0],
+ &cpi->common.yv12_fb[cpi->common.new_fb_idx],
+ 16 * cpi->common.mb_cols,
+ 16 * cpi->common.mb_rows);
+ cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0];
+
// Setup frame pointers, NULL indicates frame not included in filter
vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
for (frame = 0; frame < frames_to_blur; frame++) {
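
Since every ARNR frame matches the coded size, the setup call above
degenerates to unit factors. A minimal check of that invariant (sketch,
assuming fb points at one of the filter frames and assert.h is included):

    struct scale_factors sf;
    vp9_setup_scale_factors_for_frame(&sf, fb, fb->y_width, fb->y_height);
    assert(sf.x_step_q4 == 16 && sf.y_step_q4 == 16);  /* 1:1 on both axes */
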
--