shithub: libvpx

Download patch

ref: 452dc21500a2339ee685cb28efbd2af1b856ea12
parent: d0369d6fd42062b442135ea173f722198bb47fec
author: Adrian Grange <agrange@google.com>
date: Thu Oct 2 11:26:42 EDT 2014

Move input frame scaling into the recode loop

Move the point at which input frames are scaled
into the recode loop. This will allow us to change
the coded frame size dynamically in response
to previous attempts to encode the frame at a
higher resolution.

A following patch will implement a scheme for
resizing the frame in the recode loop.

Change-Id: I6a59c02d6ac1626512edad6de8b60063b79433e6

--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -160,7 +160,7 @@
   VP9_COMMON *const cm = &cpi->common;
   int i;
 
-  // Delete sementation map
+  // Delete segmentation map.
   vpx_free(cpi->segmentation_map);
   cpi->segmentation_map = NULL;
   vpx_free(cm->last_frame_seg_map);
@@ -2415,30 +2415,33 @@
   const VP9_REFFRAME ref_mask[3] = {VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG};
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
-    const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
-
     // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
-    if ((cpi->ref_frame_flags & ref_mask[ref_frame - 1]) &&
-        (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height)) {
-      const int new_fb = get_free_fb(cm);
-      vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
-                               cm->width, cm->height,
-                               cm->subsampling_x, cm->subsampling_y,
+    if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
+      const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
+      const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
+
+      if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
+        const int new_fb = get_free_fb(cm);
+        vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
+                                 cm->width, cm->height,
+                                 cm->subsampling_x, cm->subsampling_y,
 #if CONFIG_VP9_HIGHBITDEPTH
-                               cm->use_highbitdepth,
+                                 cm->use_highbitdepth,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+                                 VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
 #if CONFIG_VP9_HIGHBITDEPTH
-      scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf,
-                             (int)cm->bit_depth);
+        scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf,
+                               (int)cm->bit_depth);
 #else
-      scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
+        scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-      cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
+        cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
+      } else {
+        cpi->scaled_ref_idx[ref_frame - 1] = idx;
+        ++cm->frame_bufs[idx].ref_count;
+      }
     } else {
-      cpi->scaled_ref_idx[ref_frame - 1] = idx;
-      cm->frame_bufs[idx].ref_count++;
+      cpi->scaled_ref_idx[ref_frame - 1] = INVALID_REF_BUFFER_IDX;
     }
   }
 }
@@ -2446,9 +2449,13 @@
 static void release_scaled_references(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
   int i;
-
-  for (i = 0; i < 3; i++)
-    cm->frame_bufs[cpi->scaled_ref_idx[i]].ref_count--;
+  for (i = 0; i < 3; ++i) {
+    const int idx = cpi->scaled_ref_idx[i];
+    RefCntBuffer *const buf =
+        idx != INVALID_REF_BUFFER_IDX ? &cm->frame_bufs[idx] : NULL;
+    if (buf != NULL)
+      --buf->ref_count;
+  }
 }
 
 static void full_to_model_count(unsigned int *model_count,
@@ -2533,10 +2540,181 @@
 }
 #endif
 
-static void encode_without_recode_loop(VP9_COMP *cpi,
-                                       int q) {
+static void set_mv_search_params(VP9_COMP *cpi) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const unsigned int max_mv_def = MIN(cm->width, cm->height);
+
+  // Default based on max resolution.
+  cpi->mv_step_param = vp9_init_search_range(max_mv_def);
+
+  if (cpi->sf.mv.auto_mv_step_size) {
+    if (frame_is_intra_only(cm)) {
+      // Initialize max_mv_magnitude for use in the first INTER frame
+      // after a key/intra-only frame.
+      cpi->max_mv_magnitude = max_mv_def;
+    } else {
+      if (cm->show_frame)
+        // Allow mv_steps to correspond to twice the max mv magnitude found
+        // in the previous frame, capped by the default max_mv_magnitude based
+        // on resolution.
+        cpi->mv_step_param =
+            vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+      cpi->max_mv_magnitude = 0;
+    }
+  }
+}
+
+static void set_size_dependent_vars(VP9_COMP *cpi, int *q,
+                                    int *bottom_index, int *top_index) {
   VP9_COMMON *const cm = &cpi->common;
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+
+  // Setup variables that depend on the dimensions of the frame.
+  set_mv_search_params(cpi);
+
+  // Configure experimental use of segmentation for enhanced coding of
+  // static regions if indicated.
+  // Only allowed in the second pass of a two pass encode, as it requires
+  // lagged coding, and if the relevant speed feature flag is set.
+  if (oxcf->pass == 2 && cpi->sf.static_segmentation)
+    configure_static_seg_features(cpi);
+
+#if CONFIG_VP9_POSTPROC
+  if (oxcf->noise_sensitivity > 0) {
+    int l = 0;
+    switch (oxcf->noise_sensitivity) {
+      case 1:
+        l = 20;
+        break;
+      case 2:
+        l = 40;
+        break;
+      case 3:
+        l = 60;
+        break;
+      case 4:
+      case 5:
+        l = 100;
+        break;
+      case 6:
+        l = 150;
+        break;
+    }
+    vp9_denoise(cpi->Source, cpi->Source, l);
+  }
+#endif  // CONFIG_VP9_POSTPROC
+
+  vp9_set_speed_features(cpi);
+
+  vp9_set_rd_speed_thresholds(cpi);
+  vp9_set_rd_speed_thresholds_sub8x8(cpi);
+
+  // Decide q and q bounds.
+  *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
+
+  if (!frame_is_intra_only(cm)) {
+    cm->interp_filter = cpi->sf.default_interp_filter;
+    // TODO: Decide this more intelligently.
+    vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
+  }
+}
+
+static void init_motion_estimation(VP9_COMP *cpi) {
+  int y_stride = cpi->scaled_source.y_stride;
+
+  if (cpi->sf.mv.search_method == NSTEP) {
+    vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
+  } else if (cpi->sf.mv.search_method == DIAMOND) {
+    vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
+  }
+}
+
+void set_frame_size(VP9_COMP *cpi) {
+  int ref_frame;
+  VP9_COMMON *const cm = &cpi->common;
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+
+  // For two pass encodes analyse the first pass stats and determine
+  // the bit allocation and other parameters for this frame / group of frames.
+  if ((oxcf->pass == 2) && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+    vp9_rc_get_second_pass_params(cpi);
+  }
+
+  if (!cpi->use_svc && cpi->multi_arf_allowed) {
+    if (cm->frame_type == KEY_FRAME) {
+      init_buffer_indices(cpi);
+    } else if (oxcf->pass == 2) {
+      const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+      cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index];
+    }
+  }
+
+  if (oxcf->pass == 2 &&
+      cm->current_video_frame == 0 &&
+      oxcf->allow_spatial_resampling &&
+      oxcf->rc_mode == VPX_VBR) {
+    // Internal scaling is triggered on the first frame.
+    vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
+                         oxcf->scaled_frame_height);
+  }
+
+  // Reset the frame pointers to the current frame size.
+  vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
+                           cm->width, cm->height,
+                           cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                           cm->use_highbitdepth,
+#endif
+                           VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+
+  alloc_util_frame_buffers(cpi);
+  init_motion_estimation(cpi);
+
+  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+    const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
+    YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf;
+    RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
+    ref_buf->buf = buf;
+    ref_buf->idx = idx;
+#if CONFIG_VP9_HIGHBITDEPTH
+    vp9_setup_scale_factors_for_frame(&ref_buf->sf,
+                                      buf->y_crop_width, buf->y_crop_height,
+                                      cm->width, cm->height,
+                                      (buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
+                                          1 : 0);
+#else
+    vp9_setup_scale_factors_for_frame(&ref_buf->sf,
+                                      buf->y_crop_width, buf->y_crop_height,
+                                      cm->width, cm->height);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    if (vp9_is_scaled(&ref_buf->sf))
+      vp9_extend_frame_borders(buf);
+  }
+
+  set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
+}
+
+static void encode_without_recode_loop(VP9_COMP *cpi) {
+  int q;
+  int bottom_index, top_index;  // Dummy.
+  VP9_COMMON *const cm = &cpi->common;
+
   vp9_clear_system_state();
+
+  set_frame_size(cpi);
+
+  cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
+                                      &cpi->scaled_source);
+
+  if (cpi->unscaled_last_source != NULL)
+    cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
+                                             &cpi->scaled_last_source);
+
+  vp9_scale_references(cpi);
+
+  set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+
   vp9_set_quantizer(cm, q);
   setup_frame(cpi);
   // Variance adaptive and in frame q adjustment experiments are mutually
@@ -2559,28 +2737,45 @@
 
 static void encode_with_recode_loop(VP9_COMP *cpi,
                                     size_t *size,
-                                    uint8_t *dest,
-                                    int q,
-                                    int bottom_index,
-                                    int top_index) {
+                                    uint8_t *dest) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
+  int q;
+  int q_low, q_high;
+  int bottom_index, top_index;
   int loop_count = 0;
   int loop = 0;
   int overshoot_seen = 0;
   int undershoot_seen = 0;
-  int q_low = bottom_index, q_high = top_index;
   int frame_over_shoot_limit;
   int frame_under_shoot_limit;
 
-  // Decide frame size bounds
-  vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
-                                   &frame_under_shoot_limit,
-                                   &frame_over_shoot_limit);
-
   do {
     vp9_clear_system_state();
 
+    if (loop_count == 0) {
+      set_frame_size(cpi);
+
+      // Decide frame size bounds.
+      vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
+                                       &frame_under_shoot_limit,
+                                       &frame_over_shoot_limit);
+
+      cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
+                                        &cpi->scaled_source);
+
+      if (cpi->unscaled_last_source != NULL)
+        cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
+                                                 &cpi->scaled_last_source);
+
+      vp9_scale_references(cpi);
+
+      set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+
+      q_low = bottom_index;
+      q_high = top_index;
+    }
+
     vp9_set_quantizer(cm, q);
 
     if (loop_count == 0)
@@ -2857,31 +3052,6 @@
   cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
 }
 
-static void set_mv_search_params(VP9_COMP *cpi) {
-  const VP9_COMMON *const cm = &cpi->common;
-  const unsigned int max_mv_def = MIN(cm->width, cm->height);
-
-  // Default based on max resolution.
-  cpi->mv_step_param = vp9_init_search_range(max_mv_def);
-
-  if (cpi->sf.mv.auto_mv_step_size) {
-    if (frame_is_intra_only(cm)) {
-      // Initialize max_mv_magnitude for use in the first INTER frame
-      // after a key/intra-only frame.
-      cpi->max_mv_magnitude = max_mv_def;
-    } else {
-      if (cm->show_frame)
-        // Allow mv_steps to correspond to twice the max mv magnitude found
-        // in the previous frame, capped by the default max_mv_magnitude based
-        // on resolution.
-        cpi->mv_step_param =
-            vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
-      cpi->max_mv_magnitude = 0;
-    }
-  }
-}
-
-
 int setup_interp_filter_search_mask(VP9_COMP *cpi) {
   INTERP_FILTER ifilter;
   int ref_total[MAX_REF_FRAMES] = {0};
@@ -2916,21 +3086,9 @@
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   struct segmentation *const seg = &cm->seg;
   TX_SIZE t;
-  int q;
-  int top_index;
-  int bottom_index;
 
   set_ext_overrides(cpi);
 
-  cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
-                                      &cpi->scaled_source);
-
-  if (cpi->unscaled_last_source != NULL)
-    cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
-                                             &cpi->scaled_last_source);
-
-  vp9_scale_references(cpi);
-
   vp9_clear_system_state();
 
   // Enable or disable mode based tweaking of the zbin.
@@ -2945,14 +3103,11 @@
   // Set default state for segment based loop filter update flags.
   cm->lf.mode_ref_delta_update = 0;
 
-  set_mv_search_params(cpi);
-
   if (cpi->oxcf.pass == 2 &&
       cpi->sf.adaptive_interp_filter_search)
     cpi->sf.interp_filter_search_mask =
         setup_interp_filter_search_mask(cpi);
 
-
   // Set various flags etc to special state if it is a key frame.
   if (frame_is_intra_only(cm)) {
     // Reset the loop filter deltas and segmentation map.
@@ -3010,13 +3165,6 @@
     }
   }
 
-  // Configure experimental use of segmentation for enhanced coding of
-  // static regions if indicated.
-  // Only allowed in second pass of two pass (as requires lagged coding)
-  // and if the relevant speed feature flag is set.
-  if (oxcf->pass == 2 && cpi->sf.static_segmentation)
-    configure_static_seg_features(cpi);
-
   // Check if the current frame is skippable for the partition search in the
   // second pass according to the first pass stats
   if (cpi->sf.allow_partition_search_skip && oxcf->pass == 2 &&
@@ -3038,31 +3186,6 @@
 
   vp9_clear_system_state();
 
-#if CONFIG_VP9_POSTPROC
-  if (oxcf->noise_sensitivity > 0) {
-    int l = 0;
-    switch (oxcf->noise_sensitivity) {
-      case 1:
-        l = 20;
-        break;
-      case 2:
-        l = 40;
-        break;
-      case 3:
-        l = 60;
-        break;
-      case 4:
-      case 5:
-        l = 100;
-        break;
-      case 6:
-        l = 150;
-        break;
-    }
-    vp9_denoise(cpi->Source, cpi->Source, l);
-  }
-#endif
-
 #if CONFIG_INTERNAL_STATS
   {
     int i;
@@ -3071,24 +3194,10 @@
   }
 #endif
 
-  vp9_set_speed_features(cpi);
-
-  vp9_set_rd_speed_thresholds(cpi);
-  vp9_set_rd_speed_thresholds_sub8x8(cpi);
-
-  // Decide q and q bounds.
-  q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index);
-
-  if (!frame_is_intra_only(cm)) {
-    cm->interp_filter = cpi->sf.default_interp_filter;
-    /* TODO: Decide this more intelligently */
-    vp9_set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH);
-  }
-
   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
-    encode_without_recode_loop(cpi, q);
+    encode_without_recode_loop(cpi);
   } else {
-    encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index);
+    encode_with_recode_loop(cpi, size, dest);
   }
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
@@ -3231,16 +3340,6 @@
   vp9_twopass_postencode_update(cpi);
 }
 
-static void init_motion_estimation(VP9_COMP *cpi) {
-  int y_stride = cpi->scaled_source.y_stride;
-
-  if (cpi->sf.mv.search_method == NSTEP) {
-    vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
-  } else if (cpi->sf.mv.search_method == DIAMOND) {
-    vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
-  }
-}
-
 static void check_initial_width(VP9_COMP *cpi,
 #if CONFIG_VP9_HIGHBITDEPTH
                                 int use_highbitdepth,
@@ -3259,10 +3358,11 @@
     alloc_ref_frame_buffers(cpi);
     alloc_util_frame_buffers(cpi);
 
-    init_motion_estimation(cpi);
+    init_motion_estimation(cpi);  // TODO(agrange) This can be removed.
 
     cpi->initial_width = cm->width;
     cpi->initial_height = cm->height;
+    cpi->initial_mbs = cm->MBs;
   }
 }
 
@@ -3404,14 +3504,13 @@
                             int64_t *time_stamp, int64_t *time_end, int flush) {
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   RATE_CONTROL *const rc = &cpi->rc;
   struct vpx_usec_timer  cmptimer;
   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
   struct lookahead_entry *last_source = NULL;
   struct lookahead_entry *source = NULL;
-  MV_REFERENCE_FRAME ref_frame;
   int arf_src_index;
+  int i;
 
   if (is_two_pass_svc(cpi)) {
 #if CONFIG_SPATIAL_SVC
@@ -3533,80 +3632,28 @@
     vp9_restore_layer_context(cpi);
   }
 
-  // start with a 0 size frame
-  *size = 0;
-
-  /* find a free buffer for the new frame, releasing the reference previously
-   * held.
-   */
+  // Find a free buffer for the new frame, releasing the reference previously
+  // held.
   cm->frame_bufs[cm->new_fb_idx].ref_count--;
   cm->new_fb_idx = get_free_fb(cm);
 
-  // For two pass encodes analyse the first pass stats and determine
-  // the bit allocation and other parameters for this frame / group of frames.
-  if ((oxcf->pass == 2) && (!cpi->use_svc || is_two_pass_svc(cpi))) {
-    vp9_rc_get_second_pass_params(cpi);
-  }
+  // Start with a 0 size frame.
+  *size = 0;
 
-  if (!cpi->use_svc && cpi->multi_arf_allowed) {
-    if (cm->frame_type == KEY_FRAME) {
-      init_buffer_indices(cpi);
-    } else if (oxcf->pass == 2) {
-      const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-      cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index];
-    }
-  }
-
   cpi->frame_flags = *frame_flags;
 
-  if (oxcf->pass == 2 &&
-      cm->current_video_frame == 0 &&
-      oxcf->allow_spatial_resampling &&
-      oxcf->rc_mode == VPX_VBR) {
-    // Internal scaling is triggered on the first frame.
-    vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
-                         oxcf->scaled_frame_height);
+  // Delay setting frame size until encode loop for 2-pass non-SVC/2-pass SVC.
+  if (!(oxcf->pass == 2 && (!cpi->use_svc || is_two_pass_svc(cpi)))) {
+    set_frame_size(cpi);
   }
 
-  // Reset the frame pointers to the current frame size
-  vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
-                           cm->width, cm->height,
-                           cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
-                           cm->use_highbitdepth,
-#endif
-                           VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
-
-  alloc_util_frame_buffers(cpi);
-  init_motion_estimation(cpi);
-
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
-    YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf;
-    RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
-    ref_buf->buf = buf;
-    ref_buf->idx = idx;
-#if CONFIG_VP9_HIGHBITDEPTH
-    vp9_setup_scale_factors_for_frame(&ref_buf->sf,
-                                      buf->y_crop_width, buf->y_crop_height,
-                                      cm->width, cm->height,
-                                      (buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
-                                          1 : 0);
-#else
-    vp9_setup_scale_factors_for_frame(&ref_buf->sf,
-                                      buf->y_crop_width, buf->y_crop_height,
-                                      cm->width, cm->height);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    if (vp9_is_scaled(&ref_buf->sf))
-      vp9_extend_frame_borders(buf);
-  }
-
-  set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
-
   if (oxcf->aq_mode == VARIANCE_AQ) {
     vp9_vaq_init();
   }
 
+  for (i = 0; i < 3; ++i)
+    cpi->scaled_ref_idx[i] = INVALID_REF_BUFFER_IDX;
+
   if (oxcf->pass == 1 &&
       (!cpi->use_svc || is_two_pass_svc(cpi))) {
     const int lossless = is_lossless_requested(oxcf);
@@ -3635,7 +3682,7 @@
   if (cm->refresh_frame_context)
     cm->frame_contexts[cm->frame_context_idx] = cm->fc;
 
-  // Frame was dropped, release scaled references.
+  // No frame encoded or frame was dropped: release scaled references.
   if (*size == 0) {
     release_scaled_references(cpi);
   }
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -44,6 +44,7 @@
 #endif
 
 #define DEFAULT_GF_INTERVAL         10
+#define INVALID_REF_BUFFER_IDX      -1
 
 typedef struct {
   int nmvjointcost[MV_JOINTS];
@@ -375,6 +376,7 @@
 
   int initial_width;
   int initial_height;
+  int initial_mbs;
 
   int use_svc;
 
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -145,7 +145,7 @@
   struct vpx_codec_cx_pkt pkt;
   pkt.kind = VPX_CODEC_FPMB_STATS_PKT;
   pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats;
-  pkt.data.firstpass_mb_stats.sz = cm->MBs * sizeof(uint8_t);
+  pkt.data.firstpass_mb_stats.sz = cpi->initial_mbs * sizeof(uint8_t);
   vpx_codec_pkt_list_add(pktlist, &pkt);
 }
 #endif
@@ -339,9 +339,9 @@
 
 // Refine the motion search range according to the frame dimension
 // for first pass test.
-static int get_search_range(const VP9_COMMON *cm) {
+static int get_search_range(const VP9_COMP *cpi) {
   int sr = 0;
-  const int dim = MIN(cm->width, cm->height);
+  const int dim = MIN(cpi->initial_width, cpi->initial_height);
 
   while ((dim << sr) < MAX_FULL_PEL_VAL)
     ++sr;
@@ -361,7 +361,7 @@
 
   int step_param = 3;
   int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
-  const int sr = get_search_range(&cpi->common);
+  const int sr = get_search_range(cpi);
   step_param += sr;
   further_steps -= sr;
 
@@ -490,7 +490,7 @@
 
 #if CONFIG_FP_MB_STATS
   if (cpi->use_fp_mb_stats) {
-    vp9_zero_array(cpi->twopass.frame_mb_stats_buf, cm->MBs);
+    vp9_zero_array(cpi->twopass.frame_mb_stats_buf, cpi->initial_mbs);
   }
 #endif
 
@@ -946,7 +946,7 @@
     // where the typical "real" energy per MB also falls.
     // Initial estimate here uses sqrt(mbs) to define the min_err, where the
     // number of mbs is propotional to image area.
-    const double min_err = 200 * sqrt(cm->MBs);
+    const double min_err = 200 * sqrt(cpi->initial_mbs);
 
     fps.frame = cm->current_video_frame;
     fps.spatial_layer_id = cpi->svc.spatial_layer_id;
@@ -954,9 +954,9 @@
     fps.sr_coded_error = (double)(sr_coded_error >> 8) + min_err;
     fps.intra_error = (double)(intra_error >> 8) + min_err;
     fps.count = 1.0;
-    fps.pcnt_inter = (double)intercount / cm->MBs;
-    fps.pcnt_second_ref = (double)second_ref_count / cm->MBs;
-    fps.pcnt_neutral = (double)neutral_count / cm->MBs;
+    fps.pcnt_inter = (double)intercount / cpi->initial_mbs;
+    fps.pcnt_second_ref = (double)second_ref_count / cpi->initial_mbs;
+    fps.pcnt_neutral = (double)neutral_count / cpi->initial_mbs;
 
     if (mvcount > 0) {
       fps.MVr = (double)sum_mvr / mvcount;
@@ -967,7 +967,7 @@
       fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) / mvcount;
       fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
       fps.new_mv_count = new_mv_count;
-      fps.pcnt_motion = (double)mvcount / cm->MBs;
+      fps.pcnt_motion = (double)mvcount / cpi->initial_mbs;
     } else {
       fps.MVr = 0.0;
       fps.mvr_abs = 0.0;
@@ -1081,7 +1081,7 @@
   if (section_target_bandwidth <= 0) {
     return rc->worst_quality;  // Highest value allowed
   } else {
-    const int num_mbs = cpi->common.MBs;
+    const int num_mbs = cpi->initial_mbs;
     const double section_err = stats->coded_error / stats->count;
     const double err_per_mb = section_err / num_mbs;
     const double speed_term = 1.0 + 0.04 * oxcf->speed;
@@ -1195,9 +1195,10 @@
 #define LOW_SR_DIFF_TRHESH 0.1
 #define SR_DIFF_MAX 128.0
 
-static double get_sr_decay_rate(const VP9_COMMON *cm,
+static double get_sr_decay_rate(const VP9_COMP *cpi,
                                 const FIRSTPASS_STATS *frame) {
-  double sr_diff = (frame->sr_coded_error - frame->coded_error) / cm->MBs;
+  double sr_diff =
+      (frame->sr_coded_error - frame->coded_error) / cpi->initial_mbs;
   double sr_decay = 1.0;
   const double motion_amplitude_factor =
     frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
@@ -1214,19 +1215,19 @@
 
 // This function gives an estimate of how badly we believe the prediction
 // quality is decaying from frame to frame.
-static double get_zero_motion_factor(const VP9_COMMON *cm,
+static double get_zero_motion_factor(const VP9_COMP *cpi,
                                      const FIRSTPASS_STATS *frame) {
   const double zero_motion_pct = frame->pcnt_inter -
                                  frame->pcnt_motion;
-  double sr_decay = get_sr_decay_rate(cm, frame);
+  double sr_decay = get_sr_decay_rate(cpi, frame);
   return MIN(sr_decay, zero_motion_pct);
 }
 
 #define ZM_POWER_FACTOR 0.75
 
-static double get_prediction_decay_rate(const VP9_COMMON *cm,
+static double get_prediction_decay_rate(const VP9_COMP *cpi,
                                         const FIRSTPASS_STATS *next_frame) {
-  const double sr_decay_rate = get_sr_decay_rate(cm, next_frame);
+  const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame);
   const double zero_motion_factor =
     (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
                 ZM_POWER_FACTOR));
@@ -1322,7 +1323,7 @@
   const double q_correction = MIN((0.8 + (lq * 0.001)), 1.0);
 
   // Underlying boost factor is based on inter error ratio.
-  frame_boost = (BASELINE_ERR_PER_MB * cpi->common.MBs) /
+  frame_boost = (BASELINE_ERR_PER_MB * cpi->initial_mbs) /
                 DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
   frame_boost = frame_boost * BOOST_FACTOR * q_correction;
 
@@ -1371,7 +1372,7 @@
 
     // Accumulate the effect of prediction quality decay.
     if (!flash_detected) {
-      decay_accumulator *= get_prediction_decay_rate(&cpi->common, this_frame);
+      decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                           ? MIN_DECAY_FACTOR : decay_accumulator;
     }
@@ -1410,7 +1411,7 @@
 
     // Cumulative effect of prediction quality decay.
     if (!flash_detected) {
-      decay_accumulator *= get_prediction_decay_rate(&cpi->common, this_frame);
+      decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                               ? MIN_DECAY_FACTOR : decay_accumulator;
     }
@@ -1729,7 +1730,8 @@
     gf_group_err -= gf_first_frame_err;
 
   // Motion breakout threshold for loop below depends on image size.
-  mv_ratio_accumulator_thresh = (cpi->common.width + cpi->common.height) / 4.0;
+  mv_ratio_accumulator_thresh =
+      (cpi->initial_height + cpi->initial_width) / 4.0;
 
   // Set a maximum and minimum interval for the GF group.
   // If the image appears almost completely static we can extend beyond this.
@@ -1781,7 +1783,7 @@
     // Accumulate the effect of prediction quality decay.
     if (!flash_detected) {
       last_loop_decay_rate = loop_decay_rate;
-      loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
+      loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
 
       decay_accumulator = decay_accumulator * loop_decay_rate;
 
@@ -1788,7 +1790,7 @@
       // Monitor for static sections.
       zero_motion_accumulator =
         MIN(zero_motion_accumulator,
-            get_zero_motion_factor(&cpi->common, &next_frame));
+            get_zero_motion_factor(cpi, &next_frame));
 
       // Break clause to detect very still sections after motion. For example,
       // a static image after a fade or other transition.
@@ -2065,8 +2067,7 @@
         break;
 
       // How fast is the prediction quality decaying?
-      loop_decay_rate = get_prediction_decay_rate(&cpi->common,
-                                                  twopass->stats_in);
+      loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in);
 
       // We want to know something about the recent past... rather than
       // as used elsewhere where we are concerned with decay in prediction
@@ -2177,7 +2178,7 @@
     // Monitor for static sections.
     zero_motion_accumulator =
       MIN(zero_motion_accumulator,
-          get_zero_motion_factor(&cpi->common, &next_frame));
+          get_zero_motion_factor(cpi, &next_frame));
 
     // Not all frames in the group are necessarily used in calculating boost.
     if ((i <= rc->max_gf_interval) ||
@@ -2188,7 +2189,7 @@
       // How fast is prediction quality decaying.
       if (!detect_flash(twopass, 0)) {
         const double loop_decay_rate =
-          get_prediction_decay_rate(&cpi->common, &next_frame);
+          get_prediction_decay_rate(cpi, &next_frame);
         decay_accumulator *= loop_decay_rate;
         decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR);
         av_decay_accumulator += decay_accumulator;