shithub: libvpx

Download patch

ref: 53ff43adc341068945f0857bcf28846080e8f368
parent: 417df1d42e0f4b02387ec9100b6546ddcc918bd6
author: Deb Mukherjee <debargha@google.com>
date: Mon Jul 8 12:01:01 EDT 2013

Prunes out full-rd computation based on modeled rd

Adds a speed feature to eliminate full-rd computation if the modeled
rd or rd based on a different parameter in the same mode is already
a lot larger than the best rd yet.

Specifically, only search the sharp and smooth filters if the modeled
rd cost based on the regular filter is within a certain factor of the
best rd cost so far. Also, skip full-rd computation of non splitmv
inter modes if the modeled rd cost based on pred error is not within
the same factor of the best rd cost so far.

Also adds some enhancements in the rd search for splitmv mode to
speed things up by early breakouts. Negligible impact on performance.

Results on derfraw300:
psnr:    -0.013% with the splitmv enhancements, -0.24% with the rd
         breakout feature on.
speedup: 6% with splitmv enhancements, 20% with also residual breakout
         (tested on football sequence at 600 Kbps)

Change-Id: I37abc308ea9f110c1679ce649b6a7e73ab1ad5fc

--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -469,8 +469,8 @@
   }
 #if CONFIG_ALPHA
   // TODO(jkoleszar): Using the Y w/h for now
-  mb->plane[3].subsampling_x = 0;
-  mb->plane[3].subsampling_y = 0;
+  xd->plane[3].subsampling_x = 0;
+  xd->plane[3].subsampling_y = 0;
 #endif
 }
 
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -720,6 +720,7 @@
   sf->disable_splitmv = 0;
   sf->mode_search_skip_flags = 0;
   sf->last_chroma_intra_mode = TM_PRED;
+  sf->use_rd_breakout = 0;
 
   // Skip any mode not chosen at size < X for all sizes > X
   // Hence BLOCK_SIZE_SB64X64 (skip is off)
@@ -767,6 +768,7 @@
                                      FLAG_SKIP_INTRA_BESTINTER |
                                      FLAG_SKIP_COMP_BESTINTRA;
         sf->last_chroma_intra_mode = H_PRED;
+        sf->use_rd_breakout = 1;
       }
       if (speed == 2) {
         sf->adjust_thresholds_by_speed = 1;
@@ -790,6 +792,7 @@
                                      FLAG_SKIP_COMP_BESTINTRA |
                                      FLAG_SKIP_COMP_REFMISMATCH;
         sf->last_chroma_intra_mode = DC_PRED;
+        sf->use_rd_breakout = 1;
       }
       if (speed == 3) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -804,6 +807,7 @@
                                      FLAG_SKIP_INTRA_BESTINTER |
                                      FLAG_SKIP_COMP_BESTINTRA |
                                      FLAG_SKIP_COMP_REFMISMATCH;
+        sf->use_rd_breakout = 1;
       }
       if (speed == 4) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -818,6 +822,7 @@
                                      FLAG_SKIP_INTRA_BESTINTER |
                                      FLAG_SKIP_COMP_BESTINTRA |
                                      FLAG_SKIP_COMP_REFMISMATCH;
+        sf->use_rd_breakout = 1;
       }
       /*
       if (speed == 2) {
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -275,6 +275,7 @@
   // defined in the MODE_SEARCH_SKIP_HEURISTICS enum
   unsigned int mode_search_skip_flags;
   MB_PREDICTION_MODE last_chroma_intra_mode;
+  int use_rd_breakout;
 } SPEED_FEATURES;
 
 enum BlockSize {
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -473,6 +473,31 @@
   *out_dist_sum = dist_sum << 4;
 }
 
+static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
+                              MACROBLOCK *x, MACROBLOCKD *xd,
+                              int *out_rate_sum, int64_t *out_dist_sum) {
+  // Note our transform coeffs are 8 times an orthogonal transform.
+  // Hence quantizer step is also 8 times. To get effective quantizer
+  // we need to divide by 8 before sending to modeling function.
+  struct macroblock_plane *const p = &x->plane[0];
+  struct macroblockd_plane *const pd = &xd->plane[0];
+
+  // TODO(dkovalev) the same code in get_plane_block_size
+  const int bw = plane_block_width(bsize, pd);
+  const int bh = plane_block_height(bsize, pd);
+  const enum BlockSize bs = get_block_size(bw, bh);
+  unsigned int sse;
+  int rate;
+  int64_t dist;
+  (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+                            pd->dst.buf, pd->dst.stride, &sse);
+  // sse works better than var, since there is no dc prediction used
+  model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
+
+  *out_rate_sum = rate;
+  *out_dist_sum = dist << 4;
+}
+
 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                  TX_SIZE tx_size,
                                  MACROBLOCK *x, MACROBLOCKD *xd,
@@ -1644,8 +1669,9 @@
   return cost;
 }
 
-static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
+static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                        MACROBLOCK *x,
+                                       int64_t best_yrd,
                                        int i,
                                        int *labelyrate,
                                        int64_t *distortion,
@@ -1652,6 +1678,7 @@
                                        ENTROPY_CONTEXT *ta,
                                        ENTROPY_CONTEXT *tl) {
   int k;
+  VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
   const int bw = plane_block_width(bsize, &xd->plane[0]);
@@ -1673,9 +1700,6 @@
   int64_t thisdistortion = 0;
   int thisrate = 0;
 
-  *labelyrate = 0;
-  *distortion = 0;
-
   vp9_build_inter_predictor(pre,
                             xd->plane[0].pre[0].stride,
                             dst,
@@ -1685,9 +1709,6 @@
                             bw, bh, 0 /* no avg */, &xd->subpix,
                             MV_PRECISION_Q3);
 
-  // TODO(debargha): Make this work properly with the
-  // implicit-compoundinter-weight experiment when implicit
-  // weighting for splitmv modes is turned on.
   if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
     uint8_t* const second_pre =
     raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
@@ -1700,10 +1721,28 @@
                               &xd->subpix, MV_PRECISION_Q3);
   }
 
+  // Turning this section off for now since it hurts quality and does not
+  // improve speed much
+  /*
+  if (cpi->sf.use_rd_breakout &&
+      best_yrd < INT64_MAX) {
+    int64_t thisrd;
+    model_rd_for_sb_y(cpi, bsize, x, xd, &thisrate, &thisdistortion);
+    thisrd = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion);
+    if (thisrd / 2 > best_yrd) {
+      *distortion = thisdistortion;
+      *labelyrate = thisrate;
+      return thisrd;
+    }
+  }
+  */
+
   vp9_subtract_block(bh, bw, src_diff, 8,
                      src, src_stride,
                      dst, xd->plane[0].dst.stride);
 
+  *labelyrate = 0;
+  *distortion = 0;
   k = i;
   for (idy = 0; idy < bh / 4; ++idy) {
     for (idx = 0; idx < bw / 4; ++idx) {
@@ -1789,7 +1828,7 @@
   MB_PREDICTION_MODE this_mode;
   MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
   const int label_count = 4;
-  int64_t this_segment_rd = 0, other_segment_rd;
+  int64_t this_segment_rd = 0;
   int label_mv_thresh;
   int segmentyrate = 0;
   int best_eobs[4] = { 0 };
@@ -1812,8 +1851,6 @@
   label_mv_thresh = 1 * bsi->mvthresh / label_count;
 
   // Segmentation method overheads
-  other_segment_rd = this_segment_rd;
-
   for (idy = 0; idy < 2; idy += bh) {
     for (idx = 0; idx < 2; idx += bw) {
       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
@@ -1820,7 +1857,7 @@
       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
       int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
-      int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
+      int64_t best_label_rd = INT64_MAX;
       MB_PREDICTION_MODE mode_selected = ZEROMV;
       int bestlabelyrate = 0;
       i = idy * 2 + idx;
@@ -1961,8 +1998,9 @@
             mv_check_bounds(x, &second_mode_mv[this_mode]))
           continue;
 
-        this_rd = encode_inter_mb_segment(&cpi->common,
-                                          x, i, &labelyrate,
+        this_rd = encode_inter_mb_segment(cpi, x,
+                                          bsi->segment_rd - this_segment_rd,
+                                          i, &labelyrate,
                                           &distortion, t_above_s, t_left_s);
         this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
         rate += labelyrate;
@@ -1991,8 +2029,12 @@
       bd += sbd;
       segmentyrate += bestlabelyrate;
       this_segment_rd += best_label_rd;
-      other_segment_rd += best_other_rd;
 
+      if (this_segment_rd > bsi->segment_rd) {
+        bsi->segment_rd = INT64_MAX;
+        return;
+      }
+
       for (j = 1; j < bh; ++j)
         vpx_memcpy(&x->partition_info->bmi[i + j * 2],
                    &x->partition_info->bmi[i],
@@ -2004,33 +2046,31 @@
     }
   } /* for each label */
 
-  if (this_segment_rd < bsi->segment_rd) {
-    bsi->r = br;
-    bsi->d = bd;
-    bsi->segment_yrate = segmentyrate;
-    bsi->segment_rd = this_segment_rd;
+  bsi->r = br;
+  bsi->d = bd;
+  bsi->segment_yrate = segmentyrate;
+  bsi->segment_rd = this_segment_rd;
 
-    // store everything needed to come back to this!!
-    for (i = 0; i < 4; i++) {
-      bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
-      if (mbmi->ref_frame[1] > 0)
-        bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
-      bsi->modes[i] = x->partition_info->bmi[i].mode;
-      bsi->eobs[i] = best_eobs[i];
-    }
+  // store everything needed to come back to this!!
+  for (i = 0; i < 4; i++) {
+    bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
+    if (mbmi->ref_frame[1] > 0)
+      bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
+    bsi->modes[i] = x->partition_info->bmi[i].mode;
+    bsi->eobs[i] = best_eobs[i];
   }
 }
 
-static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
-                                       int_mv *best_ref_mv,
-                                       int_mv *second_best_ref_mv,
-                                       int64_t best_rd,
-                                       int *returntotrate,
-                                       int *returnyrate,
-                                       int64_t *returndistortion,
-                                       int *skippable, int mvthresh,
-                                       int_mv seg_mvs[4][MAX_REF_FRAMES],
-                                       int mi_row, int mi_col) {
+static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
+                                           int_mv *best_ref_mv,
+                                           int_mv *second_best_ref_mv,
+                                           int64_t best_rd,
+                                           int *returntotrate,
+                                           int *returnyrate,
+                                           int64_t *returndistortion,
+                                           int *skippable, int mvthresh,
+                                           int_mv seg_mvs[4][MAX_REF_FRAMES],
+                                           int mi_row, int mi_col) {
   int i;
   BEST_SEG_INFO bsi;
   MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -2079,7 +2119,7 @@
   *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
   mbmi->mode = bsi.modes[3];
 
-  return (int)(bsi.segment_rd);
+  return bsi.segment_rd;
 }
 
 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2586,6 +2626,7 @@
   int best_needs_copy = 0;
   uint8_t *orig_dst[MAX_MB_PLANE];
   int orig_dst_stride[MAX_MB_PLANE];
+  int rs = 0;
 
   switch (this_mode) {
     int rate_mv;
@@ -2659,6 +2700,14 @@
   *rate2 += cost_mv_ref(cpi, this_mode,
                         mbmi->mb_mode_context[mbmi->ref_frame[0]]);
 
+  if (!(*mode_excluded)) {
+    if (is_comp_pred) {
+      *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
+    } else {
+      *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
+    }
+  }
+
   pred_exists = 0;
   interpolating_intpel_seen = 0;
   // Are all MVs integer pel for Y and UV
@@ -2669,6 +2718,7 @@
         (mbmi->mv[1].as_mv.col & 15) == 0;
   // Search for best switchable filter by checking the variance of
   // pred error irrespective of whether the filter will be used
+  *best_filter = EIGHTTAP;
   if (cpi->sf.use_8tap_always) {
     *best_filter = EIGHTTAP;
     vp9_zero(cpi->rd_filter_cache);
@@ -2679,7 +2729,7 @@
 
     cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
     for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
-      int rs, j;
+      int j;
       int64_t rs_rd;
       const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
       const int is_intpel_interp = intpel_mv &&
@@ -2731,6 +2781,15 @@
           tmp_dist_sum = dist_sum;
         }
       }
+      if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+        if (rd / 2 > ref_best_rd) {
+          for (i = 0; i < MAX_MB_PLANE; i++) {
+            xd->plane[i].dst.buf = orig_dst[i];
+            xd->plane[i].dst.stride = orig_dst_stride[i];
+          }
+          return INT64_MAX;
+        }
+      }
       newbest = i == 0 || rd < best_rd;
 
       if (newbest) {
@@ -2754,11 +2813,11 @@
       xd->plane[i].dst.stride = orig_dst_stride[i];
     }
   }
-
   // Set the appripriate filter
   mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
       cm->mcomp_filter_type : *best_filter;
   vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+  rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0);
 
   if (pred_exists) {
     if (best_needs_copy) {
@@ -2774,6 +2833,23 @@
     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
   }
 
+
+  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+    int tmp_rate;
+    int64_t tmp_dist;
+    model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
+    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+    // if current pred_error modeled rd is substantially more than the best
+    // so far, do not bother doing full rd
+    if (rd / 2 > ref_best_rd) {
+      for (i = 0; i < MAX_MB_PLANE; i++) {
+        xd->plane[i].dst.buf = orig_dst[i];
+        xd->plane[i].dst.stride = orig_dst_stride[i];
+      }
+      return INT64_MAX;
+    }
+  }
+
   if (cpi->common.mcomp_filter_type == SWITCHABLE)
     *rate2 += get_switchable_rate(cm, x);
 
@@ -2818,7 +2894,7 @@
           *distortion = sse + sse2;
           *rate2 = 500;
 
-          // for best_yrd calculation
+          // for best yrd calculation
           *rate_uv = 0;
           *distortion_uv = sse2;
 
@@ -2859,14 +2935,6 @@
     *skippable = skippable_y && skippable_uv;
   }
 
-  if (!(*mode_excluded)) {
-    if (is_comp_pred) {
-      *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
-    } else {
-      *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
-    }
-  }
-
   for (i = 0; i < MAX_MB_PLANE; i++) {
     xd->plane[i].dst.buf = orig_dst[i];
     xd->plane[i].dst.stride = orig_dst_stride[i];
@@ -2963,6 +3031,7 @@
                      cpi->gld_fb_idx,
                      cpi->alt_fb_idx};
   int64_t best_rd = INT64_MAX;
+  int64_t best_yrd = INT64_MAX;
   int64_t best_txfm_rd[NB_TXFM_MODES];
   int64_t best_txfm_diff[NB_TXFM_MODES];
   int64_t best_pred_diff[NB_PREDICTION_TYPES];
@@ -3358,16 +3427,20 @@
         int newbest, rs;
         int64_t rs_rd;
         mbmi->interp_filter =
-        vp9_switchable_interp[switchable_filter_index];
+            vp9_switchable_interp[switchable_filter_index];
         vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
 
         tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
                      &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
-                     second_ref, INT64_MAX,
+                     second_ref,
+                     best_yrd,
                      &rate, &rate_y, &distortion,
                      &skippable,
                      (int)this_rd_thresh, seg_mvs,
                      mi_row, mi_col);
+        if (tmp_rd == INT64_MAX) {
+          continue;
+        }
         cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
         rs = get_switchable_rate(cm, x);
         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
@@ -3375,6 +3448,7 @@
             MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
         if (cm->mcomp_filter_type == SWITCHABLE)
           tmp_rd += rs_rd;
+
         newbest = (tmp_rd < tmp_best_rd);
         if (newbest) {
           tmp_best_filter = mbmi->interp_filter;
@@ -3393,8 +3467,21 @@
               for (i = 0; i < 4; i++)
                 tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
               pred_exists = 1;
+              if (switchable_filter_index == 0 &&
+                  cpi->sf.use_rd_breakout &&
+                  best_rd < INT64_MAX) {
+                if (tmp_best_rdu / 2 > best_rd) {
+                  // skip searching the other filters if the first is
+                  // already substantially larger than the best so far
+                  tmp_best_filter = mbmi->interp_filter;
+                  tmp_best_rdu = INT64_MAX;
+                  break;
+                }
+              }
             }
       }  // switchable_filter_index loop
+      if (tmp_best_rdu == INT64_MAX)
+        continue;
 
       mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
                              tmp_best_filter : cm->mcomp_filter_type);
@@ -3404,11 +3491,14 @@
         // switchable list (bilinear, 6-tap) is indicated at the frame level
         tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
                      &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
-                     second_ref, INT64_MAX,
+                     second_ref,
+                     best_yrd,
                      &rate, &rate_y, &distortion,
                      &skippable,
                      (int)this_rd_thresh, seg_mvs,
                      mi_row, mi_col);
+        if (tmp_rd == INT64_MAX)
+          continue;
       } else {
         if (cpi->common.mcomp_filter_type == SWITCHABLE) {
           int rs = get_switchable_rate(cm, x);
@@ -3431,21 +3521,6 @@
       if (cpi->common.mcomp_filter_type == SWITCHABLE)
         rate2 += get_switchable_rate(cm, x);
 
-      // If even the 'Y' rd value of split is higher than best so far
-      // then dont bother looking at UV
-      vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
-                                      BLOCK_SIZE_SB8X8);
-      vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
-      super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
-                                &uv_skippable, NULL, BLOCK_SIZE_SB8X8, TX_4X4);
-      rate2 += rate_uv;
-      distortion2 += distortion_uv;
-      skippable = skippable && uv_skippable;
-
-      txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-      for (i = 0; i < NB_TXFM_MODES; ++i)
-        txfm_cache[i] = txfm_cache[ONLY_4X4];
-
       if (!mode_excluded) {
         if (is_comp_pred)
           mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
@@ -3452,8 +3527,26 @@
         else
           mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
       }
-
       compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
+
+      if (RDCOST(x->rdmult, x->rddiv, rate2, distortion2) <
+          best_rd) {
+        // If even the 'Y' rd value of split is higher than best so far
+        // then dont bother looking at UV
+        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
+                                        BLOCK_SIZE_SB8X8);
+        vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
+        super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
+                                  &uv_skippable, NULL,
+                                  BLOCK_SIZE_SB8X8, TX_4X4);
+        rate2 += rate_uv;
+        distortion2 += distortion_uv;
+        skippable = skippable && uv_skippable;
+
+        txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+        for (i = 0; i < NB_TXFM_MODES; ++i)
+          txfm_cache[i] = txfm_cache[ONLY_4X4];
+      }
     } else {
       compmode_cost = vp9_cost_bit(comp_mode_p,
                                    mbmi->ref_frame[1] > INTRA_FRAME);
@@ -3495,7 +3588,7 @@
       if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
         // Back out the coefficient coding costs
         rate2 -= (rate_y + rate_uv);
-        // for best_yrd calculation
+        // for best yrd calculation
         rate_uv = 0;
 
         if (mb_skip_allowed) {
@@ -3593,6 +3686,8 @@
         *returnrate = rate2;
         *returndistortion = distortion2;
         best_rd = this_rd;
+        best_yrd = best_rd -
+                   RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
         best_mbmode = *mbmi;
         best_skip2 = this_skip2;
         best_partition = *x->partition_info;
--