shithub: libvpx

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -5545,12 +5545,13 @@

   /* restore UMV window */

   x->mv_limits = tmp_mv_limits;

+  // TODO(yunqing): may use higher tap interp filter than 2 taps if needed.

   // Ignore mv costing by sending NULL pointer instead of cost array

   bestsme = cpi->find_fractional_mv_step(

       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,

       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,

-      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,

-      0);

+      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,

+      USE_2_TAPS);

   return bestsme;

--- a/vp9/encoder/vp9_mbgraph.c

+++ b/vp9/encoder/vp9_mbgraph.c

@@ -57,11 +57,12 @@

     uint32_t distortion;

     uint32_t sse;

+    // TODO(yunqing): may use higher tap interp filter than 2 taps if needed.

     cpi->find_fractional_mv_step(

         x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,

         &v_fn_ptr, 0, mv_sf->subpel_search_level,

         cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,

-        0);

+        0, USE_2_TAPS);

   xd->mi[0]->mode = NEWMV;

--- a/vp9/encoder/vp9_mcomp.c

+++ b/vp9/encoder/vp9_mcomp.c

@@ -367,14 +367,12 @@

   *ir = (int)divide_and_round(x1 * b, y1);

-uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,

-                                 const MV *ref_mv, int allow_hp,

-                                 int error_per_bit,

-                                 const vp9_variance_fn_ptr_t *vfp,

-                                 int forced_stop, int iters_per_step,

-                                 int *cost_list, int *mvjcost, int *mvcost[2],

-                                 uint32_t *distortion, uint32_t *sse1,

-                                 const uint8_t *second_pred, int w, int h) {

+uint32_t vp9_skip_sub_pixel_tree(

+    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,

+    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,

+    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],

+    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,

+    int h, int use_accurate_subpel_search) {

   SETUP_SUBPEL_SEARCH;

   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,

                                src_stride, y, y_stride, second_pred, w, h,

@@ -397,6 +395,7 @@

   (void)sse;

   (void)thismse;

   (void)cost_list;

+  (void)use_accurate_subpel_search;

   return besterr;

@@ -406,7 +405,7 @@

     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,

     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],

     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,

-    int h) {

+    int h, int use_accurate_subpel_search) {

   SETUP_SUBPEL_SEARCH;

   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,

                                src_stride, y, y_stride, second_pred, w, h,

@@ -418,6 +417,7 @@

   (void)allow_hp;

   (void)forced_stop;

   (void)hstep;

+  (void)use_accurate_subpel_search;

   if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&

       cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&

@@ -471,8 +471,10 @@

     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,

     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],

     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,

-    int h) {

+    int h, int use_accurate_subpel_search) {

   SETUP_SUBPEL_SEARCH;

+  (void)use_accurate_subpel_search;

   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,

                                src_stride, y, y_stride, second_pred, w, h,

                                offset, mvjcost, mvcost, sse1, distortion);

@@ -531,8 +533,10 @@

     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,

     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],

     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,

-    int h) {

+    int h, int use_accurate_subpel_search) {

   SETUP_SUBPEL_SEARCH;

+  (void)use_accurate_subpel_search;

   besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,

                                src_stride, y, y_stride, second_pred, w, h,

                                offset, mvjcost, mvcost, sse1, distortion);

@@ -622,7 +626,7 @@

     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,

     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],

     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,

-    int h) {

+    int h, int use_accurate_subpel_search) {

   const uint8_t *const z = x->plane[0].src.buf;

   const uint8_t *const src_address = z;

   const int src_stride = x->plane[0].src.stride;

@@ -650,6 +654,8 @@

   int kr, kc;

   MvLimits subpel_mv_limits;

+  (void)use_accurate_subpel_search;

   vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);

   minc = subpel_mv_limits.col_min;

   maxc = subpel_mv_limits.col_max;

@@ -2587,7 +2593,8 @@

   (void)tc;            \

   (void)sse;           \

   (void)thismse;       \

-  (void)cost_list;

+  (void)cost_list;     \

+  (void)use_accurate_subpel_search;

 // Return the maximum MV.

 uint32_t vp9_return_max_sub_pixel_mv(

@@ -2595,7 +2602,7 @@

     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,

     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],

     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,

-    int h) {

+    int h, int use_accurate_subpel_search) {

   COMMON_MV_TEST;

   (void)minr;

@@ -2617,7 +2624,7 @@

     int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,

     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],

     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,

-    int h) {

+    int h, int use_accurate_subpel_search) {

   COMMON_MV_TEST;

   (void)maxr;

--- a/vp9/encoder/vp9_mcomp.h

+++ b/vp9/encoder/vp9_mcomp.h

@@ -75,7 +75,7 @@

     int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only

     int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],

     uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,

-    int h);

+    int h, int use_accurate_subpel_search);

 extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;

 extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;

--- a/vp9/encoder/vp9_pickmode.c

+++ b/vp9/encoder/vp9_pickmode.c

@@ -247,7 +247,8 @@

         x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,

         x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,

         cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),

-        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);

+        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0,

+        cpi->sf.use_accurate_subpel_search);

     *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,

                                x->mvcost, MV_COST_WEIGHT);

@@ -1539,7 +1540,8 @@

         cpi->common.allow_high_precision_mv, x->errorperbit,

         &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,

         cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),

-        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0);

+        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0,

+        cpi->sf.use_accurate_subpel_search);

   } else if (svc->use_base_mv && svc->spatial_layer_id) {

     if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) {

       const int pre_stride = xd->plane[0].pre[0].stride;

@@ -2758,7 +2760,8 @@

                 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,

                 cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),

                 x->nmvjointcost, x->mvcost, &dummy_dist,

-                &x->pred_sse[ref_frame], NULL, 0, 0);

+                &x->pred_sse[ref_frame], NULL, 0, 0,

+                cpi->sf.use_accurate_subpel_search);

             xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv;

           } else {

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -1821,7 +1821,7 @@

           x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,

           x->errorperbit, &cpi->fn_ptr[bsize], 0,

           cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost,

-          &dis, &sse, second_pred, pw, ph);

+          &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search);

     // Restore the pointer to the first (possibly scaled) prediction buffer.

@@ -1875,6 +1875,8 @@

   const BLOCK_SIZE bsize = mi->sb_type;

   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];

   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];

+  const int pw = num_4x4_blocks_wide << 2;

+  const int ph = num_4x4_blocks_high << 2;

   ENTROPY_CONTEXT t_above[2], t_left[2];

   int subpelmv = 1, have_ref = 0;

   SPEED_FEATURES *const sf = &cpi->sf;

@@ -2011,7 +2013,8 @@

                 x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop,

                 sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list),

                 x->nmvjointcost, x->mvcost, &distortion,

-                &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0);

+                &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph,

+                cpi->sf.use_accurate_subpel_search);

             // save motion search result for use in compound prediction

             seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;

@@ -2330,6 +2333,8 @@

   const int best_predmv_idx = x->mv_best_ref_index[ref];

   const YV12_BUFFER_CONFIG *scaled_ref_frame =

       vp9_get_scaled_ref_frame(cpi, ref);

+  const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;

+  const int ph = num_4x4_blocks_high_lookup[bsize] << 2;

   MV pred_mv[3];

   pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;

@@ -2452,7 +2457,8 @@

         x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,

         &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,

         cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),

-        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);

+        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph,

+        cpi->sf.use_accurate_subpel_search);

   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,

                              x->mvcost, MV_COST_WEIGHT);

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -288,6 +288,7 @@

     sf->exhaustive_searches_thresh =

         (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)

                                                                 : INT_MAX;

+    sf->use_accurate_subpel_search = USE_2_TAPS;

   if (speed >= 2) {

@@ -450,6 +451,7 @@

   sf->disable_golden_ref = 0;

   sf->enable_tpl_model = 0;

   sf->enhanced_full_pixel_motion_search = 0;

+  sf->use_accurate_subpel_search = USE_2_TAPS;

   if (speed >= 1) {

     sf->allow_txfm_domain_distortion = 1;

@@ -942,6 +944,7 @@

   sf->ml_prune_rect_partition_threhold[2] = -1;

   sf->ml_prune_rect_partition_threhold[3] = -1;

   sf->ml_var_partition_pruning = 0;

+  sf->use_accurate_subpel_search = USE_8_TAPS;

   // Some speed-up features even for best quality as minimal impact on quality.

   sf->adaptive_rd_thresh = 1;

--- a/vp9/encoder/vp9_speed_features.h

+++ b/vp9/encoder/vp9_speed_features.h

@@ -243,6 +243,12 @@

   RE_ENCODE_MAXQ = 2

 } OVERSHOOT_DETECTION_CBR_RT;

+typedef enum {

+  USE_2_TAPS = 0,

+  USE_4_TAPS,

+  USE_8_TAPS,

+} SUBPEL_SEARCH_TYPE;

 typedef struct SPEED_FEATURES {

   MV_SPEED_FEATURES mv;

@@ -586,6 +592,10 @@

   // Allow for disabling golden reference.

   int disable_golden_ref;

+  // Allow sub-pixel search to use interpolation filters with different taps in

+  // order to achieve accurate motion search results.

+  SUBPEL_SEARCH_TYPE use_accurate_subpel_search;

 } SPEED_FEATURES;

 struct VP9_COMP;

--- a/vp9/encoder/vp9_temporal_filter.c

+++ b/vp9/encoder/vp9_temporal_filter.c

@@ -421,12 +421,13 @@

   /* restore UMV window */

   x->mv_limits = tmp_mv_limits;

+  // TODO(yunqing): may use higher tap interp filter than 2 taps if needed.

   // Ignore mv costing by sending NULL pointer instead of cost array

   bestsme = cpi->find_fractional_mv_step(

       x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,

       x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_search_level,

-      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,

-      0);

+      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,

+      USE_2_TAPS);

   // Restore input state

   x->plane[0].src = src;

--

⑨