shithub: libvpx

Download patch

ref: 706f1f10e016f30f6f68afd938e25df9765ffb87
parent: ee554c8cebcb0e3a7a549b98a8b3d398fc88fd07
author: Angie Chiang <angiebird@google.com>
date: Wed Jul 17 08:36:14 EDT 2019

Make vp9_prepare_nb_full_mvs only return valid mvs

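vp9_prepare_nb_full_mvs now packs only the valid neighbor mvs at the
front of nb_full_mvs and returns their count, so
vp9_nb_mvs_inconsistency no longer needs to check whether each
neighbor mv is valid.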

This reduces non_greedy_mv encoding time by 1.5%.

Change-Id: I3216c98481e777d5e0b917ea20ee39b7ca9c9d23

--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5912,6 +5912,7 @@
   // TODO(angiebird): Figure out lambda's proper value.
   const int lambda = cpi->tpl_stats[frame_idx].lambda;
   int_mv nb_full_mvs[NB_MVS_NUM];
+  int nb_full_mv_num;
 #endif
 
   MV best_ref_mv1 = { 0, 0 };
@@ -5934,10 +5935,11 @@
 #if CONFIG_NON_GREEDY_MV
   (void)search_method;
   (void)sadpb;
-  vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx,
-                          bsize, nb_full_mvs);
+  nb_full_mv_num = vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row,
+                                           mi_col, rf_idx, bsize, nb_full_mvs);
   vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,
-                             &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv);
+                             &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,
+                             mv);
 #else
   (void)frame_idx;
   (void)mi_row;
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1899,7 +1899,7 @@
   }
 }
 
-int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
+int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs,
                                  int mv_num) {
   // The bahavior of this function is to compute log2 of mv difference,
   // i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff)
@@ -1910,15 +1910,15 @@
   int i;
   int64_t min_abs_diff = INT64_MAX;
   int cnt = 0;
+  assert(mv_num <= NB_MVS_NUM);
   for (i = 0; i < mv_num; ++i) {
-    if (nb_mvs[i].as_int != INVALID_MV) {
-      MV nb_mv = nb_mvs[i].as_mv;
-      const int64_t row_diff = abs(mv->row - nb_mv.row);
-      const int64_t col_diff = abs(mv->col - nb_mv.col);
-      const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff;
-      min_abs_diff = VPXMIN(abs_diff, min_abs_diff);
-      ++cnt;
-    }
+    MV nb_mv = nb_full_mvs[i].as_mv;
+    const int64_t row_diff = abs(mv->row - nb_mv.row);
+    const int64_t col_diff = abs(mv->col - nb_mv.col);
+    const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff;
+    assert(nb_full_mvs[i].as_int != INVALID_MV);
+    min_abs_diff = VPXMIN(abs_diff, min_abs_diff);
+    ++cnt;
   }
   if (cnt) {
     return log2_approximation(1 + min_abs_diff);
@@ -2251,12 +2251,13 @@
   return bestsad;
 }
 
-void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
-                             int mi_col, int rf_idx, BLOCK_SIZE bsize,
-                             int_mv *nb_full_mvs) {
+int vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
+                            int mi_col, int rf_idx, BLOCK_SIZE bsize,
+                            int_mv *nb_full_mvs) {
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
   const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+  int nb_full_mv_num = 0;
   int i;
   for (i = 0; i < NB_MVS_NUM; ++i) {
     int r = dirs[i][0] * mi_height;
@@ -2266,17 +2267,15 @@
       const TplDepStats *tpl_ptr =
           &tpl_frame
                ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
-      int_mv *mv =
-          get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
       if (tpl_ptr->ready[rf_idx]) {
-        nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);
-      } else {
-        nb_full_mvs[i].as_int = INVALID_MV;
+        int_mv *mv =
+            get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
+        nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv->as_mv);
+        ++nb_full_mv_num;
       }
-    } else {
-      nb_full_mvs[i].as_int = INVALID_MV;
     }
   }
+  return nb_full_mv_num;
 }
 #endif  // CONFIG_NON_GREEDY_MV
 
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -149,9 +149,9 @@
   return out_mv;
 }
 struct TplDepFrame;
-void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
-                             int mi_col, int rf_idx, BLOCK_SIZE bsize,
-                             int_mv *nb_full_mvs);
+int vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
+                            int mi_col, int rf_idx, BLOCK_SIZE bsize,
+                            int_mv *nb_full_mvs);
 
 static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) {
   BLOCK_SIZE square_bsize;
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2496,15 +2496,15 @@
 
   int bestsme = INT_MAX;
 #if CONFIG_NON_GREEDY_MV
-  int_mv nb_full_mvs[NB_MVS_NUM];
-  const int nb_full_mv_num = NB_MVS_NUM;
   int gf_group_idx = cpi->twopass.gf_group.index;
   int gf_rf_idx = ref_frame_to_gf_rf_idx(ref);
   BLOCK_SIZE square_bsize = get_square_block_size(bsize);
+  int_mv nb_full_mvs[NB_MVS_NUM];
+  const int nb_full_mv_num =
+      vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,
+                              gf_rf_idx, square_bsize, nb_full_mvs);
   const int lambda = (pw * ph) / 4;
   assert(pw * ph == lambda << 2);
-  vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,
-                          gf_rf_idx, square_bsize, nb_full_mvs);
 #else   // CONFIG_NON_GREEDY_MV
   int sadpb = x->sadperbit16;
 #endif  // CONFIG_NON_GREEDY_MV