shithub: libvpx

ref: d749bc7b33230f128baec7ecfa19c0d3e1607956
parent: bb407a27b2e32f89f0e9eeee2bcd0aa9d5cfea3f
parent: 037d67f684683ffad22e38ab9a6381ccfedd813f
author: Angie Chiang <angiebird@google.com>
date: Mon Jul 15 14:40:10 EDT 2019

Merge changes I9288c88d,Ib1ac6f57,I02fac56a,Id6a8b117

* changes:
  Use sdx8f in exhaustive_mesh_search_single_step
  Sync the behavior of exhaustive_mesh_search
  Refactor exhaustive_mesh_search_new
  Simplify code in exhaustive_mesh_search_new

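For context: sdx8f is an x8 SAD function of type vpx_sad_multi_fn_t; a single
call returns the SADs of the source block against eight consecutive column
positions in the reference buffer, which is what lets the mesh search batch
eight candidate motion vectors per call. A minimal scalar sketch of that
contract follows (the width and height parameters are hypothetical and only
for illustration; the kernels the encoder actually registers, such as
vpx_sad16x16x8, are fixed-size and SIMD-optimized):

    #include <stdint.h>
    #include <stdlib.h>

    /* Illustrative sketch, not part of this patch: sads[i] holds the SAD of
     * the width x height source block against the reference block starting
     * at column offset i (0 <= i < 8). */
    static void sad_x8_sketch(const uint8_t *src, int src_stride,
                              const uint8_t *ref, int ref_stride,
                              int width, int height, unsigned int sads[8]) {
      int i, r, c;
      for (i = 0; i < 8; ++i) {
        unsigned int sad = 0;
        for (r = 0; r < height; ++r) {
          for (c = 0; c < width; ++c) {
            sad += abs(src[r * src_stride + c] - ref[r * ref_stride + c + i]);
          }
        }
        sads[i] = sad;
      }
    }
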
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1533,6 +1533,7 @@
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
+// TODO(angiebird): make sdx8f available for highbitdepth if needed
 #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
   cpi->fn_ptr[BT].sdf = SDF;                             \
   cpi->fn_ptr[BT].sdaf = SDAF;                           \
@@ -1539,7 +1540,8 @@
   cpi->fn_ptr[BT].vf = VF;                               \
   cpi->fn_ptr[BT].svf = SVF;                             \
   cpi->fn_ptr[BT].svaf = SVAF;                           \
-  cpi->fn_ptr[BT].sdx4df = SDX4DF;
+  cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
+  cpi->fn_ptr[BT].sdx8f = NULL;
 
 #define MAKE_BFP_SAD_WRAPPER(fnname)                                           \
   static unsigned int fnname##_bits8(const uint8_t *src_ptr,                   \
@@ -2438,62 +2440,67 @@
   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
   cpi->source_var_thresh = 0;
   cpi->frames_till_next_var_check = 0;
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
+  cpi->fn_ptr[BT].sdf = SDF;                             \
+  cpi->fn_ptr[BT].sdaf = SDAF;                           \
+  cpi->fn_ptr[BT].vf = VF;                               \
+  cpi->fn_ptr[BT].svf = SVF;                             \
+  cpi->fn_ptr[BT].svaf = SVAF;                           \
+  cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
+  cpi->fn_ptr[BT].sdx8f = SDX8F;
 
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
-  cpi->fn_ptr[BT].sdf = SDF;                      \
-  cpi->fn_ptr[BT].sdaf = SDAF;                    \
-  cpi->fn_ptr[BT].vf = VF;                        \
-  cpi->fn_ptr[BT].svf = SVF;                      \
-  cpi->fn_ptr[BT].svaf = SVAF;                    \
-  cpi->fn_ptr[BT].sdx4df = SDX4DF;
-
+  // TODO(angiebird): make sdx8f available for every block size
   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
-      vpx_sad32x16x4d)
+      vpx_sad32x16x4d, NULL)
 
   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
-      vpx_sad16x32x4d)
+      vpx_sad16x32x4d, NULL)
 
   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
-      vpx_sad64x32x4d)
+      vpx_sad64x32x4d, NULL)
 
   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
-      vpx_sad32x64x4d)
+      vpx_sad32x64x4d, NULL)
 
   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
-      vpx_sad32x32x4d)
+      vpx_sad32x32x4d, NULL)
 
   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
-      vpx_sad64x64x4d)
+      vpx_sad64x64x4d, NULL)
 
   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
-      vpx_sad16x16x4d)
+      vpx_sad16x16x4d, vpx_sad16x16x8)
 
   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
-      vpx_sad16x8x4d)
+      vpx_sad16x8x4d, vpx_sad16x8x8)
 
   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
-      vpx_sad8x16x4d)
+      vpx_sad8x16x4d, vpx_sad8x16x8)
 
   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
-      vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d)
+      vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
+      vpx_sad8x8x8)
 
   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
-      vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d)
+      vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
+      NULL)
 
   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
-      vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d)
+      vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
+      NULL)
 
   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
-      vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d)
+      vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
+      vpx_sad4x4x8)
 
 #if CONFIG_VP9_HIGHBITDEPTH
   highbd_set_var_fns(cpi);
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1923,47 +1923,99 @@
   return best_cost;
 }
 
-static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
-                                          int range, int step,
-                                          const vp9_variance_fn_ptr_t *fn_ptr,
-                                          const MV *center_mv, int lambda,
-                                          const int_mv *nb_full_mvs,
-                                          int full_mv_num) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  MV fcenter_mv = { center_mv->row, center_mv->col };
+static int64_t exhaustive_mesh_search_multi_step(
+    MV *best_mv, const MV *center_mv, int range, int step,
+    const struct buf_2d *src, const struct buf_2d *pre, int lambda,
+    const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits,
+    const vp9_variance_fn_ptr_t *fn_ptr) {
   int64_t best_sad;
+  int r, c;
+  int start_col, end_col, start_row, end_row;
+  *best_mv = *center_mv;
+  best_sad =
+      ((int64_t)fn_ptr->sdf(src->buf, src->stride,
+                            get_buf_from_mv(pre, center_mv), pre->stride)
+       << LOG2_PRECISION) +
+      lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
+  start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
+  start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
+  end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
+  end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
+  for (r = start_row; r <= end_row; r += step) {
+    for (c = start_col; c <= end_col; c += step) {
+      const MV mv = { r, c };
+      int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
+                                         get_buf_from_mv(pre, &mv), pre->stride)
+                    << LOG2_PRECISION;
+      if (sad < best_sad) {
+        sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+        if (sad < best_sad) {
+          best_sad = sad;
+          *best_mv = mv;
+        }
+      }
+    }
+  }
+  return best_sad;
+}
+
+static int64_t exhaustive_mesh_search_single_step(
+    MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src,
+    const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs,
+    int full_mv_num, const MvLimits *mv_limits,
+    const vp9_variance_fn_ptr_t *fn_ptr) {
+  int64_t best_sad;
   int r, c, i;
   int start_col, end_col, start_row, end_row;
-  int col_step = (step > 1) ? step : 4;
 
-  assert(step >= 1);
-
-  clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-           x->mv_limits.row_min, x->mv_limits.row_max);
-  *best_mv = fcenter_mv;
+  *best_mv = *center_mv;
   best_sad =
-      ((int64_t)fn_ptr->sdf(what->buf, what->stride,
-                            get_buf_from_mv(in_what, &fcenter_mv),
-                            in_what->stride)
+      ((int64_t)fn_ptr->sdf(src->buf, src->stride,
+                            get_buf_from_mv(pre, center_mv), pre->stride)
        << LOG2_PRECISION) +
-      lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num);
-  start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
-  start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
-  end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
-  end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
+      lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
+  start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
+  start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
+  end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
+  end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
+  for (r = start_row; r <= end_row; r += 1) {
+    c = start_col;
+    // sdx8f may not be available for some block sizes
+    if (fn_ptr->sdx8f) {
+      while (c + 7 <= end_col) {
+        unsigned int sads[8];
+        const MV mv = { r, c };
+        const uint8_t *buf = get_buf_from_mv(pre, &mv);
+        fn_ptr->sdx8f(src->buf, src->stride, buf, pre->stride, sads);
 
-  for (r = start_row; r <= end_row; r += step) {
-    for (c = start_col; c <= end_col; c += col_step) {
-      // Step > 1 means we are not checking every location in this pass.
-      if (step > 1) {
-        const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
-        int64_t sad =
-            (int64_t)fn_ptr->sdf(what->buf, what->stride,
-                                 get_buf_from_mv(in_what, &mv), in_what->stride)
-            << LOG2_PRECISION;
+        for (i = 0; i < 8; ++i) {
+          int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
+          if (sad < best_sad) {
+            const MV mv = { r, c + i };
+            sad += lambda *
+                   vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
+            }
+          }
+        }
+        c += 8;
+      }
+    }
+    while (c + 3 <= end_col) {
+      unsigned int sads[4];
+      const uint8_t *addrs[4];
+      for (i = 0; i < 4; ++i) {
+        const MV mv = { r, c + i };
+        addrs[i] = get_buf_from_mv(pre, &mv);
+      }
+      fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads);
+
+      for (i = 0; i < 4; ++i) {
+        int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
         if (sad < best_sad) {
+          const MV mv = { r, c + i };
           sad +=
               lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
           if (sad < best_sad) {
@@ -1971,51 +2023,46 @@
             *best_mv = mv;
           }
         }
-      } else {
-        // 4 sads in a single call if we are checking every location
-        if (c + 3 <= end_col) {
-          unsigned int sads[4];
-          const uint8_t *addrs[4];
-          for (i = 0; i < 4; ++i) {
-            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-            addrs[i] = get_buf_from_mv(in_what, &mv);
-          }
-          fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
-
-          for (i = 0; i < 4; ++i) {
-            int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
-            if (sad < best_sad) {
-              const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-              sad += lambda *
-                     vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
-              if (sad < best_sad) {
-                best_sad = sad;
-                *best_mv = mv;
-              }
-            }
-          }
-        } else {
-          for (i = 0; i < end_col - c; ++i) {
-            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-            int64_t sad = (int64_t)fn_ptr->sdf(what->buf, what->stride,
-                                               get_buf_from_mv(in_what, &mv),
-                                               in_what->stride)
-                          << LOG2_PRECISION;
-            if (sad < best_sad) {
-              sad += lambda *
-                     vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
-              if (sad < best_sad) {
-                best_sad = sad;
-                *best_mv = mv;
-              }
-            }
-          }
+      }
+      c += 4;
+    }
+    while (c <= end_col) {
+      const MV mv = { r, c };
+      int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
+                                         get_buf_from_mv(pre, &mv), pre->stride)
+                    << LOG2_PRECISION;
+      if (sad < best_sad) {
+        sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+        if (sad < best_sad) {
+          best_sad = sad;
+          *best_mv = mv;
         }
       }
+      c += 1;
     }
   }
-
   return best_sad;
+}
+
+static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
+                                          int range, int step,
+                                          const vp9_variance_fn_ptr_t *fn_ptr,
+                                          const MV *center_mv, int lambda,
+                                          const int_mv *nb_full_mvs,
+                                          int full_mv_num) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *src = &x->plane[0].src;
+  const struct buf_2d *pre = &xd->plane[0].pre[0];
+  assert(step >= 1);
+  assert(is_mv_in(&x->mv_limits, center_mv));
+  if (step == 1) {
+    return exhaustive_mesh_search_single_step(
+        best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num,
+        &x->mv_limits, fn_ptr);
+  }
+  return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src,
+                                           pre, lambda, nb_full_mvs,
+                                           full_mv_num, &x->mv_limits, fn_ptr);
 }
 
 static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
--- a/vpx_dsp/variance.h
+++ b/vpx_dsp/variance.h
@@ -76,6 +76,7 @@
   vpx_subpixvariance_fn_t svf;
   vpx_subp_avg_variance_fn_t svaf;
   vpx_sad_multi_d_fn_t sdx4df;
+  vpx_sad_multi_fn_t sdx8f;
 } vp9_variance_fn_ptr_t;
 #endif  // CONFIG_VP9
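
Usage note: sdx8f is only populated for a subset of block sizes and is left
NULL in the high-bit-depth table, so callers must test the pointer before
batching eight columns, falling back to sdx4df and the plain sdf, exactly as
exhaustive_mesh_search_single_step does above. A stripped-down sketch of that
row-scan pattern (the helper name and the num_cols parameter are hypothetical;
the function pointers are the vp9_variance_fn_ptr_t fields from this patch,
assuming vpx_dsp/variance.h is included):

    /* Hypothetical helper showing the 8-wide / 4-wide / scalar fallback used
     * when scanning one row of candidate positions.  pre_row points at the
     * reference pixels for column offset 0 of the row. */
    static void row_sads_sketch(const vp9_variance_fn_ptr_t *fn_ptr,
                                const uint8_t *src, int src_stride,
                                const uint8_t *pre_row, int pre_stride,
                                int num_cols, unsigned int *sads) {
      int c = 0;
      if (fn_ptr->sdx8f != NULL) {  /* only some block sizes provide it */
        for (; c + 8 <= num_cols; c += 8)
          fn_ptr->sdx8f(src, src_stride, pre_row + c, pre_stride, sads + c);
      }
      for (; c + 4 <= num_cols; c += 4) {
        const uint8_t *refs[4] = { pre_row + c, pre_row + c + 1,
                                   pre_row + c + 2, pre_row + c + 3 };
        fn_ptr->sdx4df(src, src_stride, refs, pre_stride, sads + c);
      }
      for (; c < num_cols; ++c)
        sads[c] = fn_ptr->sdf(src, src_stride, pre_row + c, pre_stride);
    }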