shithub: libvpx

Download patch

ref: d3c78fd71ffea6c55f2705de2eb0c92449c4fc82
parent: 534e61d5a3fc121bfcc52969acc4c9e7ac798d14
author: Angie Chiang <angiebird@google.com>
date: Tue Oct 16 08:31:13 EDT 2018

Variant implementation of changing mv search order

We start mv search from the block with highest feature score, then
move on to the block's neighbors with with an searching order using
their feature scores.

We use max heap to help us achieve the functionality.

This feature is under flag USE_PQSORT

Change-Id: Ie5dc5ea715b0f9a7a594e5080a7cb4f5309f5597

--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2368,6 +2368,9 @@
     CHECK_MEM_ERROR(
         cm, cpi->feature_score_loc_sort,
         vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_heap,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
 #endif
     // TODO(jingning): Reduce the actual memory use for tpl model build up.
     for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
@@ -2588,6 +2591,7 @@
 #if CONFIG_NON_GREEDY_MV
   vpx_free(cpi->feature_score_loc_arr);
   vpx_free(cpi->feature_score_loc_sort);
+  vpx_free(cpi->feature_score_loc_heap);
 #endif
   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
@@ -5967,7 +5971,7 @@
 }
 
 #if CONFIG_NON_GREEDY_MV
-int compare_feature_score(const void *a, const void *b) {
+static int compare_feature_score(const void *a, const void *b) {
   const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a;
   const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b;
   if (aa->feature_score < bb->feature_score) {
@@ -5978,8 +5982,87 @@
     return 0;
   }
 }
-#endif
 
+#define USE_PQSORT 1
+
+#if USE_PQSORT
+static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size,
+                         FEATURE_SCORE_LOC **output) {
+  if (*size > 0) {
+    *output = heap[0];
+    --*size;
+    if (*size > 0) {
+      int p, l, r;
+      heap[0] = heap[*size];
+      p = 0;
+      l = 2 * p + 1;
+      r = 2 * p + 2;
+      while (l < *size) {
+        FEATURE_SCORE_LOC *tmp;
+        int c = l;
+        if (r < *size && heap[r]->feature_score > heap[l]->feature_score) {
+          c = r;
+        }
+        if (heap[p]->feature_score >= heap[c]->feature_score) {
+          break;
+        }
+        tmp = heap[p];
+        heap[p] = heap[c];
+        heap[c] = tmp;
+        p = c;
+        l = 2 * p + 1;
+        r = 2 * p + 2;
+      }
+    }
+  } else {
+    assert(0);
+  }
+}
+
+static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size,
+                          FEATURE_SCORE_LOC *input) {
+  int c, p;
+  FEATURE_SCORE_LOC *tmp;
+  heap[*size] = input;
+  ++*size;
+  c = *size - 1;
+  p = c >> 1;
+  while (c > 0) {
+    if (heap[c]->feature_score > heap[p]->feature_score) {
+      tmp = heap[p];
+      heap[p] = heap[c];
+      heap[c] = tmp;
+      c = p;
+      p = c >> 1;
+    } else {
+      break;
+    }
+  }
+}
+
+static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame,
+                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                  int *heap_size) {
+  const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
+  const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+  int i;
+  for (i = 0; i < NB_MVS_NUM; ++i) {
+    int r = dirs[i][0] * mi_unit;
+    int c = dirs[i][1] * mi_unit;
+    if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
+        mi_col + c < tpl_frame->mi_cols) {
+      FEATURE_SCORE_LOC *fs_loc =
+          &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride +
+                                      (mi_col + c)];
+      if (fs_loc->visited == 0) {
+        max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc);
+      }
+    }
+  }
+}
+#endif  // USE_PQSORT
+#endif  // CONFIG_NON_GREEDY_MV
+
 void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
                        BLOCK_SIZE bsize) {
   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
@@ -6013,8 +6096,12 @@
 #if CONFIG_NON_GREEDY_MV
   int rf_idx;
   int fs_loc_sort_size;
+#if USE_PQSORT
+  int fs_loc_heap_size;
+#else
   int i;
-#endif
+#endif  // USE_PQSORT
+#endif  // CONFIG_NON_GREEDY_MV
 
   // Setup scaling factor
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -6072,6 +6159,7 @@
           &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
       tpl_stats->feature_score = get_feature_score(
           xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
+      fs_loc->visited = 0;
       fs_loc->feature_score = tpl_stats->feature_score;
       fs_loc->mi_row = mi_row;
       fs_loc->mi_col = mi_col;
@@ -6083,6 +6171,7 @@
   qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
         sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
 
+#if !USE_PQSORT
   for (i = 0; i < fs_loc_sort_size; ++i) {
     int mb_y_offset;
     TplDepStats *tpl_stats;
@@ -6108,8 +6197,45 @@
           bsize, mi_row, mi_col, tpl_stats, rf_idx);
     }
   }
+#else   // !USE_PQSORT
+  fs_loc_heap_size = 0;
+  max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
+                cpi->feature_score_loc_sort[0]);
 
-#endif
+  while (fs_loc_heap_size > 0) {
+    int mb_y_offset;
+    TplDepStats *tpl_stats;
+    FEATURE_SCORE_LOC *fs_loc;
+
+    max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
+
+    fs_loc->visited = 1;
+
+    mi_row = fs_loc->mi_row;
+    mi_col = fs_loc->mi_col;
+
+    mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+    tpl_stats = &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+
+    set_mv_limits(cm, x, mi_row, mi_col);
+
+    for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+      if (ref_frame[rf_idx] == NULL) {
+        tpl_stats->ready[rf_idx] = 0;
+        continue;
+      } else {
+        tpl_stats->ready[rf_idx] = 1;
+      }
+      motion_compensated_prediction(
+          cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
+          ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride,
+          bsize, mi_row, mi_col, tpl_stats, rf_idx);
+    }
+    add_nb_blocks_to_heap(cpi, tpl_frame, bsize, mi_row, mi_col,
+                          &fs_loc_heap_size);
+  }
+#endif  // !USE_PQSORT
+#endif  // CONFIG_NON_GREEDY_MV
   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -505,6 +505,7 @@
 #define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
 #if CONFIG_NON_GREEDY_MV
 typedef struct FEATURE_SCORE_LOC {
+  int visited;
   double feature_score;
   int mi_row;
   int mi_col;
@@ -540,6 +541,7 @@
 #if CONFIG_NON_GREEDY_MV
   FEATURE_SCORE_LOC *feature_score_loc_arr;
   FEATURE_SCORE_LOC **feature_score_loc_sort;
+  FEATURE_SCORE_LOC **feature_score_loc_heap;
 #endif
 
   TileDataEnc *tile_data;