shithub: libvpx

Download patch

ref: 8991d1c18da94bb5aff2afd1d0940fff90dd271b
parent: 260965646d27eb962f3c51c2c00b0da2b7d19392
parent: b54cdcc3de62390dc438a36425665a89958aefea
author: Hui Su <huisu@google.com>
date: Mon Jul 23 22:41:25 EDT 2018

Merge "Add prune_ref_frame_for_rect_partitions feature"

--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -75,6 +75,8 @@
 
   // Used for the machine learning-based early termination
   int32_t sum_y_eobs;
+  // Skip certain ref frames during RD search of rectangular partitions.
+  uint8_t skip_ref_frame_mask;
 } PICK_MODE_CONTEXT;
 
 typedef struct PC_TREE {
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2633,6 +2633,7 @@
                        ctx, INT64_MAX);
       break;
     case PARTITION_HORZ:
+      pc_tree->horizontal[0].skip_ref_frame_mask = 0;
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                        subsize, &pc_tree->horizontal[0], INT64_MAX);
       if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
@@ -2642,6 +2643,7 @@
         vp9_rd_cost_init(&tmp_rdc);
         update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
         encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+        pc_tree->horizontal[1].skip_ref_frame_mask = 0;
         rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
                          &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
@@ -2654,6 +2656,7 @@
       }
       break;
     case PARTITION_VERT:
+      pc_tree->vertical[0].skip_ref_frame_mask = 0;
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                        subsize, &pc_tree->vertical[0], INT64_MAX);
       if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
@@ -2663,6 +2666,7 @@
         vp9_rd_cost_init(&tmp_rdc);
         update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
         encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+        pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0;
         rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
                          &tmp_rdc, subsize,
                          &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
@@ -3712,10 +3716,12 @@
   int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
   int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
   int must_split = 0;
-
   int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ
                           ? x->cb_rdmult
                           : cpi->rd.RDMULT;
+  // Ref frames picked in the [i_th] quarter subblock during square partition
+  // RD search. It may be used to prune ref frame selection of rect partitions.
+  uint8_t ref_frames_used[4] = { 0, 0, 0, 0 };
 
   (void)*tp_orig;
 
@@ -3846,6 +3852,14 @@
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
                      best_rdc.rdcost);
     if (this_rdc.rate != INT_MAX) {
+      if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+        const int ref1 = ctx->mic.ref_frame[0];
+        const int ref2 = ctx->mic.ref_frame[1];
+        for (i = 0; i < 4; ++i) {
+          ref_frames_used[i] |= (1 << ref1);
+          if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+        }
+      }
       if (bsize >= BLOCK_8X8) {
         this_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
                                   cpi->partition_cost[pl][PARTITION_NONE], 0);
@@ -3970,8 +3984,18 @@
         pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        pc_tree->leaf_split[0], best_rdc.rdcost);
-
-      if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX;
+      if (sum_rdc.rate == INT_MAX) {
+        sum_rdc.rdcost = INT64_MAX;
+      } else {
+        if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+          const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0];
+          const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1];
+          for (i = 0; i < 4; ++i) {
+            ref_frames_used[i] |= (1 << ref1);
+            if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+          }
+        }
+      }
     } else {
       for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split);
            ++i) {
@@ -3999,6 +4023,13 @@
           sum_rdc.rdcost = INT64_MAX;
           break;
         } else {
+          if (cpi->sf.prune_ref_frame_for_rect_partitions &&
+              pc_tree->split[i]->none.rate != INT_MAX) {
+            const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0];
+            const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1];
+            ref_frames_used[i] |= (1 << ref1);
+            if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+          }
           sum_rdc.rate += this_rdc.rate;
           sum_rdc.dist += this_rdc.dist;
           sum_rdc.rdcost += this_rdc.rdcost;
@@ -4034,6 +4065,22 @@
         do_rect &= !partition_none_allowed;
     }
     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+  }
+
+  pc_tree->horizontal[0].skip_ref_frame_mask = 0;
+  pc_tree->horizontal[1].skip_ref_frame_mask = 0;
+  pc_tree->vertical[0].skip_ref_frame_mask = 0;
+  pc_tree->vertical[1].skip_ref_frame_mask = 0;
+  if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+    uint8_t used_frames;
+    used_frames = ref_frames_used[0] | ref_frames_used[1];
+    if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
+    used_frames = ref_frames_used[2] | ref_frames_used[3];
+    if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
+    used_frames = ref_frames_used[0] | ref_frames_used[2];
+    if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
+    used_frames = ref_frames_used[1] | ref_frames_used[3];
+    if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
   }
 
   // PARTITION_HORZ
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3073,6 +3073,8 @@
                                // lock mechanism involved with reads from
                                // tile_mode_map
   const int mode_search_skip_flags = sf->mode_search_skip_flags;
+  const int is_rect_partition =
+      num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize];
   int64_t mask_filter = 0;
   int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
 
@@ -3223,6 +3225,13 @@
     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
 
     vp9_zero(x->sum_y_eobs);
+
+    if (is_rect_partition) {
+      if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue;
+      if (second_ref_frame > 0 &&
+          (ctx->skip_ref_frame_mask & (1 << second_ref_frame)))
+        continue;
+    }
 
     // Look at the reference frame of the best mode so far and set the
     // skip mask to look at a subset of the remaining modes.
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -70,11 +70,14 @@
   // speed 0 features
   sf->partition_search_breakout_thr.dist = (1 << 20);
   sf->partition_search_breakout_thr.rate = 80;
+  sf->use_square_only_threshold = BLOCK_SIZES;
 
-  // Currently, the machine-learning based partition search early termination
-  // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
   if (is_480p_or_larger) {
+    // Currently, the machine-learning based partition search early termination
+    // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
     sf->ml_partition_search_early_termination = 1;
+  } else {
+    sf->use_square_only_threshold = BLOCK_32X32;
   }
 
   if (!is_1080p_or_larger) {
@@ -92,6 +95,7 @@
 
   if (speed >= 1) {
     sf->ml_partition_search_early_termination = 0;
+    sf->use_square_only_threshold = BLOCK_4X4;
 
     if (is_720p_or_larger) {
       sf->disable_split_mask =
@@ -193,7 +197,7 @@
   sf->allow_skip_recode = 1;
   sf->less_rectangular_check = 1;
   sf->use_square_partition_only = !frame_is_boosted(cpi);
-  sf->use_square_only_threshold = BLOCK_16X16;
+  sf->prune_ref_frame_for_rect_partitions = 1;
 
   if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
     sf->exhaustive_searches_thresh = (1 << 22);
@@ -210,6 +214,7 @@
 
   if (speed >= 1) {
     sf->enable_tpl_model = 0;
+    sf->prune_ref_frame_for_rect_partitions = 0;
     if (oxcf->pass == 2) {
       TWO_PASS *const twopass = &cpi->twopass;
       if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
@@ -226,10 +231,7 @@
     sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5];
     sf->allow_quant_coeff_opt = sf->optimize_coefficients;
     sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5];
-
-    sf->use_square_only_threshold = BLOCK_4X4;
     sf->less_rectangular_check = 1;
-
     sf->use_rd_breakout = 1;
     sf->adaptive_motion_search = 1;
     sf->mv.auto_mv_step_size = 1;
@@ -848,6 +850,7 @@
 #else
   sf->enable_tpl_model = 1;
 #endif
+  sf->prune_ref_frame_for_rect_partitions = 0;
 
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -319,6 +319,9 @@
   int use_square_partition_only;
   BLOCK_SIZE use_square_only_threshold;
 
+  // Prune reference frames for rectangular partitions.
+  int prune_ref_frame_for_rect_partitions;
+
   // Sets min and max partition sizes for this 64x64 region based on the
   // same 64x64 in last encoded frame, and the left and above neighbor.
   AUTO_MIN_MAX_MODE auto_min_max_partition_size;