ref: a2cd0170163b58cb151e19e26b7cb0b23f1783c4
parent: 308454502c665aaff9366206ce798bd4940772d0
author: Hui Su <huisu@google.com>
date: Tue Sep 25 08:19:53 EDT 2018
Add ml_var_partition experiment. Make partition decisions using machine learning models. The goal is to achieve better coding quality than the variance-based partitioning without much encoding speed loss. To enable this experiment, use --enable-ml-var-partition for config. When enabled, the variance-based partitioning is replaced by this ML-based partitioning for speed 6 and above in real-time mode (except low resolution or high bit-depth). Current coding gains (average PSNR) at speed 6 / speed 7 / speed 8 — rtc: 2.04% / 2.65% / 3.90%; ytlivehr: 3.11% / 4.53% / 11.57%; hdres (rtc mode): 5.10%. Further testing and tuning are needed to see if the speed and quality tradeoff is reasonable. Change-Id: I0da5a2fbc22c3261832b32920ee36d9b19d417af
--- a/configure
+++ b/configure
@@ -279,6 +279,7 @@
fp_mb_stats
emulate_hardware
non_greedy_mv
+ ml_var_partition
"
CONFIG_LIST="
dependency_tracking
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -208,6 +208,9 @@
void (*highbd_inv_txfm_add)(const tran_low_t *input, uint16_t *dest,
int stride, int eob, int bd);
#endif
+#if CONFIG_ML_VAR_PARTITION
+ DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]);
+#endif // CONFIG_ML_VAR_PARTITION
};
#ifdef __cplusplus
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -4345,6 +4345,83 @@
}
}
+#if CONFIG_ML_VAR_PARTITION
+ #define FEATURES 6
+ #define LABELS 2
+ // Asks a small neural net whether the block at (mi_row, mi_col) should be
+ // split, using the superblock prediction previously stored in x->est_pred.
+ //
+ // Inputs fed to the net (FEATURES of them):
+ //   [0]    log(dc_q^2 / 256 + 1), dc_q being the DC quantizer for base_qindex
+ //   [1]    log(var + 1), var = variance of source vs. est_pred for the block
+ //   [2..5] variance of each quarter block divided by var (1.0 when var == 0)
+ //
+ // Returns 3 when the score is above the upper threshold (the caller then
+ // disallows PARTITION_NONE), 0 when it is below the lower threshold (the
+ // caller then clears do_split), and -1 when no decision is made. BLOCK_8X8
+ // has no model and always yields -1.
+ // NOTE(review): 0 and 3 presumably mirror PARTITION_NONE / PARTITION_SPLIT;
+ // confirm against the PARTITION_TYPE enum.
+ static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
+                                       BLOCK_SIZE bsize, int mi_row,
+                                       int mi_col) {
+   VP9_COMMON *const cm = &cpi->common;
+   const NN_CONFIG *model = NULL;
+   // Default decision thresholds; the 64x64 model uses its own pair.
+   float none_thresh = -0.2f;
+   float split_thresh = 0.0f;
+
+   if (bsize == BLOCK_64X64) {
+     model = &vp9_var_part_nnconfig_64;
+     none_thresh = -0.3f;
+     split_thresh = -0.1f;
+   } else if (bsize == BLOCK_32X32) {
+     model = &vp9_var_part_nnconfig_32;
+   } else if (bsize == BLOCK_16X16) {
+     model = &vp9_var_part_nnconfig_16;
+   } else if (bsize != BLOCK_8X8) {
+     assert(0 && "Unexpected block size.");
+     return -1;
+   }
+
+   // No model for this block size: leave the decision to the caller.
+   if (model == NULL) return -1;
+
+   vpx_clear_system_state();
+
+   {
+     float nn_input[FEATURES] = { 0.0f };
+     float score[LABELS];
+     int feature_idx = 0;
+     const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+
+     nn_input[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+     vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+     {
+       // Block width in pixels; always a multiple of 4, so halving is exact.
+       const int blk_px = 4 * num_4x4_blocks_wide_lookup[bsize];
+       const int half_px = blk_px / 2;
+       const BLOCK_SIZE quarter_bsize = get_subsize(bsize, PARTITION_SPLIT);
+       const int src_stride = x->plane[0].src.stride;
+       const int pred_stride = 64;
+       const uint8_t *const src = x->plane[0].src.buf;
+       // Position of this block inside the 64x64 est_pred buffer: 8 pixels
+       // per mi unit, relative to the enclosing superblock.
+       const uint8_t *const pred =
+           x->est_pred + 8 * (mi_row & 7) * 64 + 8 * (mi_col & 7);
+       unsigned int sse;
+       int quad;
+       // Variance of the whole block.
+       const unsigned int var =
+           cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
+       const float inv_var = (var == 0) ? 1.0f : (1.0f / (float)var);
+
+       nn_input[feature_idx++] = logf((float)var + 1.0f);
+       for (quad = 0; quad < 4; ++quad) {
+         const int col = (quad & 1) * half_px;
+         const int row = (quad >> 1) * half_px;
+         // Variance of one quarter block.
+         const unsigned int quad_var = cpi->fn_ptr[quarter_bsize].vf(
+             src + row * src_stride + col, src_stride,
+             pred + row * pred_stride + col, pred_stride, &sse);
+         nn_input[feature_idx++] =
+             (var == 0) ? 1.0f : inv_var * (float)quad_var;
+       }
+     }
+
+     assert(feature_idx == FEATURES);
+     nn_predict(nn_input, model, score);
+     if (score[0] > split_thresh) return 3;
+     if (score[0] < none_thresh) return 0;
+     return -1;
+   }
+ }
+ #undef FEATURES
+ #undef LABELS
+#endif // CONFIG_ML_VAR_PARTITION
+
static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -4374,6 +4451,11 @@
!force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
int partition_vert_allowed =
!force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
+#if CONFIG_ML_VAR_PARTITION
+ const int use_ml_based_partitioning =
+ sf->partition_search_type == ML_BASED_PARTITION;
+#endif // CONFIG_ML_VAR_PARTITION
+
(void)*tp_orig;
// Avoid checking for rectangular partitions for speed >= 6.
@@ -4404,6 +4486,18 @@
partition_vert_allowed &= force_vert_split;
}
+#if CONFIG_ML_VAR_PARTITION
+ if (use_ml_based_partitioning) {
+ if (partition_none_allowed || do_split) do_rect = 0;
+ if (partition_none_allowed && do_split) {
+ const int ml_predicted_partition =
+ ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);
+ if (ml_predicted_partition == 0) do_split = 0;
+ if (ml_predicted_partition == 3) partition_none_allowed = 0;
+ }
+ }
+#endif // CONFIG_ML_VAR_PARTITION
+
if (!partition_none_allowed && !do_split) do_rect = 1;
ctx->pred_pixel_ready =
@@ -4419,26 +4513,28 @@
ctx->skip = x->skip;
if (this_rdc.rate != INT_MAX) {
- int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
this_rdc.rdcost =
RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
if (this_rdc.rdcost < best_rdc.rdcost) {
- int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
- int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
-
- dist_breakout_thr >>=
- 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
-
- rate_breakout_thr *= num_pels_log2_lookup[bsize];
-
best_rdc = this_rdc;
if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
- if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
- this_rdc.dist < dist_breakout_thr) {
- do_split = 0;
- do_rect = 0;
+#if CONFIG_ML_VAR_PARTITION
+ if (!use_ml_based_partitioning)
+#endif // CONFIG_ML_VAR_PARTITION
+ {
+ int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
+ int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
+ dist_breakout_thr >>=
+ 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+ rate_breakout_thr *= num_pels_log2_lookup[bsize];
+ if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
+ this_rdc.dist < dist_breakout_thr) {
+ do_split = 0;
+ do_rect = 0;
+ }
}
}
}
@@ -4837,6 +4933,111 @@
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
+#if CONFIG_ML_VAR_PARTITION
+ // Builds a prediction for the whole 64x64 superblock at (mi_row, mi_col) and
+ // stores it in x->est_pred (stride 64).
+ //
+ // Inter frames: the reference is LAST_FRAME, or ALTREF_FRAME when the source
+ // frame is itself the alt-ref in lagged VBR encoding. An integer-projection
+ // motion search supplies the motion vector, which is also recorded in
+ // x->sb_mvcol_part / x->sb_mvrow_part. GOLDEN_FRAME with a zero motion vector
+ // is used instead when its SAD beats the (optionally biased) SAD of that
+ // search. The chosen reference is then used to build the inter predictor.
+ //
+ // Intra-only frames: est_pred is filled with a constant value.
+ static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile,
+                                MACROBLOCK *x, int mi_row, int mi_col) {
+   VP9_COMMON *const cm = &cpi->common;
+   // Declared at function scope because the intra-only branch below reads
+   // xd->bd when CONFIG_VP9_HIGHBITDEPTH is enabled.
+   MACROBLOCKD *const xd = &x->e_mbd;
+   const int is_key_frame = frame_is_intra_only(cm);
+
+   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
+
+   if (!is_key_frame) {
+     MODE_INFO *mi = xd->mi[0];
+     YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+     const YV12_BUFFER_CONFIG *yv12_g = NULL;
+     // Block size used for the SAD / motion search; it depends on whether
+     // mi_col + 4 and mi_row + 4 are still inside the frame.
+     // NOTE(review): relies on the BLOCK_SIZE enum ordering after
+     // BLOCK_32X32 -- confirm against the enum definition.
+     const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
+                              (mi_row + 4 < cm->mi_rows);
+     unsigned int y_sad, y_sad_g, y_sad_thr;
+
+     assert(yv12 != NULL);
+
+     if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
+         cpi->svc.use_gf_temporal_ref_current_layer) {
+       // For now, GOLDEN will not be used for non-zero spatial layers, since
+       // it may not be a temporal reference.
+       yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+     }
+
+     // Only compute y_sad_g (sad for golden reference) for speed < 8.
+     if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
+         (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+       y_sad_g = cpi->fn_ptr[bsize].sdf(
+           x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+           xd->plane[0].pre[0].stride);
+     } else {
+       // UINT_MAX can never be below y_sad_thr, so golden is never selected.
+       y_sad_g = UINT_MAX;
+     }
+
+     if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+         cpi->rc.is_src_frame_alt_ref) {
+       yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
+       vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+                            &cm->frame_refs[ALTREF_FRAME - 1].sf);
+       mi->ref_frame[0] = ALTREF_FRAME;
+       // Rule golden out when predicting from the alt-ref.
+       y_sad_g = UINT_MAX;
+     } else {
+       vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+                            &cm->frame_refs[LAST_FRAME - 1].sf);
+       mi->ref_frame[0] = LAST_FRAME;
+     }
+     mi->ref_frame[1] = NONE;
+     mi->sb_type = BLOCK_64X64;
+     mi->mv[0].as_int = 0;
+     mi->interp_filter = BILINEAR;
+
+     {
+       const MV dummy_mv = { 0, 0 };
+       y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
+                                             &dummy_mv);
+       x->sb_use_mv_part = 1;
+       x->sb_mvcol_part = mi->mv[0].as_mv.col;
+       x->sb_mvrow_part = mi->mv[0].as_mv.row;
+     }
+
+     // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
+     // are close if short_circuit_low_temp_var is on.
+     y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
+     if (y_sad_g < y_sad_thr) {
+       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+       mi->ref_frame[0] = GOLDEN_FRAME;
+       mi->mv[0].as_int = 0;
+     } else {
+       x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+     }
+
+     set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+     // Redirect the luma destination so the predictor lands in est_pred.
+     // NOTE(review): only plane 0's dst is redirected; confirm where
+     // vp9_build_inter_predictors_sb writes the remaining planes.
+     xd->plane[0].dst.buf = x->est_pred;
+     xd->plane[0].dst.stride = 64;
+     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
+   } else {
+ #if CONFIG_VP9_HIGHBITDEPTH
+     // NOTE(review): est_pred is a uint8_t buffer and memset() converts its
+     // fill value to unsigned char, so 128 * 4 and 128 * 16 both store 0, not
+     // a mid-level value. Confirm what the 10/12-bit cases are meant to hold.
+     switch (xd->bd) {
+       case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
+       case 10:
+         memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
+         break;
+       case 12:
+         memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
+         break;
+     }
+ #else
+     memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
+ #endif  // CONFIG_VP9_HIGHBITDEPTH
+   }
+ }
+#endif // CONFIG_ML_VAR_PARTITION
+
static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, int mi_row,
TOKENEXTRA **tp) {
@@ -4928,6 +5129,17 @@
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
break;
+#if CONFIG_ML_VAR_PARTITION
+ case ML_BASED_PARTITION:
+ get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
+ x->max_partition_size = BLOCK_64X64;
+ x->min_partition_size = BLOCK_8X8;
+ x->sb_pickmode_part = 1;
+ nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
+ BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
+ td->pc_root);
+ break;
+#endif // CONFIG_ML_VAR_PARTITION
case SOURCE_VAR_BASED_PARTITION:
set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
--- a/vp9/encoder/vp9_partition_models.h
+++ b/vp9/encoder/vp9_partition_models.h
@@ -834,6 +834,136 @@
};
#undef FEATURES
+#if CONFIG_ML_VAR_PARTITION
+#define FEATURES 6
+ static const float vp9_var_part_nn_weights_64_layer0[FEATURES * 8] = {  // BLOCK_64X64 model, layer 0: FEATURES * 8 weights for the 8 hidden nodes.
+ -0.249572f, 0.205532f, -2.175608f, 1.094836f, -2.986370f, 0.193160f,
+ -0.143823f, 0.378511f, -1.997788f, -2.166866f, -1.930158f, -1.202127f,
+ -0.611875f, -0.506422f, -0.432487f, 0.071205f, 0.578172f, -0.154285f,
+ -0.051830f, 0.331681f, -1.457177f, -2.443546f, -2.000302f, -1.389283f,
+ 0.372084f, -0.464917f, 2.265235f, 2.385787f, 2.312722f, 2.127868f,
+ -0.403963f, -0.177860f, -0.436751f, -0.560539f, 0.254903f, 0.193976f,
+ -0.305611f, 0.256632f, 0.309388f, -0.437439f, 1.702640f, -5.007069f,
+ -0.323450f, 0.294227f, 1.267193f, 1.056601f, 0.387181f, -0.191215f,
+ };
+
+ static const float vp9_var_part_nn_bias_64_layer0[8] = {  // Layer 0 biases: 8 values, matching the 8 hidden nodes.
+ -0.044396f, -0.938166f, 0.000000f, -0.916375f,
+ 1.242299f, 0.000000f, -0.405734f, 0.014206f,
+ };
+
+ static const float vp9_var_part_nn_weights_64_layer1[8] = {  // Layer 1 weights: 8 hidden nodes feeding the single output.
+ 1.635945f, 0.979557f, 0.455315f, 1.197199f,
+ -2.251024f, -0.464953f, 1.378676f, -0.111927f,
+ };
+
+ static const float vp9_var_part_nn_bias_64_layer1[1] = {  // Layer 1 bias for the single output.
+ -0.37972447f,
+ };
+
+ static const NN_CONFIG vp9_var_part_nnconfig_64 = {  // Model that ml_predict_var_paritioning selects for BLOCK_64X64.
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {  // Weight arrays: layer 0, then layer 1.
+ vp9_var_part_nn_weights_64_layer0,
+ vp9_var_part_nn_weights_64_layer1,
+ },
+ {  // Bias arrays: layer 0, then layer 1.
+ vp9_var_part_nn_bias_64_layer0,
+ vp9_var_part_nn_bias_64_layer1,
+ },
+ };
+
+ static const float vp9_var_part_nn_weights_32_layer0[FEATURES * 8] = {  // BLOCK_32X32 model, layer 0: FEATURES * 8 weights for the 8 hidden nodes.
+ 0.067243f, -0.083598f, -2.191159f, 2.726434f, -3.324013f, 3.477977f,
+ 0.323736f, -0.510199f, 2.960693f, 2.937661f, 2.888476f, 2.938315f,
+ -0.307602f, -0.503353f, -0.080725f, -0.473909f, -0.417162f, 0.457089f,
+ 0.665153f, -0.273210f, 0.028279f, 0.972220f, -0.445596f, 1.756611f,
+ -0.177892f, -0.091758f, 0.436661f, -0.521506f, 0.133786f, 0.266743f,
+ 0.637367f, -0.160084f, -1.396269f, 1.020841f, -1.112971f, 0.919496f,
+ -0.235883f, 0.651954f, 0.109061f, -0.429463f, 0.740839f, -0.962060f,
+ 0.299519f, -0.386298f, 1.550231f, 2.464915f, 1.311969f, 2.561612f,
+ };
+
+ static const float vp9_var_part_nn_bias_32_layer0[8] = {  // Layer 0 biases: 8 values, matching the 8 hidden nodes.
+ 0.368242f, 0.736617f, 0.000000f, 0.757287f,
+ 0.000000f, 0.613248f, -0.776390f, 0.928497f,
+ };
+
+ static const float vp9_var_part_nn_weights_32_layer1[8] = {  // Layer 1 weights: 8 hidden nodes feeding the single output.
+ 0.939884f, -2.420850f, -0.410489f, -0.186690f,
+ 0.063287f, -0.522011f, 0.484527f, -0.639625f,
+ };
+
+ static const float vp9_var_part_nn_bias_32_layer1[1] = {  // Layer 1 bias for the single output.
+ -0.6455006f,
+ };
+
+ static const NN_CONFIG vp9_var_part_nnconfig_32 = {  // Model that ml_predict_var_paritioning selects for BLOCK_32X32.
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {  // Weight arrays: layer 0, then layer 1.
+ vp9_var_part_nn_weights_32_layer0,
+ vp9_var_part_nn_weights_32_layer1,
+ },
+ {  // Bias arrays: layer 0, then layer 1.
+ vp9_var_part_nn_bias_32_layer0,
+ vp9_var_part_nn_bias_32_layer1,
+ },
+ };
+
+ static const float vp9_var_part_nn_weights_16_layer0[FEATURES * 8] = {  // BLOCK_16X16 model, layer 0: FEATURES * 8 weights for the 8 hidden nodes.
+ 0.742567f, -0.580624f, -0.244528f, 0.331661f, -0.113949f, -0.559295f,
+ -0.386061f, 0.438653f, 1.467463f, 0.211589f, 0.513972f, 1.067855f,
+ -0.876679f, 0.088560f, -0.687483f, -0.380304f, -0.016412f, 0.146380f,
+ 0.015318f, 0.000351f, -2.764887f, 3.269717f, 2.752428f, -2.236754f,
+ 0.561539f, -0.852050f, -0.084667f, 0.202057f, 0.197049f, 0.364922f,
+ -0.463801f, 0.431790f, 1.872096f, -0.091887f, -0.055034f, 2.443492f,
+ -0.156958f, -0.189571f, -0.542424f, -0.589804f, -0.354422f, 0.401605f,
+ 0.642021f, -0.875117f, 2.040794f, 1.921070f, 1.792413f, 1.839727f,
+ };
+
+ static const float vp9_var_part_nn_bias_16_layer0[8] = {  // Layer 0 biases: 8 values, matching the 8 hidden nodes.
+ 2.901234f, -1.940932f, -0.198970f, -0.406524f,
+ 0.059422f, -1.879207f, -0.232340f, 2.979821f,
+ };
+
+ static const float vp9_var_part_nn_weights_16_layer1[8] = {  // Layer 1 weights: 8 hidden nodes feeding the single output.
+ -0.528731f, 0.375234f, -0.088422f, 0.668629f,
+ 0.870449f, 0.578735f, 0.546103f, -1.957207f,
+ };
+
+ static const float vp9_var_part_nn_bias_16_layer1[1] = {  // Layer 1 bias for the single output.
+ -1.95769405f,
+ };
+
+ static const NN_CONFIG vp9_var_part_nnconfig_16 = {  // Model that ml_predict_var_paritioning selects for BLOCK_16X16.
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {  // Weight arrays: layer 0, then layer 1.
+ vp9_var_part_nn_weights_16_layer0,
+ vp9_var_part_nn_weights_16_layer1,
+ },
+ {  // Bias arrays: layer 0, then layer 1.
+ vp9_var_part_nn_bias_16_layer0,
+ vp9_var_part_nn_bias_16_layer1,
+ },
+ };
+#undef FEATURES
+#endif // CONFIG_ML_VAR_PARTITION
+
// Partition pruning model(linear).
static const float vp9_partition_feature_mean[24] = {
303501.697372f, 3042630.372158f, 24.694696f, 1.392182f,
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -623,7 +623,18 @@
sf->use_altref_onepass = 1;
sf->use_compound_nonrd_pickmode = 1;
}
+#if CONFIG_ML_VAR_PARTITION
+ if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360)
+ sf->partition_search_type = ML_BASED_PARTITION;
+ else
+ sf->partition_search_type = VAR_BASED_PARTITION;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
+ sf->partition_search_type = VAR_BASED_PARTITION;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else
sf->partition_search_type = VAR_BASED_PARTITION;
+#endif // CONFIG_ML_VAR_PARTITION
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -136,20 +136,25 @@
} INTERP_FILTER_MASK;
typedef enum {
- // Search partitions using RD/NONRD criterion
+ // Search partitions using RD/NONRD criterion.
SEARCH_PARTITION,
- // Always use a fixed size partition
+ // Always use a fixed size partition.
FIXED_PARTITION,
REFERENCE_PARTITION,
// Use an arbitrary partitioning scheme based on source variance within
- // a 64X64 SB
+ // a 64X64 SB.
VAR_BASED_PARTITION,
- // Use non-fixed partitions based on source variance
- SOURCE_VAR_BASED_PARTITION
+ // Use non-fixed partitions based on source variance.
+ SOURCE_VAR_BASED_PARTITION,
+
+#if CONFIG_ML_VAR_PARTITION
+ // Make partition decisions with machine learning models.
+ ML_BASED_PARTITION
+#endif // CONFIG_ML_VAR_PARTITION
} PARTITION_SEARCH_TYPE;
typedef enum {