shithub: libvpx

--- a/configure

+++ b/configure

@@ -279,6 +279,7 @@

     fp_mb_stats

     emulate_hardware

     non_greedy_mv

+    ml_var_partition

 CONFIG_LIST="

     dependency_tracking

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -208,6 +208,9 @@

   void (*highbd_inv_txfm_add)(const tran_low_t *input, uint16_t *dest,

                               int stride, int eob, int bd);

 #endif

+#if CONFIG_ML_VAR_PARTITION

+  DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]);

+#endif  // CONFIG_ML_VAR_PARTITION

};

 #ifdef __cplusplus

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -4345,6 +4345,83 @@

+#if CONFIG_ML_VAR_PARTITION

+#define FEATURES 6

+#define LABELS 2

+// Asks the variance-based neural network model for the given block size
+// whether the block should be split.
+//
+// The model input has FEATURES entries:
+//   [0]    log of the squared DC quantizer value scaled by 1/256, plus 1,
+//   [1]    log of (1 + variance between the source block and the matching
+//          region of x->est_pred),
+//   [2..5] for each quadrant of the block, quadrant variance divided by the
+//          whole-block variance (1.0 when the whole-block variance is 0).
+//
+// Returns PARTITION_SPLIT when the model score is above the upper threshold,
+// PARTITION_NONE when it is below the lower threshold, and -1 when the model
+// is undecided or when there is no model for bsize (BLOCK_8X8).
+//
+// NOTE: the spelling "paritioning" is kept on purpose; callers use this name.
+static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
+                                      BLOCK_SIZE bsize, int mi_row,
+                                      int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  const NN_CONFIG *nn_config = NULL;
+  // Default decision thresholds; BLOCK_64X64 overrides them below.
+  float thresh_low = -0.2f;
+  float thresh_high = 0.0f;
+  switch (bsize) {
+    case BLOCK_64X64:
+      nn_config = &vp9_var_part_nnconfig_64;
+      thresh_low = -0.3f;
+      thresh_high = -0.1f;
+      break;
+    case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break;
+    case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break;
+    case BLOCK_8X8: break;  // No model: nn_config stays NULL.
+    default: assert(0 && "Unexpected block size."); return -1;
+  }
+  if (!nn_config) return -1;
+  vpx_clear_system_state();
+  {
+    float features[FEATURES] = { 0.0f };
+    const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+    int feature_idx = 0;
+    float score[LABELS];
+    features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+    vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+    {
+      const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+      const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+      // Offset of this block inside the 64x64 x->est_pred buffer.
+      const int sb_offset_row = 8 * (mi_row & 7);
+      const int sb_offset_col = 8 * (mi_col & 7);
+      const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
+      const uint8_t *src = x->plane[0].src.buf;
+      const int src_stride = x->plane[0].src.stride;
+      const int pred_stride = 64;
+      unsigned int sse;  // Required output of vf(); its value is not used.
+      int i;
+      // Variance of whole block.
+      const unsigned int var =
+          cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
+      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+      features[feature_idx++] = logf((float)var + 1.0f);
+      // Quadrants are visited in raster order: bit 0 of i selects the column,
+      // bit 1 selects the row.
+      for (i = 0; i < 4; ++i) {
+        const int x_idx = (i & 1) * bs / 2;
+        const int y_idx = (i >> 1) * bs / 2;
+        const int src_offset = y_idx * src_stride + x_idx;
+        const int pred_offset = y_idx * pred_stride + x_idx;
+        // Variance of quarter block.
+        const unsigned int sub_var =
+            cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
+                                    pred + pred_offset, pred_stride, &sse);
+        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
+        features[feature_idx++] = var_ratio;
+      }
+    }
+    assert(feature_idx == FEATURES);
+    nn_predict(features, nn_config, score);
+    // Named partition types instead of the bare values 3 and 0; the numeric
+    // values returned to the caller are unchanged.
+    if (score[0] > thresh_high) return PARTITION_SPLIT;
+    if (score[0] < thresh_low) return PARTITION_NONE;
+    return -1;
+  }
+}

+#undef FEATURES

+#undef LABELS

+#endif  // CONFIG_ML_VAR_PARTITION

 static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,

                                  TileDataEnc *tile_data, TOKENEXTRA **tp,

                                  int mi_row, int mi_col, BLOCK_SIZE bsize,

@@ -4374,6 +4451,11 @@

       !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;

   int partition_vert_allowed =

       !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;

+#if CONFIG_ML_VAR_PARTITION

+  const int use_ml_based_partitioning =

+      sf->partition_search_type == ML_BASED_PARTITION;

+#endif  // CONFIG_ML_VAR_PARTITION

   (void)*tp_orig;

   // Avoid checking for rectangular partitions for speed >= 6.

@@ -4404,6 +4486,18 @@

     partition_vert_allowed &= force_vert_split;

+#if CONFIG_ML_VAR_PARTITION

+  if (use_ml_based_partitioning) {

+    if (partition_none_allowed || do_split) do_rect = 0;

+    if (partition_none_allowed && do_split) {

+      const int ml_predicted_partition =

+          ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);

+      if (ml_predicted_partition == 0) do_split = 0;

+      if (ml_predicted_partition == 3) partition_none_allowed = 0;

+    }

+  }

+#endif  // CONFIG_ML_VAR_PARTITION

   if (!partition_none_allowed && !do_split) do_rect = 1;

   ctx->pred_pixel_ready =

@@ -4419,26 +4513,28 @@

     ctx->skip = x->skip;

     if (this_rdc.rate != INT_MAX) {

-      int pl = partition_plane_context(xd, mi_row, mi_col, bsize);

+      const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);

       this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];

       this_rdc.rdcost =

           RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);

       if (this_rdc.rdcost < best_rdc.rdcost) {

-        int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;

-        int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;

-        dist_breakout_thr >>=

-            8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

-        rate_breakout_thr *= num_pels_log2_lookup[bsize];

         best_rdc = this_rdc;

         if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;

-        if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&

-            this_rdc.dist < dist_breakout_thr) {

-          do_split = 0;

-          do_rect = 0;

+#if CONFIG_ML_VAR_PARTITION

+        if (!use_ml_based_partitioning)

+#endif  // CONFIG_ML_VAR_PARTITION

+        {

+          int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;

+          int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;

+          dist_breakout_thr >>=

+              8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

+          rate_breakout_thr *= num_pels_log2_lookup[bsize];

+          if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&

+              this_rdc.dist < dist_breakout_thr) {

+            do_split = 0;

+            do_rect = 0;

+          }

@@ -4837,6 +4933,111 @@

     update_partition_context(xd, mi_row, mi_col, subsize, bsize);

+#if CONFIG_ML_VAR_PARTITION

+// Get a prediction (stored in x->est_pred) for the whole 64x64 superblock.
+//
+// Inter frames: picks a reference (LAST, or ALTREF when the source frame is
+// an alt-ref in lagged VBR; GOLDEN replaces it when its zero-motion SAD is
+// below the threshold), runs vp9_int_pro_motion_estimation() on the
+// non-golden path, and builds the inter predictor into x->est_pred with
+// stride 64.
+// Side effects on that path: xd->mi[0] is overwritten, x->sb_use_mv_part,
+// x->sb_mvcol_part and x->sb_mvrow_part are set, x->pred_mv[LAST_FRAME] is
+// set unless GOLDEN is chosen, and xd->plane[0].dst is left pointing at
+// x->est_pred.
+//
+// Intra-only frames: x->est_pred is filled with a constant value.
+static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile,
+                               MACROBLOCK *x, int mi_row, int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  // Declared at function scope because the intra-only branch reads xd->bd
+  // when CONFIG_VP9_HIGHBITDEPTH is enabled.
+  MACROBLOCKD *xd = &x->e_mbd;
+  const int is_key_frame = frame_is_intra_only(cm);
+  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
+  if (!is_key_frame) {
+    MODE_INFO *mi = xd->mi[0];
+    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+    const YV12_BUFFER_CONFIG *yv12_g = NULL;
+    // Start from BLOCK_32X32 and step up the width and/or height to 64 when
+    // the superblock extends at least that far inside the frame.
+    const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
+                             (mi_row + 4 < cm->mi_rows);
+    unsigned int y_sad_g, y_sad_thr;
+    unsigned int y_sad = UINT_MAX;
+    assert(yv12 != NULL);
+    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
+        cpi->svc.use_gf_temporal_ref_current_layer) {
+      // For now, GOLDEN will not be used for non-zero spatial layers, since
+      // it may not be a temporal reference.
+      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+    }
+    // Only compute y_sad_g (sad for golden reference) for speed < 8.
+    if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
+        (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+      y_sad_g = cpi->fn_ptr[bsize].sdf(
+          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+          xd->plane[0].pre[0].stride);
+    } else {
+      // UINT_MAX can never be below y_sad_thr, so GOLDEN is never selected.
+      y_sad_g = UINT_MAX;
+    }
+    if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+        cpi->rc.is_src_frame_alt_ref) {
+      yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
+      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+                           &cm->frame_refs[ALTREF_FRAME - 1].sf);
+      mi->ref_frame[0] = ALTREF_FRAME;
+      y_sad_g = UINT_MAX;
+    } else {
+      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+                           &cm->frame_refs[LAST_FRAME - 1].sf);
+      mi->ref_frame[0] = LAST_FRAME;
+    }
+    mi->ref_frame[1] = NONE;
+    mi->sb_type = BLOCK_64X64;
+    mi->mv[0].as_int = 0;
+    mi->interp_filter = BILINEAR;
+    {
+      const MV dummy_mv = { 0, 0 };
+      y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
+                                            &dummy_mv);
+      x->sb_use_mv_part = 1;
+      x->sb_mvcol_part = mi->mv[0].as_mv.col;
+      x->sb_mvrow_part = mi->mv[0].as_mv.row;
+    }
+    // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
+    // are close if short_circuit_low_temp_var is on.
+    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
+    if (y_sad_g < y_sad_thr) {
+      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+      mi->ref_frame[0] = GOLDEN_FRAME;
+      mi->mv[0].as_int = 0;
+      y_sad = y_sad_g;
+    } else {
+      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+    }
+    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+    // Redirect the luma destination so the predictor lands in x->est_pred.
+    xd->plane[0].dst.buf = x->est_pred;
+    xd->plane[0].dst.stride = 64;
+    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
+  } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+    // NOTE(review): memset() converts its fill value to unsigned char, so
+    // 128 * 4 and 128 * 16 both store 0 into this uint8_t buffer. Confirm the
+    // intended fill for 10/12-bit input; vp9_speed_features.c in this patch
+    // selects VAR_BASED_PARTITION for high-bitdepth sources.
+    switch (xd->bd) {
+      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
+      case 10:
+        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
+        break;
+      case 12:
+        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
+        break;
+    }
+#else
+    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  }
+}

+#endif  // CONFIG_ML_VAR_PARTITION

 static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,

                                 TileDataEnc *tile_data, int mi_row,

                                 TOKENEXTRA **tp) {

@@ -4928,6 +5129,17 @@

         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,

                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);

         break;

+#if CONFIG_ML_VAR_PARTITION

+      case ML_BASED_PARTITION:

+        get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);

+        x->max_partition_size = BLOCK_64X64;

+        x->min_partition_size = BLOCK_8X8;

+        x->sb_pickmode_part = 1;

+        nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,

+                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,

+                             td->pc_root);

+        break;

+#endif  // CONFIG_ML_VAR_PARTITION

       case SOURCE_VAR_BASED_PARTITION:

         set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);

         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,

--- a/vp9/encoder/vp9_partition_models.h

+++ b/vp9/encoder/vp9_partition_models.h

@@ -834,6 +834,136 @@

};

 #undef FEATURES

+#if CONFIG_ML_VAR_PARTITION

+#define FEATURES 6

+// Variance-partition model selected for BLOCK_64X64: FEATURES inputs, one
+// hidden layer of 8 nodes, one output.
+// Hidden-layer weights (FEATURES * 8 values).
+static const float vp9_var_part_nn_weights_64_layer0[FEATURES * 8] = {
+  -0.249572f, 0.205532f,  -2.175608f, 1.094836f,  -2.986370f, 0.193160f,
+  -0.143823f, 0.378511f,  -1.997788f, -2.166866f, -1.930158f, -1.202127f,
+  -0.611875f, -0.506422f, -0.432487f, 0.071205f,  0.578172f,  -0.154285f,
+  -0.051830f, 0.331681f,  -1.457177f, -2.443546f, -2.000302f, -1.389283f,
+  0.372084f,  -0.464917f, 2.265235f,  2.385787f,  2.312722f,  2.127868f,
+  -0.403963f, -0.177860f, -0.436751f, -0.560539f, 0.254903f,  0.193976f,
+  -0.305611f, 0.256632f,  0.309388f,  -0.437439f, 1.702640f,  -5.007069f,
+  -0.323450f, 0.294227f,  1.267193f,  1.056601f,  0.387181f,  -0.191215f,
+};
+// Hidden-layer biases, one per hidden node.
+static const float vp9_var_part_nn_bias_64_layer0[8] = {
+  -0.044396f, -0.938166f, 0.000000f,  -0.916375f,
+  1.242299f,  0.000000f,  -0.405734f, 0.014206f,
+};
+// Output-layer weights, one per hidden node.
+static const float vp9_var_part_nn_weights_64_layer1[8] = {
+  1.635945f,  0.979557f,  0.455315f, 1.197199f,
+  -2.251024f, -0.464953f, 1.378676f, -0.111927f,
+};
+// Output-layer bias.
+static const float vp9_var_part_nn_bias_64_layer1[1] = {
+  -0.37972447f,
+};
+// Network description tying the tables above together.
+static const NN_CONFIG vp9_var_part_nnconfig_64 = {
+  FEATURES,  // num_inputs
+  1,         // num_outputs
+  1,         // num_hidden_layers
+  {
+      8,
+  },  // num_hidden_nodes
+  {
+      vp9_var_part_nn_weights_64_layer0,
+      vp9_var_part_nn_weights_64_layer1,
+  },
+  {
+      vp9_var_part_nn_bias_64_layer0,
+      vp9_var_part_nn_bias_64_layer1,
+  },
+};

+// Variance-partition model selected for BLOCK_32X32: FEATURES inputs, one
+// hidden layer of 8 nodes, one output.
+// Hidden-layer weights (FEATURES * 8 values).
+static const float vp9_var_part_nn_weights_32_layer0[FEATURES * 8] = {
+  0.067243f,  -0.083598f, -2.191159f, 2.726434f,  -3.324013f, 3.477977f,
+  0.323736f,  -0.510199f, 2.960693f,  2.937661f,  2.888476f,  2.938315f,
+  -0.307602f, -0.503353f, -0.080725f, -0.473909f, -0.417162f, 0.457089f,
+  0.665153f,  -0.273210f, 0.028279f,  0.972220f,  -0.445596f, 1.756611f,
+  -0.177892f, -0.091758f, 0.436661f,  -0.521506f, 0.133786f,  0.266743f,
+  0.637367f,  -0.160084f, -1.396269f, 1.020841f,  -1.112971f, 0.919496f,
+  -0.235883f, 0.651954f,  0.109061f,  -0.429463f, 0.740839f,  -0.962060f,
+  0.299519f,  -0.386298f, 1.550231f,  2.464915f,  1.311969f,  2.561612f,
+};
+// Hidden-layer biases, one per hidden node.
+static const float vp9_var_part_nn_bias_32_layer0[8] = {
+  0.368242f, 0.736617f, 0.000000f,  0.757287f,
+  0.000000f, 0.613248f, -0.776390f, 0.928497f,
+};
+// Output-layer weights, one per hidden node.
+static const float vp9_var_part_nn_weights_32_layer1[8] = {
+  0.939884f, -2.420850f, -0.410489f, -0.186690f,
+  0.063287f, -0.522011f, 0.484527f,  -0.639625f,
+};
+// Output-layer bias.
+static const float vp9_var_part_nn_bias_32_layer1[1] = {
+  -0.6455006f,
+};
+// Network description tying the tables above together.
+static const NN_CONFIG vp9_var_part_nnconfig_32 = {
+  FEATURES,  // num_inputs
+  1,         // num_outputs
+  1,         // num_hidden_layers
+  {
+      8,
+  },  // num_hidden_nodes
+  {
+      vp9_var_part_nn_weights_32_layer0,
+      vp9_var_part_nn_weights_32_layer1,
+  },
+  {
+      vp9_var_part_nn_bias_32_layer0,
+      vp9_var_part_nn_bias_32_layer1,
+  },
+};

+// Variance-partition model selected for BLOCK_16X16: FEATURES inputs, one
+// hidden layer of 8 nodes, one output.
+// Hidden-layer weights (FEATURES * 8 values).
+static const float vp9_var_part_nn_weights_16_layer0[FEATURES * 8] = {
+  0.742567f,  -0.580624f, -0.244528f, 0.331661f,  -0.113949f, -0.559295f,
+  -0.386061f, 0.438653f,  1.467463f,  0.211589f,  0.513972f,  1.067855f,
+  -0.876679f, 0.088560f,  -0.687483f, -0.380304f, -0.016412f, 0.146380f,
+  0.015318f,  0.000351f,  -2.764887f, 3.269717f,  2.752428f,  -2.236754f,
+  0.561539f,  -0.852050f, -0.084667f, 0.202057f,  0.197049f,  0.364922f,
+  -0.463801f, 0.431790f,  1.872096f,  -0.091887f, -0.055034f, 2.443492f,
+  -0.156958f, -0.189571f, -0.542424f, -0.589804f, -0.354422f, 0.401605f,
+  0.642021f,  -0.875117f, 2.040794f,  1.921070f,  1.792413f,  1.839727f,
+};
+// Hidden-layer biases, one per hidden node.
+static const float vp9_var_part_nn_bias_16_layer0[8] = {
+  2.901234f, -1.940932f, -0.198970f, -0.406524f,
+  0.059422f, -1.879207f, -0.232340f, 2.979821f,
+};
+// Output-layer weights, one per hidden node.
+static const float vp9_var_part_nn_weights_16_layer1[8] = {
+  -0.528731f, 0.375234f, -0.088422f, 0.668629f,
+  0.870449f,  0.578735f, 0.546103f,  -1.957207f,
+};
+// Output-layer bias.
+static const float vp9_var_part_nn_bias_16_layer1[1] = {
+  -1.95769405f,
+};
+// Network description tying the tables above together.
+static const NN_CONFIG vp9_var_part_nnconfig_16 = {
+  FEATURES,  // num_inputs
+  1,         // num_outputs
+  1,         // num_hidden_layers
+  {
+      8,
+  },  // num_hidden_nodes
+  {
+      vp9_var_part_nn_weights_16_layer0,
+      vp9_var_part_nn_weights_16_layer1,
+  },
+  {
+      vp9_var_part_nn_bias_16_layer0,
+      vp9_var_part_nn_bias_16_layer1,
+  },
+};

+#undef FEATURES

+#endif  // CONFIG_ML_VAR_PARTITION

 // Partition pruning model(linear).

 static const float vp9_partition_feature_mean[24] = {

   303501.697372f, 3042630.372158f, 24.694696f, 1.392182f,

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -623,7 +623,18 @@

       sf->use_altref_onepass = 1;

       sf->use_compound_nonrd_pickmode = 1;

+#if CONFIG_ML_VAR_PARTITION

+    if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360)

+      sf->partition_search_type = ML_BASED_PARTITION;

+    else

+      sf->partition_search_type = VAR_BASED_PARTITION;

+#if CONFIG_VP9_HIGHBITDEPTH

+    if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)

+      sf->partition_search_type = VAR_BASED_PARTITION;

+#endif  // CONFIG_VP9_HIGHBITDEPTH

+#else

     sf->partition_search_type = VAR_BASED_PARTITION;

+#endif  // CONFIG_ML_VAR_PARTITION

     sf->mv.search_method = NSTEP;

     sf->mv.reduce_first_step_size = 1;

     sf->skip_encode_sb = 0;

--- a/vp9/encoder/vp9_speed_features.h

+++ b/vp9/encoder/vp9_speed_features.h

@@ -136,20 +136,25 @@

 } INTERP_FILTER_MASK;

 typedef enum {

-  // Search partitions using RD/NONRD criterion

+  // Search partitions using RD/NONRD criterion.

   SEARCH_PARTITION,

-  // Always use a fixed size partition

+  // Always use a fixed size partition.

   FIXED_PARTITION,

   REFERENCE_PARTITION,

   // Use an arbitrary partitioning scheme based on source variance within

-  // a 64X64 SB

+  // a 64X64 SB.

   VAR_BASED_PARTITION,

-  // Use non-fixed partitions based on source variance

-  SOURCE_VAR_BASED_PARTITION

+  // Use non-fixed partitions based on source variance.

+  SOURCE_VAR_BASED_PARTITION,

+#if CONFIG_ML_VAR_PARTITION

+  // Make partition decisions with machine learning models.

+  ML_BASED_PARTITION

+#endif  // CONFIG_ML_VAR_PARTITION

 } PARTITION_SEARCH_TYPE;

 typedef enum {

--

⑨