shithub: libvpx

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -116,6 +116,13 @@

 #endif

+// Forward declarations of the Hadamard transform wrappers. Their definitions

+// appear further down in this file; declaring them here lets code placed

+// before those definitions call them. Each wrapper selects a vpx_*hadamard_*

+// routine based on tx_size and forwards src_diff, bw and coeff to it.

+#if CONFIG_VP9_HIGHBITDEPTH

+void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,

+                         TX_SIZE tx_size);

+#endif

+void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,

+                  TX_SIZE tx_size);

 // compute adaptive threshold for skip recoding

 static int compute_context_model_thresh(const VP9_COMP *const cpi) {

   const VP9_COMMON *const cm = &cpi->common;

@@ -977,6 +984,12 @@

   vpx_free(cpi->consec_zero_mv);

   cpi->consec_zero_mv = NULL;

+  vpx_free(cpi->stack_rank_buffer);

+  cpi->stack_rank_buffer = NULL;

+  vpx_free(cpi->mb_wiener_variance);

+  cpi->mb_wiener_variance = NULL;

   vp9_free_ref_frame_buffers(cm->buffer_pool);

 #if CONFIG_VP9_POSTPROC

   vp9_free_postproc_buffers(cm);

@@ -2367,6 +2380,14 @@

   vp9_set_speed_features_framesize_independent(cpi);

   vp9_set_speed_features_framesize_dependent(cpi);

+  if (cpi->sf.enable_wiener_variance) {

+    CHECK_MEM_ERROR(cm, cpi->stack_rank_buffer,

+                    vpx_calloc(UINT16_MAX, sizeof(*cpi->stack_rank_buffer)));

+    CHECK_MEM_ERROR(cm, cpi->mb_wiener_variance,

+                    vpx_calloc(cm->mb_rows * cm->mb_cols,

+                               sizeof(*cpi->mb_wiener_variance)));

+  }

 #if CONFIG_NON_GREEDY_MV

   cpi->feature_score_loc_alloc = 0;

   cpi->tpl_ready = 0;

@@ -4691,6 +4712,97 @@

+// Estimate a per-macroblock "Wiener variance" of the source luma plane.

+//

+// When cpi->sf.enable_wiener_variance is non-zero, every 16x16 block of

+// cpi->Source is processed as follows:

+//   1. An all-zero prediction is subtracted from the block, producing the

+//      int16_t input of the transform.

+//   2. The 16x16 Hadamard transform is applied.

+//   3. The median coefficient magnitude (found with a histogram kept in

+//      cpi->stack_rank_buffer) serves as the noise level estimate.

+//   4. Every coefficient c except coeff[0] is attenuated to

+//      c^3 / (c^2 + median^2).

+//   5. The sum of the squared attenuated coefficients divided by the number

+//      of coefficients is written to

+//      cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col].

+//

+// The function returns without touching anything when the speed feature is

+// disabled.

+static void set_mb_wiener_variance(VP9_COMP *cpi) {

+  VP9_COMMON *cm = &cpi->common;

+  uint8_t *buffer = cpi->Source->y_buffer;

+  int buf_stride = cpi->Source->y_stride;

+#if CONFIG_VP9_HIGHBITDEPTH

+  ThreadData *td = &cpi->td;

+  MACROBLOCK *x = &td->mb;

+  MACROBLOCKD *xd = &x->e_mbd;

+  DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);

+  DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);

+  uint8_t *zero_pred;

+#else

+  DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);

+#endif

+  DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);

+  DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);

+  int mb_row, mb_col;

+  // Hard coded operating block size

+  const int block_size = 16;

+  const int coeff_count = block_size * block_size;

+  const TX_SIZE tx_size = TX_16X16;

+  // Last valid index of cpi->stack_rank_buffer, which is allocated with

+  // UINT16_MAX entries.

+  const int max_rank = UINT16_MAX - 1;

+  if (cpi->sf.enable_wiener_variance == 0) return;

+#if CONFIG_VP9_HIGHBITDEPTH

+  // Clear the backing arrays themselves. The value produced by

+  // CONVERT_TO_BYTEPTR() is an encoded handle for the high bit-depth

+  // routines, not an address that memset() may write through.

+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+    memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count);

+    zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);

+  } else {

+    memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count);

+    zero_pred = zero_pred8;

+  }

+#else

+  memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);

+#endif

+  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {

+    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {

+      int idx, hist_count = 0;

+      // Kept as int: a histogram index does not necessarily fit in int16_t.

+      int median_val = 0;

+      uint8_t *mb_buffer =

+          buffer + mb_row * block_size * buf_stride + mb_col * block_size;

+      int64_t wiener_variance = 0;

+#if CONFIG_VP9_HIGHBITDEPTH

+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

+        vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,

+                                  mb_buffer, buf_stride, zero_pred, block_size,

+                                  xd->bd);

+        highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);

+      } else {

+        vpx_subtract_block(block_size, block_size, src_diff, block_size,

+                           mb_buffer, buf_stride, zero_pred, block_size);

+        wht_fwd_txfm(src_diff, block_size, coeff, tx_size);

+      }

+#else

+      vpx_subtract_block(block_size, block_size, src_diff, block_size,

+                         mb_buffer, buf_stride, zero_pred, block_size);

+      wht_fwd_txfm(src_diff, block_size, coeff, tx_size);

+#endif  // CONFIG_VP9_HIGHBITDEPTH

+      // Histogram of the coefficient magnitudes. Magnitudes beyond the last

+      // bin (e.g. from high bit-depth input) are counted in the last bin

+      // instead of indexing past the end of the buffer.

+      memset(cpi->stack_rank_buffer, 0,

+             sizeof(*cpi->stack_rank_buffer) * UINT16_MAX);

+      for (idx = 0; idx < coeff_count; ++idx) {

+        const int rank = abs(coeff[idx]);

+        ++cpi->stack_rank_buffer[rank > max_rank ? max_rank : rank];

+      }

+      // The median is the first bin at which the running count covers half

+      // of the coefficients.

+      for (idx = 0; idx < UINT16_MAX; ++idx) {

+        hist_count += cpi->stack_rank_buffer[idx];

+        if (hist_count >= coeff_count / 2) break;

+      }

+      // Noise level estimation

+      median_val = idx;

+      // Wiener filter. coeff[0] is left out of the sum. The sign of the

+      // filtered value is irrelevant because only its square is accumulated.

+      for (idx = 1; idx < coeff_count; ++idx) {

+        const int64_t c = coeff[idx];

+        const int64_t sqr_coeff = c * c;

+        const int64_t denom = sqr_coeff + (int64_t)median_val * median_val;

+        // denom is zero only when the coefficient and the noise estimate are

+        // both zero (a flat block); the filtered value is then zero as well.

+        const int64_t filtered = denom ? (sqr_coeff * c) / denom : 0;

+        wiener_variance += filtered * filtered;

+      }

+      cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =

+          wiener_variance / coeff_count;

+    }

+  }

+}

 static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,

                                       uint8_t *dest,

                                       unsigned int *frame_flags) {

@@ -4777,6 +4889,8 @@

+  set_mb_wiener_variance(cpi);

   vpx_clear_system_state();

 #if CONFIG_INTERNAL_STATS

@@ -5827,8 +5941,8 @@

 #if CONFIG_VP9_HIGHBITDEPTH

-static void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,

-                                TX_SIZE tx_size) {

+void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,

+                         TX_SIZE tx_size) {

   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.

   switch (tx_size) {

     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;

@@ -5839,8 +5953,8 @@

 #endif  // CONFIG_VP9_HIGHBITDEPTH

-static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,

-                         TX_SIZE tx_size) {

+void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,

+                  TX_SIZE tx_size) {

   switch (tx_size) {

     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;

     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;

--- a/vp9/encoder/vp9_encoder.h

+++ b/vp9/encoder/vp9_encoder.h

@@ -628,6 +628,9 @@

   int ext_refresh_frame_context_pending;

   int ext_refresh_frame_context;

+  int64_t *mb_wiener_variance;

+  int *stack_rank_buffer;

   YV12_BUFFER_CONFIG last_frame_uf;

   TOKENEXTRA *tile_tok[4][1 << 6];

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -939,6 +939,10 @@

   sf->tx_size_search_breakout = 1;

   sf->tx_size_search_depth = 2;

+  // Manually turn this on during experimentation. Off by default to disable its

+  // effect on the baseline encoder.

+  sf->enable_wiener_variance = 0;

   sf->exhaustive_searches_thresh =

       (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)

                                                               : INT_MAX;

--- a/vp9/encoder/vp9_speed_features.h

+++ b/vp9/encoder/vp9_speed_features.h

@@ -288,6 +288,9 @@

   // level within a frame.

   int allow_skip_recode;

+  // Enable Wiener filter based block complexity analysis.

+  int enable_wiener_variance;

   // Coefficient probability model approximation step size

   int coeff_prob_appx_step;

--

⑨