ref: 12222936041712620ebadd6ce99d1a12f3cac51b
parent: 7969c6e0b72e0e64cfe8304968559ffdc370d46d
 parent: ff36b9c78b8a069978c7ba57c31a8b63fbf8b599
	author: Jingning Han <jingning@google.com>
	date: Wed Mar 13 12:17:52 EDT 2019
	
Merge "Set up Wiener variance for macroblocks in a frame"
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -116,6 +116,13 @@
}
#endif
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+ TX_SIZE tx_size);  // Forward declaration; the definition appears later in this file.
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+ TX_SIZE tx_size);  // Forward declaration. NOTE(review): now non-static, yet no header in this patch declares it - confirm external linkage is intended.
+
// compute adaptive threshold for skip recoding
 static int compute_context_model_thresh(const VP9_COMP *const cpi) {const VP9_COMMON *const cm = &cpi->common;
@@ -977,6 +984,12 @@
vpx_free(cpi->consec_zero_mv);
cpi->consec_zero_mv = NULL;
+ vpx_free(cpi->stack_rank_buffer);
+ cpi->stack_rank_buffer = NULL;
+
+ vpx_free(cpi->mb_wiener_variance);
+ cpi->mb_wiener_variance = NULL;
+
vp9_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
vp9_free_postproc_buffers(cm);
@@ -2367,6 +2380,14 @@
vp9_set_speed_features_framesize_independent(cpi);
vp9_set_speed_features_framesize_dependent(cpi);
+  if (cpi->sf.enable_wiener_variance) {+ CHECK_MEM_ERROR(cm, cpi->stack_rank_buffer,
+ vpx_calloc(UINT16_MAX, sizeof(*cpi->stack_rank_buffer)));
+ CHECK_MEM_ERROR(cm, cpi->mb_wiener_variance,
+ vpx_calloc(cm->mb_rows * cm->mb_cols,
+ sizeof(*cpi->mb_wiener_variance)));
+ }
+
#if CONFIG_NON_GREEDY_MV
cpi->feature_score_loc_alloc = 0;
cpi->tpl_ready = 0;
@@ -4691,6 +4712,97 @@
}
}
+// Computes a Wiener-filtered variance for every 16x16 macroblock of the source
+// luma plane and stores it in cpi->mb_wiener_variance, indexed in raster order
+// as mb_row * cm->mb_cols + mb_col.
+//
+// For each block the pixels are Hadamard transformed (the prediction is all
+// zeros), the median coefficient magnitude is taken as the noise level, each
+// AC coefficient c is shrunk to c^3 / (c^2 + median^2), and the sum of the
+// squared results is divided by the number of coefficients in the block.
+//
+// Returns immediately when cpi->sf.enable_wiener_variance is 0. Uses
+// cpi->stack_rank_buffer as scratch space for the magnitude histogram.
+static void set_mb_wiener_variance(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  uint8_t *buffer = cpi->Source->y_buffer;
+  int buf_stride = cpi->Source->y_stride;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  ThreadData *td = &cpi->td;
+  MACROBLOCK *x = &td->mb;
+  MACROBLOCKD *xd = &x->e_mbd;
+  DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
+  DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
+  uint8_t *zero_pred;
+  // The pixels are read from cpi->Source, so its flags (not xd->cur_buf, which
+  // this function never sets) decide which sample layout applies.
+  const int is_highbd = (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
+#else
+  DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
+#endif
+
+  DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
+  DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
+
+  int mb_row, mb_col;
+  // Hard coded operating block size
+  const int block_size = 16;
+  const int coeff_count = block_size * block_size;
+  const TX_SIZE tx_size = TX_16X16;
+  // Number of histogram bins; must match the allocation of stack_rank_buffer.
+  const int rank_bins = UINT16_MAX;
+
+  if (cpi->sf.enable_wiener_variance == 0) return;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  // Clear the backing arrays directly. The value produced by
+  // CONVERT_TO_BYTEPTR() is meant to be handed to the high-bit-depth routines
+  // and must not be written through.
+  memset(zero_pred16, 0, sizeof(zero_pred16));
+  memset(zero_pred8, 0, sizeof(zero_pred8));
+  zero_pred = is_highbd ? CONVERT_TO_BYTEPTR(zero_pred16) : zero_pred8;
+#else
+  memset(zero_pred, 0, sizeof(zero_pred));
+#endif
+
+  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
+    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
+      int idx, hist_count = 0;
+      // Kept as int: a histogram index does not always fit in int16_t.
+      int median_val = 0;
+      uint8_t *mb_buffer =
+          buffer + mb_row * block_size * buf_stride + mb_col * block_size;
+      int64_t wiener_variance = 0;
+
+      // Subtracting an all-zero prediction copies the pixels into src_diff,
+      // which is then transformed into coeff.
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (is_highbd) {
+        vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
+                                  mb_buffer, buf_stride, zero_pred, block_size,
+                                  xd->bd);
+        highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
+      } else {
+        vpx_subtract_block(block_size, block_size, src_diff, block_size,
+                           mb_buffer, buf_stride, zero_pred, block_size);
+        wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
+      }
+#else
+      vpx_subtract_block(block_size, block_size, src_diff, block_size,
+                         mb_buffer, buf_stride, zero_pred, block_size);
+      wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+      // Histogram of coefficient magnitudes (DC included).
+      memset(cpi->stack_rank_buffer, 0,
+             rank_bins * sizeof(*cpi->stack_rank_buffer));
+
+      for (idx = 0; idx < coeff_count; ++idx) {
+        int magnitude = abs(coeff[idx]);
+        // Saturate into the last bin so a large coefficient can never index
+        // past the end of the histogram.
+        if (magnitude >= rank_bins) magnitude = rank_bins - 1;
+        ++cpi->stack_rank_buffer[magnitude];
+      }
+
+      // The first bin at which the running count reaches half of the
+      // coefficients is the median magnitude. The loop always breaks because
+      // every coefficient has been counted.
+      for (idx = 0; idx < rank_bins; ++idx) {
+        hist_count += cpi->stack_rank_buffer[idx];
+        if (hist_count >= coeff_count / 2) break;
+      }
+
+      // Noise level estimation
+      median_val = idx;
+
+      // Wiener filter over the AC coefficients (idx 0, the DC term, is
+      // skipped). Only the square of the filtered value is used, so its sign
+      // does not matter.
+      for (idx = 1; idx < coeff_count; ++idx) {
+        const int64_t c = coeff[idx];
+        int64_t filtered = c;
+        // With a zero noise estimate the filter is the identity. Skipping the
+        // division also avoids 0 / 0 on flat blocks, where the coefficient
+        // and the median are both zero.
+        if (median_val != 0) {
+          const int64_t sqr_coeff = c * c;
+          filtered = (sqr_coeff * c) /
+                     (sqr_coeff + (int64_t)median_val * median_val);
+        }
+        wiener_variance += filtered * filtered;
+      }
+      cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
+          wiener_variance / coeff_count;
+    }
+  }
+}
+
static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
uint8_t *dest,
                                       unsigned int *frame_flags) {@@ -4777,6 +4889,8 @@
}
}
+ set_mb_wiener_variance(cpi);
+
vpx_clear_system_state();
#if CONFIG_INTERNAL_STATS
@@ -5827,8 +5941,8 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-static void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
-                                TX_SIZE tx_size) {+void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+                         TX_SIZE tx_size) {// TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
   switch (tx_size) {case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
@@ -5839,8 +5953,8 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
-                         TX_SIZE tx_size) {+void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+                  TX_SIZE tx_size) {   switch (tx_size) {case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -628,6 +628,9 @@
int ext_refresh_frame_context_pending;
int ext_refresh_frame_context;
+ int64_t *mb_wiener_variance;
+ int *stack_rank_buffer;
+
YV12_BUFFER_CONFIG last_frame_uf;
TOKENEXTRA *tile_tok[4][1 << 6];
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -939,6 +939,10 @@
sf->tx_size_search_breakout = 1;
sf->tx_size_search_depth = 2;
+ // Manually turn this on during experimentation. Off by default to disable its
+ // effect on the baseline encoder.
+ sf->enable_wiener_variance = 0;
+
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
: INT_MAX;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -288,6 +288,9 @@
// level within a frame.
int allow_skip_recode;
+ // Enable Wiener filter based block complexity analysis.
+ int enable_wiener_variance;
+
// Coefficient probability model approximation step size
int coeff_prob_appx_step;
--
⑨