ref: ca6bfe2cd49c2b519744d6184153393707a92921
parent: 8839cffe2ddce58faeb4f624dcc9584bcfa4019b
author: Jingning Han <jingning@google.com>
date: Fri Aug 10 12:42:21 EDT 2018
Unify the YUV plane temporal filter operation. Unify the temporal filter operations for the luma and chroma components. Handle them in a single loop over the pixels in the processing block. Change-Id: I9ea1946f3a6fb37da6867aa78140d45cad0facf0
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -94,6 +94,130 @@
for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
}
+// Turns a sum of squared differences into a filter weight.
+//
+// The sum is multiplied by 3, divided by |index| (callers pass the number of
+// samples that went into the sum, which must be non-zero), offset by
+// |rounding| and shifted right by |strength|. The result is capped at 16,
+// subtracted from 16 and multiplied by |filter_weight|, so for a non-negative
+// |sum_dist| a larger difference yields a smaller weight, down to 0.
+static int mod_index(int sum_dist, int index, int rounding, int strength,
+                     int filter_weight) {
+  const int scaled = ((sum_dist * 3) / index + rounding) >> strength;
+  const int capped = VPXMIN(16, scaled);
+
+  return (16 - capped) * filter_weight;
+}
+
+// Accumulates one predicted block (Y, U and V planes) into the temporal
+// filter's running sums in a single pass over the luma pixels.
+//
+// For every sample, the squared differences between the source frame and the
+// prediction are summed over the part of its 3x3 neighbourhood that lies
+// inside the block. mod_index() turns that sum into a weight; the weight is
+// added to the plane's count and weight * predicted value to its accumulator.
+//
+//   y_frame1, u_frame1, v_frame1  source planes, row strides y_stride and
+//                                 uv_stride
+//   y_pred, u_pred, v_pred        predicted planes, row strides y_buf_stride
+//                                 and uv_buf_stride
+//   block_width, block_height     luma block size; the chroma block size is
+//                                 this shifted right by ss_x / ss_y
+//   strength                      shift passed to mod_index(), asserted 0..6
+//   filter_weight                 weight multiplier, asserted 0..2
+//   *_accumulator, *_count        outputs, indexed densely in raster order
+//                                 (one entry per processed sample, no stride)
+static void apply_temporal_filter(
+    const uint8_t *y_frame1, int y_stride, const uint8_t *y_pred,
+    int y_buf_stride, const uint8_t *u_frame1, const uint8_t *v_frame1,
+    int uv_stride, const uint8_t *u_pred, const uint8_t *v_pred,
+    int uv_buf_stride, unsigned int block_width, unsigned int block_height,
+    int ss_x, int ss_y, int strength, int filter_weight,
+    uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator,
+    uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count) {
+  unsigned int i, j, k, m;
+  int modifier;
+  const int rounding = (1 << strength) >> 1;
+  const int uv_block_width = block_width >> ss_x;
+  const int uv_block_height = block_height >> ss_y;
+
+  assert(strength >= 0);
+  assert(strength <= 6);
+
+  assert(filter_weight >= 0);
+  assert(filter_weight <= 2);
+
+  // k indexes the luma outputs, m the chroma outputs.
+  for (i = 0, k = 0, m = 0; i < block_height; i++) {
+    for (j = 0; j < block_width; j++) {
+      const int pixel_value = y_pred[i * y_buf_stride + j];
+
+      // Sum of squared luma differences over the 3x3 window clipped to the
+      // block; index counts the samples that contributed.
+      int idx, idy, index = 0;
+
+      modifier = 0;
+      for (idy = -1; idy <= 1; ++idy) {
+        for (idx = -1; idx <= 1; ++idx) {
+          const int row = (int)i + idy;
+          const int col = (int)j + idx;
+
+          if (row >= 0 && row < (int)block_height && col >= 0 &&
+              col < (int)block_width) {
+            // The prediction is addressed with its own stride, matching the
+            // centre-pixel read above.
+            const int diff = y_frame1[row * y_stride + col] -
+                             y_pred[row * y_buf_stride + col];
+            modifier += diff * diff;
+            ++index;
+          }
+        }
+      }
+
+      assert(index > 0);
+
+      modifier = mod_index(modifier, index, rounding, strength, filter_weight);
+
+      y_count[k] += modifier;
+      y_accumulator[k] += modifier * pixel_value;
+
+      ++k;
+
+      // Process chroma once per chroma sample: the test passes at every luma
+      // position when the subsampling value is 0 and only at even positions
+      // when it is 1.
+      if (!(i & ss_y) && !(j & ss_x)) {
+        const int uv_r = i >> ss_y;
+        const int uv_c = j >> ss_x;
+
+        const int u_pixel_value = u_pred[uv_r * uv_buf_stride + uv_c];
+        const int v_pixel_value = v_pred[uv_r * uv_buf_stride + uv_c];
+
+        // Same clipped 3x3 sum as for luma, computed for U and V together.
+        int uv_idx, uv_idy, uv_index = 0;
+        int u_mod = 0, v_mod = 0;
+
+        for (uv_idy = -1; uv_idy <= 1; ++uv_idy) {
+          for (uv_idx = -1; uv_idx <= 1; ++uv_idx) {
+            const int row = uv_r + uv_idy;
+            const int col = uv_c + uv_idx;
+
+            if (row >= 0 && row < uv_block_height && col >= 0 &&
+                col < uv_block_width) {
+              const int u_diff = u_frame1[row * uv_stride + col] -
+                                 u_pred[row * uv_buf_stride + col];
+              const int v_diff = v_frame1[row * uv_stride + col] -
+                                 v_pred[row * uv_buf_stride + col];
+
+              u_mod += u_diff * u_diff;
+              v_mod += v_diff * v_diff;
+              ++uv_index;
+            }
+          }
+        }
+
+        assert(uv_index > 0);
+
+        u_mod = mod_index(u_mod, uv_index, rounding, strength, filter_weight);
+        v_mod = mod_index(v_mod, uv_index, rounding, strength, filter_weight);
+
+        u_count[m] += u_mod;
+        u_accumulator[m] += u_mod * u_pixel_value;
+        v_count[m] += v_mod;
+        v_accumulator[m] += v_mod * v_pixel_value;
+
+        ++m;
+      }  // Complete YUV pixel
+    }
+  }
+}
+
void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
const uint8_t *frame2,
unsigned int block_width,
@@ -421,31 +545,23 @@
accumulator + 512, count + 512);
} else {
// Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16, strength, filter_weight,
- accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256, mb_uv_width, mb_uv_height,
- strength, filter_weight, accumulator + 256,
- count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512, mb_uv_width, mb_uv_height,
- strength, filter_weight, accumulator + 512,
- count + 512);
+ apply_temporal_filter(
+ f->y_buffer + mb_y_offset, f->y_stride, predictor, 16,
+ f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 256, predictor + 512, mb_uv_width, 16,
+ 16, mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y,
+ strength, filter_weight, accumulator, count, accumulator + 256,
+ count + 256, accumulator + 512, count + 512);
}
#else
// Apply the filter (YUV)
- vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16, strength, filter_weight,
- accumulator, count);
- vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256, mb_uv_width, mb_uv_height,
- strength, filter_weight, accumulator + 256,
- count + 256);
- vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512, mb_uv_width, mb_uv_height,
- strength, filter_weight, accumulator + 512,
- count + 512);
+ apply_temporal_filter(
+ f->y_buffer + mb_y_offset, f->y_stride, predictor, 16,
+ f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 256, predictor + 512, mb_uv_width, 16, 16,
+ mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y, strength,
+ filter_weight, accumulator, count, accumulator + 256, count + 256,
+ accumulator + 512, count + 512);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}