ref: 8e3d0e4d7db867caa110e96fa0fd1ff9ba37cb9f
parent: c0f70cca406a2eca0d70476721b94754c2e5e4e2
author: Jingning Han <jingning@google.com>
date: Wed May 15 08:19:59 EDT 2013
Add building blocks for 4x8/8x4 rd search

These building blocks enable rate-distortion optimization search over
block sizes of 8x4 and 4x8. They still need to be converted into
mmx/sse form.

Change-Id: I570ea2d22d14ceec3fe3575128d7dfa172a577de
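
The "building blocks" are the C reference versions of the distortion kernels (SAD, variance, sub-pixel variance) for the two new block sizes, plus the glue that exposes them to the encoder's per-block-size function-pointer tables. For orientation, here is a rough sketch, not part of the patch, of what the simplest of them, an 8x4 SAD, computes; the patch itself delegates to the existing sad_mx_n_c() helper rather than open-coding the loop:

    #include <stdint.h>
    #include <stdlib.h>  /* abs() */

    /* Sketch: sum of absolute differences between an 8-wide, 4-tall
     * source block and a reference block. */
    static unsigned int sad_8x4_sketch(const uint8_t *src, int src_stride,
                                       const uint8_t *ref, int ref_stride) {
      unsigned int sad = 0;
      int r, c;
      for (r = 0; r < 4; ++r) {
        for (c = 0; c < 8; ++c)
          sad += abs(src[c] - ref[c]);
        src += src_stride;
        ref += ref_stride;
      }
      return sad;
    }
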
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -272,7 +272,7 @@
typedef struct {
MB_MODE_INFO mbmi;
- union b_mode_info bmi[4];
+ union b_mode_info bmi[16];
} MODE_INFO;
struct scale_factors {
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -367,6 +367,19 @@
prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
specialize vp9_sub_pixel_avg_variance8x8
+# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
+prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance8x4
+
+prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance8x4
+
+prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
+specialize vp9_sub_pixel_variance4x8
+
+prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"
+specialize vp9_sub_pixel_avg_variance4x8
+
prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance4x4 sse2 mmx
vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
@@ -404,6 +417,13 @@
prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad8x8 mmx sse2
+# TODO(jingning): need to convert these functions into mmx/sse2 form
+prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad8x4
+
+prototype unsigned int vp9_sad4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
+specialize vp9_sad4x8
+
prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad4x4 mmx sse
@@ -508,6 +528,13 @@
prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad8x8x4d sse2
+
+# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
+prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad8x4x4d
+
+prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
+specialize vp9_sad4x8x4d
prototype void vp9_sad4x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad4x4x4d sse
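
In vp9_rtcd_defs.sh, `prototype` declares a kernel's C signature and `specialize` lists the architecture-specific variants to dispatch to; with an empty specialize list, run-time CPU detection falls back to the plain C version (the `_c` suffix). A hedged sketch of the kind of mapping the generated vp9_rtcd.h is assumed to contain for one of the new kernels (the generated header itself is not part of this patch):

    /* Hedged sketch of the generated dispatch for a kernel with no
     * specializations: the public name simply resolves to the C version. */
    unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride,
                              const uint8_t *ref_ptr, int ref_stride,
                              unsigned int max_sad);
    #define vp9_sad8x4 vp9_sad8x4_c
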
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1597,11 +1597,15 @@
vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
- BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+ BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4,
+ vp9_sub_pixel_avg_variance8x4, NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad8x4x4d)
- BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+ BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8,
+ vp9_sub_pixel_avg_variance4x8, NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad4x8x4d)
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
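
The BFP macro populates one entry of the encoder's per-block-size function-pointer table (cpi->fn_ptr[]): full-pel SAD, variance, sub-pixel and sub-pixel-average variance, the half-pel shortcuts, the x3/x8 multi-SAD helpers, and the four-reference SAD. The 8x4 and 4x8 rows now carry real kernels in the slots that have C implementations; the remaining NULLs correspond to helpers that only matter once SIMD versions exist. A hedged sketch of how encoder code might consume such an entry (field names sdf/vf are assumed from the vp9_variance_fn_ptr_t layout used elsewhere in the encoder):

    /* Hedged usage sketch, not part of the patch. */
    static unsigned int block_distortion_sketch(const VP9_COMP *cpi,
                                                const uint8_t *src, int src_stride,
                                                const uint8_t *ref, int ref_stride) {
      const vp9_variance_fn_ptr_t *fn = &cpi->fn_ptr[BLOCK_8X4];
      unsigned int sse;
      /* Full-pel SAD (0x7fffffff disables any early-exit threshold),
       * then true variance against the same reference. */
      unsigned int sad = fn->sdf(src, src_stride, ref, ref_stride, 0x7fffffff);
      unsigned int var = fn->vf(src, src_stride, ref, ref_stride, &sse);
      return sad + var;  /* placeholder combination, for illustration only */
    }
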
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1096,6 +1096,50 @@
return r;
}
+static enum BlockSize get_block_size(int bw, int bh) {
+ if (bw == 4 && bh == 4)
+ return BLOCK_4X4;
+
+ if (bw == 4 && bh == 8)
+ return BLOCK_4X8;
+
+ if (bw == 8 && bh == 4)
+ return BLOCK_8X4;
+
+ if (bw == 8 && bh == 8)
+ return BLOCK_8X8;
+
+ if (bw == 8 && bh == 16)
+ return BLOCK_8X16;
+
+ if (bw == 16 && bh == 8)
+ return BLOCK_16X8;
+
+ if (bw == 16 && bh == 16)
+ return BLOCK_16X16;
+
+ if (bw == 32 && bh == 32)
+ return BLOCK_32X32;
+
+ if (bw == 32 && bh == 16)
+ return BLOCK_32X16;
+
+ if (bw == 16 && bh == 32)
+ return BLOCK_16X32;
+
+ if (bw == 64 && bh == 32)
+ return BLOCK_64X32;
+
+ if (bw == 32 && bh == 64)
+ return BLOCK_32X64;
+
+ if (bw == 64 && bh == 64)
+ return BLOCK_64X64;
+
+ assert(0);
+ return -1;
+}
+
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi,
int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
@@ -1111,6 +1155,10 @@
int sbr = 0, sbd = 0;
int segmentyrate = 0;
int best_eobs[4] = { 0 };
+#if CONFIG_AB4X4
+ BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
+ int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
+#endif
vp9_variance_fn_ptr_t *v_fn_ptr;
@@ -1120,7 +1168,11 @@
vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
+#if CONFIG_AB4X4
+ v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
+#else
v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4];
+#endif
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
@@ -1668,51 +1720,6 @@
scale[frame_type].y_num == scale[frame_type].y_den)
mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
frame_type, block_size);
-}
-
-
-static enum BlockSize get_block_size(int bw, int bh) {
- if (bw == 4 && bh == 4)
- return BLOCK_4X4;
-
- if (bw == 4 && bh == 8)
- return BLOCK_4X8;
-
- if (bw == 8 && bh == 4)
- return BLOCK_8X4;
-
- if (bw == 8 && bh == 8)
- return BLOCK_8X8;
-
- if (bw == 8 && bh == 16)
- return BLOCK_8X16;
-
- if (bw == 16 && bh == 8)
- return BLOCK_16X8;
-
- if (bw == 16 && bh == 16)
- return BLOCK_16X16;
-
- if (bw == 32 && bh == 32)
- return BLOCK_32X32;
-
- if (bw == 32 && bh == 16)
- return BLOCK_32X16;
-
- if (bw == 16 && bh == 32)
- return BLOCK_16X32;
-
- if (bw == 64 && bh == 32)
- return BLOCK_64X32;
-
- if (bw == 32 && bh == 64)
- return BLOCK_32X64;
-
- if (bw == 64 && bh == 64)
- return BLOCK_64X64;
-
- assert(0);
- return -1;
}
static void model_rd_from_var_lapndz(int var, int n, int qstep,
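
In vp9_rdopt.c the helper get_block_size() is moved above rd_check_segment_txsize() so that, under CONFIG_AB4X4, the segment search can pick the variance function table for the block's actual dimensions instead of always using the 4x4 entry. A small worked sketch of that lookup, assuming b_width_log2()/b_height_log2() report the size in 4-pixel units (bwl == 0 and bhl == 1 for a 4x8 block):

    #include <assert.h>

    /* Sketch, not part of the patch: 4 << bwl / 4 << bhl recover the pixel
     * dimensions, and get_block_size() maps them to the fn_ptr index that
     * the new BFP() lines in vp9_onyx_if.c populate. */
    static void check_fn_ptr_index_sketch(void) {
      assert(get_block_size(4 << 0, 4 << 1) == BLOCK_4X8);  /* 4x8 block */
      assert(get_block_size(4 << 1, 4 << 0) == BLOCK_8X4);  /* 8x4 block */
    }
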
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -156,7 +156,22 @@
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16);
}
+unsigned int vp9_sad8x4_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 4);
+}
+unsigned int vp9_sad4x8_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 8);
+}
+
unsigned int vp9_sad4x4_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t *ref_ptr,
@@ -561,6 +576,36 @@
ref_ptr[2], ref_stride, 0x7fffffff);
sad_array[3] = vp9_sad8x16(src_ptr, src_stride,
ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
+void vp9_sad8x4x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
+void vp9_sad4x8x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
}
void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
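
The new *x4d wrappers score one source block against four candidate reference positions in a single call, which is the shape the motion search expects; passing 0x7fffffff as max_sad disables any early-exit threshold so each call returns the full SAD. A hedged usage sketch (the offset names are illustrative only, not symbols from the patch):

    /* Hedged usage sketch, not part of the patch: score four motion-vector
     * candidates for an 8x4 block in one call. */
    const uint8_t *refs[4] = {
      ref_base + mv0_offset, ref_base + mv1_offset,  /* candidate positions  */
      ref_base + mv2_offset, ref_base + mv3_offset   /* (illustrative names) */
    };
    unsigned int sads[4];
    vp9_sad8x4x4d_c(src, src_stride, refs, ref_stride, sads);
    /* sads[i] now holds the full SAD for candidate i. */
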
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -820,3 +820,91 @@
comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
return vp9_variance8x16_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
+
+unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 5, 8, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
+
+ return vp9_variance8x4_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse,
+ const uint8_t *second_pred) {
+ uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 5, 8, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
+ comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
+ return vp9_variance8x4_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 9, 4, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
+
+ return vp9_variance4x8_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse,
+ const uint8_t *second_pred) {
+ uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 9, 4, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
+ comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
+ return vp9_variance4x8_c(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
+}
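
Each sub-pixel variance kernel is a two-pass bilinear filter: the horizontal first pass must produce one extra row so the 2-tap vertical second pass can reach its last output row, i.e. (h + 1) rows of w filtered samples for a w x h block (5 rows of 8 for 8x4, 9 rows of 4 for 4x8), and fdata3 must hold at least w * (h + 1) entries. A hedged sketch, not part of the patch, of what the second pass is assumed to do (the 7-bit filter scale is an assumption about the bilinear tap values):

    #include <stdint.h>

    /* Sketch: each output row blends intermediate row r with row r + 1,
     * which is why the first pass supplies bh + 1 rows. */
    static void second_pass_sketch(const uint16_t *mid, uint8_t *out,
                                   int bw, int bh, const int16_t *vfilter) {
      int r, c;
      for (r = 0; r < bh; ++r) {
        for (c = 0; c < bw; ++c) {
          const int v = mid[c] * vfilter[0] + mid[c + bw] * vfilter[1];
          out[c] = (uint8_t)((v + 64) >> 7);  /* taps assumed to sum to 128 */
        }
        mid += bw;
        out += bw;
      }
    }
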
--