ref: 810b612c233e82addc09412815ce06dfdaa589c9
parent: 5f3612c35e1cadb024317f6223ceabeebac1235a
author: Jingning Han <jingning@google.com>
date: Wed May 15 18:28:36 EDT 2013
Enable bit-stream support for 8x4 and 4x8 partitions

The recursive partition type search is enabled down to 4x4, 4x8 and
8x4, followed by the corresponding rate-distortion optimization for
the per-partition encoding mode decisions. The bit-stream writing and
reading are synchronized to support the rectangular partitioning of
the 8x8 block.

This provides over 1% coding performance gain on derf.

To do next:
1. re-design the rate-distortion loop for inter prediction below 8x8.
2. re-design the rate-distortion loop for intra prediction below 4x4.
3. make the loop filter aware of the rectangular partitioning of the 8x8 block.
4. clean up the unused probability models.
5. update the default probability values.

Change-Id: Idd41a315b16879db08f045a322241f46f1d53f20
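Note on the traversal pattern used throughout the decoder and RD changes below: within an 8x8 block, the new code walks the 2x2 grid of 4x4 units in steps of the sub-partition's width/height (in 4x4 units) and replicates the chosen mode/MV into every 4x4 cell the sub-partition covers. The following standalone sketch illustrates only that idy/idx loop and replication pattern; the struct and function names are hypothetical stand-ins, not the actual vp9 data structures.

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for per-4x4 mode/MV info inside an 8x8 block. */
    typedef struct { int mv_row, mv_col; } SubBlockInfo;

    /* Walk the 2x2 grid of 4x4 units in steps of (bw, bh) 4x4 units and
     * copy each decision into every covered cell, mirroring the idy/idx
     * loops added in vp9_decodemv.c and vp9_rdopt.c. */
    static void assign_sub_partitions(SubBlockInfo bmi[4], int bw, int bh) {
      int idx, idy, i;
      for (idy = 0; idy < 2; idy += bh) {
        for (idx = 0; idx < 2; idx += bw) {
          int j = idy * 2 + idx;      /* raster index of the first 4x4 cell */
          bmi[j].mv_row = j;          /* pretend decision for this sub-block */
          bmi[j].mv_col = -j;
          /* Replicate into the other 4x4 cells this sub-partition covers. */
          for (i = 1; i < bh; ++i)
            memcpy(&bmi[j + i * 2], &bmi[j], sizeof(bmi[j]));
          for (i = 1; i < bw; ++i)
            memcpy(&bmi[j + i], &bmi[j], sizeof(bmi[j]));
        }
      }
    }

    int main(void) {
      SubBlockInfo bmi[4];
      /* 8x4 sub-partitions: 2 units wide, 1 unit tall -> two rows. */
      assign_sub_partitions(bmi, 2, 1);
      printf("8x4: %d %d %d %d\n",
             bmi[0].mv_row, bmi[1].mv_row, bmi[2].mv_row, bmi[3].mv_row);
      /* 4x8 sub-partitions: 1 unit wide, 2 units tall -> two columns. */
      assign_sub_partitions(bmi, 1, 2);
      printf("4x8: %d %d %d %d\n",
             bmi[0].mv_row, bmi[1].mv_row, bmi[2].mv_row, bmi[3].mv_row);
      return 0;
    }

With bw/bh forced to 1 (the non-AB4X4 path), the same loops degenerate to the old per-4x4 SPLITMV iteration, which is why the patch can share one loop body for both configurations.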
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -417,6 +417,7 @@
static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
switch (subsize) {
+ case BLOCK_SIZE_SB64X64:
case BLOCK_SIZE_SB64X32:
case BLOCK_SIZE_SB32X64:
case BLOCK_SIZE_SB32X32:
@@ -444,10 +445,10 @@
static INLINE void update_partition_context(MACROBLOCKD *xd,
BLOCK_SIZE_TYPE sb_type,
BLOCK_SIZE_TYPE sb_size) {
- int bsl = mi_width_log2(sb_size), bs = 1 << bsl;
- int bwl = mi_width_log2(sb_type);
- int bhl = mi_height_log2(sb_type);
- int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
+ int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
+ int bwl = b_width_log2(sb_type);
+ int bhl = b_height_log2(sb_type);
+ int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl;
int i;
#if !CONFIG_AB4X4
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -107,10 +107,10 @@
[PARTITION_TYPES - 1] = {
// FIXME(jingning,rbultje) put real probabilities here
#if CONFIG_AB4X4
- {202, 162, 107},
- {16, 2, 169},
- {3, 246, 19},
- {104, 90, 134},
+ {105, 88, 252},
+ {113, 88, 249},
+ {113, 106, 251},
+ {126, 105, 107},
#endif
{202, 162, 107},
{16, 2, 169},
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -540,8 +540,9 @@
int_mv *const mv0 = &mbmi->mv[0];
int_mv *const mv1 = &mbmi->mv[1];
- const int bw = 1 << mi_width_log2(mi->mbmi.sb_type);
- const int bh = 1 << mi_height_log2(mi->mbmi.sb_type);
+ BLOCK_SIZE_TYPE bsize = mi->mbmi.sb_type;
+ int bw = 1 << b_width_log2(bsize);
+ int bh = 1 << b_height_log2(bsize);
const int use_prev_in_find_mv_refs = cm->width == cm->last_width &&
cm->height == cm->last_height &&
@@ -549,6 +550,7 @@
cm->last_show_frame;
int mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge;
+ int j, idx, idy;
mbmi->need_to_clamp_mvs = 0;
mbmi->need_to_clamp_secondmv = 0;
@@ -562,7 +564,8 @@
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to MV values
// that are in 1/8th pel units
- set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw);
+ set_mi_row_col(cm, xd, mi_row, 1 << mi_height_log2(bsize),
+ mi_col, 1 << mi_width_log2(bsize));
mb_to_top_edge = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
mb_to_bottom_edge = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
@@ -613,14 +616,14 @@
mbmi->mode = ZEROMV;
} else {
#if CONFIG_AB4X4
- if (mbmi->sb_type >= BLOCK_SIZE_SB8X8)
+ if (bsize >= BLOCK_SIZE_SB8X8)
mbmi->mode = read_sb_mv_ref(r, mv_ref_p);
else
mbmi->mode = SPLITMV;
#else
- mbmi->mode = mbmi->sb_type > BLOCK_SIZE_SB8X8 ?
- read_sb_mv_ref(r, mv_ref_p)
- : read_mv_ref(r, mv_ref_p);
+ mbmi->mode = bsize > BLOCK_SIZE_SB8X8 ?
+ read_sb_mv_ref(r, mv_ref_p)
+ : read_mv_ref(r, mv_ref_p);
#endif
vp9_accum_mv_refs(cm, mbmi->mode, mbmi->mb_mode_context[ref_frame]);
}
@@ -685,81 +688,88 @@
mbmi->uv_mode = DC_PRED;
switch (mbmi->mode) {
- case SPLITMV: {
- const int num_p = 4;
- int j = 0;
-
+ case SPLITMV:
+#if !CONFIG_AB4X4
+ bw = 1, bh = 1;
+#endif
mbmi->need_to_clamp_mvs = 0;
- do { // for each subset j
- int_mv leftmv, abovemv, second_leftmv, second_abovemv;
- int_mv blockmv, secondmv;
- int mv_contz;
- int blockmode;
- int k = j;
+ for (idy = 0; idy < 2; idy += bh) {
+ for (idx = 0; idx < 2; idx += bw) {
+ int_mv leftmv, abovemv, second_leftmv, second_abovemv;
+ int_mv blockmv, secondmv;
+ int mv_contz;
+ int blockmode;
+ int i, k;
+ j = idy * 2 + idx;
+ k = j;
- leftmv.as_int = left_block_mv(xd, mi, k);
- abovemv.as_int = above_block_mv(mi, k, mis);
- second_leftmv.as_int = 0;
- second_abovemv.as_int = 0;
- if (mbmi->second_ref_frame > 0) {
- second_leftmv.as_int = left_block_second_mv(xd, mi, k);
- second_abovemv.as_int = above_block_second_mv(mi, k, mis);
- }
- mv_contz = vp9_mv_cont(&leftmv, &abovemv);
- blockmode = read_sub_mv_ref(r, cm->fc.sub_mv_ref_prob[mv_contz]);
- cm->fc.sub_mv_ref_counts[mv_contz][blockmode - LEFT4X4]++;
+ leftmv.as_int = left_block_mv(xd, mi, k);
+ abovemv.as_int = above_block_mv(mi, k, mis);
+ second_leftmv.as_int = 0;
+ second_abovemv.as_int = 0;
+ if (mbmi->second_ref_frame > 0) {
+ second_leftmv.as_int = left_block_second_mv(xd, mi, k);
+ second_abovemv.as_int = above_block_second_mv(mi, k, mis);
+ }
+ mv_contz = vp9_mv_cont(&leftmv, &abovemv);
+ blockmode = read_sub_mv_ref(r, cm->fc.sub_mv_ref_prob[mv_contz]);
+ cm->fc.sub_mv_ref_counts[mv_contz][blockmode - LEFT4X4]++;
- switch (blockmode) {
- case NEW4X4:
- decode_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
- &cm->fc.NMVcount, xd->allow_high_precision_mv);
+ switch (blockmode) {
+ case NEW4X4:
+ decode_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
- if (mbmi->second_ref_frame > 0)
- decode_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
- &cm->fc.NMVcount, xd->allow_high_precision_mv);
+ if (mbmi->second_ref_frame > 0)
+ decode_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
-#ifdef VPX_MODE_COUNT
- vp9_mv_cont_count[mv_contz][3]++;
-#endif
- break;
- case LEFT4X4:
- blockmv.as_int = leftmv.as_int;
- if (mbmi->second_ref_frame > 0)
- secondmv.as_int = second_leftmv.as_int;
-#ifdef VPX_MODE_COUNT
- vp9_mv_cont_count[mv_contz][0]++;
-#endif
- break;
- case ABOVE4X4:
- blockmv.as_int = abovemv.as_int;
- if (mbmi->second_ref_frame > 0)
- secondmv.as_int = second_abovemv.as_int;
-#ifdef VPX_MODE_COUNT
- vp9_mv_cont_count[mv_contz][1]++;
-#endif
- break;
- case ZERO4X4:
- blockmv.as_int = 0;
- if (mbmi->second_ref_frame > 0)
- secondmv.as_int = 0;
-#ifdef VPX_MODE_COUNT
- vp9_mv_cont_count[mv_contz][2]++;
-#endif
- break;
- default:
- break;
+ #ifdef VPX_MODE_COUNT
+ vp9_mv_cont_count[mv_contz][3]++;
+ #endif
+ break;
+ case LEFT4X4:
+ blockmv.as_int = leftmv.as_int;
+ if (mbmi->second_ref_frame > 0)
+ secondmv.as_int = second_leftmv.as_int;
+ #ifdef VPX_MODE_COUNT
+ vp9_mv_cont_count[mv_contz][0]++;
+ #endif
+ break;
+ case ABOVE4X4:
+ blockmv.as_int = abovemv.as_int;
+ if (mbmi->second_ref_frame > 0)
+ secondmv.as_int = second_abovemv.as_int;
+ #ifdef VPX_MODE_COUNT
+ vp9_mv_cont_count[mv_contz][1]++;
+ #endif
+ break;
+ case ZERO4X4:
+ blockmv.as_int = 0;
+ if (mbmi->second_ref_frame > 0)
+ secondmv.as_int = 0;
+ #ifdef VPX_MODE_COUNT
+ vp9_mv_cont_count[mv_contz][2]++;
+ #endif
+ break;
+ default:
+ break;
+ }
+ mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
+ if (mbmi->second_ref_frame > 0)
+ mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
+
+ for (i = 1; i < bh; ++i)
+ vpx_memcpy(&mi->bmi[j + i * 2], &mi->bmi[j], sizeof(mi->bmi[j]));
+ for (i = 1; i < bw; ++i)
+ vpx_memcpy(&mi->bmi[j + i], &mi->bmi[j], sizeof(mi->bmi[j]));
}
- mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
- if (mbmi->second_ref_frame > 0)
- mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
- } while (++j < num_p);
- }
+ }
- mv0->as_int = mi->bmi[3].as_mv[0].as_int;
- mv1->as_int = mi->bmi[3].as_mv[1].as_int;
+ mv0->as_int = mi->bmi[3].as_mv[0].as_int;
+ mv1->as_int = mi->bmi[3].as_mv[1].as_int;
+ break; /* done with SPLITMV */
- break; /* done with SPLITMV */
-
case NEARMV:
// Clip "next_nearest" so that it does not extend to far out of image
assign_and_clamp_mv(mv0, &nearby, mb_to_left_edge,
@@ -822,7 +832,7 @@
mv0->as_int = 0;
#if CONFIG_AB4X4
- if (mbmi->sb_type >= BLOCK_SIZE_SB8X8) {
+ if (bsize >= BLOCK_SIZE_SB8X8) {
mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
cm->fc.sb_ymode_counts[mbmi->mode]++;
} else {
@@ -829,7 +839,7 @@
mbmi->mode = I4X4_PRED;
}
#else
- if (mbmi->sb_type > BLOCK_SIZE_SB8X8) {
+ if (bsize > BLOCK_SIZE_SB8X8) {
mbmi->mode = read_sb_ymode(r, cm->fc.sb_ymode_prob);
cm->fc.sb_ymode_counts[mbmi->mode]++;
} else {
@@ -840,7 +850,7 @@
// If MB mode is I4X4_PRED read the block modes
#if CONFIG_AB4X4
- if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
+ if (bsize < BLOCK_SIZE_SB8X8) {
#else
if (mbmi->mode == I4X4_PRED) {
#endif
@@ -857,21 +867,21 @@
}
#if CONFIG_AB4X4
- if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
- mbmi->sb_type >= BLOCK_SIZE_SB8X8) {
+ if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
+ bsize >= BLOCK_SIZE_SB8X8) {
#else
if (cm->txfm_mode == TX_MODE_SELECT && mbmi->mb_skip_coeff == 0 &&
((mbmi->ref_frame == INTRA_FRAME && mbmi->mode != I4X4_PRED) ||
(mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))) {
#endif
- const int allow_16x16 = mbmi->sb_type >= BLOCK_SIZE_MB16X16;
- const int allow_32x32 = mbmi->sb_type >= BLOCK_SIZE_SB32X32;
+ const int allow_16x16 = bsize >= BLOCK_SIZE_MB16X16;
+ const int allow_32x32 = bsize >= BLOCK_SIZE_SB32X32;
mbmi->txfm_size = select_txfm_size(cm, r, allow_16x16, allow_32x32);
- } else if (mbmi->sb_type >= BLOCK_SIZE_SB32X32 &&
+ } else if (bsize >= BLOCK_SIZE_SB32X32 &&
cm->txfm_mode >= ALLOW_32X32) {
mbmi->txfm_size = TX_32X32;
} else if (cm->txfm_mode >= ALLOW_16X16 &&
- mbmi->sb_type >= BLOCK_SIZE_MB16X16
+ bsize >= BLOCK_SIZE_MB16X16
#if !CONFIG_AB4X4
&& ((mbmi->ref_frame == INTRA_FRAME && mbmi->mode <= TM_PRED) ||
(mbmi->ref_frame != INTRA_FRAME && mbmi->mode != SPLITMV))
@@ -880,7 +890,7 @@
mbmi->txfm_size = TX_16X16;
} else if (cm->txfm_mode >= ALLOW_8X8 &&
#if CONFIG_AB4X4
- (mbmi->sb_type >= BLOCK_SIZE_SB8X8))
+ (bsize >= BLOCK_SIZE_SB8X8))
#else
(!(mbmi->ref_frame == INTRA_FRAME && mbmi->mode == I4X4_PRED) &&
!(mbmi->ref_frame != INTRA_FRAME && mbmi->mode == SPLITMV)))
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -413,6 +413,11 @@
vp9_reader *r, BLOCK_SIZE_TYPE bsize) {
MACROBLOCKD *const xd = &pbi->mb;
+#if CONFIG_AB4X4
+ if (bsize < BLOCK_SIZE_SB8X8)
+ if (xd->ab_index > 0)
+ return;
+#endif
set_offsets(pbi, bsize, mi_row, mi_col);
vp9_decode_mb_mode_mv(pbi, xd, mi_row, mi_col, r);
set_refs(pbi, mi_row, mi_col);
@@ -465,6 +470,7 @@
}
subsize = get_subsize(bsize, partition);
+ *(get_sb_index(xd, subsize)) = 0;
switch (partition) {
case PARTITION_NONE:
@@ -472,11 +478,13 @@
break;
case PARTITION_HORZ:
decode_modes_b(pbi, mi_row, mi_col, r, subsize);
+ *(get_sb_index(xd, subsize)) = 1;
if (mi_row + bs < pc->mi_rows)
decode_modes_b(pbi, mi_row + bs, mi_col, r, subsize);
break;
case PARTITION_VERT:
decode_modes_b(pbi, mi_row, mi_col, r, subsize);
+ *(get_sb_index(xd, subsize)) = 1;
if (mi_col + bs < pc->mi_cols)
decode_modes_b(pbi, mi_row, mi_col + bs, r, subsize);
break;
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -34,7 +34,7 @@
static void recon_write_yuv_frame(const char *name,
const YV12_BUFFER_CONFIG *s,
int w, int _h) {
- FILE *yuv_file = fopen((char *)name, "ab");
+ FILE *yuv_file = fopen(name, "ab");
const uint8_t *src = s->y_buffer;
int h = _h;
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -696,39 +696,50 @@
nmvc, xd->allow_high_precision_mv);
break;
case SPLITMV: {
- int j = 0;
+ int j;
+ B_PREDICTION_MODE blockmode;
+ int_mv blockmv;
+ int k = -1; /* first block in subset j */
+ int mv_contz;
+ int_mv leftmv, abovemv;
+ int bwl = b_width_log2(mi->sb_type), bw = 1 << bwl;
+ int bhl = b_height_log2(mi->sb_type), bh = 1 << bhl;
+ int idx, idy;
+#if !CONFIG_AB4X4
+ bw = 1, bh = 1;
+#endif
+ for (idy = 0; idy < 2; idy += bh) {
+ for (idx = 0; idx < 2; idx += bw) {
+ j = idy * 2 + idx;
+ blockmode = cpi->mb.partition_info->bmi[j].mode;
+ blockmv = cpi->mb.partition_info->bmi[j].mv;
+ k = j;
+ leftmv.as_int = left_block_mv(xd, m, k);
+ abovemv.as_int = above_block_mv(m, k, mis);
+ mv_contz = vp9_mv_cont(&leftmv, &abovemv);
- do {
- B_PREDICTION_MODE blockmode;
- int_mv blockmv;
- int k = -1; /* first block in subset j */
- int mv_contz;
- int_mv leftmv, abovemv;
-
- blockmode = cpi->mb.partition_info->bmi[j].mode;
- blockmv = cpi->mb.partition_info->bmi[j].mv;
- k = j;
- leftmv.as_int = left_block_mv(xd, m, k);
- abovemv.as_int = above_block_mv(m, k, mis);
- mv_contz = vp9_mv_cont(&leftmv, &abovemv);
-
- write_sub_mv_ref(bc, blockmode,
- cpi->common.fc.sub_mv_ref_prob[mv_contz]);
- cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++;
- if (blockmode == NEW4X4) {
+ write_sub_mv_ref(bc, blockmode,
+ cpi->common.fc.sub_mv_ref_prob[mv_contz]);
+ cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++;
+ if (blockmode == NEW4X4) {
#ifdef ENTROPY_STATS
- active_section = 11;
+ active_section = 11;
#endif
- vp9_encode_mv(bc, &blockmv.as_mv, &mi->best_mv.as_mv,
- nmvc, xd->allow_high_precision_mv);
-
- if (mi->second_ref_frame > 0)
- vp9_encode_mv(bc,
- &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
- &mi->best_second_mv.as_mv,
+ vp9_encode_mv(bc, &blockmv.as_mv, &mi->best_mv.as_mv,
nmvc, xd->allow_high_precision_mv);
+
+ if (mi->second_ref_frame > 0)
+ vp9_encode_mv(bc,
+ &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
+ &mi->best_second_mv.as_mv,
+ nmvc, xd->allow_high_precision_mv);
+ }
}
- } while (++j < cpi->mb.partition_info->count);
+ }
+
+#ifdef MODE_STATS
+ ++count_mb_seg[mi->partitioning];
+#endif
break;
}
default:
@@ -837,6 +848,11 @@
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+#if CONFIG_AB4X4
+ if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8)
+ if (xd->ab_index > 0)
+ return;
+#endif
xd->mode_info_context = m;
set_mi_row_col(&cpi->common, xd, mi_row,
1 << mi_height_log2(m->mbmi.sb_type),
@@ -891,7 +907,7 @@
#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
- if (xd->ab_index != 0)
+ if (xd->ab_index > 0)
return;
#endif
@@ -910,6 +926,7 @@
}
subsize = get_subsize(bsize, partition);
+ *(get_sb_index(xd, subsize)) = 0;
switch (partition) {
case PARTITION_NONE:
@@ -917,11 +934,13 @@
break;
case PARTITION_HORZ:
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
+ *(get_sb_index(xd, subsize)) = 1;
if ((mi_row + bs) < cm->mi_rows)
write_modes_b(cpi, m + bs * mis, bc, tok, tok_end, mi_row + bs, mi_col);
break;
case PARTITION_VERT:
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
+ *(get_sb_index(xd, subsize)) = 1;
if ((mi_col + bs) < cm->mi_cols)
write_modes_b(cpi, m + bs, bc, tok, tok_end, mi_row, mi_col + bs);
break;
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -786,6 +786,12 @@
if (sub_index != -1)
*(get_sb_index(xd, bsize)) = sub_index;
+
+#if CONFIG_AB4X4
+ if (bsize < BLOCK_SIZE_SB8X8)
+ if (xd->ab_index > 0)
+ return;
+#endif
set_offsets(cpi, mi_row, mi_col, bsize);
update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
@@ -828,13 +834,8 @@
if (bsl == bwl && bsl == bhl) {
#if CONFIG_AB4X4
- if (output_enabled && bsize >= BLOCK_SIZE_SB8X8) {
- if (bsize > BLOCK_SIZE_SB8X8 ||
- (bsize == BLOCK_SIZE_SB8X8 && c1 == bsize))
+ if (output_enabled && bsize >= BLOCK_SIZE_SB8X8)
cpi->partition_count[pl][PARTITION_NONE]++;
- else
- cpi->partition_count[pl][PARTITION_SPLIT]++;
- }
#else
if (output_enabled && bsize > BLOCK_SIZE_SB8X8)
cpi->partition_count[pl][PARTITION_NONE]++;
@@ -909,7 +910,6 @@
return;
}
#endif
-
assert(mi_height_log2(bsize) == mi_width_log2(bsize));
// buffer the above/left context information of the block in search.
@@ -939,7 +939,7 @@
for (i = 0; i < 4; ++i) {
int x_idx = (i & 1) * (ms >> 1);
int y_idx = (i >> 1) * (ms >> 1);
- int r, d;
+ int r = 0, d = 0;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
@@ -966,10 +966,13 @@
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
- // TODO(jingning): need to enable 4x8 and 8x4 partition coding
// PARTITION_HORZ
if ((mi_col + ms <= cm->mi_cols) && (mi_row + (ms >> 1) <= cm->mi_rows) &&
+#if CONFIG_AB4X4
+ (bsize >= BLOCK_SIZE_SB8X8)) {
+#else
(bsize >= BLOCK_SIZE_MB16X16)) {
+#endif
int r2, d2;
int mb_skip = 0;
subsize = get_subsize(bsize, PARTITION_HORZ);
@@ -978,7 +981,7 @@
get_block_context(x, subsize));
if (mi_row + ms <= cm->mi_rows) {
- int r, d;
+ int r = 0, d = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
@@ -992,8 +995,12 @@
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
+#if CONFIG_AB4X4
+ if (r2 < INT_MAX)
+ r2 += x->partition_cost[pl][PARTITION_HORZ];
+#else
r2 += x->partition_cost[pl][PARTITION_HORZ];
-
+#endif
if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
srate = r2;
@@ -1005,7 +1012,11 @@
// PARTITION_VERT
if ((mi_row + ms <= cm->mi_rows) && (mi_col + (ms >> 1) <= cm->mi_cols) &&
+#if CONFIG_AB4X4
+ (bsize >= BLOCK_SIZE_SB8X8)) {
+#else
(bsize >= BLOCK_SIZE_MB16X16)) {
+#endif
int r2, d2;
int mb_skip = 0;
subsize = get_subsize(bsize, PARTITION_VERT);
@@ -1013,7 +1024,7 @@
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
get_block_context(x, subsize));
if (mi_col + ms <= cm->mi_cols) {
- int r, d;
+ int r = 0, d = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1;
@@ -1027,8 +1038,12 @@
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
+#if CONFIG_AB4X4
+ if (r2 < INT_MAX)
+ r2 += x->partition_cost[pl][PARTITION_VERT];
+#else
r2 += x->partition_cost[pl][PARTITION_VERT];
-
+#endif
if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
srate = r2;
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -573,7 +573,11 @@
int bhl = b_height_log2(mbmi->sb_type), bh = 1 << bhl;
int idx, idy;
+#if CONFIG_AB4X4
+ if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
+#else
if (mbmi->mode == SPLITMV) {
+#endif
int i;
PARTITION_INFO *pi = x->partition_info;
#if !CONFIG_AB4X4
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -910,6 +910,11 @@
MB_MODE_INFO * mbmi = &mic->mbmi;
const int mis = xd->mode_info_stride;
int i, cost = 0, thismvcost = 0;
+#if CONFIG_AB4X4
+ int idx, idy;
+ int bw = 1 << b_width_log2(mbmi->sb_type);
+ int bh = 1 << b_height_log2(mbmi->sb_type);
+#endif
/* We have to be careful retrieving previously-encoded motion vectors.
Ones from this macroblock have to be pulled from the BLOCKD array
@@ -993,6 +998,17 @@
x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
if (mbmi->second_ref_frame > 0)
x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
+#if CONFIG_AB4X4
+ for (idy = 0; idy < bh; ++idy) {
+ for (idx = 0; idx < bw; ++idx) {
+ vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
+ &mic->bmi[i], sizeof(mic->bmi[i]));
+ vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx],
+ &x->partition_info->bmi[i],
+ sizeof(x->partition_info->bmi[i]));
+ }
+ }
+#endif
}
cost += thismvcost;
@@ -1007,8 +1023,15 @@
int *distortion,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) {
- int i;
+ int i, k;
MACROBLOCKD *xd = &x->e_mbd;
+ BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
+ int bwl = b_width_log2(bsize), bw = 1 << bwl;
+ int bhl = b_height_log2(bsize), bh = 1 << bhl;
+ int idx, idy;
+#if !CONFIG_AB4X4
+ bw = 1, bh = 1;
+#endif
*labelyrate = 0;
*distortion = 0;
@@ -1018,10 +1041,10 @@
uint8_t* const src =
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
x->plane[0].src.buf, src_stride);
- int16_t* const src_diff =
+ int16_t* src_diff =
raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
x->plane[0].src_diff);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
+ int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
uint8_t* const pre =
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
xd->plane[0].pre[0].buf,
@@ -1030,7 +1053,8 @@
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
xd->plane[0].dst.buf,
xd->plane[0].dst.stride);
- int thisdistortion;
+ int thisdistortion = 0;
+ int thisrate = 0;
vp9_build_inter_predictor(pre,
xd->plane[0].pre[0].stride,
@@ -1038,7 +1062,7 @@
xd->plane[0].dst.stride,
&xd->mode_info_context->bmi[i].as_mv[0],
&xd->scale_factor[0],
- 4, 4, 0 /* no avg */, &xd->subpix);
+ 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix);
// TODO(debargha): Make this work properly with the
// implicit-compoundinter-weight experiment when implicit
@@ -1051,22 +1075,33 @@
vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
dst, xd->plane[0].dst.stride,
&xd->mode_info_context->bmi[i].as_mv[1],
- &xd->scale_factor[1], 4, 4, 1,
+ &xd->scale_factor[1], 4 * bw, 4 * bh, 1,
&xd->subpix);
}
- vp9_subtract_block(4, 4, src_diff, 8,
+ vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
- x->fwd_txm4x4(src_diff, coeff, 16);
- x->quantize_b_4x4(x, i, DCT_DCT, 16);
- thisdistortion = vp9_block_error(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff,
- i, 16), 16);
+
+ k = i;
+ for (idy = 0; idy < bh; ++idy) {
+ for (idx = 0; idx < bw; ++idx) {
+ k += (idy * 2 + idx);
+ src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
+ x->plane[0].src_diff);
+ coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
+ x->fwd_txm4x4(src_diff, coeff, 16);
+ x->quantize_b_4x4(x, k, DCT_DCT, 16);
+ thisdistortion += vp9_block_error(coeff,
+ BLOCK_OFFSET(xd->plane[0].dqcoeff,
+ k, 16), 16);
+ thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
+ ta + (k & 1),
+ tl + (k >> 1), TX_4X4, 16);
+ }
+ }
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(cm, x, 0, i, PLANE_TYPE_Y_WITH_DC,
- ta + (i & 1),
- tl + (i >> 1), TX_4X4, 16);
+ *labelyrate += thisrate;
}
}
*distortion >>= 2;
@@ -1155,16 +1190,19 @@
int sbr = 0, sbd = 0;
int segmentyrate = 0;
int best_eobs[4] = { 0 };
-#if CONFIG_AB4X4
BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
- int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
-#endif
-
+ int bwl = b_width_log2(bsize), bw = 1 << bwl;
+ int bhl = b_height_log2(bsize), bh = 1 << bhl;
+ int idx, idy;
vp9_variance_fn_ptr_t *v_fn_ptr;
- ENTROPY_CONTEXT t_above[2], t_left[2];
- ENTROPY_CONTEXT t_above_b[2], t_left_b[2];
+ ENTROPY_CONTEXT t_above[4], t_left[4];
+ ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
+#if !CONFIG_AB4X4
+ bh = 1, bw = 1;
+#endif
+
vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
@@ -1181,183 +1219,367 @@
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
+#if !CONFIG_AB4X4
rate += vp9_cost_mv_ref(cpi, SPLITMV,
mbmi->mb_mode_context[mbmi->ref_frame]);
this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
br += rate;
+#endif
other_segment_rd = this_segment_rd;
- for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) {
- int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
- int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
- B_PREDICTION_MODE mode_selected = ZERO4X4;
- int bestlabelyrate = 0;
+ for (idy = 0; idy < 2; idy += bh) {
+ for (idx = 0; idx < 2; idx += bw) {
+ // TODO(jingning,rbultje): rewrite the rate-distortion optimization
+ // loop for 4x4/4x8/8x4 block coding
+#if CONFIG_AB4X4
+ int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
+ int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
+ B_PREDICTION_MODE mode_selected = ZERO4X4;
+ int bestlabelyrate = 0;
+ i = idy * 2 + idx;
- // search for the best motion vector on this segment
- for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
- int64_t this_rd;
- int distortion;
- int labelyrate;
- ENTROPY_CONTEXT t_above_s[2], t_left_s[2];
+ // search for the best motion vector on this segment
+ for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) {
+ int64_t this_rd;
+ int distortion;
+ int labelyrate;
+ ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
- vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
- vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
+ vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
+ vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
- // motion search for newmv (single predictor case only)
- if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) {
- int sseshift, n;
- int step_param = 0;
- int further_steps;
- int thissme, bestsme = INT_MAX;
- const struct buf_2d orig_src = x->plane[0].src;
- const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0];
+ // motion search for newmv (single predictor case only)
+ if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) {
+ int sseshift, n;
+ int step_param = 0;
+ int further_steps;
+ int thissme, bestsme = INT_MAX;
+ const struct buf_2d orig_src = x->plane[0].src;
+ const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0];
- /* Is the best so far sufficiently good that we cant justify doing
- * and new motion search. */
- if (best_label_rd < label_mv_thresh)
- break;
+ /* Is the best so far sufficiently good that we cant justify doing
+ * and new motion search. */
+ if (best_label_rd < label_mv_thresh)
+ break;
- if (cpi->compressor_speed) {
- // use previous block's result as next block's MV predictor.
- if (i > 0) {
- bsi->mvp.as_int =
- x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
- if (i == 2)
+ if (cpi->compressor_speed) {
+ // use previous block's result as next block's MV predictor.
+ if (i > 0) {
bsi->mvp.as_int =
- x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
- step_param = 2;
+ x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
+ if (i == 2)
+ bsi->mvp.as_int =
+ x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
+ step_param = 2;
+ }
}
- }
- further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
+ further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
- {
- int sadpb = x->sadperbit4;
- int_mv mvp_full;
+ {
+ int sadpb = x->sadperbit4;
+ int_mv mvp_full;
- mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
- mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
+ mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
+ mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
- // find first label
- n = i;
+ // find first label
+ n = i;
- // adjust src pointer for this segment
- x->plane[0].src.buf =
- raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
- x->plane[0].src.buf,
- x->plane[0].src.stride);
- assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
- x->e_mbd.plane[0].pre[0].buf =
- raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
- x->e_mbd.plane[0].pre[0].buf,
- x->e_mbd.plane[0].pre[0].stride);
+ // adjust src pointer for this segment
+ x->plane[0].src.buf =
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
+ x->plane[0].src.buf,
+ x->plane[0].src.stride);
+ assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
+ x->e_mbd.plane[0].pre[0].buf =
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
+ x->e_mbd.plane[0].pre[0].buf,
+ x->e_mbd.plane[0].pre[0].stride);
- bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
- sadpb, further_steps, 0, v_fn_ptr,
- bsi->ref_mv, &mode_mv[NEW4X4]);
+ bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
+ sadpb, further_steps, 0, v_fn_ptr,
+ bsi->ref_mv, &mode_mv[NEW4X4]);
- sseshift = 0;
+ sseshift = 0;
- // Should we do a full search (best quality only)
- if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
- /* Check if mvp_full is within the range. */
- clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
- x->mv_row_min, x->mv_row_max);
+ // Should we do a full search (best quality only)
+ if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
+ /* Check if mvp_full is within the range. */
+ clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
+ x->mv_row_min, x->mv_row_max);
- thissme = cpi->full_search_sad(x, &mvp_full,
- sadpb, 16, v_fn_ptr,
- x->nmvjointcost, x->mvcost,
- bsi->ref_mv,
- n);
+ thissme = cpi->full_search_sad(x, &mvp_full,
+ sadpb, 16, v_fn_ptr,
+ x->nmvjointcost, x->mvcost,
+ bsi->ref_mv,
+ n);
- if (thissme < bestsme) {
- bestsme = thissme;
- mode_mv[NEW4X4].as_int =
- x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int;
- } else {
- /* The full search result is actually worse so re-instate the
- * previous best vector */
- x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int =
- mode_mv[NEW4X4].as_int;
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ mode_mv[NEW4X4].as_int =
+ x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int;
+ } else {
+ /* The full search result is actually worse so re-instate the
+ * previous best vector */
+ x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int =
+ mode_mv[NEW4X4].as_int;
+ }
}
}
- }
- if (bestsme < INT_MAX) {
- int distortion;
- unsigned int sse;
- cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4],
- bsi->ref_mv, x->errorperbit, v_fn_ptr,
- x->nmvjointcost, x->mvcost,
- &distortion, &sse);
+ if (bestsme < INT_MAX) {
+ int distortion;
+ unsigned int sse;
+ cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4],
+ bsi->ref_mv, x->errorperbit, v_fn_ptr,
+ x->nmvjointcost, x->mvcost,
+ &distortion, &sse);
- // safe motion search result for use in compound prediction
- seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
+ // safe motion search result for use in compound prediction
+ seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
+ }
+
+ // restore src pointers
+ x->plane[0].src = orig_src;
+ x->e_mbd.plane[0].pre[0] = orig_pre;
+ } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) {
+ /* NEW4X4 */
+ /* motion search not completed? Then skip newmv for this block with
+ * comppred */
+ if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
+ seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
+ continue;
+ }
}
- // restore src pointers
- x->plane[0].src = orig_src;
- x->e_mbd.plane[0].pre[0] = orig_pre;
- } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) {
- /* NEW4X4 */
- /* motion search not completed? Then skip newmv for this block with
- * comppred */
- if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
- seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
+ rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
+ &second_mode_mv[this_mode], seg_mvs[i],
+ bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
+ x->mvcost, cpi);
+
+ // Trap vectors that reach beyond the UMV borders
+ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
+ ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
+ ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
+ ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
continue;
}
- }
+ if (mbmi->second_ref_frame > 0 &&
+ mv_check_bounds(x, &second_mode_mv[this_mode]))
+ continue;
- rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
- &second_mode_mv[this_mode], seg_mvs[i],
- bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
- x->mvcost, cpi);
+ this_rd = encode_inter_mb_segment(&cpi->common,
+ x, labels, i, &labelyrate,
+ &distortion, t_above_s, t_left_s);
+ this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
+ rate += labelyrate;
- // Trap vectors that reach beyond the UMV borders
- if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
- ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
- ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
- ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
- continue;
- }
- if (mbmi->second_ref_frame > 0 &&
- mv_check_bounds(x, &second_mode_mv[this_mode]))
- continue;
+ if (this_rd < best_label_rd) {
+ sbr = rate;
+ sbd = distortion;
+ bestlabelyrate = labelyrate;
+ mode_selected = this_mode;
+ best_label_rd = this_rd;
+ for (j = 0; j < 4; j++)
+ if (labels[j] == i)
+ best_eobs[j] = x->e_mbd.plane[0].eobs[j];
- this_rd = encode_inter_mb_segment(&cpi->common,
- x, labels, i, &labelyrate,
- &distortion, t_above_s, t_left_s);
- this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
- rate += labelyrate;
+ vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
+ vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
+ }
+ } /*for each 4x4 mode*/
- if (this_rd < best_label_rd) {
- sbr = rate;
- sbd = distortion;
- bestlabelyrate = labelyrate;
- mode_selected = this_mode;
- best_label_rd = this_rd;
- for (j = 0; j < 4; j++)
- if (labels[j] == i)
- best_eobs[j] = x->e_mbd.plane[0].eobs[j];
+ vpx_memcpy(t_above, t_above_b, sizeof(t_above));
+ vpx_memcpy(t_left, t_left_b, sizeof(t_left));
- vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
- vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
- }
- } /*for each 4x4 mode*/
+ labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
+ &second_mode_mv[mode_selected], seg_mvs[i],
+ bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
+ x->mvcost, cpi);
+#else
+ int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
+ int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
+ B_PREDICTION_MODE mode_selected = ZERO4X4;
+ int bestlabelyrate = 0;
+ i = idy * 2 + idx;
- vpx_memcpy(t_above, t_above_b, sizeof(t_above));
- vpx_memcpy(t_left, t_left_b, sizeof(t_left));
+ // search for the best motion vector on this segment
+ for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
+ int64_t this_rd;
+ int distortion;
+ int labelyrate;
+ ENTROPY_CONTEXT t_above_s[2], t_left_s[2];
- labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
- &second_mode_mv[mode_selected], seg_mvs[i],
- bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
- x->mvcost, cpi);
+ vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
+ vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
- br += sbr;
- bd += sbd;
- segmentyrate += bestlabelyrate;
- this_segment_rd += best_label_rd;
- other_segment_rd += best_other_rd;
+ // motion search for newmv (single predictor case only)
+ if (mbmi->second_ref_frame <= 0 && this_mode == NEW4X4) {
+ int sseshift, n;
+ int step_param = 0;
+ int further_steps;
+ int thissme, bestsme = INT_MAX;
+ const struct buf_2d orig_src = x->plane[0].src;
+ const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0];
+
+ /* Is the best so far sufficiently good that we cant justify doing
+ * and new motion search. */
+ if (best_label_rd < label_mv_thresh)
+ break;
+
+ if (cpi->compressor_speed) {
+ // use previous block's result as next block's MV predictor.
+ if (i > 0) {
+ bsi->mvp.as_int =
+ x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
+ if (i == 2)
+ bsi->mvp.as_int =
+ x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
+ step_param = 2;
+ }
+ }
+
+ further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
+
+ {
+ int sadpb = x->sadperbit4;
+ int_mv mvp_full;
+
+ mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
+ mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
+
+ // find first label
+ n = i;
+
+ // adjust src pointer for this segment
+ x->plane[0].src.buf =
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
+ x->plane[0].src.buf,
+ x->plane[0].src.stride);
+ assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
+ x->e_mbd.plane[0].pre[0].buf =
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
+ x->e_mbd.plane[0].pre[0].buf,
+ x->e_mbd.plane[0].pre[0].stride);
+
+ bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
+ sadpb, further_steps, 0, v_fn_ptr,
+ bsi->ref_mv, &mode_mv[NEW4X4]);
+
+ sseshift = 0;
+
+ // Should we do a full search (best quality only)
+ if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
+ /* Check if mvp_full is within the range. */
+ clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
+ x->mv_row_min, x->mv_row_max);
+
+ thissme = cpi->full_search_sad(x, &mvp_full,
+ sadpb, 16, v_fn_ptr,
+ x->nmvjointcost, x->mvcost,
+ bsi->ref_mv,
+ n);
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ mode_mv[NEW4X4].as_int =
+ x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int;
+ } else {
+ /* The full search result is actually worse so re-instate the
+ * previous best vector */
+ x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int =
+ mode_mv[NEW4X4].as_int;
+ }
+ }
+ }
+
+ if (bestsme < INT_MAX) {
+ int distortion;
+ unsigned int sse;
+ cpi->find_fractional_mv_step(x, &mode_mv[NEW4X4],
+ bsi->ref_mv, x->errorperbit, v_fn_ptr,
+ x->nmvjointcost, x->mvcost,
+ &distortion, &sse);
+
+ // safe motion search result for use in compound prediction
+ seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEW4X4].as_int;
+ }
+
+ // restore src pointers
+ x->plane[0].src = orig_src;
+ x->e_mbd.plane[0].pre[0] = orig_pre;
+ } else if (mbmi->second_ref_frame > 0 && this_mode == NEW4X4) {
+ /* NEW4X4 */
+ /* motion search not completed? Then skip newmv for this block with
+ * comppred */
+ if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
+ seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
+ continue;
+ }
+ }
+
+ rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
+ &second_mode_mv[this_mode], seg_mvs[i],
+ bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
+ x->mvcost, cpi);
+
+ // Trap vectors that reach beyond the UMV borders
+ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) ||
+ ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
+ ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) ||
+ ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
+ continue;
+ }
+ if (mbmi->second_ref_frame > 0 &&
+ mv_check_bounds(x, &second_mode_mv[this_mode]))
+ continue;
+
+ this_rd = encode_inter_mb_segment(&cpi->common,
+ x, labels, i, &labelyrate,
+ &distortion, t_above_s, t_left_s);
+ this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
+ rate += labelyrate;
+
+ if (this_rd < best_label_rd) {
+ sbr = rate;
+ sbd = distortion;
+ bestlabelyrate = labelyrate;
+ mode_selected = this_mode;
+ best_label_rd = this_rd;
+ for (j = 0; j < 4; j++)
+ if (labels[j] == i)
+ best_eobs[j] = x->e_mbd.plane[0].eobs[j];
+
+ vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
+ vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
+ }
+ } /*for each 4x4 mode*/
+
+ vpx_memcpy(t_above, t_above_b, sizeof(t_above));
+ vpx_memcpy(t_left, t_left_b, sizeof(t_left));
+
+ labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
+ &second_mode_mv[mode_selected], seg_mvs[i],
+ bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
+ x->mvcost, cpi);
+#endif
+
+ br += sbr;
+ bd += sbd;
+ segmentyrate += bestlabelyrate;
+ this_segment_rd += best_label_rd;
+ other_segment_rd += best_other_rd;
+
+ for (j = 1; j < bh; ++j)
+ vpx_memcpy(&x->partition_info->bmi[i + j * 2],
+ &x->partition_info->bmi[i],
+ sizeof(x->partition_info->bmi[i]));
+ for (j = 1; j < bw; ++j)
+ vpx_memcpy(&x->partition_info->bmi[i + j],
+ &x->partition_info->bmi[i],
+ sizeof(x->partition_info->bmi[i]));
+ }
} /* for each label */
if (this_segment_rd < bsi->segment_rd) {
@@ -2504,12 +2726,23 @@
int distortion2 = 0, distortion_y = 0, distortion_uv = 0;
int skippable;
int64_t txfm_cache[NB_TXFM_MODES];
+ int i;
+ for (i = 0; i < NB_TXFM_MODES; ++i)
+ txfm_cache[i] = INT64_MAX;
+
// Test best rd so far against threshold for trying this mode.
+#if CONFIG_AB4X4
+ if (bsize >= BLOCK_SIZE_SB8X8 &&
+ (best_rd < cpi->rd_threshes[mode_index] ||
+ cpi->rd_threshes[mode_index] == INT_MAX))
+ continue;
+#else
if (best_rd <= cpi->rd_threshes[mode_index] ||
cpi->rd_threshes[mode_index] == INT_MAX) {
continue;
}
+#endif
x->skip = 0;
this_mode = vp9_mode_order[mode_index].mode;
@@ -2520,7 +2753,11 @@
continue;
}
+#if CONFIG_AB4X4
+ if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) {
+#else
if (cpi->speed > 0) {
+#endif
if (!(ref_frame_mask & (1 << ref_frame))) {
continue;
}
@@ -2652,6 +2889,11 @@
distortion2 += dist_uv[TX_4X4];
distortion_uv = dist_uv[TX_4X4];
mbmi->uv_mode = mode_uv[TX_4X4];
+#if CONFIG_AB4X4
+ txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ for (i = 0; i < NB_TXFM_MODES; ++i)
+ txfm_cache[i] = txfm_cache[ONLY_4X4];
+#endif
} else if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
vp9_build_intra_predictors_sby_s(xd, bsize);
@@ -2785,6 +3027,12 @@
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
+#if CONFIG_AB4X4
+ txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ for (i = 0; i < NB_TXFM_MODES; ++i)
+ txfm_cache[i] = txfm_cache[ONLY_4X4];
+#endif
+
if (!mode_excluded) {
if (is_comp_pred)
mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
@@ -2855,7 +3103,11 @@
// Is Mb level skip allowed (i.e. not coded at segment level).
mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
+#if CONFIG_AB4X4
+ if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
+#else
if (skippable) {
+#endif
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
// for best_yrd calculation
@@ -3001,12 +3253,13 @@
}
if (!mode_excluded && this_rd != INT64_MAX) {
for (i = 0; i < NB_TXFM_MODES; i++) {
- int64_t adj_rd;
+ int64_t adj_rd = INT64_MAX;
if (this_mode != I4X4_PRED) {
adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];
} else {
adj_rd = this_rd;
}
+
if (adj_rd < best_txfm_rd[i])
best_txfm_rd[i] = adj_rd;
}
@@ -3073,7 +3326,11 @@
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
cpi->is_src_frame_alt_ref &&
(cpi->oxcf.arnr_max_frames == 0) &&
- (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
+ (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)
+#if CONFIG_AB4X4
+ && bsize >= BLOCK_SIZE_SB8X8
+#endif
+ ) {
mbmi->mode = ZEROMV;
mbmi->ref_frame = ALTREF_FRAME;
mbmi->second_ref_frame = NONE;
--