ref: b41c17d625dc269eef8d0fbcc971f17523057e3b
parent: 11876faa1171368d8921e3969151e08494a11858
author: Attila Nagy <attilanagy@google.com>
date: Tue Apr 17 06:40:56 EDT 2012
Shares one set of RD costs tables between all encoding threads. RD costs were local to MACROBLOCK data and had to be copied all the time to each thread's MACROBLOCK data. Tables moved to a common place and only pointers are set up for each encoding thread. vp8_cost_tokens() generates 'int' costs so changed all types to be int (i.e. removed unsigned). NOTE: Could do some more cleaning in vp8cx_init_mbrthread_data(). Change-Id: Ifa4de4c6286dffaca7ed3082041fe5af1345ddc0
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -90,16 +90,17 @@
signed int act_zbin_adj;
signed int last_act_zbin_adj;
- int mvcosts[2][MVvals+1];
int *mvcost[2];
- int mvsadcosts[2][MVfpvals+1];
int *mvsadcost[2];
- int mbmode_cost[2][MB_MODE_COUNT];
- int intra_uv_mode_cost[2][MB_MODE_COUNT];
- unsigned int bmode_costs[10][10][10];
- unsigned int inter_bmode_costs[B_MODE_COUNT];
+ int (*mbmode_cost)[MB_MODE_COUNT];
+ int (*intra_uv_mode_cost)[MB_MODE_COUNT];
+ int (*bmode_costs)[10][10];
+ int *inter_bmode_costs;
+ int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
+ [MAX_ENTROPY_TOKENS];
- // These define limits to motion vector components to prevent them from extending outside the UMV borders
+ // These define limits to motion vector components to prevent
+ // them from extending outside the UMV borders
int mv_col_min;
int mv_col_max;
int mv_row_min;
@@ -115,7 +116,6 @@
unsigned char *active_ptr;
MV_CONTEXT *mvc;
- unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
int optimize;
int q_index;
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -337,22 +337,17 @@
z->src.v_buffer = x->src.v_buffer;
*/
+ z->mvcost[0] = x->mvcost[0];
+ z->mvcost[1] = x->mvcost[1];
+ z->mvsadcost[0] = x->mvsadcost[0];
+ z->mvsadcost[1] = x->mvsadcost[1];
- vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
- z->mvcost[0] = &z->mvcosts[0][mv_max+1];
- z->mvcost[1] = &z->mvcosts[1][mv_max+1];
- z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1];
- z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1];
+ z->token_costs = x->token_costs;
+ z->inter_bmode_costs = x->inter_bmode_costs;
+ z->mbmode_cost = x->mbmode_cost;
+ z->intra_uv_mode_cost = x->intra_uv_mode_cost;
+ z->bmode_costs = x->bmode_costs;
-
- vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs));
- vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs));
- //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
- //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost));
- vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost));
- vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost));
- vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs));
-
for (i = 0; i < 25; i++)
{
z->block[i].quant = x->block[i].quant;
@@ -359,17 +354,15 @@
z->block[i].quant_fast = x->block[i].quant_fast;
z->block[i].quant_shift = x->block[i].quant_shift;
z->block[i].zbin = x->block[i].zbin;
- z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
+ z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost;
z->block[i].round = x->block[i].round;
- z->q_index = x->q_index;
- z->act_zbin_adj = x->act_zbin_adj;
- z->last_act_zbin_adj = x->last_act_zbin_adj;
- /*
- z->block[i].src = x->block[i].src;
- */
- z->block[i].src_stride = x->block[i].src_stride;
+ z->block[i].src_stride = x->block[i].src_stride;
}
+ z->q_index = x->q_index;
+ z->act_zbin_adj = x->act_zbin_adj;
+ z->last_act_zbin_adj = x->last_act_zbin_adj;
+
{
MACROBLOCKD *xd = &x->e_mbd;
MACROBLOCKD *zd = &z->e_mbd;
@@ -401,9 +394,11 @@
zd->subpixel_predict16x16 = xd->subpixel_predict16x16;
zd->segmentation_enabled = xd->segmentation_enabled;
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
- vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
+ vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data,
+ sizeof(xd->segment_feature_data));
- vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
+ vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc,
+ sizeof(xd->dequant_y1_dc));
vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -18,6 +18,8 @@
void vp8_init_mode_costs(VP8_COMP *c)
{
VP8_COMMON *x = &c->common;
+ struct rd_costs_struct *rd_costs = &c->rd_costs;
+
{
const vp8_tree_p T = vp8_bmode_tree;
@@ -29,19 +31,24 @@
do
{
- vp8_cost_tokens((int *)c->mb.bmode_costs[i][j], vp8_kf_bmode_prob[i][j], T);
+ vp8_cost_tokens(rd_costs->bmode_costs[i][j],
+ vp8_kf_bmode_prob[i][j], T);
}
while (++j < VP8_BINTRAMODES);
}
while (++i < VP8_BINTRAMODES);
- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T);
+ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T);
}
- vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree);
+ vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob,
+ vp8_sub_mv_ref_tree);
- vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree);
- vp8_cost_tokens(c->mb.mbmode_cost[0], vp8_kf_ymode_prob, vp8_kf_ymode_tree);
+ vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree);
+ vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob,
+ vp8_kf_ymode_tree);
- vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree);
- vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, vp8_uv_mode_tree);
+ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob,
+ vp8_uv_mode_tree);
+ vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob,
+ vp8_uv_mode_tree);
}
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1883,13 +1883,6 @@
cpi->gf_rate_correction_factor = 1.0;
cpi->twopass.est_max_qcorrection_factor = 1.0;
- cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1];
- cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1];
- cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1];
- cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1];
-
- cal_mvsadcosts(cpi->mb.mvsadcost);
-
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
@@ -2023,13 +2016,29 @@
cpi->common.error.setjmp = 0;
#if CONFIG_MULTI_RES_ENCODING
+
/* Calculate # of MBs in a row in lower-resolution level image. */
if (cpi->oxcf.mr_encoder_id > 0)
vp8_cal_low_res_mb_cols(cpi);
+
#endif
- return cpi;
+ /* setup RD costs to MACROBLOCK struct */
+ cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1];
+ cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1];
+ cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1];
+ cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1];
+
+ cal_mvsadcosts(cpi->mb.mvsadcost);
+
+ cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost;
+ cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost;
+ cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs;
+ cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs;
+ cpi->mb.token_costs = cpi->rd_costs.token_costs;
+
+ return cpi;
}
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -693,6 +693,17 @@
int mr_low_res_mb_cols;
#endif
+ struct rd_costs_struct
+ {
+ int mvcosts[2][MVvals+1];
+ int mvsadcosts[2][MVfpvals+1];
+ int mbmode_cost[2][MB_MODE_COUNT];
+ int intra_uv_mode_cost[2][MB_MODE_COUNT];
+ int bmode_costs[10][10][10];
+ int inter_bmode_costs[B_MODE_COUNT];
+ int token_costs[BLOCK_TYPES][COEF_BANDS]
+ [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+ } rd_costs;
} VP8_COMP;
void control_data_rate(VP8_COMP *cpi);
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -132,7 +132,7 @@
MACROBLOCK *x,
int ib,
B_PREDICTION_MODE *best_mode,
- unsigned int *mode_costs,
+ const int *mode_costs,
int *bestrate,
int *bestdistortion)
@@ -185,7 +185,7 @@
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int error;
int distortion = 0;
- unsigned int *bmode_costs;
+ const int *bmode_costs;
intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -235,7 +235,7 @@
cc->frames_since_golden = cpi->common.frames_since_golden;
vp8_copy(cc->mvc, cpi->common.fc.mvc);
- vp8_copy(cc->mvcosts, cpi->mb.mvcosts);
+ vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts);
vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob);
vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob);
@@ -272,7 +272,7 @@
vp8_copy(cpi->common.fc.mvc, cc->mvc);
- vp8_copy(cpi->mb.mvcosts, cc->mvcosts);
+ vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts);
vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob);
vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob);
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -149,8 +149,8 @@
};
static void fill_token_costs(
- unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS],
- const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]
+ int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
+ const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
)
{
int i, j, k;
@@ -159,21 +159,24 @@
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < COEF_BANDS; j++)
for (k = 0; k < PREV_COEF_CONTEXTS; k++)
+
// check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1
- if(k==0 && j>(i==0) )
- vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2);
+ if (k == 0 && j > (i == 0))
+ vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
else
- vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree);
+ vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
}
-static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- };
+static const int rd_iifactor[32] =
+{
+ 4, 4, 3, 2, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0
+};
/* values are now correlated to quantizer */
-static int sad_per_bit16lut[QINDEX_RANGE] =
+static const int sad_per_bit16lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
@@ -192,7 +195,7 @@
11, 11, 11, 11, 12, 12, 12, 12,
12, 12, 13, 13, 13, 13, 14, 14
};
-static int sad_per_bit4lut[QINDEX_RANGE] =
+static const int sad_per_bit4lut[QINDEX_RANGE] =
{
2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
@@ -637,7 +640,7 @@
BLOCK *be,
BLOCKD *b,
B_PREDICTION_MODE *best_mode,
- unsigned int *bmode_costs,
+ const int *bmode_costs,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
@@ -717,7 +720,7 @@
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
- unsigned int *bmode_costs;
+ const int *bmode_costs;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
--
⑨