ref: 0549f5aae91b7d2f1265cea7e2edf34fda8f0433
parent: 9b63cb057a73fa1f053dfd665b23fb944a083443
author: Gabriel Marin <gmx@chromium.org>
date: Tue Dec 13 11:22:48 EST 2016
Simplify address arithmetic in vp9_optimize_b. Simplify address arithmetic on token_costs to reduce the number of generated instructions that are used for address arithmetic inside routine vp9_optimize_b. It also helps improve instruction scheduling depending on compiler and optimization level. Measured a 9.3% reduction in retired instructions and 5.3% reduction in execution time for this routine with GCC v4.8.4 and optimization flags -O3, and a reduction of up to 11.6% in execution time with other compilers. No change in behavior. TEST=Verified that encoded files match bit for bit, with and without this change. BUG=b/33678225 Change-Id: I6098650fb5cd2aa04e014fe6e68ca20761f3a21f
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -109,6 +109,8 @@
int64_t error0, error1;
int16_t t0, t1;
EXTRABIT e0;
+ unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ mb->token_costs[tx_size][type][ref];
int best, band, pt, i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
@@ -148,10 +150,8 @@
if (next < default_eob) {
band = band_translate[i + 1];
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
- rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
- [tokens[next][0].token];
- rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
- [tokens[next][1].token];
+ rate0 += token_costs[band][0][pt][tokens[next][0].token];
+ rate1 += token_costs[band][0][pt][tokens[next][1].token];
}
UPDATE_RD_COST();
/* And pick the best. */
@@ -208,13 +208,11 @@
band = band_translate[i + 1];
if (t0 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
- rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
- [tokens[next][0].token];
+ rate0 += token_costs[band][!x][pt][tokens[next][0].token];
}
if (t1 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
- rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
- [tokens[next][1].token];
+ rate1 += token_costs[band][!x][pt][tokens[next][1].token];
}
}
@@ -270,13 +268,11 @@
t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */
if (t0 != EOB_TOKEN) {
- tokens[next][0].rate +=
- mb->token_costs[tx_size][type][ref][band][1][pt][t0];
+ tokens[next][0].rate += token_costs[band][1][pt][t0];
tokens[next][0].token = ZERO_TOKEN;
}
if (t1 != EOB_TOKEN) {
- tokens[next][1].rate +=
- mb->token_costs[tx_size][type][ref][band][1][pt][t1];
+ tokens[next][1].rate += token_costs[band][1][pt][t1];
tokens[next][1].token = ZERO_TOKEN;
}
tokens[i][0].best_index = tokens[i][1].best_index = 0;
@@ -292,8 +288,8 @@
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
- rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
- rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
+ rate0 += token_costs[band][0][ctx][t0];
+ rate1 += token_costs[band][0][ctx][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = -1;