ref: ca9bedd538d728b80502a583d0f138401f50b6f6
parent: ab711815452ee44b06663b68ee21bb200e9eed41
author: Jingning Han <jingning@google.com>
date: Fri Mar 17 08:34:54 EDT 2017
Backport "Optimize the use case of token_cost table" to VP9. Cherry-picked from nextgenv2 commit 90ea281f29df747282e56d3068a3ddbdde30cdd0. Change-Id: Ie989e60c6479ac3251cadaac9c7e795ccba52f4e
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -80,6 +80,18 @@
return pt;
}
+static const int16_t band_count_table[TX_SIZES][8] = { /* Per tx_size row: how many scan positions fall in each coefficient band; vp9_optimize_b steps backwards through a row to reload band_left when it moves to the previous band. */
+ { 1, 2, 3, 4, 3, 16 - 13, 0 }, /* sums to 16 (presumably the 4x4 transform; confirm against TX_SIZE); the unlisted 8th entry is zero-initialized */
+ { 1, 2, 3, 4, 11, 64 - 21, 0 }, /* sums to 64 (presumably 8x8) */
+ { 1, 2, 3, 4, 11, 256 - 21, 0 }, /* sums to 256 (presumably 16x16) */
+ { 1, 2, 3, 4, 11, 1024 - 21, 0 }, /* sums to 1024 (presumably 32x32) */
+};
+static const int16_t band_cum_count_table[TX_SIZES][8] = { /* Running totals of band_count_table: entry b is the number of scan positions in bands before b; vp9_optimize_b subtracts it from eob to get how many positions remain in the starting band. */
+ { 0, 1, 3, 6, 10, 13, 16, 0 }, /* entry 6 is the row total (16); NOTE(review): the final 0 looks like padding, confirm band never indexes it */
+ { 0, 1, 3, 6, 10, 21, 64, 0 }, /* row total 64 */
+ { 0, 1, 3, 6, 10, 21, 256, 0 }, /* row total 256 */
+ { 0, 1, 3, 6, 10, 21, 1024, 0 }, /* row total 1024 */
+};
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
@@ -108,15 +120,21 @@
int rate0, rate1;
int64_t error0, error1;
int16_t t0, t1;
- unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- mb->token_costs[tx_size][type][ref];
- int best, band, pt, i, final_eob;
+ int best, band = (eob < default_eob) ? band_translate[eob]
+ : band_translate[eob - 1];
+ int pt, i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
+ unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ mb->token_costs[tx_size][type][ref];
+ const int16_t *band_counts = &band_count_table[tx_size][band];
+ int16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
+ token_costs += band;
+
assert((!type && !plane) || (type && plane));
assert(eob <= default_eob);
@@ -129,8 +147,10 @@
tokens[eob][0].qc = 0;
tokens[eob][1] = tokens[eob][0];
- for (i = 0; i < eob; i++)
- token_cache[scan[i]] = vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])];
+ for (i = 0; i < eob; i++) {
+ const int rc = scan[i];
+ token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
+ }
for (i = eob; i-- > 0;) {
int base_bits, d2, dx;
@@ -146,10 +166,9 @@
base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost);
/* Consider both possible successor states. */
if (next < default_eob) {
- band = band_translate[i + 1];
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
- rate0 += token_costs[band][0][pt][tokens[next][0].token];
- rate1 += token_costs[band][0][pt][tokens[next][1].token];
+ rate0 += (*token_costs)[0][pt][tokens[next][0].token];
+ rate1 += (*token_costs)[0][pt][tokens[next][1].token];
}
UPDATE_RD_COST();
/* And pick the best. */
@@ -181,6 +200,12 @@
} else {
tokens[i][1] = tokens[i][0];
next = i;
+
+ if (!(--band_left)) {
+ --band_counts;
+ band_left = *band_counts;
+ --token_costs;
+ }
continue;
}
@@ -197,14 +222,13 @@
t1 = t0;
}
if (next < default_eob) {
- band = band_translate[i + 1];
if (t0 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
- rate0 += token_costs[band][!x][pt][tokens[next][0].token];
+ rate0 += (*token_costs)[!x][pt][tokens[next][0].token];
}
if (t1 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
- rate1 += token_costs[band][!x][pt][tokens[next][1].token];
+ rate1 += (*token_costs)[!x][pt][tokens[next][1].token];
}
}
@@ -252,26 +276,30 @@
/* There's no choice to make for a zero coefficient, so we don't
* add a new trellis node, but we do need to update the costs.
*/
- band = band_translate[i + 1];
pt = get_coef_context(nb, token_cache, i + 1);
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */
if (t0 != EOB_TOKEN) {
- tokens[next][0].rate += token_costs[band][1][pt][t0];
+ tokens[next][0].rate += (*token_costs)[1][pt][t0];
tokens[next][0].token = ZERO_TOKEN;
}
if (t1 != EOB_TOKEN) {
- tokens[next][1].rate += token_costs[band][1][pt][t1];
+ tokens[next][1].rate += (*token_costs)[1][pt][t1];
tokens[next][1].token = ZERO_TOKEN;
}
tokens[i][0].best_index = tokens[i][1].best_index = 0;
/* Don't update next, because we didn't add a new node. */
}
+
+ if (!(--band_left)) {
+ --band_counts;
+ band_left = *band_counts;
+ --token_costs;
+ }
}
/* Now pick the best path through the whole trellis. */
- band = band_translate[i + 1];
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
error0 = tokens[next][0].error;
@@ -278,8 +306,8 @@
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
- rate0 += token_costs[band][0][ctx][t0];
- rate1 += token_costs[band][0][ctx][t1];
+ rate0 += (*token_costs)[0][ctx][t0];
+ rate1 += (*token_costs)[0][ctx][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = -1;