ref: 3a6ec9ea72b7cc85f7dceb0753ee2d665ee2ef8c
parent: 8440cc48179d2e5d161007d45618c10a2d9d1352
author: Alex Converse <aconverse@google.com>
date: Thu Mar 16 12:34:26 EDT 2017
vp9_optimize_b: Combine extrabits cost with token lookup. About 0.6% fewer cycles spent in vp9_optimize_b. Change-Id: I2ae62a78374c594ed81d4e3100a5848e2f6f2c4e
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -108,7 +108,6 @@
int rate0, rate1;
int64_t error0, error1;
int16_t t0, t1;
- EXTRABIT e0;
unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
mb->token_costs[tx_size][type][ref];
int best, band, pt, i, final_eob;
@@ -144,7 +143,7 @@
/* Evaluate the first possibility for this state. */
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
- vp9_get_token_extra(x, &t0, &e0);
+ vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits);
/* Consider both possible successor states. */
if (next < default_eob) {
band = band_translate[i + 1];
@@ -155,7 +154,6 @@
UPDATE_RD_COST();
/* And pick the best. */
best = rd_cost1 < rd_cost0;
- base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -193,9 +191,9 @@
*/
t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
- e0 = 0;
+ base_bits = 0;
} else {
- vp9_get_token_extra(x, &t0, &e0);
+ vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits);
t1 = t0;
}
if (next < default_eob) {
@@ -213,7 +211,6 @@
UPDATE_RD_COST();
/* And pick the best. */
best = rd_cost1 < rd_cost0;
- base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -79,12 +79,22 @@
extern const uint16_t vp9_cat6_high_cost[64];
extern const uint16_t vp9_cat6_high10_high_cost[256];
extern const uint16_t vp9_cat6_high12_high_cost[1024];
-static INLINE int vp9_get_cost(int16_t token, EXTRABIT extrabits,
- const uint16_t *cat6_high_table) {
- if (token != CATEGORY6_TOKEN)
- return vp9_extra_bits[token].cost[extrabits >> 1];
- return vp9_cat6_low_cost[(extrabits >> 1) & 0xff] +
- cat6_high_table[extrabits >> 9];
+
+static INLINE void vp9_get_token_extracost(const uint16_t *cat6_high_table,
+ int v, int16_t *token,
+ int *extracost) {
+ EXTRABIT extrabits; // unsigned extrabits
+ v = abs(v);
+ if (v >= CAT6_MIN_VAL) {
+ *token = CATEGORY6_TOKEN;
+ extrabits = v - CAT6_MIN_VAL;
+ *extracost =
+ vp9_cat6_low_cost[extrabits & 0xff] + cat6_high_table[extrabits >> 8];
+ } else {
+ *token = vp9_dct_cat_lt_10_value_tokens[v].token;
+ extrabits = vp9_dct_cat_lt_10_value_tokens[v].extra >> 1;
+ *extracost = vp9_extra_bits[*token].cost[extrabits];
+ }
}
#if CONFIG_VP9_HIGHBITDEPTH