ref: 3cc6eb7c002ab37cfe074b93a1d15b4acb7ae70f
parent: 3140c443e431277232b58f045730ecaabf4a436a
parent: 26b6318de83761dd268a589f0b1324153e9d0923
author: Ronald S. Bultje <rbultje@google.com>
date: Tue Jul 2 07:48:15 EDT 2013
Merge "Make get_coef_context() branchless."
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -461,25 +461,25 @@
// for each position in raster scan order.
// -1 indicates the neighbor does not exist.
DECLARE_ALIGNED(16, int16_t,
- vp9_default_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
+ vp9_default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
+ vp9_col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
+ vp9_row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_col_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
+ vp9_col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_row_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
+ vp9_row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_default_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
+ vp9_default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_col_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
+ vp9_col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_row_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
+ vp9_row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_default_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
+ vp9_default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
- vp9_default_scan_32x32_neighbors[1024 * MAX_NEIGHBORS]);
+ vp9_default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_4x4[16]);
DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_4x4[16]);
@@ -504,15 +504,17 @@
}
static void init_scan_neighbors(const int16_t *scan,
int16_t *iscan,
- int l, int16_t *neighbors,
- int max_neighbors) {
+ int l, int16_t *neighbors) {
int l2 = l * l;
int n, i, j;
- for (n = 0; n < l2; n++) {
+ // dc doesn't use this type of prediction
+ neighbors[MAX_NEIGHBORS * 0 + 0] = 0;
+ neighbors[MAX_NEIGHBORS * 0 + 1] = 0;
+ iscan[0] = find_in_scan(scan, l, 0);
+ for (n = 1; n < l2; n++) {
int rc = scan[n];
iscan[n] = find_in_scan(scan, l, n);
- assert(max_neighbors == MAX_NEIGHBORS);
i = rc / l;
j = rc % l;
if (i > 0 && j > 0) {
@@ -524,93 +526,84 @@
// Therefore, if we use ADST/DCT, prefer the DCT neighbor coeff
// as a context. If ADST or DCT is used in both directions, we
// use the combination of the two as a context.
- int a = find_in_scan(scan, l, (i - 1) * l + j);
- int b = find_in_scan(scan, l, i * l + j - 1);
+ int a = (i - 1) * l + j;
+ int b = i * l + j - 1;
if (scan == vp9_col_scan_4x4 || scan == vp9_col_scan_8x8 ||
scan == vp9_col_scan_16x16) {
- neighbors[max_neighbors * n + 0] = a;
- neighbors[max_neighbors * n + 1] = -1;
+ // in the col/row scan cases (as well as left/top edge cases), we set
+ // both contexts to the same value, so we can branchlessly do a+b+1>>1
+ // which automatically becomes a if a == b
+ neighbors[MAX_NEIGHBORS * n + 0] =
+ neighbors[MAX_NEIGHBORS * n + 1] = a;
} else if (scan == vp9_row_scan_4x4 || scan == vp9_row_scan_8x8 ||
scan == vp9_row_scan_16x16) {
- neighbors[max_neighbors * n + 0] = b;
- neighbors[max_neighbors * n + 1] = -1;
+ neighbors[MAX_NEIGHBORS * n + 0] =
+ neighbors[MAX_NEIGHBORS * n + 1] = b;
} else {
- neighbors[max_neighbors * n + 0] = a;
- neighbors[max_neighbors * n + 1] = b;
+ neighbors[MAX_NEIGHBORS * n + 0] = a;
+ neighbors[MAX_NEIGHBORS * n + 1] = b;
}
} else if (i > 0) {
- neighbors[max_neighbors * n + 0] = find_in_scan(scan, l, (i - 1) * l + j);
- neighbors[max_neighbors * n + 1] = -1;
- } else if (j > 0) {
- neighbors[max_neighbors * n + 0] =
- find_in_scan(scan, l, i * l + j - 1);
- neighbors[max_neighbors * n + 1] = -1;
+ neighbors[MAX_NEIGHBORS * n + 0] =
+ neighbors[MAX_NEIGHBORS * n + 1] = (i - 1) * l + j;
} else {
- assert(n == 0);
- // dc predictor doesn't use previous tokens
- neighbors[max_neighbors * n + 0] = -1;
+ assert(j > 0);
+ neighbors[MAX_NEIGHBORS * n + 0] =
+ neighbors[MAX_NEIGHBORS * n + 1] = i * l + j - 1;
}
- assert(neighbors[max_neighbors * n + 0] < n);
+ assert(iscan[neighbors[MAX_NEIGHBORS * n + 0]] < n);
}
+ // one padding item so we don't have to add branches in code to handle
+ // calls to get_coef_context() for the position after the last coefficient
+ neighbors[MAX_NEIGHBORS * l2 + 0] = 0;
+ neighbors[MAX_NEIGHBORS * l2 + 1] = 0;
}
void vp9_init_neighbors() {
init_scan_neighbors(vp9_default_scan_4x4, vp9_default_iscan_4x4, 4,
- vp9_default_scan_4x4_neighbors, MAX_NEIGHBORS);
+ vp9_default_scan_4x4_neighbors);
init_scan_neighbors(vp9_row_scan_4x4, vp9_row_iscan_4x4, 4,
- vp9_row_scan_4x4_neighbors, MAX_NEIGHBORS);
+ vp9_row_scan_4x4_neighbors);
init_scan_neighbors(vp9_col_scan_4x4, vp9_col_iscan_4x4, 4,
- vp9_col_scan_4x4_neighbors, MAX_NEIGHBORS);
+ vp9_col_scan_4x4_neighbors);
init_scan_neighbors(vp9_default_scan_8x8, vp9_default_iscan_8x8, 8,
- vp9_default_scan_8x8_neighbors, MAX_NEIGHBORS);
+ vp9_default_scan_8x8_neighbors);
init_scan_neighbors(vp9_row_scan_8x8, vp9_row_iscan_8x8, 8,
- vp9_row_scan_8x8_neighbors, MAX_NEIGHBORS);
+ vp9_row_scan_8x8_neighbors);
init_scan_neighbors(vp9_col_scan_8x8, vp9_col_iscan_8x8, 8,
- vp9_col_scan_8x8_neighbors, MAX_NEIGHBORS);
+ vp9_col_scan_8x8_neighbors);
init_scan_neighbors(vp9_default_scan_16x16, vp9_default_iscan_16x16, 16,
- vp9_default_scan_16x16_neighbors, MAX_NEIGHBORS);
+ vp9_default_scan_16x16_neighbors);
init_scan_neighbors(vp9_row_scan_16x16, vp9_row_iscan_16x16, 16,
- vp9_row_scan_16x16_neighbors, MAX_NEIGHBORS);
+ vp9_row_scan_16x16_neighbors);
init_scan_neighbors(vp9_col_scan_16x16, vp9_col_iscan_16x16, 16,
- vp9_col_scan_16x16_neighbors, MAX_NEIGHBORS);
+ vp9_col_scan_16x16_neighbors);
init_scan_neighbors(vp9_default_scan_32x32, vp9_default_iscan_32x32, 32,
- vp9_default_scan_32x32_neighbors, MAX_NEIGHBORS);
+ vp9_default_scan_32x32_neighbors);
}
-const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan, int *pad) {
+const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan) {
if (scan == vp9_default_scan_4x4) {
- *pad = MAX_NEIGHBORS;
return vp9_default_scan_4x4_neighbors;
} else if (scan == vp9_row_scan_4x4) {
- *pad = MAX_NEIGHBORS;
return vp9_row_scan_4x4_neighbors;
} else if (scan == vp9_col_scan_4x4) {
- *pad = MAX_NEIGHBORS;
return vp9_col_scan_4x4_neighbors;
} else if (scan == vp9_default_scan_8x8) {
- *pad = MAX_NEIGHBORS;
return vp9_default_scan_8x8_neighbors;
} else if (scan == vp9_row_scan_8x8) {
- *pad = 2;
return vp9_row_scan_8x8_neighbors;
} else if (scan == vp9_col_scan_8x8) {
- *pad = 2;
return vp9_col_scan_8x8_neighbors;
} else if (scan == vp9_default_scan_16x16) {
- *pad = MAX_NEIGHBORS;
return vp9_default_scan_16x16_neighbors;
} else if (scan == vp9_row_scan_16x16) {
- *pad = 2;
return vp9_row_scan_16x16_neighbors;
} else if (scan == vp9_col_scan_16x16) {
- *pad = 2;
return vp9_col_scan_16x16_neighbors;
- } else if (scan == vp9_default_scan_32x32) {
- *pad = MAX_NEIGHBORS;
- return vp9_default_scan_32x32_neighbors;
} else {
- assert(0);
- return NULL;
+ assert(scan == vp9_default_scan_32x32);
+ return vp9_default_scan_32x32_neighbors;
}
}
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -166,28 +166,14 @@
}
#define MAX_NEIGHBORS 2
-static INLINE int get_coef_context(const int16_t *scan,
- const int16_t *neighbors,
- int nb_pad, uint8_t *token_cache,
- int c, int l) {
- int eob = l;
- assert(nb_pad == MAX_NEIGHBORS);
- if (c == eob) {
- return 0;
- } else {
- int ctx;
- assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0);
- if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) {
- ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] +
- token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1;
- } else {
- ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]];
- }
- return ctx;
- }
+static INLINE int get_coef_context(const int16_t *neighbors,
+ uint8_t *token_cache,
+ int c) {
+ return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
+ token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
-const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan, int *pad);
+const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan);
// 128 lists of probabilities are stored for the following ONE node probs:
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -97,7 +97,7 @@
TX_SIZE txfm_size, const int16_t *dq,
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
ENTROPY_CONTEXT above_ec, left_ec;
- int pt, c = 0, pad, default_eob;
+ int pt, c = 0;
int band;
vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES];
vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
@@ -130,7 +130,6 @@
scan = get_scan_4x4(tx_type);
above_ec = A[0] != 0;
left_ec = L[0] != 0;
- default_eob = 16;
band_translate = vp9_coefband_trans_4x4;
break;
}
@@ -140,7 +139,6 @@
scan = get_scan_8x8(tx_type);
above_ec = (A[0] + A[1]) != 0;
left_ec = (L[0] + L[1]) != 0;
- default_eob = 64;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
@@ -150,7 +148,6 @@
scan = get_scan_16x16(tx_type);
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
- default_eob = 256;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
@@ -158,13 +155,12 @@
scan = vp9_default_scan_32x32;
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
- default_eob = 1024;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
pt = combine_entropy_contexts(above_ec, left_ec);
- nb = vp9_get_coef_neighbors_handle(scan, &pad);
+ nb = vp9_get_coef_neighbors_handle(scan);
while (1) {
int val;
@@ -172,8 +168,7 @@
if (c >= seg_eob)
break;
if (c)
- pt = get_coef_context(scan, nb, pad, token_cache,
- c, default_eob);
+ pt = get_coef_context(nb, token_cache, c);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];
#if !CONFIG_BALANCED_COEFTREE
@@ -186,8 +181,7 @@
if (c >= seg_eob)
break;
if (c)
- pt = get_coef_context(scan, nb, pad, token_cache,
- c, default_eob);
+ pt = get_coef_context(nb, token_cache, c);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -112,11 +112,10 @@
static int trellis_get_coeff_context(const int16_t *scan,
const int16_t *nb,
int idx, int token,
- uint8_t *token_cache,
- int pad, int l) {
+ uint8_t *token_cache) {
int bak = token_cache[scan[idx]], pt;
token_cache[scan[idx]] = vp9_pt_energy_class[token];
- pt = get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
+ pt = get_coef_context(nb, token_cache, idx + 1);
token_cache[scan[idx]] = bak;
return pt;
}
@@ -141,7 +140,7 @@
int best, band, pt;
PLANE_TYPE type = xd->plane[plane].plane_type;
int err_mult = plane_rd_mult[type];
- int default_eob, pad;
+ int default_eob;
const int16_t *scan, *nb;
const int mul = 1 + (tx_size == TX_32X32);
uint8_t token_cache[1024];
@@ -201,7 +200,7 @@
for (i = 0; i < eob; i++)
token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
qcoeff_ptr[scan[i]]].token];
- nb = vp9_get_coef_neighbors_handle(scan, &pad);
+ nb = vp9_get_coef_neighbors_handle(scan);
for (i = eob; i-- > i0;) {
int base_bits, d2, dx;
@@ -220,8 +219,7 @@
/* Consider both possible successor states. */
if (next < default_eob) {
band = get_coef_band(band_translate, i + 1);
- pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
- pad, default_eob);
+ pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 +=
mb->token_costs[tx_size][type][ref][0][band][pt]
[tokens[next][0].token];
@@ -273,14 +271,12 @@
if (next < default_eob) {
band = get_coef_band(band_translate, i + 1);
if (t0 != DCT_EOB_TOKEN) {
- pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
- pad, default_eob);
+ pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += mb->token_costs[tx_size][type][ref][!x][band][pt]
[tokens[next][0].token];
}
if (t1 != DCT_EOB_TOKEN) {
- pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
- pad, default_eob);
+ pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
rate1 += mb->token_costs[tx_size][type][ref][!x][band][pt]
[tokens[next][1].token];
}
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -304,7 +304,7 @@
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt;
int c = 0;
- int cost = 0, pad;
+ int cost = 0;
const int16_t *scan, *nb;
const int eob = xd->plane[plane].eobs[block];
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
@@ -314,7 +314,7 @@
ENTROPY_CONTEXT above_ec, left_ec;
TX_TYPE tx_type = DCT_DCT;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
- int seg_eob, default_eob;
+ int seg_eob;
uint8_t token_cache[1024];
const uint8_t * band_translate;
@@ -372,8 +372,7 @@
assert(eob <= seg_eob);
pt = combine_entropy_contexts(above_ec, left_ec);
- nb = vp9_get_coef_neighbors_handle(scan, &pad);
- default_eob = seg_eob;
+ nb = vp9_get_coef_neighbors_handle(scan);
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
seg_eob = 0;
@@ -402,7 +401,7 @@
v = qcoeff_ptr[rc];
t = vp9_dct_value_tokens_ptr[v].token;
- pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+ pt = get_coef_context(nb, token_cache, c);
cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
token_cache[rc] = vp9_pt_energy_class[t];
prev_t = t;
@@ -410,7 +409,7 @@
// eob token
if (c < seg_eob) {
- pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+ pt = get_coef_context(nb, token_cache, c);
cost += token_costs[0][get_coef_band(band_translate, c)][pt]
[DCT_EOB_TOKEN];
}
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -123,7 +123,7 @@
const int loff = (off >> mod) << tx_size;
ENTROPY_CONTEXT *A = xd->plane[plane].above_context + aoff;
ENTROPY_CONTEXT *L = xd->plane[plane].left_context + loff;
- int seg_eob, default_eob, pad;
+ int seg_eob;
const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
vp9_coeff_count *counts;
@@ -178,8 +178,7 @@
}
pt = combine_entropy_contexts(above_ec, left_ec);
- nb = vp9_get_coef_neighbors_handle(scan, &pad);
- default_eob = seg_eob;
+ nb = vp9_get_coef_neighbors_handle(scan);
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
seg_eob = 0;
@@ -191,7 +190,7 @@
int v = 0;
rc = scan[c];
if (c)
- pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+ pt = get_coef_context(nb, token_cache, c);
if (c < eob) {
v = qcoeff_ptr[rc];
assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE);
--
⑨