ref: ad0c04126e32413807a24c1e0002b168d09b751a
parent: 5a4ae342706270467a9bf314823546805bc0d70a
author: Luc Trudeau <ltrudeau@twoorioles.com>
date: Fri Jul 26 06:06:50 EDT 2019
Unroll hi_token loop in decode_coeff
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -176,23 +176,44 @@
const int ctx = 1 + (eob > sw * sh * 2) + (eob > sw * sh * 4);
uint16_t *const lo_cdf = ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma][ctx];
- int tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 3) + 1;
+
+ int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 3);
+ int tok = 1 + tok_br;
if (dbg)
printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n",
t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng);
- if (tok == 3) {
+ // hi tok
+ if (tok_br == 2) {
+#define dbg_print_hi_tok(i, tok, tok_br) \
+ if (dbg)\
+ printf("Post-hi_tok[%d][%d][%d][%d=%d=%d->%d]: r=%d\n",\
+ imin(t_dim->ctx, 3), chroma, br_ctx, i, rc, tok, tok_br,\
+ ts->msac.rng)
const int br_ctx = get_br_ctx(levels, 1, tx_class, x, y, stride);
- do {
- const int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
- br_cdf[br_ctx], 4);
- if (dbg)
- printf("Post-hi_tok[%d][%d][%d][%d=%d=%d->%d]: r=%d\n",
- imin(t_dim->ctx, 3), chroma, br_ctx,
- eob, rc, tok_br, tok, ts->msac.rng);
- tok += tok_br;
- if (tok_br < 3) break;
- } while (tok < 15);
+
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ tok = 3 + tok_br;
+ dbg_print_hi_tok(eob, tok + tok_br, tok_br);
+
+ if (tok_br == 3) {
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ tok = 6 + tok_br;
+ dbg_print_hi_tok(eob, tok + tok_br, tok_br);
+ if (tok_br == 3) {
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ tok = 9 + tok_br;
+ dbg_print_hi_tok(eob, tok + tok_br, tok_br);
+ if (tok_br == 3) {
+ tok = 12 + dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ dbg_print_hi_tok(eob, tok + tok_br, tok_br);
+ }
+ }
+ }
}
cf[rc] = tok;
@@ -212,25 +233,38 @@
// hi tok
if (tok == 3) {
const int br_ctx = get_br_ctx(levels, 1, tx_class, x, y, stride);
- do {
- const int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
- br_cdf[br_ctx], 4);
- if (dbg)
- printf("Post-hi_tok[%d][%d][%d][%d=%d=%d->%d]: r=%d\n",
- imin(t_dim->ctx, 3), chroma, br_ctx,
- i, rc, tok_br, tok, ts->msac.rng);
- tok += tok_br;
- if (tok_br < 3) break;
- } while (tok < 15);
- }
+ int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ tok = 3 + tok_br;
+ dbg_print_hi_tok(i, tok + tok_br, tok_br);
+
+ if (tok_br == 3) {
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+
+ tok = 6 + tok_br;
+ dbg_print_hi_tok(i, tok + tok_br, tok_br);
+ if (tok_br == 3) {
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ tok = 9 + tok_br;
+ dbg_print_hi_tok(i, tok + tok_br, tok_br);
+ if (tok_br == 3) {
+ tok = 12 + dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ dbg_print_hi_tok(i, tok + tok_br, tok_br);
+ }
+ }
+ }
+ }
+#undef dbg_print_hi_tok
cf[rc] = tok;
levels[x * stride + y] = (uint8_t) tok;
}
{ // dc
- int ctx = 0;
- if (tx_class != TX_CLASS_2D)
- ctx = get_coef_nz_ctx(levels, tx, tx_class, 0, 0, stride);
+ const int ctx = (tx_class != TX_CLASS_2D) ?
+ get_coef_nz_ctx(levels, tx, tx_class, 0, 0, stride) : 0;
uint16_t *const lo_cdf = ts->cdf.coef.base_tok[t_dim->ctx][chroma][ctx];
dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 4);
if (dbg)
@@ -237,40 +271,80 @@
printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n",
t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng);
+ // hi tok
if (dc_tok == 3) {
+#define dbg_print_hi_tok(dc_tok, tok_br) \
+ if (dbg) \
+ printf("Post-dc_hi_tok[%d][%d][%d][%d->%d]: r=%d\n", \
+ imin(t_dim->ctx, 3), chroma, br_ctx, tok_br, dc_tok, ts->msac.rng);
+
const int br_ctx = get_br_ctx(levels, 0, tx_class, 0, 0, stride);
- do {
- const int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
- br_cdf[br_ctx], 4);
- if (dbg)
- printf("Post-dc_hi_tok[%d][%d][%d][%d->%d]: r=%d\n",
- imin(t_dim->ctx, 3), chroma, br_ctx,
- tok_br, dc_tok, ts->msac.rng);
- dc_tok += tok_br;
- if (tok_br < 3) break;
- } while (dc_tok < 15);
+
+ int tok_br =
+ dav1d_msac_decode_symbol_adapt4(&ts->msac, br_cdf[br_ctx], 4);
+ dc_tok = 3 + tok_br;
+
+ dbg_print_hi_tok(dc_tok + tok_br, tok_br);
+
+ if (tok_br == 3) {
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ dc_tok = 6 + tok_br;
+ dbg_print_hi_tok(dc_tok + tok_br, tok_br);
+ if (tok_br == 3) {
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ dc_tok = 9 + tok_br;
+ dbg_print_hi_tok(dc_tok + tok_br, tok_br);
+ if (tok_br == 3) {
+ dc_tok = 12 + dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ dbg_print_hi_tok(dc_tok + tok_br, tok_br);
+ }
+ }
+ }
}
+#undef dbg_print_hi_tok
}
} else { // dc-only
uint16_t *const lo_cdf = ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma][0];
- dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 3) + 1;
+ int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 3);
+ dc_tok = 1 + tok_br;
if (dbg)
printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n",
t_dim->ctx, chroma, 0, dc_tok, ts->msac.rng);
- if (dc_tok == 3) {
- do {
- const int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
- br_cdf[0], 4);
- if (dbg)
- printf("Post-dc_hi_tok[%d][%d][%d][%d->%d]: r=%d\n",
- imin(t_dim->ctx, 3), chroma, 0,
- tok_br, dc_tok, ts->msac.rng);
- dc_tok += tok_br;
- if (tok_br < 3) break;
- } while (dc_tok < 15);
+ // hi tok
+ if (tok_br == 2) {
+#define dbg_print_hi_tok(dc_tok, tok_br) \
+ if (dbg) \
+ printf("Post-dc_hi_tok[%d][%d][0][%d->%d]: r=%d\n", \
+ imin(t_dim->ctx, 3), chroma, tok_br, dc_tok, ts->msac.rng);
+
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac, br_cdf[0], 4);
+ dc_tok = 3 + tok_br;
+
+ dbg_print_hi_tok(dc_tok + tok_br, tok_br);
+
+ if (tok_br == 3) {
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac, br_cdf[0], 4);
+ dc_tok = 6 + tok_br;
+ dbg_print_hi_tok(dc_tok + tok_br, tok_br);
+ if (tok_br == 3) {
+ tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[0], 4);
+ dc_tok = 9 + tok_br;
+ dbg_print_hi_tok(dc_tok + tok_br, tok_br);
+ if (tok_br == 3) {
+ dc_tok = 12 + dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[0], 4);
+ dbg_print_hi_tok(dc_tok + tok_br, tok_br);
+ }
+ }
+ }
}
}
+#undef dbg_print_hi_tok
// residual and sign
int dc_sign = 1 << 6;