ref: 0fdee4daee5b70dcbfc8e3f3fd5d82095b57d6bf
parent: d27598e418b0b9916c35b3e5187a5f98c00fe701
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Thu Nov 15 03:11:47 EST 2018
Add support for super-res. Fixes #172.
--- a/src/cdef_apply_tmpl.c
+++ b/src/cdef_apply_tmpl.c
@@ -89,7 +89,7 @@
const int sbsz = 16;
const int sb64w = f->sb128w << 1;
const int damping = f->frame_hdr.cdef.damping + BITDEPTH - 8;
- const enum Dav1dPixelLayout layout = f->cur.p.p.layout;
+ const enum Dav1dPixelLayout layout = f->cur.p.layout;
const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
@@ -106,7 +106,7 @@
if (edges & HAVE_BOTTOM) {
// backup pre-filter data for next iteration
- backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.p.stride,
+ backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.stride,
8, f->bw * 4, layout);
}
@@ -148,11 +148,11 @@
if (last_skip && edges & HAVE_LEFT) {
// we didn't backup the prefilter data because it wasn't
// there, so do it here instead
- backup2x8(lr_bak[bit], bptrs, f->cur.p.stride, 0, layout);
+ backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout);
}
if (edges & HAVE_RIGHT) {
// backup pre-filter data for next iteration
- backup2x8(lr_bak[!bit], bptrs, f->cur.p.stride, 8, layout);
+ backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout);
}
// the actual filter
@@ -165,10 +165,10 @@
uv_sec_lvl += uv_sec_lvl == 3;
uv_sec_lvl <<= BITDEPTH - 8;
unsigned variance;
- const int dir = dsp->cdef.dir(bptrs[0], f->cur.p.stride[0],
+ const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
&variance);
if (y_lvl) {
- dsp->cdef.fb[0](bptrs[0], f->cur.p.stride[0], lr_bak[bit][0],
+ dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
(pixel *const [2]) {
&f->lf.cdef_line_ptr[tf][0][0][bx * 4],
&f->lf.cdef_line_ptr[tf][0][1][bx * 4],
@@ -179,10 +179,10 @@
}
if (uv_lvl && has_chroma) {
const int uvdir =
- f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
+ f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
((uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir];
for (int pl = 1; pl <= 2; pl++) {
- dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.p.stride[1],
+ dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
lr_bak[bit][pl],
(pixel *const [2]) {
&f->lf.cdef_line_ptr[tf][pl][0][bx * 4 >> ss_hor],
@@ -209,9 +209,9 @@
iptrs[2] += sbsz * 4 >> ss_hor;
}
- ptrs[0] += 8 * PXSTRIDE(f->cur.p.stride[0]);
- ptrs[1] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
- ptrs[2] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
+ ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
+ ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
+ ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
f->lf.top_pre_cdef_toggle ^= 1;
}
}
--- a/src/decode.c
+++ b/src/decode.c
@@ -422,11 +422,11 @@
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][pl] : t->pal[pl];
if (i < pal_sz) {
- int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
+ int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
if (i < pal_sz) {
- int bits = f->cur.p.p.bpc - 3 + msac_decode_bools(&ts->msac, 2);
- const int max = (1 << f->cur.p.p.bpc) - 1;
+ int bits = f->cur.p.bpc - 3 + msac_decode_bools(&ts->msac, 2);
+ const int max = (1 << f->cur.p.bpc) - 1;
do {
const int delta = msac_decode_bools(&ts->msac, bits);
@@ -478,9 +478,9 @@
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];
if (msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) {
- const int bits = f->cur.p.p.bpc - 4 + msac_decode_bools(&ts->msac, 2);
- int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
- const int max = (1 << f->cur.p.p.bpc) - 1;
+ const int bits = f->cur.p.bpc - 4 + msac_decode_bools(&ts->msac, 2);
+ int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
+ const int max = (1 << f->cur.p.bpc) - 1;
for (int i = 1; i < b->pal_sz[1]; i++) {
int delta = msac_decode_bools(&ts->msac, bits);
if (delta && msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) delta = -delta;
@@ -488,7 +488,7 @@
}
} else {
for (int i = 0; i < b->pal_sz[1]; i++)
- pal[i] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
+ pal[i] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
}
if (DEBUG_BLOCK_INFO) {
printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
@@ -634,7 +634,7 @@
} else {
assert(f->frame_hdr.txfm_mode == TX_LARGEST);
}
- b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+ b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
} else {
assert(imin(bw4, bh4) <= 16 || b->max_ytx == TX_64X64);
int y, x, y_off, x_off;
@@ -652,7 +652,7 @@
if (DEBUG_BLOCK_INFO)
printf("Post-vartxtree[%x/%x]: r=%d\n",
b->tx_split[0], b->tx_split[1], t->ts->msac.rng);
- b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+ b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
}
}
@@ -694,8 +694,8 @@
&f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
const uint8_t *const b_dim = dav1d_block_dimensions[bs];
const int bx4 = t->bx & 31, by4 = t->by & 31;
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const int bw4 = b_dim[0], bh4 = b_dim[1];
const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
@@ -1138,7 +1138,7 @@
t_dim = &dav1d_txfm_dimensions[TX_4X4];
} else {
b->tx = dav1d_max_txfm_size_for_bs[bs][0];
- b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+ b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
t_dim = &dav1d_txfm_dimensions[b->tx];
if (f->frame_hdr.txfm_mode == TX_SWITCHABLE && t_dim->max > TX_4X4) {
const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
@@ -1166,7 +1166,7 @@
&f->frame_hdr, (const uint8_t (*)[8][2])
&ts->lflvl[b->seg_id][0][0][0],
t->bx, t->by, f->w4, f->h4, bs,
- b->tx, b->uvtx, f->cur.p.p.layout,
+ b->tx, b->uvtx, f->cur.p.layout,
&t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
@@ -1543,7 +1543,7 @@
if (f->seq_hdr.jnt_comp) {
const int jnt_ctx =
get_jnt_comp_ctx(f->seq_hdr.order_hint_n_bits,
- f->cur.p.poc, f->refp[b->ref[0]].p.poc,
+ f->cur.poc, f->refp[b->ref[0]].p.poc,
f->refp[b->ref[1]].p.poc, t->a, &t->l,
by4, bx4);
b->comp_type = COMP_INTER_WEIGHTED_AVG +
@@ -1833,7 +1833,7 @@
dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride,
&f->frame_hdr, lf_lvls, t->bx, t->by,
f->w4, f->h4, b->skip, bs, b->tx_split,
- b->uvtx, f->cur.p.p.layout,
+ b->uvtx, f->cur.p.layout,
&t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
@@ -1938,7 +1938,7 @@
const unsigned n_part = bl == BL_8X8 ? N_SUB8X8_PARTITIONS :
bl == BL_128X128 ? N_PARTITIONS - 2 : N_PARTITIONS;
bp = msac_decode_symbol_adapt(&t->ts->msac, pc, n_part);
- if (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
+ if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
(bp == PARTITION_V || bp == PARTITION_V4 ||
bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
{
@@ -2139,7 +2139,7 @@
} else {
uint16_t cdf[2] = { gather_left_partition_prob(pc, bl), 0 };
is_split = msac_decode_symbol(&t->ts->msac, cdf, 2);
- if (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
+ if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
return 1;
if (DEBUG_BLOCK_INFO)
printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
@@ -2230,12 +2230,30 @@
ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);
// Reference Restoration Unit (used for exp coding)
- Av1Filter *const lf_mask =
- f->lf.mask + (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
- const int unit_idx = ((ts->tiling.row_start & 16) >> 3) +
- ((ts->tiling.col_start & 16) >> 4);
+ int sb_idx, unit_idx;
+ if (f->frame_hdr.super_res.enabled) {
+ // vertical components only
+ sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w; // row offset into f->lf.lr_mask; the column part is added per plane below
+ unit_idx = (ts->tiling.row_start & 16) >> 3; // 0 or 2; the column part (0 or 1) is added per plane below
+ } else {
+ sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
+ unit_idx = ((ts->tiling.row_start & 16) >> 3) +
+ ((ts->tiling.col_start & 16) >> 4);
+ }
for (int p = 0; p < 3; p++) {
- ts->lr_ref[p] = &lf_mask->lr[p][unit_idx];
+ if (f->frame_hdr.super_res.enabled) {
+ const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int d = f->frame_hdr.super_res.width_scale_denominator;
+ const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!p];
+ const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3;
+ const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift; // tile start scaled by d/8, rounded up to a whole restoration-unit index
+ const int px_x = x << (unit_size_log2 + ss_hor); // unit index converted back to a luma-sample x position
+ const int u_idx = unit_idx + ((px_x & 64) >> 6); // bit 6 of px_x contributes 0 or 1, so u_idx stays within lr[p][0..3]
+ ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + (px_x >> 7)].lr[p][u_idx]; // px_x >> 7 selects the 128-px column of lr_mask
+ } else {
+ ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
+ }
+
ts->lr_ref[p]->filter_v[0] = 3;
ts->lr_ref[p]->filter_v[1] = -7;
ts->lr_ref[p]->filter_v[2] = 15;
@@ -2250,6 +2268,87 @@
atomic_init(&ts->progress, row_sb_start);
}
+static void read_restoration_info(Dav1dTileContext *const t, // t: tile context; the symbols are read from t->ts->msac
+ Av1RestorationUnit *const lr, const int p, // lr: restoration unit filled in by this call; p: plane index
+ const enum RestorationType frame_type) // frame_type: frame-level restoration type of plane p, as passed by the caller
+{
+ const Dav1dFrameContext *const f = t->f; // NOTE(review): f is not referenced directly below; presumably used by the DEBUG_BLOCK_INFO macro - confirm
+ Dav1dTileState *const ts = t->ts;
+
+ if (frame_type == RESTORATION_SWITCHABLE) { // switchable: a 3-way symbol picks none / wiener / sgrproj for this unit
+ const int filter =
+ msac_decode_symbol_adapt(&ts->msac,
+ ts->cdf.m.restore_switchable, 3);
+ lr->type = filter ? filter == 2 ? RESTORATION_SGRPROJ :
+ RESTORATION_WIENER :
+ RESTORATION_NONE;
+ } else { // fixed frame type: a single bool selects between frame_type and none
+ const unsigned type =
+ msac_decode_bool_adapt(&ts->msac,
+ frame_type == RESTORATION_WIENER ?
+ ts->cdf.m.restore_wiener :
+ ts->cdf.m.restore_sgrproj);
+ lr->type = type ? frame_type : RESTORATION_NONE;
+ }
+
+ if (lr->type == RESTORATION_WIENER) { // wiener: each tap is decoded relative to the reference unit ts->lr_ref[p]
+ lr->filter_v[0] =
+ !p ? msac_decode_subexp(&ts->msac,
+ ts->lr_ref[p]->filter_v[0] + 5, 16,
+ 1) - 5:
+ 0; // tap 0 is only decoded for plane 0; other planes get 0
+ lr->filter_v[1] =
+ msac_decode_subexp(&ts->msac,
+ ts->lr_ref[p]->filter_v[1] + 23, 32,
+ 2) - 23;
+ lr->filter_v[2] =
+ msac_decode_subexp(&ts->msac,
+ ts->lr_ref[p]->filter_v[2] + 17, 64,
+ 3) - 17;
+
+ lr->filter_h[0] =
+ !p ? msac_decode_subexp(&ts->msac,
+ ts->lr_ref[p]->filter_h[0] + 5, 16,
+ 1) - 5:
+ 0; // same rule as filter_v[0]: decoded for plane 0 only
+ lr->filter_h[1] =
+ msac_decode_subexp(&ts->msac,
+ ts->lr_ref[p]->filter_h[1] + 23, 32,
+ 2) - 23;
+ lr->filter_h[2] =
+ msac_decode_subexp(&ts->msac,
+ ts->lr_ref[p]->filter_h[2] + 17, 64,
+ 3) - 17;
+ memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights)); // inherit the reference's sgr weights so lr is a complete reference itself
+ ts->lr_ref[p] = lr; // this unit becomes the reference for the next unit decoded for plane p
+ if (DEBUG_BLOCK_INFO)
+ printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
+ p, lr->filter_v[0], lr->filter_v[1],
+ lr->filter_v[2], lr->filter_h[0],
+ lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
+ } else if (lr->type == RESTORATION_SGRPROJ) { // sgrproj: a 4-bit parameter-set index followed by up to two weights
+ const unsigned idx = msac_decode_bools(&ts->msac, 4);
+ lr->sgr_idx = idx;
+ lr->sgr_weights[0] = dav1d_sgr_params[idx][0] ?
+ msac_decode_subexp(&ts->msac,
+ ts->lr_ref[p]->sgr_weights[0] + 96, 128,
+ 4) - 96 :
+ 0; // weight 0 is not decoded when dav1d_sgr_params[idx][0] is zero
+ lr->sgr_weights[1] = dav1d_sgr_params[idx][1] ?
+ msac_decode_subexp(&ts->msac,
+ ts->lr_ref[p]->sgr_weights[1] + 32, 128,
+ 4) - 32 :
+ iclip(128 - lr->sgr_weights[0], -32, 95); // not decoded: derived from weight 0 and clipped to [-32, 95]
+ memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v)); // inherit the reference's wiener taps so lr is a complete reference itself
+ memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
+ ts->lr_ref[p] = lr; // this unit becomes the reference for the next unit decoded for plane p
+ if (DEBUG_BLOCK_INFO)
+ printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
+ p, lr->sgr_idx, lr->sgr_weights[0],
+ lr->sgr_weights[1], ts->msac.rng);
+ } // RESTORATION_NONE: nothing further is read and ts->lr_ref[p] is left unchanged
+}
+
int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) {
const Dav1dFrameContext *const f = t->f;
const enum BlockLevel root_bl = f->seq_hdr.sb128 ? BL_128X128 : BL_64X64;
@@ -2275,9 +2374,6 @@
return 0;
}
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
-
if (c->n_fc > 1 && f->frame_hdr.use_ref_frame_mvs) {
for (int n = 0; n < 7; n++)
if (dav1d_thread_picture_wait(&f->refp[n], 4 * (t->by + sb_step),
@@ -2311,99 +2407,52 @@
for (int p = 0; p < 3; p++) {
if (f->frame_hdr.restoration.type[p] == RESTORATION_NONE)
continue;
- const int by = t->by >> (ss_ver & !!p);
- const int bx = t->bx >> (ss_hor & !!p);
- const int bh = f->bh >> (ss_ver & !!p);
- const int bw = f->bw >> (ss_hor & !!p);
- const int unit_size_log2 =
- f->frame_hdr.restoration.unit_size[!!p];
- // 4pel unit size
- const int b_unit_size = 1 << (unit_size_log2 - 2);
- const unsigned mask = b_unit_size - 1;
- if (by & mask || bx & mask) continue;
- const int half_unit = b_unit_size >> 1;
+ const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!p];
+ const int y = t->by * 4 >> ss_ver;
+ const int h = (f->cur.p.h + ss_ver) >> ss_ver;
+
+ const int unit_size = 1 << unit_size_log2;
+ const unsigned mask = unit_size - 1;
+ if (y & mask) continue;
+ const int half_unit = unit_size >> 1;
// Round half up at frame boundaries, if there's more than one
// restoration unit
- const int bottom_round = by && by + half_unit > bh;
- const int right_round = bx && bx + half_unit > bw;
- if (bottom_round || right_round) continue;
- const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
- Av1RestorationUnit *const lr = &t->lf_mask->lr[p][unit_idx];
- const enum RestorationType frame_type =
- f->frame_hdr.restoration.type[p];
+ if (y && y + half_unit > h) continue;
- if (frame_type == RESTORATION_SWITCHABLE) {
- const int filter =
- msac_decode_symbol_adapt(&ts->msac,
- ts->cdf.m.restore_switchable, 3);
- lr->type = filter ? filter == 2 ? RESTORATION_SGRPROJ :
- RESTORATION_WIENER :
- RESTORATION_NONE;
- } else {
- const unsigned type =
- msac_decode_bool_adapt(&ts->msac,
- frame_type == RESTORATION_WIENER ?
- ts->cdf.m.restore_wiener :
- ts->cdf.m.restore_sgrproj);
- lr->type = type ? frame_type : RESTORATION_NONE;
- }
+ const enum RestorationType frame_type = f->frame_hdr.restoration.type[p];
- if (lr->type == RESTORATION_WIENER) {
- lr->filter_v[0] =
- !p ? msac_decode_subexp(&ts->msac,
- ts->lr_ref[p]->filter_v[0] + 5, 16,
- 1) - 5:
- 0;
- lr->filter_v[1] =
- msac_decode_subexp(&ts->msac,
- ts->lr_ref[p]->filter_v[1] + 23, 32,
- 2) - 23;
- lr->filter_v[2] =
- msac_decode_subexp(&ts->msac,
- ts->lr_ref[p]->filter_v[2] + 17, 64,
- 3) - 17;
+ if (f->frame_hdr.super_res.enabled) {
+ const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
+ const int n_units = imax(1, (w + half_unit) >> unit_size_log2);
- lr->filter_h[0] =
- !p ? msac_decode_subexp(&ts->msac,
- ts->lr_ref[p]->filter_h[0] + 5, 16,
- 1) - 5:
- 0;
- lr->filter_h[1] =
- msac_decode_subexp(&ts->msac,
- ts->lr_ref[p]->filter_h[1] + 23, 32,
- 2) - 23;
- lr->filter_h[2] =
- msac_decode_subexp(&ts->msac,
- ts->lr_ref[p]->filter_h[2] + 17, 64,
- 3) - 17;
- memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
- ts->lr_ref[p] = lr;
- if (DEBUG_BLOCK_INFO)
- printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
- p, lr->filter_v[0], lr->filter_v[1],
- lr->filter_v[2], lr->filter_h[0],
- lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
- } else if (lr->type == RESTORATION_SGRPROJ) {
- const unsigned idx = msac_decode_bools(&ts->msac, 4);
- lr->sgr_idx = idx;
- lr->sgr_weights[0] = dav1d_sgr_params[idx][0] ?
- msac_decode_subexp(&ts->msac,
- ts->lr_ref[p]->sgr_weights[0] + 96, 128,
- 4) - 96 :
- 0;
- lr->sgr_weights[1] = dav1d_sgr_params[idx][1] ?
- msac_decode_subexp(&ts->msac,
- ts->lr_ref[p]->sgr_weights[1] + 32, 128,
- 4) - 32 :
- iclip(128 - lr->sgr_weights[0], -32, 95);
- memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
- memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
- ts->lr_ref[p] = lr;
- if (DEBUG_BLOCK_INFO)
- printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
- p, lr->sgr_idx, lr->sgr_weights[0],
- lr->sgr_weights[1], ts->msac.rng);
+ const int d = f->frame_hdr.super_res.width_scale_denominator;
+ const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;
+ const int x0 = ((4 * t->bx * d >> ss_hor) + rnd) >> shift;
+ const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift;
+
+ for (int x = x0; x < imin(x1, n_units); x++) {
+ const int px_x = x << (unit_size_log2 + ss_hor);
+ const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);
+ const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);
+ Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
+
+ read_restoration_info(t, lr, p, frame_type);
+ }
+ } else {
+ const int x = 4 * t->bx >> ss_hor;
+ if (x & mask) continue;
+ const int w = (f->cur.p.w + ss_hor) >> ss_hor;
+ // Round half up at frame boundaries, if there's more than one
+ // restoration unit
+ if (x && x + half_unit > w) continue;
+ const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);
+ const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
+ Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
+
+ read_restoration_info(t, lr, p, frame_type);
}
}
if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))
@@ -2423,8 +2472,8 @@
int align_h = (f->bh + 31) & ~31;
memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
&t->l.tx_lpf_y[t->by & 16], sb_step);
+ const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
align_h >>= ss_ver;
-
memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],
&t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);
@@ -2512,22 +2561,13 @@
// update allocation of block contexts for above
if (f->sb128w > f->lf.line_sz) {
dav1d_freep_aligned(&f->lf.cdef_line);
- dav1d_freep_aligned(&f->lf.lr_lpf_line);
// note that we allocate all pixel arrays as if we were dealing with
// 10 bits/component data
uint16_t *ptr = f->lf.cdef_line =
dav1d_alloc_aligned(f->b4_stride * 4 * 12 * sizeof(uint16_t), 32);
+ if (!ptr) return -ENOMEM;
- uint16_t *lr_ptr = f->lf.lr_lpf_line =
- dav1d_alloc_aligned(f->b4_stride * 4 * 3 * 12 * sizeof(uint16_t), 32);
-
- if (!ptr || !lr_ptr) {
- if (ptr) dav1d_free_aligned(ptr);
- if (lr_ptr) dav1d_free_aligned(lr_ptr);
- return -ENOMEM;
- }
-
for (int pl = 0; pl <= 2; pl++) {
f->lf.cdef_line_ptr[0][pl][0] = ptr + f->b4_stride * 4 * 0;
f->lf.cdef_line_ptr[0][pl][1] = ptr + f->b4_stride * 4 * 1;
@@ -2534,12 +2574,26 @@
f->lf.cdef_line_ptr[1][pl][0] = ptr + f->b4_stride * 4 * 2;
f->lf.cdef_line_ptr[1][pl][1] = ptr + f->b4_stride * 4 * 3;
ptr += f->b4_stride * 4 * 4;
+ }
+ f->lf.line_sz = f->sb128w;
+ }
+
+ const ptrdiff_t lr_stride = (f->sr_cur.p.p.w + 31) & ~31;
+ if (lr_stride > f->lf.lr_line_sz) {
+ dav1d_freep_aligned(&f->lf.lr_lpf_line);
+
+ uint16_t *lr_ptr = f->lf.lr_lpf_line =
+ dav1d_alloc_aligned(lr_stride * 3 * 12 * sizeof(uint16_t), 32);
+
+ if (!lr_ptr) return -ENOMEM;
+
+ for (int pl = 0; pl <= 2; pl++) {
f->lf.lr_lpf_line_ptr[pl] = lr_ptr;
- lr_ptr += f->b4_stride * 4 * 12;
+ lr_ptr += lr_stride * 12;
}
- f->lf.line_sz = f->sb128w;
+ f->lf.lr_line_sz = lr_stride;
}
// update allocation for loopfilter masks
@@ -2579,6 +2633,13 @@
}
f->lf.mask_sz = f->sb128w * f->sb128h;
}
+ f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7;
+ if (f->sr_sb128w * f->sb128h > f->lf.lr_mask_sz) {
+ freep(&f->lf.lr_mask);
+ f->lf.lr_mask = malloc(f->sr_sb128w * f->sb128h * sizeof(*f->lf.lr_mask));
+ if (!f->lf.lr_mask) return -ENOMEM;
+ f->lf.lr_mask_sz = f->sr_sb128w * f->sb128h;
+ }
if (f->frame_hdr.loopfilter.sharpness != f->lf.last_sharpness) {
dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr.loopfilter.sharpness);
f->lf.last_sharpness = f->frame_hdr.loopfilter.sharpness;
@@ -2612,7 +2673,7 @@
const int order_hint_n_bits = f->seq_hdr.order_hint * f->seq_hdr.order_hint_n_bits;
const int ret = av1_init_ref_mv_common(f->libaom_cm, f->bw >> 1, f->bh >> 1,
f->b4_stride, f->seq_hdr.sb128,
- f->mvs, f->ref_mvs, f->cur.p.poc, f->refpoc,
+ f->mvs, f->ref_mvs, f->cur.poc, f->refpoc,
f->refrefpoc, f->frame_hdr.gmv,
f->frame_hdr.hp, f->frame_hdr.force_integer_mv,
f->frame_hdr.use_ref_frame_mvs,
@@ -2644,9 +2705,9 @@
const unsigned ref1poc = f->refp[j].p.poc;
const unsigned d1 = imin(abs(get_poc_diff(f->seq_hdr.order_hint_n_bits,
- ref0poc, f->cur.p.poc)), 31);
+ ref0poc, f->cur.poc)), 31);
const unsigned d0 = imin(abs(get_poc_diff(f->seq_hdr.order_hint_n_bits,
- ref1poc, f->cur.p.poc)), 31);
+ ref1poc, f->cur.poc)), 31);
const int order = d0 <= d1;
static const uint8_t quant_dist_weight[3][2] = {
@@ -2672,9 +2733,12 @@
// init loopfilter pointers
f->lf.mask_ptr = f->lf.mask;
- f->lf.p[0] = f->cur.p.data[0];
- f->lf.p[1] = f->cur.p.data[1];
- f->lf.p[2] = f->cur.p.data[2];
+ f->lf.p[0] = f->cur.data[0];
+ f->lf.p[1] = f->cur.data[1];
+ f->lf.p[2] = f->cur.data[2];
+ f->lf.sr_p[0] = f->sr_cur.p.data[0];
+ f->lf.sr_p[1] = f->sr_cur.p.data[1];
+ f->lf.sr_p[2] = f->sr_cur.p.data[2];
f->lf.tile_row = 1;
dav1d_cdf_thread_wait(&f->in_cdf);
@@ -2758,7 +2822,7 @@
// loopfilter + cdef + restoration
if (f->frame_thread.pass != 1)
f->bd_fn.filter_sbrow(f, sby);
- dav1d_thread_picture_signal(&f->cur, (sby + 1) * f->sb_step * 4,
+ dav1d_thread_picture_signal(&f->sr_cur, (sby + 1) * f->sb_step * 4,
progress_plane_type);
}
}
@@ -2802,7 +2866,7 @@
pthread_mutex_unlock(&ts->tile_thread.lock);
}
if (progress == TILE_ERROR) {
- dav1d_thread_picture_signal(&f->cur, FRAME_ERROR,
+ dav1d_thread_picture_signal(&f->sr_cur, FRAME_ERROR,
progress_plane_type);
const uint64_t all_mask = ~0ULL >> (64 - f->n_tc);
pthread_mutex_lock(&f->tile_thread.lock);
@@ -2816,7 +2880,7 @@
// loopfilter + cdef + restoration
if (f->frame_thread.pass != 1)
f->bd_fn.filter_sbrow(f, sby);
- dav1d_thread_picture_signal(&f->cur, (sby + 1) * f->sb_step * 4,
+ dav1d_thread_picture_signal(&f->sr_cur, (sby + 1) * f->sb_step * 4,
progress_plane_type);
}
}
@@ -2855,7 +2919,7 @@
retval = 0;
error:
- dav1d_thread_picture_signal(&f->cur, retval == 0 ? UINT_MAX : FRAME_ERROR,
+ dav1d_thread_picture_signal(&f->sr_cur, retval == 0 ? UINT_MAX : FRAME_ERROR,
PLANE_TYPE_ALL);
for (int i = 0; i < 7; i++) {
if (f->refp[i].p.data[0])
@@ -2863,7 +2927,8 @@
dav1d_ref_dec(&f->ref_mvs_ref[i]);
}
- dav1d_thread_picture_unref(&f->cur);
+ dav1d_picture_unref(&f->cur);
+ dav1d_thread_picture_unref(&f->sr_cur);
dav1d_cdf_thread_unref(&f->in_cdf);
if (f->frame_hdr.refresh_context) {
dav1d_cdf_thread_signal(&f->out_cdf);
@@ -2879,6 +2944,12 @@
return retval;
}
+static int get_upscale_x0(const int in_w, const int out_w, const int step) { // returns the 14-bit fractional start position for upscaling in_w -> out_w with the given per-pixel step
+ const int err = out_w * step - (in_w << 14); // rounding error accumulated by step across the output width; may be negative
+ const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2); // err / 2 truncates toward zero; err >> 1 would floor and be off by one for negative odd err
+ return x0 & 0x3fff; // keep the low 14 bits
+}
+
int dav1d_submit_frame(Dav1dContext *const c) {
Dav1dFrameContext *f;
int res = -1;
@@ -2966,9 +3037,9 @@
for (int i = 0; i < 7; i++) {
const int refidx = f->frame_hdr.refidx[i];
if (!c->refs[refidx].p.p.data[0] ||
- f->frame_hdr.width * 2 < c->refs[refidx].p.p.p.w ||
+ f->frame_hdr.width[0] * 2 < c->refs[refidx].p.p.p.w ||
f->frame_hdr.height * 2 < c->refs[refidx].p.p.p.h ||
- f->frame_hdr.width > c->refs[refidx].p.p.p.w * 16 ||
+ f->frame_hdr.width[0] > c->refs[refidx].p.p.p.w * 16 ||
f->frame_hdr.height > c->refs[refidx].p.p.p.h * 16 ||
f->seq_hdr.layout != c->refs[refidx].p.p.p.layout ||
f->seq_hdr.bpc != c->refs[refidx].p.p.p.bpc)
@@ -2979,16 +3050,16 @@
goto error;
}
dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
- if (f->frame_hdr.width != c->refs[refidx].p.p.p.w ||
+ f->ref_coded_width[i] = c->refs[refidx].coded_width;
+ if (f->frame_hdr.width[0] != c->refs[refidx].p.p.p.w ||
f->frame_hdr.height != c->refs[refidx].p.p.p.h)
{
#define scale_fac(ref_sz, this_sz) \
- (((ref_sz << 14) + (this_sz >> 1)) / this_sz)
+ ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
- f->frame_hdr.width);
+ f->frame_hdr.width[0]);
f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
f->frame_hdr.height);
-#undef scale_fac
f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
} else {
@@ -3015,35 +3086,53 @@
c->n_tile_data = 0;
// allocate frame
- if ((res = dav1d_thread_picture_alloc(&f->cur, f->frame_hdr.width,
- f->frame_hdr.height,
- f->seq_hdr.layout, f->seq_hdr.bpc,
- c->n_fc > 1 ? &f->frame_thread.td : NULL,
- f->frame_hdr.show_frame,
- &c->allocator)) < 0)
- {
- goto error;
+ res = dav1d_thread_picture_alloc(&f->sr_cur, f->frame_hdr.width[1],
+ f->frame_hdr.height,
+ f->seq_hdr.layout, f->seq_hdr.bpc,
+ c->n_fc > 1 ? &f->frame_thread.td : NULL,
+ f->frame_hdr.show_frame, &c->allocator);
+ if (res < 0) goto error;
+
+ f->sr_cur.p.poc = f->frame_hdr.frame_offset;
+ f->sr_cur.p.p.type = f->frame_hdr.frame_type;
+ f->sr_cur.p.p.pri = f->seq_hdr.pri;
+ f->sr_cur.p.p.trc = f->seq_hdr.trc;
+ f->sr_cur.p.p.mtrx = f->seq_hdr.mtrx;
+ f->sr_cur.p.p.chr = f->seq_hdr.chr;
+ f->sr_cur.p.p.fullrange = f->seq_hdr.color_range;
+
+ if (f->frame_hdr.super_res.enabled) {
+ res = dav1d_picture_alloc(&f->cur, f->frame_hdr.width[0],
+ f->frame_hdr.height, f->seq_hdr.layout,
+ f->seq_hdr.bpc, &c->allocator);
+ if (res < 0) goto error;
+ f->cur.poc = f->frame_hdr.frame_offset;
+ } else {
+ dav1d_picture_ref(&f->cur, &f->sr_cur.p);
}
- f->cur.p.poc = f->frame_hdr.frame_offset;
- f->cur.p.p.type = f->frame_hdr.frame_type;
- f->cur.p.p.pri = f->seq_hdr.pri;
- f->cur.p.p.trc = f->seq_hdr.trc;
- f->cur.p.p.mtrx = f->seq_hdr.mtrx;
- f->cur.p.p.chr = f->seq_hdr.chr;
- f->cur.p.p.fullrange = f->seq_hdr.color_range;
+ if (f->frame_hdr.super_res.enabled) {
+ f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);
+ const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;
+ const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
+ f->resize_step[1] = scale_fac(in_cw, out_cw);
+#undef scale_fac
+ f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);
+ f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);
+ }
// move f->cur into output queue
if (c->n_fc == 1) {
if (f->frame_hdr.show_frame)
- dav1d_picture_ref(&c->out, &f->cur.p);
+ dav1d_picture_ref(&c->out, &f->sr_cur.p);
} else {
- dav1d_thread_picture_ref(out_delayed, &f->cur);
+ dav1d_thread_picture_ref(out_delayed, &f->sr_cur);
}
- f->w4 = (f->frame_hdr.width + 3) >> 2;
+ f->w4 = (f->frame_hdr.width[0] + 3) >> 2;
f->h4 = (f->frame_hdr.height + 3) >> 2;
- f->bw = ((f->frame_hdr.width + 7) >> 3) << 1;
+ f->bw = ((f->frame_hdr.width[0] + 7) >> 3) << 1;
f->bh = ((f->frame_hdr.height + 7) >> 3) << 1;
f->sb128w = (f->bw + 31) >> 5;
f->sb128h = (f->bh + 31) >> 5;
@@ -3067,8 +3156,8 @@
for (int i = 0; i < 7; i++) {
const int refidx = f->frame_hdr.refidx[i];
if (c->refs[refidx].refmvs != NULL &&
- f->refp[i].p.p.w == f->cur.p.p.w &&
- f->refp[i].p.p.h == f->cur.p.p.h)
+ f->ref_coded_width[i] == f->cur.p.w &&
+ f->refp[i].p.p.h == f->cur.p.h)
{
f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
dav1d_ref_inc(f->ref_mvs_ref[i]);
@@ -3100,7 +3189,7 @@
if (f->frame_hdr.segmentation.temporal || !f->frame_hdr.segmentation.update_map) {
const int pri_ref = f->frame_hdr.primary_ref_frame;
assert(pri_ref != PRIMARY_REF_NONE);
- const int ref_w = ((f->refp[pri_ref].p.p.w + 7) >> 3) << 1;
+ const int ref_w = ((f->ref_coded_width[pri_ref] + 7) >> 3) << 1;
const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;
if (ref_w == f->bw && ref_h == f->bh) {
f->prev_segmap_ref = c->refs[f->frame_hdr.refidx[pri_ref]].segmap;
@@ -3147,7 +3236,8 @@
if (f->frame_hdr.refresh_frame_flags & (1 << i)) {
if (c->refs[i].p.p.data[0])
dav1d_thread_picture_unref(&c->refs[i].p);
- dav1d_thread_picture_ref(&c->refs[i].p, &f->cur);
+ dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);
+ c->refs[i].coded_width = f->frame_hdr.width[0];
if (c->cdf[i].cdf) dav1d_cdf_thread_unref(&c->cdf[i]);
if (f->frame_hdr.refresh_context) {
@@ -3207,7 +3297,8 @@
dav1d_ref_dec(&f->ref_mvs_ref[i]);
}
dav1d_picture_unref(&c->out);
- dav1d_thread_picture_unref(&f->cur);
+ dav1d_picture_unref(&f->cur);
+ dav1d_thread_picture_unref(&f->sr_cur);
dav1d_ref_dec(&f->mvs_ref);
for (int i = 0; i < f->n_tile_data; i++)
--- a/src/internal.h
+++ b/src/internal.h
@@ -99,6 +99,7 @@
Av1LoopfilterModeRefDeltas lf_mode_ref_deltas;
Av1FilmGrainData film_grain;
uint8_t qidx;
+ unsigned coded_width;
} refs[8];
CdfThreadContext cdf[8];
@@ -119,7 +120,9 @@
struct Dav1dFrameContext {
Av1SequenceHeader seq_hdr;
Av1FrameHeader frame_hdr;
- Dav1dThreadPicture refp[7], cur;
+ Dav1dThreadPicture refp[7];
+ Dav1dPicture cur; // during block coding / reconstruction
+ Dav1dThreadPicture sr_cur; // after super-resolution upscaling
Dav1dRef *mvs_ref;
refmvs *mvs, *ref_mvs[7];
Dav1dRef *ref_mvs_ref[7];
@@ -127,6 +130,7 @@
uint8_t *cur_segmap;
const uint8_t *prev_segmap;
unsigned refpoc[7], refrefpoc[7][7];
+ int ref_coded_width[7];
CdfThreadContext in_cdf, out_cdf;
struct {
Dav1dData data;
@@ -139,6 +143,7 @@
int scale; // if no scaling, this is 0
int step;
} svc[7][2 /* x, y */];
+ int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */];
const Dav1dContext *c;
Dav1dTileContext *tc;
@@ -157,7 +162,7 @@
int ipred_edge_sz;
pixel *ipred_edge[3];
ptrdiff_t b4_stride;
- int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step;
+ int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w;
uint16_t dq[NUM_SEGMENTS][3 /* plane */][2 /* dc/ac */];
const uint8_t *qm[2 /* is_1d */][N_RECT_TX_SIZES][3 /* plane */];
BlockContext *a;
@@ -188,8 +193,9 @@
struct {
uint8_t (*level)[4];
Av1Filter *mask;
+ Av1Restoration *lr_mask;
int top_pre_cdef_toggle;
- int mask_sz /* w*h */, line_sz /* w */, re_sz /* h */;
+ int mask_sz /* w*h */, lr_mask_sz, line_sz /* w */, lr_line_sz, re_sz /* h */;
Av1FilterLUT lim_lut;
int last_sharpness;
uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
@@ -201,7 +207,7 @@
// in-loop filter per-frame state keeping
int tile_row; // for carry-over at tile row edges
- pixel *p[3];
+ pixel *p[3], *sr_p[3];
Av1Filter *mask_ptr, *prev_mask_ptr;
} lf;
--- a/src/levels.h
+++ b/src/levels.h
@@ -431,9 +431,12 @@
} operating_points[32];
int frame_offset;
int refresh_frame_flags;
- int width, height;
+ int width[2 /* { coded_width, superresolution_upscaled_width } */], height;
int render_width, render_height;
- int super_res;
+ struct {
+ int width_scale_denominator;
+ int enabled;
+ } super_res;
int have_render_size;
int allow_intrabc;
int frame_ref_short_signaling;
--- a/src/lf_apply_tmpl.c
+++ b/src/lf_apply_tmpl.c
@@ -183,8 +183,8 @@
const int sbsz = 32 >> is_sb64;
const int sbl2 = 5 - is_sb64;
const int halign = (f->bh + 31) & ~31;
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
@@ -211,7 +211,7 @@
y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
}
- if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
+ if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
y++, uv_mask <<= 1)
@@ -247,7 +247,7 @@
y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
}
- if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
+ if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
const unsigned cw = (w + ss_hor) >> ss_hor;
uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
@@ -268,7 +268,7 @@
x++, have_left = 1, ptr += 128, level_ptr += 32)
{
filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
- lflvl[x].filter_y[0], ptr, f->cur.p.stride[0],
+ lflvl[x].filter_y[0], ptr, f->cur.stride[0],
imin(32, f->w4 - x * 32), starty4, endy4);
}
@@ -275,7 +275,7 @@
level_ptr = f->lf.level + f->b4_stride * sby * sbsz;
for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
- lflvl[x].filter_y[1], ptr, f->cur.p.stride[0],
+ lflvl[x].filter_y[1], ptr, f->cur.stride[0],
imin(32, f->w4 - x * 32), starty4, endy4);
}
@@ -289,7 +289,7 @@
{
filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
lflvl[x].filter_uv[0],
- &p[1][uv_off], &p[2][uv_off], f->cur.p.stride[1],
+ &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
(imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
starty4 >> ss_ver, uv_endy4, ss_ver);
}
@@ -300,7 +300,7 @@
{
filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
lflvl[x].filter_uv[1],
- &p[1][uv_off], &p[2][uv_off], f->cur.p.stride[1],
+ &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
(imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
starty4 >> ss_ver, uv_endy4, ss_hor);
}
--- a/src/lf_mask.h
+++ b/src/lf_mask.h
@@ -47,7 +47,7 @@
int16_t sgr_weights[2];
} Av1RestorationUnit;
-// each struct describes one 128x128 area (1 or 4 SBs)
+// each struct describes one 128x128 area (1 or 4 SBs), pre-superres-scaling
typedef struct Av1Filter {
// each bit is 1 col
uint16_t filter_y[2 /* 0=col, 1=row */][32][3][2];
@@ -54,8 +54,12 @@
uint16_t filter_uv[2 /* 0=col, 1=row */][32][2][2];
int8_t cdef_idx[4]; // -1 means "unset"
uint16_t noskip_mask[32][2];
- Av1RestorationUnit lr[3][4];
} Av1Filter;
+
+// each struct describes one 128x128 area (1 or 4 SBs), post-superres-scaling
+typedef struct Av1Restoration {
+ Av1RestorationUnit lr[3][4];
+} Av1Restoration;
void dav1d_create_lf_mask_intra(Av1Filter *lflvl, uint8_t (*level_cache)[4],
const ptrdiff_t b4_stride,
--- a/src/lib.c
+++ b/src/lib.c
@@ -266,7 +266,8 @@
dav1d_thread_picture_unref(&f->refp[i]);
dav1d_ref_dec(&f->ref_mvs_ref[i]);
}
- dav1d_thread_picture_unref(&f->cur);
+ dav1d_picture_unref(&f->cur);
+ dav1d_thread_picture_unref(&f->sr_cur);
dav1d_cdf_thread_unref(&f->in_cdf);
if (f->frame_hdr.refresh_context)
dav1d_cdf_thread_unref(&f->out_cdf);
@@ -324,6 +325,7 @@
dav1d_free_aligned(f->ipred_edge[0]);
free(f->a);
free(f->lf.mask);
+ free(f->lf.lr_mask);
free(f->lf.level);
free(f->lf.tx_lpf_right_edge[0]);
av1_free_ref_mv_common(f->libaom_cm);
--- a/src/lr_apply_tmpl.c
+++ b/src/lr_apply_tmpl.c
@@ -33,7 +33,6 @@
#include "src/lr_apply.h"
-
enum LrRestorePlanes {
LR_RESTORE_Y = 1 << 0,
LR_RESTORE_U = 1 << 1,
@@ -44,13 +43,14 @@
// contain at most 2 stripes. Each stripe requires 4 rows pixels (2 above
// and 2 below) the final 4 rows are used to swap the bottom of the last
// stripe with the top of the next super block row.
-static void backup_lpf(pixel *dst, ptrdiff_t dst_stride,
- const pixel *src, ptrdiff_t src_stride,
+static void backup_lpf(const Dav1dFrameContext *const f,
+ pixel *dst, const ptrdiff_t dst_stride,
+ const pixel *src, const ptrdiff_t src_stride,
const int ss_ver, const int sb128,
- int row, const int row_h, const int w)
+ int row, const int row_h, const int src_w, const int ss_hor)
{
- src_stride = PXSTRIDE(src_stride);
- dst_stride = PXSTRIDE(dst_stride);
+ const int dst_w = f->frame_hdr.super_res.enabled ?
+ (f->frame_hdr.width[1] + ss_hor) >> ss_hor : src_w;
// The first stripe of the frame is shorter by 8 luma pixel rows.
int stripe_h = (64 - 8 * !row) >> ss_ver;
@@ -59,23 +59,38 @@
const int top = 4 << sb128;
// Copy the top part of the stored loop filtered pixels from the
// previous sb row needed above the first stripe of this sb row.
- pixel_copy(&dst[dst_stride * 0], &dst[dst_stride * top], w);
- pixel_copy(&dst[dst_stride * 1], &dst[dst_stride * (top + 1)], w);
- pixel_copy(&dst[dst_stride * 2], &dst[dst_stride * (top + 2)], w);
- pixel_copy(&dst[dst_stride * 3], &dst[dst_stride * (top + 3)], w);
+ pixel_copy(&dst[PXSTRIDE(dst_stride) * 0],
+ &dst[PXSTRIDE(dst_stride) * top], dst_w);
+ pixel_copy(&dst[PXSTRIDE(dst_stride) * 1],
+ &dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);
+ pixel_copy(&dst[PXSTRIDE(dst_stride) * 2],
+ &dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);
+ pixel_copy(&dst[PXSTRIDE(dst_stride) * 3],
+ &dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);
}
- dst += 4 * dst_stride;
- src += (stripe_h - 2) * src_stride;
+ dst += 4 * PXSTRIDE(dst_stride);
+ src += (stripe_h - 2) * PXSTRIDE(src_stride);
- for (; row + stripe_h <= row_h; row += stripe_h) {
- for (int i = 0; i < 4; i++) {
- pixel_copy(dst, src, w);
- dst += dst_stride;
- src += src_stride;
+ if (f->frame_hdr.super_res.enabled) {
+ for (; row + stripe_h <= row_h; row += stripe_h) {
+ f->dsp->mc.resize(dst, dst_stride, src, src_stride,
+ dst_w, src_w, 4, f->resize_step[ss_hor],
+ f->resize_start[ss_hor]);
+ stripe_h = 64 >> ss_ver;
+ src += stripe_h * PXSTRIDE(src_stride);
+ dst += 4 * PXSTRIDE(dst_stride);
}
- stripe_h = 64 >> ss_ver;
- src += (stripe_h - 4) * src_stride;
+ } else {
+ for (; row + stripe_h <= row_h; row += stripe_h) {
+ for (int i = 0; i < 4; i++) {
+ pixel_copy(dst, src, src_w);
+ dst += PXSTRIDE(dst_stride);
+ src += PXSTRIDE(src_stride);
+ }
+ stripe_h = 64 >> ss_ver;
+ src += (stripe_h - 4) * PXSTRIDE(src_stride);
+ }
}
}
@@ -83,7 +98,8 @@
/*const*/ pixel *const src[3], const int sby)
{
const ptrdiff_t offset = 8 * !!sby;
- const ptrdiff_t *const src_stride = f->cur.p.stride;
+ const ptrdiff_t *const src_stride = f->cur.stride;
+ const ptrdiff_t lr_stride = ((f->sr_cur.p.p.w + 31) & ~31) * sizeof(pixel);
// TODO Also check block level restore type to reduce copying.
const int restore_planes =
@@ -96,13 +112,13 @@
const int w = f->bw << 2;
const int row_h = imin((sby + 1) << (6 + f->seq_hdr.sb128), h);
const int y_stripe = (sby << (6 + f->seq_hdr.sb128)) - offset;
- backup_lpf(f->lf.lr_lpf_line_ptr[0], sizeof(pixel) * f->b4_stride * 4,
+ backup_lpf(f, f->lf.lr_lpf_line_ptr[0], lr_stride,
src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
- 0, f->seq_hdr.sb128, y_stripe, row_h, w);
+ 0, f->seq_hdr.sb128, y_stripe, row_h, w, 0);
}
if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h = f->bh << (2 - ss_ver);
const int w = f->bw << (2 - ss_hor);
const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr.sb128), h);
@@ -111,19 +127,18 @@
(sby << ((6 - ss_ver) + f->seq_hdr.sb128)) - offset_uv;
if (restore_planes & LR_RESTORE_U) {
- backup_lpf(f->lf.lr_lpf_line_ptr[1], sizeof(pixel) * f->b4_stride * 4,
+ backup_lpf(f, f->lf.lr_lpf_line_ptr[1], lr_stride,
src[1] - offset_uv * PXSTRIDE(src_stride[1]), src_stride[1],
- ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w);
+ ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w, ss_hor);
}
if (restore_planes & LR_RESTORE_V) {
- backup_lpf(f->lf.lr_lpf_line_ptr[2], sizeof(pixel) * f->b4_stride * 4,
+ backup_lpf(f, f->lf.lr_lpf_line_ptr[2], lr_stride,
src[2] - offset_uv * PXSTRIDE(src_stride[1]), src_stride[1],
- ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w);
+ ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w, ss_hor);
}
}
}
-
static void lr_stripe(const Dav1dFrameContext *const f, pixel *p,
const pixel (*left)[4], int x, int y,
const int plane, const int unit_w, const int row_h,
@@ -131,11 +146,11 @@
{
const Dav1dDSPContext *const dsp = f->dsp;
const int chroma = !!plane;
- const int ss_ver = chroma & (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
+ const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
const int sbrow_has_bottom = (edges & LR_HAVE_BOTTOM);
const pixel *lpf = f->lf.lr_lpf_line_ptr[plane] + x;
- const ptrdiff_t p_stride = f->cur.p.stride[chroma];
- const ptrdiff_t lpf_stride = sizeof(pixel) * f->b4_stride * 4;
+ const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];
+ const ptrdiff_t lpf_stride = sizeof(pixel) * ((f->sr_cur.p.p.w + 31) & ~31);
// The first stripe of the frame is shorter by 8 luma pixel rows.
int stripe_h = imin((64 - 8 * !y) >> ss_ver, row_h - y);
@@ -192,9 +207,9 @@
const int w, const int h, const int row_h, const int plane)
{
const int chroma = !!plane;
- const int ss_ver = chroma & (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
- const int ss_hor = chroma & (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);
- const ptrdiff_t p_stride = f->cur.p.stride[chroma];
+ const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);
+ const int ss_hor = chroma & (f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);
+ const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];
const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!plane];
const int unit_size = 1 << unit_size_log2;
@@ -238,8 +253,8 @@
// AV1Filter unit.
const int unit_idx = ((ruy & 16) >> 3) + ((rux & 16) >> 4);
const Av1RestorationUnit *const lr =
- &f->lf.mask[(((ruy << (unit_size_log2)) >> shift_ver) * f->sb128w) +
- (x >> shift_hor)].lr[plane][unit_idx];
+ &f->lf.lr_mask[(((ruy << (unit_size_log2)) >> shift_ver) * f->sr_sb128w) +
+ (x >> shift_hor)].lr[plane][unit_idx];
// FIXME Don't backup if the next restoration unit is RESTORE_NONE
// This also requires not restoring in the same conditions.
@@ -257,7 +272,7 @@
const int sby)
{
const ptrdiff_t offset_y = 8 * !!sby;
- const ptrdiff_t *const dst_stride = f->cur.p.stride;
+ const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;
const int restore_planes =
((f->frame_hdr.restoration.type[0] != RESTORATION_NONE) << 0) +
@@ -265,8 +280,8 @@
((f->frame_hdr.restoration.type[2] != RESTORATION_NONE) << 2);
if (restore_planes & LR_RESTORE_Y) {
- const int h = f->cur.p.p.h;
- const int w = f->cur.p.p.w;
+ const int h = f->sr_cur.p.p.h;
+ const int w = f->sr_cur.p.p.w;
const int row_h = imin((sby + 1) << (6 + f->seq_hdr.sb128), h);
const int y_stripe = (sby << (6 + f->seq_hdr.sb128)) - offset_y;
lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
@@ -273,10 +288,10 @@
h, row_h, 0);
}
if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
- const int h = (f->cur.p.p.h + ss_ver) >> ss_ver;
- const int w = (f->cur.p.p.w + ss_hor) >> ss_hor;
+ const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;
+ const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr.sb128), h);
const ptrdiff_t offset_uv = offset_y >> ss_ver;
const int y_stripe =
--- a/src/mc.h
+++ b/src/mc.h
@@ -105,6 +105,12 @@
pixel *dst, ptrdiff_t dst_stride, const pixel *src, ptrdiff_t src_stride)
typedef decl_emu_edge_fn(*emu_edge_fn);
+#define decl_resize_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+ const pixel *src, ptrdiff_t src_stride, \
+ int dst_w, int src_w, int h, int dx, int mx)
+typedef decl_resize_fn(*resize_fn);
+
typedef struct Dav1dMCDSPContext {
mc_fn mc[N_2D_FILTERS];
mc_scaled_fn mc_scaled[N_2D_FILTERS];
@@ -120,6 +126,7 @@
warp8x8_fn warp8x8;
warp8x8t_fn warp8x8t;
emu_edge_fn emu_edge;
+ resize_fn resize;
} Dav1dMCDSPContext;
void dav1d_mc_dsp_init_8bpc(Dav1dMCDSPContext *c);
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -782,6 +782,34 @@
}
}
+static void resize_c(pixel *dst, const ptrdiff_t dst_stride,
+ const pixel *src, const ptrdiff_t src_stride,
+ const int dst_w, const int src_w, int h,
+ const int dx, const int mx0)
+{
+ do {
+ int mx = mx0, src_x = -1;
+ for (int x = 0; x < dst_w; x++) {
+ const int16_t *const F = dav1d_resize_filter[mx >> 8];
+ dst[x] = iclip_pixel((F[0] * src[iclip(src_x - 3, 0, src_w - 1)] +
+ F[1] * src[iclip(src_x - 2, 0, src_w - 1)] +
+ F[2] * src[iclip(src_x - 1, 0, src_w - 1)] +
+ F[3] * src[iclip(src_x + 0, 0, src_w - 1)] +
+ F[4] * src[iclip(src_x + 1, 0, src_w - 1)] +
+ F[5] * src[iclip(src_x + 2, 0, src_w - 1)] +
+ F[6] * src[iclip(src_x + 3, 0, src_w - 1)] +
+ F[7] * src[iclip(src_x + 4, 0, src_w - 1)] +
+ 64) >> 7);
+ mx += dx;
+ src_x += mx >> 14;
+ mx &= 0x3fff;
+ }
+
+ dst += PXSTRIDE(dst_stride);
+ src += PXSTRIDE(src_stride);
+ } while (--h);
+}
+
void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
#define init_mc_fns(type, name) do { \
c->mc [type] = put_##name##_c; \
@@ -813,6 +841,7 @@
c->warp8x8 = warp_affine_8x8_c;
c->warp8x8t = warp_affine_8x8t_c;
c->emu_edge = emu_edge_c;
+ c->resize = resize_c;
#if HAVE_ASM
#if ARCH_AARCH64 || ARCH_ARM
--- a/src/obu.c
+++ b/src/obu.c
@@ -280,9 +280,18 @@
&c->refs[c->frame_hdr.refidx[i]].p;
if (!ref->p.data[0]) return -1;
// FIXME render_* may be wrong
- hdr->render_width = hdr->width = ref->p.p.w;
+ hdr->render_width = hdr->width[1] = ref->p.p.w;
hdr->render_height = hdr->height = ref->p.p.h;
- hdr->super_res = 0; // FIXME probably wrong
+ hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bits(gb, 1);
+ if (hdr->super_res.enabled) {
+ const int d = hdr->super_res.width_scale_denominator =
+ 9 + dav1d_get_bits(gb, 3);
+ hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d,
+ imin(16, hdr->width[1]));
+ } else {
+ hdr->super_res.width_scale_denominator = 8;
+ hdr->width[0] = hdr->width[1];
+ }
return 0;
}
}
@@ -289,20 +298,26 @@
}
if (hdr->frame_size_override) {
- hdr->width = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;
+ hdr->width[1] = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;
hdr->height = dav1d_get_bits(gb, seqhdr->height_n_bits) + 1;
} else {
- hdr->width = seqhdr->max_width;
+ hdr->width[1] = seqhdr->max_width;
hdr->height = seqhdr->max_height;
}
- hdr->super_res = seqhdr->super_res && dav1d_get_bits(gb, 1);
- if (hdr->super_res) return -1; // FIXME
+ hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bits(gb, 1);
+ if (hdr->super_res.enabled) {
+ const int d = hdr->super_res.width_scale_denominator = 9 + dav1d_get_bits(gb, 3);
+ hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d, imin(16, hdr->width[1]));
+ } else {
+ hdr->super_res.width_scale_denominator = 8;
+ hdr->width[0] = hdr->width[1];
+ }
hdr->have_render_size = dav1d_get_bits(gb, 1);
if (hdr->have_render_size) {
hdr->render_width = dav1d_get_bits(gb, 16) + 1;
hdr->render_height = dav1d_get_bits(gb, 16) + 1;
} else {
- hdr->render_width = hdr->width;
+ hdr->render_width = hdr->width[1];
hdr->render_height = hdr->height;
}
return 0;
@@ -411,7 +426,7 @@
dav1d_get_bits(gb, seqhdr->order_hint_n_bits);
if ((res = read_frame_size(c, gb, 0)) < 0) goto error;
hdr->allow_intrabc = hdr->allow_screen_content_tools &&
- /* FIXME: no superres scaling && */ dav1d_get_bits(gb, 1);
+ !hdr->super_res.enabled && dav1d_get_bits(gb, 1);
hdr->use_ref_frame_mvs = 0;
} else {
hdr->allow_intrabc = 0;
@@ -455,7 +470,7 @@
hdr->tiling.uniform = dav1d_get_bits(gb, 1);
const int sbsz_min1 = (64 << seqhdr->sb128) - 1;
int sbsz_log2 = 6 + seqhdr->sb128;
- int sbw = (hdr->width + sbsz_min1) >> sbsz_log2;
+ int sbw = (hdr->width[0] + sbsz_min1) >> sbsz_log2;
int sbh = (hdr->height + sbsz_min1) >> sbsz_log2;
int max_tile_width_sb = 4096 >> sbsz_log2;
int max_tile_area_sb = 4096 * 2304 >> (2 * sbsz_log2);
@@ -733,7 +748,9 @@
#endif
// restoration
- if (!hdr->all_lossless && seqhdr->restoration && !hdr->allow_intrabc) {
+ if ((!hdr->all_lossless || hdr->super_res.enabled) &&
+ seqhdr->restoration && !hdr->allow_intrabc)
+ {
hdr->restoration.type[0] = dav1d_get_bits(gb, 2);
if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400) {
hdr->restoration.type[1] = dav1d_get_bits(gb, 2);
--- a/src/picture.c
+++ b/src/picture.c
@@ -91,8 +91,7 @@
void *extra_ptr; /* MUST BE AT THE END */
};
-static void free_buffer(const uint8_t *data, void *user_data)
-{
+static void free_buffer(const uint8_t *const data, void *const user_data) {
struct pic_ctx_context *pic_ctx = user_data;
pic_ctx->allocator.release_picture_callback(pic_ctx->data,
@@ -148,6 +147,13 @@
*extra_ptr = &pic_ctx->extra_ptr;
return 0;
+}
+
+int dav1d_picture_alloc(Dav1dPicture *const p, const int w, const int h,
+ const enum Dav1dPixelLayout layout, const int bpc,
+ Dav1dPicAllocator *const p_allocator)
+{
+ return picture_alloc_with_edges(p, w, h, layout, bpc, p_allocator, 0, NULL);
}
int dav1d_thread_picture_alloc(Dav1dThreadPicture *const p,
--- a/src/picture.h
+++ b/src/picture.h
@@ -54,6 +54,10 @@
/*
* Allocate a picture with custom border size.
*/
+int dav1d_picture_alloc(Dav1dPicture *p, int w, int h,
+ enum Dav1dPixelLayout layout, int bpc,
+ Dav1dPicAllocator *);
+
int dav1d_thread_picture_alloc(Dav1dThreadPicture *p, int w, int h,
enum Dav1dPixelLayout layout, int bpc,
struct thread_data *t, int visible,
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -72,7 +72,7 @@
if (dbg) printf("Start: r=%d\n", ts->msac.rng);
// does this block have any non-zero coefficients
- const int sctx = get_coef_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.p.layout);
+ const int sctx = get_coef_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.layout);
const int all_skip =
msac_decode_bool_adapt(&ts->msac, ts->cdf.coef.skip[t_dim->ctx][sctx]);
if (dbg)
@@ -289,7 +289,7 @@
t->by += txsh;
if (txh >= txw && t->by < f->bh) {
if (dst)
- dst += 4 * txsh * PXSTRIDE(f->cur.p.stride[0]);
+ dst += 4 * txsh * PXSTRIDE(f->cur.stride[0]);
read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
x_off * 2 + 0, y_off * 2 + 1, dst);
t->bx += txsw;
@@ -349,9 +349,9 @@
if (eob >= 0) {
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");
- dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.p.stride[0], cf, eob);
+ dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
- hex_dump(dst, f->cur.p.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
+ hex_dump(dst, f->cur.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
}
}
}
@@ -361,8 +361,8 @@
const enum BlockSize bs, const Av1Block *const b)
{
const Dav1dFrameContext *const f = t->f;
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int bx4 = t->bx & 31, by4 = t->by & 31;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const uint8_t *const b_dim = dav1d_block_dimensions[bs];
@@ -501,8 +501,8 @@
{
assert((dst8 != NULL) ^ (dst16 != NULL));
const Dav1dFrameContext *const f = t->f;
- const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
const int mvx = mv.x, mvy = mv.y;
const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
@@ -509,19 +509,19 @@
ptrdiff_t ref_stride = refp->p.stride[!!pl];
const pixel *ref;
- if (refp->p.p.w == f->cur.p.p.w && refp->p.p.h == f->cur.p.p.h) {
+ if (refp->p.p.w == f->cur.p.w && refp->p.p.h == f->cur.p.h) {
const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
const int dy = by * v_mul + (mvy >> (3 + ss_ver));
int w, h;
- if (refp != &f->cur) { // i.e. not for intrabc
+ if (refp->p.data[0] != f->cur.data[0]) { // i.e. not for intrabc
if (dav1d_thread_picture_wait(refp, dy + bh4 * v_mul + !!my * 4,
PLANE_TYPE_Y + !!pl))
{
return -1;
}
- w = (f->cur.p.p.w + ss_hor) >> ss_hor;
- h = (f->cur.p.p.h + ss_ver) >> ss_ver;
+ w = (f->cur.p.w + ss_hor) >> ss_hor;
+ h = (f->cur.p.h + ss_ver) >> ss_ver;
} else {
w = f->bw * 4 >> ss_hor;
h = f->bh * 4 >> ss_ver;
@@ -548,7 +548,7 @@
bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
}
} else {
- assert(refp != &f->cur);
+ assert(refp != &f->sr_cur);
int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
@@ -569,6 +569,11 @@
if (dav1d_thread_picture_wait(refp, bottom, PLANE_TYPE_Y + !!pl))
return -1;
+ if (DEBUG_BLOCK_INFO)
+ printf("Off %dx%d [%d,%d,%d], size %dx%d [%d,%d]\n",
+ left, top, orig_pos_x, f->svc[refidx][0].scale, refidx,
+ right-left, bottom-top,
+ f->svc[refidx][0].step, f->svc[refidx][1].step);
const int w = (refp->p.p.w + ss_hor) >> ss_hor;
const int h = (refp->p.p.h + ss_ver) >> ss_ver;
@@ -579,6 +584,7 @@
refp->p.data[pl], ref_stride);
ref = &t->emu_edge[320 * 3 + 3];
ref_stride = 320 * sizeof(pixel);
+ if (DEBUG_BLOCK_INFO) printf("Emu\n");
} else {
ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;
}
@@ -610,8 +616,8 @@
const Dav1dFrameContext *const f = t->f;
const refmvs *const r = &f->mvs[t->by * f->b4_stride + t->bx];
pixel *const lap = t->scratch.lap;
- const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
int res;
@@ -673,8 +679,8 @@
assert((dst8 != NULL) ^ (dst16 != NULL));
const Dav1dFrameContext *const f = t->f;
const Dav1dDSPContext *const dsp = f->dsp;
- const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
const int32_t *const mat = wmp->matrix;
@@ -735,8 +741,8 @@
const Dav1dFrameContext *const f = t->f;
const Dav1dDSPContext *const dsp = f->dsp;
const int bx4 = t->bx & 31, by4 = t->by & 31;
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const uint8_t *const b_dim = dav1d_block_dimensions[bs];
const int bw4 = b_dim[0], bh4 = b_dim[1];
@@ -758,8 +764,8 @@
for (int init_y = 0; init_y < h4; init_y += 16) {
for (int init_x = 0; init_x < w4; init_x += 16) {
if (b->pal_sz[0]) {
- pixel *dst = ((pixel *) f->cur.p.data[0]) +
- 4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) + t->bx);
+ pixel *dst = ((pixel *) f->cur.data[0]) +
+ 4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
const uint8_t *pal_idx;
if (f->frame_thread.pass) {
pal_idx = ts->frame_thread.pal_idx;
@@ -770,10 +776,10 @@
const uint16_t *const pal = f->frame_thread.pass ?
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][0] : t->pal[0];
- f->dsp->ipred.pal_pred(dst, f->cur.p.stride[0], pal,
+ f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
pal_idx, bw4 * 4, bh4 * 4);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
- hex_dump(dst, PXSTRIDE(f->cur.p.stride[0]),
+ hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
bw4 * 4, bh4 * 4, "y-pal-pred");
}
@@ -790,8 +796,8 @@
for (y = init_y, t->by += init_y; y < sub_h4;
y += t_dim->h, t->by += t_dim->h)
{
- pixel *dst = ((pixel *) f->cur.p.data[0]) +
- 4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) +
+ pixel *dst = ((pixel *) f->cur.data[0]) +
+ 4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
t->bx + init_x);
for (x = init_x, t->bx += init_x; x < sub_w4;
x += t_dim->w, t->bx += t_dim->w)
@@ -818,10 +824,10 @@
ts->tiling.col_end,
ts->tiling.row_end,
edge_flags, dst,
- f->cur.p.stride[0], top_sb_edge,
+ f->cur.stride[0], top_sb_edge,
b->y_mode, &angle,
t_dim->w, t_dim->h, edge);
- dsp->ipred.intra_pred[m](dst, f->cur.p.stride[0], edge,
+ dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
t_dim->w * 4, t_dim->h * 4,
angle | intra_flags,
4 * f->bw - 4 * t->bx,
@@ -833,7 +839,7 @@
hex_dump(edge, 0, 1, 1, "tl");
hex_dump(edge + 1, t_dim->w * 4,
t_dim->w * 4, 2, "t");
- hex_dump(dst, f->cur.p.stride[0],
+ hex_dump(dst, f->cur.stride[0],
t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
}
@@ -875,10 +881,10 @@
imin(t_dim->w, 8) * 4, 3, "dq");
dsp->itx.itxfm_add[b->tx]
[txtp](dst,
- f->cur.p.stride[0],
+ f->cur.stride[0],
cf, eob);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
- hex_dump(dst, f->cur.p.stride[0],
+ hex_dump(dst, f->cur.stride[0],
t_dim->w * 4, t_dim->h * 4, "recon");
}
} else if (!f->frame_thread.pass) {
@@ -896,24 +902,24 @@
if (!has_chroma) continue;
- const ptrdiff_t stride = f->cur.p.stride[1];
+ const ptrdiff_t stride = f->cur.stride[1];
if (b->uv_mode == CFL_PRED) {
assert(!init_x && !init_y);
int16_t *const ac = t->scratch.ac;
- pixel *y_src = ((pixel *) f->cur.p.data[0]) + 4 * (t->bx & ~ss_hor) +
- 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.p.stride[0]);
+ pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
+ 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
(t->by >> ss_ver) * PXSTRIDE(stride));
- pixel *const uv_dst[2] = { ((pixel *) f->cur.p.data[1]) + uv_off,
- ((pixel *) f->cur.p.data[2]) + uv_off };
+ pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
+ ((pixel *) f->cur.data[2]) + uv_off };
const int furthest_r =
((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
const int furthest_b =
((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
- dsp->ipred.cfl_ac[f->cur.p.p.layout - 1](ac, y_src, f->cur.p.stride[0],
+ dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
cbw4 - (furthest_r >> ss_hor),
cbh4 - (furthest_b >> ss_ver),
cbw4 * 4, cbh4 * 4);
@@ -950,7 +956,7 @@
}
} else if (b->pal_sz[1]) {
ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
- (t->by >> ss_ver) * PXSTRIDE(f->cur.p.stride[1]));
+ (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
const uint8_t *pal_idx;
if (f->frame_thread.pass) {
pal_idx = ts->frame_thread.pal_idx;
@@ -961,21 +967,21 @@
const uint16_t *const pal_u = f->frame_thread.pass ?
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][1] : t->pal[1];
- f->dsp->ipred.pal_pred(((pixel *) f->cur.p.data[1]) + uv_dstoff,
- f->cur.p.stride[1], pal_u,
+ f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
+ f->cur.stride[1], pal_u,
pal_idx, cbw4 * 4, cbh4 * 4);
const uint16_t *const pal_v = f->frame_thread.pass ?
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];
- f->dsp->ipred.pal_pred(((pixel *) f->cur.p.data[2]) + uv_dstoff,
- f->cur.p.stride[1], pal_v,
+ f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
+ f->cur.stride[1], pal_v,
pal_idx, cbw4 * 4, cbh4 * 4);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
- hex_dump(((pixel *) f->cur.p.data[1]) + uv_dstoff,
- PXSTRIDE(f->cur.p.stride[1]),
+ hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
+ PXSTRIDE(f->cur.stride[1]),
cbw4 * 4, cbh4 * 4, "u-pal-pred");
- hex_dump(((pixel *) f->cur.p.data[2]) + uv_dstoff,
- PXSTRIDE(f->cur.p.stride[1]),
+ hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
+ PXSTRIDE(f->cur.stride[1]),
cbw4 * 4, cbh4 * 4, "v-pal-pred");
}
}
@@ -984,10 +990,10 @@
sm_uv_flag(&t->l, cby4);
const int uv_sb_has_tr =
((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
- intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.p.layout - 1));
+ intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
const int uv_sb_has_bl =
init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
- intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.p.layout - 1));
+ intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
for (int pl = 0; pl < 2; pl++) {
@@ -994,7 +1000,7 @@
for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
{
- pixel *dst = ((pixel *) f->cur.p.data[1 + pl]) +
+ pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
((t->bx + init_x) >> ss_hor));
for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
@@ -1127,8 +1133,8 @@
const Dav1dFrameContext *const f = t->f;
const Dav1dDSPContext *const dsp = f->dsp;
const int bx4 = t->bx & 31, by4 = t->by & 31;
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const uint8_t *const b_dim = dav1d_block_dimensions[bs];
const int bw4 = b_dim[0], bh4 = b_dim[1];
@@ -1136,26 +1142,27 @@
const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
(bw4 > ss_hor || t->bx & 1) &&
(bh4 > ss_ver || t->by & 1);
- const int chr_layout_idx = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
- DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.p.layout;
+ const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
+ DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
int res;
// prediction
const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
- pixel *dst = ((pixel *) f->cur.p.data[0]) +
- 4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) + t->bx);
+ pixel *dst = ((pixel *) f->cur.data[0]) +
+ 4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
const ptrdiff_t uvdstoff =
- 4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.p.stride[1]));
+ 4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
if (!(f->frame_hdr.frame_type & 1)) {
// intrabc
- res = mc(t, dst, NULL, f->cur.p.stride[0],
- bw4, bh4, t->bx, t->by, 0, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);
+ assert(!f->frame_hdr.super_res.enabled);
+ res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
+ b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
if (res) return res;
if (has_chroma) for (int pl = 1; pl < 3; pl++) {
- res = mc(t, ((pixel *)f->cur.p.data[pl]) + uvdstoff, NULL, f->cur.p.stride[1],
+ res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
- t->bx & ~ss_hor, t->by & ~ss_ver,
- pl, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);
+ t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
+ &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
if (res) return res;
}
} else if (b->comp_type == COMP_INTER_NONE) {
@@ -1168,16 +1175,16 @@
(b->motion_mode == MM_WARP &&
t->warpmv.type > WM_TYPE_TRANSLATION)))
{
- res = warp_affine(t, dst, NULL, f->cur.p.stride[0], b_dim, 0, refp,
+ res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
b->motion_mode == MM_WARP ? &t->warpmv :
&f->frame_hdr.gmv[b->ref[0]]);
if (res) return res;
} else {
- res = mc(t, dst, NULL, f->cur.p.stride[0],
+ res = mc(t, dst, NULL, f->cur.stride[0],
bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
if (res) return res;
if (b->motion_mode == MM_OBMC) {
- res = obmc(t, dst, f->cur.p.stride[0], b_dim, 0, bx4, by4, w4, h4);
+ res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
if (res) return res;
}
}
@@ -1197,7 +1204,7 @@
m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
t->by, t->by > ts->tiling.row_start,
ts->tiling.col_end, ts->tiling.row_end,
- 0, dst, f->cur.p.stride[0], top_sb_edge,
+ 0, dst, f->cur.stride[0], top_sb_edge,
m, &angle, bw4, bh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0);
@@ -1205,7 +1212,7 @@
b->interintra_type == INTER_INTRA_BLEND ?
dav1d_ii_masks[bs][0][b->interintra_mode] :
dav1d_wedge_masks[bs][0][0][b->wedge_idx];
- dsp->mc.blend(dst, f->cur.p.stride[0], tmp,
+ dsp->mc.blend(dst, f->cur.stride[0], tmp,
bw4 * 4, bh4 * 4, ii_mask);
}
@@ -1229,8 +1236,8 @@
int h_off = 0, v_off = 0;
if (bw4 == 1 && bh4 == ss_ver) {
for (int pl = 0; pl < 2; pl++) {
- res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
- NULL, f->cur.p.stride[1],
+ res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
+ NULL, f->cur.stride[1],
bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
r[-(f->b4_stride + 1)].mv[0],
&f->refp[r[-(f->b4_stride + 1)].ref[0] - 1],
@@ -1239,7 +1246,7 @@
f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
if (res) return res;
}
- v_off = 2 * PXSTRIDE(f->cur.p.stride[1]);
+ v_off = 2 * PXSTRIDE(f->cur.stride[1]);
h_off = 2;
}
if (bw4 == 1) {
@@ -1246,8 +1253,8 @@
const enum Filter2d left_filter_2d =
dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
for (int pl = 0; pl < 2; pl++) {
- res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + v_off, NULL,
- f->cur.p.stride[1], bw4, bh4, t->bx - 1,
+ res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
+ f->cur.stride[1], bw4, bh4, t->bx - 1,
t->by, 1 + pl, r[-1].mv[0], &f->refp[r[-1].ref[0] - 1],
r[-1].ref[0] - 1,
f->frame_thread.pass != 2 ? left_filter_2d :
@@ -1260,8 +1267,8 @@
const enum Filter2d top_filter_2d =
dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
for (int pl = 0; pl < 2; pl++) {
- res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off, NULL,
- f->cur.p.stride[1], bw4, bh4, t->bx, t->by - 1,
+ res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
+ f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
1 + pl, r[-f->b4_stride].mv[0],
&f->refp[r[-f->b4_stride].ref[0] - 1],
r[-f->b4_stride].ref[0] - 1,
@@ -1269,10 +1276,10 @@
f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
if (res) return res;
}
- v_off = 2 * PXSTRIDE(f->cur.p.stride[1]);
+ v_off = 2 * PXSTRIDE(f->cur.stride[1]);
}
for (int pl = 0; pl < 2; pl++) {
- res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.p.stride[1],
+ res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
refp, b->ref[0], filter_2d);
if (res) return res;
@@ -1285,8 +1292,8 @@
t->warpmv.type > WM_TYPE_TRANSLATION)))
{
for (int pl = 0; pl < 2; pl++) {
- res = warp_affine(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff, NULL,
- f->cur.p.stride[1], b_dim, 1 + pl, refp,
+ res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
+ f->cur.stride[1], b_dim, 1 + pl, refp,
b->motion_mode == MM_WARP ? &t->warpmv :
&f->frame_hdr.gmv[b->ref[0]]);
if (res) return res;
@@ -1293,15 +1300,15 @@
}
} else {
for (int pl = 0; pl < 2; pl++) {
- res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
- NULL, f->cur.p.stride[1],
+ res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
+ NULL, f->cur.stride[1],
bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
t->bx & ~ss_hor, t->by & ~ss_ver,
1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
if (res) return res;
if (b->motion_mode == MM_OBMC) {
- res = obmc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
- f->cur.p.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
+ res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
+ f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
if (res) return res;
}
}
@@ -1322,7 +1329,7 @@
b->interintra_mode == II_SMOOTH_PRED ?
SMOOTH_PRED : b->interintra_mode;
int angle = 0;
- pixel *const uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff;
+ pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
const pixel *top_sb_edge = NULL;
if (!(t->by & (f->sb_step - 1))) {
top_sb_edge = f->ipred_edge[pl + 1];
@@ -1337,12 +1344,12 @@
(ts->tiling.row_start >> ss_ver),
ts->tiling.col_end >> ss_hor,
ts->tiling.row_end >> ss_ver,
- 0, uvdst, f->cur.p.stride[1],
+ 0, uvdst, f->cur.stride[1],
top_sb_edge, m,
&angle, cbw4, cbh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0);
- dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp,
+ dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
cbw4 * 4, cbh4 * 4, ii_mask);
}
}
@@ -1370,21 +1377,23 @@
} else {
res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
b->mv[i], refp, b->ref[i], filter_2d);
+ if (DEBUG_BLOCK_INFO)
+ coef_dump(tmp[i], bw4*4, bh4*4, 3, "med");
if (res) return res;
}
}
switch (b->comp_type) {
case COMP_INTER_AVG:
- dsp->mc.avg(dst, f->cur.p.stride[0], tmp[0], tmp[1],
+ dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
bw4 * 4, bh4 * 4);
break;
case COMP_INTER_WEIGHTED_AVG:
jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
- dsp->mc.w_avg(dst, f->cur.p.stride[0], tmp[0], tmp[1],
+ dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
bw4 * 4, bh4 * 4, jnt_weight);
break;
case COMP_INTER_SEG:
- dsp->mc.w_mask[chr_layout_idx](dst, f->cur.p.stride[0],
+ dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
tmp[b->mask_sign], tmp[!b->mask_sign],
bw4 * 4, bh4 * 4, seg_mask, b->mask_sign);
mask = seg_mask;
@@ -1391,7 +1400,7 @@
break;
case COMP_INTER_WEDGE:
mask = dav1d_wedge_masks[bs][0][0][b->wedge_idx];
- dsp->mc.mask(dst, f->cur.p.stride[0],
+ dsp->mc.mask(dst, f->cur.stride[0],
tmp[b->mask_sign], tmp[!b->mask_sign],
bw4 * 4, bh4 * 4, mask);
if (has_chroma)
@@ -1416,19 +1425,19 @@
if (res) return res;
}
}
- pixel *const uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff;
+ pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
switch (b->comp_type) {
case COMP_INTER_AVG:
- dsp->mc.avg(uvdst, f->cur.p.stride[1], tmp[0], tmp[1],
+ dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver);
break;
case COMP_INTER_WEIGHTED_AVG:
- dsp->mc.w_avg(uvdst, f->cur.p.stride[1], tmp[0], tmp[1],
+ dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight);
break;
case COMP_INTER_WEDGE:
case COMP_INTER_SEG:
- dsp->mc.mask(uvdst, f->cur.p.stride[1],
+ dsp->mc.mask(uvdst, f->cur.stride[1],
tmp[b->mask_sign], tmp[!b->mask_sign],
bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask);
break;
@@ -1437,11 +1446,11 @@
}
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
- hex_dump(dst, f->cur.p.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
+ hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
if (has_chroma) {
- hex_dump(&((pixel *) f->cur.p.data[1])[uvdstoff], f->cur.p.stride[1],
+ hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
cbw4 * 4, cbh4 * 4, "u-pred");
- hex_dump(&((pixel *) f->cur.p.data[2])[uvdstoff], f->cur.p.stride[1],
+ hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
cbw4 * 4, cbh4 * 4, "v-pred");
}
}
@@ -1473,7 +1482,7 @@
for (int init_x = 0; init_x < bw4; init_x += 16) {
// coefficient coding & inverse transforms
int y_off = !!init_y, y;
- dst += PXSTRIDE(f->cur.p.stride[0]) * 4 * init_y;
+ dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
y += ytx->h, y_off++)
{
@@ -1485,17 +1494,17 @@
x_off, y_off, &dst[x * 4]);
t->bx += ytx->w;
}
- dst += PXSTRIDE(f->cur.p.stride[0]) * 4 * ytx->h;
+ dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
t->bx -= x;
t->by += ytx->h;
}
- dst -= PXSTRIDE(f->cur.p.stride[0]) * 4 * y;
+ dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
t->by -= y;
// chroma coefs and inverse transform
if (has_chroma) for (int pl = 0; pl < 2; pl++) {
- pixel *uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff +
- (PXSTRIDE(f->cur.p.stride[1]) * init_y * 4 >> ss_ver);
+ pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
+ (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
for (y = init_y >> ss_ver, t->by += init_y;
y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
{
@@ -1544,15 +1553,15 @@
coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
dsp->itx.itxfm_add[b->uvtx]
[txtp](&uvdst[4 * x],
- f->cur.p.stride[1],
+ f->cur.stride[1],
cf, eob);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
- hex_dump(&uvdst[4 * x], f->cur.p.stride[1],
+ hex_dump(&uvdst[4 * x], f->cur.stride[1],
uvtx->w * 4, uvtx->h * 4, "recon");
}
t->bx += uvtx->w << ss_hor;
}
- uvdst += PXSTRIDE(f->cur.p.stride[1]) * 4 * uvtx->h;
+ uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
t->bx -= x << ss_hor;
t->by += uvtx->h << ss_ver;
}
@@ -1564,7 +1573,7 @@
}
void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int sbsz = f->sb_step, sbh = f->sbh;
if (f->frame_hdr.loopfilter.level_y[0] ||
@@ -1584,9 +1593,9 @@
if (f->seq_hdr.cdef) {
if (sby) {
pixel *p_up[3] = {
- f->lf.p[0] - 8 * PXSTRIDE(f->cur.p.stride[0]),
- f->lf.p[1] - (8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver),
- f->lf.p[2] - (8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver),
+ f->lf.p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
+ f->lf.p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
+ f->lf.p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
};
bytefn(dav1d_cdef_brow)(f, p_up, f->lf.prev_mask_ptr,
sby * sbsz - 2, sby * sbsz);
@@ -1595,13 +1604,34 @@
bytefn(dav1d_cdef_brow)(f, f->lf.p, f->lf.mask_ptr, sby * sbsz,
imin(sby * sbsz + n_blks, f->bh));
}
+ if (f->frame_hdr.super_res.enabled) {
+ const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
+ for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
+ const int h_start = 8 * !!sby >> (ss_ver & !!pl);
+ const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
+ pixel *dst = f->lf.sr_p[pl] - h_start * PXSTRIDE(dst_stride);
+ const ptrdiff_t src_stride = f->cur.stride[!!pl];
+ const pixel *src = f->lf.p[pl] - h_start * PXSTRIDE(src_stride);
+ const int h_end = 4 * (sbsz - 2 * (sby + 1 < sbh)) >> (ss_ver & !!pl);
+ const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
+ const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
+
+ f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w, src_w,
+ h_end + h_start, f->resize_step[!!pl],
+ f->resize_start[!!pl]);
+ }
+ }
if (f->seq_hdr.restoration) {
- bytefn(dav1d_lr_sbrow)(f, f->lf.p, sby);
+ bytefn(dav1d_lr_sbrow)(f, f->lf.sr_p, sby);
}
- f->lf.p[0] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[0]);
- f->lf.p[1] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
- f->lf.p[2] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
+ f->lf.p[0] += sbsz * 4 * PXSTRIDE(f->cur.stride[0]);
+ f->lf.p[1] += sbsz * 4 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
+ f->lf.p[2] += sbsz * 4 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
+ f->lf.sr_p[0] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[0]);
+ f->lf.sr_p[1] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver;
+ f->lf.sr_p[2] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver;
f->lf.prev_mask_ptr = f->lf.mask_ptr;
if ((sby & 1) || f->seq_hdr.sb128) {
f->lf.mask_ptr += f->sb128w;
@@ -1616,20 +1646,20 @@
const int x_off = ts->tiling.col_start;
const pixel *const y =
- ((const pixel *) f->cur.p.data[0]) + x_off * 4 +
- ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.p.stride[0]);
+ ((const pixel *) f->cur.data[0]) + x_off * 4 +
+ ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
4 * (ts->tiling.col_end - x_off));
- if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
- const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
- const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+ if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
+ const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+ const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
- (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.p.stride[1]);
+ (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
for (int pl = 1; pl <= 2; pl++)
pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
- &((const pixel *) f->cur.p.data[pl])[uv_off],
+ &((const pixel *) f->cur.data[pl])[uv_off],
4 * (ts->tiling.col_end - x_off) >> ss_hor);
}
}
--- a/src/tables.c
+++ b/src/tables.c
@@ -712,6 +712,41 @@
{ 0, 0, 2, -1, 0, 0, 127, 0 }
};
+const int16_t dav1d_resize_filter[64][8] = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -1, 128, 2, -1, 0, 0 },
+ { 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -4, 127, 6, -3, 1, 0 },
+ { 0, 2, -6, 126, 8, -3, 1, 0 }, { 0, 2, -7, 125, 11, -4, 1, 0 },
+ { -1, 2, -8, 125, 13, -5, 2, 0 }, { -1, 3, -9, 124, 15, -6, 2, 0 },
+ { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 3, -11, 122, 20, -7, 3, -1 },
+ { -1, 4, -12, 121, 22, -8, 3, -1 }, { -1, 4, -13, 120, 25, -9, 3, -1 },
+ { -1, 4, -14, 118, 28, -9, 3, -1 }, { -1, 4, -15, 117, 30, -10, 4, -1 },
+ { -1, 5, -16, 116, 32, -11, 4, -1 }, { -1, 5, -16, 114, 35, -12, 4, -1 },
+ { -1, 5, -17, 112, 38, -12, 4, -1 }, { -1, 5, -18, 111, 40, -13, 5, -1 },
+ { -1, 5, -18, 109, 43, -14, 5, -1 }, { -1, 6, -19, 107, 45, -14, 5, -1 },
+ { -1, 6, -19, 105, 48, -15, 5, -1 }, { -1, 6, -19, 103, 51, -16, 5, -1 },
+ { -1, 6, -20, 101, 53, -16, 6, -1 }, { -1, 6, -20, 99, 56, -17, 6, -1 },
+ { -1, 6, -20, 97, 58, -17, 6, -1 }, { -1, 6, -20, 95, 61, -18, 6, -1 },
+ { -2, 7, -20, 93, 64, -18, 6, -2 }, { -2, 7, -20, 91, 66, -19, 6, -1 },
+ { -2, 7, -20, 88, 69, -19, 6, -1 }, { -2, 7, -20, 86, 71, -19, 6, -1 },
+ { -2, 7, -20, 84, 74, -20, 7, -2 }, { -2, 7, -20, 81, 76, -20, 7, -1 },
+ { -2, 7, -20, 79, 79, -20, 7, -2 }, { -1, 7, -20, 76, 81, -20, 7, -2 },
+ { -2, 7, -20, 74, 84, -20, 7, -2 }, { -1, 6, -19, 71, 86, -20, 7, -2 },
+ { -1, 6, -19, 69, 88, -20, 7, -2 }, { -1, 6, -19, 66, 91, -20, 7, -2 },
+ { -2, 6, -18, 64, 93, -20, 7, -2 }, { -1, 6, -18, 61, 95, -20, 6, -1 },
+ { -1, 6, -17, 58, 97, -20, 6, -1 }, { -1, 6, -17, 56, 99, -20, 6, -1 },
+ { -1, 6, -16, 53, 101, -20, 6, -1 }, { -1, 5, -16, 51, 103, -19, 6, -1 },
+ { -1, 5, -15, 48, 105, -19, 6, -1 }, { -1, 5, -14, 45, 107, -19, 6, -1 },
+ { -1, 5, -14, 43, 109, -18, 5, -1 }, { -1, 5, -13, 40, 111, -18, 5, -1 },
+ { -1, 4, -12, 38, 112, -17, 5, -1 }, { -1, 4, -12, 35, 114, -16, 5, -1 },
+ { -1, 4, -11, 32, 116, -16, 5, -1 }, { -1, 4, -10, 30, 117, -15, 4, -1 },
+ { -1, 3, -9, 28, 118, -14, 4, -1 }, { -1, 3, -9, 25, 120, -13, 4, -1 },
+ { -1, 3, -8, 22, 121, -12, 4, -1 }, { -1, 3, -7, 20, 122, -11, 3, -1 },
+ { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 2, -6, 15, 124, -9, 3, -1 },
+ { 0, 2, -5, 13, 125, -8, 2, -1 }, { 0, 1, -4, 11, 125, -7, 2, 0 },
+ { 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -3, 6, 127, -4, 1, 0 },
+ { 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, -1, 2, 128, -1, 0, 0 },
+};
+
const uint8_t dav1d_sm_weights[128] = {
// Unused, because we always offset by bs, which is at least 2.
0, 0,
--- a/src/tables.h
+++ b/src/tables.h
@@ -111,6 +111,7 @@
extern const int8_t dav1d_mc_subpel_filters[5][15][8];
extern const int8_t dav1d_mc_warp_filter[193][8];
+extern const int16_t dav1d_resize_filter[64][8];
extern const uint8_t dav1d_sm_weights[128];
extern const int16_t dav1d_dr_intra_derivative[90];