ref: 7aea6858ecd2e8e596dedc0be5396d8edb54eefc
parent: cdf4a3bc94be2425a7d0b140091bbfaf08c48eac
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Sat Dec 28 12:00:25 EST 2019
av1: do C inverse transforms in int32_t precision. Fixes C part of #321.
--- a/src/itx_1d.c
+++ b/src/itx_1d.c
@@ -1,6 +1,6 @@
/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
+ * Copyright © 2018-2019, VideoLAN and dav1d authors
+ * Copyright © 2018-2019, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,8 +30,10 @@
#include <stddef.h>
#include <stdint.h>
-#include "common/attributes.h"
+#include "common/intops.h"
+#include "src/itx_1d.h"
+
#define CLIP(a) iclip(a, min, max)
/*
@@ -60,9 +62,9 @@
* wrap around.
*/
-static void NOINLINE
-inv_dct4_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int max)
+void dav1d_inv_dct4_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int max)
{
const int min = -max - 1;
const int in0 = in[0 * in_s], in1 = in[1 * in_s];
@@ -79,14 +81,14 @@
out[3 * out_s] = CLIP(t0 - t3);
}
-static void NOINLINE
-inv_dct8_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int max)
+void dav1d_inv_dct8_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int max)
{
const int min = -max - 1;
- coef tmp[4];
+ int32_t tmp[4];
- inv_dct4_1d(in, in_s * 2, tmp, 1, max);
+ dav1d_inv_dct4_1d_c(in, in_s * 2, tmp, 1, max);
const int in1 = in[1 * in_s], in3 = in[3 * in_s];
const int in5 = in[5 * in_s], in7 = in[7 * in_s];
@@ -114,14 +116,14 @@
out[7 * out_s] = CLIP(tmp[0] - t7);
}
-static void NOINLINE
-inv_dct16_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int max)
+void dav1d_inv_dct16_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int max)
{
const int min = -max - 1;
- coef tmp[8];
+ int32_t tmp[8];
- inv_dct8_1d(in, in_s * 2, tmp, 1, max);
+ dav1d_inv_dct8_1d_c(in, in_s * 2, tmp, 1, max);
const int in1 = in[ 1 * in_s], in3 = in[ 3 * in_s];
const int in5 = in[ 5 * in_s], in7 = in[ 7 * in_s];
@@ -183,14 +185,14 @@
out[15 * out_s] = CLIP(tmp[0] - t15a);
}
-static void NOINLINE
-inv_dct32_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int max)
+void dav1d_inv_dct32_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int max)
{
const int min = -max - 1;
- coef tmp[16];
+ int32_t tmp[16];
- inv_dct16_1d(in, in_s * 2, tmp, 1, max);
+ dav1d_inv_dct16_1d_c(in, in_s * 2, tmp, 1, max);
const int in1 = in[ 1 * in_s], in3 = in[ 3 * in_s];
const int in5 = in[ 5 * in_s], in7 = in[ 7 * in_s];
@@ -330,14 +332,14 @@
out[31 * out_s] = CLIP(tmp[ 0] - t31);
}
-static void NOINLINE
-inv_dct64_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int max)
+void dav1d_inv_dct64_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int max)
{
const int min = -max - 1;
- coef tmp[32];
+ int32_t tmp[32];
- inv_dct32_1d(in, in_s * 2, tmp, 1, max);
+ dav1d_inv_dct32_1d_c(in, in_s * 2, tmp, 1, max);
const int in1 = in[ 1 * in_s], in3 = in[ 3 * in_s];
const int in5 = in[ 5 * in_s], in7 = in[ 7 * in_s];
@@ -655,9 +657,9 @@
out[63 * out_s] = CLIP(tmp[ 0] - t63a);
}
-static void NOINLINE
-inv_adst4_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int range)
+void dav1d_inv_adst4_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int range)
{
const int in0 = in[0 * in_s], in1 = in[1 * in_s];
const int in2 = in[2 * in_s], in3 = in[3 * in_s];
@@ -674,9 +676,9 @@
in0 + in2 - in1;
}
-static void NOINLINE
-inv_adst8_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int max)
+void dav1d_inv_adst8_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int max)
{
const int min = -max - 1;
const int in0 = in[0 * in_s], in1 = in[1 * in_s];
@@ -723,9 +725,9 @@
out[5 * out_s] = -(((t6 - t7) * 181 + 128) >> 8);
}
-static void NOINLINE
-inv_adst16_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int max)
+void dav1d_inv_adst16_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int max)
{
const int min = -max - 1;
const int in0 = in[ 0 * in_s], in1 = in[ 1 * in_s];
@@ -834,10 +836,11 @@
}
#define flip_inv_adst(sz) \
-static void inv_flipadst##sz##_1d(const coef *const in, const ptrdiff_t in_s, \
- coef *const out, const ptrdiff_t out_s, const int range) \
+void dav1d_inv_flipadst##sz##_1d_c(const int32_t *const in, const ptrdiff_t in_s, \
+ int32_t *const out, const ptrdiff_t out_s, \
+ const int range) /* generates the sz-point flipped wrapper around dav1d_inv_adst##sz##_1d_c */ \
{ \
- inv_adst##sz##_1d(in, in_s, &out[(sz - 1) * out_s], -out_s, range); \
+ dav1d_inv_adst##sz##_1d_c(in, in_s, &out[(sz - 1) * out_s], -out_s, range); /* start at the last output slot and step backwards, so results land in reverse order */ \
}
flip_inv_adst(4)
@@ -846,42 +849,41 @@
#undef flip_inv_adst
-static void NOINLINE
-inv_identity4_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int range)
+void dav1d_inv_identity4_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int range) /* 4-point identity kernel: every output is its input plus
+ * (input * 1697 + 2048) >> 12, i.e. roughly input * sqrt(2)
+ * (1 + 1697/4096 = 1.4143). Reads in[] with stride in_s, writes
+ * out[] with stride out_s; range is not used. */
{
for (int i = 0; i < 4; i++)
out[out_s * i] = in[in_s * i] + ((in[in_s * i] * 1697 + 2048) >> 12);
}
-static void NOINLINE
-inv_identity8_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int range)
+void dav1d_inv_identity8_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int range) /* 8-point identity kernel: every output is twice its input.
+ * Reads in[] with stride in_s, writes out[] with stride out_s;
+ * range is not used. */
{
for (int i = 0; i < 8; i++)
out[out_s * i] = in[in_s * i] * 2;
}
-static void NOINLINE
-inv_identity16_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int range)
+void dav1d_inv_identity16_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int range) /* 16-point identity kernel: every output is twice its input
+ * plus (input * 1697 + 1024) >> 11, i.e. roughly
+ * input * 2 * sqrt(2) (2 + 1697/2048 = 2.8286). Reads in[] with
+ * stride in_s, writes out[] with stride out_s; range is not used. */
{
for (int i = 0; i < 16; i++)
out[out_s * i] = 2 * in[in_s * i] + ((in[in_s * i] * 1697 + 1024) >> 11);
}
-static void NOINLINE
-inv_identity32_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s, const int range)
+void dav1d_inv_identity32_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int range) /* 32-point identity kernel: every output is four times its
+ * input. Reads in[] with stride in_s, writes out[] with stride
+ * out_s; range is not used. */
{
for (int i = 0; i < 32; i++)
out[out_s * i] = in[in_s * i] * 4;
}
-static void NOINLINE
-inv_wht4_1d(const coef *const in, const ptrdiff_t in_s,
- coef *const out, const ptrdiff_t out_s,
- const int pass)
+void dav1d_inv_wht4_1d_c(const int32_t *const in, const ptrdiff_t in_s,
+ int32_t *const out, const ptrdiff_t out_s,
+ const int pass)
{
const int sh = 2 * !pass;
const int in0 = in[0 * in_s] >> sh, in1 = in[1 * in_s] >> sh;
--- /dev/null
+++ b/src/itx_1d.h
@@ -1,0 +1,74 @@
+/*
+ * Copyright © 2018-2019, VideoLAN and dav1d authors
+ * Copyright © 2018-2019, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_ITX_1D_H
+#define DAV1D_SRC_ITX_1D_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * Declares a 1-D transform kernel with the signature shared by the kernels
+ * listed below:
+ *   in,  in_s   source values and the element stride between them
+ *   out, out_s  destination and the element stride between results
+ *   range       kernel-specific; the definitions name it max, range or pass
+ * The name is parenthesized so that the typedef below can reuse the macro
+ * with "*itx_1d_fn" to build the matching function pointer type.
+ */
+#define decl_itx_1d_fn(name) \
+void (name)(const int32_t *in, ptrdiff_t in_s, \
+ int32_t *out, ptrdiff_t out_s, const int range)
+typedef decl_itx_1d_fn(*itx_1d_fn);
+
+/* inv_dct kernels for 4, 8, 16, 32 and 64 points. */
+decl_itx_1d_fn(dav1d_inv_dct4_1d_c);
+decl_itx_1d_fn(dav1d_inv_dct8_1d_c);
+decl_itx_1d_fn(dav1d_inv_dct16_1d_c);
+decl_itx_1d_fn(dav1d_inv_dct32_1d_c);
+decl_itx_1d_fn(dav1d_inv_dct64_1d_c);
+
+/* inv_adst kernels for 4, 8 and 16 points. */
+decl_itx_1d_fn(dav1d_inv_adst4_1d_c);
+decl_itx_1d_fn(dav1d_inv_adst8_1d_c);
+decl_itx_1d_fn(dav1d_inv_adst16_1d_c);
+
+/* inv_flipadst kernels: the matching inv_adst with its output order reversed. */
+decl_itx_1d_fn(dav1d_inv_flipadst4_1d_c);
+decl_itx_1d_fn(dav1d_inv_flipadst8_1d_c);
+decl_itx_1d_fn(dav1d_inv_flipadst16_1d_c);
+
+/* inv_identity kernels for 4, 8, 16 and 32 points. */
+decl_itx_1d_fn(dav1d_inv_identity4_1d_c);
+decl_itx_1d_fn(dav1d_inv_identity8_1d_c);
+decl_itx_1d_fn(dav1d_inv_identity16_1d_c);
+decl_itx_1d_fn(dav1d_inv_identity32_1d_c);
+
+/* inv_wht4 kernel; its last argument is named pass (itx_tmpl.c passes 0, then 1). */
+decl_itx_1d_fn(dav1d_inv_wht4_1d_c);
+
+#endif /* DAV1D_SRC_ITX_1D_H */
--- a/src/itx_tmpl.c
+++ b/src/itx_tmpl.c
@@ -1,6 +1,6 @@
/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
+ * Copyright © 2018-2019, VideoLAN and dav1d authors
+ * Copyright © 2018-2019, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,12 +35,8 @@
#include "common/intops.h"
#include "src/itx.h"
+#include "src/itx_1d.h"
-#include "src/itx_1d.c"
-
-typedef void (*itx_1d_fn)(const coef *in, ptrdiff_t in_s,
- coef *out, ptrdiff_t out_s, const int range);
-
static void NOINLINE
inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
coef *const coeff, const int eob,
@@ -73,29 +69,21 @@
const ptrdiff_t sh = imin(h, 32), sw = imin(w, 32);
// Maximum value for h and w is 64
- coef tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
+ int32_t tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
const int row_clip_max = (1 << (bitdepth + 8 - 1)) - 1;
- const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1;
+ const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) - 1;
if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
for (i = 0; i < sh; i++) {
- if (w != sw || is_rect2) {
- for (j = 0; j < sw; j++) {
- in_mem[j] = coeff[i + j * sh];
- if (is_rect2)
- in_mem[j] = (in_mem[j] * 2896 + 2048) >> 12;
- }
- first_1d_fn(in_mem, 1, &tmp[i * w], 1, row_clip_max);
- } else {
- first_1d_fn(&coeff[i], sh, &tmp[i * w], 1, row_clip_max);
+ for (j = 0; j < sw; j++) {
+ in_mem[j] = coeff[i + j * sh];
+ if (is_rect2)
+ in_mem[j] = (in_mem[j] * 2896 + 2048) >> 12;
}
+ first_1d_fn(in_mem, 1, &tmp[i * w], 1, row_clip_max);
for (j = 0; j < w; j++)
-#if BITDEPTH == 8
- tmp[i * w + j] = (tmp[i * w + j] + rnd) >> shift;
-#else
tmp[i * w + j] = iclip((tmp[i * w + j] + rnd) >> shift,
-col_clip_max - 1, col_clip_max);
-#endif
}
if (h != sh) memset(&tmp[sh * w], 0, w * (h - sh) * sizeof(*tmp));
@@ -118,8 +106,8 @@
HIGHBD_DECL_SUFFIX) \
{ \
inv_txfm_add_c(dst, stride, coeff, eob, w, h, shift, \
- inv_##type1##w##_1d, inv_##type2##h##_1d, has_dconly \
- HIGHBD_TAIL_SUFFIX); \
+ dav1d_inv_##type1##w##_1d_c, dav1d_inv_##type2##h##_1d_c, \
+ has_dconly HIGHBD_TAIL_SUFFIX); \
}
#define inv_txfm_fn64(w, h, shift) \
@@ -176,15 +164,18 @@
const int bitdepth = bitdepth_from_max(bitdepth_max);
const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1;
const int col_clip_min = -col_clip_max - 1;
- coef tmp[4 * 4], out[4];
+ int32_t tmp[4 * 4], out[4], in_mem[4];
- for (int i = 0; i < 4; i++)
- inv_wht4_1d(&coeff[i], 4, &tmp[i * 4], 1, 0);
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++)
+ in_mem[j] = coeff[i + j * 4];
+ dav1d_inv_wht4_1d_c(in_mem, 1, &tmp[i * 4], 1, 0);
+ }
for (int k = 0; k < 4 * 4; k++)
tmp[k] = iclip(tmp[k], col_clip_min, col_clip_max);
for (int i = 0; i < 4; i++) {
- inv_wht4_1d(&tmp[i], 4, out, 1, 1);
+ dav1d_inv_wht4_1d_c(&tmp[i], 4, out, 1, 1);
for (int j = 0; j < 4; j++)
dst[i + j * PXSTRIDE(stride)] =
iclip_pixel(dst[i + j * PXSTRIDE(stride)] + out[j]);
--- a/src/meson.build
+++ b/src/meson.build
@@ -35,6 +35,7 @@
'dequant_tables.c',
'getbits.c',
'intra_edge.c',
+ 'itx_1d.c',
'lf_mask.c',
'log.c',
'msac.c',