ref: c3980e394d32ed832dfd65decde5f210c03b2f27
parent: 2e6c8a92d25234cb27651a76760fd2b50591bc51
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Wed Dec 5 13:21:05 EST 2018
12 bits/component support
--- a/include/common/bitdepth.h
+++ b/include/common/bitdepth.h
@@ -34,6 +34,9 @@
#if !defined(BITDEPTH)
typedef void pixel;
typedef void coef;
+#define HIGHBD_DECL_SUFFIX /* nothing */
+#define HIGHBD_CALL_SUFFIX /* nothing */
+#define HIGHBD_TAIL_SUFFIX /* nothing */
#elif BITDEPTH == 8
typedef uint8_t pixel;
typedef int16_t coef;
@@ -41,28 +44,37 @@
#define pixel_set memset
#define iclip_pixel iclip_u8
#define PIX_HEX_FMT "%02x"
-#define bytefn(x) x##_8bpc
#define bitfn(x) x##_8bpc
#define PXSTRIDE(x) x
-#elif BITDEPTH == 10 || BITDEPTH == 12
+#define highbd_only(x)
+#define HIGHBD_DECL_SUFFIX /* nothing */
+#define HIGHBD_CALL_SUFFIX /* nothing */
+#define HIGHBD_TAIL_SUFFIX /* nothing */
+#define bitdepth_from_max(x) 8
+#elif BITDEPTH == 16
typedef uint16_t pixel;
typedef int32_t coef;
#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
-#define iclip_pixel(x) iclip(x, 0, ((1 << BITDEPTH) - 1))
static inline void pixel_set(pixel *const dst, const int val, const int num) {
for (int n = 0; n < num; n++)
dst[n] = val;
}
#define PIX_HEX_FMT "%03x"
-#define bytefn(x) x##_16bpc
-#if BITDEPTH == 10
-#define bitfn(x) x##_10bpc
-#else
-#define bitfn(x) x##_12bpc
-#endif
+#define iclip_pixel(x) iclip(x, 0, bitdepth_max)
+#define HIGHBD_DECL_SUFFIX , const int bitdepth_max
+#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
+#define HIGHBD_TAIL_SUFFIX , bitdepth_max
+#define bitdepth_from_max(bitdepth_max) (32 - clz(bitdepth_max))
+#define bitfn(x) x##_16bpc
#define PXSTRIDE(x) (x >> 1)
+#define highbd_only(x) x
#else
#error invalid value for bitdepth
#endif
+#define bytefn(x) bitfn(x)
+
+#define bitfn_decls(name, ...) \
+name##_8bpc(__VA_ARGS__); \
+name##_16bpc(__VA_ARGS__)
#endif /* __DAV1D_COMMON_BITDEPTH_H__ */
--- a/meson.build
+++ b/meson.build
@@ -55,7 +55,7 @@
# Bitdepth option
dav1d_bitdepths = get_option('bitdepths')
-foreach bitdepth : ['8', '10']
+foreach bitdepth : ['8', '16']
cdata.set10('CONFIG_@0@BPC'.format(bitdepth), dav1d_bitdepths.contains(bitdepth))
endforeach
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -2,7 +2,7 @@
option('bitdepths',
type: 'array',
- choices: ['8', '10'],
+ choices: ['8', '16'],
description: 'Enable only specified bitdepths')
option('build_asm',
--- a/src/cdef.h
+++ b/src/cdef.h
@@ -53,11 +53,11 @@
#define decl_cdef_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const_left_pixel_row_2px left, \
/*const*/ pixel *const top[2], int pri_strength, int sec_strength, \
- int dir, int damping, enum CdefEdgeFlags edges)
+ int dir, int damping, enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX)
typedef decl_cdef_fn(*cdef_fn);
#define decl_cdef_dir_fn(name) \
-int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var)
+int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var HIGHBD_DECL_SUFFIX)
typedef decl_cdef_dir_fn(*cdef_dir_fn);
typedef struct Dav1dCdefDSPContext {
@@ -65,10 +65,7 @@
cdef_fn fb[3 /* 444/luma, 422, 420 */];
} Dav1dCdefDSPContext;
-void dav1d_cdef_dsp_init_8bpc(Dav1dCdefDSPContext *c);
-void dav1d_cdef_dsp_init_10bpc(Dav1dCdefDSPContext *c);
-
-void dav1d_cdef_dsp_init_x86_8bpc(Dav1dCdefDSPContext *c);
-void dav1d_cdef_dsp_init_x86_10bpc(Dav1dCdefDSPContext *c);
+bitfn_decls(void dav1d_cdef_dsp_init, Dav1dCdefDSPContext *c);
+bitfn_decls(void dav1d_cdef_dsp_init_x86, Dav1dCdefDSPContext *c);
#endif /* __DAV1D_SRC_CDEF_H__ */
--- a/src/cdef_apply_tmpl.c
+++ b/src/cdef_apply_tmpl.c
@@ -83,12 +83,13 @@
const Av1Filter *const lflvl,
const int by_start, const int by_end)
{
+ const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
const Dav1dDSPContext *const dsp = f->dsp;
enum CdefEdgeFlags edges = HAVE_BOTTOM | (by_start > 0 ? HAVE_TOP : 0);
pixel *ptrs[3] = { p[0], p[1], p[2] };
const int sbsz = 16;
const int sb64w = f->sb128w << 1;
- const int damping = f->frame_hdr->cdef.damping + BITDEPTH - 8;
+ const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
const enum Dav1dPixelLayout layout = f->cur.p.layout;
const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
@@ -156,17 +157,17 @@
}
// the actual filter
- const int y_pri_lvl = (y_lvl >> 2) << (BITDEPTH - 8);
+ const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
int y_sec_lvl = y_lvl & 3;
y_sec_lvl += y_sec_lvl == 3;
- y_sec_lvl <<= BITDEPTH - 8;
- const int uv_pri_lvl = (uv_lvl >> 2) << (BITDEPTH - 8);
+ y_sec_lvl <<= bitdepth_min_8;
+ const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
int uv_sec_lvl = uv_lvl & 3;
uv_sec_lvl += uv_sec_lvl == 3;
- uv_sec_lvl <<= BITDEPTH - 8;
+ uv_sec_lvl <<= bitdepth_min_8;
unsigned variance;
const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
- &variance);
+ &variance HIGHBD_CALL_SUFFIX);
if (y_lvl) {
dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
(pixel *const [2]) {
@@ -175,7 +176,7 @@
},
adjust_strength(y_pri_lvl, variance),
y_sec_lvl, y_pri_lvl ? dir : 0,
- damping, edges);
+ damping, edges HIGHBD_CALL_SUFFIX);
}
if (uv_lvl && has_chroma) {
const int uvdir =
@@ -190,7 +191,7 @@
},
uv_pri_lvl, uv_sec_lvl,
uv_pri_lvl ? uvdir : 0,
- damping - 1, edges);
+ damping - 1, edges HIGHBD_CALL_SUFFIX);
}
}
--- a/src/cdef_tmpl.c
+++ b/src/cdef_tmpl.c
@@ -97,7 +97,8 @@
const pixel (*left)[2], /*const*/ pixel *const top[2],
const int w, const int h, const int pri_strength,
const int sec_strength, const int dir,
- const int damping, const enum CdefEdgeFlags edges)
+ const int damping, const enum CdefEdgeFlags edges
+ HIGHBD_DECL_SUFFIX)
{
static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
{ -1 * 12 + 1, -2 * 12 + 2 },
@@ -115,7 +116,8 @@
assert((w == 4 || w == 8) && (h == 4 || h == 8));
uint16_t tmp_buf[144]; // 12*12 is the maximum value of tmp_stride * (h + 4)
uint16_t *tmp = tmp_buf + 2 * tmp_stride + 2;
- const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+ const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> bitdepth_min_8) & 1];
padding(tmp, tmp_stride, dst, dst_stride, left, top, w, h, edges);
@@ -170,10 +172,11 @@
const int sec_strength, \
const int dir, \
const int damping, \
- const enum CdefEdgeFlags edges) \
+ const enum CdefEdgeFlags edges \
+ HIGHBD_DECL_SUFFIX) \
{ \
cdef_filter_block_c(dst, stride, left, top, w, h, pri_strength, sec_strength, \
- dir, damping, edges); \
+ dir, damping, edges HIGHBD_TAIL_SUFFIX); \
}
cdef_fn(4, 4);
@@ -181,8 +184,9 @@
cdef_fn(8, 8);
static int cdef_find_dir_c(const pixel *img, const ptrdiff_t stride,
- unsigned *const var)
+ unsigned *const var HIGHBD_DECL_SUFFIX)
{
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
int partial_sum_hv[2][8] = { { 0 } };
int partial_sum_diag[2][15] = { { 0 } };
int partial_sum_alt[4][11] = { { 0 } };
@@ -189,7 +193,7 @@
for (int y = 0; y < 8; y++) {
for (int x = 0; x < 8; x++) {
- const int px = (img[x] >> (BITDEPTH - 8)) - 128;
+ const int px = (img[x] >> bitdepth_min_8) - 128;
partial_sum_diag[0][ y + x ] += px;
partial_sum_alt [0][ y + (x >> 1)] += px;
--- a/src/decode.c
+++ b/src/decode.c
@@ -3013,7 +3013,6 @@
switch (bpc) {
#define assign_bitdepth_case(bd) \
- case bd: \
dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
dav1d_itx_dsp_init_##bd##bpc(&dsp->itx); \
@@ -3022,10 +3021,13 @@
dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
break
#if CONFIG_8BPC
- assign_bitdepth_case(8);
+ case 8:
+ assign_bitdepth_case(8);
#endif
-#if CONFIG_10BPC
- assign_bitdepth_case(10);
+#if CONFIG_16BPC
+ case 10:
+ case 12:
+ assign_bitdepth_case(16);
#endif
#undef assign_bitdepth_case
default:
@@ -3047,7 +3049,7 @@
assign_bitdepth_case(8);
#endif
} else {
-#if CONFIG_10BPC
+#if CONFIG_16BPC
assign_bitdepth_case(16);
#endif
}
@@ -3168,6 +3170,7 @@
f->sb_step = 16 << f->seq_hdr->sb128;
f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
f->b4_stride = (f->bw + 31) & ~31;
+ f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
// ref_mvs
if ((f->frame_hdr->frame_type & 1) || f->frame_hdr->allow_intrabc) {
--- a/src/dequant_tables.c
+++ b/src/dequant_tables.c
@@ -160,5 +160,70 @@
{ 3586, 5916, }, { 3702, 6032, }, { 3823, 6148, }, { 3953, 6268, },
{ 4089, 6388, }, { 4236, 6512, }, { 4394, 6640, }, { 4559, 6768, },
{ 4737, 6900, }, { 4929, 7036, }, { 5130, 7172, }, { 5347, 7312, },
+ }, {
+ { 4, 4 }, { 12, 13 }, { 18, 19 }, { 25, 27 },
+ { 33, 35 }, { 41, 44 }, { 50, 54 }, { 60, 64 },
+ { 70, 75 }, { 80, 87 }, { 91, 99 }, { 103, 112 },
+ { 115, 126 }, { 127, 139 }, { 140, 154 }, { 153, 168 },
+ { 166, 183 }, { 180, 199 }, { 194, 214 }, { 208, 230 },
+ { 222, 247 }, { 237, 263 }, { 251, 280 }, { 266, 297 },
+ { 281, 314 }, { 296, 331 }, { 312, 349 }, { 327, 366 },
+ { 343, 384 }, { 358, 402 }, { 374, 420 }, { 390, 438 },
+ { 405, 456 }, { 421, 475 }, { 437, 493 }, { 453, 511 },
+ { 469, 530 }, { 484, 548 }, { 500, 567 }, { 516, 586 },
+ { 532, 604 }, { 548, 623 }, { 564, 642 }, { 580, 660 },
+ { 596, 679 }, { 611, 698 }, { 627, 716 }, { 643, 735 },
+ { 659, 753 }, { 674, 772 }, { 690, 791 }, { 706, 809 },
+ { 721, 828 }, { 737, 846 }, { 752, 865 }, { 768, 884 },
+ { 783, 902 }, { 798, 920 }, { 814, 939 }, { 829, 957 },
+ { 844, 976 }, { 859, 994 }, { 874, 1012 }, { 889, 1030 },
+ { 904, 1049 }, { 919, 1067 }, { 934, 1085 }, { 949, 1103 },
+ { 964, 1121 }, { 978, 1139 }, { 993, 1157 }, { 1008, 1175 },
+ { 1022, 1193 }, { 1037, 1211 }, { 1051, 1229 }, { 1065, 1246 },
+ { 1080, 1264 }, { 1094, 1282 }, { 1108, 1299 }, { 1122, 1317 },
+ { 1136, 1335 }, { 1151, 1352 }, { 1165, 1370 }, { 1179, 1387 },
+ { 1192, 1405 }, { 1206, 1422 }, { 1220, 1440 }, { 1234, 1457 },
+ { 1248, 1474 }, { 1261, 1491 }, { 1275, 1509 }, { 1288, 1526 },
+ { 1302, 1543 }, { 1315, 1560 }, { 1329, 1577 }, { 1342, 1595 },
+ { 1368, 1627 }, { 1393, 1660 }, { 1419, 1693 }, { 1444, 1725 },
+ { 1469, 1758 }, { 1494, 1791 }, { 1519, 1824 }, { 1544, 1856 },
+ { 1569, 1889 }, { 1594, 1922 }, { 1618, 1954 }, { 1643, 1987 },
+ { 1668, 2020 }, { 1692, 2052 }, { 1717, 2085 }, { 1741, 2118 },
+ { 1765, 2150 }, { 1789, 2183 }, { 1814, 2216 }, { 1838, 2248 },
+ { 1862, 2281 }, { 1885, 2313 }, { 1909, 2346 }, { 1933, 2378 },
+ { 1957, 2411 }, { 1992, 2459 }, { 2027, 2508 }, { 2061, 2556 },
+ { 2096, 2605 }, { 2130, 2653 }, { 2165, 2701 }, { 2199, 2750 },
+ { 2233, 2798 }, { 2267, 2847 }, { 2300, 2895 }, { 2334, 2943 },
+ { 2367, 2992 }, { 2400, 3040 }, { 2434, 3088 }, { 2467, 3137 },
+ { 2499, 3185 }, { 2532, 3234 }, { 2575, 3298 }, { 2618, 3362 },
+ { 2661, 3426 }, { 2704, 3491 }, { 2746, 3555 }, { 2788, 3619 },
+ { 2830, 3684 }, { 2872, 3748 }, { 2913, 3812 }, { 2954, 3876 },
+ { 2995, 3941 }, { 3036, 4005 }, { 3076, 4069 }, { 3127, 4149 },
+ { 3177, 4230 }, { 3226, 4310 }, { 3275, 4390 }, { 3324, 4470 },
+ { 3373, 4550 }, { 3421, 4631 }, { 3469, 4711 }, { 3517, 4791 },
+ { 3565, 4871 }, { 3621, 4967 }, { 3677, 5064 }, { 3733, 5160 },
+ { 3788, 5256 }, { 3843, 5352 }, { 3897, 5448 }, { 3951, 5544 },
+ { 4005, 5641 }, { 4058, 5737 }, { 4119, 5849 }, { 4181, 5961 },
+ { 4241, 6073 }, { 4301, 6185 }, { 4361, 6297 }, { 4420, 6410 },
+ { 4479, 6522 }, { 4546, 6650 }, { 4612, 6778 }, { 4677, 6906 },
+ { 4742, 7034 }, { 4807, 7162 }, { 4871, 7290 }, { 4942, 7435 },
+ { 5013, 7579 }, { 5083, 7723 }, { 5153, 7867 }, { 5222, 8011 },
+ { 5291, 8155 }, { 5367, 8315 }, { 5442, 8475 }, { 5517, 8635 },
+ { 5591, 8795 }, { 5665, 8956 }, { 5745, 9132 }, { 5825, 9308 },
+ { 5905, 9484 }, { 5984, 9660 }, { 6063, 9836 }, { 6149, 10028 },
+ { 6234, 10220 }, { 6319, 10412 }, { 6404, 10604 }, { 6495, 10812 },
+ { 6587, 11020 }, { 6678, 11228 }, { 6769, 11437 }, { 6867, 11661 },
+ { 6966, 11885 }, { 7064, 12109 }, { 7163, 12333 }, { 7269, 12573 },
+ { 7376, 12813 }, { 7483, 13053 }, { 7599, 13309 }, { 7715, 13565 },
+ { 7832, 13821 }, { 7958, 14093 }, { 8085, 14365 }, { 8214, 14637 },
+ { 8352, 14925 }, { 8492, 15213 }, { 8635, 15502 }, { 8788, 15806 },
+ { 8945, 16110 }, { 9104, 16414 }, { 9275, 16734 }, { 9450, 17054 },
+ { 9639, 17390 }, { 9832, 17726 }, { 10031, 18062 }, { 10245, 18414 },
+ { 10465, 18766 }, { 10702, 19134 }, { 10946, 19502 }, { 11210, 19886 },
+ { 11482, 20270 }, { 11776, 20670 }, { 12081, 21070 }, { 12409, 21486 },
+ { 12750, 21902 }, { 13118, 22334 }, { 13501, 22766 }, { 13913, 23214 },
+ { 14343, 23662 }, { 14807, 24126 }, { 15290, 24590 }, { 15812, 25070 },
+ { 16356, 25551 }, { 16943, 26047 }, { 17575, 26559 }, { 18237, 27071 },
+ { 18949, 27599 }, { 19718, 28143 }, { 20521, 28687 }, { 21387, 29247 },
}
};
--- a/src/film_grain.h
+++ b/src/film_grain.h
@@ -30,10 +30,7 @@
#include "dav1d/dav1d.h"
-void dav1d_apply_grain_8bpc(Dav1dPicture *const out,
- const Dav1dPicture *const in);
-
-void dav1d_apply_grain_10bpc(Dav1dPicture *const out,
- const Dav1dPicture *const in);
+bitfn_decls(void dav1d_apply_grain, Dav1dPicture *const out,
+ const Dav1dPicture *const in);
#endif /* __DAV1D_SRC_FILM_GRAIN_H__ */
--- a/src/film_grain_tmpl.c
+++ b/src/film_grain_tmpl.c
@@ -51,7 +51,11 @@
SUB_GRAIN_HEIGHT = 38,
SUB_GRAIN_OFFSET = 6,
BLOCK_SIZE = 32,
- SCALING_SIZE = 1 << BITDEPTH,
+#if BITDEPTH == 8
+ SCALING_SIZE = 256
+#else
+ SCALING_SIZE = 4096
+#endif
};
static inline int get_random_number(const int bits, unsigned *state) {
@@ -66,18 +70,14 @@
return (x + ((1 << shift) >> 1)) >> shift;
}
-enum {
- GRAIN_CENTER = 128 << (BITDEPTH - 8),
- GRAIN_MIN = -GRAIN_CENTER,
- GRAIN_MAX = (256 << (BITDEPTH - 8)) - 1 - GRAIN_CENTER,
-};
-
static void generate_grain_y(const Dav1dPicture *const in,
entry buf[GRAIN_HEIGHT][GRAIN_WIDTH])
{
const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data;
unsigned seed = data->seed;
- const int shift = 12 - BITDEPTH + data->grain_scale_shift;
+ const int shift = 12 - in->p.bpc + data->grain_scale_shift;
+ const int grain_ctr = 128 << (in->p.bpc - 8);
+ const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
for (int y = 0; y < GRAIN_HEIGHT; y++) {
for (int x = 0; x < GRAIN_WIDTH; x++) {
@@ -102,7 +102,7 @@
}
int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
- buf[y][x] = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ buf[y][x] = iclip(grain, grain_min, grain_max);
}
}
}
@@ -113,7 +113,9 @@
{
const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data;
unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524);
- const int shift = 12 - BITDEPTH + data->grain_scale_shift;
+ const int shift = 12 - in->p.bpc + data->grain_scale_shift;
+ const int grain_ctr = 128 << (in->p.bpc - 8);
+ const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
const int subx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
@@ -160,15 +162,17 @@
}
const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
- buf[y][x] = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ buf[y][x] = iclip(grain, grain_min, grain_max);
}
}
}
-static void generate_scaling(const uint8_t points[][2], int num,
+static void generate_scaling(const int bitdepth,
+ const uint8_t points[][2], int num,
uint8_t scaling[SCALING_SIZE])
{
- const int shift_x = BITDEPTH - 8;
+ const int shift_x = bitdepth - 8;
+ const int scaling_size = 1 << bitdepth;
// Fill up the preceding entries with the initial value
for (int i = 0; i < points[0][0] << shift_x; i++)
@@ -190,7 +194,7 @@
}
// Fill up the remaining entries with the final value
- for (int i = points[num - 1][0] << shift_x; i < SCALING_SIZE; i++)
+ for (int i = points[num - 1][0] << shift_x; i < scaling_size; i++)
scaling[i] = points[num - 1][1];
}
@@ -213,14 +217,17 @@
{
const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
const int rows = 1 + (data->overlap_flag && row_num > 0);
+ const int bitdepth_min_8 = in->p.bpc - 8;
+ const int grain_ctr = 128 << bitdepth_min_8;
+ const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
int min_value, max_value;
if (data->clip_to_restricted_range) {
- min_value = 16 << (BITDEPTH - 8);
- max_value = 235 << (BITDEPTH - 8);
+ min_value = 16 << bitdepth_min_8;
+ max_value = 235 << bitdepth_min_8;
} else {
min_value = 0;
- max_value = (1 << BITDEPTH) - 1;
+ max_value = (1U << in->p.bpc) - 1;
}
// seed[0] contains the current row, seed[1] contains the previous
@@ -278,7 +285,7 @@
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
grain = round2(old * w[x][0] + grain * w[x][1], 5);
- grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ grain = iclip(grain, grain_min, grain_max);
add_noise_y(x, y, grain);
}
}
@@ -289,7 +296,7 @@
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
grain = round2(old * w[y][0] + grain * w[y][1], 5);
- grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ grain = iclip(grain, grain_min, grain_max);
add_noise_y(x, y, grain);
}
@@ -299,17 +306,17 @@
int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y);
top = round2(old * w[x][0] + top * w[x][1], 5);
- top = iclip(top, GRAIN_MIN, GRAIN_MAX);
+ top = iclip(top, grain_min, grain_max);
// Blend the current pixel with the left block
int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
grain = round2(old * w[x][0] + grain * w[x][1], 5);
- grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ grain = iclip(grain, grain_min, grain_max);
// Mix the row rows together and apply grain
grain = round2(top * w[y][0] + grain * w[y][1], 5);
- grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ grain = iclip(grain, grain_min, grain_max);
add_noise_y(x, y, grain);
}
}
@@ -322,18 +329,22 @@
{
const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
const int rows = 1 + (data->overlap_flag && row_num > 0);
+ const int bitdepth_max = (1 << in->p.bpc) - 1;
+ const int bitdepth_min_8 = in->p.bpc - 8;
+ const int grain_ctr = 128 << bitdepth_min_8;
+ const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
int min_value, max_value;
if (data->clip_to_restricted_range) {
- min_value = 16 << (BITDEPTH - 8);
+ min_value = 16 << bitdepth_min_8;
if (out->seq_hdr->mtrx == DAV1D_MC_IDENTITY) {
- max_value = 235 << (BITDEPTH - 8);
+ max_value = 235 << bitdepth_min_8;
} else {
- max_value = 240 << (BITDEPTH - 8);
+ max_value = 240 << bitdepth_min_8;
}
} else {
min_value = 0;
- max_value = (1 << BITDEPTH) - 1;
+ max_value = bitdepth_max;
}
const int sx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
@@ -396,7 +407,7 @@
int combined = avg * data->uv_luma_mult[uv] + \
*src * data->uv_mult[uv]; \
val = iclip_pixel( (combined >> 6) + \
- (data->uv_offset[uv] * (1 << (BITDEPTH - 8))) ); \
+ (data->uv_offset[uv] * (1 << bitdepth_min_8)) ); \
} \
\
int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \
@@ -414,7 +425,7 @@
int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
grain = (old * w[sx][x][0] + grain * w[sx][x][1] + 16) >> 5;
- grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ grain = iclip(grain, grain_min, grain_max);
add_noise_uv(x, y, grain);
}
}
@@ -425,7 +436,7 @@
int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
int old = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
grain = (old * w[sy][y][0] + grain * w[sy][y][1] + 16) >> 5;
- grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ grain = iclip(grain, grain_min, grain_max);
add_noise_uv(x, y, grain);
}
@@ -435,17 +446,17 @@
int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y);
top = (old * w[sx][x][0] + top * w[sx][x][1] + 16) >> 5;
- top = iclip(top, GRAIN_MIN, GRAIN_MAX);
+ top = iclip(top, grain_min, grain_max);
// Blend the current pixel with the left block
int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
grain = (old * w[sx][x][0] + grain * w[sx][x][1] + 16) >> 5;
- grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ grain = iclip(grain, grain_min, grain_max);
// Mix the row rows together and apply to image
grain = (top * w[sy][y][0] + grain * w[sy][y][1] + 16) >> 5;
- grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+ grain = iclip(grain, grain_min, grain_max);
add_noise_uv(x, y, grain);
}
}
@@ -469,11 +480,11 @@
// Generate scaling LUTs as needed
if (data->num_y_points)
- generate_scaling(data->y_points, data->num_y_points, scaling[0]);
+ generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
if (data->num_uv_points[0])
- generate_scaling(data->uv_points[0], data->num_uv_points[0], scaling[1]);
+ generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
if (data->num_uv_points[1])
- generate_scaling(data->uv_points[1], data->num_uv_points[1], scaling[2]);
+ generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
// Copy over the non-modified planes
// TODO: eliminate in favor of per-plane refs
--- a/src/internal.h
+++ b/src/internal.h
@@ -176,6 +176,7 @@
int a_sz /* w*tile_rows */;
AV1_COMMON *libaom_cm; // FIXME
uint8_t jnt_weights[7][7];
+ int bitdepth_max;
struct {
struct thread_data td;
--- a/src/ipred.h
+++ b/src/ipred.h
@@ -43,7 +43,8 @@
*/
#define decl_angular_ipred_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \
- int width, int height, int angle, int max_width, int max_height)
+ int width, int height, int angle, int max_width, int max_height \
+ HIGHBD_DECL_SUFFIX)
typedef decl_angular_ipred_fn(*angular_ipred_fn);
/*
@@ -63,7 +64,8 @@
*/
#define decl_cfl_pred_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \
- int width, int height, const int16_t *ac, int alpha)
+ int width, int height, const int16_t *ac, int alpha \
+ HIGHBD_DECL_SUFFIX)
typedef decl_cfl_pred_fn(*cfl_pred_fn);
/*
@@ -86,10 +88,7 @@
pal_pred_fn pal_pred;
} Dav1dIntraPredDSPContext;
-void dav1d_intra_pred_dsp_init_8bpc(Dav1dIntraPredDSPContext *c);
-void dav1d_intra_pred_dsp_init_10bpc(Dav1dIntraPredDSPContext *c);
-
-void dav1d_intra_pred_dsp_init_x86_8bpc(Dav1dIntraPredDSPContext *c);
-void dav1d_intra_pred_dsp_init_x86_10bpc(Dav1dIntraPredDSPContext *c);
+bitfn_decls(void dav1d_intra_pred_dsp_init, Dav1dIntraPredDSPContext *c);
+bitfn_decls(void dav1d_intra_pred_dsp_init_x86, Dav1dIntraPredDSPContext *c);
#endif /* __DAV1D_SRC_IPRED_H__ */
--- a/src/ipred_prepare.h
+++ b/src/ipred_prepare.h
@@ -81,7 +81,8 @@
const pixel *dst, ptrdiff_t stride,
const pixel *prefilter_toplevel_sb_edge,
enum IntraPredMode mode, int *angle,
- int tw, int th, pixel *topleft_out);
+ int tw, int th, pixel *topleft_out
+ HIGHBD_DECL_SUFFIX);
// These flags are OR'd with the angle argument into intra predictors.
// ANGLE_USE_EDGE_FILTER_FLAG signals that edges should be convolved
--- a/src/ipred_prepare_tmpl.c
+++ b/src/ipred_prepare_tmpl.c
@@ -83,8 +83,9 @@
const pixel *prefilter_toplevel_sb_edge,
enum IntraPredMode mode, int *const angle,
const int tw, const int th,
- pixel *const topleft_out)
+ pixel *const topleft_out HIGHBD_DECL_SUFFIX)
{
+ const int bitdepth = bitdepth_from_max(bitdepth_max);
assert(y < h && x < w);
switch (mode) {
@@ -144,7 +145,7 @@
if (px_have < sz)
pixel_set(left, left[sz - px_have], sz - px_have);
} else {
- pixel_set(left, have_top ? *dst_top : ((1 << BITDEPTH) >> 1) + 1, sz);
+ pixel_set(left, have_top ? *dst_top : ((1 << bitdepth) >> 1) + 1, sz);
}
if (av1_intra_prediction_edges[mode].needs_bottomleft) {
@@ -174,7 +175,7 @@
if (px_have < sz)
pixel_set(top + px_have, top[px_have - 1], sz - px_have);
} else {
- pixel_set(top, have_left ? dst[-1] : ((1 << BITDEPTH) >> 1) - 1, sz);
+ pixel_set(top, have_left ? dst[-1] : ((1 << bitdepth) >> 1) - 1, sz);
}
if (av1_intra_prediction_edges[mode].needs_topright) {
@@ -198,7 +199,7 @@
if (have_left) {
*topleft_out = have_top ? dst_top[-1] : dst[-1];
} else {
- *topleft_out = have_top ? *dst_top : (1 << BITDEPTH) >> 1;
+ *topleft_out = have_top ? *dst_top : (1 << bitdepth) >> 1;
}
if (mode == Z2_PRED && tw + th >= 6)
*topleft_out = (topleft_out[-1] * 5 + topleft_out[0] * 6 +
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -39,10 +39,10 @@
static NOINLINE void
splat_dc(pixel *dst, const ptrdiff_t stride,
- const int width, const int height, const unsigned dc)
+ const int width, const int height, const int dc HIGHBD_DECL_SUFFIX)
{
- assert(dc <= (1 << BITDEPTH) - 1);
#if BITDEPTH == 8
+ assert(dc <= 0xff);
if (width > 4) {
const uint64_t dcN = dc * 0x0101010101010101ULL;
for (int y = 0; y < height; y++) {
@@ -59,6 +59,7 @@
}
}
#else
+ assert(dc <= bitdepth_max);
const uint64_t dcN = dc * 0x0001000100010001ULL;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x += sizeof(dcN) >> 1)
@@ -70,8 +71,8 @@
static NOINLINE void
cfl_pred(pixel *dst, const ptrdiff_t stride,
- const int width, const int height, const unsigned dc,
- const int16_t *ac, const int alpha)
+ const int width, const int height, const int dc,
+ const int16_t *ac, const int alpha HIGHBD_DECL_SUFFIX)
{
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
@@ -93,17 +94,21 @@
static void ipred_dc_top_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
- splat_dc(dst, stride, width, height, dc_gen_top(topleft, width));
+ splat_dc(dst, stride, width, height, dc_gen_top(topleft, width)
+ HIGHBD_TAIL_SUFFIX);
}
static void ipred_cfl_top_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height,
- const int16_t *ac, const int alpha)
+ const int16_t *ac, const int alpha
+ HIGHBD_DECL_SUFFIX)
{
- cfl_pred(dst, stride, width, height, dc_gen_top(topleft, width), ac, alpha);
+ cfl_pred(dst, stride, width, height, dc_gen_top(topleft, width), ac, alpha
+ HIGHBD_TAIL_SUFFIX);
}
static unsigned dc_gen_left(const pixel *const topleft, const int height) {
@@ -116,18 +121,21 @@
static void ipred_dc_left_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
- splat_dc(dst, stride, width, height, dc_gen_left(topleft, height));
+ splat_dc(dst, stride, width, height, dc_gen_left(topleft, height)
+ HIGHBD_TAIL_SUFFIX);
}
static void ipred_cfl_left_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height,
- const int16_t *ac, const int alpha)
+ const int16_t *ac, const int alpha
+ HIGHBD_DECL_SUFFIX)
{
unsigned dc = dc_gen_left(topleft, height);
- cfl_pred(dst, stride, width, height, dc, ac, alpha);
+ cfl_pred(dst, stride, width, height, dc, ac, alpha HIGHBD_TAIL_SUFFIX);
}
#if BITDEPTH == 8
@@ -161,18 +169,21 @@
static void ipred_dc_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
- splat_dc(dst, stride, width, height, dc_gen(topleft, width, height));
+ splat_dc(dst, stride, width, height, dc_gen(topleft, width, height)
+ HIGHBD_TAIL_SUFFIX);
}
static void ipred_cfl_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height,
- const int16_t *ac, const int alpha)
+ const int16_t *ac, const int alpha
+ HIGHBD_DECL_SUFFIX)
{
unsigned dc = dc_gen(topleft, width, height);
- cfl_pred(dst, stride, width, height, dc, ac, alpha);
+ cfl_pred(dst, stride, width, height, dc, ac, alpha HIGHBD_TAIL_SUFFIX);
}
#undef MULTIPLIER_1x2
@@ -182,23 +193,36 @@
static void ipred_dc_128_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
- splat_dc(dst, stride, width, height, 1 << (BITDEPTH - 1));
+#if BITDEPTH == 16
+ const int dc = (bitdepth_max + 1) >> 1;
+#else
+ const int dc = 128;
+#endif
+ splat_dc(dst, stride, width, height, dc HIGHBD_TAIL_SUFFIX);
}
static void ipred_cfl_128_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height,
- const int16_t *ac, const int alpha)
+ const int16_t *ac, const int alpha
+ HIGHBD_DECL_SUFFIX)
{
- cfl_pred(dst, stride, width, height, 1 << (BITDEPTH - 1), ac, alpha);
+#if BITDEPTH == 16
+ const int dc = (bitdepth_max + 1) >> 1;
+#else
+ const int dc = 128;
+#endif
+ cfl_pred(dst, stride, width, height, dc, ac, alpha HIGHBD_TAIL_SUFFIX);
}
static void ipred_v_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
for (int y = 0; y < height; y++) {
pixel_copy(dst, topleft + 1, width);
@@ -209,7 +233,8 @@
static void ipred_h_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
for (int y = 0; y < height; y++) {
pixel_set(dst, topleft[-(1 + y)], width);
@@ -220,7 +245,8 @@
static void ipred_paeth_c(pixel *dst, const ptrdiff_t stride,
const pixel *const tl_ptr,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
const int topleft = tl_ptr[0];
for (int y = 0; y < height; y++) {
@@ -242,7 +268,8 @@
static void ipred_smooth_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
const uint8_t *const weights_hor = &dav1d_sm_weights[width];
const uint8_t *const weights_ver = &dav1d_sm_weights[height];
@@ -263,7 +290,8 @@
static void ipred_smooth_v_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
const uint8_t *const weights_ver = &dav1d_sm_weights[height];
const int bottom = topleft[-height];
@@ -281,7 +309,8 @@
static void ipred_smooth_h_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
const int width, const int height, const int a,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
const uint8_t *const weights_hor = &dav1d_sm_weights[width];
const int right = topleft[width];
@@ -367,7 +396,8 @@
}
static void upsample_edge(pixel *const out, const int hsz,
- const pixel *const in, const int from, const int to)
+ const pixel *const in, const int from, const int to
+ HIGHBD_DECL_SUFFIX)
{
static const int8_t kernel[4] = { -1, 9, 9, -1 };
int i;
@@ -385,7 +415,8 @@
static void ipred_z1_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
const int width, const int height, int angle,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
const int is_sm = (angle >> 9) & 0x1;
const int enable_intra_edge_filter = angle >> 10;
@@ -398,8 +429,8 @@
const int upsample_above = enable_intra_edge_filter ?
get_upsample(width + height, 90 - angle, is_sm) : 0;
if (upsample_above) {
- upsample_edge(top_out, width + height,
- &topleft_in[1], -1, width + imin(width, height));
+ upsample_edge(top_out, width + height, &topleft_in[1], -1,
+ width + imin(width, height) HIGHBD_TAIL_SUFFIX);
top = top_out;
max_base_x = 2 * (width + height) - 2;
dx <<= 1;
@@ -438,7 +469,8 @@
static void ipred_z2_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
const int width, const int height, int angle,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
const int is_sm = (angle >> 9) & 0x1;
const int enable_intra_edge_filter = angle >> 10;
@@ -454,7 +486,8 @@
pixel *const topleft = &edge[height * 2];
if (upsample_above) {
- upsample_edge(topleft, width + 1, topleft_in, 0, width + 1);
+ upsample_edge(topleft, width + 1, topleft_in, 0, width + 1
+ HIGHBD_TAIL_SUFFIX);
dx <<= 1;
} else {
const int filter_strength = enable_intra_edge_filter ?
@@ -469,7 +502,8 @@
}
}
if (upsample_left) {
- upsample_edge(edge, height + 1, &topleft_in[-height], 0, height + 1);
+ upsample_edge(edge, height + 1, &topleft_in[-height], 0, height + 1
+ HIGHBD_TAIL_SUFFIX);
dy <<= 1;
} else {
const int filter_strength = enable_intra_edge_filter ?
@@ -516,7 +550,8 @@
static void ipred_z3_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
const int width, const int height, int angle,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
const int is_sm = (angle >> 9) & 0x1;
const int enable_intra_edge_filter = angle >> 10;
@@ -531,7 +566,8 @@
if (upsample_left) {
upsample_edge(left_out, width + height,
&topleft_in[-(width + height)],
- imax(width - height, 0), width + height + 1);
+ imax(width - height, 0), width + height + 1
+ HIGHBD_TAIL_SUFFIX);
left = &left_out[2 * (width + height) - 2];
max_base_y = 2 * (width + height) - 2;
dy <<= 1;
@@ -574,7 +610,8 @@
static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
const int width, const int height, int filt_idx,
- const int max_width, const int max_height)
+ const int max_width, const int max_height
+ HIGHBD_DECL_SUFFIX)
{
filt_idx &= 511;
assert(filt_idx < 5);
--- a/src/itx.h
+++ b/src/itx.h
@@ -35,7 +35,8 @@
#include "src/levels.h"
#define decl_itx_fn(name) \
-void (name)(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob)
+void (name)(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob \
+ HIGHBD_DECL_SUFFIX)
typedef decl_itx_fn(*itxfm_fn);
typedef struct Dav1dInvTxfmDSPContext {
@@ -42,10 +43,7 @@
itxfm_fn itxfm_add[N_RECT_TX_SIZES][N_TX_TYPES_PLUS_LL];
} Dav1dInvTxfmDSPContext;
-void dav1d_itx_dsp_init_8bpc(Dav1dInvTxfmDSPContext *c);
-void dav1d_itx_dsp_init_10bpc(Dav1dInvTxfmDSPContext *c);
-
-void dav1d_itx_dsp_init_x86_8bpc(Dav1dInvTxfmDSPContext *c);
-void dav1d_itx_dsp_init_x86_10bpc(Dav1dInvTxfmDSPContext *c);
+bitfn_decls(void dav1d_itx_dsp_init, Dav1dInvTxfmDSPContext *c);
+bitfn_decls(void dav1d_itx_dsp_init_x86, Dav1dInvTxfmDSPContext *c);
#endif /* __DAV1D_SRC_ITX_H__ */
--- a/src/itx_tmpl.c
+++ b/src/itx_tmpl.c
@@ -46,7 +46,8 @@
inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
coef *const coeff, const int eob,
const int w, const int h, const int shift1, const int shift2,
- const itx_1d_fn first_1d_fn, const itx_1d_fn second_1d_fn)
+ const itx_1d_fn first_1d_fn, const itx_1d_fn second_1d_fn
+ HIGHBD_DECL_SUFFIX)
{
int i, j;
const ptrdiff_t sh = imin(h, 32), sw = imin(w, 32);
@@ -54,8 +55,9 @@
// Maximum value for h and w is 64
coef tmp[4096 /* w * h */], out[64 /* h */], in_mem[64 /* w */];
const int is_rect2 = w * 2 == h || h * 2 == w;
- const int row_clip_max = (1 << (BITDEPTH + 8 - 1)) - 1;
- const int col_clip_max = (1 << (imax(BITDEPTH + 6, 16) - 1)) -1;
+ const int bitdepth = bitdepth_from_max(bitdepth_max);
+ const int row_clip_max = (1 << (bitdepth + 8 - 1)) - 1;
+ const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1;
const int col_clip_min = -col_clip_max - 1;
if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
@@ -93,10 +95,12 @@
inv_txfm_add_##type1##_##type2##_##w##x##h##_c(pixel *dst, \
const ptrdiff_t stride, \
coef *const coeff, \
- const int eob) \
+ const int eob \
+ HIGHBD_DECL_SUFFIX) \
{ \
inv_txfm_add_c(dst, stride, coeff, eob, w, h, shift1, shift2, \
- inv_##type1##w##_1d, inv_##type2##h##_1d); \
+ inv_##type1##w##_1d, inv_##type2##h##_1d \
+ HIGHBD_TAIL_SUFFIX); \
}
#define inv_txfm_fn64(w, h, shift1, shift2) \
@@ -147,9 +151,11 @@
inv_txfm_fn64(64, 64, 2, 4)
static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride,
- coef *const coeff, const int eob)
+ coef *const coeff, const int eob
+ HIGHBD_DECL_SUFFIX)
{
- const int col_clip_max = (1 << (imax(BITDEPTH + 6, 16) - 1)) -1;
+ const int bitdepth = bitdepth_from_max(bitdepth_max);
+ const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1;
const int col_clip_min = -col_clip_max - 1;
coef tmp[4 * 4], out[4];
--- a/src/lf_apply_tmpl.c
+++ b/src/lf_apply_tmpl.c
@@ -66,7 +66,7 @@
hmask[3] = 0;
dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
(const uint8_t(*)[4]) &lvl[x][0], b4_stride,
- &f->lf.lim_lut, endy4 - starty4);
+ &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
}
}
@@ -96,7 +96,7 @@
};
dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
(const uint8_t(*)[4]) &lvl[0][1], b4_stride,
- &f->lf.lim_lut, w);
+ &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
}
}
@@ -130,10 +130,10 @@
hmask[2] = 0;
dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
(const uint8_t(*)[4]) &lvl[x][2], b4_stride,
- &f->lf.lim_lut, endy4 - starty4);
+ &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
(const uint8_t(*)[4]) &lvl[x][3], b4_stride,
- &f->lf.lim_lut, endy4 - starty4);
+ &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
}
}
@@ -164,10 +164,10 @@
};
dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
(const uint8_t(*)[4]) &lvl[0][2], b4_stride,
- &f->lf.lim_lut, w);
+ &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
(const uint8_t(*)[4]) &lvl[0][3], b4_stride,
- &f->lf.lim_lut, w);
+ &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
}
}
--- a/src/lib.c
+++ b/src/lib.c
@@ -264,9 +264,10 @@
dav1d_apply_grain_8bpc(out, in);
break;
#endif
-#if CONFIG_10BPC
+#if CONFIG_16BPC
case 10:
- dav1d_apply_grain_10bpc(out, in);
+ case 12:
+ dav1d_apply_grain_16bpc(out, in);
break;
#endif
default:
--- a/src/loopfilter.h
+++ b/src/loopfilter.h
@@ -39,7 +39,7 @@
#define decl_loopfilter_sb_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const uint32_t *mask, \
const uint8_t (*lvl)[4], ptrdiff_t lvl_stride, \
- const Av1FilterLUT *lut, int w)
+ const Av1FilterLUT *lut, int w HIGHBD_DECL_SUFFIX)
typedef decl_loopfilter_sb_fn(*loopfilter_sb_fn);
typedef struct Dav1dLoopFilterDSPContext {
@@ -52,10 +52,7 @@
loopfilter_sb_fn loop_filter_sb[2][2];
} Dav1dLoopFilterDSPContext;
-void dav1d_loop_filter_dsp_init_8bpc(Dav1dLoopFilterDSPContext *c);
-void dav1d_loop_filter_dsp_init_10bpc(Dav1dLoopFilterDSPContext *c);
-
-void dav1d_loop_filter_dsp_init_x86_8bpc(Dav1dLoopFilterDSPContext *c);
-void dav1d_loop_filter_dsp_init_x86_10bpc(Dav1dLoopFilterDSPContext *c);
+bitfn_decls(void dav1d_loop_filter_dsp_init, Dav1dLoopFilterDSPContext *c);
+bitfn_decls(void dav1d_loop_filter_dsp_init_x86, Dav1dLoopFilterDSPContext *c);
#endif /* __DAV1D_SRC_LOOPFILTER_H__ */
--- a/src/loopfilter_tmpl.c
+++ b/src/loopfilter_tmpl.c
@@ -36,12 +36,14 @@
static NOINLINE void
loop_filter(pixel *dst, int E, int I, int H,
- const ptrdiff_t stridea, const ptrdiff_t strideb, const int wd)
+ const ptrdiff_t stridea, const ptrdiff_t strideb, const int wd
+ HIGHBD_DECL_SUFFIX)
{
- const int F = 1 << (BITDEPTH - 8);
- E <<= BITDEPTH - 8;
- I <<= BITDEPTH - 8;
- H <<= BITDEPTH - 8;
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+ const int F = 1 << bitdepth_min_8;
+ E <<= bitdepth_min_8;
+ I <<= bitdepth_min_8;
+ H <<= bitdepth_min_8;
for (int i = 0; i < 4; i++, dst += stridea) {
int p6, p5, p4, p3, p2;
@@ -128,15 +130,15 @@
} else {
const int hev = abs(p1 - p0) > H || abs(q1 - q0) > H;
-#define iclip_diff(v) iclip(v, -128 * (1 << (BITDEPTH - 8)), \
- 128 * (1 << (BITDEPTH - 8)) - 1)
+#define iclip_diff(v) iclip(v, -128 * (1 << bitdepth_min_8), \
+ 128 * (1 << bitdepth_min_8) - 1)
if (hev) {
int f = iclip_diff(p1 - q1), f1, f2;
f = iclip_diff(3 * (q0 - p0) + f);
- f1 = imin(f + 4, (128 << (BITDEPTH - 8)) - 1) >> 3;
- f2 = imin(f + 3, (128 << (BITDEPTH - 8)) - 1) >> 3;
+ f1 = imin(f + 4, (128 << bitdepth_min_8) - 1) >> 3;
+ f2 = imin(f + 3, (128 << bitdepth_min_8) - 1) >> 3;
dst[strideb * -1] = iclip_pixel(p0 + f2);
dst[strideb * +0] = iclip_pixel(q0 - f1);
@@ -143,8 +145,8 @@
} else {
int f = iclip_diff(3 * (q0 - p0)), f1, f2;
- f1 = imin(f + 4, (128 << (BITDEPTH - 8)) - 1) >> 3;
- f2 = imin(f + 3, (128 << (BITDEPTH - 8)) - 1) >> 3;
+ f1 = imin(f + 4, (128 << bitdepth_min_8) - 1) >> 3;
+ f2 = imin(f + 3, (128 << bitdepth_min_8) - 1) >> 3;
dst[strideb * -1] = iclip_pixel(p0 + f2);
dst[strideb * +0] = iclip_pixel(q0 - f1);
@@ -161,7 +163,8 @@
static void loop_filter_h_sb128y_c(pixel *dst, const ptrdiff_t stride,
const uint32_t *const vmask,
const uint8_t (*l)[4], ptrdiff_t b4_stride,
- const Av1FilterLUT *lut, const int h)
+ const Av1FilterLUT *lut, const int h
+ HIGHBD_DECL_SUFFIX)
{
const unsigned vm = vmask[0] | vmask[1] | vmask[2];
for (unsigned y = 1; vm & ~(y - 1);
@@ -173,7 +176,8 @@
const int H = L >> 4;
const int E = lut->e[L], I = lut->i[L];
const int idx = (vmask[2] & y) ? 2 : !!(vmask[1] & y);
- loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 << idx);
+ loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 << idx
+ HIGHBD_TAIL_SUFFIX);
}
}
}
@@ -181,7 +185,8 @@
static void loop_filter_v_sb128y_c(pixel *dst, const ptrdiff_t stride,
const uint32_t *const vmask,
const uint8_t (*l)[4], ptrdiff_t b4_stride,
- const Av1FilterLUT *lut, const int w)
+ const Av1FilterLUT *lut, const int w
+ HIGHBD_DECL_SUFFIX)
{
const unsigned vm = vmask[0] | vmask[1] | vmask[2];
for (unsigned x = 1; vm & ~(x - 1); x <<= 1, dst += 4, l++) {
@@ -191,7 +196,8 @@
const int H = L >> 4;
const int E = lut->e[L], I = lut->i[L];
const int idx = (vmask[2] & x) ? 2 : !!(vmask[1] & x);
- loop_filter(dst, E, I, H, 1, PXSTRIDE(stride), 4 << idx);
+ loop_filter(dst, E, I, H, 1, PXSTRIDE(stride), 4 << idx
+ HIGHBD_TAIL_SUFFIX);
}
}
}
@@ -199,7 +205,8 @@
static void loop_filter_h_sb128uv_c(pixel *dst, const ptrdiff_t stride,
const uint32_t *const vmask,
const uint8_t (*l)[4], ptrdiff_t b4_stride,
- const Av1FilterLUT *lut, const int h)
+ const Av1FilterLUT *lut, const int h
+ HIGHBD_DECL_SUFFIX)
{
const unsigned vm = vmask[0] | vmask[1];
for (unsigned y = 1; vm & ~(y - 1);
@@ -211,7 +218,8 @@
const int H = L >> 4;
const int E = lut->e[L], I = lut->i[L];
const int idx = !!(vmask[1] & y);
- loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 + 2 * idx);
+ loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 + 2 * idx
+ HIGHBD_TAIL_SUFFIX);
}
}
}
@@ -219,7 +227,8 @@
static void loop_filter_v_sb128uv_c(pixel *dst, const ptrdiff_t stride,
const uint32_t *const vmask,
const uint8_t (*l)[4], ptrdiff_t b4_stride,
- const Av1FilterLUT *lut, const int w)
+ const Av1FilterLUT *lut, const int w
+ HIGHBD_DECL_SUFFIX)
{
const unsigned vm = vmask[0] | vmask[1];
for (unsigned x = 1; vm & ~(x - 1); x <<= 1, dst += 4, l++) {
@@ -229,7 +238,8 @@
const int H = L >> 4;
const int E = lut->e[L], I = lut->i[L];
const int idx = !!(vmask[1] & x);
- loop_filter(dst, E, I, H, 1, PXSTRIDE(stride), 4 + 2 * idx);
+ loop_filter(dst, E, I, H, 1, PXSTRIDE(stride), 4 + 2 * idx
+ HIGHBD_TAIL_SUFFIX);
}
}
}
--- a/src/looprestoration.h
+++ b/src/looprestoration.h
@@ -55,7 +55,8 @@
const_left_pixel_row left, \
const pixel *lpf, ptrdiff_t lpf_stride, \
int w, int h, const int16_t filterh[7], \
- const int16_t filterv[7], enum LrEdgeFlags edges)
+ const int16_t filterv[7], enum LrEdgeFlags edges \
+ HIGHBD_DECL_SUFFIX)
typedef decl_wiener_filter_fn(*wienerfilter_fn);
#define decl_selfguided_filter_fn(name) \
@@ -63,7 +64,7 @@
const_left_pixel_row left, \
const pixel *lpf, ptrdiff_t lpf_stride, \
int w, int h, int sgr_idx, const int16_t sgr_w[2], \
- const enum LrEdgeFlags edges)
+ const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
typedef decl_selfguided_filter_fn(*selfguided_fn);
typedef struct Dav1dLoopRestorationDSPContext {
@@ -71,12 +72,8 @@
selfguided_fn selfguided;
} Dav1dLoopRestorationDSPContext;
-void dav1d_loop_restoration_dsp_init_8bpc(Dav1dLoopRestorationDSPContext *c);
-void dav1d_loop_restoration_dsp_init_10bpc(Dav1dLoopRestorationDSPContext *c);
-
-void dav1d_loop_restoration_dsp_init_arm_8bpc(Dav1dLoopRestorationDSPContext *c);
-void dav1d_loop_restoration_dsp_init_arm_10bpc(Dav1dLoopRestorationDSPContext *c);
-void dav1d_loop_restoration_dsp_init_x86_8bpc(Dav1dLoopRestorationDSPContext *c);
-void dav1d_loop_restoration_dsp_init_x86_10bpc(Dav1dLoopRestorationDSPContext *c);
+bitfn_decls(void dav1d_loop_restoration_dsp_init, Dav1dLoopRestorationDSPContext *c);
+bitfn_decls(void dav1d_loop_restoration_dsp_init_arm, Dav1dLoopRestorationDSPContext *c);
+bitfn_decls(void dav1d_loop_restoration_dsp_init_x86, Dav1dLoopRestorationDSPContext *c);
#endif /* __DAV1D_SRC_LOOPRESTORATION_H__ */
--- a/src/looprestoration_tmpl.c
+++ b/src/looprestoration_tmpl.c
@@ -136,7 +136,7 @@
const pixel *lpf, const ptrdiff_t lpf_stride,
const int w, const int h,
const int16_t filterh[7], const int16_t filterv[7],
- const enum LrEdgeFlags edges)
+ const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
// Wiener filtering is applied to a maximum stripe height of 64 + 3 pixels
// of padding above and below
@@ -150,12 +150,13 @@
uint16_t hor[70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE];
uint16_t *hor_ptr = hor;
- const int round_bits_h = 3 + (BITDEPTH == 12) * 2;
+ const int bitdepth = bitdepth_from_max(bitdepth_max);
+ const int round_bits_h = 3 + (bitdepth == 12) * 2;
const int rounding_off_h = 1 << (round_bits_h - 1);
- const int clip_limit = 1 << ((BITDEPTH) + 1 + 7 - round_bits_h);
+ const int clip_limit = 1 << (bitdepth + 1 + 7 - round_bits_h);
for (int j = 0; j < h + 6; j++) {
for (int i = 0; i < w; i++) {
- int sum = (tmp_ptr[i + 3] << 7) + (1 << (BITDEPTH + 6));
+ int sum = (tmp_ptr[i + 3] << 7) + (1 << (bitdepth + 6));
for (int k = 0; k < 7; k++) {
sum += tmp_ptr[i + k] * filterh[k];
@@ -168,9 +169,9 @@
hor_ptr += REST_UNIT_STRIDE;
}
- const int round_bits_v = 11 - (BITDEPTH == 12) * 2;
+ const int round_bits_v = 11 - (bitdepth == 12) * 2;
const int rounding_off_v = 1 << (round_bits_v - 1);
- const int round_offset = 1 << (BITDEPTH + (round_bits_v - 1));
+ const int round_offset = 1 << (bitdepth + (round_bits_v - 1));
for (int i = 0; i < w; i++) {
for (int j = 0; j < h; j++) {
int sum = (hor[(j + 3) * REST_UNIT_STRIDE + i] << 7) - round_offset;
@@ -408,9 +409,10 @@
}
}
-static void selfguided_filter(int16_t *dst, const pixel *src,
+static void selfguided_filter(coef *dst, const pixel *src,
const ptrdiff_t src_stride, const int w,
- const int h, const int n, const int s)
+ const int h, const int n, const int s
+ HIGHBD_DECL_SUFFIX)
{
const int sgr_one_by_x = n == 25 ? 164 : 455;
@@ -431,6 +433,7 @@
boxsum3(B_, src, w + 6, h + 6);
boxsum3sqr(A_, src, w + 6, h + 6);
}
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
int32_t *AA = A - REST_UNIT_STRIDE;
coef *BB = B - REST_UNIT_STRIDE;
@@ -437,9 +440,9 @@
for (int j = -1; j < h + 1; j+= step) {
for (int i = -1; i < w + 1; i++) {
const int a =
- (AA[i] + (1 << (2 * (BITDEPTH - 8)) >> 1)) >> (2 * (BITDEPTH - 8));
+ (AA[i] + ((1 << (2 * bitdepth_min_8)) >> 1)) >> (2 * bitdepth_min_8);
const int b =
- (BB[i] + (1 << (BITDEPTH - 8) >> 1)) >> (BITDEPTH - 8);
+ (BB[i] + ((1 << bitdepth_min_8) >> 1)) >> bitdepth_min_8;
const unsigned p = imax(a * n - b * b, 0);
const unsigned z = (p * s + (1 << 19)) >> 20;
@@ -446,7 +449,7 @@
const int x = dav1d_sgr_x_by_xplus1[imin(z, 255)];
// This is where we invert A and B, so that B is of size coef.
- AA[i] = (((1 << 8) - x) * BB[i] * sgr_one_by_x + (1 << 11)) >> 12;
+ AA[i] = (((1U << 8) - x) * BB[i] * sgr_one_by_x + (1 << 11)) >> 12;
BB[i] = x;
}
AA += step * REST_UNIT_STRIDE;
@@ -512,7 +515,8 @@
const pixel (*const left)[4],
const pixel *lpf, const ptrdiff_t lpf_stride,
const int w, const int h, const int sgr_idx,
- const int16_t sgr_w[2], const enum LrEdgeFlags edges)
+ const int16_t sgr_w[2], const enum LrEdgeFlags edges
+ HIGHBD_DECL_SUFFIX)
{
// Selfguided filter is applied to a maximum stripe height of 64 + 3 pixels
// of padding above and below
@@ -522,12 +526,12 @@
// Selfguided filter outputs to a maximum stripe height of 64 and a
// maximum restoration width of 384 (256 * 1.5)
- int16_t dst[64 * 384];
+ coef dst[64 * 384];
// both r1 and r0 can't be zero
if (!dav1d_sgr_params[sgr_idx][0]) {
const int s1 = dav1d_sgr_params[sgr_idx][3];
- selfguided_filter(dst, tmp, REST_UNIT_STRIDE, w, h, 9, s1);
+ selfguided_filter(dst, tmp, REST_UNIT_STRIDE, w, h, 9, s1 HIGHBD_TAIL_SUFFIX);
const int w1 = (1 << 7) - sgr_w[1];
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
@@ -539,7 +543,7 @@
}
} else if (!dav1d_sgr_params[sgr_idx][1]) {
const int s0 = dav1d_sgr_params[sgr_idx][2];
- selfguided_filter(dst, tmp, REST_UNIT_STRIDE, w, h, 25, s0);
+ selfguided_filter(dst, tmp, REST_UNIT_STRIDE, w, h, 25, s0 HIGHBD_TAIL_SUFFIX);
const int w0 = sgr_w[0];
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
@@ -550,13 +554,13 @@
p += PXSTRIDE(p_stride);
}
} else {
- int16_t dst1[64 * 384];
+ coef dst1[64 * 384];
const int s0 = dav1d_sgr_params[sgr_idx][2];
const int s1 = dav1d_sgr_params[sgr_idx][3];
const int w0 = sgr_w[0];
const int w1 = (1 << 7) - w0 - sgr_w[1];
- selfguided_filter(dst, tmp, REST_UNIT_STRIDE, w, h, 25, s0);
- selfguided_filter(dst1, tmp, REST_UNIT_STRIDE, w, h, 9, s1);
+ selfguided_filter(dst, tmp, REST_UNIT_STRIDE, w, h, 25, s0 HIGHBD_TAIL_SUFFIX);
+ selfguided_filter(dst1, tmp, REST_UNIT_STRIDE, w, h, 9, s1 HIGHBD_TAIL_SUFFIX);
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
const int u = (p[i] << 4);
--- a/src/lr_apply_tmpl.c
+++ b/src/lr_apply_tmpl.c
@@ -76,7 +76,7 @@
while (row + stripe_h <= row_h) {
f->dsp->mc.resize(dst, dst_stride, src, src_stride,
dst_w, src_w, 4, f->resize_step[ss_hor],
- f->resize_start[ss_hor]);
+ f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX);
row += stripe_h; // unmodified stripe_h for the 1st stripe
stripe_h = 64 >> ss_ver;
src += stripe_h * PXSTRIDE(src_stride);
@@ -180,11 +180,11 @@
}
if (lr->type == DAV1D_RESTORATION_WIENER) {
dsp->lr.wiener(p, p_stride, left, lpf, lpf_stride, unit_w, stripe_h,
- filterh, filterv, edges);
+ filterh, filterv, edges HIGHBD_CALL_SUFFIX);
} else {
assert(lr->type == DAV1D_RESTORATION_SGRPROJ);
dsp->lr.selfguided(p, p_stride, left, lpf, lpf_stride, unit_w, stripe_h,
- lr->sgr_idx, lr->sgr_weights, edges);
+ lr->sgr_idx, lr->sgr_weights, edges HIGHBD_CALL_SUFFIX);
}
left += stripe_h;
--- a/src/mc.h
+++ b/src/mc.h
@@ -38,57 +38,59 @@
#define decl_mc_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const pixel *src, ptrdiff_t src_stride, \
- int w, int h, int mx, int my)
+ int w, int h, int mx, int my HIGHBD_DECL_SUFFIX)
typedef decl_mc_fn(*mc_fn);
#define decl_mc_scaled_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const pixel *src, ptrdiff_t src_stride, \
- int w, int h, int mx, int my, int dx, int dy)
+ int w, int h, int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX)
typedef decl_mc_scaled_fn(*mc_scaled_fn);
#define decl_warp8x8_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const pixel *src, ptrdiff_t src_stride, \
- const int16_t *abcd, int mx, int my)
+ const int16_t *abcd, int mx, int my HIGHBD_DECL_SUFFIX)
typedef decl_warp8x8_fn(*warp8x8_fn);
#define decl_mct_fn(name) \
void (name)(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, \
- int w, int h, int mx, int my)
+ int w, int h, int mx, int my HIGHBD_DECL_SUFFIX)
typedef decl_mct_fn(*mct_fn);
#define decl_mct_scaled_fn(name) \
void (name)(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, \
- int w, int h, int mx, int my, int dx, int dy)
+ int w, int h, int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX)
typedef decl_mct_scaled_fn(*mct_scaled_fn);
#define decl_warp8x8t_fn(name) \
void (name)(int16_t *tmp, const ptrdiff_t tmp_stride, \
const pixel *src, ptrdiff_t src_stride, \
- const int16_t *abcd, int mx, int my)
+ const int16_t *abcd, int mx, int my HIGHBD_DECL_SUFFIX)
typedef decl_warp8x8t_fn(*warp8x8t_fn);
#define decl_avg_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
- const int16_t *tmp1, const int16_t *tmp2, int w, int h)
+ const int16_t *tmp1, const int16_t *tmp2, int w, int h \
+ HIGHBD_DECL_SUFFIX)
typedef decl_avg_fn(*avg_fn);
#define decl_w_avg_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
- const int16_t *tmp1, const int16_t *tmp2, int w, int h, int weight)
+ const int16_t *tmp1, const int16_t *tmp2, int w, int h, int weight \
+ HIGHBD_DECL_SUFFIX)
typedef decl_w_avg_fn(*w_avg_fn);
#define decl_mask_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const int16_t *tmp1, const int16_t *tmp2, int w, int h, \
- const uint8_t *mask)
+ const uint8_t *mask HIGHBD_DECL_SUFFIX)
typedef decl_mask_fn(*mask_fn);
#define decl_w_mask_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const int16_t *tmp1, const int16_t *tmp2, int w, int h, \
- uint8_t *mask, int sign)
+ uint8_t *mask, int sign HIGHBD_DECL_SUFFIX)
typedef decl_w_mask_fn(*w_mask_fn);
#define decl_blend_fn(name) \
@@ -108,7 +110,7 @@
#define decl_resize_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const pixel *src, ptrdiff_t src_stride, \
- int dst_w, int src_w, int h, int dx, int mx)
+ int dst_w, int src_w, int h, int dx, int mx HIGHBD_DECL_SUFFIX)
typedef decl_resize_fn(*resize_fn);
typedef struct Dav1dMCDSPContext {
@@ -129,13 +131,8 @@
resize_fn resize;
} Dav1dMCDSPContext;
-void dav1d_mc_dsp_init_8bpc(Dav1dMCDSPContext *c);
-void dav1d_mc_dsp_init_10bpc(Dav1dMCDSPContext *c);
-
-void dav1d_mc_dsp_init_arm_8bpc(Dav1dMCDSPContext *c);
-void dav1d_mc_dsp_init_arm_10bpc(Dav1dMCDSPContext *c);
-
-void dav1d_mc_dsp_init_x86_8bpc(Dav1dMCDSPContext *c);
-void dav1d_mc_dsp_init_x86_10bpc(Dav1dMCDSPContext *c);
+bitfn_decls(void dav1d_mc_dsp_init, Dav1dMCDSPContext *c);
+bitfn_decls(void dav1d_mc_dsp_init_arm, Dav1dMCDSPContext *c);
+bitfn_decls(void dav1d_mc_dsp_init_x86, Dav1dMCDSPContext *c);
#endif /* __DAV1D_SRC_MC_H__ */
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -37,6 +37,13 @@
#include "src/mc.h"
#include "src/tables.h"
+#if BITDEPTH == 8
+#define get_intermediate_bits(bitdepth_max) 4
+#else
+// 4 for 10 bits/component, 2 for 12 bits/component
+#define get_intermediate_bits(bitdepth_max) (14 - bitdepth_from_max(bitdepth_max))
+#endif
+
static NOINLINE void
put_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel *src, const ptrdiff_t src_stride, const int w, int h)
@@ -51,11 +58,12 @@
static NOINLINE void
prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride,
- const int w, int h)
+ const int w, int h HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
do {
for (int x = 0; x < w; x++)
- tmp[x] = src[x] << 4;
+ tmp[x] = src[x] << intermediate_bits;
tmp += w;
src += src_stride;
@@ -73,7 +81,7 @@
F[7] * src[x + +4 * stride])
#define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \
- ((FILTER_8TAP(src, x, F, stride) + ((1 << sh) >> 1)) >> sh)
+ ((FILTER_8TAP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))
#define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \
iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh))
@@ -96,8 +104,11 @@
put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, const int my,
- const int filter_type)
+ const int filter_type HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
+ const int intermediate_rnd = (1 << intermediate_bits) >> 1;
+
GET_FILTERS();
dst_stride = PXSTRIDE(dst_stride);
src_stride = PXSTRIDE(src_stride);
@@ -110,7 +121,8 @@
src -= src_stride * 3;
do {
for (int x = 0; x < w; x++)
- mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, 2);
+ mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,
+ 6 - intermediate_bits);
mid_ptr += 128;
src += src_stride;
@@ -119,7 +131,8 @@
mid_ptr = mid + 128 * 3;
do {
for (int x = 0; x < w; x++)
- dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10);
+ dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128,
+ 6 + intermediate_bits);
mid_ptr += 128;
dst += dst_stride;
@@ -127,8 +140,9 @@
} else {
do {
for (int x = 0; x < w; x++) {
- const int px = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, 2);
- dst[x] = iclip_pixel((px + 8) >> 4);
+ const int px = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,
+ 6 - intermediate_bits);
+ dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits);
}
dst += dst_stride;
@@ -151,8 +165,11 @@
put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, int my,
- const int dx, const int dy, const int filter_type)
+ const int dx, const int dy, const int filter_type
+ HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
+ const int intermediate_rnd = (1 << intermediate_bits) >> 1;
int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
int16_t mid[128 * (256 + 7)], *mid_ptr = mid;
src_stride = PXSTRIDE(src_stride);
@@ -164,7 +181,9 @@
for (x = 0; x < w; x++) {
GET_H_FILTER(imx >> 6);
- mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
+ mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1,
+ 6 - intermediate_bits) :
+ src[ioff] << intermediate_bits;
imx += dx;
ioff += imx >> 10;
imx &= 0x3ff;
@@ -180,8 +199,10 @@
GET_V_FILTER(my >> 6);
for (x = 0; x < w; x++)
- dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10) :
- iclip_pixel((mid_ptr[x] + 8) >> 4);
+ dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128,
+ 6 + intermediate_bits) :
+ iclip_pixel((mid_ptr[x] + intermediate_rnd) >>
+ intermediate_bits);
my += dy;
mid_ptr += (my >> 10) * 128;
@@ -193,8 +214,9 @@
static NOINLINE void
prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, const int my,
- const int filter_type)
+ const int filter_type HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
GET_FILTERS();
src_stride = PXSTRIDE(src_stride);
@@ -206,7 +228,8 @@
src -= src_stride * 3;
do {
for (int x = 0; x < w; x++)
- mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, 2);
+ mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,
+ 6 - intermediate_bits);
mid_ptr += 128;
src += src_stride;
@@ -223,7 +246,8 @@
} else {
do {
for (int x = 0; x < w; x++)
- tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1, 2);
+ tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,
+ 6 - intermediate_bits);
tmp += w;
src += src_stride;
@@ -232,20 +256,23 @@
} else if (fv) {
do {
for (int x = 0; x < w; x++)
- tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride, 2);
+ tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride,
+ 6 - intermediate_bits);
tmp += w;
src += src_stride;
} while (--h);
} else
- prep_c(tmp, src, src_stride, w, h);
+ prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX);
}
static NOINLINE void
prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, int my,
- const int dx, const int dy, const int filter_type)
+ const int dx, const int dy, const int filter_type
+ HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
int16_t mid[128 * (256 + 7)], *mid_ptr = mid;
src_stride = PXSTRIDE(src_stride);
@@ -257,7 +284,9 @@
for (x = 0; x < w; x++) {
GET_H_FILTER(imx >> 6);
- mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
+ mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1,
+ 6 - intermediate_bits) :
+ src[ioff] << intermediate_bits;
imx += dx;
ioff += imx >> 10;
imx &= 0x3ff;
@@ -288,10 +317,11 @@
const pixel *const src, \
const ptrdiff_t src_stride, \
const int w, const int h, \
- const int mx, const int my) \
+ const int mx, const int my \
+ HIGHBD_DECL_SUFFIX) \
{ \
put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \
- type_h | (type_v << 2)); \
+ type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \
} \
static void put_8tap_##type##_scaled_c(pixel *const dst, \
const ptrdiff_t dst_stride, \
@@ -299,19 +329,21 @@
const ptrdiff_t src_stride, \
const int w, const int h, \
const int mx, const int my, \
- const int dx, const int dy) \
+ const int dx, const int dy \
+ HIGHBD_DECL_SUFFIX) \
{ \
put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
- type_h | (type_v << 2)); \
+ type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \
} \
static void prep_8tap_##type##_c(int16_t *const tmp, \
const pixel *const src, \
const ptrdiff_t src_stride, \
const int w, const int h, \
- const int mx, const int my) \
+ const int mx, const int my \
+ HIGHBD_DECL_SUFFIX) \
{ \
prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \
- type_h | (type_v << 2)); \
+ type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \
} \
static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \
const pixel *const src, \
@@ -318,10 +350,11 @@
const ptrdiff_t src_stride, \
const int w, const int h, \
const int mx, const int my, \
- const int dx, const int dy) \
+ const int dx, const int dy \
+ HIGHBD_DECL_SUFFIX) \
{ \
prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \
- type_h | (type_v << 2)); \
+ type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \
}
filter_fns(regular, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR)
@@ -338,7 +371,7 @@
(16 * src[x] + ((mxy) * (src[x + stride] - src[x])))
#define FILTER_BILIN_RND(src, x, mxy, stride, sh) \
- ((FILTER_BILIN(src, x, mxy, stride) + ((1 << sh) >> 1)) >> sh)
+ ((FILTER_BILIN(src, x, mxy, stride) + ((1 << (sh)) >> 1)) >> (sh))
#define FILTER_BILIN_CLIP(src, x, mxy, stride, sh) \
iclip_pixel(FILTER_BILIN_RND(src, x, mxy, stride, sh))
@@ -345,8 +378,11 @@
static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride,
const pixel *src, ptrdiff_t src_stride,
- const int w, int h, const int mx, const int my)
+ const int w, int h, const int mx, const int my
+ HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
+ const int intermediate_rnd = (1 << intermediate_bits) >> 1;
dst_stride = PXSTRIDE(dst_stride);
src_stride = PXSTRIDE(src_stride);
@@ -357,7 +393,8 @@
do {
for (int x = 0; x < w; x++)
- mid_ptr[x] = FILTER_BILIN(src, x, mx, 1);
+ mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1,
+ 4 - intermediate_bits);
mid_ptr += 128;
src += src_stride;
@@ -366,7 +403,8 @@
mid_ptr = mid;
do {
for (int x = 0; x < w; x++)
- dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128, 8);
+ dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128,
+ 4 + intermediate_bits);
mid_ptr += 128;
dst += dst_stride;
@@ -373,8 +411,11 @@
} while (--h);
} else {
do {
- for (int x = 0; x < w; x++)
- dst[x] = FILTER_BILIN_CLIP(src, x, mx, 1, 4);
+ for (int x = 0; x < w; x++) {
+ const int px = FILTER_BILIN_RND(src, x, mx, 1,
+ 4 - intermediate_bits);
+ dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits);
+ }
dst += dst_stride;
src += src_stride;
@@ -395,8 +436,10 @@
static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride,
const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, int my,
- const int dx, const int dy)
+ const int dx, const int dy
+ HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
int tmp_h = (((h - 1) * dy + my) >> 10) + 2;
int16_t mid[128 * (256 + 1)], *mid_ptr = mid;
@@ -405,7 +448,8 @@
int imx = mx, ioff = 0;
for (x = 0; x < w; x++) {
- mid_ptr[x] = FILTER_BILIN(src, ioff, imx >> 6, 1);
+ mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1,
+ 4 - intermediate_bits);
imx += dx;
ioff += imx >> 10;
imx &= 0x3ff;
@@ -420,7 +464,8 @@
int x;
for (x = 0; x < w; x++)
- dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my >> 6, 128, 8);
+ dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my >> 6, 128,
+ 4 + intermediate_bits);
my += dy;
mid_ptr += (my >> 10) * 128;
@@ -431,8 +476,10 @@
static void prep_bilin_c(int16_t *tmp,
const pixel *src, ptrdiff_t src_stride,
- const int w, int h, const int mx, const int my)
+ const int w, int h, const int mx, const int my
+ HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
src_stride = PXSTRIDE(src_stride);
if (mx) {
@@ -442,7 +489,8 @@
do {
for (int x = 0; x < w; x++)
- mid_ptr[x] = FILTER_BILIN(src, x, mx, 1);
+ mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1,
+ 4 - intermediate_bits);
mid_ptr += 128;
src += src_stride;
@@ -459,7 +507,8 @@
} else {
do {
for (int x = 0; x < w; x++)
- tmp[x] = FILTER_BILIN(src, x, mx, 1);
+ tmp[x] = FILTER_BILIN_RND(src, x, mx, 1,
+ 4 - intermediate_bits);
tmp += w;
src += src_stride;
@@ -468,20 +517,22 @@
} else if (my) {
do {
for (int x = 0; x < w; x++)
- tmp[x] = FILTER_BILIN(src, x, my, src_stride);
+ tmp[x] = FILTER_BILIN_RND(src, x, my, src_stride,
+ 4 - intermediate_bits);
tmp += w;
src += src_stride;
} while (--h);
} else
- prep_c(tmp, src, src_stride, w, h);
+ prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX);
}
static void prep_bilin_scaled_c(int16_t *tmp,
const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, int my,
- const int dx, const int dy)
+ const int dx, const int dy HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
int tmp_h = (((h - 1) * dy + my) >> 10) + 2;
int16_t mid[128 * (256 + 1)], *mid_ptr = mid;
@@ -490,7 +541,8 @@
int imx = mx, ioff = 0;
for (x = 0; x < w; x++) {
- mid_ptr[x] = FILTER_BILIN(src, ioff, imx >> 6, 1);
+ mid_ptr[x] = FILTER_BILIN_RND(src, ioff, imx >> 6, 1,
+ 4 - intermediate_bits);
imx += dx;
ioff += imx >> 10;
imx &= 0x3ff;
@@ -515,11 +567,14 @@
}
static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
- const int16_t *tmp1, const int16_t *tmp2, const int w, int h)
+ const int16_t *tmp1, const int16_t *tmp2, const int w, int h
+ HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
+ const int sh = intermediate_bits + 1, rnd = 1 << intermediate_bits;
do {
for (int x = 0; x < w; x++)
- dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + 16) >> 5);
+ dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + rnd) >> sh);
tmp1 += w;
tmp2 += w;
@@ -529,12 +584,14 @@
static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride,
const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
- const int weight)
+ const int weight HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
+ const int sh = intermediate_bits + 4, rnd = 8 << intermediate_bits;
do {
for (int x = 0; x < w; x++)
dst[x] = iclip_pixel((tmp1[x] * weight +
- tmp2[x] * (16 - weight) + 128) >> 8);
+ tmp2[x] * (16 - weight) + rnd) >> sh);
tmp1 += w;
tmp2 += w;
@@ -544,12 +601,14 @@
static void mask_c(pixel *dst, const ptrdiff_t dst_stride,
const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
- const uint8_t *mask)
+ const uint8_t *mask HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
+ const int sh = intermediate_bits + 6, rnd = 32 << intermediate_bits;
do {
for (int x = 0; x < w; x++)
dst[x] = iclip_pixel((tmp1[x] * mask[x] +
- tmp2[x] * (64 - mask[x]) + 512) >> 10);
+ tmp2[x] * (64 - mask[x]) + rnd) >> sh);
tmp1 += w;
tmp2 += w;
@@ -603,23 +662,27 @@
static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
uint8_t *mask, const int sign,
- const int ss_hor, const int ss_ver)
+ const int ss_hor, const int ss_ver HIGHBD_DECL_SUFFIX)
{
// store mask at 2x2 resolution, i.e. store 2x1 sum for even rows,
// and then load this intermediate to calculate final value for odd rows
- const int rnd = 8 << (BITDEPTH - 8);
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
+ const int bitdepth = bitdepth_from_max(bitdepth_max);
+ const int sh = intermediate_bits + 6, rnd = 32 << intermediate_bits;
+ const int mask_sh = bitdepth + intermediate_bits - 4;
+ const int mask_rnd = 1 << (mask_sh - 5);
do {
for (int x = 0; x < w; x++) {
- const int m = imin(38 + ((abs(tmp1[x] - tmp2[x]) + rnd) >> BITDEPTH), 64);
+ const int m = imin(38 + ((abs(tmp1[x] - tmp2[x]) + mask_rnd) >> mask_sh), 64);
dst[x] = iclip_pixel((tmp1[x] * m +
- tmp2[x] * (64 - m) + 512) >> 10);
+ tmp2[x] * (64 - m) + rnd) >> sh);
if (ss_hor) {
x++;
- const int n = imin(38 + ((abs(tmp1[x] - tmp2[x]) + rnd) >> BITDEPTH), 64);
+ const int n = imin(38 + ((abs(tmp1[x] - tmp2[x]) + mask_rnd) >> mask_sh), 64);
dst[x] = iclip_pixel((tmp1[x] * n +
- tmp2[x] * (64 - n) + 512) >> 10);
+ tmp2[x] * (64 - n) + rnd) >> sh);
if (h & ss_ver) {
mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2;
@@ -644,9 +707,10 @@
static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \
const int16_t *const tmp1, const int16_t *const tmp2, \
const int w, const int h, uint8_t *mask, \
- const int sign) \
+ const int sign HIGHBD_DECL_SUFFIX) \
{ \
- w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver); \
+ w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \
+ HIGHBD_TAIL_SUFFIX); \
}
w_mask_fns(444, 0, 0);
@@ -666,7 +730,7 @@
F[7] * src[x + +4 * stride])
#define FILTER_WARP_RND(src, x, F, stride, sh) \
- ((FILTER_WARP(src, x, F, stride) + ((1 << sh) >> 1)) >> sh)
+ ((FILTER_WARP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))
#define FILTER_WARP_CLIP(src, x, F, stride, sh) \
iclip_pixel(FILTER_WARP_RND(src, x, F, stride, sh))
@@ -673,8 +737,10 @@
static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel *src, const ptrdiff_t src_stride,
- const int16_t *const abcd, int mx, int my)
+ const int16_t *const abcd, int mx, int my
+ HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
int16_t mid[15 * 8], *mid_ptr = mid;
src -= 3 * PXSTRIDE(src_stride);
@@ -683,7 +749,8 @@
const int8_t *const filter =
dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)];
- mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, 3);
+ mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1,
+ 7 - intermediate_bits);
}
src += PXSTRIDE(src_stride);
mid_ptr += 8;
@@ -695,7 +762,8 @@
const int8_t *const filter =
dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)];
- dst[x] = FILTER_WARP_CLIP(mid_ptr, x, filter, 8, 11);
+ dst[x] = FILTER_WARP_CLIP(mid_ptr, x, filter, 8,
+ 7 + intermediate_bits);
}
mid_ptr += 8;
dst += PXSTRIDE(dst_stride);
@@ -704,8 +772,10 @@
static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride,
const pixel *src, const ptrdiff_t src_stride,
- const int16_t *const abcd, int mx, int my)
+ const int16_t *const abcd, int mx, int my
+ HIGHBD_DECL_SUFFIX)
{
+ const int intermediate_bits = get_intermediate_bits(bitdepth_max);
int16_t mid[15 * 8], *mid_ptr = mid;
src -= 3 * PXSTRIDE(src_stride);
@@ -714,7 +784,8 @@
const int8_t *const filter =
dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)];
- mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1, 3);
+ mid_ptr[x] = FILTER_WARP_RND(src, x, filter, 1,
+ 7 - intermediate_bits);
}
src += PXSTRIDE(src_stride);
mid_ptr += 8;
@@ -785,7 +856,7 @@
static void resize_c(pixel *dst, const ptrdiff_t dst_stride,
const pixel *src, const ptrdiff_t src_stride,
const int dst_w, const int src_w, int h,
- const int dx, const int mx0)
+ const int dx, const int mx0 HIGHBD_DECL_SUFFIX)
{
do {
int mx = mx0, src_x = -1;
--- a/src/meson.build
+++ b/src/meson.build
@@ -52,9 +52,9 @@
# These files are compiled for each bitdepth with
# `BITDEPTH` defined to the currently built bitdepth.
libdav1d_tmpl_sources = files(
+ 'ipred_prepare_tmpl.c',
'ipred_tmpl.c',
'itx_tmpl.c',
- 'ipred_prepare_tmpl.c',
'lf_apply_tmpl.c',
'loopfilter_tmpl.c',
'mc_tmpl.c',
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -208,6 +208,9 @@
const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane];
const uint8_t *const qm_tbl = f->qm[is_1d || *txtp == IDTX][tx][plane];
const int dq_shift = imax(0, t_dim->ctx - 2);
+ const int bitdepth = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
+ const int cf_min = -(1 << (7 + bitdepth));
+ const int cf_max = (1 << (7 + bitdepth)) - 1;
for (int i = 0; i <= eob; i++) {
const int rc = scan[i];
int tok = cf[rc];
@@ -247,9 +250,7 @@
// dequant, see 7.12.3
cul_level += tok;
tok = (((int64_t)dq * tok) & 0xffffff) >> dq_shift;
- cf[rc] = iclip(sign ? -tok : tok,
- -(1 << (7 + BITDEPTH)),
- (1 << (7 + BITDEPTH)) - 1);
+ cf[rc] = iclip(sign ? -tok : tok, cf_min, cf_max);
}
// context
@@ -349,7 +350,8 @@
if (eob >= 0) {
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");
- dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob);
+ dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob
+ HIGHBD_CALL_SUFFIX);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
hex_dump(dst, f->cur.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
}
@@ -542,10 +544,12 @@
if (dst8 != NULL) {
f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
- bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+ bh4 * v_mul, mx << !ss_hor, my << !ss_ver
+ HIGHBD_CALL_SUFFIX);
} else {
f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
- bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+ bh4 * v_mul, mx << !ss_hor, my << !ss_ver
+ HIGHBD_CALL_SUFFIX);
}
} else {
assert(refp != &f->sr_cur);
@@ -594,13 +598,15 @@
bw4 * h_mul, bh4 * v_mul,
pos_x & 0x3ff, pos_y & 0x3ff,
f->svc[refidx][0].step,
- f->svc[refidx][1].step);
+ f->svc[refidx][1].step
+ HIGHBD_CALL_SUFFIX);
} else {
f->dsp->mc.mct_scaled[filter_2d](dst16, ref, ref_stride,
bw4 * h_mul, bh4 * v_mul,
pos_x & 0x3ff, pos_y & 0x3ff,
f->svc[refidx][0].step,
- f->svc[refidx][1].step);
+ f->svc[refidx][1].step
+ HIGHBD_CALL_SUFFIX);
}
}
@@ -722,10 +728,10 @@
}
if (dst16 != NULL)
dsp->mc.warp8x8t(&dst16[x], dstride, ref_ptr, ref_stride,
- wmp->abcd, mx, my);
+ wmp->abcd, mx, my HIGHBD_CALL_SUFFIX);
else
dsp->mc.warp8x8(&dst8[x], dstride, ref_ptr, ref_stride,
- wmp->abcd, mx, my);
+ wmp->abcd, mx, my HIGHBD_CALL_SUFFIX);
}
if (dst8) dst8 += 8 * PXSTRIDE(dstride);
else dst16 += 8 * dstride;
@@ -826,12 +832,14 @@
edge_flags, dst,
f->cur.stride[0], top_sb_edge,
b->y_mode, &angle,
- t_dim->w, t_dim->h, edge);
+ t_dim->w, t_dim->h, edge
+ HIGHBD_CALL_SUFFIX);
dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
t_dim->w * 4, t_dim->h * 4,
angle | intra_flags,
4 * f->bw - 4 * t->bx,
- 4 * f->bh - 4 * t->by);
+ 4 * f->bh - 4 * t->by
+ HIGHBD_CALL_SUFFIX);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
@@ -882,7 +890,7 @@
dsp->itx.itxfm_add[b->tx]
[txtp](dst,
f->cur.stride[0],
- cf, eob);
+ cf, eob HIGHBD_CALL_SUFFIX);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
hex_dump(dst, f->cur.stride[0],
t_dim->w * 4, t_dim->h * 4, "recon");
@@ -943,11 +951,13 @@
0, uv_dst[pl], stride,
top_sb_edge, DC_PRED, &angle,
uv_t_dim->w,
- uv_t_dim->h, edge);
+ uv_t_dim->h, edge
+ HIGHBD_CALL_SUFFIX);
dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
uv_t_dim->w * 4,
uv_t_dim->h * 4,
- ac, b->cfl_alpha[pl]);
+ ac, b->cfl_alpha[pl]
+ HIGHBD_CALL_SUFFIX);
}
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
@@ -1042,7 +1052,8 @@
edge_flags, dst, stride,
top_sb_edge, uv_mode,
&angle, uv_t_dim->w,
- uv_t_dim->h, edge);
+ uv_t_dim->h, edge
+ HIGHBD_CALL_SUFFIX);
angle |= intra_edge_filter_flag;
dsp->ipred.intra_pred[m](dst, stride, edge,
uv_t_dim->w * 4,
@@ -1051,7 +1062,8 @@
(4 * f->bw + ss_hor -
4 * (t->bx & ~ss_hor)) >> ss_hor,
(4 * f->bh + ss_ver -
- 4 * (t->by & ~ss_ver)) >> ss_ver);
+ 4 * (t->by & ~ss_ver)) >> ss_ver
+ HIGHBD_CALL_SUFFIX);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
uv_t_dim->h * 4, 2, "l");
@@ -1104,7 +1116,7 @@
uv_t_dim->w * 4, 3, "dq");
dsp->itx.itxfm_add[b->uvtx]
[txtp](dst, stride,
- cf, eob);
+ cf, eob HIGHBD_CALL_SUFFIX);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
hex_dump(dst, stride, uv_t_dim->w * 4,
uv_t_dim->h * 4, "recon");
@@ -1203,9 +1215,11 @@
t->by, t->by > ts->tiling.row_start,
ts->tiling.col_end, ts->tiling.row_end,
0, dst, f->cur.stride[0], top_sb_edge,
- m, &angle, bw4, bh4, tl_edge);
+ m, &angle, bw4, bh4, tl_edge
+ HIGHBD_CALL_SUFFIX);
dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
- tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0);
+ tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0
+ HIGHBD_CALL_SUFFIX);
const uint8_t *const ii_mask =
b->interintra_type == INTER_INTRA_BLEND ?
dav1d_ii_masks[bs][0][b->interintra_mode] :
@@ -1343,9 +1357,11 @@
ts->tiling.row_end >> ss_ver,
0, uvdst, f->cur.stride[1],
top_sb_edge, m,
- &angle, cbw4, cbh4, tl_edge);
+ &angle, cbw4, cbh4, tl_edge
+ HIGHBD_CALL_SUFFIX);
dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
- tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0);
+ tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0
+ HIGHBD_CALL_SUFFIX);
dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
cbw4 * 4, cbh4 * 4, ii_mask);
}
@@ -1378,17 +1394,18 @@
switch (b->comp_type) {
case COMP_INTER_AVG:
dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
- bw4 * 4, bh4 * 4);
+ bw4 * 4, bh4 * 4 HIGHBD_CALL_SUFFIX);
break;
case COMP_INTER_WEIGHTED_AVG:
jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
- bw4 * 4, bh4 * 4, jnt_weight);
+ bw4 * 4, bh4 * 4, jnt_weight HIGHBD_CALL_SUFFIX);
break;
case COMP_INTER_SEG:
dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
tmp[b->mask_sign], tmp[!b->mask_sign],
- bw4 * 4, bh4 * 4, seg_mask, b->mask_sign);
+ bw4 * 4, bh4 * 4, seg_mask,
+ b->mask_sign HIGHBD_CALL_SUFFIX);
mask = seg_mask;
break;
case COMP_INTER_WEDGE:
@@ -1395,7 +1412,7 @@
mask = dav1d_wedge_masks[bs][0][0][b->wedge_idx];
dsp->mc.mask(dst, f->cur.stride[0],
tmp[b->mask_sign], tmp[!b->mask_sign],
- bw4 * 4, bh4 * 4, mask);
+ bw4 * 4, bh4 * 4, mask HIGHBD_CALL_SUFFIX);
if (has_chroma)
mask = dav1d_wedge_masks[bs][chr_layout_idx][b->mask_sign][b->wedge_idx];
break;
@@ -1421,17 +1438,20 @@
switch (b->comp_type) {
case COMP_INTER_AVG:
dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
- bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver);
+ bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver
+ HIGHBD_CALL_SUFFIX);
break;
case COMP_INTER_WEIGHTED_AVG:
dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
- bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight);
+ bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight
+ HIGHBD_CALL_SUFFIX);
break;
case COMP_INTER_WEDGE:
case COMP_INTER_SEG:
dsp->mc.mask(uvdst, f->cur.stride[1],
tmp[b->mask_sign], tmp[!b->mask_sign],
- bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask);
+ bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask
+ HIGHBD_CALL_SUFFIX);
break;
}
}
@@ -1546,7 +1566,7 @@
dsp->itx.itxfm_add[b->uvtx]
[txtp](&uvdst[4 * x],
f->cur.stride[1],
- cf, eob);
+ cf, eob HIGHBD_CALL_SUFFIX);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
hex_dump(&uvdst[4 * x], f->cur.stride[1],
uvtx->w * 4, uvtx->h * 4, "recon");
@@ -1613,7 +1633,7 @@
f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w, src_w,
imin(img_h, h_end) + h_start, f->resize_step[!!pl],
- f->resize_start[!!pl]);
+ f->resize_start[!!pl] HIGHBD_CALL_SUFFIX);
}
}
if (f->seq_hdr->restoration) {
--- a/tests/checkasm/cdef.c
+++ b/tests/checkasm/cdef.c
@@ -32,9 +32,9 @@
#include "src/levels.h"
#include "src/cdef.h"
-static void init_tmp(pixel *buf, int n) {
+static void init_tmp(pixel *buf, int n, const int bitdepth_max) {
while (n--)
- *buf++ = rand() & ((1 << BITDEPTH) - 1);
+ *buf++ = rand() & bitdepth_max;
}
static void check_cdef_filter(const cdef_fn fn, const int w, const int h,
@@ -48,12 +48,8 @@
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel (*left)[2],
pixel *const top[2], int pri_strength, int sec_strength,
- int dir, int damping, enum CdefEdgeFlags edges);
+ int dir, int damping, enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX);
- init_tmp(src, 10 * 16 + 8);
- init_tmp(top, 16 * 2 + 8);
- init_tmp((pixel *) left,8 * 2);
-
if (check_func(fn, "%s_%dbpc", name, BITDEPTH)) {
for (int dir = 0; dir < 8; dir++) {
for (enum CdefEdgeFlags edges = 0; edges <= 0xf; edges++) {
@@ -60,21 +56,35 @@
memcpy(a_src, src, (10 * 16 + 8) * sizeof(pixel));
memcpy(c_src, src, (10 * 16 + 8) * sizeof(pixel));
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+ init_tmp(src, 10 * 16 + 8, bitdepth_max);
+ init_tmp(top, 16 * 2 + 8, bitdepth_max);
+ init_tmp((pixel *) left,8 * 2, bitdepth_max);
+
const int lvl = 1 + (rand() % 62);
- const int damping = 3 + (rand() & 3);
- const int pri_strength = (lvl >> 2) << (BITDEPTH - 8);
+ const int damping = 3 + (rand() & 3) + bitdepth_min_8;
+ const int pri_strength = (lvl >> 2) << bitdepth_min_8;
int sec_strength = lvl & 3;
sec_strength += sec_strength == 3;
+ sec_strength <<= bitdepth_min_8;
call_ref(c_src_ptr, 16 * sizeof(pixel), left,
(pixel *[2]) { top_ptr, top_ptr + 16 },
- pri_strength, sec_strength, dir, damping, edges);
+ pri_strength, sec_strength, dir, damping, edges
+ HIGHBD_TAIL_SUFFIX);
call_new(a_src_ptr, 16 * sizeof(pixel), left,
(pixel *[2]) { top_ptr, top_ptr + 16 },
- pri_strength, sec_strength, dir, damping, edges);
+ pri_strength, sec_strength, dir, damping, edges
+ HIGHBD_TAIL_SUFFIX);
if (memcmp(a_src, c_src, (10 * 16 + 8) * sizeof(pixel))) fail();
bench_new(a_src_ptr, 16 * sizeof(pixel), left,
(pixel *[2]) { top_ptr, top_ptr + 16 },
- pri_strength, sec_strength, dir, damping, edges);
+ pri_strength, sec_strength, dir, damping, edges
+ HIGHBD_TAIL_SUFFIX);
}
}
}
@@ -84,17 +94,22 @@
static void check_cdef_direction(const cdef_dir_fn fn) {
ALIGN_STK_32(pixel, src, 8 * 8,);
- declare_func(int, pixel *src, ptrdiff_t dst_stride, unsigned *var);
+ declare_func(int, pixel *src, ptrdiff_t dst_stride, unsigned *var
+ HIGHBD_DECL_SUFFIX);
- init_tmp(src, 64);
-
if (check_func(fn, "cdef_dir_%dbpc", BITDEPTH)) {
unsigned c_var, a_var;
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ init_tmp(src, 64, bitdepth_max);
- const int c_dir = call_ref(src, 8 * sizeof(pixel), &c_var);
- const int a_dir = call_new(src, 8 * sizeof(pixel), &a_var);
+ const int c_dir = call_ref(src, 8 * sizeof(pixel), &c_var HIGHBD_TAIL_SUFFIX);
+ const int a_dir = call_new(src, 8 * sizeof(pixel), &a_var HIGHBD_TAIL_SUFFIX);
if (c_var != a_var || c_dir != a_dir) fail();
- bench_new(src, 8 * sizeof(pixel), &a_var);
+ bench_new(src, 8 * sizeof(pixel), &a_var HIGHBD_TAIL_SUFFIX);
}
report("cdef_dir");
}
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -69,13 +69,13 @@
{ "looprestoration_8bpc", checkasm_check_looprestoration_8bpc },
{ "mc_8bpc", checkasm_check_mc_8bpc },
#endif
-#if CONFIG_10BPC
- { "cdef_10bpc", checkasm_check_cdef_10bpc },
- { "ipred_10bpc", checkasm_check_ipred_10bpc },
- { "itx_10bpc", checkasm_check_itx_10bpc },
- { "loopfilter_10bpc", checkasm_check_loopfilter_10bpc },
- { "looprestoration_10bpc", checkasm_check_looprestoration_10bpc },
- { "mc_10bpc", checkasm_check_mc_10bpc },
+#if CONFIG_16BPC
+ { "cdef_16bpc", checkasm_check_cdef_16bpc },
+ { "ipred_16bpc", checkasm_check_ipred_16bpc },
+ { "itx_16bpc", checkasm_check_itx_16bpc },
+ { "loopfilter_16bpc", checkasm_check_loopfilter_16bpc },
+ { "looprestoration_16bpc", checkasm_check_looprestoration_16bpc },
+ { "mc_16bpc", checkasm_check_mc_16bpc },
#endif
{ 0 }
};
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -36,23 +36,16 @@
#include "include/common/attributes.h"
#include "include/common/intops.h"
-void checkasm_check_cdef_8bpc(void);
-void checkasm_check_cdef_10bpc(void);
+#define decl_check_bitfns(name) \
+name##_8bpc(void); \
+name##_16bpc(void)
-void checkasm_check_ipred_8bpc(void);
-void checkasm_check_ipred_10bpc(void);
-
-void checkasm_check_itx_8bpc(void);
-void checkasm_check_itx_10bpc(void);
-
-void checkasm_check_loopfilter_8bpc(void);
-void checkasm_check_loopfilter_10bpc(void);
-
-void checkasm_check_looprestoration_8bpc(void);
-void checkasm_check_looprestoration_10bpc(void);
-
-void checkasm_check_mc_8bpc(void);
-void checkasm_check_mc_10bpc(void);
+decl_check_bitfns(void checkasm_check_cdef);
+decl_check_bitfns(void checkasm_check_ipred);
+decl_check_bitfns(void checkasm_check_itx);
+decl_check_bitfns(void checkasm_check_loopfilter);
+decl_check_bitfns(void checkasm_check_looprestoration);
+decl_check_bitfns(void checkasm_check_mc);
void *checkasm_check_func(void *func, const char *name, ...);
int checkasm_bench_func(void);
--- a/tests/checkasm/ipred.c
+++ b/tests/checkasm/ipred.c
@@ -70,7 +70,8 @@
pixel *const topleft = topleft_buf + 128;
declare_func(void, pixel *dst, ptrdiff_t stride, const pixel *topleft,
- int width, int height, int angle, int max_width, int max_height);
+ int width, int height, int angle, int max_width, int max_height
+ HIGHBD_DECL_SUFFIX);
for (int mode = 0; mode < N_IMPL_INTRA_PRED_MODES; mode++)
for (int w = 4; w <= (mode == FILTER_PRED ? 32 : 64); w <<= 1)
@@ -89,16 +90,25 @@
else if (mode == FILTER_PRED) /* filter_idx */
a = (rand() % 5) | (rand() & ~511);
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+
for (int i = -h * 2; i <= w * 2; i++)
- topleft[i] = rand() & ((1 << BITDEPTH) - 1);
+ topleft[i] = rand() & bitdepth_max;
const int maxw = 1 + (rand() % 128), maxh = 1 + (rand() % 128);
- call_ref(c_dst, stride, topleft, w, h, a, maxw, maxh);
- call_new(a_dst, stride, topleft, w, h, a, maxw, maxh);
+ call_ref(c_dst, stride, topleft, w, h, a, maxw, maxh
+ HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, stride, topleft, w, h, a, maxw, maxh
+ HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
- bench_new(a_dst, stride, topleft, w, h, a, 128, 128);
+ bench_new(a_dst, stride, topleft, w, h, a, 128, 128
+ HIGHBD_TAIL_SUFFIX);
}
}
report("intra_pred");
@@ -123,9 +133,14 @@
const ptrdiff_t stride = 32 * sizeof(pixel);
for (int w_pad = (w >> 2) - 1; w_pad >= 0; w_pad--) {
for (int h_pad = (h >> 2) - 1; h_pad >= 0; h_pad--) {
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
for (int y = 0; y < (h << ss_ver); y++)
for (int x = 0; x < (w << ss_hor); x++)
- luma[y * 32 + x] = rand() & ((1 << BITDEPTH) - 1);
+ luma[y * 32 + x] = rand() & bitdepth_max;
call_ref(c_dst, luma, stride, w_pad, h_pad, w, h);
call_new(a_dst, luma, stride, w_pad, h_pad, w, h);
@@ -149,7 +164,8 @@
pixel *const topleft = topleft_buf + 128;
declare_func(void, pixel *dst, ptrdiff_t stride, const pixel *topleft,
- int width, int height, const int16_t *ac, int alpha);
+ int width, int height, const int16_t *ac, int alpha
+ HIGHBD_DECL_SUFFIX);
for (int mode = 0; mode <= DC_128_PRED; mode += 1 + 2 * !mode)
for (int w = 4; w <= 32; w <<= 1)
@@ -158,26 +174,35 @@
{
for (int h = imax(w / 4, 4); h <= imin(w * 4, 32); h <<= 1)
{
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+
const ptrdiff_t stride = w * sizeof(pixel);
int alpha = ((rand() & 15) + 1) * (1 - (rand() & 2));
for (int i = -h * 2; i <= w * 2; i++)
- topleft[i] = rand() & ((1 << BITDEPTH) - 1);
+ topleft[i] = rand() & bitdepth_max;
int luma_avg = w * h >> 1;
for (int i = 0; i < w * h; i++)
- luma_avg += ac[i] = rand() & ((1 << BITDEPTH) - 1) << 3;
+ luma_avg += ac[i] = rand() & (bitdepth_max << 3);
luma_avg /= w * h;
for (int i = 0; i < w * h; i++)
ac[i] -= luma_avg;
- call_ref(c_dst, stride, topleft, w, h, ac, alpha);
- call_new(a_dst, stride, topleft, w, h, ac, alpha);
+ call_ref(c_dst, stride, topleft, w, h, ac, alpha
+ HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, stride, topleft, w, h, ac, alpha
+ HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
- bench_new(a_dst, stride, topleft, w, h, ac, alpha);
+ bench_new(a_dst, stride, topleft, w, h, ac, alpha
+ HIGHBD_TAIL_SUFFIX);
}
}
report("cfl_pred");
@@ -196,10 +221,15 @@
if (check_func(c->pal_pred, "pal_pred_w%d_%dbpc", w, BITDEPTH))
for (int h = imax(w / 4, 4); h <= imin(w * 4, 64); h <<= 1)
{
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
const ptrdiff_t stride = w * sizeof(pixel);
for (int i = 0; i < 8; i++)
- pal[i] = rand() & ((1 << BITDEPTH) - 1);
+ pal[i] = rand() & bitdepth_max;
for (int i = 0; i < w * h; i++)
idx[i] = rand() & 7;
--- a/tests/checkasm/itx.c
+++ b/tests/checkasm/itx.c
@@ -163,7 +163,7 @@
static int ftx(coef *const buf, const enum RectTxfmSize tx,
const enum TxfmType txtp, const int w, const int h,
- const int subsh)
+ const int subsh, const int bitdepth_max)
{
double out[64 * 64], temp[64 * 64];
const double scale = scaling_factors[ctz(w * h) - 4];
@@ -173,7 +173,7 @@
double in[64], temp_out[64];
for (int i = 0; i < w; i++)
- in[i] = (rand() & ((2 << BITDEPTH) - 1)) - ((1 << BITDEPTH) - 1);
+ in[i] = (rand() & (2 * bitdepth_max + 1)) - bitdepth_max;
switch (itx_1d_types[txtp][0]) {
case DCT:
@@ -238,7 +238,8 @@
static const uint8_t subsh_iters[5] = { 2, 2, 3, 5, 5 };
- declare_func(void, pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob);
+ declare_func(void, pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob
+ HIGHBD_DECL_SUFFIX);
for (int i = 0; i < N_RECT_TX_SIZES; i++) {
const enum RectTxfmSize tx = txfm_size_order[i];
@@ -256,16 +257,23 @@
itx_1d_names[itx_1d_types[txtp][1]], subsh,
BITDEPTH))
{
- const int eob = ftx(coeff[0], tx, txtp, w, h, subsh);
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ const int eob = ftx(coeff[0], tx, txtp, w, h, subsh, bitdepth_max);
for (int j = 0; j < w * h; j++)
- c_dst[j] = a_dst[j] = rand() & ((1 << BITDEPTH) - 1);
+ c_dst[j] = a_dst[j] = rand() & bitdepth_max;
memcpy(coeff[1], coeff[0], sw * sh * sizeof(**coeff));
memcpy(coeff[2], coeff[0], sw * sh * sizeof(**coeff));
- call_ref(c_dst, w * sizeof(*c_dst), coeff[0], eob);
- call_new(a_dst, w * sizeof(*c_dst), coeff[1], eob);
+ call_ref(c_dst, w * sizeof(*c_dst), coeff[0], eob
+ HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, w * sizeof(*c_dst), coeff[1], eob
+ HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)) ||
memcmp(coeff[0], coeff[1], sw * sh * sizeof(**coeff)))
{
@@ -272,7 +280,8 @@
fail();
}
- bench_new(a_dst, w * sizeof(*c_dst), coeff[2], eob);
+ bench_new(a_dst, w * sizeof(*c_dst), coeff[2], eob
+ HIGHBD_TAIL_SUFFIX);
}
report("add_%dx%d", w, h);
}
--- a/tests/checkasm/loopfilter.c
+++ b/tests/checkasm/loopfilter.c
@@ -33,12 +33,13 @@
#include "src/loopfilter.h"
static void init_lpf_border(pixel *const dst, const ptrdiff_t stride,
- int E, int I, int H)
+ int E, int I, int H, const int bitdepth_max)
{
- const int F = 1 << (BITDEPTH - 8);
- E <<= BITDEPTH - 8;
- I <<= BITDEPTH - 8;
- H <<= BITDEPTH - 8;
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+ const int F = 1 << bitdepth_min_8;
+ E <<= bitdepth_min_8;
+ I <<= bitdepth_min_8;
+ H <<= bitdepth_min_8;
const int filter_type = rand() % 4;
const int edge_diff = rand() % ((E + 2) * 4) - 2 * (E + 2);
@@ -45,12 +46,12 @@
switch (filter_type) {
case 0: // random, unfiltered
for (int i = -8; i < 8; i++)
- dst[i * stride] = rand() & ((1 << BITDEPTH) - 1);
+ dst[i * stride] = rand() & bitdepth_max;
break;
case 1: // long flat
- dst[-8 * stride] = rand() & ((1 << BITDEPTH) - 1);
- dst[+7 * stride] = rand() & ((1 << BITDEPTH) - 1);
- dst[+0 * stride] = rand() & ((1 << BITDEPTH) - 1);
+ dst[-8 * stride] = rand() & bitdepth_max;
+ dst[+7 * stride] = rand() & bitdepth_max;
+ dst[+0 * stride] = rand() & bitdepth_max;
dst[-1 * stride] = iclip_pixel(dst[+0 * stride] + edge_diff);
for (int i = 1; i < 7; i++) {
dst[-(1 + i) * stride] = iclip_pixel(dst[-1 * stride] +
@@ -61,10 +62,10 @@
break;
case 2: // short flat
for (int i = 4; i < 8; i++) {
- dst[-(1 + i) * stride] = rand() & ((1 << BITDEPTH) - 1);
- dst[+(0 + i) * stride] = rand() & ((1 << BITDEPTH) - 1);
+ dst[-(1 + i) * stride] = rand() & bitdepth_max;
+ dst[+(0 + i) * stride] = rand() & bitdepth_max;
}
- dst[+0 * stride] = rand() & ((1 << BITDEPTH) - 1);
+ dst[+0 * stride] = rand() & bitdepth_max;
dst[-1 * stride] = iclip_pixel(dst[+0 * stride] + edge_diff);
for (int i = 1; i < 4; i++) {
dst[-(1 + i) * stride] = iclip_pixel(dst[-1 * stride] +
@@ -75,10 +76,10 @@
break;
case 3: // normal or hev
for (int i = 4; i < 8; i++) {
- dst[-(1 + i) * stride] = rand() & ((1 << BITDEPTH) - 1);
- dst[+(0 + i) * stride] = rand() & ((1 << BITDEPTH) - 1);
+ dst[-(1 + i) * stride] = rand() & bitdepth_max;
+ dst[+(0 + i) * stride] = rand() & bitdepth_max;
}
- dst[+0 * stride] = rand() & ((1 << BITDEPTH) - 1);
+ dst[+0 * stride] = rand() & bitdepth_max;
dst[-1 * stride] = iclip_pixel(dst[+0 * stride] + edge_diff);
for (int i = 1; i < 4; i++) {
dst[-(1 + i) * stride] = iclip_pixel(dst[-(0 + i) * stride] +
@@ -112,7 +113,7 @@
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const uint32_t *mask,
const uint8_t (*l)[4], ptrdiff_t b4_stride,
- const Av1FilterLUT *lut, int w);
+ const Av1FilterLUT *lut, int w HIGHBD_DECL_SUFFIX);
Av1FilterLUT lut;
const int sharp = rand() & 7;
@@ -150,6 +151,11 @@
l[j * 2 + 1][lf_idx] = rand() & 63;
}
}
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
for (int i = 0; i < 4 * n_blks; i++) {
const int x = i >> 2;
@@ -160,21 +166,21 @@
L = l[2 * x + 1][lf_idx] ? l[2 * x + 1][lf_idx] : l[2 * x][lf_idx];
}
init_lpf_border(c_dst + i * (dir ? 1 : 16), dir ? 128 : 1,
- lut.e[L], lut.i[L], L >> 4);
+ lut.e[L], lut.i[L], L >> 4, bitdepth_max);
}
memcpy(a_dst_mem, c_dst_mem, 128 * sizeof(pixel) * 16);
call_ref(c_dst, stride,
vmask, (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx], b4_stride,
- &lut, n_blks);
+ &lut, n_blks HIGHBD_TAIL_SUFFIX);
call_new(a_dst, stride,
vmask, (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx], b4_stride,
- &lut, n_blks);
+ &lut, n_blks HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst_mem, a_dst_mem, 128 * 16 * sizeof(*a_dst))) fail();
bench_new(a_dst, stride,
vmask, (const uint8_t(*)[4]) &l[dir ? 32 : 1][lf_idx], b4_stride,
- &lut, n_blks);
+ &lut, n_blks HIGHBD_TAIL_SUFFIX);
}
}
report(name);
--- a/tests/checkasm/looprestoration.c
+++ b/tests/checkasm/looprestoration.c
@@ -34,11 +34,11 @@
#include "src/tables.h"
static void init_tmp(pixel *buf, const ptrdiff_t stride,
- const int w, const int h)
+ const int w, const int h, const int bitdepth_max)
{
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++)
- buf[x] = rand() & ((1 << BITDEPTH) - 1);
+ buf[x] = rand() & bitdepth_max;
buf += PXSTRIDE(stride);
}
}
@@ -65,12 +65,9 @@
const pixel (*const left)[4],
const pixel *lpf, ptrdiff_t lpf_stride,
int w, int h, const int16_t filterh[7],
- const int16_t filterv[7], enum LrEdgeFlags edges);
+ const int16_t filterv[7], enum LrEdgeFlags edges
+ HIGHBD_DECL_SUFFIX);
- init_tmp(c_dst, 448 * sizeof(pixel), 448, 64);
- init_tmp(h_edge, 448 * sizeof(pixel), 448, 8);
- init_tmp((pixel *) left, 4 * sizeof(pixel), 4, 64);
-
for (int pl = 0; pl < 2; pl++) {
if (check_func(c->wiener, "wiener_%s_%dbpc",
pl ? "chroma" : "luma", BITDEPTH))
@@ -96,6 +93,16 @@
const int base_w = 1 + (rand() % 384);
const int base_h = 1 + (rand() & 63);
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+
+ init_tmp(c_dst, 448 * sizeof(pixel), 448, 64, bitdepth_max);
+ init_tmp(h_edge, 448 * sizeof(pixel), 448, 8, bitdepth_max);
+ init_tmp((pixel *) left, 4 * sizeof(pixel), 4, 64, bitdepth_max);
+
for (enum LrEdgeFlags edges = 0; edges <= 0xf; edges++) {
const int w = edges & LR_HAVE_RIGHT ? 256 : base_w;
const int h = edges & LR_HAVE_BOTTOM ? 64 : base_h;
@@ -104,16 +111,16 @@
call_ref(c_dst + 32, 448 * sizeof(pixel), left,
h_edge + 32, 448 * sizeof(pixel),
- w, h, filter_h, filter_v, edges);
+ w, h, filter_h, filter_v, edges HIGHBD_TAIL_SUFFIX);
call_new(a_dst + 32, 448 * sizeof(pixel), left,
h_edge + 32, 448 * sizeof(pixel),
- w, h, filter_h, filter_v, edges);
+ w, h, filter_h, filter_v, edges HIGHBD_TAIL_SUFFIX);
const int res = cmp2d(c_dst + 32, a_dst + 32, 448 * sizeof(pixel), w, h);
if (res != -1) fail();
}
bench_new(a_dst + 32, 448 * sizeof(pixel), left,
h_edge + 32, 448 * sizeof(pixel),
- 256, 64, filter_h, filter_v, 0xf);
+ 256, 64, filter_h, filter_v, 0xf HIGHBD_TAIL_SUFFIX);
}
}
report("wiener");
@@ -129,12 +136,9 @@
const pixel (*const left)[4],
const pixel *lpf, ptrdiff_t lpf_stride,
int w, int h, int sgr_idx,
- const int16_t sgr_wt[7], enum LrEdgeFlags edges);
+ const int16_t sgr_wt[7], enum LrEdgeFlags edges
+ HIGHBD_DECL_SUFFIX);
- init_tmp(c_dst, 448 * sizeof(pixel), 448, 64);
- init_tmp(h_edge, 448 * sizeof(pixel), 448, 8);
- init_tmp((pixel *) left, 4 * sizeof(pixel), 4, 64);
-
for (int sgr_idx = 14; sgr_idx >= 6; sgr_idx -= 4) {
if (check_func(c->selfguided, "selfguided_%s_%dbpc",
sgr_idx == 6 ? "mix" : sgr_idx == 10 ? "3x3" : "5x5", BITDEPTH))
@@ -147,6 +151,16 @@
const int base_w = 1 + (rand() % 384);
const int base_h = 1 + (rand() & 63);
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+
+ init_tmp(c_dst, 448 * sizeof(pixel), 448, 64, bitdepth_max);
+ init_tmp(h_edge, 448 * sizeof(pixel), 448, 8, bitdepth_max);
+ init_tmp((pixel *) left, 4 * sizeof(pixel), 4, 64, bitdepth_max);
+
for (enum LrEdgeFlags edges = 0; edges <= 0xf; edges++) {
const int w = edges & LR_HAVE_RIGHT ? 256 : base_w;
const int h = edges & LR_HAVE_BOTTOM ? 64 : base_h;
@@ -155,16 +169,16 @@
call_ref(c_dst + 32, 448 * sizeof(pixel), left,
h_edge + 32, 448 * sizeof(pixel),
- w, h, sgr_idx, sgr_wt, edges);
+ w, h, sgr_idx, sgr_wt, edges HIGHBD_TAIL_SUFFIX);
call_new(a_dst + 32, 448 * sizeof(pixel), left,
h_edge + 32, 448 * sizeof(pixel),
- w, h, sgr_idx, sgr_wt, edges);
+ w, h, sgr_idx, sgr_wt, edges HIGHBD_TAIL_SUFFIX);
const int res = cmp2d(c_dst + 32, a_dst + 32, 448 * sizeof(pixel), w, h);
if (res != -1) fail();
}
bench_new(a_dst + 32, 448 * sizeof(pixel), left,
h_edge + 32, 448 * sizeof(pixel),
- 256, 64, sgr_idx, sgr_wt, 0xf);
+ 256, 64, sgr_idx, sgr_wt, 0xf HIGHBD_TAIL_SUFFIX);
}
}
report("sgr");
--- a/tests/checkasm/mc.c
+++ b/tests/checkasm/mc.c
@@ -47,11 +47,9 @@
ALIGN_STK_32(pixel, a_dst, 128 * 128,);
const pixel *src = src_buf + 135 * 3 + 3;
- for (int i = 0; i < 135 * 135; i++)
- src_buf[i] = rand();
-
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
- ptrdiff_t src_stride, int w, int h, int mx, int my);
+ ptrdiff_t src_stride, int w, int h, int mx, int my
+ HIGHBD_DECL_SUFFIX);
for (int filter = 0; filter < N_2D_FILTERS; filter++)
for (int w = 2; w <= 128; w <<= 1)
@@ -64,15 +62,23 @@
for (int h = min; h <= max; h <<= 1) {
const int mx = (mxy & 1) ? rand() % 15 + 1 : 0;
const int my = (mxy & 2) ? rand() % 15 + 1 : 0;
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
- call_ref(c_dst, w, src, w, w, h, mx, my);
- call_new(a_dst, w, src, w, w, h, mx, my);
+ for (int i = 0; i < 135 * 135; i++)
+ src_buf[i] = rand() & bitdepth_max;
+
+ call_ref(c_dst, w, src, w, w, h, mx, my HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, w, src, w, w, h, mx, my HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
if (filter == FILTER_2D_8TAP_REGULAR ||
filter == FILTER_2D_BILINEAR)
- bench_new(a_dst, w, src, w, w, h, mx, my);
+ bench_new(a_dst, w, src, w, w, h, mx, my HIGHBD_TAIL_SUFFIX);
}
}
report("mc");
@@ -84,11 +90,8 @@
ALIGN_STK_32(int16_t, a_tmp, 128 * 128,);
const pixel *src = src_buf + 135 * 3 + 3;
- for (int i = 0; i < 135 * 135; i++)
- src_buf[i] = rand();
-
declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
- int w, int h, int mx, int my);
+ int w, int h, int mx, int my HIGHBD_DECL_SUFFIX);
for (int filter = 0; filter < N_2D_FILTERS; filter++)
for (int w = 4; w <= 128; w <<= 1)
@@ -99,28 +102,37 @@
{
const int mx = (mxy & 1) ? rand() % 15 + 1 : 0;
const int my = (mxy & 2) ? rand() % 15 + 1 : 0;
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
- call_ref(c_tmp, src, w, w, h, mx, my);
- call_new(a_tmp, src, w, w, h, mx, my);
+ for (int i = 0; i < 135 * 135; i++)
+ src_buf[i] = rand() & bitdepth_max;
+
+ call_ref(c_tmp, src, w, w, h, mx, my HIGHBD_TAIL_SUFFIX);
+ call_new(a_tmp, src, w, w, h, mx, my HIGHBD_TAIL_SUFFIX);
if (memcmp(c_tmp, a_tmp, w * h * sizeof(*c_tmp)))
fail();
if (filter == FILTER_2D_8TAP_REGULAR ||
filter == FILTER_2D_BILINEAR)
- bench_new(a_tmp, src, w, w, h, mx, my);
+ bench_new(a_tmp, src, w, w, h, mx, my HIGHBD_TAIL_SUFFIX);
}
report("mct");
}
static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
- int16_t (*const tmp)[128 * 128])
+ int16_t (*const tmp)[128 * 128], const int bitdepth_max)
{
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 135 * 135; j++)
- buf[j] = rand();
+ buf[j] = rand() & bitdepth_max;
c->mct[rand() % N_2D_FILTERS](tmp[i], buf + 135 * 3 + 3,
128 * sizeof(pixel), 128, 128,
- rand() & 15, rand() & 15);
+ rand() & 15, rand() & 15
+ HIGHBD_TAIL_SUFFIX);
}
}
@@ -129,21 +141,25 @@
ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,);
- init_tmp(c, c_dst, tmp);
-
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
- const int16_t *tmp2, int w, int h);
+ const int16_t *tmp2, int w, int h HIGHBD_DECL_SUFFIX);
for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH))
for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
{
- call_ref(c_dst, w, tmp[0], tmp[1], w, h);
- call_new(a_dst, w, tmp[0], tmp[1], w, h);
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ init_tmp(c, c_dst, tmp, bitdepth_max);
+ call_ref(c_dst, w, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, w, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
- bench_new(a_dst, w, tmp[0], tmp[1], w, h);
+ bench_new(a_dst, w, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
}
report("avg");
}
@@ -153,10 +169,8 @@
ALIGN_STK_32(pixel, c_dst, 135 * 135,);
ALIGN_STK_32(pixel, a_dst, 128 * 128,);
- init_tmp(c, c_dst, tmp);
-
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
- const int16_t *tmp2, int w, int h, int weight);
+ const int16_t *tmp2, int w, int h, int weight HIGHBD_DECL_SUFFIX);
for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH))
@@ -163,13 +177,19 @@
for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
{
int weight = rand() % 15 + 1;
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ init_tmp(c, c_dst, tmp, bitdepth_max);
- call_ref(c_dst, w, tmp[0], tmp[1], w, h, weight);
- call_new(a_dst, w, tmp[0], tmp[1], w, h, weight);
+ call_ref(c_dst, w, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, w, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
- bench_new(a_dst, w, tmp[0], tmp[1], w, h, weight);
+ bench_new(a_dst, w, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
}
report("w_avg");
}
@@ -180,23 +200,29 @@
ALIGN_STK_32(pixel, a_dst, 128 * 128,);
ALIGN_STK_32(uint8_t, mask, 128 * 128,);
- init_tmp(c, c_dst, tmp);
for (int i = 0; i < 128 * 128; i++)
mask[i] = rand() % 65;
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
- const int16_t *tmp2, int w, int h, const uint8_t *mask);
+ const int16_t *tmp2, int w, int h, const uint8_t *mask
+ HIGHBD_DECL_SUFFIX);
for (int w = 4; w <= 128; w <<= 1)
if (check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH))
for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
{
- call_ref(c_dst, w, tmp[0], tmp[1], w, h, mask);
- call_new(a_dst, w, tmp[0], tmp[1], w, h, mask);
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ init_tmp(c, c_dst, tmp, bitdepth_max);
+ call_ref(c_dst, w, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, w, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
- bench_new(a_dst, w, tmp[0], tmp[1], w, h, mask);
+ bench_new(a_dst, w, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
}
report("mask");
}
@@ -208,10 +234,9 @@
ALIGN_STK_32(uint8_t, c_mask, 128 * 128,);
ALIGN_STK_32(uint8_t, a_mask, 128 * 128,);
- init_tmp(c, c_dst, tmp);
-
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
- const int16_t *tmp2, int w, int h, uint8_t *mask, int sign);
+ const int16_t *tmp2, int w, int h, uint8_t *mask, int sign
+ HIGHBD_DECL_SUFFIX);
static const uint16_t ss[] = { 444, 422, 420 };
@@ -222,9 +247,17 @@
for (int h = imax(w / 4, 4); h <= imin(w * 4, 128); h <<= 1)
{
int sign = rand() & 1;
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ init_tmp(c, c_dst, tmp, bitdepth_max);
- call_ref(c_dst, w, tmp[0], tmp[1], w, h, c_mask, sign);
- call_new(a_dst, w, tmp[0], tmp[1], w, h, a_mask, sign);
+ call_ref(c_dst, w, tmp[0], tmp[1], w, h, c_mask, sign
+ HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, w, tmp[0], tmp[1], w, h, a_mask, sign
+ HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)) ||
memcmp(c_mask, a_mask, (w * h * sizeof(*c_mask)) >> i))
{
@@ -231,7 +264,8 @@
fail();
}
- bench_new(a_dst, w, tmp[0], tmp[1], w, h, a_mask, sign);
+ bench_new(a_dst, w, tmp[0], tmp[1], w, h, a_mask, sign
+ HIGHBD_TAIL_SUFFIX);
}
report("w_mask");
}
@@ -242,11 +276,6 @@
ALIGN_STK_32(pixel, a_dst, 32 * 32,);
ALIGN_STK_32(uint8_t, mask, 32 * 32,);
- for (int i = 0; i < 32 * 32; i++) {
- tmp[i] = rand() & ((1 << BITDEPTH) - 1);
- mask[i] = rand() % 65;
- }
-
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
int w, int h, const uint8_t *mask);
@@ -254,8 +283,17 @@
const ptrdiff_t dst_stride = w * sizeof(pixel);
if (check_func(c->blend, "blend_w%d_%dbpc", w, BITDEPTH))
for (int h = imax(w / 2, 4); h <= imin(w * 2, 32); h <<= 1) {
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ for (int i = 0; i < 32 * 32; i++) {
+ tmp[i] = rand() & bitdepth_max;
+ mask[i] = rand() % 65;
+ }
for (int i = 0; i < w * h; i++)
- c_dst[i] = a_dst[i] = rand() & ((1 << BITDEPTH) - 1);
+ c_dst[i] = a_dst[i] = rand() & bitdepth_max;
call_ref(c_dst, dst_stride, tmp, w, h, mask);
call_new(a_dst, dst_stride, tmp, w, h, mask);
@@ -273,9 +311,6 @@
ALIGN_STK_32(pixel, c_dst, 32 * 128,);
ALIGN_STK_32(pixel, a_dst, 32 * 128,);
- for (int i = 0; i < 32 * 128; i++)
- tmp[i] = rand() & ((1 << BITDEPTH) - 1);
-
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
int w, int h);
@@ -283,8 +318,16 @@
const ptrdiff_t dst_stride = w * sizeof(pixel);
if (check_func(c->blend_v, "blend_v_w%d_%dbpc", w, BITDEPTH))
for (int h = 2; h <= (w == 2 ? 64 : 128); h <<= 1) {
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+
for (int i = 0; i < w * h; i++)
- c_dst[i] = a_dst[i] = rand() & ((1 << BITDEPTH) - 1);
+ c_dst[i] = a_dst[i] = rand() & bitdepth_max;
+ for (int i = 0; i < 32 * 128; i++)
+ tmp[i] = rand() & bitdepth_max;
call_ref(c_dst, dst_stride, tmp, w, h);
call_new(a_dst, dst_stride, tmp, w, h);
@@ -302,9 +345,6 @@
ALIGN_STK_32(pixel, c_dst, 128 * 32,);
ALIGN_STK_32(pixel, a_dst, 128 * 32,);
- for (int i = 0; i < 128 * 32; i++)
- tmp[i] = rand() & ((1 << BITDEPTH) - 1);
-
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
int w, int h);
@@ -312,8 +352,15 @@
const ptrdiff_t dst_stride = w * sizeof(pixel);
if (check_func(c->blend_h, "blend_h_w%d_%dbpc", w, BITDEPTH))
for (int h = (w == 128 ? 4 : 2); h <= 32; h <<= 1) {
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
for (int i = 0; i < w * h; i++)
- c_dst[i] = a_dst[i] = rand() & ((1 << BITDEPTH) - 1);
+ c_dst[i] = a_dst[i] = rand() & bitdepth_max;
+ for (int i = 0; i < 128 * 32; i++)
+ tmp[i] = rand() & bitdepth_max;
call_ref(c_dst, dst_stride, tmp, w, h);
call_new(a_dst, dst_stride, tmp, w, h);
@@ -336,24 +383,30 @@
const ptrdiff_t src_stride = 15 * sizeof(pixel);
declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
- ptrdiff_t src_stride, const int16_t *abcd, int mx, int my);
+ ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
+ HIGHBD_DECL_SUFFIX);
if (check_func(c->warp8x8, "warp_8x8_%dbpc", BITDEPTH)) {
const int mx = (rand() & 0x1fff) - 0x800;
const int my = (rand() & 0x1fff) - 0x800;
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
for (int i = 0; i < 4; i++)
abcd[i] = (rand() & 0x1fff) - 0x800;
for (int i = 0; i < 15 * 15; i++)
- src_buf[i] = rand() & ((1 << BITDEPTH) - 1);
+ src_buf[i] = rand() & bitdepth_max;
- call_ref(c_dst, dst_stride, src, src_stride, abcd, mx, my);
- call_new(a_dst, dst_stride, src, src_stride, abcd, mx, my);
+ call_ref(c_dst, dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
if (memcmp(c_dst, a_dst, 8 * 8 * sizeof(*c_dst)))
fail();
- bench_new(a_dst, dst_stride, src, src_stride, abcd, mx, my);
+ bench_new(a_dst, dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
}
report("warp8x8");
}
@@ -367,24 +420,30 @@
const ptrdiff_t src_stride = 15 * sizeof(pixel);
declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,
- ptrdiff_t src_stride, const int16_t *abcd, int mx, int my);
+ ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
+ HIGHBD_DECL_SUFFIX);
if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {
const int mx = (rand() & 0x1fff) - 0x800;
const int my = (rand() & 0x1fff) - 0x800;
+#if BITDEPTH == 16
+ const int bitdepth_max = rand() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
for (int i = 0; i < 4; i++)
abcd[i] = (rand() & 0x1fff) - 0x800;
for (int i = 0; i < 15 * 15; i++)
- src_buf[i] = rand() & ((1 << BITDEPTH) - 1);
+ src_buf[i] = rand() & bitdepth_max;
- call_ref(c_tmp, 8, src, src_stride, abcd, mx, my);
- call_new(a_tmp, 8, src, src_stride, abcd, mx, my);
+ call_ref(c_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
+ call_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
if (memcmp(c_tmp, a_tmp, 8 * 8 * sizeof(*c_tmp)))
fail();
- bench_new(a_tmp, 8, src, src_stride, abcd, mx, my);
+ bench_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
}
report("warp8x8t");
}