ref: a7b92c762db7dd2d7dd5b411be29b703311fdfb5
dir: /src/x86/mc_init_tmpl.c/
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "src/cpu.h"
#include "src/mc.h"
/*
 * Prototypes for the instruction-set specific routines referenced by
 * dav1d_mc_dsp_init_x86(). They are grouped by symbol suffix, in the same
 * order in which the init function walks the CPU feature tiers.
 */

/* ---- SSE2 ---- */
decl_mct_fn(dav1d_prep_8tap_regular_sse2);
decl_mct_fn(dav1d_prep_8tap_regular_smooth_sse2);
decl_mct_fn(dav1d_prep_8tap_regular_sharp_sse2);
decl_mct_fn(dav1d_prep_8tap_smooth_sse2);
decl_mct_fn(dav1d_prep_8tap_smooth_regular_sse2);
decl_mct_fn(dav1d_prep_8tap_smooth_sharp_sse2);
decl_mct_fn(dav1d_prep_8tap_sharp_sse2);
decl_mct_fn(dav1d_prep_8tap_sharp_regular_sse2);
decl_mct_fn(dav1d_prep_8tap_sharp_smooth_sse2);
decl_mct_fn(dav1d_prep_bilin_sse2);
decl_warp8x8_fn(dav1d_warp_affine_8x8_sse2);
decl_warp8x8t_fn(dav1d_warp_affine_8x8t_sse2);

/* ---- SSSE3 ---- */
decl_mc_fn(dav1d_put_8tap_regular_ssse3);
decl_mc_fn(dav1d_put_8tap_regular_smooth_ssse3);
decl_mc_fn(dav1d_put_8tap_regular_sharp_ssse3);
decl_mc_fn(dav1d_put_8tap_smooth_ssse3);
decl_mc_fn(dav1d_put_8tap_smooth_regular_ssse3);
decl_mc_fn(dav1d_put_8tap_smooth_sharp_ssse3);
decl_mc_fn(dav1d_put_8tap_sharp_ssse3);
decl_mc_fn(dav1d_put_8tap_sharp_regular_ssse3);
decl_mc_fn(dav1d_put_8tap_sharp_smooth_ssse3);
decl_mc_fn(dav1d_put_bilin_ssse3);
decl_mct_fn(dav1d_prep_8tap_regular_ssse3);
decl_mct_fn(dav1d_prep_8tap_regular_smooth_ssse3);
decl_mct_fn(dav1d_prep_8tap_regular_sharp_ssse3);
decl_mct_fn(dav1d_prep_8tap_smooth_ssse3);
decl_mct_fn(dav1d_prep_8tap_smooth_regular_ssse3);
decl_mct_fn(dav1d_prep_8tap_smooth_sharp_ssse3);
decl_mct_fn(dav1d_prep_8tap_sharp_ssse3);
decl_mct_fn(dav1d_prep_8tap_sharp_regular_ssse3);
decl_mct_fn(dav1d_prep_8tap_sharp_smooth_ssse3);
decl_mct_fn(dav1d_prep_bilin_ssse3);
decl_avg_fn(dav1d_avg_ssse3);
decl_w_avg_fn(dav1d_w_avg_ssse3);
decl_mask_fn(dav1d_mask_ssse3);
decl_w_mask_fn(dav1d_w_mask_420_ssse3);
decl_blend_fn(dav1d_blend_ssse3);
decl_blend_dir_fn(dav1d_blend_v_ssse3);
decl_blend_dir_fn(dav1d_blend_h_ssse3);
decl_warp8x8_fn(dav1d_warp_affine_8x8_ssse3);
decl_warp8x8t_fn(dav1d_warp_affine_8x8t_ssse3);
decl_emu_edge_fn(dav1d_emu_edge_ssse3);
decl_resize_fn(dav1d_resize_ssse3);

/* ---- SSE4.1 (symbols use the "sse4" suffix) ---- */
decl_warp8x8_fn(dav1d_warp_affine_8x8_sse4);
decl_warp8x8t_fn(dav1d_warp_affine_8x8t_sse4);

/* ---- AVX2 ---- */
decl_mc_fn(dav1d_put_8tap_regular_avx2);
decl_mc_fn(dav1d_put_8tap_regular_smooth_avx2);
decl_mc_fn(dav1d_put_8tap_regular_sharp_avx2);
decl_mc_fn(dav1d_put_8tap_smooth_avx2);
decl_mc_fn(dav1d_put_8tap_smooth_regular_avx2);
decl_mc_fn(dav1d_put_8tap_smooth_sharp_avx2);
decl_mc_fn(dav1d_put_8tap_sharp_avx2);
decl_mc_fn(dav1d_put_8tap_sharp_regular_avx2);
decl_mc_fn(dav1d_put_8tap_sharp_smooth_avx2);
decl_mc_fn(dav1d_put_bilin_avx2);
decl_mct_fn(dav1d_prep_8tap_regular_avx2);
decl_mct_fn(dav1d_prep_8tap_regular_smooth_avx2);
decl_mct_fn(dav1d_prep_8tap_regular_sharp_avx2);
decl_mct_fn(dav1d_prep_8tap_smooth_avx2);
decl_mct_fn(dav1d_prep_8tap_smooth_regular_avx2);
decl_mct_fn(dav1d_prep_8tap_smooth_sharp_avx2);
decl_mct_fn(dav1d_prep_8tap_sharp_avx2);
decl_mct_fn(dav1d_prep_8tap_sharp_regular_avx2);
decl_mct_fn(dav1d_prep_8tap_sharp_smooth_avx2);
decl_mct_fn(dav1d_prep_bilin_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_smooth_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_regular_sharp_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_regular_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_smooth_sharp_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_regular_avx2);
decl_mc_scaled_fn(dav1d_put_8tap_scaled_sharp_smooth_avx2);
decl_mc_scaled_fn(dav1d_put_bilin_scaled_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_smooth_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_regular_sharp_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_regular_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_smooth_sharp_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_regular_avx2);
decl_mct_scaled_fn(dav1d_prep_8tap_scaled_sharp_smooth_avx2);
decl_mct_scaled_fn(dav1d_prep_bilin_scaled_avx2);
decl_avg_fn(dav1d_avg_avx2);
decl_w_avg_fn(dav1d_w_avg_avx2);
decl_mask_fn(dav1d_mask_avx2);
decl_w_mask_fn(dav1d_w_mask_444_avx2);
decl_w_mask_fn(dav1d_w_mask_422_avx2);
decl_w_mask_fn(dav1d_w_mask_420_avx2);
decl_blend_fn(dav1d_blend_avx2);
decl_blend_dir_fn(dav1d_blend_v_avx2);
decl_blend_dir_fn(dav1d_blend_h_avx2);
decl_warp8x8_fn(dav1d_warp_affine_8x8_avx2);
decl_warp8x8t_fn(dav1d_warp_affine_8x8t_avx2);
decl_emu_edge_fn(dav1d_emu_edge_avx2);
decl_resize_fn(dav1d_resize_avx2);

/* ---- AVX-512 (Ice Lake feature set, "avx512icl" suffix) ---- */
decl_mct_fn(dav1d_prep_8tap_regular_avx512icl);
decl_mct_fn(dav1d_prep_8tap_regular_smooth_avx512icl);
decl_mct_fn(dav1d_prep_8tap_regular_sharp_avx512icl);
decl_mct_fn(dav1d_prep_8tap_smooth_avx512icl);
decl_mct_fn(dav1d_prep_8tap_smooth_regular_avx512icl);
decl_mct_fn(dav1d_prep_8tap_smooth_sharp_avx512icl);
decl_mct_fn(dav1d_prep_8tap_sharp_avx512icl);
decl_mct_fn(dav1d_prep_8tap_sharp_regular_avx512icl);
decl_mct_fn(dav1d_prep_8tap_sharp_smooth_avx512icl);
decl_mct_fn(dav1d_prep_bilin_avx512icl);
decl_avg_fn(dav1d_avg_avx512icl);
decl_w_avg_fn(dav1d_w_avg_avx512icl);
decl_mask_fn(dav1d_mask_avx512icl);
decl_w_mask_fn(dav1d_w_mask_444_avx512icl);
decl_w_mask_fn(dav1d_w_mask_422_avx512icl);
decl_w_mask_fn(dav1d_w_mask_420_avx512icl);
/*
 * Install the x86 SIMD motion-compensation routines into the DSP context `c`.
 *
 * The CPU feature mask is read once from dav1d_get_cpu_flags() and the tiers
 * are visited in a fixed order: SSE2, SSSE3, SSE4.1, and (only when
 * ARCH_X86_64 is set) AVX2 followed by AVX-512 ICL. The function returns at
 * the first tier whose flag is missing. Each tier that is reached overwrites
 * the pointers written by earlier tiers, so for every slot the routine of the
 * last tier reached that provides it is the one left in place.
 *
 * Pointers are only assigned when BITDEPTH == 8 (the AVX-512 ICL tier also
 * requires HAVE_AVX512ICL); slots not assigned here are left untouched.
 */
COLD void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
#define init_mc_fn(type, name, suffix) \
    c->mc[type] = dav1d_put_##name##_##suffix
#define init_mct_fn(type, name, suffix) \
    c->mct[type] = dav1d_prep_##name##_##suffix
#define init_mc_scaled_fn(type, name, suffix) \
    c->mc_scaled[type] = dav1d_put_##name##_##suffix
#define init_mct_scaled_fn(type, name, suffix) \
    c->mct_scaled[type] = dav1d_prep_##name##_##suffix
/*
 * Fill all ten filter slots of one table in a single statement.
 *   init   - one of the init_*_fn macros above (selects table and symbol prefix)
 *   tap    - name stem of the 8-tap routines: 8tap_ or 8tap_scaled_
 *   bilin  - name of the bilinear routine: bilin or bilin_scaled
 *   suffix - instruction-set suffix of the symbols
 */
#define init_mc_filter_set(init, tap, bilin, suffix) do { \
    init(FILTER_2D_8TAP_REGULAR,        tap##regular,        suffix); \
    init(FILTER_2D_8TAP_REGULAR_SMOOTH, tap##regular_smooth, suffix); \
    init(FILTER_2D_8TAP_REGULAR_SHARP,  tap##regular_sharp,  suffix); \
    init(FILTER_2D_8TAP_SMOOTH_REGULAR, tap##smooth_regular, suffix); \
    init(FILTER_2D_8TAP_SMOOTH,         tap##smooth,         suffix); \
    init(FILTER_2D_8TAP_SMOOTH_SHARP,   tap##smooth_sharp,   suffix); \
    init(FILTER_2D_8TAP_SHARP_REGULAR,  tap##sharp_regular,  suffix); \
    init(FILTER_2D_8TAP_SHARP_SMOOTH,   tap##sharp_smooth,   suffix); \
    init(FILTER_2D_8TAP_SHARP,          tap##sharp,          suffix); \
    init(FILTER_2D_BILINEAR,            bilin,               suffix); \
} while (0)

    const unsigned cpu_flags = dav1d_get_cpu_flags();

    /* ---- SSE2: prep filters and affine warp ---- */
    if ((cpu_flags & DAV1D_X86_CPU_FLAG_SSE2) == 0)
        return;
#if BITDEPTH == 8
    init_mc_filter_set(init_mct_fn, 8tap_, bilin, sse2);
    c->warp8x8 = dav1d_warp_affine_8x8_sse2;
    c->warp8x8t = dav1d_warp_affine_8x8t_sse2;
#endif

    /* ---- SSSE3: put/prep filters, compound/blend helpers, warp, edge, resize ---- */
    if ((cpu_flags & DAV1D_X86_CPU_FLAG_SSSE3) == 0)
        return;
#if BITDEPTH == 8
    init_mc_filter_set(init_mc_fn, 8tap_, bilin, ssse3);
    init_mc_filter_set(init_mct_fn, 8tap_, bilin, ssse3);
    c->avg = dav1d_avg_ssse3;
    c->w_avg = dav1d_w_avg_ssse3;
    c->mask = dav1d_mask_ssse3;
    /* Only the 4:2:0 w_mask slot has an SSSE3 routine in this file. */
    c->w_mask[2] = dav1d_w_mask_420_ssse3;
    c->blend = dav1d_blend_ssse3;
    c->blend_v = dav1d_blend_v_ssse3;
    c->blend_h = dav1d_blend_h_ssse3;
    c->warp8x8 = dav1d_warp_affine_8x8_ssse3;
    c->warp8x8t = dav1d_warp_affine_8x8t_ssse3;
    c->emu_edge = dav1d_emu_edge_ssse3;
    c->resize = dav1d_resize_ssse3;
#endif

    /* ---- SSE4.1: affine warp only ---- */
    if ((cpu_flags & DAV1D_X86_CPU_FLAG_SSE41) == 0)
        return;
#if BITDEPTH == 8
    c->warp8x8 = dav1d_warp_affine_8x8_sse4;
    c->warp8x8t = dav1d_warp_affine_8x8t_sse4;
#endif

#if ARCH_X86_64
    /* ---- AVX2: everything above plus the scaled put/prep tables ---- */
    if ((cpu_flags & DAV1D_X86_CPU_FLAG_AVX2) == 0)
        return;
#if BITDEPTH == 8
    init_mc_filter_set(init_mc_fn, 8tap_, bilin, avx2);
    init_mc_filter_set(init_mct_fn, 8tap_, bilin, avx2);
    init_mc_filter_set(init_mc_scaled_fn, 8tap_scaled_, bilin_scaled, avx2);
    init_mc_filter_set(init_mct_scaled_fn, 8tap_scaled_, bilin_scaled, avx2);
    c->avg = dav1d_avg_avx2;
    c->w_avg = dav1d_w_avg_avx2;
    c->mask = dav1d_mask_avx2;
    c->w_mask[0] = dav1d_w_mask_444_avx2;
    c->w_mask[1] = dav1d_w_mask_422_avx2;
    c->w_mask[2] = dav1d_w_mask_420_avx2;
    c->blend = dav1d_blend_avx2;
    c->blend_v = dav1d_blend_v_avx2;
    c->blend_h = dav1d_blend_h_avx2;
    c->warp8x8 = dav1d_warp_affine_8x8_avx2;
    c->warp8x8t = dav1d_warp_affine_8x8t_avx2;
    c->emu_edge = dav1d_emu_edge_avx2;
    c->resize = dav1d_resize_avx2;
#endif

    /* ---- AVX-512 ICL: prep filters and compound helpers ---- */
    if ((cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL) == 0)
        return;
#if HAVE_AVX512ICL && BITDEPTH == 8
    init_mc_filter_set(init_mct_fn, 8tap_, bilin, avx512icl);
    c->avg = dav1d_avg_avx512icl;
    c->w_avg = dav1d_w_avg_avx512icl;
    c->mask = dav1d_mask_avx512icl;
    c->w_mask[0] = dav1d_w_mask_444_avx512icl;
    c->w_mask[1] = dav1d_w_mask_422_avx512icl;
    c->w_mask[2] = dav1d_w_mask_420_avx512icl;
#endif
#endif
#undef init_mc_filter_set
}