ref: c5fb0eb8d9a9cf0e593fbf06730f5b8501967009
dir: /vp8/encoder/x86/csystemdependent.c/
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "variance.h"
#include "onyx_int.h"
/*
 * Run-time dispatch pointers for the encoder kernels.
 * Each pointer defined in this section is assigned by
 * vp8_cmachine_specific_config() to a C, MMX or Willamette-class
 * implementation. The SADFunction, variance_function,
 * sub_pixel_variance_function, MACROBLOCK, BLOCK and BLOCKD types are
 * declared outside this file.
 */
// SADFunction dispatch pointers; the block size is part of each name.
SADFunction *vp8_sad16x16;
SADFunction *vp8_sad16x8;
SADFunction *vp8_sad8x16;
SADFunction *vp8_sad8x8;
SADFunction *vp8_sad4x4;
// variance_function dispatch pointers (vp8_mse16x16 shares the same type).
variance_function *vp8_variance4x4;
variance_function *vp8_variance8x8;
variance_function *vp8_variance8x16;
variance_function *vp8_variance16x8;
variance_function *vp8_variance16x16;
variance_function *vp8_mse16x16;
// sub_pixel_variance_function dispatch pointers.
sub_pixel_variance_function *vp8_sub_pixel_variance4x4;
sub_pixel_variance_function *vp8_sub_pixel_variance8x8;
sub_pixel_variance_function *vp8_sub_pixel_variance8x16;
sub_pixel_variance_function *vp8_sub_pixel_variance16x8;
sub_pixel_variance_function *vp8_sub_pixel_variance16x16;
// Block error and luma subtract dispatch pointers.
int (*vp8_block_error)(short *, short *);
int (*vp8_mbblock_error)(MACROBLOCK *mb, int dc);
void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride);
// Candidate implementations for the three pointers directly above
// (C, MMX and, for the error functions, XMM variants).
extern void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride);
extern void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride);
extern int vp8_block_error_c(short *, short *);
extern int vp8_mbblock_error_c(MACROBLOCK *x, int dc);
extern int vp8_block_error_mmx(short *, short *);
extern int vp8_mbblock_error_mmx(MACROBLOCK *x, int dc);
extern int vp8_block_error_xmm(short *, short *);
extern int vp8_mbblock_error_xmm(MACROBLOCK *x, int dc);
// Remaining dispatch pointers: chroma error, forward DCT, subtract,
// quantizer and the get*var / prediction-error helpers.
int (*vp8_mbuverror)(MACROBLOCK *mb);
unsigned int (*vp8_get_mb_ss)(short *);
void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
void (*vp8_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch);
void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride);
// c imports
// Portable C implementations, defined in other translation units.
// vp8_cmachine_specific_config() binds these when neither MMX nor
// Willamette support is reported; the quantizer and fdct entries are
// bound on every path.
extern int vp8_mbuverror_c(MACROBLOCK *mb);
extern void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
extern void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
// vp8_fast_fdct4x4_c is declared but not currently bound to any pointer
// in this file (vp8_fast_fdct4x4 is pointed at vp8_short_fdct4x4_c).
extern void vp8_fast_fdct4x4_c(short *input, short *output, int pitch);
extern void vp8_fast_fdct8x4_c(short *input, short *output, int pitch);
extern void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch);
extern void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);
extern SADFunction vp8_sad16x16_c;
extern SADFunction vp8_sad16x8_c;
extern SADFunction vp8_sad8x16_c;
extern SADFunction vp8_sad8x8_c;
extern SADFunction vp8_sad4x4_c;
// The _wmt and _mmx SAD variants are declared here, next to the C ones,
// rather than in the mmx / wmt import sections further down.
extern SADFunction vp8_sad16x16_wmt;
extern SADFunction vp8_sad16x8_wmt;
extern SADFunction vp8_sad8x16_wmt;
extern SADFunction vp8_sad8x8_wmt;
extern SADFunction vp8_sad4x4_wmt;
extern SADFunction vp8_sad16x16_mmx;
extern SADFunction vp8_sad16x8_mmx;
extern SADFunction vp8_sad8x16_mmx;
extern SADFunction vp8_sad8x8_mmx;
extern SADFunction vp8_sad4x4_mmx;
extern variance_function vp8_variance16x16_c;
extern variance_function vp8_variance8x16_c;
extern variance_function vp8_variance16x8_c;
extern variance_function vp8_variance8x8_c;
extern variance_function vp8_variance4x4_c;
extern variance_function vp8_mse16x16_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_c;
extern unsigned int vp8_get_mb_ss_c(short *);
extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
// Single declaration of vp8_get8x8var_c; an identical second prototype
// that used to sit near the top of this section was redundant.
extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride);
// mmx imports
// MMX implementations, defined in other translation units.
// vp8_cmachine_specific_config() binds these when mmx_enabled is set and
// wmt_enabled is not; several are also reused on the Willamette path.
extern int vp8_mbuverror_mmx(MACROBLOCK *mb);
// Declared but not bound: the config function keeps vp8_fast_quantize_b
// on the C routine on every path.
extern void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d);
extern void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch);
extern void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
// The MMX fdct routines are only referenced from "#if 0" regions of the
// config function (short variants) or not at all (fast variants).
extern void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch);
extern void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch);
extern void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch);
extern void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch);
extern variance_function vp8_variance4x4_mmx;
extern variance_function vp8_variance8x8_mmx;
extern variance_function vp8_variance8x16_mmx;
extern variance_function vp8_variance16x8_mmx;
extern variance_function vp8_variance16x16_mmx;
extern variance_function vp8_mse16x16_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_mmx;
extern unsigned int vp8_get16x16pred_error_mmx(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
extern unsigned int vp8_get_mb_ss_mmx(short *);
extern unsigned int vp8_get8x8var_mmx(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
extern unsigned int vp8_get16x16var_mmx(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
// Also bound on the Willamette path; no wmt/sse2 variant of this routine
// is declared in this file.
extern unsigned int vp8_get4x4sse_cs_mmx(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride);
// wmt imports
// Implementations selected when wmt_enabled is set. The symbols in this
// section carry a mix of _wmt, _sse, _sse2 and _xmm suffixes.
extern int vp8_mbuverror_xmm(MACROBLOCK *mb);
// Declared but not bound: the config function keeps vp8_fast_quantize_b
// on the C routine on every path.
extern void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d);
// Declared but not referenced by the config function.
extern void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch);
// The 4x4 and 8x8 wmt variance routines are declared but not bound; the
// Willamette path uses the MMX versions for those two sizes.
extern variance_function vp8_variance4x4_wmt;
extern variance_function vp8_variance8x8_wmt;
extern variance_function vp8_variance8x16_wmt;
extern variance_function vp8_variance16x8_wmt;
extern variance_function vp8_variance16x16_wmt;
extern variance_function vp8_mse16x16_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_wmt;
extern unsigned int vp8_get16x16pred_error_sse2(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
extern unsigned int vp8_get_mb_ss_sse2(short *src_ptr);
extern unsigned int vp8_get8x8var_sse2(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
extern unsigned int vp8_get16x16var_sse2(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
// CPU capability query used by vp8_cmachine_specific_config(). After the
// call, wmt_enabled and mmx_enabled are tested as booleans; xmm_enabled
// is passed in but not consulted anywhere in this file.
extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
void vp8_cmachine_specific_config(void)
{
    int mmx_enabled;
    int xmm_enabled;
    int wmt_enabled;
    vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
    if (wmt_enabled)         // Willamette
    {
        // Willamette instruction set available:
        vp8_mbuverror                = vp8_mbuverror_xmm;
        /* The sse quantizer has not been updated to match the new exact
         * quantizer introduced in commit e04e2935
         */
        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
#if 0 //new fdct
        vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
        vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
        vp8_fast_fdct4x4              = vp8_short_fdct4x4_mmx;
        vp8_fast_fdct8x4              = vp8_short_fdct8x4_wmt;
#else
        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
#endif
        vp8_subtract_b                = vp8_subtract_b_mmx;
        vp8_subtract_mbuv             = vp8_subtract_mbuv_mmx;
        vp8_variance4x4              = vp8_variance4x4_mmx;
        vp8_variance8x8              = vp8_variance8x8_mmx;
        vp8_variance8x16             = vp8_variance8x16_wmt;
        vp8_variance16x8             = vp8_variance16x8_wmt;
        vp8_variance16x16            = vp8_variance16x16_wmt;
        vp8_mse16x16                 = vp8_mse16x16_wmt;
        vp8_sub_pixel_variance4x4      = vp8_sub_pixel_variance4x4_wmt;
        vp8_sub_pixel_variance8x8      = vp8_sub_pixel_variance8x8_wmt;
        vp8_sub_pixel_variance8x16     = vp8_sub_pixel_variance8x16_wmt;
        vp8_sub_pixel_variance16x8     = vp8_sub_pixel_variance16x8_wmt;
        vp8_sub_pixel_variance16x16    = vp8_sub_pixel_variance16x16_wmt;
        vp8_get_mb_ss                  = vp8_get_mb_ss_sse2;
        vp8_get16x16pred_error        = vp8_get16x16pred_error_sse2;
        vp8_get8x8var                = vp8_get8x8var_sse2;
        vp8_get16x16var              = vp8_get16x16var_sse2;
        vp8_get4x4sse_cs             = vp8_get4x4sse_cs_mmx;
        vp8_sad16x16                 = vp8_sad16x16_wmt;
        vp8_sad16x8                  = vp8_sad16x8_wmt;
        vp8_sad8x16                  = vp8_sad8x16_wmt;
        vp8_sad8x8                   = vp8_sad8x8_wmt;
        vp8_sad4x4                   = vp8_sad4x4_wmt;
        vp8_block_error               = vp8_block_error_xmm;
        vp8_mbblock_error             = vp8_mbblock_error_xmm;
        vp8_subtract_mby              = vp8_subtract_mby_mmx;
    }
    else if (mmx_enabled)
    {
        // MMX instruction set available:
        vp8_mbuverror                = vp8_mbuverror_mmx;
        /* The mmx quantizer has not been updated to match the new exact
         * quantizer introduced in commit e04e2935
         */
        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
#if 0 // new fdct
        vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
        vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
        vp8_fast_fdct4x4              = vp8_short_fdct4x4_mmx;
        vp8_fast_fdct8x4              = vp8_short_fdct8x4_mmx;
#else
        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
#endif
        vp8_subtract_b                = vp8_subtract_b_mmx;
        vp8_subtract_mbuv             = vp8_subtract_mbuv_mmx;
        vp8_variance4x4              = vp8_variance4x4_mmx;
        vp8_variance8x8              = vp8_variance8x8_mmx;
        vp8_variance8x16             = vp8_variance8x16_mmx;
        vp8_variance16x8             = vp8_variance16x8_mmx;
        vp8_variance16x16            = vp8_variance16x16_mmx;
        vp8_mse16x16                 = vp8_mse16x16_mmx;
        vp8_sub_pixel_variance4x4      = vp8_sub_pixel_variance4x4_mmx;
        vp8_sub_pixel_variance8x8      = vp8_sub_pixel_variance8x8_mmx;
        vp8_sub_pixel_variance8x16     = vp8_sub_pixel_variance8x16_mmx;
        vp8_sub_pixel_variance16x8     = vp8_sub_pixel_variance16x8_mmx;
        vp8_sub_pixel_variance16x16    = vp8_sub_pixel_variance16x16_mmx;
        vp8_get_mb_ss                  = vp8_get_mb_ss_mmx;
        vp8_get16x16pred_error        = vp8_get16x16pred_error_mmx;
        vp8_get8x8var                = vp8_get8x8var_mmx;
        vp8_get16x16var              = vp8_get16x16var_mmx;
        vp8_get4x4sse_cs             = vp8_get4x4sse_cs_mmx;
        vp8_sad16x16                 = vp8_sad16x16_mmx;
        vp8_sad16x8                  = vp8_sad16x8_mmx;
        vp8_sad8x16                  = vp8_sad8x16_mmx;
        vp8_sad8x8                   = vp8_sad8x8_mmx;
        vp8_sad4x4                   = vp8_sad4x4_mmx;
        vp8_block_error               = vp8_block_error_mmx;
        vp8_mbblock_error             = vp8_mbblock_error_mmx;
        vp8_subtract_mby              = vp8_subtract_mby_mmx;
    }
    else
    {
        // Pure C:
        vp8_mbuverror                = vp8_mbuverror_c;
        vp8_fast_quantize_b          = vp8_fast_quantize_b_c;
        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
        vp8_subtract_b                = vp8_subtract_b_c;
        vp8_subtract_mbuv             = vp8_subtract_mbuv_c;
        vp8_variance4x4              = vp8_variance4x4_c;
        vp8_variance8x8              = vp8_variance8x8_c;
        vp8_variance8x16             = vp8_variance8x16_c;
        vp8_variance16x8             = vp8_variance16x8_c;
        vp8_variance16x16            = vp8_variance16x16_c;
        vp8_mse16x16                 = vp8_mse16x16_c;
        vp8_sub_pixel_variance4x4      = vp8_sub_pixel_variance4x4_c;
        vp8_sub_pixel_variance8x8      = vp8_sub_pixel_variance8x8_c;
        vp8_sub_pixel_variance8x16     = vp8_sub_pixel_variance8x16_c;
        vp8_sub_pixel_variance16x8     = vp8_sub_pixel_variance16x8_c;
        vp8_sub_pixel_variance16x16    = vp8_sub_pixel_variance16x16_c;
        vp8_get_mb_ss                  = vp8_get_mb_ss_c;
        vp8_get16x16pred_error        = vp8_get16x16pred_error_c;
        vp8_get8x8var                = vp8_get8x8var_c;
        vp8_get16x16var              = vp8_get16x16var_c;
        vp8_get4x4sse_cs             = vp8_get4x4sse_cs_c;
        vp8_sad16x16                 = vp8_sad16x16_c;
        vp8_sad16x8                  = vp8_sad16x8_c;
        vp8_sad8x16                  = vp8_sad8x16_c;
        vp8_sad8x8                   = vp8_sad8x8_c;
        vp8_sad4x4                   = vp8_sad4x4_c;
        vp8_block_error               = vp8_block_error_c;
        vp8_mbblock_error             = vp8_mbblock_error_c;
        vp8_subtract_mby              = vp8_subtract_mby_c;
    }
}