shithub: libvpx

--- a/vp9/common/vp9_rtcd_defs.pl

+++ b/vp9/common/vp9_rtcd_defs.pl

@@ -46,42 +46,25 @@

 # dct

-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {

-  # Force C versions if CONFIG_EMULATE_HARDWARE is 1

-  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {

-    add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

+# Force C versions if CONFIG_EMULATE_HARDWARE is 1

+add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

-    add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

+add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

-    add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";

-  } else {

-    add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

-    specialize qw/vp9_iht4x4_16_add sse2/;

+add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";

-    add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

-    specialize qw/vp9_iht8x8_64_add sse2/;

-    add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";

-    specialize qw/vp9_iht16x16_256_add sse2/;

+if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {

+  # Note that there are more specializations appended when

+  # CONFIG_VP9_HIGHBITDEPTH is off.

+  specialize qw/vp9_iht4x4_16_add sse2/;

+  specialize qw/vp9_iht8x8_64_add sse2/;

+  specialize qw/vp9_iht16x16_256_add sse2/;

+  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {

+    # Note that these specializations are appended to the above ones.

+    specialize qw/vp9_iht4x4_16_add neon dspr2 msa/;

+    specialize qw/vp9_iht8x8_64_add neon dspr2 msa/;

+    specialize qw/vp9_iht16x16_256_add dspr2 msa/;

-} else {

-  # Force C versions if CONFIG_EMULATE_HARDWARE is 1

-  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {

-    add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

-    add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

-    add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";

-  } else {

-    add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

-    specialize qw/vp9_iht4x4_16_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";

-    specialize qw/vp9_iht8x8_64_add sse2 neon dspr2 msa/;

-    add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";

-    specialize qw/vp9_iht16x16_256_add sse2 dspr2 msa/;

-  }

 # High bitdepth functions

@@ -124,67 +107,57 @@

   specialize qw/vp9_denoiser_filter neon sse2/;

-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {

-  add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";

-  specialize qw/vp9_block_error avx2 sse2/;

+add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";

-  add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";

-  specialize qw/vp9_highbd_block_error sse2/;

+add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";

-  add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";

-  specialize qw/vp9_block_error_fp sse2/;

+add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

+specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";

-  add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

-  specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";

+add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

+specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";

-  add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

-  specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";

+add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

-  add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {

+  specialize qw/vp9_block_error avx2 sse2/;

+  specialize qw/vp9_block_error_fp sse2/;

   specialize qw/vp9_fdct8x8_quant neon ssse3/;

+  add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";

+  specialize qw/vp9_highbd_block_error sse2/;

 } else {

-  add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";

   specialize qw/vp9_block_error avx2 msa sse2/;

-  add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";

   specialize qw/vp9_block_error_fp neon sse2/;

-  add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

-  specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";

-  add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

-  specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";

-  add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

   specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;

 # fdct functions

-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {

-  add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_fht4x4 sse2/;

+add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_fht8x8 sse2/;

+add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_fht16x16 sse2/;

+add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";

-  specialize qw/vp9_fwht4x4 sse2/;

-} else {

-  add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_fht4x4 sse2 msa/;

+add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";

-  add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_fht8x8 sse2 msa/;

-  add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";

-  specialize qw/vp9_fht16x16 sse2 msa/;

-  add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";

-  specialize qw/vp9_fwht4x4 msa sse2/;

+# Note that there are more specializations appended when CONFIG_VP9_HIGHBITDEPTH

+# is off.

+specialize qw/vp9_fht4x4 sse2/;

+specialize qw/vp9_fht8x8 sse2/;

+specialize qw/vp9_fht16x16 sse2/;

+specialize qw/vp9_fwht4x4 sse2/;

+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {

+  # Note that these specializations are appended to the above ones.

+  specialize qw/vp9_fht4x4 msa/;

+  specialize qw/vp9_fht8x8 msa/;

+  specialize qw/vp9_fht16x16 msa/;

+  specialize qw/vp9_fwht4x4 msa/;

--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl

+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -588,7 +588,8 @@

 add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";

 if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {

-  # Note that there are more specializations appended when CONFIG_VP9_HIGHBITDEPTH is off.

+  # Note that there are more specializations appended when

+  # CONFIG_VP9_HIGHBITDEPTH is off.

   specialize qw/vpx_idct4x4_16_add neon sse2 vsx/;

   specialize qw/vpx_idct4x4_1_add neon sse2/;

   specialize qw/vpx_idct8x8_64_add neon sse2 vsx/;

@@ -604,7 +605,7 @@

   specialize qw/vpx_idct32x32_1_add neon sse2/;

   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {

-    # Note that these specializations appends to the above ones.

+    # Note that these specializations are appended to the above ones.

     specialize qw/vpx_idct4x4_16_add dspr2 msa/;

     specialize qw/vpx_idct4x4_1_add dspr2 msa/;

     specialize qw/vpx_idct8x8_64_add dspr2 msa/;

--