shithub: libvpx

--- a/vp8/common/generic/systemdependent.c

+++ b/vp8/common/generic/systemdependent.c

@@ -83,8 +83,6 @@

 #endif

-void vp8_clear_system_state_c(){};

 void vp8_machine_specific_config(VP8_COMMON *ctx) {

 #if CONFIG_MULTITHREAD

   ctx->processor_core_count = get_cpu_count();

--- a/vp8/common/postproc.c

+++ b/vp8/common/postproc.c

@@ -12,6 +12,7 @@

 #include "vpx_dsp_rtcd.h"

 #include "vp8_rtcd.h"

 #include "vpx_dsp/postproc.h"

+#include "vpx_ports/system_state.h"

 #include "vpx_scale_rtcd.h"

 #include "vpx_scale/yv12config.h"

 #include "postproc.h"

@@ -321,7 +322,7 @@

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   if ((flags & VP8D_MFQE) && oci->postproc_state.last_frame_valid &&

       oci->current_video_frame >= 2 &&

@@ -363,7 +364,7 @@

         oci->postproc_state.last_noise != noise_level) {

       double sigma;

       struct postproc_state *ppstate = &oci->postproc_state;

-      vp8_clear_system_state();

+      vpx_clear_system_state();

       sigma = noise_level + .5 + .6 * q / 63.0;

       ppstate->clamp =

           vpx_setup_noise(sigma, ppstate->generated_noise, oci->Width + 256);

--- a/vp8/common/rtcd_defs.pl

+++ b/vp8/common/rtcd_defs.pl

@@ -19,13 +19,6 @@

 forward_decls qw/vp8_common_forward_decls/;

-# system state

-#

-add_proto qw/void vp8_clear_system_state/, "";

-specialize qw/vp8_clear_system_state mmx/;

-$vp8_clear_system_state_mmx=vpx_reset_mmx_state;

-#

 # Dequant

 add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";

@@ -33,15 +26,12 @@

 add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";

 specialize qw/vp8_dequant_idct_add mmx neon dspr2 msa/;

-$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;

 add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";

 specialize qw/vp8_dequant_idct_add_y_block mmx sse2 neon dspr2 msa/;

-$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;

 add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";

 specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 neon dspr2 msa/;

-$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;

 # Loopfilter

@@ -48,19 +38,15 @@

 add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";

 specialize qw/vp8_loop_filter_mbv mmx sse2 neon dspr2 msa/;

-$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;

 add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";

 specialize qw/vp8_loop_filter_bv mmx sse2 neon dspr2 msa/;

-$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;

 add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";

 specialize qw/vp8_loop_filter_mbh mmx sse2 neon dspr2 msa/;

-$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;

 add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";

 specialize qw/vp8_loop_filter_bh mmx sse2 neon dspr2 msa/;

-$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;

 add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";

@@ -101,23 +87,18 @@

 #idct16

 add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride";

 specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa/;

-$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2;

 #iwalsh1

 add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output";

 specialize qw/vp8_short_inv_walsh4x4_1 dspr2/;

-$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2;

-# no asm yet

 #iwalsh16

 add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output";

 specialize qw/vp8_short_inv_walsh4x4 mmx sse2 neon dspr2 msa/;

-$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2;

 #idct1_scalar_add

 add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride";

 specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa/;

-$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;

 # RECON

@@ -124,15 +105,12 @@

 add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";

 specialize qw/vp8_copy_mem16x16 mmx sse2 neon dspr2 msa/;

-$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2;

 add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";

 specialize qw/vp8_copy_mem8x8 mmx neon dspr2 msa/;

-$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2;

 add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch";

 specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa/;

-$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;

 # Postproc

@@ -140,13 +118,10 @@

 if (vpx_config("CONFIG_POSTPROC") eq "yes") {

     add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

-    # no asm yet

     add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

-    # no asm yet

     add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";

-    # no asm yet

     add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";

     specialize qw/vp8_filter_by_weight16x16 sse2 msa/;

@@ -155,7 +130,6 @@

     specialize qw/vp8_filter_by_weight8x8 sse2 msa/;

     add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight";

-    # no asm yet

@@ -163,19 +137,15 @@

 add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";

 specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 neon dspr2 msa/;

-$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2;

 add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";

 specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 neon dspr2 msa/;

-$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2;

 add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";

 specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 neon dspr2 msa/;

-$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;

 add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";

 specialize qw/vp8_sixtap_predict4x4 mmx ssse3 neon dspr2 msa/;

-$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;

 add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";

 specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 neon msa/;

--- a/vp8/decoder/onyxd_if.c

+++ b/vp8/decoder/onyxd_if.c

@@ -29,6 +29,7 @@

 #include "./vpx_scale_rtcd.h"

 #include "vpx_scale/vpx_scale.h"

 #include "vp8/common/systemdependent.h"

+#include "vpx_ports/system_state.h"

 #include "vpx_ports/vpx_once.h"

 #include "vpx_ports/vpx_timer.h"

 #include "detokenize.h"

@@ -352,7 +353,7 @@

     goto decode_exit;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   if (cm->show_frame) {

     cm->current_video_frame++;

@@ -383,7 +384,7 @@

 decode_exit:

   pbi->common.error.setjmp = 0;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   return retcode;

 int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd,

@@ -416,7 +417,7 @@

 #endif /*!CONFIG_POSTPROC*/

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   return ret;

@@ -447,7 +448,7 @@

     if (setjmp(fb->pbi[0]->common.error.jmp)) {

       vp8_remove_decoder_instances(fb);

       memset(fb->pbi, 0, sizeof(fb->pbi) / sizeof(fb->pbi[0]));

-      vp8_clear_system_state();

+      vpx_clear_system_state();

       return VPX_CODEC_ERROR;

--- a/vp8/encoder/bitstream.c

+++ b/vp8/encoder/bitstream.c

@@ -19,6 +19,7 @@

 #include <limits.h>

 #include "vpx/vpx_encoder.h"

 #include "vpx_mem/vpx_mem.h"

+#include "vpx_ports/system_state.h"

 #include "bitstream.h"

 #include "defaultcoefcounts.h"

@@ -843,7 +844,7 @@

   int new_intra, new_last, new_garf, oldtotal, newtotal;

   int ref_frame_cost[MAX_REF_FRAMES];

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   if (cpi->common.frame_type != KEY_FRAME) {

     if (!(new_intra = rf_intra * 255 / (rf_intra + rf_inter))) new_intra = 1;

@@ -908,7 +909,7 @@

 #endif

   int savings = 0;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   do {

     int j = 0;

@@ -1295,7 +1296,7 @@

 #endif

-  vp8_clear_system_state();

+  vpx_clear_system_state();

 #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING

   pack_coef_probs(cpi);

--- a/vp8/encoder/encodemv.c

+++ b/vp8/encoder/encodemv.c

@@ -12,6 +12,7 @@

 #include "encodemv.h"

 #include "vp8/common/entropymode.h"

 #include "vp8/common/systemdependent.h"

+#include "vpx_ports/system_state.h"

 #include <math.h>

@@ -126,7 +127,7 @@

   unsigned int cost0 = 0;

   unsigned int cost1 = 0;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   i = 1;

--- a/vp8/encoder/firstpass.c

+++ b/vp8/encoder/firstpass.c

@@ -26,6 +26,7 @@

 #include "vpx_scale/vpx_scale.h"

 #include "encodemb.h"

 #include "vp8/common/extend.h"

+#include "vpx_ports/system_state.h"

 #include "vpx_mem/vpx_mem.h"

 #include "vp8/common/swapyv12buffer.h"

 #include "rdopt.h"

@@ -499,7 +500,7 @@

   zero_ref_mv.as_int = 0;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   x->src = *cpi->Source;

   xd->pre = *lst_yv12;

@@ -741,10 +742,10 @@

     /* extend the recon for intra prediction */

     vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8,

                       xd->dst.v_buffer + 8);

-    vp8_clear_system_state();

+    vpx_clear_system_state();

-  vp8_clear_system_state();

+  vpx_clear_system_state();

     double weight = 0.0;

@@ -1655,7 +1656,7 @@

   cpi->twopass.gf_group_bits = 0;

   cpi->twopass.gf_decay_rate = 0;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   start_pos = cpi->twopass.stats_in;

@@ -2268,7 +2269,7 @@

     return;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   if (EOF == input_stats(cpi, &this_frame)) return;

@@ -2543,7 +2544,7 @@

   memset(&next_frame, 0, sizeof(next_frame));

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   start_position = cpi->twopass.stats_in;

   cpi->common.frame_type = KEY_FRAME;

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -33,6 +33,7 @@

 #include "vp8/common/reconintra.h"

 #include "vp8/common/swapyv12buffer.h"

 #include "vp8/common/threading.h"

+#include "vpx_ports/system_state.h"

 #include "vpx_ports/vpx_timer.h"

 #if ARCH_ARM

 #include "vpx_ports/arm.h"

@@ -2296,7 +2297,7 @@

     recon += recon_stride;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   return total_sse;

@@ -2691,7 +2692,7 @@

   if (cpi->Speed > 11) return 0;

   /* Clear down mmx registers */

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0)) {

     double change = 1.0 *

@@ -3129,7 +3130,7 @@

   } else {

     struct vpx_usec_timer timer;

-    vp8_clear_system_state();

+    vpx_clear_system_state();

     vpx_usec_timer_start(&timer);

     if (cpi->sf.auto_filter == 0) {

@@ -3217,7 +3218,7 @@

   int drop_mark25 = drop_mark / 8;

   /* Clear down mmx registers to allow floating point in what follows */

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   if (cpi->force_next_frame_intra) {

     cm->frame_type = KEY_FRAME; /* delayed intra frame */

@@ -3576,7 +3577,7 @@

    * There is some odd behavior for one pass here that needs attention.

    */

   if ((cpi->pass == 2) || (cpi->ni_frames > 150)) {

-    vp8_clear_system_state();

+    vpx_clear_system_state();

     Q = cpi->active_worst_quality;

@@ -3802,7 +3803,7 @@

 #endif

   do {

-    vp8_clear_system_state();

+    vpx_clear_system_state();

     vp8_set_quantizer(cpi, Q);

@@ -3935,7 +3936,7 @@

     cpi->projected_frame_size =

         (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;

 #endif

-    vp8_clear_system_state();

+    vpx_clear_system_state();

     /* Test to see if the stats generated for this frame indicate that

      * we should have coded a key frame (assuming that we didn't)!

@@ -3979,7 +3980,7 @@

 #endif

-    vp8_clear_system_state();

+    vpx_clear_system_state();

     if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;

@@ -4549,7 +4550,7 @@

         FILE *f = fopen("tmp.stt", "a");

-        vp8_clear_system_state();

+        vpx_clear_system_state();

         if (cpi->twopass.total_left_stats.coded_error != 0.0)

             fprintf(f, "%10d %10d %10d %10d %10d %10"PRId64" %10"PRId64

@@ -4779,7 +4780,7 @@

   if (setjmp(cpi->common.error.jmp)) {

     cpi->common.error.setjmp = 0;

-    vp8_clear_system_state();

+    vpx_clear_system_state();

     return VPX_CODEC_CORRUPT_FRAME;

@@ -4986,7 +4987,7 @@

   *size = 0;

   /* Clear down mmx registers */

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   cm->frame_type = INTER_FRAME;

   cm->frame_flags = *frame_flags;

@@ -5139,7 +5140,7 @@

           vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer,

                       cm->filter_level * 10 / 6, 1, 0);

-          vp8_clear_system_state();

+          vpx_clear_system_state();

           ye = calc_plane_error(orig->y_buffer, orig->y_stride, pp->y_buffer,

                                 pp->y_stride, y_width, y_height);

@@ -5249,7 +5250,7 @@

 #endif

-    vp8_clear_system_state();

+    vpx_clear_system_state();

     return ret;

--- a/vp8/encoder/ratectrl.c

+++ b/vp8/encoder/ratectrl.c

@@ -22,6 +22,7 @@

 #include "vp8/common/systemdependent.h"

 #include "encodemv.h"

 #include "vpx_dsp/vpx_dsp_common.h"

+#include "vpx_ports/system_state.h"

 #define MIN_BPB_FACTOR 0.01

 #define MAX_BPB_FACTOR 50

@@ -296,7 +297,7 @@

   uint64_t target;

   /* Clear down mmx registers to allow floating point in what follows */

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   if (cpi->oxcf.fixed_q >= 0) {

     int Q = cpi->oxcf.key_q;

@@ -1019,7 +1020,7 @@

   int projected_size_based_on_q = 0;

   /* Clear down mmx registers to allow floating point in what follows */

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   if (cpi->common.frame_type == KEY_FRAME) {

     rate_correction_factor = cpi->key_frame_rate_correction_factor;

@@ -1302,7 +1303,7 @@

 void vp8_adjust_key_frame_context(VP8_COMP *cpi) {

   /* Clear down mmx registers to allow floating point in what follows */

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   /* Do we have any key frame overspend to recover? */

   /* Two-pass overspend handled elsewhere. */

--- a/vp8/encoder/rdopt.c

+++ b/vp8/encoder/rdopt.c

@@ -30,6 +30,7 @@

 #include "encodemb.h"

 #include "vp8/encoder/quantize.h"

 #include "vpx_dsp/variance.h"

+#include "vpx_ports/system_state.h"

 #include "mcomp.h"

 #include "rdopt.h"

 #include "vpx_mem/vpx_mem.h"

@@ -163,7 +164,7 @@

   double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;

   double rdconst = 2.80;

-  vp8_clear_system_state();

+  vpx_clear_system_state();

   /* Further tests required to see if optimum is different

    * for key frames, golden frames and arf frames.

--- a/vp8/vp8_dx_iface.c

+++ b/vp8/vp8_dx_iface.c

@@ -24,6 +24,7 @@

 #include "decoder/onyxd_int.h"

 #include "vpx_dsp/vpx_dsp_common.h"

 #include "vpx_mem/vpx_mem.h"

+#include "vpx_ports/system_state.h"

 #if CONFIG_ERROR_CONCEALMENT

 #include "decoder/error_concealment.h"

 #endif

@@ -365,7 +366,7 @@

            * reallocation is attempted on resync. */

           ctx->si.w = 0;

           ctx->si.h = 0;

-          vp8_clear_system_state();

+          vpx_clear_system_state();

           /* same return value as used in vp8dx_receive_compressed_data */

           return -1;

--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl

+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -11,12 +11,6 @@

 forward_decls qw/vpx_dsp_forward_decls/;

-# optimizations which depend on multiple features

-$avx2_ssse3 = '';

-if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {

-  $avx2_ssse3 = 'avx2';

-}

 # functions that are 64 bit only.

 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';

 if ($opts{arch} eq "x86_64") {

@@ -437,13 +431,13 @@

 specialize qw/vpx_convolve_avg neon dspr2 msa sse2/;

 add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

-specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

+specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa/;

 add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

-specialize qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

+specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa/;

 add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

-specialize qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

+specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa/;

 add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

 specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/;

--

⑨