shithub: libvpx

Download patch

ref: c00e9c4709c1ac3828ce13aae158011495e7de35
parent: 32655853266a7bc975f825a00df96d40f9e85658
parent: 29e1b1a4b082992d0e84ee721afaadcd4db5e626
author: James Zern <jzern@google.com>
date: Thu Jul 10 17:11:40 EDT 2014

Merge changes Ie241772d,I3c72e226

* changes:
  tests: add API_REGISTER_STATE_CHECK
  call vp[89]_clear_system_state after longjmp

--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -264,7 +264,7 @@
   uint8_t* const out = output();
   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 
-  REGISTER_STATE_CHECK(
+  ASM_REGISTER_STATE_CHECK(
       UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                 Width(), Height()));
 
@@ -281,7 +281,7 @@
   uint8_t* const out = output();
   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 
-  REGISTER_STATE_CHECK(
+  ASM_REGISTER_STATE_CHECK(
       UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                 Width(), Height()));
 
@@ -298,7 +298,7 @@
   uint8_t* const out = output();
   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 
-  REGISTER_STATE_CHECK(
+  ASM_REGISTER_STATE_CHECK(
       UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                  Width(), Height()));
 
@@ -356,17 +356,17 @@
                            Width(), Height());
 
         if (filters == eighttap_smooth || (filter_x && filter_y))
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->hv8_(in, kInputStride, out, kOutputStride,
                          filters[filter_x], 16, filters[filter_y], 16,
                          Width(), Height()));
         else if (filter_y)
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->v8_(in, kInputStride, out, kOutputStride,
                         kInvalidFilter, 16, filters[filter_y], 16,
                         Width(), Height()));
         else
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->h8_(in, kInputStride, out, kOutputStride,
                         filters[filter_x], 16, kInvalidFilter, 16,
                         Width(), Height()));
@@ -414,17 +414,17 @@
                                    Width(), Height());
 
         if (filters == eighttap_smooth || (filter_x && filter_y))
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
                              filters[filter_x], 16, filters[filter_y], 16,
                              Width(), Height()));
         else if (filter_y)
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
         else
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
@@ -494,9 +494,10 @@
    */
 
   /* Test the horizontal filter. */
-  REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
-                                 kChangeFilters[kInitialSubPelOffset],
-                                 kInputPixelStep, NULL, 0, Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(
+      UUT_->h8_(in, kInputStride, out, kOutputStride,
+                kChangeFilters[kInitialSubPelOffset],
+                kInputPixelStep, NULL, 0, Width(), Height()));
 
   for (int x = 0; x < Width(); ++x) {
     const int kFilterPeriodAdjust = (x >> 3) << 3;
@@ -508,9 +509,10 @@
   }
 
   /* Test the vertical filter. */
-  REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
-                                 NULL, 0, kChangeFilters[kInitialSubPelOffset],
-                                 kInputPixelStep, Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(
+      UUT_->v8_(in, kInputStride, out, kOutputStride,
+                NULL, 0, kChangeFilters[kInitialSubPelOffset],
+                kInputPixelStep, Width(), Height()));
 
   for (int y = 0; y < Height(); ++y) {
     const int kFilterPeriodAdjust = (y >> 3) << 3;
@@ -522,12 +524,11 @@
   }
 
   /* Test the horizontal and vertical filters in combination. */
-  REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                  kChangeFilters[kInitialSubPelOffset],
-                                  kInputPixelStep,
-                                  kChangeFilters[kInitialSubPelOffset],
-                                  kInputPixelStep,
-                                  Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(
+      UUT_->hv8_(in, kInputStride, out, kOutputStride,
+                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
+                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
+                 Width(), Height()));
 
   for (int y = 0; y < Height(); ++y) {
     const int kFilterPeriodAdjustY = (y >> 3) << 3;
@@ -560,10 +561,10 @@
   for (int frac = 0; frac < 16; ++frac) {
     for (int step = 1; step <= 32; ++step) {
       /* Test the horizontal and vertical filters in combination. */
-      REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                      eighttap[frac], step,
-                                      eighttap[frac], step,
-                                      Width(), Height()));
+      ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
+                                          eighttap[frac], step,
+                                          eighttap[frac], step,
+                                          Width(), Height()));
 
       CheckGuardBlocks();
 
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -311,9 +311,9 @@
         test_input_block[j] = src[j] - dst[j];
       }
 
-      REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
-                                      test_temp_block, pitch_));
-      REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
+                                          test_temp_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
@@ -344,7 +344,7 @@
         input_block[j] = rnd.Rand8() - rnd.Rand8();
 
       fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
 
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j)
@@ -375,8 +375,8 @@
       }
 
       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
-                                      output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
+                                          output_block, pitch_));
 
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j) {
@@ -421,7 +421,7 @@
       for (int j = 1; j < kNumCoeffs; ++j)
         output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
       inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
 
       for (int j = 0; j < kNumCoeffs; ++j)
         EXPECT_EQ(ref[j], dst[j]);
@@ -450,7 +450,7 @@
       for (int j = 0; j < kNumCoeffs; ++j)
         coeff[j] = round(out_r[j]);
 
-      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
 
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -112,8 +112,8 @@
       test_input_block[j] = src[j] - dst[j];
     }
 
-    REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
-    REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
+    ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
+    ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 
     for (int j = 0; j < kNumCoeffs; ++j) {
       const uint32_t diff = dst[j] - src[j];
@@ -150,7 +150,7 @@
 
     const int stride = 32;
     vp9_fdct32x32_c(input_block, output_ref_block, stride);
-    REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
+    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
 
     if (version_ == 0) {
       for (int j = 0; j < kNumCoeffs; ++j)
@@ -189,7 +189,8 @@
 
     const int stride = 32;
     vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
-    REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, stride));
+    ASM_REGISTER_STATE_CHECK(
+        fwd_txfm_(input_extreme_block, output_block, stride));
 
     // The minimum quant value is 4.
     for (int j = 0; j < kNumCoeffs; ++j) {
@@ -230,7 +231,7 @@
     reference_32x32_dct_2d(in, out_r);
     for (int j = 0; j < kNumCoeffs; ++j)
       coeff[j] = round(out_r[j]);
-    REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
+    ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
     for (int j = 0; j < kNumCoeffs; ++j) {
       const int diff = dst[j] - src[j];
       const int error = diff * diff;
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -32,7 +32,7 @@
                                      void *user_priv) {
   vpx_codec_err_t res_dec;
   InitOnce();
-  REGISTER_STATE_CHECK(
+  API_REGISTER_STATE_CHECK(
       res_dec = vpx_codec_decode(&decoder_,
                                  cxdata, static_cast<unsigned int>(size),
                                  user_priv, 0));
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -59,7 +59,7 @@
   }
 
   // Encode the frame
-  REGISTER_STATE_CHECK(
+  API_REGISTER_STATE_CHECK(
       res = vpx_codec_encode(&encoder_,
                              video.img(), video.pts(), video.duration(),
                              frame_flags, deadline_));
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -79,9 +79,9 @@
         test_input_block[j] = src[j] - dst[j];
       }
 
-      REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
-                                      test_temp_block, pitch_));
-      REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
+                                          test_temp_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
@@ -114,7 +114,7 @@
         input_block[j] = rnd.Rand8() - rnd.Rand8();
 
       fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
 
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j)
@@ -145,8 +145,8 @@
       }
 
       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
-                                      output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
+                                          output_block, pitch_));
 
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j) {
@@ -175,7 +175,7 @@
 
       fwd_txfm_ref(in, coeff, pitch_, tx_type_);
 
-      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -68,7 +68,7 @@
       // Initialize a test block with input range [-255, 255].
       for (int j = 0; j < 64; ++j)
         test_input_block[j] = rnd.Rand8() - rnd.Rand8();
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunFwdTxfm(test_input_block, test_output_block, pitch_));
 
       for (int j = 0; j < 64; ++j) {
@@ -97,7 +97,7 @@
       // Initialize a test block with input range [-15, 15].
       for (int j = 0; j < 64; ++j)
         test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunFwdTxfm(test_input_block, test_output_block, pitch_));
 
       for (int j = 0; j < 64; ++j) {
@@ -139,7 +139,7 @@
         test_input_block[j] = src[j] - dst[j];
       }
 
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
       for (int j = 0; j < 64; ++j) {
           if (test_temp_block[j] > 0) {
@@ -152,7 +152,7 @@
             test_temp_block[j] *= 4;
           }
       }
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunInvTxfm(test_temp_block, dst, pitch_));
 
       for (int j = 0; j < 64; ++j) {
@@ -202,11 +202,11 @@
         test_input_block[j] = src[j] - dst[j];
       }
 
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunInvTxfm(test_temp_block, dst, pitch_));
 
       for (int j = 0; j < 64; ++j) {
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -52,7 +52,7 @@
 TEST_P(IDCTTest, TestAllZeros) {
   int i;
 
-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
 
   for (i = 0; i < 256; i++)
     if ((i & 0xF) < 4 && i < 64)
@@ -65,7 +65,7 @@
   int i;
 
   input[0] = 4;
-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
 
   for (i = 0; i < 256; i++)
     if ((i & 0xF) < 4 && i < 64)
@@ -79,7 +79,7 @@
 
   for (i = 0; i < 256; i++) predict[i] = i;
   input[0] = 4;
-  REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
+  ASM_REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
 
   for (i = 0; i < 256; i++)
     if ((i & 0xF) < 4 && i < 64)
@@ -93,7 +93,7 @@
 
   for (i = 0; i < 16; i++) input[i] = i;
 
-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
 
   for (i = 0; i < 256; i++)
     if ((i & 0xF) > 3 || i > 63)
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -261,10 +261,10 @@
 
   virtual void Predict(MB_PREDICTION_MODE mode) {
     mbptr_->mode_info_context->mbmi.mode = mode;
-    REGISTER_STATE_CHECK(pred_fn_(mbptr_,
-                                  data_ptr_[0] - kStride,
-                                  data_ptr_[0] - 1, kStride,
-                                  data_ptr_[0], kStride));
+    ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_,
+                                      data_ptr_[0] - kStride,
+                                      data_ptr_[0] - 1, kStride,
+                                      data_ptr_[0], kStride));
   }
 
   intra_pred_y_fn_t pred_fn_;
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -118,8 +118,8 @@
                          = (output_ref_block[j] / 1828) * 1828;
     }
 
-    REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
 
     for (int j = 0; j < block_size; ++j) {
       const int diff = dst1[j] - dst2[j];
@@ -182,8 +182,8 @@
     memcpy(test_coef_block2, test_coef_block1,
            sizeof(*test_coef_block2) * block_size);
 
-    REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
 
     for (int j = 0; j < block_size; ++j) {
       const int diff = dst1[j] - dst2[j];
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -80,8 +80,9 @@
   // Initialize pixels in the output to 99.
   (void)vpx_memset(dst_image, 99, output_size);
 
-  REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr, input_stride,
-                                  output_stride, block_width, flimits, 16));
+  ASM_REGISTER_STATE_CHECK(
+      GetParam()(src_image_ptr, dst_image_ptr, input_stride,
+                 output_stride, block_width, flimits, 16));
 
   static const uint8_t expected_data[block_height] = {
     4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -13,7 +13,21 @@
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
 
+// ASM_REGISTER_STATE_CHECK(asm_function)
+//   Minimally validates the environment pre & post function execution. This
+//   variant should be used with assembly functions which are not expected to
+//   fully restore the system state. See platform implementations of
+//   RegisterStateCheck for details.
+//
+// API_REGISTER_STATE_CHECK(api_function)
+//   Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any
+//   additional checks to ensure the environment is in a consistent state pre &
+//   post function execution. This variant should be used with API functions.
+//   See platform implementations of RegisterStateCheckXXX for details.
+//
+
 #if defined(_WIN64)
 
 #define _WIN32_LEAN_AND_MEAN
@@ -35,11 +49,6 @@
 // Compares the state of xmm[6-15] at construction with their state at
 // destruction. These registers should be preserved by the callee on
 // Windows x64.
-// Usage:
-// {
-//   RegisterStateCheck reg_check;
-//   FunctionToVerify();
-// }
 class RegisterStateCheck {
  public:
   RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
@@ -75,9 +84,9 @@
   CONTEXT pre_context_;
 };
 
-#define REGISTER_STATE_CHECK(statement) do { \
-  libvpx_test::RegisterStateCheck reg_check; \
-  statement;                               \
+#define ASM_REGISTER_STATE_CHECK(statement) do {  \
+  libvpx_test::RegisterStateCheck reg_check;      \
+  statement;                                      \
 } while (false)
 
 }  // namespace libvpx_test
@@ -85,8 +94,6 @@
 #elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && defined(CONFIG_VP9) \
       && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9
 
-#include "vpx/vpx_integer.h"
-
 extern "C" {
 // Save the d8-d15 registers into store.
 void vp9_push_neon(int64_t *store);
@@ -97,11 +104,6 @@
 // Compares the state of d8-d15 at construction with their state at
 // destruction. These registers should be preserved by the callee on
 // arm platform.
-// Usage:
-// {
-//   RegisterStateCheck reg_check;
-//   FunctionToVerify();
-// }
 class RegisterStateCheck {
  public:
   RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); }
@@ -129,9 +131,9 @@
   int64_t pre_store_[8];
 };
 
-#define REGISTER_STATE_CHECK(statement) do { \
-  libvpx_test::RegisterStateCheck reg_check; \
-  statement;                               \
+#define ASM_REGISTER_STATE_CHECK(statement) do {  \
+  libvpx_test::RegisterStateCheck reg_check;      \
+  statement;                                      \
 } while (false)
 
 }  // namespace libvpx_test
@@ -141,10 +143,54 @@
 namespace libvpx_test {
 
 class RegisterStateCheck {};
-#define REGISTER_STATE_CHECK(statement) statement
+#define ASM_REGISTER_STATE_CHECK(statement) statement
 
 }  // namespace libvpx_test
 
 #endif  // _WIN64
+
+#if ARCH_X86 || ARCH_X86_64
+#if defined(__GNUC__)
+
+namespace libvpx_test {
+
+// Snapshots the x87 environment with fstenv before and after the wrapped call
+// and expects the tag word (16-bit word 4) to be 0xffff, i.e. emms was issued.
+class RegisterStateCheckMMX {
+ public:
+  RegisterStateCheckMMX() {
+    __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
+  }
+  ~RegisterStateCheckMMX() { Check(); }
+
+ private:
+  // Records a non-fatal failure for each snapshot whose tag word is not 0xffff.
+  // Failures logged earlier in the test must not trigger an extra report here.
+  void Check() const {
+    EXPECT_EQ(0xffff, pre_fpu_env_[4])
+        << "FPU was in an inconsistent state prior to call";
+
+    uint16_t post_fpu_env[14];
+    __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
+    EXPECT_EQ(0xffff, post_fpu_env[4])
+        << "FPU was left in an inconsistent state after call";
+  }
+
+  uint16_t pre_fpu_env_[14];
+};
+
+#define API_REGISTER_STATE_CHECK(statement) do {     \
+  libvpx_test::RegisterStateCheckMMX reg_check_mmx;  \
+  ASM_REGISTER_STATE_CHECK(statement);               \
+} while (false)
+
+}  // namespace libvpx_test
+
+#endif  // __GNUC__
+#endif  // ARCH_X86 || ARCH_X86_64
+
+#ifndef API_REGISTER_STATE_CHECK
+#define API_REGISTER_STATE_CHECK(statement) ASM_REGISTER_STATE_CHECK(statement)
+#endif
 
 #endif  // TEST_REGISTER_STATE_CHECK_H_
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -149,9 +149,9 @@
     const uint8_t* refs[] = {GetReference(0), GetReference(1),
                              GetReference(2), GetReference(3)};
 
-    REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
-                                      refs, reference_stride_,
-                                      results));
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
+                                          refs, reference_stride_,
+                                          results));
   }
 
   void CheckSADs() {
@@ -178,9 +178,9 @@
     unsigned int ret;
     const uint8_t* const reference = GetReference(block_idx);
 
-    REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
-                                            reference, reference_stride_,
-                                            max_sad));
+    ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_,
+                                                max_sad));
     return ret;
   }
 
@@ -210,8 +210,8 @@
     unsigned int ret;
     const uint8_t* const reference = GetReference(block_idx);
 
-    REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
-                                            reference, reference_stride_));
+    ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_));
     return ret;
   }
 
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -143,8 +143,9 @@
 
   uint8_t *src = const_cast<uint8_t*>(test_data);
 
-  REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride,
-                                       2, 2, dst_, kDstStride));
+  ASM_REGISTER_STATE_CHECK(
+      sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride,
+                      2, 2, dst_, kDstStride));
 
   for (int i = 0; i < height_; ++i)
     for (int j = 0; j < width_; ++j)
@@ -169,7 +170,7 @@
                                 xoffset, yoffset, dst_c_, kDstStride);
 
       // Run test.
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride,
                           xoffset, yoffset, dst_, kDstStride));
 
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -82,7 +82,7 @@
       predictor += kDiffPredStride;
     }
 
-    REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
+    ASM_REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
 
     base_src = *be.base_src;
     src_diff = be.src_diff;
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -124,7 +124,8 @@
       memset(ref_, j, block_size_);
       unsigned int sse;
       unsigned int var;
-      REGISTER_STATE_CHECK(var = variance_(src_, width_, ref_, width_, &sse));
+      ASM_REGISTER_STATE_CHECK(
+          var = variance_(src_, width_, ref_, width_, &sse));
       EXPECT_EQ(0u, var) << "src values: " << i << "ref values: " << j;
     }
   }
@@ -139,7 +140,8 @@
     }
     unsigned int sse1, sse2;
     unsigned int var1;
-    REGISTER_STATE_CHECK(var1 = variance_(src_, width_, ref_, width_, &sse1));
+    ASM_REGISTER_STATE_CHECK(
+        var1 = variance_(src_, width_, ref_, width_, &sse1));
     const unsigned int var2 = variance_ref(src_, ref_, log2width_,
                                            log2height_, &sse2);
     EXPECT_EQ(sse1, sse2);
@@ -155,7 +157,7 @@
   memset(ref_ + half, 0, half);
   unsigned int sse;
   unsigned int var;
-  REGISTER_STATE_CHECK(var = variance_(src_, width_, ref_, width_, &sse));
+  ASM_REGISTER_STATE_CHECK(var = variance_(src_, width_, ref_, width_, &sse));
   const unsigned int expected = block_size_ * 255 * 255 / 4;
   EXPECT_EQ(expected, var);
 }
@@ -246,8 +248,8 @@
       }
       unsigned int sse1, sse2;
       unsigned int var1;
-      REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
-                                                   src_, width_, &sse1));
+      ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
+                                                       src_, width_, &sse1));
       const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
                                                     log2height_, x, y, &sse2);
       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
@@ -269,8 +271,9 @@
       }
       unsigned int sse1, sse2;
       unsigned int var1;
-      REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
-                                                   src_, width_, &sse1, sec_));
+      ASM_REGISTER_STATE_CHECK(
+          var1 = subpel_variance_(ref_, width_ + 1, x, y,
+                                  src_, width_, &sse1, sec_));
       const unsigned int var2 = subpel_avg_variance_ref(ref_, src_, sec_,
                                                         log2width_, log2height_,
                                                         x, y, &sse2);
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -386,6 +386,7 @@
 
 decode_exit:
     pbi->common.error.setjmp = 0;
+    vp8_clear_system_state();
     return retcode;
 }
 int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags)
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4886,6 +4886,7 @@
     if (setjmp(cpi->common.error.jmp))
     {
         cpi->common.error.setjmp = 0;
+        vp8_clear_system_state();
         return VPX_CODEC_CORRUPT_FRAME;
     }
 
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -409,6 +409,7 @@
                 if (setjmp(pbi->common.error.jmp))
                 {
                     pbi->common.error.setjmp = 0;
+                    vp8_clear_system_state();
                     /* same return value as used in vp8dx_receive_compressed_data */
                     return -1;
                 }
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -246,6 +246,7 @@
 
   if (setjmp(cm->error.jmp)) {
     cm->error.setjmp = 0;
+    vp9_clear_system_state();
 
     // We do not know if the missing frame(s) was supposed to update
     // any of the reference buffers, but we act conservative and