shithub: libvpx

Download patch

ref: 33b1c457ed34879887beb5adec1e4c45daa4dd37
parent: 928ff03889dadc3f63883553b443c08e625b4885
author: Paul Wilkins <paulwilkins@google.com>
date: Wed May 7 08:53:27 EDT 2014

Revert "Add an MMX fwht4x4"

The reverted commit includes changes that are not compatible with
Visual Studio (VS) builds on Windows. Among other things, stdint.h is
not supported in VS.

This reverts commit 89fbf3de501b5d7fd90047192521eae3198705cd.

Change-Id: Ifa86d7df250578d1ada9b539c9ff12ed0c523cdd

--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -353,13 +353,6 @@
         make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3)));
 #endif
 
-#if HAVE_MMX
-INSTANTIATE_TEST_CASE_P(
-    MMX, Trans4x4WHT,
-    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0)));
-#endif
-
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans4x4DCT,
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -693,7 +693,7 @@
 specialize qw/vp9_fht16x16 sse2 avx2/;
 
 add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fwht4x4 mmx/;
+specialize qw/vp9_fwht4x4/;
 
 add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
 specialize qw/vp9_fdct4x4 sse2 avx2/;
--- a/vp9/encoder/x86/vp9_dct_mmx.c
+++ /dev/null
@@ -1,67 +1,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <mmintrin.h>
-#include <stdint.h>
-
-#include "./vpx_config.h"
-
-static void INLINE transpose_4x4_mmx(__m64* a, __m64* b, __m64* c, __m64* d) {
-  __m64 w, x, y, z;
-  w = _mm_unpacklo_pi16(*a, *b);
-  x = _mm_unpackhi_pi16(*a, *b);
-  y = _mm_unpacklo_pi16(*c, *d);
-  z = _mm_unpackhi_pi16(*c, *d);
-  *a = _mm_unpacklo_pi32(w, y);
-  *b = _mm_unpackhi_pi32(w, y);
-  *c = _mm_unpacklo_pi32(x, z);
-  *d = _mm_unpackhi_pi32(x, z);
-}
-
-static void INLINE fwht_4x4_cols(__m64* out0,
-                                 __m64* out1,
-                                 __m64* out2,
-                                 __m64* out3,
-                                 __m64 a1,
-                                 __m64 b1,
-                                 __m64 c1,
-                                 __m64 d1) {
-  __m64 e1;
-
-  a1 = _mm_add_pi16(a1, b1);
-  d1 = _mm_sub_pi16(d1, c1);
-  e1 = _mm_sub_pi16(a1, d1);
-  e1 = _mm_srai_pi16(e1, 1);
-  b1 = _mm_sub_pi16(e1, b1);
-  c1 = _mm_sub_pi16(e1, c1);
-  a1 = _mm_sub_pi16(a1, c1);
-  d1 = _mm_add_pi16(d1, b1);
-  *out0 = a1;
-  *out1 = c1;
-  *out2 = d1;
-  *out3 = b1;
-}
-
-void vp9_fwht4x4_mmx(const int16_t* input, int16_t* output, int stride) {
-  __m64 a1 = *(const __m64*)input;
-  __m64 b1 = *(const __m64*)(input + stride);
-  __m64 c1 = *(const __m64*)(input + 2 * stride);
-  __m64 d1 = *(const __m64*)(input + 3 * stride);
-
-  fwht_4x4_cols(&a1, &b1, &c1, &d1, a1, b1, c1, d1);
-  transpose_4x4_mmx(&a1, &b1, &c1, &d1);
-  fwht_4x4_cols(&a1, &b1, &c1, &d1, a1, b1, c1, d1);
-  transpose_4x4_mmx(&a1, &b1, &c1, &d1);
-
-  *(__m64*)output = _mm_slli_pi16(a1, 2);
-  *(__m64*)(output + 4) = _mm_slli_pi16(b1, 2);
-  *(__m64*)(output + 8) = _mm_slli_pi16(c1, 2);
-  *(__m64*)(output + 12) = _mm_slli_pi16(d1, 2);
-}
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -92,7 +92,6 @@
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
-VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm