shithub: libvpx

Download patch

ref: 13458362af8089df06d1534de928acfc93ac7cc5
parent: c97d435243caeac0bfc21d34244bd3fe5e6f686f
parent: 7e515c46372e6dee7f47bae3733378756e40783a
author: James Zern <jzern@google.com>
date: Wed Jan 21 07:59:07 EST 2015

Merge "fix AVX & AVX2 detection"

--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -13,6 +13,7 @@
 #define VPX_PORTS_X86_H_
 #include <stdlib.h>
 #include "vpx_config.h"
+#include "vpx/vpx_integer.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -104,6 +105,37 @@
 #endif
 #endif /* end others */
 
+// NaCl has no support for xgetbv or the raw opcode.
+#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
+static INLINE uint64_t xgetbv(void) {
+  const uint32_t ecx = 0;
+  uint32_t eax, edx;
+  // Use the raw opcode for xgetbv for compatibility with older toolchains.
+  __asm__ volatile (
+    ".byte 0x0f, 0x01, 0xd0\n"
+    : "=a"(eax), "=d"(edx) : "c" (ecx));
+  return ((uint64_t)edx << 32) | eax;
+}
+#elif (defined(_M_X64) || defined(_M_IX86)) && \
+      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
+#include <immintrin.h>
+#define xgetbv() _xgetbv(0)
+#elif defined(_MSC_VER) && defined(_M_IX86)
+static INLINE uint64_t xgetbv(void) {
+  uint32_t eax_, edx_;
+  __asm {
+    xor ecx, ecx  // ecx = 0
+    // Use the raw opcode for xgetbv for compatibility with older toolchains.
+    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
+    mov eax_, eax
+    mov edx_, edx
+  }
+  return ((uint64_t)edx_ << 32) | eax_;
+}
+#else
+#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
+#endif
+
 #define HAS_MMX     0x01
 #define HAS_SSE     0x02
 #define HAS_SSE2    0x04
@@ -156,14 +188,17 @@
 
   if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
 
-  if (reg_ecx & BIT(28)) flags |= HAS_AVX;
+  // bits 27 (OSXSAVE) & 28 (256-bit AVX)
+  if (reg_ecx & (BIT(27) | BIT(28))) {
+    if ((xgetbv() & 0x6) == 0x6) {
+      flags |= HAS_AVX;
 
-  /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
-  reg_eax = 7;
-  reg_ecx = 0;
-  cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+      /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
+      cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
 
-  if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
+      if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
+    }
+  }
 
   return flags & mask;
 }