shithub: libvpx

Download patch

ref: 9f268611472bbdfa751aedb74a306ee502e7faca
parent: 81b9fd4310960854a6e0a66f0376eac901324c80
author: Erik Niemeyer <erik.a.niemeyer@gmail.com>
date: Tue Nov 19 16:11:57 EST 2013

Support for extended feature flags enumeration leaf in CPUID instruction

This CL fixes an oversight in the previously merged AVX2 support CL
(Change-Id: Idc03f3fca4bf2d0afd33631ea1d3caf8fc34ec29) that
prevented runtime execution of AVX2 code in WebM.

Background:
Starting with the Sandybridge processor, the CPUID instruction was
enhanced to add various extended feature flag enumeration leaves.
Reading these leaves requires an additional input value for the CPUID
instruction, which is passed in ECX. This change adds this second input
value to the CPUID macros for all ARCH_X86 and ARCH_X86_64 targets,
allowing bit 5 of EBX in leaf 7 to be checked for AVX2 support. This
capability will be required going forward to check for future processor
features.

Change-Id: Ie9d872bc9ff68dad4b6578e4544e4dfd0ae26c36

--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -35,23 +35,23 @@
 
 #if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
 #if ARCH_X86_64
-#define cpuid(func,ax,bx,cx,dx)\
+#define cpuid(func, func2, ax, bx, cx, dx)\
   __asm__ __volatile__ (\
                         "cpuid           \n\t" \
                         : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
-                        : "a"  (func));
+                        : "a" (func), "c" (func2));
 #else
-#define cpuid(func,ax,bx,cx,dx)\
+#define cpuid(func, func2, ax, bx, cx, dx)\
   __asm__ __volatile__ (\
                         "mov %%ebx, %%edi   \n\t" \
                         "cpuid              \n\t" \
                         "xchg %%edi, %%ebx  \n\t" \
                         : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
-                        : "a" (func));
+                        : "a" (func), "c" (func2));
 #endif
 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
 #if ARCH_X86_64
-#define cpuid(func,ax,bx,cx,dx)\
+#define cpuid(func, func2, ax, bx, cx, dx)\
   asm volatile (\
                 "xchg %rsi, %rbx \n\t" \
                 "cpuid           \n\t" \
@@ -58,9 +58,9 @@
                 "movl %ebx, %edi \n\t" \
                 "xchg %rsi, %rbx \n\t" \
                 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
-                : "a"  (func));
+                : "a" (func), "c" (func2));
 #else
-#define cpuid(func,ax,bx,cx,dx)\
+#define cpuid(func, func2, ax, bx, cx, dx)\
   asm volatile (\
                 "pushl %ebx       \n\t" \
                 "cpuid            \n\t" \
@@ -67,19 +67,21 @@
                 "movl %ebx, %edi  \n\t" \
                 "popl %ebx        \n\t" \
                 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
-                : "a" (func));
+                : "a" (func), "c" (func2));
 #endif
 #else /* end __SUNPRO__ */
 #if ARCH_X86_64
 void __cpuid(int CPUInfo[4], int info_type);
 #pragma intrinsic(__cpuid)
-#define cpuid(func,a,b,c,d) do{\
+#define cpuid(func, func2, a, b, c, d) do {\
     int regs[4];\
-    __cpuid(regs,func); a=regs[0];  b=regs[1];  c=regs[2];  d=regs[3];\
+    __cpuid(regs, func, func2);
+    a = regs[0];  b = regs[1];  c = regs[2];  d = regs[3];\
   } while(0)
 #else
-#define cpuid(func,a,b,c,d)\
+#define cpuid(func, func2, a, b, c, d)\
   __asm mov eax, func\
+  __asm mov ecx, func2\
   __asm cpuid\
   __asm mov a, eax\
   __asm mov b, ebx\
@@ -120,13 +122,13 @@
     mask = strtol(env, NULL, 0);
 
   /* Ensure that the CPUID instruction supports extended features */
-  cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+  cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
 
   if (reg_eax < 1)
     return 0;
 
   /* Get the standard feature flags */
-  cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx);
+  cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
 
   if (reg_edx & BIT(23)) flags |= HAS_MMX;
 
@@ -141,6 +143,11 @@
   if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
 
   if (reg_ecx & BIT(28)) flags |= HAS_AVX;
+
+  /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
+  reg_eax = 7;
+  reg_ecx = 0;
+  cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
 
   if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
 
--- a/vpx_ports/x86_cpuid.c
+++ b/vpx_ports/x86_cpuid.c
@@ -38,7 +38,7 @@
   int i;
 
   /* Get the Vendor String from the CPU */
-  cpuid(0, reg_eax, vs[0], vs[2], vs[1]);
+  cpuid(0, 0, reg_eax, vs[0], vs[2], vs[1]);
 
   for (i = 0; i < VPX_CPU_LAST; i++) {
     if (strncmp((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0)