shithub: dav1d

Download patch

ref: 7b208fa806378083ab76d6a632ec6f64c4e7a87e
parent: 481c05920d8acc2808f557b986e0b3490c587238
author: Henrik Gramner <gramner@twoorioles.com>
date: Sat Dec 14 10:53:08 EST 2019

x86: Remove unused cpu flags

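Also make the cpuid code a bit more explicit: return the results in a struct with named registers (eax/ebx/ecx/edx) and pass the subleaf as an argument.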

--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -33,41 +33,43 @@
 
 #include "src/x86/cpu.h"
 
-void dav1d_cpu_cpuid(uint32_t *info, int leaf);
-uint64_t dav1d_cpu_xgetbv(int xcr);
+typedef struct {
+    uint32_t eax, ebx, ecx, edx;
+} CpuidRegisters;
 
+void dav1d_cpu_cpuid(CpuidRegisters *regs, unsigned leaf, unsigned subleaf);
+uint64_t dav1d_cpu_xgetbv(unsigned xcr);
+
+#define X(reg, mask) (((reg) & (mask)) == (mask))
+
 COLD unsigned dav1d_get_cpu_flags_x86(void) {
-    uint32_t info[4] = {0}, n_ids;
+    CpuidRegisters r = { 0 };
+    dav1d_cpu_cpuid(&r, 0, 0);
+    const unsigned max_leaf = r.eax;
     unsigned flags = 0;
 
-    dav1d_cpu_cpuid(info, 0);
-    n_ids = info[0];
-
-    if (n_ids >= 1) {
-        dav1d_cpu_cpuid(info, 1);
-        if (info[3] & (1 << 25)) flags |= DAV1D_X86_CPU_FLAG_SSE;
-        if (info[3] & (1 << 26)) flags |= DAV1D_X86_CPU_FLAG_SSE2;
-        if (info[2] & (1 <<  0)) flags |= DAV1D_X86_CPU_FLAG_SSE3;
-        if (info[2] & (1 <<  9)) flags |= DAV1D_X86_CPU_FLAG_SSSE3;
-        if (info[2] & (1 << 19)) flags |= DAV1D_X86_CPU_FLAG_SSE41;
-        if (info[2] & (1 << 20)) flags |= DAV1D_X86_CPU_FLAG_SSE42;
+    if (max_leaf >= 1) {
+        dav1d_cpu_cpuid(&r, 1, 0);
+        if (X(r.edx, 0x06008000)) /* CMOV/SSE/SSE2 */ {
+            flags |= DAV1D_X86_CPU_FLAG_SSE2;
+            if (X(r.ecx, 0x00000201)) /* SSE3/SSSE3 */ {
+                flags |= DAV1D_X86_CPU_FLAG_SSSE3;
+                if (X(r.ecx, 0x00080000)) /* SSE4.1 */
+                    flags |= DAV1D_X86_CPU_FLAG_SSE41;
+            }
+        }
 #if ARCH_X86_64
         /* We only support >128-bit SIMD on x86-64. */
-        if (info[2] & (1 << 27)) /* OSXSAVE */ {
-            uint64_t xcr = dav1d_cpu_xgetbv(0);
-            if ((xcr & 0x00000006) == 0x00000006) /* XMM/YMM */ {
-                if (info[2] & (1 << 28)) flags |= DAV1D_X86_CPU_FLAG_AVX;
-                if (n_ids >= 7) {
-                    dav1d_cpu_cpuid(info, 7);
-                    if ((info[1] & 0x00000128) == 0x00000128)
+        if (X(r.ecx, 0x18000000)) /* OSXSAVE/AVX */ {
+            const uint64_t xcr0 = dav1d_cpu_xgetbv(0);
+            if (X(xcr0, 0x00000006)) /* XMM/YMM */ {
+                if (max_leaf >= 7) {
+                    dav1d_cpu_cpuid(&r, 7, 0);
+                    if (X(r.ebx, 0x00000128)) /* BMI1/BMI2/AVX2 */ {
                         flags |= DAV1D_X86_CPU_FLAG_AVX2;
-                    if ((xcr & 0x000000e0) == 0x000000e0) /* ZMM/OPMASK */ {
-                        if ((info[1] & 0xd0030000) == 0xd0030000)
-                            flags |= DAV1D_X86_CPU_FLAG_AVX512;
-                        if ((info[1] & 0xd0230000) == 0xd0230000 &&
-                            (info[2] & 0x00005f42) == 0x00005f42)
-                        {
-                            flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
+                        if (X(xcr0, 0x000000e0)) /* ZMM/OPMASK */ {
+                            if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
+                                flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
                         }
                     }
                 }
--- a/src/x86/cpu.h
+++ b/src/x86/cpu.h
@@ -29,16 +29,11 @@
 #define DAV1D_SRC_X86_CPU_H
 
 enum CpuFlags {
-    DAV1D_X86_CPU_FLAG_SSE = 1 << 0,
-    DAV1D_X86_CPU_FLAG_SSE2 = 1 << 1,
-    DAV1D_X86_CPU_FLAG_SSE3 = 1 << 2,
-    DAV1D_X86_CPU_FLAG_SSSE3 = 1 << 3,
-    DAV1D_X86_CPU_FLAG_SSE41 = 1 << 4,
-    DAV1D_X86_CPU_FLAG_SSE42 = 1 << 5,
-    DAV1D_X86_CPU_FLAG_AVX = 1 << 6,
-    DAV1D_X86_CPU_FLAG_AVX2 = 1 << 7,
-    DAV1D_X86_CPU_FLAG_AVX512 = 1 << 8,    /* F/CD/BW/DQ/VL */
-    DAV1D_X86_CPU_FLAG_AVX512ICL = 1 << 9, /* F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/
+    DAV1D_X86_CPU_FLAG_SSE2      = 1 << 0,
+    DAV1D_X86_CPU_FLAG_SSSE3     = 1 << 1,
+    DAV1D_X86_CPU_FLAG_SSE41     = 1 << 2,
+    DAV1D_X86_CPU_FLAG_AVX2      = 1 << 3,
+    DAV1D_X86_CPU_FLAG_AVX512ICL = 1 << 4, /* F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/
                                             * VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ */
 };
 
--- a/src/x86/cpuid.asm
+++ b/src/x86/cpuid.asm
@@ -27,12 +27,12 @@
 
 SECTION .text
 
-cglobal cpu_cpuid, 0, 5, 0, info, leaf
-    mov        r4, infomp
+cglobal cpu_cpuid, 0, 5, 0, regs, leaf, subleaf
+    mov        r4, regsmp
     mov       eax, leafm
-    xor       ecx, ecx
+    mov       ecx, subleafm
 %if ARCH_X86_64
-    push      rbx
+    mov        r5, rbx
 %endif
     cpuid
     mov  [r4+4*0], eax
@@ -40,7 +40,7 @@
     mov  [r4+4*2], ecx
     mov  [r4+4*3], edx
 %if ARCH_X86_64
-    pop       rbx
+    mov       rbx, r5
 %endif
     RET
 
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -98,20 +98,15 @@
     unsigned flag;
 } cpus[] = {
 #if ARCH_X86
-    { "SSE",                "sse",       DAV1D_X86_CPU_FLAG_SSE },
     { "SSE2",               "sse2",      DAV1D_X86_CPU_FLAG_SSE2 },
-    { "SSE3",               "sse3",      DAV1D_X86_CPU_FLAG_SSE3 },
     { "SSSE3",              "ssse3",     DAV1D_X86_CPU_FLAG_SSSE3 },
     { "SSE4.1",             "sse4",      DAV1D_X86_CPU_FLAG_SSE41 },
-    { "SSE4.2",             "sse42",     DAV1D_X86_CPU_FLAG_SSE42 },
-    { "AVX",                "avx",       DAV1D_X86_CPU_FLAG_AVX },
     { "AVX2",               "avx2",      DAV1D_X86_CPU_FLAG_AVX2 },
-    { "AVX-512",            "avx512",    DAV1D_X86_CPU_FLAG_AVX512 },
     { "AVX-512 (Ice Lake)", "avx512icl", DAV1D_X86_CPU_FLAG_AVX512ICL },
 #elif ARCH_AARCH64 || ARCH_ARM
-    { "NEON",    "neon",   DAV1D_ARM_CPU_FLAG_NEON },
+    { "NEON",               "neon",      DAV1D_ARM_CPU_FLAG_NEON },
 #elif ARCH_PPC64LE
-    { "VSX",     "vsx",    DAV1D_PPC_CPU_FLAG_VSX },
+    { "VSX",                "vsx",       DAV1D_PPC_CPU_FLAG_VSX },
 #endif
     { 0 }
 };
--- a/tools/dav1d_cli_parse.c
+++ b/tools/dav1d_cli_parse.c
@@ -176,16 +176,11 @@
 
 #if ARCH_X86
 enum CpuMask {
-    X86_CPU_MASK_SSE       = DAV1D_X86_CPU_FLAG_SSE,
-    X86_CPU_MASK_SSE2      = DAV1D_X86_CPU_FLAG_SSE2      | X86_CPU_MASK_SSE,
-    X86_CPU_MASK_SSE3      = DAV1D_X86_CPU_FLAG_SSE3      | X86_CPU_MASK_SSE2,
-    X86_CPU_MASK_SSSE3     = DAV1D_X86_CPU_FLAG_SSSE3     | X86_CPU_MASK_SSE3,
+    X86_CPU_MASK_SSE2      = DAV1D_X86_CPU_FLAG_SSE2,
+    X86_CPU_MASK_SSSE3     = DAV1D_X86_CPU_FLAG_SSSE3     | X86_CPU_MASK_SSE2,
     X86_CPU_MASK_SSE41     = DAV1D_X86_CPU_FLAG_SSE41     | X86_CPU_MASK_SSSE3,
-    X86_CPU_MASK_SSE42     = DAV1D_X86_CPU_FLAG_SSE42     | X86_CPU_MASK_SSE41,
-    X86_CPU_MASK_AVX       = DAV1D_X86_CPU_FLAG_AVX       | X86_CPU_MASK_SSE42,
-    X86_CPU_MASK_AVX2      = DAV1D_X86_CPU_FLAG_AVX2      | X86_CPU_MASK_AVX,
-    X86_CPU_MASK_AVX512    = DAV1D_X86_CPU_FLAG_AVX512    | X86_CPU_MASK_AVX2,
-    X86_CPU_MASK_AVX512ICL = DAV1D_X86_CPU_FLAG_AVX512ICL | X86_CPU_MASK_AVX512,
+    X86_CPU_MASK_AVX2      = DAV1D_X86_CPU_FLAG_AVX2      | X86_CPU_MASK_SSE41,
+    X86_CPU_MASK_AVX512ICL = DAV1D_X86_CPU_FLAG_AVX512ICL | X86_CPU_MASK_AVX2,
 };
 #endif
 
@@ -197,7 +192,6 @@
     { "ssse3",     X86_CPU_MASK_SSSE3 },
     { "sse41",     X86_CPU_MASK_SSE41 },
     { "avx2",      X86_CPU_MASK_AVX2 },
-    { "avx512",    X86_CPU_MASK_AVX512 },
     { "avx512icl", X86_CPU_MASK_AVX512ICL },
 #endif
     { 0 },