ref: 7b208fa806378083ab76d6a632ec6f64c4e7a87e
parent: 481c05920d8acc2808f557b986e0b3490c587238
author: Henrik Gramner <gramner@twoorioles.com>
date: Sat Dec 14 10:53:08 EST 2019
x86: Remove unused cpu flags. Also make the cpuid code a bit more explicit.
--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -33,41 +33,43 @@
#include "src/x86/cpu.h"
-void dav1d_cpu_cpuid(uint32_t *info, int leaf);
-uint64_t dav1d_cpu_xgetbv(int xcr);
+typedef struct {
+ uint32_t eax, ebx, ecx, edx;
+} CpuidRegisters;
+void dav1d_cpu_cpuid(CpuidRegisters *regs, unsigned leaf, unsigned subleaf);
+uint64_t dav1d_cpu_xgetbv(unsigned xcr);
+
+#define X(reg, mask) (((reg) & (mask)) == (mask))
+
COLD unsigned dav1d_get_cpu_flags_x86(void) {
- uint32_t info[4] = {0}, n_ids;
+ CpuidRegisters r = { 0 };
+ dav1d_cpu_cpuid(&r, 0, 0);
+ const unsigned max_leaf = r.eax;
unsigned flags = 0;
- dav1d_cpu_cpuid(info, 0);
- n_ids = info[0];
-
- if (n_ids >= 1) {
- dav1d_cpu_cpuid(info, 1);
- if (info[3] & (1 << 25)) flags |= DAV1D_X86_CPU_FLAG_SSE;
- if (info[3] & (1 << 26)) flags |= DAV1D_X86_CPU_FLAG_SSE2;
- if (info[2] & (1 << 0)) flags |= DAV1D_X86_CPU_FLAG_SSE3;
- if (info[2] & (1 << 9)) flags |= DAV1D_X86_CPU_FLAG_SSSE3;
- if (info[2] & (1 << 19)) flags |= DAV1D_X86_CPU_FLAG_SSE41;
- if (info[2] & (1 << 20)) flags |= DAV1D_X86_CPU_FLAG_SSE42;
+ if (max_leaf >= 1) {
+ dav1d_cpu_cpuid(&r, 1, 0);
+ if (X(r.edx, 0x06008000)) /* CMOV/SSE/SSE2 */ {
+ flags |= DAV1D_X86_CPU_FLAG_SSE2;
+ if (X(r.ecx, 0x00000201)) /* SSE3/SSSE3 */ {
+ flags |= DAV1D_X86_CPU_FLAG_SSSE3;
+ if (X(r.ecx, 0x00080000)) /* SSE4.1 */
+ flags |= DAV1D_X86_CPU_FLAG_SSE41;
+ }
+ }
#if ARCH_X86_64
/* We only support >128-bit SIMD on x86-64. */
- if (info[2] & (1 << 27)) /* OSXSAVE */ {
- uint64_t xcr = dav1d_cpu_xgetbv(0);
- if ((xcr & 0x00000006) == 0x00000006) /* XMM/YMM */ {
- if (info[2] & (1 << 28)) flags |= DAV1D_X86_CPU_FLAG_AVX;
- if (n_ids >= 7) {
- dav1d_cpu_cpuid(info, 7);
- if ((info[1] & 0x00000128) == 0x00000128)
+ if (X(r.ecx, 0x18000000)) /* OSXSAVE/AVX */ {
+ const uint64_t xcr0 = dav1d_cpu_xgetbv(0);
+ if (X(xcr0, 0x00000006)) /* XMM/YMM */ {
+ if (max_leaf >= 7) {
+ dav1d_cpu_cpuid(&r, 7, 0);
+ if (X(r.ebx, 0x00000128)) /* BMI1/BMI2/AVX2 */ {
flags |= DAV1D_X86_CPU_FLAG_AVX2;
- if ((xcr & 0x000000e0) == 0x000000e0) /* ZMM/OPMASK */ {
- if ((info[1] & 0xd0030000) == 0xd0030000)
- flags |= DAV1D_X86_CPU_FLAG_AVX512;
- if ((info[1] & 0xd0230000) == 0xd0230000 &&
- (info[2] & 0x00005f42) == 0x00005f42)
- {
- flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
+ if (X(xcr0, 0x000000e0)) /* ZMM/OPMASK */ {
+ if (X(r.ebx, 0xd0230000) && X(r.ecx, 0x00005f42))
+ flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
}
}
}
--- a/src/x86/cpu.h
+++ b/src/x86/cpu.h
@@ -29,16 +29,11 @@
#define DAV1D_SRC_X86_CPU_H
enum CpuFlags {
- DAV1D_X86_CPU_FLAG_SSE = 1 << 0,
- DAV1D_X86_CPU_FLAG_SSE2 = 1 << 1,
- DAV1D_X86_CPU_FLAG_SSE3 = 1 << 2,
- DAV1D_X86_CPU_FLAG_SSSE3 = 1 << 3,
- DAV1D_X86_CPU_FLAG_SSE41 = 1 << 4,
- DAV1D_X86_CPU_FLAG_SSE42 = 1 << 5,
- DAV1D_X86_CPU_FLAG_AVX = 1 << 6,
- DAV1D_X86_CPU_FLAG_AVX2 = 1 << 7,
- DAV1D_X86_CPU_FLAG_AVX512 = 1 << 8, /* F/CD/BW/DQ/VL */
- DAV1D_X86_CPU_FLAG_AVX512ICL = 1 << 9, /* F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/
+ DAV1D_X86_CPU_FLAG_SSE2 = 1 << 0,
+ DAV1D_X86_CPU_FLAG_SSSE3 = 1 << 1,
+ DAV1D_X86_CPU_FLAG_SSE41 = 1 << 2,
+ DAV1D_X86_CPU_FLAG_AVX2 = 1 << 3,
+ DAV1D_X86_CPU_FLAG_AVX512ICL = 1 << 4, /* F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/
* VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ */
};
--- a/src/x86/cpuid.asm
+++ b/src/x86/cpuid.asm
@@ -27,12 +27,12 @@
SECTION .text
-cglobal cpu_cpuid, 0, 5, 0, info, leaf
- mov r4, infomp
+cglobal cpu_cpuid, 0, 5, 0, regs, leaf, subleaf
+ mov r4, regsmp
mov eax, leafm
- xor ecx, ecx
+ mov ecx, subleafm
%if ARCH_X86_64
- push rbx
+ mov r5, rbx
%endif
cpuid
mov [r4+4*0], eax
@@ -40,7 +40,7 @@
mov [r4+4*2], ecx
mov [r4+4*3], edx
%if ARCH_X86_64
- pop rbx
+ mov rbx, r5
%endif
RET
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -98,20 +98,15 @@
unsigned flag;
} cpus[] = {
#if ARCH_X86
- { "SSE", "sse", DAV1D_X86_CPU_FLAG_SSE },
{ "SSE2", "sse2", DAV1D_X86_CPU_FLAG_SSE2 },
- { "SSE3", "sse3", DAV1D_X86_CPU_FLAG_SSE3 },
{ "SSSE3", "ssse3", DAV1D_X86_CPU_FLAG_SSSE3 },
{ "SSE4.1", "sse4", DAV1D_X86_CPU_FLAG_SSE41 },
- { "SSE4.2", "sse42", DAV1D_X86_CPU_FLAG_SSE42 },
- { "AVX", "avx", DAV1D_X86_CPU_FLAG_AVX },
{ "AVX2", "avx2", DAV1D_X86_CPU_FLAG_AVX2 },
- { "AVX-512", "avx512", DAV1D_X86_CPU_FLAG_AVX512 },
{ "AVX-512 (Ice Lake)", "avx512icl", DAV1D_X86_CPU_FLAG_AVX512ICL },
#elif ARCH_AARCH64 || ARCH_ARM
- { "NEON", "neon", DAV1D_ARM_CPU_FLAG_NEON },
+ { "NEON", "neon", DAV1D_ARM_CPU_FLAG_NEON },
#elif ARCH_PPC64LE
- { "VSX", "vsx", DAV1D_PPC_CPU_FLAG_VSX },
+ { "VSX", "vsx", DAV1D_PPC_CPU_FLAG_VSX },
#endif
{ 0 }
};
--- a/tools/dav1d_cli_parse.c
+++ b/tools/dav1d_cli_parse.c
@@ -176,16 +176,11 @@
#if ARCH_X86
enum CpuMask {
- X86_CPU_MASK_SSE = DAV1D_X86_CPU_FLAG_SSE,
- X86_CPU_MASK_SSE2 = DAV1D_X86_CPU_FLAG_SSE2 | X86_CPU_MASK_SSE,
- X86_CPU_MASK_SSE3 = DAV1D_X86_CPU_FLAG_SSE3 | X86_CPU_MASK_SSE2,
- X86_CPU_MASK_SSSE3 = DAV1D_X86_CPU_FLAG_SSSE3 | X86_CPU_MASK_SSE3,
+ X86_CPU_MASK_SSE2 = DAV1D_X86_CPU_FLAG_SSE2,
+ X86_CPU_MASK_SSSE3 = DAV1D_X86_CPU_FLAG_SSSE3 | X86_CPU_MASK_SSE2,
X86_CPU_MASK_SSE41 = DAV1D_X86_CPU_FLAG_SSE41 | X86_CPU_MASK_SSSE3,
- X86_CPU_MASK_SSE42 = DAV1D_X86_CPU_FLAG_SSE42 | X86_CPU_MASK_SSE41,
- X86_CPU_MASK_AVX = DAV1D_X86_CPU_FLAG_AVX | X86_CPU_MASK_SSE42,
- X86_CPU_MASK_AVX2 = DAV1D_X86_CPU_FLAG_AVX2 | X86_CPU_MASK_AVX,
- X86_CPU_MASK_AVX512 = DAV1D_X86_CPU_FLAG_AVX512 | X86_CPU_MASK_AVX2,
- X86_CPU_MASK_AVX512ICL = DAV1D_X86_CPU_FLAG_AVX512ICL | X86_CPU_MASK_AVX512,
+ X86_CPU_MASK_AVX2 = DAV1D_X86_CPU_FLAG_AVX2 | X86_CPU_MASK_SSE41,
+ X86_CPU_MASK_AVX512ICL = DAV1D_X86_CPU_FLAG_AVX512ICL | X86_CPU_MASK_AVX2,
};
#endif
@@ -197,7 +192,6 @@
{ "ssse3", X86_CPU_MASK_SSSE3 },
{ "sse41", X86_CPU_MASK_SSE41 },
{ "avx2", X86_CPU_MASK_AVX2 },
- { "avx512", X86_CPU_MASK_AVX512 },
{ "avx512icl", X86_CPU_MASK_AVX512ICL },
#endif
{ 0 },