shithub: dav1d

Download patch

ref: 481c05920d8acc2808f557b986e0b3490c587238
parent: 003f17d80561050a27619cebe23131bad63bc326
author: Ewout ter Hoeven <e.m.terhoeven@student.tudelft.nl>
date: Sat Dec 14 16:34:47 EST 2019

Add support for Ice Lake AVX-512 cpu flags

--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -64,6 +64,11 @@
                     if ((xcr & 0x000000e0) == 0x000000e0) /* ZMM/OPMASK */ {
                         if ((info[1] & 0xd0030000) == 0xd0030000)
                             flags |= DAV1D_X86_CPU_FLAG_AVX512;
+                        if ((info[1] & 0xd0230000) == 0xd0230000 &&
+                            (info[2] & 0x00005f42) == 0x00005f42)
+                        {
+                            flags |= DAV1D_X86_CPU_FLAG_AVX512ICL;
+                        }
                     }
                 }
             }
--- a/src/x86/cpu.h
+++ b/src/x86/cpu.h
@@ -37,7 +37,9 @@
     DAV1D_X86_CPU_FLAG_SSE42 = 1 << 5,
     DAV1D_X86_CPU_FLAG_AVX = 1 << 6,
     DAV1D_X86_CPU_FLAG_AVX2 = 1 << 7,
-    DAV1D_X86_CPU_FLAG_AVX512 = 1 << 8, /* F + CD + BW + DQ + VL */
+    DAV1D_X86_CPU_FLAG_AVX512 = 1 << 8,    /* F/CD/BW/DQ/VL */
+    DAV1D_X86_CPU_FLAG_AVX512ICL = 1 << 9, /* F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/
+                                            * VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ */
 };
 
 unsigned dav1d_get_cpu_flags_x86(void);
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -98,15 +98,16 @@
     unsigned flag;
 } cpus[] = {
 #if ARCH_X86
-    { "SSE",     "sse",    DAV1D_X86_CPU_FLAG_SSE },
-    { "SSE2",    "sse2",   DAV1D_X86_CPU_FLAG_SSE2 },
-    { "SSE3",    "sse3",   DAV1D_X86_CPU_FLAG_SSE3 },
-    { "SSSE3",   "ssse3",  DAV1D_X86_CPU_FLAG_SSSE3 },
-    { "SSE4.1",  "sse4",   DAV1D_X86_CPU_FLAG_SSE41 },
-    { "SSE4.2",  "sse42",  DAV1D_X86_CPU_FLAG_SSE42 },
-    { "AVX",     "avx",    DAV1D_X86_CPU_FLAG_AVX },
-    { "AVX2",    "avx2",   DAV1D_X86_CPU_FLAG_AVX2 },
-    { "AVX-512", "avx512", DAV1D_X86_CPU_FLAG_AVX512 },
+    { "SSE",                "sse",       DAV1D_X86_CPU_FLAG_SSE },
+    { "SSE2",               "sse2",      DAV1D_X86_CPU_FLAG_SSE2 },
+    { "SSE3",               "sse3",      DAV1D_X86_CPU_FLAG_SSE3 },
+    { "SSSE3",              "ssse3",     DAV1D_X86_CPU_FLAG_SSSE3 },
+    { "SSE4.1",             "sse4",      DAV1D_X86_CPU_FLAG_SSE41 },
+    { "SSE4.2",             "sse42",     DAV1D_X86_CPU_FLAG_SSE42 },
+    { "AVX",                "avx",       DAV1D_X86_CPU_FLAG_AVX },
+    { "AVX2",               "avx2",      DAV1D_X86_CPU_FLAG_AVX2 },
+    { "AVX-512",            "avx512",    DAV1D_X86_CPU_FLAG_AVX512 },
+    { "AVX-512 (Ice Lake)", "avx512icl", DAV1D_X86_CPU_FLAG_AVX512ICL },
 #elif ARCH_AARCH64 || ARCH_ARM
     { "NEON",    "neon",   DAV1D_ARM_CPU_FLAG_NEON },
 #elif ARCH_PPC64LE
--- a/tools/dav1d_cli_parse.c
+++ b/tools/dav1d_cli_parse.c
@@ -86,7 +86,7 @@
 #define ALLOWED_CPU_MASKS " or 'neon'"
 #elif ARCH_X86
 #define ALLOWED_CPU_MASKS \
-    ", 'sse2', 'ssse3', 'sse41', 'avx2' or 'avx512'"
+    ", 'sse2', 'ssse3', 'sse41', 'avx2', 'avx512' or 'avx512icl'"
 #else
 #define ALLOWED_CPU_MASKS "not yet implemented for this architecture"
 #endif
@@ -176,15 +176,16 @@
 
 #if ARCH_X86
 enum CpuMask {
-    X86_CPU_MASK_SSE    = DAV1D_X86_CPU_FLAG_SSE,
-    X86_CPU_MASK_SSE2   = DAV1D_X86_CPU_FLAG_SSE2   | X86_CPU_MASK_SSE,
-    X86_CPU_MASK_SSE3   = DAV1D_X86_CPU_FLAG_SSE3   | X86_CPU_MASK_SSE2,
-    X86_CPU_MASK_SSSE3  = DAV1D_X86_CPU_FLAG_SSSE3  | X86_CPU_MASK_SSE3,
-    X86_CPU_MASK_SSE41  = DAV1D_X86_CPU_FLAG_SSE41  | X86_CPU_MASK_SSSE3,
-    X86_CPU_MASK_SSE42  = DAV1D_X86_CPU_FLAG_SSE42  | X86_CPU_MASK_SSE41,
-    X86_CPU_MASK_AVX    = DAV1D_X86_CPU_FLAG_AVX    | X86_CPU_MASK_SSE42,
-    X86_CPU_MASK_AVX2   = DAV1D_X86_CPU_FLAG_AVX2   | X86_CPU_MASK_AVX,
-    X86_CPU_MASK_AVX512 = DAV1D_X86_CPU_FLAG_AVX512 | X86_CPU_MASK_AVX2,
+    X86_CPU_MASK_SSE       = DAV1D_X86_CPU_FLAG_SSE,
+    X86_CPU_MASK_SSE2      = DAV1D_X86_CPU_FLAG_SSE2      | X86_CPU_MASK_SSE,
+    X86_CPU_MASK_SSE3      = DAV1D_X86_CPU_FLAG_SSE3      | X86_CPU_MASK_SSE2,
+    X86_CPU_MASK_SSSE3     = DAV1D_X86_CPU_FLAG_SSSE3     | X86_CPU_MASK_SSE3,
+    X86_CPU_MASK_SSE41     = DAV1D_X86_CPU_FLAG_SSE41     | X86_CPU_MASK_SSSE3,
+    X86_CPU_MASK_SSE42     = DAV1D_X86_CPU_FLAG_SSE42     | X86_CPU_MASK_SSE41,
+    X86_CPU_MASK_AVX       = DAV1D_X86_CPU_FLAG_AVX       | X86_CPU_MASK_SSE42,
+    X86_CPU_MASK_AVX2      = DAV1D_X86_CPU_FLAG_AVX2      | X86_CPU_MASK_AVX,
+    X86_CPU_MASK_AVX512    = DAV1D_X86_CPU_FLAG_AVX512    | X86_CPU_MASK_AVX2,
+    X86_CPU_MASK_AVX512ICL = DAV1D_X86_CPU_FLAG_AVX512ICL | X86_CPU_MASK_AVX512,
 };
 #endif
 
@@ -192,11 +193,12 @@
 #if ARCH_AARCH64 || ARCH_ARM
     { "neon", DAV1D_ARM_CPU_FLAG_NEON },
 #elif ARCH_X86
-    { "sse2",   X86_CPU_MASK_SSE2 },
-    { "ssse3",  X86_CPU_MASK_SSSE3 },
-    { "sse41",  X86_CPU_MASK_SSE41 },
-    { "avx2",   X86_CPU_MASK_AVX2 },
-    { "avx512", X86_CPU_MASK_AVX512 },
+    { "sse2",      X86_CPU_MASK_SSE2 },
+    { "ssse3",     X86_CPU_MASK_SSSE3 },
+    { "sse41",     X86_CPU_MASK_SSE41 },
+    { "avx2",      X86_CPU_MASK_AVX2 },
+    { "avx512",    X86_CPU_MASK_AVX512 },
+    { "avx512icl", X86_CPU_MASK_AVX512ICL },
 #endif
     { 0 },
 };