ref: 7077b6ddfaf7cc5a48ccfd970dd091538b5b8580
parent: 18341c3b341f7f7c151673d2f79299f4074d378b
author: S. Gilles <sgilles@math.umd.edu>
date: Tue Mar 13 01:07:24 EDT 2018
Allow CPU feature detection by mbld, and add "sse4" tag This allows writing graceful degradation of optimizations. In practice, it justifies less-than-optimal floating point implementations by allowing a portable upgrade path through AVX, SIMD, &c. Bootstrap scripts are updated by genboostrap.sh on Linux+amd64, and by hand on all others.
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -2,7 +2,7 @@
fpmath.myr
# trunc, floor, ceil
- fpmath-trunc-impl+posixy-x64.s
+ fpmath-trunc-impl+posixy-x64-sse4.s
fpmath-trunc-impl.myr
lib ../std:std
--- /dev/null
+++ b/lib/math/fpmath-trunc-impl+posixy-x64-sse4.s
@@ -1,0 +1,41 @@
+.globl math$trunc32
+.globl math$_trunc32
+math$trunc32:
+math$_trunc32:
+ roundps $0x03, %xmm0, %xmm0
+ ret
+
+.globl math$floor32
+.globl math$_floor32
+math$floor32:
+math$_floor32:
+ roundps $0x01, %xmm0, %xmm0
+ ret
+
+.globl math$ceil32
+.globl math$_ceil32
+math$ceil32:
+math$_ceil32:
+ roundps $0x02, %xmm0, %xmm0
+ ret
+
+.globl math$trunc64
+.globl math$_trunc64
+math$trunc64:
+math$_trunc64:
+ roundpd $0x03, %xmm0, %xmm0
+ ret
+
+.globl math$floor64
+.globl math$_floor64
+math$floor64:
+math$_floor64:
+ roundpd $0x01, %xmm0, %xmm0
+ ret
+
+.globl math$ceil64
+.globl math$_ceil64
+math$ceil64:
+math$_ceil64:
+ roundpd $0x02, %xmm0, %xmm0
+ ret
--- a/lib/math/fpmath-trunc-impl+posixy-x64.s
+++ /dev/null
@@ -1,41 +1,0 @@
-.globl math$trunc32
-.globl math$_trunc32
-math$trunc32:
-math$_trunc32:
- roundps $0x03, %xmm0, %xmm0
- ret
-
-.globl math$floor32
-.globl math$_floor32
-math$floor32:
-math$_floor32:
- roundps $0x01, %xmm0, %xmm0
- ret
-
-.globl math$ceil32
-.globl math$_ceil32
-math$ceil32:
-math$_ceil32:
- roundps $0x02, %xmm0, %xmm0
- ret
-
-.globl math$trunc64
-.globl math$_trunc64
-math$trunc64:
-math$_trunc64:
- roundpd $0x03, %xmm0, %xmm0
- ret
-
-.globl math$floor64
-.globl math$_floor64
-math$floor64:
-math$_floor64:
- roundpd $0x01, %xmm0, %xmm0
- ret
-
-.globl math$ceil64
-.globl math$_ceil64
-math$ceil64:
-math$_ceil64:
- roundpd $0x02, %xmm0, %xmm0
- ret
--- a/mbld/bld.sub
+++ b/mbld/bld.sub
@@ -1,5 +1,6 @@
bin mbld =
build.myr
+ cpufeatures+x64.s
deps.myr
libs.myr
install.myr
--- /dev/null
+++ b/mbld/cpufeatures+x64.s
@@ -1,0 +1,10 @@
+.globl bld$cpufeatures
+.globl bld$_cpufeatures
+bld$cpufeatures:
+bld$_cpufeatures:
+ mov $0x1, %eax
+ cpuid
+ mov %ecx, %eax
+ rol $32, %rax
+ shrd $32, %rdx, %rax
+ ret
--- a/mbld/opts.myr
+++ b/mbld/opts.myr
@@ -7,6 +7,7 @@
var opt_arch : byte[:]
var opt_sys : byte[:]
var opt_sysvers : (int, int, int)
+ var opt_cpufeatures : uint64
var opt_runtime : byte[:]
var opt_genasm : bool
var opt_incpaths : byte[:][:]
@@ -30,11 +31,13 @@
const initopts : (-> void)
const parseversion : (v : byte[:] -> (int, int, int))
+ extern const cpufeatures : (-> uint64)
;;
var opt_arch = ""
var opt_sys = ""
var opt_binname = ""
+var opt_cpufeatures = 0ul
var opt_libname = ""
var opt_runtime = ""
var opt_incpaths /* FIXME: taking a constant slice is a nonconstant initializer */
@@ -74,6 +77,9 @@
| "amd64": opt_arch = "x64"
| unknown: std.fatal("unknown architecture \"{}\"\n", unknown)
;;
+
+ /* from cpuid with EAX=1; EDX at top, ECX at bottom */
+ opt_cpufeatures = cpufeatures()
opt_maxproc = 2*(thread.ncpu() : std.size)
opt_incpaths = [][:]
--- a/mbld/syssel.myr
+++ b/mbld/syssel.myr
@@ -158,6 +158,11 @@
| "x64": tag(b, "x64")
| unknown: std.fatal("unknown architecture {}\n", unknown)
;;
+
+ if opt_cpufeatures & 0x180000 == 0x180000
+ tag(b, "sse4")
+ ;;
+
for t : tags
tag(b, t)
;;
--- a/mk/bootstrap/bootstrap+Darwin-x86_64.sh
+++ b/mk/bootstrap/bootstrap+Darwin-x86_64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/start.o lib/thread/start+osx-x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -140,5 +141,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld -pagezero_size 0x100000000 -macosx_version_min 10.6 -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys -macosx_version_min 10.6
+ ld -pagezero_size 0x100000000 -macosx_version_min 10.6 -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys -macosx_version_min 10.6
true
--- a/mk/bootstrap/bootstrap+FreeBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+FreeBSD-amd64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/exit.o lib/thread/exit+freebsd-x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+ ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
true
--- a/mk/bootstrap/bootstrap+Linux-x86_64.sh
+++ b/mk/bootstrap/bootstrap+Linux-x86_64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/exit.o lib/thread/exit+linux-x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+ ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
true
--- a/mk/bootstrap/bootstrap+NetBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+NetBSD-amd64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
$pwd/6/6m -I . -I lib/sys lib/std/option.myr
@@ -139,5 +140,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+ ld -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
true
--- a/mk/bootstrap/bootstrap+OpenBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+OpenBSD-amd64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+ as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
as -g -o lib/thread/exit.o lib/thread/exit+openbsd-x64.s
as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- ld -nopie --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+ ld -nopie --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures+x64.s mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
true
--- a/mk/bootstrap/bootstrap+Plan9-amd64.sh
+++ b/mk/bootstrap/bootstrap+Plan9-amd64.sh
@@ -4,6 +4,7 @@
pwd=`pwd`
set -x
$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config+plan9-x64.myr
+ 6a -o mbld/cpufeatures.6 mbld/cpufeatures+x64.s
6a -o lib/thread/atomic-impl.6 lib/thread/atomic-impl+plan9-x64.s
6a -o lib/std/getbp.6 lib/std/getbp+plan9-x64.s
$pwd/6/6.out -I lib/sys lib/std/errno+plan9.myr
@@ -138,5 +139,5 @@
$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
- 6l -l -o mbld/mbld $pwd/rt/_myrrt.6 mbld/deps.6 mbld/main.6 mbld/util.6 mbld/libs.6 mbld/syssel.6 mbld/config.6 mbld/opts.6 mbld/subtest.6 mbld/types.6 mbld/test.6 mbld/install.6 mbld/parse.6 mbld/build.6 lib/thread/libthread.a lib/bio/libbio.a lib/regex/libregex.a lib/std/libstd.a lib/sys/libsys.a
+ 6l -l -o mbld/mbld $pwd/rt/_myrrt.6 mbld/deps.6 mbld/main.6 mbld/util.6 mbld/cpufeatures.6 mbld/libs.6 mbld/syssel.6 mbld/config.6 mbld/opts.6 mbld/subtest.6 mbld/types.6 mbld/test.6 mbld/install.6 mbld/parse.6 mbld/build.6 lib/thread/libthread.a lib/bio/libbio.a lib/regex/libregex.a lib/std/libstd.a lib/sys/libsys.a
true