shithub: mc

Download patch

ref: 7077b6ddfaf7cc5a48ccfd970dd091538b5b8580
parent: 18341c3b341f7f7c151673d2f79299f4074d378b
author: S. Gilles <sgilles@math.umd.edu>
date: Tue Mar 13 01:07:24 EDT 2018

Allow CPU feature detection by mbld, and add "sse4" tag

This allows writing graceful degradation of optimizations. In
practice, it justifies less-than-optimal floating point implementations
by allowing a portable upgrade path through AVX, SIMD, &c.

Bootstrap scripts are updated by genbootstrap.sh on Linux+amd64, and
by hand on all others.

--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -2,7 +2,7 @@
 	fpmath.myr
 
 	# trunc, floor, ceil
-	fpmath-trunc-impl+posixy-x64.s
+	fpmath-trunc-impl+posixy-x64-sse4.s
 	fpmath-trunc-impl.myr
 
 	lib ../std:std
--- /dev/null
+++ b/lib/math/fpmath-trunc-impl+posixy-x64-sse4.s
@@ -1,0 +1,41 @@
+.globl math$trunc32
+.globl math$_trunc32
+math$trunc32:	/* 32-bit float truncation via SSE4.1 roundps */
+math$_trunc32:	/* second exported name for the same entry point */
+	roundps $0x03, %xmm0, %xmm0	/* imm8 = 3: round toward zero; packed op touches all four lanes -- presumably only the low lane carries the argument, TODO confirm */
+	ret
+
+.globl math$floor32
+.globl math$_floor32
+math$floor32:	/* 32-bit float floor via SSE4.1 roundps */
+math$_floor32:	/* second exported name for the same entry point */
+	roundps $0x01, %xmm0, %xmm0	/* imm8 = 1: round toward -infinity; packed op touches all four lanes -- presumably only the low lane carries the argument, TODO confirm */
+	ret
+
+.globl math$ceil32
+.globl math$_ceil32
+math$ceil32:	/* 32-bit float ceiling via SSE4.1 roundps */
+math$_ceil32:	/* second exported name for the same entry point */
+	roundps $0x02, %xmm0, %xmm0	/* imm8 = 2: round toward +infinity; packed op touches all four lanes -- presumably only the low lane carries the argument, TODO confirm */
+	ret
+
+.globl math$trunc64
+.globl math$_trunc64
+math$trunc64:	/* 64-bit float truncation via SSE4.1 roundpd */
+math$_trunc64:	/* second exported name for the same entry point */
+	roundpd $0x03, %xmm0, %xmm0	/* imm8 = 3: round toward zero; packed op touches both lanes -- presumably only the low lane carries the argument, TODO confirm */
+	ret
+
+.globl math$floor64
+.globl math$_floor64
+math$floor64:	/* 64-bit float floor via SSE4.1 roundpd */
+math$_floor64:	/* second exported name for the same entry point */
+	roundpd $0x01, %xmm0, %xmm0	/* imm8 = 1: round toward -infinity; packed op touches both lanes -- presumably only the low lane carries the argument, TODO confirm */
+	ret
+
+.globl math$ceil64
+.globl math$_ceil64
+math$ceil64:	/* 64-bit float ceiling via SSE4.1 roundpd */
+math$_ceil64:	/* second exported name for the same entry point */
+	roundpd $0x02, %xmm0, %xmm0	/* imm8 = 2: round toward +infinity; packed op touches both lanes -- presumably only the low lane carries the argument, TODO confirm */
+	ret
--- a/lib/math/fpmath-trunc-impl+posixy-x64.s
+++ /dev/null
@@ -1,41 +1,0 @@
-.globl math$trunc32
-.globl math$_trunc32
-math$trunc32:
-math$_trunc32:
-	roundps $0x03, %xmm0, %xmm0
-	ret
-
-.globl math$floor32
-.globl math$_floor32
-math$floor32:
-math$_floor32:
-	roundps $0x01, %xmm0, %xmm0
-	ret
-
-.globl math$ceil32
-.globl math$_ceil32
-math$ceil32:
-math$_ceil32:
-	roundps $0x02, %xmm0, %xmm0
-	ret
-
-.globl math$trunc64
-.globl math$_trunc64
-math$trunc64:
-math$_trunc64:
-	roundpd $0x03, %xmm0, %xmm0
-	ret
-
-.globl math$floor64
-.globl math$_floor64
-math$floor64:
-math$_floor64:
-	roundpd $0x01, %xmm0, %xmm0
-	ret
-
-.globl math$ceil64
-.globl math$_ceil64
-math$ceil64:
-math$_ceil64:
-	roundpd $0x02, %xmm0, %xmm0
-	ret
--- a/mbld/bld.sub
+++ b/mbld/bld.sub
@@ -1,5 +1,6 @@
 bin mbld =
 	build.myr
+	cpufeatures+x64.s
 	deps.myr
 	libs.myr
 	install.myr
--- /dev/null
+++ b/mbld/cpufeatures+x64.s
@@ -1,0 +1,22 @@
+/*
+ * cpufeatures : (-> uint64)
+ *
+ * Runs cpuid with EAX=1 and returns the feature flags packed into %rax:
+ * EDX in bits 63..32, ECX in bits 31..0.
+ *
+ * cpuid overwrites %eax, %ebx, %ecx and %edx. %rbx is callee-saved in
+ * the System V AMD64 ABI, so it is preserved around the instruction.
+ * NOTE(review): confirm which registers 6m expects callees to preserve.
+ */
+.globl bld$cpufeatures
+.globl bld$_cpufeatures
+bld$cpufeatures:
+bld$_cpufeatures:
+	push	%rbx		/* cpuid clobbers %ebx */
+	mov	$0x1, %eax	/* select cpuid leaf 1 */
+	cpuid
+	mov	%ecx, %eax	/* rax = ECX, upper half zeroed */
+	rol	$32, %rax	/* rax = ECX << 32 */
+	shrd	$32, %rdx, %rax	/* rax = (EDX << 32) | ECX */
+	pop	%rbx
+	ret
--- a/mbld/opts.myr
+++ b/mbld/opts.myr
@@ -7,6 +7,7 @@
 	var opt_arch 		: byte[:]
 	var opt_sys		: byte[:]
 	var opt_sysvers		: (int, int, int)
+	var opt_cpufeatures	: uint64
 	var opt_runtime		: byte[:]
 	var opt_genasm		: bool
 	var opt_incpaths	: byte[:][:]
@@ -30,11 +31,13 @@
 
 	const initopts	: (-> void)
 	const parseversion	: (v : byte[:] -> (int, int, int))
+	extern const cpufeatures : (-> uint64)
 ;;
 
 var opt_arch 		= ""
 var opt_sys		= ""
 var opt_binname		= ""
+var opt_cpufeatures	= 0ul
 var opt_libname		= ""
 var opt_runtime		= ""
 var opt_incpaths	/* FIXME: taking a constant slice is a nonconstant initializer */
@@ -74,6 +77,9 @@
 	| "amd64":	opt_arch = "x64"
 	| unknown:	std.fatal("unknown architecture \"{}\"\n", unknown)
 	;;
+
+	/* from cpuid with EAX=1; EDX at top, ECX at bottom */
+	opt_cpufeatures = cpufeatures()
 
 	opt_maxproc = 2*(thread.ncpu() : std.size)
 	opt_incpaths = [][:]
--- a/mbld/syssel.myr
+++ b/mbld/syssel.myr
@@ -158,6 +158,11 @@
 	| "x64":	tag(b, "x64")
 	| unknown:	std.fatal("unknown architecture {}\n", unknown)
 	;;
+
+	if opt_cpufeatures & 0x180000 == 0x180000	/* bits 19 and 20 of the ECX half: SSE4.1 and SSE4.2 must both be set */
+		tag(b, "sse4")
+	;;
+
 	for t : tags
 		tag(b, t)
 	;;
--- a/mk/bootstrap/bootstrap+Darwin-x86_64.sh
+++ b/mk/bootstrap/bootstrap+Darwin-x86_64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/start.o lib/thread/start+osx-x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -140,5 +141,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld -pagezero_size 0x100000000 -macosx_version_min 10.6 -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys -macosx_version_min 10.6
+	ld -pagezero_size 0x100000000 -macosx_version_min 10.6 -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys -macosx_version_min 10.6
 true
--- a/mk/bootstrap/bootstrap+FreeBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+FreeBSD-amd64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/exit.o lib/thread/exit+freebsd-x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+	ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
 true
--- a/mk/bootstrap/bootstrap+Linux-x86_64.sh
+++ b/mk/bootstrap/bootstrap+Linux-x86_64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/exit.o lib/thread/exit+linux-x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+	ld --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
 true
--- a/mk/bootstrap/bootstrap+NetBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+NetBSD-amd64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
 	$pwd/6/6m -I . -I lib/sys lib/std/option.myr
@@ -139,5 +140,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+	ld -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
 true
--- a/mk/bootstrap/bootstrap+OpenBSD-amd64.sh
+++ b/mk/bootstrap/bootstrap+OpenBSD-amd64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config.myr
+	as -g -o mbld/cpufeatures.o mbld/cpufeatures+x64.s
 	as -g -o lib/thread/exit.o lib/thread/exit+openbsd-x64.s
 	as -g -o lib/thread/atomic-impl.o lib/thread/atomic-impl+x64.s
 	as -g -o lib/std/getbp.o lib/std/getbp+posixy-x64.s
@@ -141,5 +142,5 @@
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6m -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	ld -nopie --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
+	ld -nopie --gc-sections -o mbld/mbld $pwd/rt/_myrrt.o mbld/deps.o mbld/main.o mbld/util.o mbld/cpufeatures.o mbld/libs.o mbld/syssel.o mbld/config.o mbld/opts.o mbld/subtest.o mbld/types.o mbld/test.o mbld/install.o mbld/parse.o mbld/build.o -Llib/thread -lthread -Llib/bio -lbio -Llib/regex -lregex -Llib/std -lstd -Llib/sys -lsys
 true
--- a/mk/bootstrap/bootstrap+Plan9-amd64.sh
+++ b/mk/bootstrap/bootstrap+Plan9-amd64.sh
@@ -4,6 +4,7 @@
 pwd=`pwd`
 set -x
 	$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/config+plan9-x64.myr
+	6a -o mbld/cpufeatures.6 mbld/cpufeatures+x64.s
 	6a -o lib/thread/atomic-impl.6 lib/thread/atomic-impl+plan9-x64.s
 	6a -o lib/std/getbp.6 lib/std/getbp+plan9-x64.s
 	$pwd/6/6.out -I lib/sys lib/std/errno+plan9.myr
@@ -138,5 +139,5 @@
 	$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/test.myr
 	$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/deps.myr
 	$pwd/6/6.out -I lib/sys -I lib/std -I lib/bio -I lib/regex -I lib/thread mbld/main.myr
-	6l -l -o mbld/mbld $pwd/rt/_myrrt.6 mbld/deps.6 mbld/main.6 mbld/util.6 mbld/libs.6 mbld/syssel.6 mbld/config.6 mbld/opts.6 mbld/subtest.6 mbld/types.6 mbld/test.6 mbld/install.6 mbld/parse.6 mbld/build.6 lib/thread/libthread.a lib/bio/libbio.a lib/regex/libregex.a lib/std/libstd.a lib/sys/libsys.a
+	6l -l -o mbld/mbld $pwd/rt/_myrrt.6 mbld/deps.6 mbld/main.6 mbld/util.6 mbld/cpufeatures.6 mbld/libs.6 mbld/syssel.6 mbld/config.6 mbld/opts.6 mbld/subtest.6 mbld/types.6 mbld/test.6 mbld/install.6 mbld/parse.6 mbld/build.6 lib/thread/libthread.a lib/bio/libbio.a lib/regex/libregex.a lib/std/libstd.a lib/sys/libsys.a
 true