shithub: mc

Download patch

ref: 55f8d064387dcf0229b6020d5a8e57e660d358c7
parent: 959b473779673322fbef51385a1cf730e73c7aac
author: S. Gilles <sgilles@math.umd.edu>
date: Wed Mar 28 20:37:58 EDT 2018

Implement sqrt in asm.

--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -6,6 +6,7 @@
 	fma-impl.myr
 
 	# sqrt
+	sqrt-impl+posixy-x64-sse2.s
 	sqrt-impl.myr
 
 	# trunc, floor, ceil
--- /dev/null
+++ b/lib/math/sqrt-impl+posixy-x64-sse2.s
@@ -1,0 +1,13 @@
+.globl math$sqrt32	/* export both spellings of the 32-bit sqrt entry point */
+.globl math$_sqrt32
+math$sqrt32:
+math$_sqrt32:	/* both labels mark the same address, so the two names share one body */
+	sqrtss %xmm0, %xmm0	/* scalar single-precision sqrt of the low lane of xmm0, in place; presumably xmm0 is both the argument and return register -- confirm against the Myrddin calling convention */
+	ret
+
+.globl math$sqrt64	/* export both spellings of the 64-bit sqrt entry point */
+.globl math$_sqrt64
+math$sqrt64:
+math$_sqrt64:	/* both labels mark the same address, so the two names share one body */
+	sqrtsd %xmm0, %xmm0	/* scalar double-precision sqrt of the low lane of xmm0, in place; presumably xmm0 is both the argument and return register -- confirm against the Myrddin calling convention */
+	ret
--- a/mbld/opts.myr
+++ b/mbld/opts.myr
@@ -34,6 +34,7 @@
 	const parseversion	: (v : byte[:] -> (int, int, int))
 
 	/* not exactly portable, but good enough for now */
+	const CpuidSSE2 : uint64= 0x400000000000000
 	const CpuidSSE4 : uint64= 0x180000
 
 	/*
--- a/mbld/syssel.myr
+++ b/mbld/syssel.myr
@@ -162,6 +162,9 @@
 		match opt_arch
 		| "x64":	
 			tag(b, "x64")
+			if opt_cpufeatures & CpuidSSE2 == CpuidSSE2
+				tag(b, "sse2")
+			;;
 			if opt_cpufeatures & CpuidSSE4 == CpuidSSE4
 				tag(b, "sse4")
 			;;