ref: 55f8d064387dcf0229b6020d5a8e57e660d358c7
parent: 959b473779673322fbef51385a1cf730e73c7aac
author: S. Gilles <sgilles@math.umd.edu>
date: Wed Mar 28 20:37:58 EDT 2018
Implement sqrt in asm.
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -6,6 +6,7 @@
fma-impl.myr
# sqrt
+ sqrt-impl+posixy-x64-sse2.s
sqrt-impl.myr
# trunc, floor, ceil
--- /dev/null
+++ b/lib/math/sqrt-impl+posixy-x64-sse2.s
@@ -1,0 +1,13 @@
+.globl math$sqrt32 /* 32-bit float square root; both names are exported and label the same two-instruction body */
+.globl math$_sqrt32
+math$sqrt32:
+math$_sqrt32:
+ sqrtss %xmm0, %xmm0 /* low 32 bits of xmm0 = sqrt(low 32 bits of xmm0), scalar single precision */
+ ret /* NOTE(review): assumes the argument arrives in, and the result is returned in, xmm0 -- confirm against the Myrddin x64 calling convention */
+
+.globl math$sqrt64 /* 64-bit float square root; both names are exported and label the same two-instruction body */
+.globl math$_sqrt64
+math$sqrt64:
+math$_sqrt64:
+ sqrtsd %xmm0, %xmm0 /* low 64 bits of xmm0 = sqrt(low 64 bits of xmm0), scalar double precision */
+ ret /* NOTE(review): assumes the argument arrives in, and the result is returned in, xmm0 -- confirm against the Myrddin x64 calling convention */
--- a/mbld/opts.myr
+++ b/mbld/opts.myr
@@ -34,6 +34,7 @@
const parseversion : (v : byte[:] -> (int, int, int))
/* not exactly portable, but good enough for now */
+ const CpuidSSE2 : uint64= 0x400000000000000 /* == 1 << 58. NOTE(review): presumably CPUID leaf 1 EDX bit 26 (SSE2) with EDX stored in the upper 32 bits of the feature word -- confirm where opt_cpufeatures is populated */
const CpuidSSE4 : uint64= 0x180000
/*
--- a/mbld/syssel.myr
+++ b/mbld/syssel.myr
@@ -162,6 +162,9 @@
match opt_arch
| "x64":
tag(b, "x64")
+ if opt_cpufeatures & CpuidSSE2 == CpuidSSE2
+ tag(b, "sse2")
+ ;;
if opt_cpufeatures & CpuidSSE4 == CpuidSSE4
tag(b, "sse4")
;;