ref: 18341c3b341f7f7c151673d2f79299f4074d378b
parent: 5e1154d69efc86f5fe3831b047e3531d9cfd3478
author: S. Gilles <sgilles@math.umd.edu>
date: Mon Mar 12 19:36:00 EDT 2018
Add asm implementations for trunc & co. These implementations are not terribly necessary, and they are not especially portable (they require SSE 4.1). They prove it is possible, however.
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -1,7 +1,8 @@
lib math =
fpmath.myr
- # trunc
+ # trunc, floor, ceil
+ fpmath-trunc-impl+posixy-x64.s
fpmath-trunc-impl.myr
lib ../std:std
--- /dev/null
+++ b/lib/math/fpmath-trunc-impl+posixy-x64.s
@@ -1,0 +1,41 @@
+.globl math$trunc32 /* trunc32: round the single-precision value in %xmm0 toward zero; result in %xmm0 */
+.globl math$_trunc32 /* second exported name for the same entry point */
+math$trunc32:
+math$_trunc32:
+ roundss $0x03, %xmm0, %xmm0 /* scalar SSE4.1 round, imm8 0x03 = truncate; only the low lane is touched, so stray upper-lane data cannot raise FP flags */
+ ret
+
+.globl math$floor32 /* floor32: round the single-precision value in %xmm0 toward -infinity; result in %xmm0 */
+.globl math$_floor32 /* second exported name for the same entry point */
+math$floor32:
+math$_floor32:
+ roundss $0x01, %xmm0, %xmm0 /* scalar SSE4.1 round, imm8 0x01 = round down; only the low lane is touched, so stray upper-lane data cannot raise FP flags */
+ ret
+
+.globl math$ceil32 /* ceil32: round the single-precision value in %xmm0 toward +infinity; result in %xmm0 */
+.globl math$_ceil32 /* second exported name for the same entry point */
+math$ceil32:
+math$_ceil32:
+ roundss $0x02, %xmm0, %xmm0 /* scalar SSE4.1 round, imm8 0x02 = round up; only the low lane is touched, so stray upper-lane data cannot raise FP flags */
+ ret
+
+.globl math$trunc64 /* trunc64: round the double-precision value in %xmm0 toward zero; result in %xmm0 */
+.globl math$_trunc64 /* second exported name for the same entry point */
+math$trunc64:
+math$_trunc64:
+ roundsd $0x03, %xmm0, %xmm0 /* scalar SSE4.1 round, imm8 0x03 = truncate; only the low lane is touched, so stray upper-lane data cannot raise FP flags */
+ ret
+
+.globl math$floor64 /* floor64: round the double-precision value in %xmm0 toward -infinity; result in %xmm0 */
+.globl math$_floor64 /* second exported name for the same entry point */
+math$floor64:
+math$_floor64:
+ roundsd $0x01, %xmm0, %xmm0 /* scalar SSE4.1 round, imm8 0x01 = round down; only the low lane is touched, so stray upper-lane data cannot raise FP flags */
+ ret
+
+.globl math$ceil64 /* ceil64: round the double-precision value in %xmm0 toward +infinity; result in %xmm0 */
+.globl math$_ceil64 /* second exported name for the same entry point */
+math$ceil64:
+math$_ceil64:
+ roundsd $0x02, %xmm0, %xmm0 /* scalar SSE4.1 round, imm8 0x02 = round up; only the low lane is touched, so stray upper-lane data cannot raise FP flags */
+ ret
--- a/lib/math/test/fpmath-trunc-impl.myr
+++ b/lib/math/test/fpmath-trunc-impl.myr
@@ -15,6 +15,7 @@
const trunc01 = {c
var flt32s : (flt32, flt32)[:] = [
+ (123.4, 123.0),
(0.0, 0.0),
(-0.0, -0.0),
(1.0, 1.0),