shithub: mc

--- a/lib/math/bld.sub

+++ b/lib/math/bld.sub

@@ -1,6 +1,10 @@

 lib math =

 	fpmath.myr

+	# rounding (to actual integers)

+	round-impl+posixy-x64-sse4.s

+	round-impl.myr

 	# fused-multiply-add

 	fma-impl+posixy-x64-fma.s

 	fma-impl.myr

@@ -9,12 +13,12 @@

 	sqrt-impl+posixy-x64-sse2.s

 	sqrt-impl.myr

+	# summation

+	sum-impl.myr

 	# trunc, floor, ceil

 	trunc-impl+posixy-x64-sse4.s

 	trunc-impl.myr

-	# summation

-	sum-impl.myr

 	# util

 	util.myr

--- a/lib/math/fpmath.myr

+++ b/lib/math/fpmath.myr

@@ -9,18 +9,25 @@

 		/* sqrt-impl */

 		sqrt : (f : @f -> @f)

+		/* sum-impl */

+		kahan_sum : (a : @f[:] -> @f)

+		priest_sum : (a : @f[:] -> @f)

 		/* trunc-impl */

 		trunc : (f : @f -> @f)

 		ceil  : (f : @f -> @f)

 		floor : (f : @f -> @f)

+	;;

-		/* sum-impl */

-		kahan_sum : (a : @f[:] -> @f)

-		priest_sum : (a : @f[:] -> @f)

+	trait roundable @f -> @i =

+		/* round-impl */

+		rn : (f : @f -> @i)

;;

 	impl std.equatable flt32

 	impl std.equatable flt64

+	impl roundable flt64 -> int64

+	impl roundable flt32 -> int32

 	impl fpmath flt32

 	impl fpmath flt64

;;

@@ -41,17 +48,26 @@

 	eq = {a : flt64, b : flt64; -> std.flt64bits(a) == std.flt64bits(b)}

;;

+impl roundable flt32 -> int32 =

+	rn = {f : flt32; -> rn32(f) }

+;;

+impl roundable flt64 -> int64 =

+	rn = {f : flt64; -> rn64(f) }

+;;

 impl fpmath flt32 =

 	fma = {x, y, z; -> fma32(x, y, z)}

 	sqrt = {f; -> sqrt32(f)}

+	kahan_sum = {l; -> kahan_sum32(l) }

+	priest_sum = {l; -> priest_sum32(l) }

 	trunc = {f; -> trunc32(f)}

 	floor = {f; -> floor32(f)}

 	ceil  = {f; -> ceil32(f)}

-	kahan_sum = {l; -> kahan_sum32(l) }

-	priest_sum = {l; -> priest_sum32(l) }

;;

 impl fpmath flt64 =

@@ -59,14 +75,17 @@

 	sqrt = {f; -> sqrt64(f)}

+	kahan_sum = {l; -> kahan_sum64(l) }

+	priest_sum = {l; -> priest_sum64(l) }

 	trunc = {f; -> trunc64(f)}

 	floor = {f; -> floor64(f)}

 	ceil  = {f; -> ceil64(f)}

-	kahan_sum = {l; -> kahan_sum64(l) }

-	priest_sum = {l; -> priest_sum64(l) }

;;

+extern const rn32 : (f : flt32 -> int32)

+extern const rn64 : (f : flt64 -> int64)

 extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)

 extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)

@@ -73,6 +92,12 @@

 extern const sqrt32 : (x : flt32 -> flt32)

 extern const sqrt64 : (x : flt64 -> flt64)

+extern const kahan_sum32 : (l : flt32[:] -> flt32)

+extern const kahan_sum64 : (l : flt64[:] -> flt64)

+extern const priest_sum32 : (l : flt32[:] -> flt32)

+extern const priest_sum64 : (l : flt64[:] -> flt64)

 extern const trunc32 : (f : flt32 -> flt32)

 extern const trunc64 : (f : flt64 -> flt64)

@@ -81,9 +106,3 @@

 extern const ceil32  : (f : flt32 -> flt32)

 extern const ceil64  : (f : flt64 -> flt64)

-extern const kahan_sum32 : (l : flt32[:] -> flt32)

-extern const kahan_sum64 : (l : flt64[:] -> flt64)

-extern const priest_sum32 : (l : flt32[:] -> flt32)

-extern const priest_sum64 : (l : flt64[:] -> flt64)

--- /dev/null

+++ b/lib/math/round-impl+posixy-x64-sse4.s

@@ -1,0 +1,14 @@

+.globl math$rn32

+.globl math$_rn32

+math$rn32:

+math$_rn32:

+	cvtss2si	%xmm0, %eax

+	ret

+.globl math$rn64

+.globl math$_rn64

+math$rn64:

+math$_rn64:

+	cvtsd2si	%xmm0, %rax

+	ret

--- /dev/null

+++ b/lib/math/round-impl.myr

@@ -1,0 +1,70 @@

+use std

+use "util"

+pkg math =

+	const rn64 : (f : flt64 -> int64)

+	const rn32 : (f : flt32 -> int32)

+;;

+const rn64 = {f : flt64

+	var n : bool, e : int64, s : uint64

+	(n, e, s) = std.flt64explode(f)

+	if e >= 63

+		-> -9223372036854775808

+	elif e >= 52

+		var shifted : int64 = (( s << (e - 52 : uint64)) : int64)

+		if n

+			-> -1 * shifted

+		else

+			-> shifted

+		;;

+	elif e < -1

+		-> 0

+	;;

+	var integral_s = (s >> (52 - e : uint64) : int64)

+	if need_round_away(0, s, 52 - e)

+		integral_s++

+	;;

+	if n

+		-> -integral_s

+	else

+		-> integral_s

+	;;

+}

+const rn32 = {f : flt32

+	var n : bool, e : int32, s : uint32

+	(n, e, s) = std.flt32explode(f)

+	if e >= 31

+		-> -2147483648

+	elif e >= 23

+		var shifted : int32 = (( s << (e - 23 : uint32)) : int32)

+		if n

+			->  -1 * shifted

+		else

+			->  shifted

+		;;

+	elif e < -1

+		-> 0

+	;;

+	var integral_s = (s >> (23 - e : uint32) : int32)

+	if need_round_away(0, (s : uint64), (23 - e : int64))

+		integral_s++

+	;;

+	if n

+		-> -integral_s

+	else

+		-> integral_s

+	;;

+}

--- /dev/null

+++ b/lib/math/test/round-impl.myr

@@ -1,0 +1,82 @@

+use std

+use math

+use testr

+const main = {

+	testr.run([

+		[.name = "round-01",    .fn = round01],

+		[.name = "round-02",    .fn = round02],

+	][:])

+}

+const round01 = {c

+	var inputs : (flt32, int32)[:] = [

+		(123.4, 123),

+		(0.0, 0),

+		(-0.0, 0),

+		(1.0, 1),

+		(1.1, 1),

+		(0.9, 1),

+		(15.3, 15),

+		(15.5, 16),

+		(15.7, 16),

+		(16.3, 16),

+		(16.5, 16),

+		(16.7, 17),

+		(-102.1, -102),

+		(-102.5, -102),

+		(-102.7, -103),

+		(-103.1, -103),

+		(-103.5, -104),

+		(-103.7, -104),

+		(2147483641.5, -2147483648),

+		(2147483646.5, -2147483648),

+		(2147483647.5, -2147483648),

+		(2147483649.0, -2147483648),

+		(-2147483641.5, -2147483648),

+		(-2147483646.5, -2147483648),

+		(-2147483647.5, -2147483648),

+		(-2147483649.0, -2147483648),

+	][:]

+	for (f, g) : inputs

+		testr.eq(c, math.rn(f), g)

+	;;

+}

+const round02 = {c

+	var inputs : (flt64, int64)[:] = [

+		(123.4, 123),

+		(0.0, 0),

+		(-0.0, 0),

+		(1.0, 1),

+		(1.1, 1),

+		(0.9, 1),

+		(15.3, 15),

+		(15.5, 16),

+		(15.7, 16),

+		(16.3, 16),

+		(16.5, 16),

+		(16.7, 17),

+		(-102.1, -102),

+		(-102.5, -102),

+		(-102.7, -103),

+		(-103.1, -103),

+		(-103.5, -104),

+		(-103.7, -104),

+		(2147483641.5, 2147483642),

+		(2147483646.5, 2147483646),

+		(2147483647.5, 2147483648),

+		(2147483649.0, 2147483649),

+		(-2147483641.5, -2147483642),

+		(-2147483646.5, -2147483646),

+		(-2147483647.5, -2147483648),

+		(-2147483649.0, -2147483649),

+		(9223372036854775806.1, -9223372036854775808),

+		(-9223372036854775806.1, -9223372036854775808),

+	][:]

+	for (f, g) : inputs

+		testr.eq(c, math.rn(f), g)

+	;;

+}