shithub: mc

Download patch

ref: dbe60482e84a28a55ae6d9f5a7c9728e00f690d3
parent: 77d584ee227d829e842efdc938dffa322665b22c
author: S. Gilles <sgilles@math.umd.edu>
date: Wed Mar 21 05:41:33 EDT 2018

Fix fma64 for subnormal results arising from normal inputs, start fma32

--- a/lib/math/fpmath-fma-impl.myr
+++ b/lib/math/fpmath-fma-impl.myr
@@ -9,9 +9,92 @@
 const exp_mask64 : uint64 = 0x7ff << 52
 
 pkglocal const fma32 = {x : flt32, y : flt32, z : flt32
+	var xd : flt64 = flt64fromflt32(x)
+	var yd : flt64 = flt64fromflt32(y)
+	var zd : flt64 = flt64fromflt32(z)
+	var prod : flt64 = xd * yd
+	var r : flt64 = prod + zd
+	var r32 : flt32 = flt32fromflt64(r)
+	var rn, re, rs
+	(rn, re, rs) = std.flt64explode(r)
+
+	if re == 1023 || r - prod == zd || rs & 0x1fffffff != 0x10000000
+		-> r32
+	;;
+
+	/*
+           This is a non-infinity, non-NaN, non-exact result, and
+           the bits we cut off in truncating to r32 are exactly a
+           halfway case, so rounding might be difficult.
+	 */
 	-> 0.0
 }
 
+const flt64fromflt32 = {f : flt32
+	var n, e, s
+	(n, e, s) = std.flt32explode(f)
+	var xs : uint64 = (s : uint64)
+	var xe : int64 = (e : int64)
+
+	if e == 127
+		-> std.flt64assem(n, 1023, xs)
+	elif e == -127
+		/*
+		  All subnormals in single precision (except 0.0s)
+		  can be upgraded to double precision, since the
+		  exponent range is so much wider.
+		 */
+		var first1 = find_first1_64(xs, 52)
+		if first1 < 0
+			-> std.flt64assem(n, -1023, 0)
+		;;
+		xs = xs << (52 - (first1 : uint64))
+		xe = -127 + (52 - first1)
+		-> std.flt64assem(n, xe, xs)
+	;;
+
+	-> std.flt64assem(n, xe, xs)
+}
+
+const flt32fromflt64 = {f : flt64
+	var n, e, s
+	(n, e, s) = std.flt64explode(f)
+	var ts : uint32
+	var te : int32 = (e : int32)
+
+	if e >= 127
+		if e == 1023 && s != 0
+			/* NaN */
+			-> std.flt32assem(n, 127, 1)
+		else
+			/* infinity */
+			-> std.flt32assem(n, 127, 0)
+		;;
+	;;
+
+	if e >= -126
+		/* normal */
+		ts = ((s >> (52 - 23)) : uint32)
+		if need_round_away(0, s, 52 - 23)
+			ts++
+		;;
+		-> std.flt32assem(n, te, ts)
+	;;
+
+	/* subnormal already, will have to go to 0 */
+	if e == -1023
+		-> std.flt32assem(n, -127, 0)
+	;;
+
+	/* subnormal (at least, it will be) */
+	te = -127
+	ts = (shr(s, (52 - 23) + (-1022 - e)) : uint32)
+	if need_round_away(0, s, (52 - 23) + (-1022 - e))
+		ts++
+	;;
+	-> std.flt32assem(n, te, ts)
+}
+
 pkglocal const fma64 = {x : flt64, y : flt64, z : flt64
 	var xn : bool, yn : bool, zn : bool
 	var xe : int64, ye : int64, ze : int64
@@ -155,10 +238,10 @@
 	var res_s = 0
 	if res_firstbit_e <= -1023
 		/* Subnormal case */
-		if res_lastbit_e + 128 < -1022
+		if res_lastbit_e + 128 < 12 - 1022
 			res_s = shr(res_h, 12 - 1022 - (res_lastbit_e + 128))
 			res_s |= shr(res_l, 12 - 1022 - (res_lastbit_e + 64))
-		elif res_lastbit_e + 64 < -1022
+		elif res_lastbit_e + 64 < 12 - 1022
 			res_s = shl(res_h, -12 + (res_lastbit_e + 128) - (-1022))
 			res_s |= shr(res_l, 12 - 1022 - (res_lastbit_e + 64))
 		else
@@ -337,7 +420,7 @@
 }
 
 /* Find the first 1 bit in a bitstring */
-const find_first1_64 = {b : uint64, start : int64
+const find_first1_64 : (b : uint64, start : int64 -> int64) = {b : uint64, start : int64
 	for var j = start; j >= 0; --j
 		var m = shl(1, j)
 		if b & m != 0
@@ -359,9 +442,9 @@
 
 /*
    For [ h ][ l ], where bitpos_last is the position of the last
-   bit that was included in the 53-bit wide result (l's last bit
-   has position 0), decide whether rounding up/away is needed. This
-   is true if
+   bit that was included in the truncated result (l's last bit has
+   position 0), decide whether rounding up/away is needed. This is
+   true if
 
     - following bitpos_last is a 1, then a non-zero sequence, or
 
--- a/lib/math/test/fpmath-fma-impl.myr
+++ b/lib/math/test/fpmath-fma-impl.myr
@@ -17,7 +17,8 @@
 	var inputs : (uint64, uint64, uint64, uint64)[:] = [
 		/*
 		   These are mostly obtained by running fpmath-consensus
-		   with seed 1234. Each covers a different corner case.
+		   with seed 1234. Each (mostly) covers a different
+		   corner case.
 		 */
 		(0x0000000000000000, 0x0000000000000000, 0x0100000000000000, 0x0100000000000000),
 		(0x0000000000000000, 0x0000000000000000, 0x0200000000000000, 0x0200000000000000),
@@ -33,6 +34,11 @@
 		(0xb5e8db2107f4463f, 0x614af740c0d7eb3b, 0xd7e3d25c4daa81e0, 0xd7e3d798d3ccdffb),
 		(0xae62c8be4cb45168, 0x90cc5236f3516c90, 0x0007f8b14f684558, 0x0007f9364eb1a815),
 		(0x5809f53e32a7e1ba, 0xcc647611ccaa5bf4, 0xdfbdb5c345ce7a56, 0xe480990da5526103),
+		(0xbb889d7f826438e1, 0x03bdaff82129696d, 0x000000dacab276ae, 0x8000009296c962f8),
+		(0x003d95525e2b057a, 0xbef738ea5717d89a, 0x800000089763d88c, 0x800000b456ed1a9c),
+		(0x0be868cb5a7180c8, 0x3357a30707ed947c, 0x80000050d6b86ac6, 0x000000cfa41cb229),
+		(0xbe535f4f8a7498af, 0x00d24adee12217b8, 0x0000005729e93fb0, 0x800000016d975af3),
+		(0x39d1968eb883f088, 0x856f286e3b268f0e, 0x800000d7cdd0ed70, 0x800001e9cf01a0ae),
 	][:]
 
 	for (x, y, z, r) : inputs