shithub: mc

--- a/lib/math/fpmath-fma-impl.myr

+++ b/lib/math/fpmath-fma-impl.myr

@@ -9,9 +9,92 @@

 const exp_mask64 : uint64 = 0x7ff << 52

 pkglocal const fma32 = {x : flt32, y : flt32, z : flt32

+	var xd : flt64 = flt64fromflt32(x)

+	var yd : flt64 = flt64fromflt32(y)

+	var zd : flt64 = flt64fromflt32(z)

+	var prod : flt64 = xd * yd

+	var r : flt64 = prod + zd

+	var r32 : flt32 = flt32fromflt64(r)

+	var rn, re, rs

+	(rn, re, rs) = std.flt64explode(r)

+	if re == 1023 || r - prod == zd || rs & 0x1fffffff != 0x10000000

+		-> r32

+	;;

+	/*

+           This is a non-infinity, non-NaN, non-exact result, and

+           the bits we cut off in truncating to r32 are exactly a

+           halfway case, so rounding might be difficult.

+	 */

 	-> 0.0

+const flt64fromflt32 = {f : flt32

+	var n, e, s

+	(n, e, s) = std.flt32explode(f)

+	var xs : uint64 = (s : uint64)

+	var xe : int64 = (e : int64)

+	if e == 127

+		-> std.flt64assem(n, 1023, xs)

+	elif e == -127

+		/*

+		  All subnormals in single precision (except 0.0s)

+		  can be upgraded to double precision, since the

+		  exponent range is so much wider.

+		 */

+		var first1 = find_first1_64(xs, 52)

+		if first1 < 0

+			-> std.flt64assem(n, -1023, 0)

+		;;

+		xs = xs << (52 - (first1 : uint64))

+		xe = -127 + (52 - first1)

+		-> std.flt64assem(n, xe, xs)

+	;;

+	-> std.flt64assem(n, xe, xs)

+}

+const flt32fromflt64 = {f : flt64

+	var n, e, s

+	(n, e, s) = std.flt64explode(f)

+	var ts : uint32

+	var te : int32 = (e : int32)

+	if e >= 127

+		if e == 1023 && s != 0

+			/* NaN */

+			-> std.flt32assem(n, 127, 1)

+		else

+			/* infinity */

+			-> std.flt32assem(n, 127, 0)

+		;;

+	;;

+	if e >= -126

+		/* normal */

+		ts = ((s >> (52 - 23)) : uint32)

+		if need_round_away(0, s, 52 - 23)

+			ts++

+		;;

+		-> std.flt32assem(n, te, ts)

+	;;

+	/* subnormal already, will have to go to 0 */

+	if e == -1023

+		-> std.flt32assem(n, -127, 0)

+	;;

+	/* subnormal (at least, it will be) */

+	te = -127

+	ts = (shr(s, (52 - 23) + (-1022 - e)) : uint32)

+	if need_round_away(0, s, (52 - 23) + (-1022 - e))

+		ts++

+	;;

+	-> std.flt32assem(n, te, ts)

+}

 pkglocal const fma64 = {x : flt64, y : flt64, z : flt64

 	var xn : bool, yn : bool, zn : bool

 	var xe : int64, ye : int64, ze : int64

@@ -155,10 +238,10 @@

 	var res_s = 0

 	if res_firstbit_e <= -1023

 		/* Subnormal case */

-		if res_lastbit_e + 128 < -1022

+		if res_lastbit_e + 128 < 12 - 1022

 			res_s = shr(res_h, 12 - 1022 - (res_lastbit_e + 128))

 			res_s |= shr(res_l, 12 - 1022 - (res_lastbit_e + 64))

-		elif res_lastbit_e + 64 < -1022

+		elif res_lastbit_e + 64 < 12 - 1022

 			res_s = shl(res_h, -12 + (res_lastbit_e + 128) - (-1022))

 			res_s |= shr(res_l, 12 - 1022 - (res_lastbit_e + 64))

 		else

@@ -337,7 +420,7 @@

 /* Find the first 1 bit in a bitstring */

-const find_first1_64 = {b : uint64, start : int64

+const find_first1_64 : (b : uint64, start : int64 -> int64) = {b : uint64, start : int64

 	for var j = start; j >= 0; --j

 		var m = shl(1, j)

 		if b & m != 0

@@ -359,9 +442,9 @@

/*

    For [ h ][ l ], where bitpos_last is the position of the last

-   bit that was included in the 53-bit wide result (l's last bit

-   has position 0), decide whether rounding up/away is needed. This

-   is true if

+   bit that was included in the truncated result (l's last bit has

+   position 0), decide whether rounding up/away is needed. This is

+   true if

     - following bitpos_last is a 1, then a non-zero sequence, or

--- a/lib/math/test/fpmath-fma-impl.myr

+++ b/lib/math/test/fpmath-fma-impl.myr

@@ -17,7 +17,8 @@

 	var inputs : (uint64, uint64, uint64, uint64)[:] = [

/*

 		   These are mostly obtained by running fpmath-consensus

-		   with seed 1234. Each covers a different corner case.

+		   with seed 1234. Each (mostly) covers a different

+		   corner case.

*/

 		(0x0000000000000000, 0x0000000000000000, 0x0100000000000000, 0x0100000000000000),

 		(0x0000000000000000, 0x0000000000000000, 0x0200000000000000, 0x0200000000000000),

@@ -33,6 +34,11 @@

 		(0xb5e8db2107f4463f, 0x614af740c0d7eb3b, 0xd7e3d25c4daa81e0, 0xd7e3d798d3ccdffb),

 		(0xae62c8be4cb45168, 0x90cc5236f3516c90, 0x0007f8b14f684558, 0x0007f9364eb1a815),

 		(0x5809f53e32a7e1ba, 0xcc647611ccaa5bf4, 0xdfbdb5c345ce7a56, 0xe480990da5526103),

+		(0xbb889d7f826438e1, 0x03bdaff82129696d, 0x000000dacab276ae, 0x8000009296c962f8),

+		(0x003d95525e2b057a, 0xbef738ea5717d89a, 0x800000089763d88c, 0x800000b456ed1a9c),

+		(0x0be868cb5a7180c8, 0x3357a30707ed947c, 0x80000050d6b86ac6, 0x000000cfa41cb229),

+		(0xbe535f4f8a7498af, 0x00d24adee12217b8, 0x0000005729e93fb0, 0x800000016d975af3),

+		(0x39d1968eb883f088, 0x856f286e3b268f0e, 0x800000d7cdd0ed70, 0x800001e9cf01a0ae),

 	][:]

 	for (x, y, z, r) : inputs

--

⑨