shithub: mc

Download patch

ref: c039e23ae4fd65431fb40f6d556d72a977725b0f
parent: e5833209e4cb397f5d17be79d81454b48002882a
author: S. Gilles <sgilles@math.umd.edu>
date: Fri Jun 7 20:33:17 EDT 2019

Fix some special cases in log-overkill related to subnormals.

--- a/lib/math/log-overkill.myr
+++ b/lib/math/log-overkill.myr
@@ -206,17 +206,30 @@
 	/* Special cases */
 	if std.isnan(x)
 		-> (std.flt64frombits(0x7ff8000000000000), std.flt64frombits(0x7ff8000000000000))
-	elif xe == -1024 && xs == 0
+	elif xe <= -1023 && xs == 0
 		/* log (+/- 0) is -infinity */
 		-> (std.flt64frombits(0xfff0000000000000), std.flt64frombits(0xfff0000000000000))
-	elif xe == 1023
-		-> (std.flt64frombits(0xfff8000000000000), std.flt64frombits(0xfff8000000000000))
 	elif xn
 		/* log(-anything) is NaN */
 		-> (std.flt64frombits(0x7ff8000000000000), std.flt64frombits(0x7ff8000000000000))
 	;;
 
+	if xe <= -1023
+		/*
+		   We depend on being able to pick bits out of xs as if it were normal, so
+		   normalize any subnormals.
+		 */
+		xe++
+		var check = 1 << 52
+		while xs & check == 0
+			xs <<= 1
+			xe--
+		;;
+		xsf = std.flt64assem(false, 0, xs)
+	;;
+
 	var shift = 0
+	var non_trivial = 0
 	var then_invert = false
 	var j1, F1, f1, logF1_hi, logF1_lo, F1_inv_hi, F1_inv_lo, fF1_hi, fF1_lo
 
@@ -237,8 +250,9 @@
 		var xinv_hi = 1.0 / x
 		var xinv_lo = fma64(-1.0 * xinv_hi, x, 1.0) / x
 		(tn, te, ts) = std.flt64explode(xinv_hi)
-		shift = ((47 >= te) : uint64) * ((47 - te) : uint64) + ((47 < te) : uint64) * 64
-		j1 = (ts >> shift) & 0x1f
+		non_trivial = ((47 >= te) : uint64) * ((47 - te < 64) : uint64)
+		shift = non_trivial * ((47 - te) : uint64)
+		j1 = non_trivial * ((ts >> shift) & 0x1f)
 		var F1m1 = scale2((j1 : flt64), -5)
 		F1 = 1.0 + F1m1
 		var f1_hi, f1_lo
@@ -266,8 +280,9 @@
 
 	/* F2 */
 	(tn, te, ts) = std.flt64explode(fF1_hi)
-	shift = ((42 >= te) : uint64) * ((42 - te) : uint64) + ((42 < te) : uint64) * 64
-	var j2 = (ts >> shift) & 0x1f
+	non_trivial = ((42 >= te) : uint64) * ((42 - te < 64) : uint64)
+	shift = non_trivial * ((42 - te) : uint64)
+	var j2 = non_trivial * ((ts >> shift) & 0x1f)
 	var F2m1 = scale2((j2 : flt64), -10)
 	var F2 = 1.0 + F2m1
 	var f2_hi, f2_lo
@@ -284,8 +299,9 @@
 
 	/* F3 (just like F2) */
 	(tn, te, ts) = std.flt64explode(fF2_hi)
-	shift = ((37 >= te) : uint64) * ((37 - te) : uint64) + ((37 < te) : uint64) * 64
-	var j3 = (ts >> shift) & 0x1f
+	non_trivial = ((37 >= te) : uint64) * ((37 - te < 64) : uint64)
+	shift = non_trivial * ((37 - te) : uint64)
+	var j3 = non_trivial * ((ts >> shift) & 0x1f)
 	var F3m1 = scale2((j3 : flt64), -15)
 	var F3 = 1.0 + F3m1
 	var f3_hi, f3_lo
@@ -302,8 +318,9 @@
 
 	/* F4 (just like F2) */
 	(tn, te, ts) = std.flt64explode(fF3_hi)
-	shift = ((32 >= te) : uint64) * ((32 - te) : uint64) + ((32 < te) : uint64) * 64
-	var j4 = (ts >> shift) & 0x1f
+	non_trivial = ((32 >= te) : uint64) * ((32 - te < 64) : uint64)
+	shift = non_trivial * ((32 - te) : uint64)
+	var j4 = non_trivial * ((ts >> shift) & 0x1f)
 	var F4m1 = scale2((j4 : flt64), -20)
 	var F4 = 1.0 + F4m1
 	var f4_hi, f4_lo
@@ -375,7 +392,6 @@
 	/* Oh yeah, and we need xe * log(2) */
 	var xel2_hi, xel2_lo, lx_hi, lx_lo
 	(xel2_hi, xel2_lo) = hl_mult((xe : flt64), 0.0, std.flt64frombits(0x3fe62e42fefa39ef), std.flt64frombits(0x3c7abc9e3b39803f))
-
 
 	(t1, t2) = slow2sum(xel2_lo, lsig_lo)
 	(lx_lo, t1) = slow2sum(lsig_hi, t1)
--- a/lib/math/test/log-overkill.myr
+++ b/lib/math/test/log-overkill.myr
@@ -227,6 +227,9 @@
 		(0x4014a5ce06d7df05, 0x3ffa42cc8df38d10, 0xbc5e54e0ca2ed44c),
 		(0x3f68d1447d5a29e8, 0xc017328d1195bac4, 0x3cb4fd5db024d1da),
 		(0x404963f9a80919eb, 0x400f6b9160b05ec4, 0x3c4526374db12c53),
+		(0x3fe78000b3cf1a39, 0xbfd3c2508d81ebf9, 0x3c7d6df43454d213),
+		(0x7fe6c53d8cef3d27, 0x40862b8a1ec909c8, 0x3cf9a8752da53a7e),
+		(0x000342cdeeb18fc9, 0xc0862fe5598ee7e6, 0xbd2bf7df7d1e9517),
 	][:]
 
 	for (x, y1, y2) : inputs