shithub: mc

ref: bc7052e02f9bdbb93b13987d83f8572b4f657687
parent: 0f4c2a5b39ba47f84d9b3efda76920fd688defa8
author: S. Gilles <sgilles@math.umd.edu>
date: Thu Apr 19 06:51:07 EDT 2018

Test exp.

We use Tang's implementation, which is occasionally off by 1 ulp.
In the double-precision case, this is about on par with current
libc implementations. Glibc computes expf() in double precision,
so it is (at least as far as observed) always accurate to the
last place. We appear to be about on par with musl-libc in the
single-precision case.
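
For reference, this is roughly how such ulp comparisons can be made:
a small standalone C sketch (not part of this patch) that measures a
single-precision exp against a double-precision reference. The helper
name ulp_err and the sample inputs are illustrative only, and the ulp
formula assumes a normal-range result.

	#include <math.h>
	#include <stdio.h>

	/* distance, in flt32 ulps, between r and the reference value ref */
	static double
	ulp_err(float r, double ref)
	{
		int e;

		frexp(ref, &e);		/* ref = m * 2^e with 0.5 <= |m| < 1 */
		/* one flt32 ulp at this magnitude (normal range) is 2^(e - 24) */
		return fabs((double)r - ref) / ldexp(1.0, e - 24);
	}

	int
	main(void)
	{
		float xs[] = { 32.5f, 1.0f, -7.25f };

		for (int i = 0; i < 3; i++) {
			double err = ulp_err(expf(xs[i]), exp((double)xs[i]));
			printf("expf(%g) is about %.2f ulps off\n", xs[i], err);
		}
		return 0;
	}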

In order to increase accuracy, we could either decrease the
interval size (and increase the table size) or switch to a
different algorithm. Decreasing the interval size is non-trivial,
however, as it would require recomputing the entire table.
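
To make the interval/table coupling concrete, here is a rough C
sketch of a Tang-style table-driven exp. It is not the library's
implementation (in particular it omits the hi/lo table split and the
carefully chosen polynomial the real code uses); it only shows why a
smaller reduced interval forces a larger, recomputed table.

	#include <math.h>
	#include <stdio.h>

	#define N 32	/* table entries; the reduced interval has width ln(2)/N */

	static const double ln2 = 0.69314718055994530942;
	static double tab[N];	/* tab[j] = 2^(j/N) */

	/* exp(x) = 2^m * 2^(j/N) * e^r, where x = (m*N + j)*ln(2)/N + r */
	static double
	exp_sketch(double x)
	{
		int k = (int)floor(x * N / ln2 + 0.5);	/* nearest multiple of ln(2)/N */
		int m = k / N, j = k % N;
		if (j < 0) {	/* keep the table index in [0, N) */
			j += N;
			m--;
		}
		double r = x - k * (ln2 / N);	/* |r| <= ln(2)/(2N): shrinks as N grows */
		double p = r + r*r/2 + r*r*r/6 + r*r*r*r/24;	/* short polynomial for e^r - 1 */
		return ldexp(tab[j] * (1.0 + p), m);
	}

	int
	main(void)
	{
		for (int j = 0; j < N; j++)
			tab[j] = exp2((double)j / N);	/* growing N means redoing this table */
		printf("%.17g\n%.17g\n", exp_sketch(1.0), exp(1.0));
		return 0;
	}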

--- a/lib/math/exp-impl.myr
+++ b/lib/math/exp-impl.myr
@@ -230,7 +230,10 @@
 	var S_lo = d.frombits(Su_lo)
 
 	var S = S_hi + S_lo
-	var exp = d.assem(false, M, 0) * (S_hi + (S_lo + (P * S)))
+	var unscaled = S_hi + (S_lo + (P * S))
+	var nu, eu, su
+	(nu, eu, su) = d.explode(unscaled)
+	var exp = d.assem(nu, eu + M, su)
 
 	-> exp
 }
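
A note on the hunk above: the scale factor M is now folded into the
exponent of the already-computed value instead of multiplying by a
separately assembled 2^M. As a rough C analogue, with frexp/ldexp
standing in for d.explode/d.assem (an illustration only, not the
library code):

	#include <math.h>
	#include <stdio.h>

	/* scale unscaled by 2^M by adjusting its exponent directly */
	static double
	scale(double unscaled, int M)
	{
		int e;
		double sig = frexp(unscaled, &e);	/* unscaled = sig * 2^e */
		return ldexp(sig, e + M);		/* reassemble with exponent e + M */
	}

	int
	main(void)
	{
		double u = 1.2345;	/* stand-in for S_hi + (S_lo + P*S) */
		printf("%.17g\n", scale(u, 3));	/* 1.2345 * 2^3 = 9.876 */
		return 0;
	}
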
--- a/lib/math/test/exp-impl.myr
+++ b/lib/math/test/exp-impl.myr
@@ -6,6 +6,8 @@
 	testr.run([
 		[.name="exp-01", .fn = exp01],
 		[.name="exp-02", .fn = exp02],
+		[.name="exp-03", .fn = exp03],
+		[.name="exp-04", .fn = exp04],
 	][:])
 }
 
@@ -15,6 +17,8 @@
 		(0x34000000, 0x3f800001),
 		(0x3c000000, 0x3f810101),
 		(0x42000000, 0x568fa1fe),
+		(0xc2b00000, 0x0041edc4),
+		(0xc2b20000, 0x001840fc),
 	][:]
 
 	for (x, y) : inputs
@@ -30,6 +34,7 @@
 const exp02 = {c
 	var inputs : (uint64, uint64)[:] = [
 		(0x0000000000000000, 0x3ff0000000000000),
+		(0x3e50000000000000, 0x3ff0000004000000),
 	][:]
 
 	for (x, y) : inputs
@@ -39,5 +44,58 @@
 		testr.check(c, rf == yf,
 			"exp(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
 			x, y, std.flt64bits(rf))
+	;;
+}
+
+const exp03 = {c
+	/*
+	   Tang's algorithm has an error of up to 0.77 ulps. This
+	   is not terrible (musl appears to follow it, for example).
+	   Here we quarantine off some known-bad results.
+	 */
+
+	var inputs : (uint32, uint32, uint32)[:] = [
+		(0x42020000, 0x56eccf79, 0x56eccf78),
+		(0x3ec40600, 0x3fbbb54b, 0x3fbbb54c),
+	][:]
+
+	for (x, y_perfect, y_acceptable) : inputs
+		var xf : flt32 = std.flt32frombits(x)
+		var ypf : flt32 = std.flt32frombits(y_perfect)
+		var yaf : flt32 = std.flt32frombits(y_acceptable)
+		var rf = math.exp(xf)
+		if rf != ypf && rf != yaf
+			testr.fail(c, "exp(0x{b=16,w=8,p=0}) was 0x{b=16,w=8,p=0}. It should have been 0x{b=16,w=8,p=0}, although we will also accept 0x{b=16,w=8,p=0}",
+				x, std.flt32bits(rf), y_perfect, y_acceptable)
+		;;
+	;;
+}
+
+const exp04 = {c
+	/*
+	   Tang's algorithm has an error of up to 0.77 ulps. This
+	   is not terrible (musl appears to follow it, for example).
+	   Here we quarantine off some known-bad results.
+	 */
+
+	var inputs : (uint64, uint64, uint64)[:] = [
+		(0x3cda000000000000, 0x3ff0000000000006, 0x3ff0000000000007),
+		(0x3d57020000000000, 0x3ff00000000005c0, 0x3ff00000000005c1),
+		(0x3d58020000000000, 0x3ff0000000000600, 0x3ff0000000000601),
+		(0xc087030000000000, 0x0000000000000c6d, 0x0000000000000c6e),
+		(0xc011070000000000, 0x3f8d039e34c59187, 0x3f8d039e34c59186),
+		(0xbd50070000000000, 0x3feffffffffff7fc, 0x3feffffffffff7fd),
+		(0xbd430e0000000000, 0x3feffffffffffb3c, 0x3feffffffffffb3d),
+	][:]
+
+	for (x, y_perfect, y_acceptable) : inputs
+		var xf : flt64 = std.flt64frombits(x)
+		var ypf : flt64 = std.flt64frombits(y_perfect)
+		var yaf : flt64 = std.flt64frombits(y_acceptable)
+		var rf = math.exp(xf)
+		if rf != ypf && rf != yaf
+			testr.fail(c, "exp(0x{b=16,w=16,p=0}) was 0x{b=16,w=16,p=0}. It should have been 0x{b=16,w=16,p=0}, although we will also accept 0x{b=16,w=16,p=0}",
+				x, std.flt64bits(rf), y_perfect, y_acceptable)
+		;;
 	;;
 }
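
As a sanity check on the test data, each "acceptable" value sits
exactly one bit pattern away from the "perfect" one, i.e. one ulp for
finite results of the same sign. A quick standalone C check (the
triples are copied from exp03; the program itself is only
illustrative):

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		/* (input, perfect, acceptable) triples copied from exp03 */
		static const uint32_t t[][3] = {
			{ 0x42020000, 0x56eccf79, 0x56eccf78 },
			{ 0x3ec40600, 0x3fbbb54b, 0x3fbbb54c },
		};

		for (int i = 0; i < 2; i++) {
			uint32_t d = t[i][1] > t[i][2] ? t[i][1] - t[i][2] : t[i][2] - t[i][1];
			/* adjacent bit patterns of same-sign finite floats are 1 ulp apart */
			printf("input 0x%08x: results differ by %u ulp\n",
				(unsigned)t[i][0], (unsigned)d);
		}
		return 0;
	}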