shithub: mc

Download patch

ref: 37ba2ec87069c9ca9bfeac71ec4dba660fcadf60
parent: bf035b686f58f139ee0b04746d3215e7bb8413e9
author: S. Gilles <sgilles@math.umd.edu>
date: Thu May 3 16:14:48 EDT 2018

Finish expm1 using scale2.

--- a/lib/math/exp-impl.myr
+++ b/lib/math/exp-impl.myr
@@ -45,7 +45,7 @@
 	Log3_4 : @u
 	Log5_4 : @u
 	Bi : @u[:]
-	exactness_thresh : @u
+	precision : @u
 
 	/* For both */
 	nabs : @u
@@ -65,7 +65,7 @@
 	.tobits = std.flt32bits,
 	.frombits = std.flt32frombits,
 	.inf = 0x7f800000,
-	.nan = 0x7fc0000,
+	.nan = 0x7fc00000,
 	.thresh_1_min = 0xc2cff1b4, /* ln(2^(-127 - 23)) ~= -103.9720770839917 */
 	.thresh_1_max = 0x42b17218, /* ln([2 - 2^(-24)] * 2^127) ~= 88.72283905206 */
 	.inv_L = 0x4238aa3b, /* L = log(2) / 32, this is 1/L in working precision */
@@ -121,7 +121,7 @@
 		(0x3ff52540, 0x36f45492),
 		(0x3ffa8380, 0x36cb6dc9),
 	],
-	.exactness_thresh = 24, /* threshold to prevent underflow in a S[high] + 2^-m */
+	.precision = 24, /* threshold to prevent underflow in a S[high] + 2^-m */
 ]
 
 const desc64 : fltdesc(flt64, uint64, int64) = [
@@ -196,7 +196,7 @@
 		(0x3ffea4afa2a490c0, 0x3cf9858f73a18f5e),
 		(0x3fff50765b6e4540, 0x3c99d3e12dd8a18b),
 	],
-	.exactness_thresh = 53,
+	.precision = 53,
 ]
 
 const exp32 = {f : flt32
@@ -375,15 +375,13 @@
 	   underflow flag when not technically required. We currently
 	   do not care about such flags, so that logic is omitted.
 	 */
-	var two_m = d.assem(false, M, 0)
-	var two_neg_m = d.assem(false, -M, 0)
-	if M >= d.exactness_thresh
-		-> two_m * (S_hi  + (S * P + (S_lo - two_neg_m)))
+	if M >= d.precision
+		-> scale2(S_hi + (S * P + (S_lo - scale2(1.0, -M))), M)
 	elif M <= -8
-		-> two_m * (S_hi  + (S * P + S_lo)) - (1.0 : @f)
+		-> scale2(S_hi + (S * P + S_lo), M) - (1.0 : @f)
 	;;
 
-	-> two_m * ((S_hi - two_neg_m) + (S_hi * P + S_lo * (P + (1.0 : @f))))
+	-> scale2((S_hi - scale2(1.0, -M)) + (S_hi * P + S_lo * (P + (1.0 : @f))), M)
 }
 
 generic round = {f : @f, d : fltdesc(@f, @u, @i) :: numeric,floating,std.equatable @f, numeric,integral @u, numeric,integral @i, roundable @f -> @i
--- a/lib/math/test/exp-impl.myr
+++ b/lib/math/test/exp-impl.myr
@@ -2,6 +2,10 @@
 use math
 use testr
 
+/*
+   Note: a major part of the algorithms are the S constants. They
+   are tested extensively in expm101 and expm102.
+ */
 const main = {
 	testr.run([
 		[.name="exp-01", .fn = exp01],
@@ -9,6 +13,9 @@
 		[.name="exp-03", .fn = exp03],
 		[.name="exp-04", .fn = exp04],
 		[.name="expm1-01", .fn = expm101],
+		[.name="expm1-02", .fn = expm102],
+		[.name="expm1-03", .fn = expm103],
+		[.name="expm1-04", .fn = expm104],
 	][:])
 }
 
@@ -113,8 +120,8 @@
 const expm101 = {c
 	var inputs : (uint32, uint32)[:] = [
 		(0x00000000, 0x00000000),
+		(0x80000000, 0x80000000),
 		(0x3f000000, 0x3f261299),
-		(0x34000000, 0x34000000),
 		(0x3c000000, 0x3c008056),
 		(0x42000000, 0x568fa1fe),
 		(0xc2b00000, 0xbf800000),
@@ -121,7 +128,52 @@
 		(0xc2b20000, 0xbf800000),
 		(0x01000000, 0x01000000),
 		(0x40000000, 0x40cc7326),
-		(0x42b17200, 0x7f7d7740),
+		(0x42b17200, 0x7f7ff404),
+		(0x415a3cf2, 0x494cd0e3),
+		(0x7f800000, 0x7f800000),
+		(0xff800000, 0xbf800000),
+		(0x7a2028b1, 0x7f800000),
+		(0xa201a23a, 0xa201a23a),
+		(0xc0000000, 0xbf5d5aab),
+		(0xbe934b10, 0xbe7fffff),
+		(0xbe934b11, 0xbe800000),
+		(0xbe934b12, 0xbe800001),
+		(0x3e647fbe, 0x3e800000),
+		(0x3e647fbf, 0x3e800000),
+		(0x3e647fc0, 0x3e800001),
+		(0xc0f744f5, 0xbf7fe31e),
+		(0x4210297a, 0x597f31f5), /* J = 0 */
+		(0x3f34c3cd, 0x3f83573d), /* J = 1 */
+		(0x3f3a52b6, 0x3f89087b), /* J = 2 */
+		(0xbf20e72b, 0xbeeee940), /*  ...  */
+		(0x41f4bd2a, 0x558c999f),
+		(0xc02a0418, 0xbf6e07cd),
+		(0xc0293a2a, 0xbf6dcec1),
+		(0x40b62779, 0x4393ca4b),
+		(0x3fc680ac, 0x406dc6a4),
+		(0x3fc9d2c6, 0x4075b516),
+		(0xbfedd645, 0xbf581273),
+		(0x3e70e5d1, 0x3e87cbdb),
+		(0xbeddcacc, 0xbeb3ffd9),
+		(0x3e8beb21, 0x3ea0e776),
+		(0x3e9ded31, 0x3eb8fe1f),
+		(0x40e8503c, 0x44b19ed8),
+		(0x40d265cb, 0x4432f91f),
+		(0xbea0f9bc, 0xbe8a2036),
+		(0x3ec42672, 0x3eef04c4),
+		(0xc140e8cc, 0xbf7fff9f),
+		(0x4117320e, 0x46467e73),
+		(0x3ee8b75d, 0x3f134ef0),
+		(0xc03f51f1, 0xbf731e4e),
+		(0x42733615, 0x6b52d3d4),
+		(0x3f02f2b5, 0x3f2af617),
+		(0xbf5ac925, 0xbf131660),
+		(0x40813277, 0x425eb7ac),
+		(0x41842e94, 0x4b64b1dd),
+		(0x41b0ba81, 0x4f6a0cc1),
+		(0xc061d7c2, 0xbf787d28),
+		(0xc0611682, 0xbf786657),
+		(0x40dcd7e9, 0x447827c5), /* J = 31 */
 	][:]
 
 	for (x, y) : inputs
@@ -134,3 +186,100 @@
 	;;
 }
 
+const expm102 = {c
+	var inputs : (uint64, uint64)[:] = [
+		(0x0000000000000000, 0x0000000000000000),
+		(0x404ef04831cb65ed, 0x45834ac37c44b3d3),
+		(0x7ff0000000000000, 0x7ff0000000000000),
+		(0xfff0000000000000, 0xbff0000000000000),
+		(0x80318a89f290021a, 0x80318a89f290021a),
+		(0xc0180881a9e73af6, 0xbfefebdcaf24d5fe),
+		(0xc020cedaedb028c9, 0xbfeffe2a4ee5ba79),
+		(0xbfe62812ff80cb9e, 0xbfdff9cf6758cc6a), /* J = 0 */
+		(0xbfe526dab7e5054c, 0xbfdef44fe4876d02), /* J = 1 */
+		(0x3ff6ea5c51cbf0f2, 0x400980f836e2c055), /* J = 2 */
+		(0x403f4315360b2cdc, 0x42c12ad5f692ffff), /*  ...  */
+		(0x3fe93f778a9dc013, 0x3ff3381168aca01e),
+		(0x405023f373e01862, 0x45c1aa771de3ba9c),
+		(0xbfff56366f3b92de, 0xbfeb7c693f8bdbb8),
+		(0xc0159bcd07244a34, 0xbfefdb1461286124),
+		(0x40079bbc23a67f90, 0x40322039bbbb5e2e),
+		(0xbffe1933d2c0ed70, 0xbfeb1f6c166582bb),
+		(0xc009ea7d09d84479, 0xbfeebf01fa92741c),
+		(0x405d84f7563a2612, 0x4a946476fb27817e),
+		(0x4045f770a45f8e7f, 0x43e4da111dae0dfb),
+		(0x4070e9a83b352180, 0x585516d4e37bd9f6),
+		(0x3fefcdb2a528c6e8, 0x3ffb39ec926d8a50),
+		(0x3fd52b5e7995f00c, 0x3fd91739183464e0),
+		(0xbfd625845bbaf98f, 0xbfd2b893d222b8f2),
+		(0xbfd43c0407d114d5, 0xbfd15909a4bea38e),
+		(0x3fd93f8288681821, 0x3fdef406a9f7c4b1),
+		(0x3fdaad696fbfea76, 0x3fe08c8035acbd0b),
+		(0x405e9b6f4b4bc7f3, 0x4af8b6ad079946a7),
+		(0x3fdc85cf6b85bfaf, 0x3fe1f811193e5cf5),
+		(0xbfed55f9b317d7eb, 0xbfe334b0378703a0),
+		(0x406ee7f396a46c9b, 0x563a11333c75a10f),
+		(0x4031266dc880810e, 0x417ac458c1525e64),
+		(0xc019905c018b6d96, 0xbfeff243dffe774d),
+		(0x404dc17e83d7ee6b, 0x454cfbf6572d627f),
+		(0xc013dcd6fae06405, 0xbfefc6dfe34e5408),
+		(0x402762d99fa5cfda, 0x40fd3b9a2004f68b),
+		(0xbfe89074f132e353, 0xbfe12602fb3c9806),
+		(0x4077c7ea24627ae7, 0x623ea61f88281bd5),
+		(0xbff6a873237d8072, 0xbfe83c311800b6ee), /* J = 31 */
+	][:]
+
+	for (x, y) : inputs
+		var xf : flt64 = std.flt64frombits(x)
+		var yf : flt64 = std.flt64frombits(y)
+		var rf = math.expm1(xf)
+		testr.check(c, rf == yf,
+			"expm1(0x{b=16,w=16,p=0}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
+			x, y, std.flt64bits(rf))
+	;;
+}
+
+const expm103 = {c
+	/*
+	   As with exp, there is some accepted error in expm1.
+	 */
+
+	var inputs : (uint32, uint32, uint32)[:] = [
+		(0x34000000, 0x34000001, 0x34000000),
+		(0xbe651dea, 0xbe4d4b4d, 0xbe4d4b4c),
+	][:]
+
+	for (x, y_perfect, y_acceptable) : inputs
+		var xf : flt32 = std.flt32frombits(x)
+		var ypf : flt32 = std.flt32frombits(y_perfect)
+		var yaf : flt32 = std.flt32frombits(y_acceptable)
+		var rf = math.expm1(xf)
+		if rf != ypf && rf != yaf
+		testr.fail(c, "expm1(0x{b=16,w=8,p=0}) was 0x{b=16,w=8,p=0}. It should have been 0x{b=16,w=8,p=0}, although we will also accept 0x{b=16,w=8,p=0}",
+			x, std.flt32bits(rf), y_perfect, y_acceptable)
+		;;
+	;;
+}
+
+const expm104 = {c
+	/*
+	   As with exp, there is some accepted error in expm1.
+	 */
+
+	var inputs : (uint64, uint64, uint64)[:] = [
+		(0xbf9d0b5aadc4d0ac, 0xbf9ca2e5b7bfa859, 0xbf9ca2e5b7bfa85a),
+		(0x3fc2dbb101fe0392, 0x3fc451731cc0e358, 0x3fc451731cc0e359),
+		(0x3fc8a39bc9c32fec, 0x3fcb2b988c3e0b2f, 0x3fcb2b988c3e0b30),
+	][:]
+
+	for (x, y_perfect, y_acceptable) : inputs
+		var xf : flt64 = std.flt64frombits(x)
+		var ypf : flt64 = std.flt64frombits(y_perfect)
+		var yaf : flt64 = std.flt64frombits(y_acceptable)
+		var rf = math.expm1(xf)
+		if rf != ypf && rf != yaf
+		testr.fail(c, "expm1(0x{b=16,w=16,p=0}) was 0x{b=16,w=16,p=0}. It should have been 0x{b=16,w=16,p=0}, although we will also accept 0x{b=16,w=16,p=0}",
+			x, std.flt64bits(rf), y_perfect, y_acceptable)
+		;;
+	;;
+}
--- a/lib/math/test/scale2-impl.myr
+++ b/lib/math/test/scale2-impl.myr
@@ -61,6 +61,7 @@
 		(0x0000af31,   2, 0x0002bcc4),
 		(0x0000af31, 260, 0x7eaf3100),
 		(0x0000af31, 266, 0x7f800000),
+		(0x3f7ff404, 128, 0x7f7ff404),
 	][:]
 
 	for (u, m, v) : inputsb