shithub: mc

Download patch

ref: a00a5db9878af9a52fe84cb1c0a7e9316924c7fa
parent: f2e8e641bf8ee06fa5128c522b151a7f4d9d003b
author: S. Gilles <sgilles@math.umd.edu>
date: Tue Jul 31 14:36:05 EDT 2018

Correct extra-precision division typo for atan.

--- a/lib/math/atan-impl.myr
+++ b/lib/math/atan-impl.myr
@@ -317,6 +317,18 @@
 ]
 
 const atan32 = {x
+	/*
+	   Irritating special rounding cases that would require
+	   more extra accuracy than they're worth to correctly round
+	   back from 64 to 32 bits.
+	 */
+	var xb = std.flt32bits(x)
+	if xb == 0x3d8d6b23
+		-> std.flt32frombits(0x3d8d31c3)
+	elif xb == 0xbd8d6b23
+		-> std.flt32frombits(0xbd8d31c3)
+	;;
+
 	var r = atan264((x : flt64), 1.0)
 	-> (r : flt32)
 }
@@ -326,6 +338,16 @@
 }
 
 const atan232 = {y, x
+	/* Handle the special cases of atan32 for consistency */
+	if x == 1.0
+		var yb = std.flt32bits(y)
+		if yb == 0x3d8d6b23
+			-> std.flt32frombits(0x3d8d31c3)
+		elif yb == 0xbd8d6b23
+			-> std.flt32frombits(0xbd8d31c3)
+		;;
+	;;
+
 	var r = atan264((y : flt64), (x : flt64))
 	-> (r : flt32)
 }
@@ -335,19 +357,29 @@
 		-> std.flt64nan()
 	;;
 
-	var xabs = std.flt64frombits(std.flt64bits(x) & (~(0 : uint64)) >> 1)
-	var yabs = std.flt64frombits(std.flt64bits(y) & (~(0 : uint64)) >> 1)
-
-	var then_negate = false
-	if (xabs == x) != (yabs == y)
-		then_negate = true
-	;;
-	x = xabs
-	y = yabs
-
 	var xb = std.flt64bits(x)
-	if xb == 0x0
-		if then_negate
+	var yb = std.flt64bits(y)
+	var xpos, xe, ypos, ye
+	(xpos, xe, _) = std.flt64explode(x)
+	(ypos, ye, _) = std.flt64explode(y)
+	xpos = !xpos
+	ypos = !ypos
+
+	/* All the special cases */
+	if yb == 0x0000000000000000
+		if xpos
+			-> 0.0
+		else
+			-> std.flt64frombits(0x400921fb54442d18)
+		;;
+	elif yb == 0x8000000000000000
+		if xpos
+			-> std.flt64frombits(0x8000000000000000)
+		else
+			-> std.flt64frombits(0xc00921fb54442d18)
+		;;
+	elif xb == 0x0000000000000000 || xb == 0x8000000000000000
+		if ypos
 			-> std.flt64frombits(0x3ff921fb54442d18)
 		else
 			-> std.flt64frombits(0xbff921fb54442d18)
@@ -354,17 +386,50 @@
 		;;
 	;;
 
+	var xinf = (xe == 1024)
+	var yinf = (ye == 1024)
+
+	match (yinf, ypos, xinf, xpos)
+	| (true,  true,  true,  false): -> std.flt64frombits(0x4002d97c7f3321d2)
+	| (true,  false, true,  false): -> std.flt64frombits(0xc002d97c7f3321d2)
+	| (true,  true,  true,  true ): -> std.flt64frombits(0x3fe921fb54442d18)
+	| (true,  false, true,  true ): -> std.flt64frombits(0xbfe921fb54442d18)
+	| (true,  true,  false, _    ): -> std.flt64frombits(0x3ff921fb54442d18)
+	| (true,  false, false, _    ): -> std.flt64frombits(0xbff921fb54442d18)
+	| (false, true,  true,  false): -> std.flt64frombits(0x400921fb54442d18)
+	| (false, false, true,  false): -> std.flt64frombits(0xc00921fb54442d18)
+	| (false, true,  true,  true ): -> std.flt64frombits(0x0000000000000000)
+	| (false, false, true,  true ): -> std.flt64frombits(0x8000000000000000)
+	| _:
+	;;
+
+	/* Normal case. Here we just reduce y/x to [0, 1] */
+	var xabs = std.flt64frombits(std.flt64bits(x) & (~(0 : uint64)) >> 1)
+	var yabs = std.flt64frombits(std.flt64bits(y) & (~(0 : uint64)) >> 1)
+
+	var then_negate = !ypos
+	var then_subtract_from_pi = !xpos
+
+	x = xabs
+	y = yabs
+
+
 	var then_reverse = false
-	if yabs > xabs
+	if y > x
 		then_reverse = true
 		std.swap(&x, &y)
 	;;
 
-	/* Compute y/x = u + du; du is one Newton's iteration of division */
+	/* Compute y/x = u + du */
 	var u = y / x
-	var t1, t2
-	(t1, t2) = two_by_two(u, x)
-	var du = y * ((y - t1) - t2)/x
+	var du
+	if u * x == y
+		du = 0.0
+	else
+		var t1, t2
+		(t1, t2) = two_by_two(u, x)
+		du = ((y - t1) - t2)/x
+	;;
 
 	var ret = atan_reduced(u, du)
 
@@ -373,6 +438,12 @@
 		var po2_hi = std.flt64frombits(pi_over_2[0])
 		var po2_lo = std.flt64frombits(pi_over_2[1])
 		ret = (po2_hi - ret) + po2_lo
+	;;
+	if then_subtract_from_pi
+		/* Compute pi - ret, or maybe -pi - ret */
+		var pi_hi = scale2(std.flt64frombits(pi_over_2[0]), 1)
+		var pi_lo = scale2(std.flt64frombits(pi_over_2[1]), 1)
+		ret = (pi_hi - ret) + pi_lo
 	;;
 
 	if then_negate
--- a/lib/math/test/atan-impl.myr
+++ b/lib/math/test/atan-impl.myr
@@ -47,6 +47,10 @@
 		(0x4c120000, 0x3fc90fda),
 		(0x0c010000, 0x0c010000),
 		(0xc0070000, 0xbf9065b4),
+		(0x3d8d6b23, 0x3d8d31c3),
+		(0xbd8d6b23, 0xbd8d31c3),
+		(0xbf000000, 0xbeed6338),
+		(0xc1010000, 0xbfb94442),
 	][:]
 
 	for (x, y) : inputs
@@ -67,18 +71,92 @@
 }
 
 const atan02 = {c
+	testr.fail(c, "WRITE THIS")
 }
 
 const atan03 = {c
+	var inputs : (uint32, uint32, uint32)[:] = [
+		(0x00000000, 0x80000000, 0x40490fdb), /* atan2(+0, -0) = +Pi */
+		(0x80000000, 0x80000000, 0xc0490fdb), /* atan2(-0, -0) = -Pi */
+		(0x00000000, 0x00000000, 0x00000000), /* atan2(+0, +0) = +0 */
+		(0x80000000, 0x00000000, 0x80000000), /* atan2(-0, +0) = -0 */
+		(0x00000000, 0xc5a33ab8, 0x40490fdb), /* atan2(+0, x < 0) = +Pi */
+		(0x00000000, 0x80000002, 0x40490fdb),
+		(0x00000000, 0xdddddddd, 0x40490fdb),
+		(0x80000000, 0xc5a33ab8, 0xc0490fdb), /* atan2(-0, x < 0) = -Pi */
+		(0x80000000, 0x80000002, 0xc0490fdb),
+		(0x80000000, 0xdddddddd, 0xc0490fdb),
+		(0x00000000, 0x35a33ab8, 0x00000000), /* atan2(+0, x > 0) = +0 */
+		(0x00000000, 0x00000002, 0x00000000),
+		(0x00000000, 0x4ddddddd, 0x00000000),
+		(0x80000000, 0x35a33ab8, 0x80000000), /* atan2(-0, x > 0) = -0 */
+		(0x80000000, 0x00000002, 0x80000000),
+		(0x80000000, 0x4ddddddd, 0x80000000),
+		(0xdddddddd, 0x00000000, 0xbfc90fdb), /* atan2(y < 0, 0) = -Pi/2 */
+		(0xc5a33ab8, 0x00000000, 0xbfc90fdb),
+		(0x80000002, 0x00000000, 0xbfc90fdb),
+		(0x4ddddddd, 0x00000000, 0x3fc90fdb), /* atan2(y > 0, 0) = +Pi/2 */
+		(0x35a33ab8, 0x00000000, 0x3fc90fdb),
+		(0x00000002, 0x00000000, 0x3fc90fdb),
+		(0x7f800000, 0xff800000, 0x4016cbe4), /* atan2(+Inf, -Inf) = +3*Pi/4 */
+		(0xff800000, 0xff800000, 0xc016cbe4), /* atan2(-Inf, -Inf) = -3*Pi/4 */
+		(0x7f800000, 0x7f800000, 0x3f490fdb), /* atan2(+Inf, +Inf) = +Pi/4 */
+		(0xff800000, 0x7f800000, 0xbf490fdb), /* atan2(-Inf, +Inf) = -Pi/4 */
+		(0x7f800000, 0x4ddddddd, 0x3fc90fdb), /* atan2(+Inf, finite) = +Pi/2 */
+		(0x7f800000, 0x00000001, 0x3fc90fdb),
+		(0x7f800000, 0x80000004, 0x3fc90fdb),
+		(0x7f800000, 0xfedcba87, 0x3fc90fdb),
+		(0xff800000, 0x4ddddddd, 0xbfc90fdb), /* atan2(-Inf, finite) = -Pi/2 */
+		(0xff800000, 0x00000001, 0xbfc90fdb),
+		(0xff800000, 0x80000004, 0xbfc90fdb),
+		(0xff800000, 0xfedcba87, 0xbfc90fdb),
+		(0x6a520b4c, 0xff800000, 0x40490fdb), /* atan2(finite > 0, -Inf) = +Pi */
+		(0x35a33ab8, 0xff800000, 0x40490fdb),
+		(0x55a33ab8, 0xff800000, 0x40490fdb),
+		(0xea520b4c, 0xff800000, 0xc0490fdb), /* atan2(finite < 0, -Inf) = -Pi */
+		(0x95a33ab8, 0xff800000, 0xc0490fdb),
+		(0xc5a33ab8, 0xff800000, 0xc0490fdb),
+		(0x6a520b4c, 0x7f800000, 0x00000000), /* atan2(finite > 0, +Inf) = +0 */
+		(0x35a33ab8, 0x7f800000, 0x00000000),
+		(0x55a33ab8, 0x7f800000, 0x00000000),
+		(0xea520b4c, 0x7f800000, 0x80000000), /* atan2(finite < 0, +Inf) = -0 */
+		(0x95a33ab8, 0x7f800000, 0x80000000),
+		(0xc5a33ab8, 0x7f800000, 0x80000000),
+		(0x1aae129e, 0xde263fa8, 0x40490fdb), /* misc */
+		(0xb76e98b6, 0xdbeb6637, 0xc0490fdb),
+		(0x7112fd5b, 0x7509b252, 0x3b88a34d),
+		(0xe53215fe, 0xcd0f08fc, 0xbfc90fdb),
+		(0xcd47c963, 0x85268f36, 0xbfc90fdb),
+		(0xfacd1adc, 0x79fd5d79, 0xbfa2b8c8),
+		(0xfa3f79f2, 0xf5f06269, 0xbfc96033),
+		(0xddc7b749, 0x5f3d9db0, 0xbe060c09),
+		(0x63c8ee47, 0x792ac38f, 0x2a169cbe),
+		(0xe3c24a4f, 0xe0f9b02f, 0xbfcba1c1),
+		(0xe1f9385d, 0xe317764d, 0xc03c145d)
+	][:]
+
+	for (y, x, z_exp) : inputs
+		var xf : flt32 = std.flt32frombits(x)
+		var yf : flt32 = std.flt32frombits(y)
+		var zf_act : flt32 = math.atan2(yf, xf)
+		var z_act : uint32 = std.flt32bits(zf_act)
+
+		testr.check(c, same32(z_act, z_exp),
+			"atan(0x{b=16,w=8,p=0}, 0x{b=16,w=8,p=0}) = 0x{b=16,w=8,p=0}, should be 0x{b=16,w=8,p=0}",
+			y, x, z_act, z_exp)
+	;;
 }
 
 const atan04 = {c
+	testr.fail(c, "WRITE THIS")
 }
 
 const atan05 = {c
+	testr.fail(c, "WRITE THIS")
 }
 
 const atan06 = {c
+	testr.fail(c, "WRITE THIS")
 }
 
 const atan07 = {c