shithub: mc

Download patch

ref: 895c8ae58de3ece766a1f9d70094f61adb864f71
parent: a00e33e21435b8a4961d6e0f910e09f9c17f65ba
author: S. Gilles <sgilles@math.umd.edu>
date: Mon May 20 23:51:51 EDT 2019

Correct sign-handling for pown special case

--- a/lib/math/ancillary/generate-minimax-by-Remez.gp
+++ b/lib/math/ancillary/generate-minimax-by-Remez.gp
@@ -136,6 +136,10 @@
         return(1/tanoverx(x));
 }
 
+{ log2xoverx(x) =
+        return(if(x == 1,1,log(x)/(x-1))/log(2));
+}
+
 print("\n");
 print("Minimaxing sin(x) / x, degree 6, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
 find_minimax(sinoverx, 6, -Pi/1024, Pi/1024)
@@ -175,5 +179,11 @@
 print("\n");
 print("(You'll need to add a 0x0 at the beginning to make a degree 13...\n");
 print("\n");
+print("---\n");
+print("Minmimaxing log_2(x) / (x - 1), degree 7, on [1, 2^(1/8)]:");
+find_minimax(log2xoverx, 7, 1, 2^(1/8))
+print("\n");
+/* print("(You'll need to add a 0x0 at the beginning to make a degree 13...\n"); */
+/* print("\n"); */
 print("---\n");
 print("Remember that there's that extra, ugly E term at the end of the vector that you want to lop off.\n");
--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -23,7 +23,9 @@
 	poly-impl.myr
 
 	# x^n and x^1/q
+	log-overkill+posixy-x64-sse2.s
 	log-overkill.myr
+	pown-impl+posixy-x64-sse2.s
 	pown-impl.myr
 
 	# x^y
--- a/lib/math/pown-impl.myr
+++ b/lib/math/pown-impl.myr
@@ -109,7 +109,7 @@
 		/* Propagate NaN (why doesn't this come first? Ask IEEE.) */
 		-> d.frombits(d.nan)
 	elif (x == 0.0 || x == -0.0)
-		if n < 0 && (n % 2 == 1) && xn
+		if n < 0 && (n % 2 == -1) && xn
 			/* (+/- 0)^n = +/- oo */
 			-> d.frombits(d.neginf)
 		elif n < 0
--- a/lib/math/test/pown-impl.myr
+++ b/lib/math/test/pown-impl.myr
@@ -21,7 +21,20 @@
 const pown01 = {c
 	var inputs : (uint32, uint32, uint32)[:] = [
 		(0x000000f6, 0x00000000, 0x3f800000),
+		(0x7fc00000, 0x00000001, 0x7fc00000),
+		(0x7fc00000, 0x00000021, 0x7fc00000),
+		(0x7fc00000, 0x00030021, 0x7fc00000),
+		(0x7fc00000, 0xfacecafe, 0x7fc00000),
 		(0x00000000, 0x3d800000, 0x00000000),
+		(0x80000000, 0x00000124, 0x00000000),
+		(0x80000000, 0x00000123, 0x80000000),
+		(0x00000000, 0x00000124, 0x00000000),
+		(0x00000000, 0x00000123, 0x00000000),
+		(0x00000000, 0xad800001, 0x7f800000),
+		(0x80000000, 0x80000123, 0xff800000),
+		(0x80000000, 0x80000122, 0x7f800000),
+		(0x00000000, 0x80000127, 0x7f800000),
+		(0x00000000, 0x80000128, 0x7f800000),
 		(0x946fc13b, 0x3b21efc7, 0x80000000),
 		(0xb76e98b6, 0xdbeb6637, 0xff800000),
 		(0xc04825b7, 0x53cdd772, 0x7f800000),
@@ -52,11 +65,11 @@
 	for (x, y, z) : inputs
 		var xf : flt32 = std.flt32frombits(x)
 		var yi : int32 = int32fromuint32(y)
-		var zf : flt32 = std.flt32frombits(z)
 		var rf = math.pown(xf, yi)
-		testr.check(c, rf == zf,
+		var ru : uint32 = std.flt32bits(rf)
+		testr.check(c, ru == z,
 			"pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
-			x, yi, z, std.flt32bits(rf))
+			x, yi, z, ru)
 	;;
 }
 
@@ -92,11 +105,11 @@
 	for (x, y, z) : inputs
 		var xf : flt64 = std.flt64frombits(x)
 		var yi : int64 = int64fromuint64(y)
-		var zf : flt64 = std.flt64frombits(z)
-		var rf = math.pown(xf, yi)
-		testr.check(c, rf == zf,
+		var rf : flt64 = math.pown(xf, yi)
+		var ru : uint64 = std.flt64bits(rf)
+		testr.check(c, ru == z,
 			"pown(0x{b=16,w=16,p=0}, {}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
-			x, yi, z, std.flt64bits(rf))
+			x, yi, z, ru)
 	;;
 }
 
@@ -112,12 +125,11 @@
 	for (x, y, z_perfect, z_accepted) : inputs
 		var xf : flt32 = std.flt32frombits(x)
 		var yi : int32 = int32fromuint32(y)
-		var zf_perfect : flt32 = std.flt32frombits(z_perfect)
-		var zf_accepted : flt32 = std.flt32frombits(z_accepted)
-		var rf = math.pown(xf, yi)
-		testr.check(c, rf == zf_perfect || rf == zf_accepted,
+		var rf : flt32 = math.pown(xf, yi)
+		var ru : uint32 = std.flt32bits(rf)
+		testr.check(c, ru == z_perfect || ru == z_accepted,
 			"pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, will also accept 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
-			x, yi, z_perfect, z_accepted, std.flt32bits(rf))
+			x, yi, z_perfect, z_accepted, ru)
 	;;
 }
 
@@ -279,10 +291,11 @@
 	for (x, y, z) : inputs
 		var xf : flt32 = std.flt32frombits(x)
 		var zf : flt32 = std.flt32frombits(z)
-		var rf = math.rootn(xf, y)
-		testr.check(c, rf == zf,
+		var rf : flt32 = math.rootn(xf, y)
+		var ru : uint32 = std.flt32bits(rf)
+		testr.check(c, ru == z,
 			"rootn(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",
-			x, y, z, std.flt32bits(rf))
+			x, y, z, ru)
 	;;
 }
 
@@ -441,10 +454,11 @@
 	for (x, y, z) : inputs
 		var xf : flt64 = std.flt64frombits(x)
 		var zf : flt64 = std.flt64frombits(z)
-		var rf = math.rootn(xf, y)
-		testr.check(c, rf == zf,
+		var rf : flt64 = math.rootn(xf, y)
+		var ru : uint64 = std.flt64bits(rf)
+		testr.check(c, ru == z,
 			"rootn(0x{b=16,w=16,p=0}, {}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",
-			x, y, z, std.flt64bits(rf))
+			x, y, z, ru)
 	;;
 }