ref: 895c8ae58de3ece766a1f9d70094f61adb864f71
parent: a00e33e21435b8a4961d6e0f910e09f9c17f65ba
	author: S. Gilles <sgilles@math.umd.edu>
	date: Mon May 20 23:51:51 EDT 2019
	
Correct sign-handling for pown special case
--- a/lib/math/ancillary/generate-minimax-by-Remez.gp
+++ b/lib/math/ancillary/generate-minimax-by-Remez.gp
@@ -136,6 +136,10 @@
return(1/tanoverx(x));
}
+{ log2xoverx(x) =+ return(if(x == 1,1,log(x)/(x-1))/log(2));
+}
+
 print("\n"); print("Minimaxing sin(x) / x, degree 6, on [-Pi/(4 * 256), Pi/(4 * 256)]:");find_minimax(sinoverx, 6, -Pi/1024, Pi/1024)
@@ -175,5 +179,11 @@
 print("\n"); print("(You'll need to add a 0x0 at the beginning to make a degree 13...\n"); print("\n");+print("---\n");+print("Minmimaxing log_2(x) / (x - 1), degree 7, on [1, 2^(1/8)]:");+find_minimax(log2xoverx, 7, 1, 2^(1/8))
+print("\n");+/* print("(You'll need to add a 0x0 at the beginning to make a degree 13...\n"); */+/* print("\n"); */ print("---\n"); print("Remember that there's that extra, ugly E term at the end of the vector that you want to lop off.\n");--- a/lib/math/bld.sub
+++ b/lib/math/bld.sub
@@ -23,7 +23,9 @@
poly-impl.myr
# x^n and x^1/q
+ log-overkill+posixy-x64-sse2.s
log-overkill.myr
+ pown-impl+posixy-x64-sse2.s
pown-impl.myr
# x^y
--- a/lib/math/pown-impl.myr
+++ b/lib/math/pown-impl.myr
@@ -109,7 +109,7 @@
/* Propagate NaN (why doesn't this come first? Ask IEEE.) */
-> d.frombits(d.nan)
elif (x == 0.0 || x == -0.0)
- if n < 0 && (n % 2 == 1) && xn
+ if n < 0 && (n % 2 == -1) && xn
/* (+/- 0)^n = +/- oo */
-> d.frombits(d.neginf)
elif n < 0
--- a/lib/math/test/pown-impl.myr
+++ b/lib/math/test/pown-impl.myr
@@ -21,7 +21,20 @@
 const pown01 = {cvar inputs : (uint32, uint32, uint32)[:] = [
(0x000000f6, 0x00000000, 0x3f800000),
+ (0x7fc00000, 0x00000001, 0x7fc00000),
+ (0x7fc00000, 0x00000021, 0x7fc00000),
+ (0x7fc00000, 0x00030021, 0x7fc00000),
+ (0x7fc00000, 0xfacecafe, 0x7fc00000),
(0x00000000, 0x3d800000, 0x00000000),
+ (0x80000000, 0x00000124, 0x00000000),
+ (0x80000000, 0x00000123, 0x80000000),
+ (0x00000000, 0x00000124, 0x00000000),
+ (0x00000000, 0x00000123, 0x00000000),
+ (0x00000000, 0xad800001, 0x7f800000),
+ (0x80000000, 0x80000123, 0xff800000),
+ (0x80000000, 0x80000122, 0x7f800000),
+ (0x00000000, 0x80000127, 0x7f800000),
+ (0x00000000, 0x80000128, 0x7f800000),
(0x946fc13b, 0x3b21efc7, 0x80000000),
(0xb76e98b6, 0xdbeb6637, 0xff800000),
(0xc04825b7, 0x53cdd772, 0x7f800000),
@@ -52,11 +65,11 @@
for (x, y, z) : inputs
var xf : flt32 = std.flt32frombits(x)
var yi : int32 = int32fromuint32(y)
- var zf : flt32 = std.flt32frombits(z)
var rf = math.pown(xf, yi)
- testr.check(c, rf == zf,
+ var ru : uint32 = std.flt32bits(rf)
+ testr.check(c, ru == z,
 			"pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",- x, yi, z, std.flt32bits(rf))
+ x, yi, z, ru)
;;
}
@@ -92,11 +105,11 @@
for (x, y, z) : inputs
var xf : flt64 = std.flt64frombits(x)
var yi : int64 = int64fromuint64(y)
- var zf : flt64 = std.flt64frombits(z)
- var rf = math.pown(xf, yi)
- testr.check(c, rf == zf,
+ var rf : flt64 = math.pown(xf, yi)
+ var ru : uint64 = std.flt64bits(rf)
+ testr.check(c, ru == z,
 			"pown(0x{b=16,w=16,p=0}, {}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",- x, yi, z, std.flt64bits(rf))
+ x, yi, z, ru)
;;
}
@@ -112,12 +125,11 @@
for (x, y, z_perfect, z_accepted) : inputs
var xf : flt32 = std.flt32frombits(x)
var yi : int32 = int32fromuint32(y)
- var zf_perfect : flt32 = std.flt32frombits(z_perfect)
- var zf_accepted : flt32 = std.flt32frombits(z_accepted)
- var rf = math.pown(xf, yi)
- testr.check(c, rf == zf_perfect || rf == zf_accepted,
+ var rf : flt32 = math.pown(xf, yi)
+ var ru : uint32 = std.flt32bits(rf)
+ testr.check(c, ru == z_perfect || ru == z_accepted,
 			"pown(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, will also accept 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",- x, yi, z_perfect, z_accepted, std.flt32bits(rf))
+ x, yi, z_perfect, z_accepted, ru)
;;
}
@@ -279,10 +291,11 @@
for (x, y, z) : inputs
var xf : flt32 = std.flt32frombits(x)
var zf : flt32 = std.flt32frombits(z)
- var rf = math.rootn(xf, y)
- testr.check(c, rf == zf,
+ var rf : flt32 = math.rootn(xf, y)
+ var ru : uint32 = std.flt32bits(rf)
+ testr.check(c, ru == z,
 			"rootn(0x{b=16,w=8,p=0}, {}) should be 0x{b=16,w=8,p=0}, was 0x{b=16,w=8,p=0}",- x, y, z, std.flt32bits(rf))
+ x, y, z, ru)
;;
}
@@ -441,10 +454,11 @@
for (x, y, z) : inputs
var xf : flt64 = std.flt64frombits(x)
var zf : flt64 = std.flt64frombits(z)
- var rf = math.rootn(xf, y)
- testr.check(c, rf == zf,
+ var rf : flt64 = math.rootn(xf, y)
+ var ru : uint64 = std.flt64bits(rf)
+ testr.check(c, ru == z,
 			"rootn(0x{b=16,w=16,p=0}, {}) should be 0x{b=16,w=16,p=0}, was 0x{b=16,w=16,p=0}",- x, y, z, std.flt64bits(rf))
+ x, y, z, ru)
;;
}
--
⑨