shithub: mc

--- a/lib/math/bld.sub

+++ b/lib/math/bld.sub

@@ -9,6 +9,9 @@

 	fma-impl+posixy-x64-fma.s

 	fma-impl.myr

+	# polynomial evaluation methods

+	poly-impl.myr

 	# sqrt

 	sqrt-impl+posixy-x64-sse2.s

 	sqrt-impl.myr

--- a/lib/math/fpmath.myr

+++ b/lib/math/fpmath.myr

@@ -6,6 +6,9 @@

 		/* fma-impl */

 		fma : (x : @f, y : @f, z : @f -> @f)

+		/* poly-impl */

+		horner_poly : (x : @f, a : @f[:] -> @f)

 		/* sqrt-impl */

 		sqrt : (f : @f -> @f)

@@ -59,6 +62,8 @@

 impl fpmath flt32 =

 	fma = {x, y, z; -> fma32(x, y, z)}

+	horner_poly = {f, a; -> horner_poly32(f, a)}

 	sqrt = {f; -> sqrt32(f)}

 	kahan_sum = {l; -> kahan_sum32(l) }

@@ -73,6 +78,8 @@

 impl fpmath flt64 =

 	fma = {x, y, z; -> fma64(x, y, z)}

+	horner_poly = {f, a; -> horner_poly64(f, a)}

 	sqrt = {f; -> sqrt64(f)}

 	kahan_sum = {l; -> kahan_sum64(l) }

@@ -88,6 +95,9 @@

 extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)

 extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)

+extern const horner_poly32 : (f : flt32, a : flt32[:] -> flt32)

+extern const horner_poly64 : (f : flt64, a : flt64[:] -> flt64)

 extern const sqrt32 : (x : flt32 -> flt32)

 extern const sqrt64 : (x : flt64 -> flt64)

--- /dev/null

+++ b/lib/math/poly-impl.myr

@@ -1,0 +1,28 @@

+use std

+/* See [Mul16], section 5.1 */

+pkg math =

+        pkglocal const horner_poly32 : (f : flt32, a : flt32[:] -> flt32)

+        pkglocal const horner_poly64 : (f : flt64, a : flt64[:] -> flt64)

+;;

+extern const fma32 : (x : flt32, y : flt32, z : flt32 -> flt32)

+extern const fma64 : (x : flt64, y : flt64, z : flt64 -> flt64)

+const horner_poly32 = {f : flt32, a : flt32[:]

+        var r : flt32 = 0.0

+        for var j = a.len - 1; j >= 0; j--

+                r = fma32(r, f, a[j])

+        ;;

+        -> r

+}

+const horner_poly64 = {f : flt64, a : flt64[:]

+        var r : flt64 = 0.0

+        for var j = a.len - 1; j >= 0; j--

+                r = fma64(r, f, a[j])

+        ;;

+        -> r

+}

+/* TODO: add Knuth's method */

--- a/lib/math/references

+++ b/lib/math/references

@@ -8,4 +8,8 @@

 [Mul+10]

 Jean-Michel Muller et al. Handbook of floating-point arithmetic.

-Boston: Birkhauser, 2010. isbn: 9780817647049.

+Boston: Birkhäuser, 2010. isbn: 9780817647049.

+[Mul16]

+J. M. Muller. Elementary functions : algorithms and implementation.

+Third edition. New York: Birkhäuser, 2016. isbn: 9781489979810.

--- /dev/null

+++ b/lib/math/test/poly-impl.myr

@@ -1,0 +1,66 @@

+use std

+use math

+use testr

+const main = {

+	testr.run([

+		[.name="horner-01", .fn = horner01],

+		[.name="horner-02", .fn = horner02],

+	][:])

+}

+const horner01 = {c

+	var inputs : (uint32, uint32[:], uint32)[:] = [

+		(0x00000000, [][:], 0x00000000),

+		(0xbf800000, [0x3f715545, 0x3f715546, 0x3f715544, 0x3f715545][:], 0xb4000000),

+		(0xbf800001, [0x3f715545, 0x3f715546, 0x3f715544, 0x3f715545][:], 0xb4bc5552),

+	][:]

+	for (f, a, r) : inputs

+		var f2 = std.flt32frombits(f)

+		var a2 = [][:]

+		for aa : a

+			std.slpush(&a2, std.flt32frombits(aa))

+		;;

+		var sf = math.horner_poly(f2, a2)

+		var s : uint32 = std.flt32bits(sf)

+		testr.eq(c, s, r)

+	;;

+}

+const horner02 = {c

+	var inputs : (uint64, uint64[:], uint64)[:] = [

+		(0x0000000000000000, [][:], 0x0000000000000000),

+		(0xc0003d3368ee1111, [

+			0x3ff0000000000000,

+			0x3fe0000000000000,

+			0x3fc5555555555555,

+			0x3fa5555555555555,

+			0x3f81a7b9611a7b96,

+			0x3f59c2d14ee4a102,

+			0x3f3136f054ff42a4,

+			0x3f05902ed525b6ee

+		][:], 0x3fdb64730ab8fa29),

+		(0x40003d3368ee1111, [

+			0x3ff0000000000000,

+			0x3fe0000000000000,

+			0x3fc5555555555555,

+			0x3fa5555555555555,

+			0x3f81a7b9611a7b96,

+			0x3f59c2d14ee4a102,

+			0x3f3136f054ff42a4,

+			0x3f05902ed525b6ee

+		][:], 0x400a331575ee40db),

+	][:]

+	for (f, a, r) : inputs

+		var f2 = std.flt64frombits(f)

+		var a2 = [][:]

+		for aa : a

+			std.slpush(&a2, std.flt64frombits(aa))

+		;;

+		var sf = math.horner_poly(f2, a2)

+		var s : uint64 = std.flt64bits(sf)

+		testr.eq(c, s, r)

+	;;

+}