shithub: mc

Download patch

ref: 96cb72770eb9c9bf3259ebb4647ef88092438808
parent: 755433e998180ba049bc22975b8ecffa23bf0357
author: S. Gilles <sgilles@math.umd.edu>
date: Tue Feb 27 15:19:32 EST 2018

Implement a few simple floating-point functions

Proof of concept. Trunc/floor/ceil are probably the simplest FP
functions that require bit-poking.

diff: cannot open b/lib/math/test//null: file does not exist: 'b/lib/math/test//null' diff: cannot open b/lib/math//null: file does not exist: 'b/lib/math//null'
--- a/lib/bld.sub
+++ b/lib/bld.sub
@@ -8,6 +8,7 @@
 	inifile
 	iter
 	json
+	math
 	regex
 	std
 	sys
--- /dev/null
+++ b/lib/math/bld.sub
@@ -1,0 +1,8 @@
+lib math =
+	fpmath.myr
+
+	# trunc
+	fpmath-trunc-impl.myr
+
+	lib ../std:std
+;;
--- /dev/null
+++ b/lib/math/fpmath-trunc-impl.myr
@@ -1,0 +1,160 @@
+use std
+
+pkg math =
+	pkglocal const trunc32 : (f : flt32 -> flt32)
+	pkglocal const floor32 : (f : flt32 -> flt32)
+	pkglocal const ceil32  : (f : flt32 -> flt32)
+	pkglocal const trunc64 : (f : flt64 -> flt64)
+	pkglocal const floor64 : (f : flt64 -> flt64)
+	pkglocal const ceil64  : (f : flt64 -> flt64)
+;;
+
+pkglocal const trunc32 = {f : flt32
+	var u : uint32 = std.flt32bits(f)
+	var e : uint32 = (((u >> 23) & 0xff) : uint32) - 127
+	var e_lt_zero : uint32 = ((e >> 31) : uint32) & 0x1
+	var e_ge_zero : uint32 = 1 - e_lt_zero
+	var e_ge_23 : uint32 = 1 - ((e - 23) >> 31)
+	/*
+	   The significand 1 . m1 m2 ... m23 needs to be
+	   truncated, which corresponds to zeroing all mi
+	   bits where i is beyond the exponent e (they are
+	   the actual sub-integer portion).
+	 */
+	var m : uint32 = ~(((1 << 23) - 1) >> e)
+	m |= (-1 : uint32) * e_ge_23
+	var v_ge_zero : uint32 = (u & m) * e_ge_zero
+
+	/*
+	   On the other hand, if the exponent is < 0, "23
+	   - e" is garbage, and we should just return +/-
+	   zero
+	 */
+	var v_lt_zero : uint32 = (u & (1 << 31)) * e_lt_zero
+
+	/* Try to save a branch */
+	var v : uint32 = v_ge_zero + v_lt_zero
+	-> std.flt32frombits(v)
+}
+
+pkglocal const floor32 = {f : flt32
+	var u : uint32 = std.flt32bits(f)
+	var e : int32 = (((u >> 23) & 0xff) : int32) - 127
+	var shift_e : uint32 = (e : uint32)
+
+	/* Many special cases */
+	if e >= 23 || u == 0x80000000
+		-> f
+	elif (e < 0) && (u & (1 << 31) != 0)
+		-> -1.0
+	elif e < 0
+		-> 0.0
+	;;
+
+	if u & (1 << 31) != 0
+		var fractional_mask : uint32 = (((1 << 23) - 1) >> shift_e)
+		var v : uint32 = u & ~fractional_mask
+		if (u & fractional_mask) != 0
+			v += ((1 << 23) >> shift_e)
+		;;
+		-> std.flt32frombits(v)
+	;;
+
+	var m : uint32 = ~(((1 << 23) - 1) >> shift_e)
+	var v : uint32 = u & m
+	-> std.flt32frombits(v)
+}
+
+pkglocal const ceil32 = {f;
+	var u : uint32 = std.flt32bits(f)
+	var e : int32 = (((u >> 23) & 0xff) : int32) - 127
+	var shift_e : uint32 = (e : uint32)
+	if e >= 23 || u == 0x0
+		-> f
+	elif (e < 0) && (u & (1 << 31) == 0)
+		-> 1.0
+	elif e < 0
+		-> -0.0
+	;;
+
+	if u & (1 << 31) == 0
+		var fractional_mask : uint32 = (((1 << 23) - 1) >> shift_e)
+		var v : uint32 = u & ~fractional_mask
+		if (u & fractional_mask) != 0
+			v += ((1 << 23) >> shift_e)
+		;;
+		-> std.flt32frombits(v)
+	;;
+
+	var m : uint32 = ~(((1 << 23) - 1) >> shift_e)
+	var v : uint32 = u & m
+	-> std.flt32frombits(v)
+}
+
+pkglocal const trunc64 = {f : flt64
+	var u : uint64 = std.flt64bits(f)
+	var e : uint64 = (((u >> 52) & 0x7ff) : uint64) - 1023
+	var e_lt_zero : uint64 = ((e >> 63) : uint64) & 0x1
+	var e_ge_zero : uint64 = 1 - e_lt_zero
+	var e_ge_52 : uint64 = 1 - ((e - 52) >> 63)
+	var m : uint64 = ~(((1 << 52) - 1) >> e)
+	m |= (-1 : uint64) * e_ge_52
+	var v_ge_zero : uint64 = (u & m) * e_ge_zero
+	var v_lt_zero : uint64 = (u & (1 << 63)) * e_lt_zero
+	var v : uint64 = v_ge_zero + v_lt_zero
+	-> std.flt64frombits(v)
+}
+
+pkglocal const floor64 = {f : flt64
+	var u : uint64 = std.flt64bits(f)
+	var e : int64 = (((u >> 52) & 0x7ff) : int64) - 1023
+	var shift_e : uint64 = (e : uint64)
+
+	if e >= 52 || u == 0x8000000000000000ul
+		-> f
+	elif (e < 0) && (u & (1 << 63) != 0)
+			-> -1.0
+	elif e < 0
+		-> 0.0
+	;;
+
+	if u & (1 << 63) != 0
+		var fractional_mask : uint64 = (((1 << 52) - 1) >> shift_e)
+		var v : uint64 = u & ~fractional_mask
+		if (u & fractional_mask) != 0
+			v += ((1 << 52) >> shift_e)
+		;;
+		-> std.flt64frombits(v)
+	;;
+
+	var m : uint64 = ~(((1 << 52) - 1) >> shift_e)
+	var v : uint64 = u & m
+	-> std.flt64frombits(v)
+}
+
+pkglocal const ceil64 = {f;
+	var u : uint64 = std.flt64bits(f)
+	var e : int64 = (((u >> 52) & 0x7ff) : int64) - 1023
+	var shift_e : uint64 = (e : uint64)
+
+	if e >= 52 || u == 0x0ul
+		-> f
+	elif (e < 0) && (u & (1 << 63) == 0)
+			-> 1.0
+	elif e < 0
+		-> -0.0
+	;;
+
+	if u & (1 << 63) == 0
+		var fractional_mask : uint64 = (((1 << 52) - 1) >> shift_e)
+		var v : uint64 = u & ~fractional_mask
+		if (u & fractional_mask) != 0
+			v += ((1 << 52) >> shift_e)
+		;;
+		-> std.flt64frombits(v)
+	;;
+
+	var m : uint64 = ~(((1 << 52) - 1) >> shift_e)
+	var v : uint64 = u & m
+	-> std.flt64frombits(v)
+}
--- /dev/null
+++ b/lib/math/fpmath.myr
@@ -1,0 +1,36 @@
+use std
+
+pkg math =
+	trait fpmath @f =
+
+		/* fpmath-trunc-impl */
+		trunc : (f : @f -> @f)
+		ceil  : (f : @f -> @f)
+		floor : (f : @f -> @f)
+
+		/* compute (s, t) with s = round-nearest(a+b), s + t = a + b */
+//		fast2sum : (a : @f, b : @f -> (@f, @f))
+	;;
+
+	impl fpmath flt32
+	impl fpmath flt64
+;;
+
+impl fpmath flt32 =
+	trunc = {f; -> trunc32(f)}
+	floor = {f; -> floor32(f)}
+	ceil  = {f; -> ceil32(f)}
+;;
+
+impl fpmath flt64 =
+	trunc = {f; -> trunc64(f)}
+	floor = {f; -> floor64(f)}
+	ceil  = {f; -> ceil64(f)}
+;;
+
+extern const trunc32 : (f : flt32 -> flt32)
+extern const floor32 : (f : flt32 -> flt32)
+extern const ceil32  : (f : flt32 -> flt32)
+extern const trunc64 : (f : flt64 -> flt64)
+extern const floor64 : (f : flt64 -> flt64)
+extern const ceil64  : (f : flt64 -> flt64)
--- /dev/null
+++ b/lib/math/test/fpmath-sum-impl.myr
@@ -1,0 +1,163 @@
+use std
+use math
+use testr
+
+const main = {
+	testr.run([
+		[.name = "trunc-01",    .fn = trunc01],
+		[.name = "trunc-02",    .fn = trunc02],
+		[.name = "floor-01",    .fn = floor01],
+		[.name = "floor-02",    .fn = floor02],
+		[.name = "ceil-01",     .fn = ceil01],
+		[.name = "ceil-02",     .fn = ceil02],
+		[.name = "fast2sum-01", .fn = fast2sum01],
+	][:])
+}
+
+impl std.equatable flt32 =
+        eq = {a : flt32, b : flt32; -> std.flt32bits(a) == std.flt32bits(b)}
+;;
+
+impl std.equatable flt64 =
+        eq = {a : flt64, b : flt64; -> std.flt64bits(a) == std.flt64bits(b)}
+;;
+
+const trunc01 = {c
+	var flt32s : (flt32, flt32)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(1.0, 1.0),
+		(1.1, 1.0),
+		(0.9, 0.0),
+		(10664524000000000000.0, 10664524000000000000.0),
+		(-3.5, -3.0),
+		(101.999, 101.0),
+		(std.flt32nan(), std.flt32nan()),
+	][:]
+
+	for (f, g) : flt32s
+		testr.eq(c, math.trunc(f), g)
+	;;
+}
+
+const trunc02 = {c
+	var flt64s : (flt64, flt64)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(1.0, 1.0),
+		(1.1, 1.0),
+		(0.9, 0.0),
+		(13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0),
+		(-3.5, -3.0),
+		(101.999, 101.0),
+		(std.flt64nan(), std.flt64nan()),
+	][:]
+
+	for (f, g) : flt64s
+		testr.eq(c, math.trunc(f), g)
+	;;
+}
+
+const floor01 = {c
+	var flt32s : (flt32, flt32)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(0.5, 0.0),
+		(1.1, 1.0),
+		(10664524000000000000.0, 10664524000000000000.0),
+		(-3.5, -4.0),
+		(-101.999, -102.0),
+		(std.flt32nan(), std.flt32nan()),
+	][:]
+
+	for (f, g) : flt32s
+		testr.eq(c, math.floor(f), g)
+	;;
+}
+
+const floor02 = {c
+	var flt64s : (flt64, flt64)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(0.5, 0.0),
+		(1.1, 1.0),
+		(13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0),
+		(-3.5, -4.0),
+		(-101.999, -102.0),
+		(std.flt64nan(), std.flt64nan()),
+	][:]
+
+	for (f, g) : flt64s
+		testr.eq(c, math.floor(f), g)
+	;;
+}
+
+const ceil01 = {c
+	var flt32s : (flt32, flt32)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(0.5, 1.0),
+		(-0.1, -0.0),
+		(1.1, 2.0),
+		(10664524000000000000.0, 10664524000000000000.0),
+		(-3.5, -3.0),
+		(-101.999, -101.0),
+		(std.flt32nan(), std.flt32nan()),
+	][:]
+
+	for (f, g) : flt32s
+		testr.eq(c, math.ceil(f), g)
+	;;
+}
+
+const ceil02 = {c
+	var flt64s : (flt64, flt64)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(0.5, 1.0),
+		(-0.1, -0.0),
+		(1.1, 2.0),
+		(13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0),
+		(-3.5, -3.0),
+		(-101.999, -101.0),
+		(std.flt64nan(), std.flt64nan()),
+	][:]
+
+	for (f, g) : flt64s
+		testr.eq(c, math.ceil(f), g)
+	;;
+}
+
+const fast2sum01 = {c
+        var flt32s : (flt32, flt32, flt32, flt32)[:] = [
+                (1.0, 1.0, 2.0, 0.0),
+                (10664524000000000000.0, 1.11842, 10664524000000000000.0, 1.11842),
+                (1.11843, 10664524000000000000.0, 10664524000000000000.0, 1.11843),
+                (-21897.1324, 17323.22, -4573.912, 0.0),
+        ][:]
+
+        for (a, b, s1, t1) : flt32s
+                var s2, t2
+                (s2, t2) = math.fast2sum(a, b)
+                testr.eq(c, s1, s2)
+                testr.eq(c, t1, t2)
+        ;;
+
+        var flt64s : (flt64, flt64, flt64, flt64)[:] = [
+                (1.0, 1.0, 2.0, 0.0),
+                (-21897.1324, 17323.22, -4573.912399999997, 0.0),
+                (std.flt64frombits(0x78591b0672a81284), std.flt64frombits(0x6a8c3190e27a1884),
+                 std.flt64frombits(0x78591b0672a81284), std.flt64frombits(0x6a8c3190e27a1884)),
+                (std.flt64frombits(0x6a8c3190e27a1884), std.flt64frombits(0x78591b0672a81284),
+                 std.flt64frombits(0x78591b0672a81284), std.flt64frombits(0x6a8c3190e27a1884)),
+                (std.flt64frombits(0x78591b0672a81284), std.flt64frombits(0x7858273672ca19a0),
+                 std.flt64frombits(0x7868a11e72b91612), 0.0),
+        ][:]
+
+        for (a, b, s1, t1) : flt64s
+                var s2, t2
+                (s2, t2) = math.fast2sum(a, b)
+                testr.eq(c, s1, s2)
+                testr.eq(c, std.flt64bits(t1), std.flt64bits(t2))
+        ;;
+}
--- /dev/null
+++ b/lib/math/test/fpmath-trunc-impl.myr
@@ -1,0 +1,128 @@
+use std
+use math
+use testr
+
+const main = {
+	testr.run([
+		[.name = "trunc-01",    .fn = trunc01],
+		[.name = "trunc-02",    .fn = trunc02],
+		[.name = "floor-01",    .fn = floor01],
+		[.name = "floor-02",    .fn = floor02],
+		[.name = "ceil-01",     .fn = ceil01],
+		[.name = "ceil-02",     .fn = ceil02],
+	][:])
+}
+
+impl std.equatable flt32 =
+        eq = {a : flt32, b : flt32; -> std.flt32bits(a) == std.flt32bits(b)}
+;;
+
+impl std.equatable flt64 =
+        eq = {a : flt64, b : flt64; -> std.flt64bits(a) == std.flt64bits(b)}
+;;
+
+const trunc01 = {c
+	var flt32s : (flt32, flt32)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(1.0, 1.0),
+		(1.1, 1.0),
+		(0.9, 0.0),
+		(10664524000000000000.0, 10664524000000000000.0),
+		(-3.5, -3.0),
+		(101.999, 101.0),
+		(std.flt32nan(), std.flt32nan()),
+	][:]
+
+	for (f, g) : flt32s
+		testr.eq(c, math.trunc(f), g)
+	;;
+}
+
+const trunc02 = {c
+	var flt64s : (flt64, flt64)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(1.0, 1.0),
+		(1.1, 1.0),
+		(0.9, 0.0),
+		(13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0),
+		(-3.5, -3.0),
+		(101.999, 101.0),
+		(std.flt64nan(), std.flt64nan()),
+	][:]
+
+	for (f, g) : flt64s
+		testr.eq(c, math.trunc(f), g)
+	;;
+}
+
+const floor01 = {c
+	var flt32s : (flt32, flt32)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(0.5, 0.0),
+		(1.1, 1.0),
+		(10664524000000000000.0, 10664524000000000000.0),
+		(-3.5, -4.0),
+		(-101.999, -102.0),
+		(std.flt32nan(), std.flt32nan()),
+	][:]
+
+	for (f, g) : flt32s
+		testr.eq(c, math.floor(f), g)
+	;;
+}
+
+const floor02 = {c
+	var flt64s : (flt64, flt64)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(0.5, 0.0),
+		(1.1, 1.0),
+		(13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0),
+		(-3.5, -4.0),
+		(-101.999, -102.0),
+		(std.flt64nan(), std.flt64nan()),
+	][:]
+
+	for (f, g) : flt64s
+		testr.eq(c, math.floor(f), g)
+	;;
+}
+
+const ceil01 = {c
+	var flt32s : (flt32, flt32)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(0.5, 1.0),
+		(-0.1, -0.0),
+		(1.1, 2.0),
+		(10664524000000000000.0, 10664524000000000000.0),
+		(-3.5, -3.0),
+		(-101.999, -101.0),
+		(std.flt32nan(), std.flt32nan()),
+	][:]
+
+	for (f, g) : flt32s
+		testr.eq(c, math.ceil(f), g)
+	;;
+}
+
+const ceil02 = {c
+	var flt64s : (flt64, flt64)[:] = [
+		(0.0, 0.0),
+		(-0.0, -0.0),
+		(0.5, 1.0),
+		(-0.1, -0.0),
+		(1.1, 2.0),
+		(13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0),
+		(-3.5, -3.0),
+		(-101.999, -101.0),
+		(std.flt64nan(), std.flt64nan()),
+	][:]
+
+	for (f, g) : flt64s
+		testr.eq(c, math.ceil(f), g)
+	;;
+}