ref: f127b8250ecbd5f44622f9bf620af933f7102e78
parent: dc9fbed2061a1c5b6de5ce1627aee10c189d123f
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Mar 16 21:26:17 EDT 2018
Implement karatsuba multiplication. This is a nice optimization. In the most optimal case, it makes this stupid benchmark about 10 times faster: a = std.mkbigint(17193) b = std.mkbigint(1091327) for var i = 0; i < 12; i++ mul(a, b) mul(b, a) ;; It also makes a decent change (about 10%) on `mbld bench` for bigfactorial, in spite of us mostly not falling into the karatsuba case.
--- a/bench/bigfactorial.myr
+++ b/bench/bigfactorial.myr
@@ -7,6 +7,7 @@
[.name="bigfactorial-100", .fn={ctx; bigfact(100)}],
[.name="bigfactorial-1000", .fn={ctx; bigfact(1000)}],
[.name="bigfactorial-10000", .fn={ctx; bigfact(10000)}],
+ [.name="bigfactorial-20000", .fn={ctx; bigfact(20000)}],
][:])
}
--- a/lib/std/bigint.myr
+++ b/lib/std/bigint.myr
@@ -92,6 +92,7 @@
extern const put : (fmt : byte[:], args : ... -> size)
const Base = 0x100000000ul
+const Kmin = 64
generic mkbigint = {v : @a :: integral,numeric @a
var a
@@ -124,7 +125,7 @@
;;
for i = 0; i + 4 <= v.len; i += 4
- std.slpush(&a.dig, \
+ slpush(&a.dig, \
(v[i + 0] << 0 : uint32) | \
(v[i + 1] << 8 : uint32) | \
(v[i + 2] << 16 : uint32) | \
@@ -135,7 +136,7 @@
off = i & 0x3
last |= (v[off] : uint32) << (8 *off)
;;
- std.slpush(&a.dig, last)
+ slpush(&a.dig, last)
-> trim(a)
}
@@ -170,7 +171,7 @@
}
const bigclear = {v
- std.slfree(v.dig)
+ slfree(v.dig)
v.sign = 0
v.dig = [][:]
-> v
@@ -259,7 +260,7 @@
fit in one digit.
*/
v = mkbigint(1)
- for c : std.bychar(str)
+ for c : bychar(str)
if c == '_'
continue
;;
@@ -515,10 +516,7 @@
/* a *= b */
const bigmul = {a, b
- var i, j
- var ai, bj, wij
- var carry, t
- var w
+ var s
if a.sign == 0 || b.sign == 0
a.sign = 0
@@ -526,11 +524,84 @@
a.dig = [][:]
-> a
elif a.sign != b.sign
- a.sign = -1
+ s = -1
else
- a.sign = 1
+ s = 1
;;
+ umul(a, b)
+
+ a.sign = s
+ -> trim(a)
+}
+
+const umul = {a, b
+ var r
+
+ if a.dig.len < Kmin || b.dig.len < 2
+ smallmul(a, b)
+ else
+ r = mkbigint(0)
+ kmul(r, a, b)
+ bigmove(a, r)
+ ;;
+}
+
+const kmul = {r, a, b
+ var x0, x1, y0, y1, n
+ var z0, z1, z2, t0
+
+ if a.dig.len < b.dig.len
+ t0 = a
+ a = b
+ b = t0
+ ;;
+ n = min(a.dig.len / 2, b.dig.len - 1)
+
+ x0 = [.sign=1, .dig=a.dig[:n]]
+ x1 = [.sign=1, .dig=a.dig[n:]]
+ y0 = [.sign=1, .dig=b.dig[:n]]
+ y1 = [.sign=1, .dig=b.dig[n:]]
+
+ z0 = bigdup(&x0)
+ trim(z0)
+ umul(z0, &y0)
+
+ z2 = bigdup(&x1)
+ trim(z2)
+ umul(z2, &y1)
+
+
+ z1 = bigdup(&x0)
+ trim(z1)
+ bigsub(z1, &x1)
+ t0 = bigdup(&y1)
+ bigsub(t0, &y0)
+
+ umul(z1, t0)
+ bigadd(z1, z0)
+ bigadd(z1, z2)
+
+ bigshli(z1, 32*n)
+ bigshli(z2, 32*2*n)
+
+ bigclear(r)
+ bigadd(r, z0)
+ bigadd(r, z1)
+ bigadd(r, z2)
+
+ bigfree(z0)
+ bigfree(z1)
+ bigfree(z2)
+ bigfree(t0)
+}
+
+const smallmul = {a, b
+ var i, j
+ var ai, bj, wij
+ var carry, t
+ var w
+
w = slzalloc(a.dig.len + b.dig.len)
for j = 0; j < b.dig.len; j++
carry = 0
@@ -546,7 +617,6 @@
;;
slfree(a.dig)
a.dig = w
- -> trim(a)
}
const bigdiv = {a : bigint#, b : bigint# -> bigint#