ref: 1406e96fd9acc1075bec2dc59903e05c83f8d609
dir: /lib/crypto/aes.myr/
/*
* Translated to Myrddin by Ori Bernstein
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
use std
pkg crypto =
type aesctx = struct
nrounds : uint32
skey : uint64[30]
accum : byte[16]
;;
const aeskeysched : (x : aesctx#, k : byte[:] -> void)
const aesencrypt : (x : aesctx#, m : byte[:], c : byte[:] -> void)
const aesdecrypt : (x : aesctx#, c : byte[:], m : byte[:] -> void)
;;
const Rcon = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36]
const aeskeysched = {x, key
var skey : uint32[60]
var q : uint64[8]
var nk, nkf
var tmp, j, k
match key.len
| 16: x.nrounds = 10
| 24: x.nrounds = 12
| 32: x.nrounds = 14
| _: std.die("invalid key size in aes")
;;
nk = key.len >> 2
nkf = (x.nrounds + 1) << 2
for var i = 0; i < key.len; i += 4
skey[i>>2] = std.getle32(key[i:i+4])
;;
j = 0
k = 0
tmp = skey[(key.len >> 2) - 1];
for var i = nk; i < nkf; i++
if j == 0
tmp = (tmp << 24) | (tmp >> 8)
tmp = subword(tmp) ^ Rcon[k]
elif nk > 6 && j == 4
tmp = subword(tmp)
;;
tmp ^= skey[i-nk]
skey[i] = tmp
j++
if j == nk
j = 0
k++
;;
;;
j = 0
for var i = 0; i < nkf; i += 4
interleavein(&q[0], &q[4], skey[i:i+4])
q[1] = q[0]
q[2] = q[0]
q[3] = q[0]
q[5] = q[4]
q[6] = q[4]
q[7] = q[4]
ortho(q[:])
x.skey[j + 0] = \
(q[0] & 0x1111111111111111ul) \
| (q[1] & 0x2222222222222222ul) \
| (q[2] & 0x4444444444444444ul) \
| (q[3] & 0x8888888888888888ul)
x.skey[j + 1] = \
(q[4] & 0x1111111111111111ul) \
| (q[5] & 0x2222222222222222ul) \
| (q[6] & 0x4444444444444444ul) \
| (q[7] & 0x8888888888888888ul)
j += 2
;;
}
const aesencrypt = {x, m, c
var q : uint64[8]
var w : uint32[4]
var expkey : uint64[240]
std.assert(m.len % 16 == 0, "mismatched block size in aesencrypt")
std.assert(m.len == c.len, "ciphertext length does not match plaintext length")
expand(expkey[:], x.nrounds, x.skey[:])
while m.len != 0
w[0] = std.getle32(m[0:4])
w[1] = std.getle32(m[4:8])
w[2] = std.getle32(m[8:12])
w[3] = std.getle32(m[12:16])
interleavein(&q[0], &q[4], w[:])
ortho(q[:])
encrypt64(x.nrounds, expkey[:], q[:])
ortho(q[:])
interleaveout(w[:], q[0], q[4])
std.putle32(c[0:4], w[0])
std.putle32(c[4:8], w[1])
std.putle32(c[8:12], w[2])
std.putle32(c[12:16], w[3])
m = m[16:]
c = c[16:]
;;
}
const aesdecrypt = {x, c, m
var q : uint64[8]
var w : uint32[4]
var expkey : uint64[240]
std.assert(m.len % 16 == 0, "mismatched block size in aesencrypt")
std.assert(m.len == c.len, "ciphertext length does not match plaintext length")
expand(expkey[:], x.nrounds, x.skey[:])
while c.len != 0
w[0] = std.getle32(c[0:4])
w[1] = std.getle32(c[4:8])
w[2] = std.getle32(c[8:12])
w[3] = std.getle32(c[12:16])
interleavein(&q[0], &q[4], w[:])
ortho(q[:])
decrypt64(x.nrounds, expkey[:], q[:])
ortho(q[:])
interleaveout(w[:], q[0], q[4])
std.putle32(m[0:4], w[0])
std.putle32(m[4:8], w[1])
std.putle32(m[8:12], w[2])
std.putle32(m[12:16], w[3])
m = m[16:]
c = c[16:]
;;
}
const expand = {expkey, nrounds, key
var x0, x1, x2, x3
var u, v, n
n = (nrounds + 1) * 2
u = 0
v = 0
while u < n
x0 = x1 = x2 = x3 = key[u]
x0 &= 0x1111111111111111ul
x1 &= 0x2222222222222222ul
x2 &= 0x4444444444444444ul
x3 &= 0x8888888888888888ul
x1 >>= 1
x2 >>= 2
x3 >>= 3
expkey[v + 0] = (x0 << 4) - x0
expkey[v + 1] = (x1 << 4) - x1
expkey[v + 2] = (x2 << 4) - x2
expkey[v + 3] = (x3 << 4) - x3
u++
v += 4
;;
}
const encrypt64 = {nrounds, skey, q
var i
addroundkey(q, skey)
for i = 1; i < nrounds; i++
sbox(q)
shiftrows(q)
mixcols(q)
addroundkey(q, skey[i*8:i*8+8])
;;
sbox(q)
shiftrows(q)
addroundkey(q, skey[8*nrounds:8*nrounds + 8])
}
const decrypt64 = {nrounds, skey, q
var u
addroundkey(q, skey[8*nrounds:8*nrounds + 8])
for u = nrounds - 1; u > 0; u--
invshiftrows(q)
invsbox(q)
addroundkey(q, skey[u*8:u*8+8])
invmixcols(q)
;;
invshiftrows(q)
invsbox(q)
addroundkey(q, skey[0:8])
}
const addroundkey = {q, sk
q[0] ^= sk[0]
q[1] ^= sk[1]
q[2] ^= sk[2]
q[3] ^= sk[3]
q[4] ^= sk[4]
q[5] ^= sk[5]
q[6] ^= sk[6]
q[7] ^= sk[7]
}
const shiftrows = {q
var x
for var i = 0; i < 8; i ++
x = q[i]
q[i] = (x & 0x000000000000FFFFul) \
| ((x & 0x00000000FFF00000ul) >> 4) \
| ((x & 0x00000000000F0000ul) << 12) \
| ((x & 0x0000FF0000000000ul) >> 8) \
| ((x & 0x000000FF00000000ul) << 8) \
| ((x & 0xF000000000000000ul) >> 12) \
| ((x & 0x0FFF000000000000ul) << 4)
;;
}
const invshiftrows = {q
var x
for var i = 0; i < 8; i ++
x = q[i];
q[i] = (x & 0x000000000000FFFFul) \
| ((x & 0x000000000FFF0000ul) << 4) \
| ((x & 0x00000000F0000000ul) >> 12) \
| ((x & 0x000000FF00000000ul) << 8) \
| ((x & 0x0000FF0000000000ul) >> 8) \
| ((x & 0x000F000000000000ul) << 12) \
| ((x & 0xFFF0000000000000ul) >> 4)
;;
}
const mixcols = {q
var q0, q1, q2, q3, q4, q5, q6, q7
var r0, r1, r2, r3, r4, r5, r6, r7
q0 = q[0]
q1 = q[1]
q2 = q[2]
q3 = q[3]
q4 = q[4]
q5 = q[5]
q6 = q[6]
q7 = q[7]
r0 = (q0 >> 16) | (q0 << 48)
r1 = (q1 >> 16) | (q1 << 48)
r2 = (q2 >> 16) | (q2 << 48)
r3 = (q3 >> 16) | (q3 << 48)
r4 = (q4 >> 16) | (q4 << 48)
r5 = (q5 >> 16) | (q5 << 48)
r6 = (q6 >> 16) | (q6 << 48)
r7 = (q7 >> 16) | (q7 << 48)
q[0] = q7 ^ r7 ^ r0 ^ rotr32(q0 ^ r0)
q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr32(q1 ^ r1)
q[2] = q1 ^ r1 ^ r2 ^ rotr32(q2 ^ r2)
q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr32(q3 ^ r3)
q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr32(q4 ^ r4)
q[5] = q4 ^ r4 ^ r5 ^ rotr32(q5 ^ r5)
q[6] = q5 ^ r5 ^ r6 ^ rotr32(q6 ^ r6)
q[7] = q6 ^ r6 ^ r7 ^ rotr32(q7 ^ r7)
}
const invmixcols = {q
var q0, q1, q2, q3, q4, q5, q6, q7
var r0, r1, r2, r3, r4, r5, r6, r7
q0 = q[0]
q1 = q[1]
q2 = q[2]
q3 = q[3]
q4 = q[4]
q5 = q[5]
q6 = q[6]
q7 = q[7]
r0 = (q0 >> 16) | (q0 << 48)
r1 = (q1 >> 16) | (q1 << 48)
r2 = (q2 >> 16) | (q2 << 48)
r3 = (q3 >> 16) | (q3 << 48)
r4 = (q4 >> 16) | (q4 << 48)
r5 = (q5 >> 16) | (q5 << 48)
r6 = (q6 >> 16) | (q6 << 48)
r7 = (q7 >> 16) | (q7 << 48)
q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr32(q0 ^ q5 ^ q6 ^ r0 ^ r5)
q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6)
q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr32(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7)
q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr32(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7)
q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6)
q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7)
q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr32(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7)
q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr32(q4 ^ q5 ^ q7 ^ r4 ^ r7)
}
/*
* Interleave bytes for an AES input block. If input bytes are
* denoted 0123456789ABCDEF, and have been decoded with little-endian
* convention (w[0] contains 0123, with '3' being most significant
* w[1] contains 4567, and so on), then output word q0 will be
* set to 08192A3B (again little-endian convention) and q1 will
* be set to 4C5D6E7F.
*/
const interleavein = {q0, q1, w
var x0, x1, x2, x3
x0 = (w[0] : uint64)
x1 = (w[1] : uint64)
x2 = (w[2] : uint64)
x3 = (w[3] : uint64)
x0 |= (x0 << 16)
x1 |= (x1 << 16)
x2 |= (x2 << 16)
x3 |= (x3 << 16)
x0 &= 0x0000FFFF0000FFFFul
x1 &= 0x0000FFFF0000FFFFul
x2 &= 0x0000FFFF0000FFFFul
x3 &= 0x0000FFFF0000FFFFul
x0 |= (x0 << 8)
x1 |= (x1 << 8)
x2 |= (x2 << 8)
x3 |= (x3 << 8)
x0 &= 0x00FF00FF00FF00FFul
x1 &= 0x00FF00FF00FF00FFul
x2 &= 0x00FF00FF00FF00FFul
x3 &= 0x00FF00FF00FF00FFul
q0# = x0 | (x2 << 8)
q1# = x1 | (x3 << 8)
}
/* invert interleavein */
const interleaveout = {w, q0, q1
var x0, x1, x2, x3
x0 = q0 & 0x00FF00FF00FF00FFul
x1 = q1 & 0x00FF00FF00FF00FFul
x2 = (q0 >> 8) & 0x00FF00FF00FF00FFul
x3 = (q1 >> 8) & 0x00FF00FF00FF00FFul
x0 |= (x0 >> 8)
x1 |= (x1 >> 8)
x2 |= (x2 >> 8)
x3 |= (x3 >> 8)
x0 &= 0x0000FFFF0000FFFFul
x1 &= 0x0000FFFF0000FFFFul
x2 &= 0x0000FFFF0000FFFFul
x3 &= 0x0000FFFF0000FFFFul
w[0] = (x0 : uint32) | (x0 >> 16 : uint32)
w[1] = (x1 : uint32) | (x1 >> 16 : uint32)
w[2] = (x2 : uint32) | (x2 >> 16 : uint32)
w[3] = (x3 : uint32) | (x3 >> 16 : uint32)
}
/*
* Perform bytewise orthogonalization of eight 64-bit words. Bytes
* of q0..q7 are spread over all words: for a byte x that occurs
* at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit
* of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.
*
* This operation is an involution.
*/
const ortho = {q
swap(Kl2, Kh2, 1, &q[0], &q[1])
swap(Kl2, Kh2, 1, &q[2], &q[3])
swap(Kl2, Kh2, 1, &q[4], &q[5])
swap(Kl2, Kh2, 1, &q[6], &q[7])
swap(Kl4, Kh4, 2, &q[0], &q[2])
swap(Kl4, Kh4, 2, &q[1], &q[3])
swap(Kl4, Kh4, 2, &q[4], &q[6])
swap(Kl4, Kh4, 2, &q[5], &q[7])
swap(Kl8, Kh8, 4, &q[0], &q[4])
swap(Kl8, Kh8, 4, &q[1], &q[5])
swap(Kl8, Kh8, 4, &q[2], &q[6])
swap(Kl8, Kh8, 4, &q[3], &q[7])
}
const subword = {x
var q : uint64[8]
std.slfill(q[:], 0)
q[0] = (x : uint64)
ortho(q[:])
sbox(q[:])
ortho(q[:])
-> (q[0] : uint32)
}
const Kl2 = 0x5555555555555555
const Kh2 = 0xAAAAAAAAAAAAAAAA
const Kl4 = 0x3333333333333333
const Kh4 = 0xCCCCCCCCCCCCCCCC
const Kl8 = 0x0F0F0F0F0F0F0F0F
const Kh8 = 0xF0F0F0F0F0F0F0F0
const swap = {kl, kh, s, x, y
var a, b
a = x#
b = y#
x# = (a & kl) | ((b & kl) << s); \
y# = ((a & kh) >> s) | (b & kh); \
}
const sbox = {q : uint64[:]
/*
* Translated from BearSSL
*
* This S-box implementation is a straightforward translation of
* the circuit described by Boyar and Peralta in "A new
* combinational logic minimization technique with applications
* to cryptology" (https://eprint.iacr.org/2009/191.pdf).
*
* Note that variables x* (input) and s* (output) are numbered
* in "reverse" order (x0 is the high bit, x7 is the low bit).
*/
var x0, x1, x2, x3, x4, x5, x6, x7
var y1, y2, y3, y4, y5, y6, y7, y8, y9
var y10, y11, y12, y13, y14, y15, y16, y17, y18, y19
var y20, y21
var z0, z1, z2, z3, z4, z5, z6, z7, z8, z9
var z10, z11, z12, z13, z14, z15, z16, z17
var t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
var t10, t11, t12, t13, t14, t15, t16, t17, t18, t19
var t20, t21, t22, t23, t24, t25, t26, t27, t28, t29
var t30, t31, t32, t33, t34, t35, t36, t37, t38, t39
var t40, t41, t42, t43, t44, t45, t46, t47, t48, t49
var t50, t51, t52, t53, t54, t55, t56, t57, t58, t59
var t60, t61, t62, t63, t64, t65, t66, t67
var s0, s1, s2, s3, s4, s5, s6, s7
x0 = q[7]
x1 = q[6]
x2 = q[5]
x3 = q[4]
x4 = q[3]
x5 = q[2]
x6 = q[1]
x7 = q[0]
/*
* Top linear transformation.
*/
y14 = x3 ^ x5
y13 = x0 ^ x6
y9 = x0 ^ x3
y8 = x0 ^ x5
t0 = x1 ^ x2
y1 = t0 ^ x7
y4 = y1 ^ x3
y12 = y13 ^ y14
y2 = y1 ^ x0
y5 = y1 ^ x6
y3 = y5 ^ y8
t1 = x4 ^ y12
y15 = t1 ^ x5
y20 = t1 ^ x1
y6 = y15 ^ x7
y10 = y15 ^ t0
y11 = y20 ^ y9
y7 = x7 ^ y11
y17 = y10 ^ y11
y19 = y10 ^ y8
y16 = t0 ^ y11
y21 = y13 ^ y16
y18 = x0 ^ y16
/*
* Non-linear section.
*/
t2 = y12 & y15
t3 = y3 & y6
t4 = t3 ^ t2
t5 = y4 & x7
t6 = t5 ^ t2
t7 = y13 & y16
t8 = y5 & y1
t9 = t8 ^ t7
t10 = y2 & y7
t11 = t10 ^ t7
t12 = y9 & y11
t13 = y14 & y17
t14 = t13 ^ t12
t15 = y8 & y10
t16 = t15 ^ t12
t17 = t4 ^ t14
t18 = t6 ^ t16
t19 = t9 ^ t14
t20 = t11 ^ t16
t21 = t17 ^ y20
t22 = t18 ^ y19
t23 = t19 ^ y21
t24 = t20 ^ y18
t25 = t21 ^ t22
t26 = t21 & t23
t27 = t24 ^ t26
t28 = t25 & t27
t29 = t28 ^ t22
t30 = t23 ^ t24
t31 = t22 ^ t26
t32 = t31 & t30
t33 = t32 ^ t24
t34 = t23 ^ t33
t35 = t27 ^ t33
t36 = t24 & t35
t37 = t36 ^ t34
t38 = t27 ^ t36
t39 = t29 & t38
t40 = t25 ^ t39
t41 = t40 ^ t37
t42 = t29 ^ t33
t43 = t29 ^ t40
t44 = t33 ^ t37
t45 = t42 ^ t41
z0 = t44 & y15
z1 = t37 & y6
z2 = t33 & x7
z3 = t43 & y16
z4 = t40 & y1
z5 = t29 & y7
z6 = t42 & y11
z7 = t45 & y17
z8 = t41 & y10
z9 = t44 & y12
z10 = t37 & y3
z11 = t33 & y4
z12 = t43 & y13
z13 = t40 & y5
z14 = t29 & y2
z15 = t42 & y9
z16 = t45 & y14
z17 = t41 & y8
/*
* Bottom linear transformation.
*/
t46 = z15 ^ z16
t47 = z10 ^ z11
t48 = z5 ^ z13
t49 = z9 ^ z10
t50 = z2 ^ z12
t51 = z2 ^ z5
t52 = z7 ^ z8
t53 = z0 ^ z3
t54 = z6 ^ z7
t55 = z16 ^ z17
t56 = z12 ^ t48
t57 = t50 ^ t53
t58 = z4 ^ t46
t59 = z3 ^ t54
t60 = t46 ^ t57
t61 = z14 ^ t57
t62 = t52 ^ t58
t63 = t49 ^ t58
t64 = z4 ^ t59
t65 = t61 ^ t62
t66 = z1 ^ t63
s0 = t59 ^ t63
s6 = t56 ^ ~t62
s7 = t48 ^ ~t60
t67 = t64 ^ t65
s3 = t53 ^ t66
s4 = t51 ^ t66
s5 = t47 ^ t65
s1 = t64 ^ ~s3
s2 = t55 ^ ~t67
q[7] = s0
q[6] = s1
q[5] = s2
q[4] = s3
q[3] = s4
q[2] = s5
q[1] = s6
q[0] = s7
}
const invsbox = {q
var q0, q1, q2, q3, q4, q5, q6, q7
q0 = ~q[0]
q1 = ~q[1]
q2 = q[2]
q3 = q[3]
q4 = q[4]
q5 = ~q[5]
q6 = ~q[6]
q7 = q[7]
q[7] = q1 ^ q4 ^ q6
q[6] = q0 ^ q3 ^ q5
q[5] = q7 ^ q2 ^ q4
q[4] = q6 ^ q1 ^ q3
q[3] = q5 ^ q0 ^ q2
q[2] = q4 ^ q7 ^ q1
q[1] = q3 ^ q6 ^ q0
q[0] = q2 ^ q5 ^ q7
sbox(q)
q0 = ~q[0]
q1 = ~q[1]
q2 = q[2]
q3 = q[3]
q4 = q[4]
q5 = ~q[5]
q6 = ~q[6]
q7 = q[7]
q[7] = q1 ^ q4 ^ q6
q[6] = q0 ^ q3 ^ q5
q[5] = q7 ^ q2 ^ q4
q[4] = q6 ^ q1 ^ q3
q[3] = q5 ^ q0 ^ q2
q[2] = q4 ^ q7 ^ q1
q[1] = q3 ^ q6 ^ q0
q[0] = q2 ^ q5 ^ q7
}
const rotr32 = {x
-> (x << 32) | (x >> 32)
}