shithub: mc

Download patch

ref: 5362f2f03b9ca372fa7900861d07a9858b9dccb0
parent: c3d4ae24b48ae6f7ed07a34afa0eb7b709b4f103
author: Ori Bernstein <ori@markovcorp.com>
date: Fri Jul 26 07:32:15 EDT 2019

Fix wycheproof tests for curve25519 (thanks Mike)

--- a/lib/crypto/curve25519.myr
+++ b/lib/crypto/curve25519.myr
@@ -72,14 +72,14 @@
  * (note the order of the arguments!)
  */
 const fdiff = {out, in
-	for var i = 0; i < 10; i++
-		out[i] = (in[i] - out[i])
+	for var i = 0; i < 10; ++i
+		out[i] = in[i] - out[i]
 	;;
 }
 
 /* Multiply a number my a scalar: out = in * scalar */
 const fscalarproduct = {out, in, scalar
-	for var i = 0; i < 10; i++
+	for var i = 0; i < 10; ++i
 		out[i] = in[i] * scalar
 	;;
 }
@@ -194,22 +194,40 @@
 
 
 /* Reduce a long form to a short form by taking the input mod 2^255 - 19. */
-const freducedegree= {out
-	out[8] += 19 * out[18];
-	out[7] += 19 * out[17];
-	out[6] += 19 * out[16];
-	out[5] += 19 * out[15];
-	out[4] += 19 * out[14];
-	out[3] += 19 * out[13];
-	out[2] += 19 * out[12];
-	out[1] += 19 * out[11];
-	out[0] += 19 * out[10];
+const freducedegree= {output
+	output[8] += output[18] << 4;
+	output[8] += output[18] << 1;
+	output[8] += output[18];
+	output[7] += output[17] << 4;
+	output[7] += output[17] << 1;
+	output[7] += output[17];
+	output[6] += output[16] << 4;
+	output[6] += output[16] << 1;
+	output[6] += output[16];
+	output[5] += output[15] << 4;
+	output[5] += output[15] << 1;
+	output[5] += output[15];
+	output[4] += output[14] << 4;
+	output[4] += output[14] << 1;
+	output[4] += output[14];
+	output[3] += output[13] << 4;
+	output[3] += output[13] << 1;
+	output[3] += output[13];
+	output[2] += output[12] << 4;
+	output[2] += output[12] << 1;
+	output[2] += output[12];
+	output[1] += output[11] << 4;
+	output[1] += output[11] << 1;
+	output[1] += output[11];
+	output[0] += output[10] << 4;
+	output[0] += output[10] << 1;
+	output[0] += output[10];
 }
 
 /* Reduce all coeff of the short form in to be -2**25 <= x <= 2**25
  */
 const freducecoeff = {out
-	var over, over32
+	var over
 	var hi, sgn, round
 
 	out[10] = 0;
@@ -256,10 +274,6 @@
 	 * So |over| will be no more than 1.
 	 * out[1] fits in 32 bits, so we can use div_s32_by_2_25 here.
 	 */
-	round = ((out[1] : int32) >> 31 : uint32) >> 7
-	over32 = (((out[1] : int32) + (round : int32)) >> 25 : int64)
-	out[1] -= over32 << 25
-	out[2] += over32
 
 	/*
 	 * Finally, out[0,1,3..9] are reduced, and out[2] is "nearly reduced":
@@ -384,37 +398,61 @@
 
 }
 
+/* s32_eq returns 0xffffffff iff a == b and zero otherwise. */
+const s32_eq = {a, b
+	a = ~(a ^ b)
+	a &= a << 16
+	a &= a << 8
+	a &= a << 4
+	a &= a << 2
+	a &= a << 1
+	-> a >> 31
+}
+
+/* s32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are
+   both non-negative. */
+const s32_gte = {a, b
+	a -= b
+	/* a >= 0 iff a >= b. */
+	-> ~(a >> 31)
+}
+
 /* Take a fully reduced polynomial form number and contract it into a
  * little-endian, 32-byte array
  */
-const fcontract = {out : byte[:], in : int64[:]
+const fcontract = {out : byte[:], in_limbs : int64[:]
 	var mask, carry
-	for var j = 0; j < 2; j++
-		for var i = 0; i < 9; i++
+	var in : int32[10]
+
+	for var i = 0; i < 10; i++
+		in[i] = (in_limbs[i] : int32)
+	;;
+	for var j = 0; j < 2; ++j
+		for var i = 0; i < 9; ++i
 			if (i & 1) == 1
 			 	/* This calculation is a time-invariant way to make in[i] positive
 			 	 * by borrowing from the next-larger int64_t.
 			 	 */
-				mask = (in[i] : int32) >> 31
-				carry = -(((in[i] : int32) & mask) >> 25)
-				in[i+0] = ((in[i] : int32) + (carry << 25) : int64)
-				in[i+1] = ((in[i+1] : int32) - carry : int64)
+				mask = in[i] >> 31
+				carry = -((in[i] & mask) >> 25);
+				in[i+0] = in[i] + (carry << 25)
+				in[i+1] = in[i+1] - carry
 			else
-				mask = (in[i] : int32) >> 31
-				carry = -(((in[i] : int32) & mask) >> 26)
-				in[i+0] = ((in[i] : int32) + (carry << 26) : int64)
-				in[i+1] = ((in[i+1] : int32) - carry : int64)
+				mask = in[i] >> 31
+				carry = -((in[i] & mask) >> 26)
+				in[i+0] = in[i] + (carry << 26)
+				in[i+1] = in[i+1] - carry
 			;;
 		;;
-		mask = (in[9] : int32) >> 31
-		carry = -(((in[9] : int32) & mask) >> 25)
-		in[9] = ((in[9] : int32) + (carry << 25) : int64)
-		in[0] = ((in[0] : int32) - (carry * 19) : int64)
+		mask = in[9] >> 31
+		carry = -((in[9] & mask) >> 25)
+		in[9] = in[9] + (carry << 25)
+		in[0] = in[0] - (carry * 19)
 	;;
 
 	/* The first borrow-propagation pass above ended with every int64_t
 	   except (possibly) in[0] non-negative.
-	
+
 	   Since each in int64_t except in[0] is decreased by at most 1
 	   by a borrow-propagation pass, the second borrow-propagation pass
 	   could only have wrapped around to decrease in[0] again if the
@@ -422,11 +460,46 @@
 	   were all zero.  In that case, in[1] is now 2^25 - 1, and this
 	   last borrow-propagation step will leave in[1] non-negative.
 	*/
-	mask = (in[0] : int32) >> 31
-	carry = -(((in[0] : int32) & mask) >> 26)
-	in[0] = ((in[0] : int32) + (carry << 26) : int64)
-	in[1] = ((in[1] : int32) - carry : int64)
-	
+	mask = in[0] >> 31
+	carry = -((in[0] & mask) >> 26)
+	in[0] = in[0] + (carry << 26)
+	in[1] = in[1] - carry
+
+	for var j = 0; j < 2; j++
+		for var i = 0; i < 9; i++
+			if (i & 1) == 1
+				carry = in[1] >> 25
+				in[i+0] &= 0x1ffffff
+				in[i+1] += carry
+			else
+				carry = in[i] >> 26
+				in[i+0] &= 0x3ffffff
+				in[i+1] += carry
+			;;
+		;;
+		carry = in[9] >> 25
+		in[9] &= 0x1ffffff
+		in[0] += (19 * carry)
+	;;
+
+	mask = s32_gte(in[0], 0x3ffffed)
+	for var i = 1; i < 10; i++
+		if (i & 1) == 1
+			mask &= s32_eq(in[i], 0x1ffffff)
+		else
+			mask &= s32_eq(in[i], 0x3ffffff)
+		;;
+	;;
+	in[0] -= (mask & 0x3ffffed)
+
+	for var i = 1; i < 10; i++
+		if (i & 1) == 1
+			in[i] -= (mask & 0x1ffffff)
+		else
+			in[i] -= (mask & 0x3ffffff)
+		;;
+	;;
+
 	/* Both passes through the above loop, plus the last 0-to-1 step, are
 	   necessary: if in[9] is -1 and in[0] through in[8] are 0,
 	   negative values will remain in the array until the end.
@@ -482,7 +555,6 @@
 	std.clear(&zzprime);
 	std.clear(&zzzprime);
 	std.clear(&xxxprime);
-	std.clear(&origx);
 
 	std.slcp(origx[:10], x[:10])
 	fsum(x, z)
@@ -518,7 +590,6 @@
 	fscalarproduct(zzz[:], zz[:], 121665)
 	/* No need to call freduce_degree here:
 	   fscalar_product doesn't increase the degree of its input. */
-	freducedegree(zzz[:])
 	freducecoeff(zzz[:])
 	fsum(zzz[:], xx[:])
 	fproduct(z2, zz[:], zzz[:])
@@ -530,7 +601,7 @@
 	var s, x
 
 	s = (-swap : int32)
-	for var i = 0; i < 10; i++
+	for var i = 0; i < 10; ++i
 		x = s & ((a[i] : int32) ^ (b[i] : int32))
 		a[i] = ((a[i] : int32) ^ x : int64)
 		b[i] = ((b[i] : int32) ^ x : int64)
@@ -576,8 +647,8 @@
 			    nqpqx, nqpqz,
 			    q)
 
-		        cswap(nqx2, nqpqx2, bit);
-			cswap(nqz2, nqpqz2, bit);
+		    cswap(nqx2, nqpqx2, bit)
+			cswap(nqz2, nqpqz2, bit)
 
 			t = nqx
 			nqx = nqx2
@@ -643,7 +714,7 @@
 	/* 2^21 - 2^1 */ fsquare(t0[:], z2_20_0[:])
 	/* 2^22 - 2^2 */ fsquare(t1[:], t0[:])
 	/* 2^40 - 2^20 */
-	for var i = 2;i < 20;i += 2
+	for i = 2;i < 20;i += 2
 		fsquare(t0[:], t1[:])
 		fsquare(t1[:], t0[:])
 	;;
@@ -652,7 +723,7 @@
 	/* 2^41 - 2^1 */ fsquare(t1[:],t0[:])
 	/* 2^42 - 2^2 */ fsquare(t0[:],t1[:])
 	/* 2^50 - 2^10 */
-	for var i = 2;i < 10;i += 2
+	for i = 2;i < 10;i += 2
 		fsquare(t1[:],t0[:])
 		fsquare(t0[:],t1[:])
 	;;
@@ -711,7 +782,6 @@
 	cmult(x[:], z[:], secret[:], bp[:])
 	crecip(zmone[:], z[:])
 	fmul(z[:], x[:], zmone[:])
-	freducecoeff(z[:])
 	fcontract(pub[:], z[:])
 }