about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/src/crypto
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com>, 2017-06-22 22:50:50 +0200
committer: Jason A. Donenfeld <Jason@zx2c4.com>, 2017-06-25 16:30:10 +0200
commit: b65de973614c0d4b529fb2461aace171c0aa5f89 (patch)
tree: 0f8b6af614d8ec0dd4bba254f48e04f6b9350695 /src/crypto
parent: curve25519: satisfy sparse and use short types (diff)
download: wireguard-monolithic-historical-b65de973614c0d4b529fb2461aace171c0aa5f89.tar.xz
download: wireguard-monolithic-historical-b65de973614c0d4b529fb2461aace171c0aa5f89.zip
curve25519: keep certain sandy2x functions in C
We can let the compiler optimize how it sees fit.
Diffstat (limited to 'src/crypto')
-rw-r--r-- src/crypto/curve25519-avx-x86_64.S | 227
-rw-r--r-- src/crypto/curve25519.c | 89
2 files changed, 84 insertions, 232 deletions
diff --git a/src/crypto/curve25519-avx-x86_64.S b/src/crypto/curve25519-avx-x86_64.S
index 81d0c74..bd636b5 100644
--- a/src/crypto/curve25519-avx-x86_64.S
+++ b/src/crypto/curve25519-avx-x86_64.S
@@ -3257,230 +3257,3 @@ ENTRY(curve25519_sandy2x_ladder_base)
add %r11,%rsp
ret
ENDPROC(curve25519_sandy2x_ladder_base)
-
-ENTRY(curve25519_sandy2x_fe_frombytes)
- pushq %r14
- pushq %r13
- pushq %r12
- pushq %rbp
- pushq %rbx
- movzbl 5(%rsi), %r9d
- movzbl 6(%rsi), %eax
- movzbl 4(%rsi), %edx
- movzbl 11(%rsi), %r10d
- movzbl 7(%rsi), %ecx
- movzbl 20(%rsi), %r14d
- movzbl 23(%rsi), %r11d
- salq $8, %r9
- movzbl 27(%rsi), %r12d
- movl (%rsi), %r8d
- salq $16, %rax
- movzbl 29(%rsi), %r13d
- movl 16(%rsi), %ebx
- orq %rax, %r9
- movzbl 9(%rsi), %eax
- salq $8, %r10
- orq %rdx, %r9
- movzbl 8(%rsi), %edx
- salq $8, %r12
- salq $6, %r9
- salq $16, %rax
- salq $8, %rdx
- orq %rax, %rdx
- movzbl 12(%rsi), %eax
- orq %rcx, %rdx
- movzbl 10(%rsi), %ecx
- salq $5, %rdx
- salq $16, %rax
- orq %rax, %r10
- movzbl 14(%rsi), %eax
- orq %rcx, %r10
- movzbl 13(%rsi), %ecx
- salq $3, %r10
- salq $8, %rax
- movq %rax, %rbp
- movzbl 15(%rsi), %eax
- salq $16, %rax
- orq %rax, %rbp
- movzbl 22(%rsi), %eax
- orq %rcx, %rbp
- movzbl 21(%rsi), %ecx
- salq $16, %rax
- salq $8, %rcx
- orq %rax, %rcx
- movzbl 24(%rsi), %eax
- orq %r14, %rcx
- salq $7, %rcx
- movq %rcx, %r14
- movzbl 25(%rsi), %ecx
- salq $8, %rax
- salq $16, %rcx
- orq %rcx, %rax
- movzbl 28(%rsi), %ecx
- orq %r11, %rax
- movzbl 26(%rsi), %r11d
- salq $5, %rax
- salq $16, %rcx
- orq %rcx, %r12
- movzbl 30(%rsi), %ecx
- orq %r11, %r12
- movzbl 31(%rsi), %r11d
- movq %rax, %rsi
- salq $4, %r12
- shrq $25, %rsi
- andl $33554431, %eax
- addq %r12, %rsi
- movl %r8d, %r12d
- shrq $26, %r8
- salq $8, %rcx
- andl $67108863, %r12d
- salq $16, %r11
- orq %rcx, %r11
- movq %r9, %rcx
- andl $33554431, %r9d
- shrq $25, %rcx
- orq %r13, %r11
- movq %r10, %r13
- addq %rcx, %rdx
- movl %ebx, %ecx
- andl $33554431, %r13d
- addq %r8, %r9
- shrq $25, %rcx
- movq %rdx, %r8
- shrq $26, %rdx
- addq %r14, %rcx
- shrq $25, %r10
- movl %r12d, %r14d
- addq %r13, %rdx
- andl $8388607, %r11d
- movq %r14, (%rdi)
- movq %r9, 8(%rdi)
- andl $33554431, %ebx
- movq %rdx, 24(%rdi)
- leaq (%r10,%rbp,4), %rdx
- andl $67108863, %r8d
- movq %rbx, 40(%rdi)
- movq %r8, 16(%rdi)
- movq %rdx, 32(%rdi)
- movq %rcx, %rdx
- shrq $26, %rcx
- addq %rax, %rcx
- movq %rsi, %rax
- shrq $26, %rsi
- andl $67108863, %eax
- andl $67108863, %edx
- movq %rcx, 56(%rdi)
- movq %rax, 64(%rdi)
- leaq (%rsi,%r11,4), %rax
- movq %rdx, 48(%rdi)
- popq %rbx
- movq %rax, 72(%rdi)
- popq %rbp
- popq %r12
- popq %r13
- popq %r14
- ret
-ENDPROC(curve25519_sandy2x_fe_frombytes)
-
-ENTRY(curve25519_sandy2x_fe51_invert)
- pushq %rbp
- pushq %rbx
- movq %rdi, %rbp
- movl $1, %edx
- movq %rsi, %rbx
- subq $440, %rsp
- movq %rsp, %rdi
- call curve25519_sandy2x_fe51_nsquare
- leaq 384(%rsp), %rdi
- movq %rsp, %rsi
- movl $1, %edx
- call curve25519_sandy2x_fe51_nsquare
- leaq 384(%rsp), %rsi
- movl $1, %edx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_nsquare
- leaq 384(%rsp), %rsi
- leaq 48(%rsp), %rdi
- movq %rbx, %rdx
- call curve25519_sandy2x_fe51_mul
- leaq 48(%rsp), %rsi
- leaq 96(%rsp), %rdi
- movq %rsp, %rdx
- call curve25519_sandy2x_fe51_mul
- leaq 96(%rsp), %rsi
- leaq 384(%rsp), %rdi
- movl $1, %edx
- call curve25519_sandy2x_fe51_nsquare
- leaq 48(%rsp), %rdx
- leaq 384(%rsp), %rsi
- leaq 144(%rsp), %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 144(%rsp), %rsi
- leaq 384(%rsp), %rdi
- movl $5, %edx
- call curve25519_sandy2x_fe51_nsquare
- leaq 144(%rsp), %rdx
- leaq 384(%rsp), %rsi
- leaq 192(%rsp), %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 192(%rsp), %rsi
- leaq 384(%rsp), %rdi
- movl $10, %edx
- call curve25519_sandy2x_fe51_nsquare
- leaq 192(%rsp), %rdx
- leaq 384(%rsp), %rsi
- leaq 240(%rsp), %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 240(%rsp), %rsi
- leaq 384(%rsp), %rdi
- movl $20, %edx
- call curve25519_sandy2x_fe51_nsquare
- leaq 384(%rsp), %rsi
- leaq 240(%rsp), %rdx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 384(%rsp), %rsi
- movl $10, %edx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_nsquare
- leaq 192(%rsp), %rdx
- leaq 384(%rsp), %rsi
- leaq 288(%rsp), %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 288(%rsp), %rsi
- leaq 384(%rsp), %rdi
- movl $50, %edx
- call curve25519_sandy2x_fe51_nsquare
- leaq 288(%rsp), %rdx
- leaq 384(%rsp), %rsi
- leaq 336(%rsp), %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 336(%rsp), %rsi
- leaq 384(%rsp), %rdi
- movl $100, %edx
- call curve25519_sandy2x_fe51_nsquare
- leaq 384(%rsp), %rsi
- leaq 336(%rsp), %rdx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 384(%rsp), %rsi
- movl $50, %edx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_nsquare
- leaq 384(%rsp), %rsi
- leaq 288(%rsp), %rdx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 384(%rsp), %rsi
- movl $5, %edx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_nsquare
- leaq 96(%rsp), %rdx
- leaq 384(%rsp), %rsi
- movq %rbp, %rdi
- call curve25519_sandy2x_fe51_mul
- addq $440, %rsp
- popq %rbx
- popq %rbp
- ret
-ENDPROC(curve25519_sandy2x_fe51_invert)
diff --git a/src/crypto/curve25519.c b/src/crypto/curve25519.c
index 780df70..1d79ab6 100644
--- a/src/crypto/curve25519.c
+++ b/src/crypto/curve25519.c
@@ -39,10 +39,89 @@ typedef u64 fe[10];
typedef u64 fe51[5];
asmlinkage void curve25519_sandy2x_ladder(fe *, const u8 *);
asmlinkage void curve25519_sandy2x_ladder_base(fe *, const u8 *);
-asmlinkage void curve25519_sandy2x_fe_frombytes(fe, const u8 *);
asmlinkage void curve25519_sandy2x_fe51_pack(u8 *, const fe51 *);
asmlinkage void curve25519_sandy2x_fe51_mul(fe51 *, const fe51 *, const fe51 *);
-asmlinkage void curve25519_sandy2x_fe51_invert(fe51 *, const fe51 *);
+asmlinkage void curve25519_sandy2x_fe51_nsquare(fe51 *, const fe51 *, int);
+
+static inline u32 le24_to_cpupv(const u8 *in) /* returns in[0] | in[1] << 8 | in[2] << 16, i.e. the 3 bytes at in read as a little-endian 24-bit value */
+{
+ return (u32)in[0] | ((u32)in[1] << 8) | ((u32)in[2] << 16); /* assembled bytewise: callers pass odd offsets, so no 16-bit load through a cast pointer */
+}
+
+static inline void fe_frombytes(fe h, const u8 *s) /* unpack the 32 bytes s[0..31] into the ten u64 limbs h[0..9] */
+{
+ u64 h0 = le32_to_cpup((__le32 *)s); /* s[0..3]; NOTE(review): the cast assumes a 32-bit load from a u8 pointer is fine on the target - confirm */
+ u64 h1 = le24_to_cpupv(s + 4) << 6; /* s[4..6]; the shift is done in u32 before widening (24 + 6 bits fits) */
+ u64 h2 = le24_to_cpupv(s + 7) << 5; /* s[7..9] */
+ u64 h3 = le24_to_cpupv(s + 10) << 3; /* s[10..12] */
+ u64 h4 = le24_to_cpupv(s + 13) << 2; /* s[13..15] */
+ u64 h5 = le32_to_cpup((__le32 *)(s + 16)); /* s[16..19] */
+ u64 h6 = le24_to_cpupv(s + 20) << 7; /* s[20..22]; 24 + 7 = 31 bits, the widest u32 intermediate in this function */
+ u64 h7 = le24_to_cpupv(s + 23) << 5; /* s[23..25] */
+ u64 h8 = le24_to_cpupv(s + 26) << 4; /* s[26..28] */
+ u64 h9 = (le24_to_cpupv(s + 29) & 8388607) << 2; /* s[29..31]; 8388607 == 2^23 - 1, so the top bit of s[31] is dropped */
+ u64 carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
+ /* Pass 1: mask h9 and the odd limbs h1, h3, h5, h7 to 25 bits; each excess is added to the next limb (h9's wraps to h0 times 19). */
+ carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF; /* h9 < 2^25 after the mask above, so carry9 is 0 and h0 is unchanged here */
+ carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF;
+ carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF;
+ carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF;
+ carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF;
+ /* Pass 2: mask the even limbs to 26 bits and add each excess to the following odd limb; the odd limbs are not re-masked afterwards. */
+ carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF;
+ carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF;
+ carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF;
+ carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF;
+ carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF;
+ /* Write the limbs out in index order. */
+ h[0] = h0;
+ h[1] = h1;
+ h[2] = h2;
+ h[3] = h3;
+ h[4] = h4;
+ h[5] = h5;
+ h[6] = h6;
+ h[7] = h7;
+ h[8] = h8;
+ h[9] = h9;
+}
+
+static inline void fe51_invert(fe51 *r, const fe51 *x) /* r = x^(2^255 - 21) per the step tags below; presumably the inverse of x mod 2^255 - 19 (exponent p - 2) */
+{
+ fe51 z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t;
+ /* Each call is tagged with the exponent of x it yields. r may alias x (the callers pass the same object): x is last read in step 9, r is written only by the final multiply. */
+ /* 2 */ curve25519_sandy2x_fe51_nsquare(&z2, x, 1);
+ /* 4 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2, 1);
+ /* 8 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 1);
+ /* 9 */ curve25519_sandy2x_fe51_mul(&z9, (const fe51 *)&t, x);
+ /* 11 */ curve25519_sandy2x_fe51_mul(&z11, (const fe51 *)&z9, (const fe51 *)&z2);
+ /* 22 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z11, 1);
+ /* 2^5 - 2^0 = 31 */ curve25519_sandy2x_fe51_mul(&z2_5_0, (const fe51 *)&t, (const fe51 *)&z9);
+ /* From here each pair squares a saved power k times (third nsquare argument), then multiplies by an earlier saved power. */
+ /* 2^10 - 2^5 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_5_0, 5);
+ /* 2^10 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_10_0, (const fe51 *)&t, (const fe51 *)&z2_5_0);
+
+ /* 2^20 - 2^10 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_10_0, 10);
+ /* 2^20 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_20_0, (const fe51 *)&t, (const fe51 *)&z2_10_0);
+
+ /* 2^40 - 2^20 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_20_0, 20);
+ /* 2^40 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_20_0);
+
+ /* 2^50 - 2^10 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 10);
+ /* 2^50 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_50_0, (const fe51 *)&t, (const fe51 *)&z2_10_0);
+
+ /* 2^100 - 2^50 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_50_0, 50);
+ /* 2^100 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_100_0, (const fe51 *)&t, (const fe51 *)&z2_50_0);
+
+ /* 2^200 - 2^100 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_100_0, 100);
+ /* 2^200 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_100_0);
+
+ /* 2^250 - 2^50 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 50);
+ /* 2^250 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_50_0);
+
+ /* 2^255 - 2^5 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 5);
+ /* 2^255 - 21 */ curve25519_sandy2x_fe51_mul(r, (const fe51 *)&t, (const fe51 *)&z11);
+}
static void curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
{
@@ -54,7 +133,7 @@ static void curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secr
#define x1 var[0]
#define x2 var[1]
#define z2 var[2]
- curve25519_sandy2x_fe_frombytes(x1, basepoint);
+ fe_frombytes(x1, basepoint);
curve25519_sandy2x_ladder(var, e);
z_51[0] = (z2[1] << 26) + z2[0];
z_51[1] = (z2[3] << 26) + z2[2];
@@ -69,7 +148,7 @@ static void curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secr
#undef x1
#undef x2
#undef z2
- curve25519_sandy2x_fe51_invert(&z_51, (const fe51 *)&z_51);
+ fe51_invert(&z_51, (const fe51 *)&z_51);
curve25519_sandy2x_fe51_mul(&x_51, (const fe51 *)&x_51, (const fe51 *)&z_51);
curve25519_sandy2x_fe51_pack(mypublic, (const fe51 *)&x_51);
@@ -101,7 +180,7 @@ static void curve25519_sandy2x_base(u8 pub[CURVE25519_POINT_SIZE], const u8 secr
x_51[4] = (x2[9] << 26) + x2[8];
#undef x2
#undef z2
- curve25519_sandy2x_fe51_invert(&z_51, (const fe51 *)&z_51);
+ fe51_invert(&z_51, (const fe51 *)&z_51);
curve25519_sandy2x_fe51_mul(&x_51, (const fe51 *)&x_51, (const fe51 *)&z_51);
curve25519_sandy2x_fe51_pack(pub, (const fe51 *)&x_51);