about | summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2017-03-20 13:04:46 +0100
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2017-03-20 14:02:31 +0100
commit: 16703bf180c03e50ee752fe2b6bceb3f75d5de2c (patch)
tree: 6b369762ce21ca18dc731083841e2bf10e2a32db
parent: version: bump snapshot (diff)
download: wireguard-monolithic-historical-16703bf180c03e50ee752fe2b6bceb3f75d5de2c.tar.xz
download: wireguard-monolithic-historical-16703bf180c03e50ee752fe2b6bceb3f75d5de2c.zip
curve25519: do dispatcher in C instead of asm, since shlx is haswell only
-rw-r--r-- src/crypto/curve25519-avx-x86_64.S 153
-rw-r--r-- src/crypto/curve25519.c 82
2 files changed, 73 insertions, 162 deletions
diff --git a/src/crypto/curve25519-avx-x86_64.S b/src/crypto/curve25519-avx-x86_64.S
index bc2796d..bd660ef 100644
--- a/src/crypto/curve25519-avx-x86_64.S
+++ b/src/crypto/curve25519-avx-x86_64.S
@@ -3484,156 +3484,3 @@ ENTRY(curve25519_sandy2x_fe51_invert)
popq %rbp
ret
ENDPROC(curve25519_sandy2x_fe51_invert)
-
-ENTRY(curve25519_sandy2x)
- pushq %rbx
- movq %rdi, %rbx
- subq $368, %rsp
- movq (%rsi), %rax
- leaq 128(%rsp), %rdi
- movq %rax, (%rsp)
- movq 8(%rsi), %rax
- andb $-8, (%rsp)
- movq %rax, 8(%rsp)
- movq 16(%rsi), %rax
- movq %rax, 16(%rsp)
- movq 24(%rsi), %rax
- movq %rdx, %rsi
- movq %rax, 24(%rsp)
- shrq $56, %rax
- andl $127, %eax
- orl $64, %eax
- movb %al, 31(%rsp)
- call curve25519_sandy2x_fe_frombytes
- leaq 128(%rsp), %rdi
- movq %rsp, %rsi
- call curve25519_sandy2x_ladder
- movl $26, %eax
- shlx %rax, 296(%rsp), %rax
- leaq 80(%rsp), %rsi
- addq 288(%rsp), %rax
- movq %rsi, %rdi
- movq %rax, 80(%rsp)
- movl $26, %eax
- shlx %rax, 312(%rsp), %rax
- addq 304(%rsp), %rax
- movq %rax, 88(%rsp)
- movl $26, %eax
- shlx %rax, 328(%rsp), %rax
- addq 320(%rsp), %rax
- movq %rax, 96(%rsp)
- movl $26, %eax
- shlx %rax, 344(%rsp), %rax
- addq 336(%rsp), %rax
- movq %rax, 104(%rsp)
- movl $26, %eax
- shlx %rax, 360(%rsp), %rax
- addq 352(%rsp), %rax
- movq %rax, 112(%rsp)
- movl $26, %eax
- shlx %rax, 216(%rsp), %rax
- addq 208(%rsp), %rax
- movq %rax, 32(%rsp)
- movl $26, %eax
- shlx %rax, 232(%rsp), %rax
- addq 224(%rsp), %rax
- movq %rax, 40(%rsp)
- movl $26, %eax
- shlx %rax, 248(%rsp), %rax
- addq 240(%rsp), %rax
- movq %rax, 48(%rsp)
- movl $26, %eax
- shlx %rax, 264(%rsp), %rax
- addq 256(%rsp), %rax
- movq %rax, 56(%rsp)
- movl $26, %eax
- shlx %rax, 280(%rsp), %rax
- addq 272(%rsp), %rax
- movq %rax, 64(%rsp)
- call curve25519_sandy2x_fe51_invert
- leaq 32(%rsp), %rsi
- leaq 80(%rsp), %rdx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 32(%rsp), %rsi
- movq %rbx, %rdi
- call curve25519_sandy2x_fe51_pack
- addq $368, %rsp
- popq %rbx
- ret
-ENDPROC(curve25519_sandy2x)
-
-ENTRY(curve25519_sandy2x_base)
- pushq %rbx
- movq %rdi, %rbx
- subq $368, %rsp
- movq (%rsi), %rax
- leaq 128(%rsp), %rdi
- movq %rax, (%rsp)
- movq 8(%rsi), %rax
- andb $-8, (%rsp)
- movq %rax, 8(%rsp)
- movq 16(%rsi), %rax
- movq %rax, 16(%rsp)
- movq 24(%rsi), %rax
- movq %rsp, %rsi
- movq %rax, 24(%rsp)
- shrq $56, %rax
- andl $127, %eax
- orl $64, %eax
- movb %al, 31(%rsp)
- call curve25519_sandy2x_ladder_base
- movl $26, %eax
- shlx %rax, 216(%rsp), %rax
- leaq 80(%rsp), %rsi
- addq 208(%rsp), %rax
- movq %rsi, %rdi
- movq %rax, 80(%rsp)
- movl $26, %eax
- shlx %rax, 232(%rsp), %rax
- addq 224(%rsp), %rax
- movq %rax, 88(%rsp)
- movl $26, %eax
- shlx %rax, 248(%rsp), %rax
- addq 240(%rsp), %rax
- movq %rax, 96(%rsp)
- movl $26, %eax
- shlx %rax, 264(%rsp), %rax
- addq 256(%rsp), %rax
- movq %rax, 104(%rsp)
- movl $26, %eax
- shlx %rax, 280(%rsp), %rax
- addq 272(%rsp), %rax
- movq %rax, 112(%rsp)
- movl $26, %eax
- shlx %rax, 136(%rsp), %rax
- addq 128(%rsp), %rax
- movq %rax, 32(%rsp)
- movl $26, %eax
- shlx %rax, 152(%rsp), %rax
- addq 144(%rsp), %rax
- movq %rax, 40(%rsp)
- movl $26, %eax
- shlx %rax, 168(%rsp), %rax
- addq 160(%rsp), %rax
- movq %rax, 48(%rsp)
- movl $26, %eax
- shlx %rax, 184(%rsp), %rax
- addq 176(%rsp), %rax
- movq %rax, 56(%rsp)
- movl $26, %eax
- shlx %rax, 200(%rsp), %rax
- addq 192(%rsp), %rax
- movq %rax, 64(%rsp)
- call curve25519_sandy2x_fe51_invert
- leaq 32(%rsp), %rsi
- leaq 80(%rsp), %rdx
- movq %rsi, %rdi
- call curve25519_sandy2x_fe51_mul
- leaq 32(%rsp), %rsi
- movq %rbx, %rdi
- call curve25519_sandy2x_fe51_pack
- addq $368, %rsp
- popq %rbx
- ret
-ENDPROC(curve25519_sandy2x_base)
diff --git a/src/crypto/curve25519.c b/src/crypto/curve25519.c
index 539c31a..d9cdd8d 100644
--- a/src/crypto/curve25519.c
+++ b/src/crypto/curve25519.c
@@ -10,6 +10,13 @@
#include <linux/random.h>
#include <crypto/algapi.h>
+static __always_inline void normalize_secret(u8 secret[CURVE25519_POINT_SIZE])
+{
+ secret[0] &= 248;
+ secret[31] &= 127;
+ secret[31] |= 64;
+}
+
#ifdef CONFIG_X86_64
#include <asm/cpufeature.h>
#include <asm/processor.h>
@@ -20,19 +27,76 @@ void curve25519_fpu_init(void)
{
curve25519_use_avx = boot_cpu_has(X86_FEATURE_AVX);
}
-asmlinkage void curve25519_sandy2x(u8 *q, const u8 *n, const u8 *p);
-asmlinkage void curve25519_sandy2x_base(u8 *q, const u8 *n);
-#else
-void curve25519_fpu_init(void) { }
-#endif
-static __always_inline void normalize_secret(u8 secret[CURVE25519_POINT_SIZE])
+typedef u64 fe[10];
+typedef u64 fe51[5];
+asmlinkage void curve25519_sandy2x_ladder(fe *, const u8 *);
+asmlinkage void curve25519_sandy2x_ladder_base(fe *, const u8 *);
+asmlinkage void curve25519_sandy2x_fe_frombytes(fe, const u8 *);
+asmlinkage void curve25519_sandy2x_fe51_pack(u8 *, const fe51 *);
+asmlinkage void curve25519_sandy2x_fe51_mul(fe51 *, const fe51 *, const fe51 *);
+asmlinkage void curve25519_sandy2x_fe51_invert(fe51 *, const fe51 *);
+
+static void curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
{
- secret[0] &= 248;
- secret[31] &= 127;
- secret[31] |= 64;
+ u8 e[32];
+ fe var[3];
+ fe51 x_51, z_51;
+ memcpy(e, secret, 32);
+ normalize_secret(e);
+#define x1 var[0]
+#define x2 var[1]
+#define z2 var[2]
+ curve25519_sandy2x_fe_frombytes(x1, basepoint);
+ curve25519_sandy2x_ladder(var, e);
+ z_51[0] = (z2[1] << 26) + z2[0];
+ z_51[1] = (z2[3] << 26) + z2[2];
+ z_51[2] = (z2[5] << 26) + z2[4];
+ z_51[3] = (z2[7] << 26) + z2[6];
+ z_51[4] = (z2[9] << 26) + z2[8];
+ x_51[0] = (x2[1] << 26) + x2[0];
+ x_51[1] = (x2[3] << 26) + x2[2];
+ x_51[2] = (x2[5] << 26) + x2[4];
+ x_51[3] = (x2[7] << 26) + x2[6];
+ x_51[4] = (x2[9] << 26) + x2[8];
+#undef x1
+#undef x2
+#undef z2
+ curve25519_sandy2x_fe51_invert(&z_51, &z_51);
+ curve25519_sandy2x_fe51_mul(&x_51, &x_51, &z_51);
+ curve25519_sandy2x_fe51_pack(mypublic, &x_51);
}
+static void curve25519_sandy2x_base(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE])
+{
+ u8 e[32];
+ fe var[3];
+ fe51 x_51, z_51;
+ memcpy(e, secret, 32);
+ normalize_secret(e);
+ curve25519_sandy2x_ladder_base(var, e);
+#define x2 var[0]
+#define z2 var[1]
+ z_51[0] = (z2[1] << 26) + z2[0];
+ z_51[1] = (z2[3] << 26) + z2[2];
+ z_51[2] = (z2[5] << 26) + z2[4];
+ z_51[3] = (z2[7] << 26) + z2[6];
+ z_51[4] = (z2[9] << 26) + z2[8];
+ x_51[0] = (x2[1] << 26) + x2[0];
+ x_51[1] = (x2[3] << 26) + x2[2];
+ x_51[2] = (x2[5] << 26) + x2[4];
+ x_51[3] = (x2[7] << 26) + x2[6];
+ x_51[4] = (x2[9] << 26) + x2[8];
+#undef x2
+#undef z2
+ curve25519_sandy2x_fe51_invert(&z_51, &z_51);
+ curve25519_sandy2x_fe51_mul(&x_51, &x_51, &z_51);
+ curve25519_sandy2x_fe51_pack(pub, &x_51);
+}
+#else
+void curve25519_fpu_init(void) { }
+#endif
+
#ifdef __SIZEOF_INT128__
typedef u64 limb;
typedef limb felem[5];