diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-01-18 16:46:32 +0000 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-01-18 16:46:32 +0000 |
commit | 3e9e993a2054bdf925ce6f831b75a546c6fe9f77 (patch) | |
tree | 4ab91a947f9a9e0fc4f5c237cbf0ec5e2682fd8b | |
parent | Import other curves for comparison (diff) | |
download | kbench9000-3e9e993a2054bdf925ce6f831b75a546c6fe9f77.tar.xz kbench9000-3e9e993a2054bdf925ce6f831b75a546c6fe9f77.zip |
Work on ARM
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | curve25519-amd64-asm.S | 1888 | ||||
-rw-r--r-- | curve25519-amd64.c | 234 | ||||
-rw-r--r-- | curve25519-sandy2x-asm.S | 3261 | ||||
-rw-r--r-- | curve25519-sandy2x.c | 139 | ||||
-rw-r--r-- | main.c | 18 | ||||
-rwxr-xr-x | run.sh | 1 |
7 files changed, 3 insertions, 5540 deletions
@@ -1,5 +1,5 @@ ifneq ($(KERNELRELEASE),) -kbench9000-y := main.o curve25519-donna64.o curve25519-hacl64.o curve25519-sandy2x.o curve25519-sandy2x-asm.o curve25519-amd64.o curve25519-amd64-asm.o curve25519-donna32.o curve25519-fiat32.o +kbench9000-y := main.o curve25519-donna64.o curve25519-hacl64.o curve25519-donna32.o curve25519-fiat32.o obj-m := kbench9000.o ccflags-y += -O3 ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' diff --git a/curve25519-amd64-asm.S b/curve25519-amd64-asm.S deleted file mode 100644 index 27a5b6a..0000000 --- a/curve25519-amd64-asm.S +++ /dev/null @@ -1,1888 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright (C) 2015 Google Inc. All Rights Reserved. - * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - * - * Original author: Peter Schwabe <peter@cryptojedi.org> - */ - -/************************************************ - * W A R N I N G - * W A R N I N G - * W A R N I N G - * W A R N I N G - * W A R N I N G - * - * Do not import this file into the kernel as-is, - * because it makes use of the x86_64 redzone, - * which will entirely melt the kernel. We're sort - * of getting away with it here, since interrupts - * are disabled, but DANGER this will kill kittens. - * - * W A R N I N G - * W A R N I N G - * W A R N I N G - * W A R N I N G - * W A R N I N G - ************************************************/ - -.data -.p2align 4 - -x25519_x86_64_REDMASK51: .quad 0x0007FFFFFFFFFFFF -x25519_x86_64_121666_213: .quad 996687872 -x25519_x86_64_2P0: .quad 0xFFFFFFFFFFFDA -x25519_x86_64_2P1234: .quad 0xFFFFFFFFFFFFE -x25519_x86_64_4P0: .quad 0x1FFFFFFFFFFFB4 -x25519_x86_64_4P1234: .quad 0x1FFFFFFFFFFFFC -x25519_x86_64_MU0: .quad 0xED9CE5A30A2C131B -x25519_x86_64_MU1: .quad 0x2106215D086329A7 -x25519_x86_64_MU2: .quad 0xFFFFFFFFFFFFFFEB -x25519_x86_64_MU3: .quad 0xFFFFFFFFFFFFFFFF -x25519_x86_64_MU4: .quad 0x000000000000000F -x25519_x86_64_ORDER0: .quad 0x5812631A5CF5D3ED -x25519_x86_64_ORDER1: .quad 0x14DEF9DEA2F79CD6 -x25519_x86_64_ORDER2: .quad 0x0000000000000000 -x25519_x86_64_ORDER3: .quad 0x1000000000000000 -x25519_x86_64_EC2D0: .quad 1859910466990425 -x25519_x86_64_EC2D1: .quad 932731440258426 -x25519_x86_64_EC2D2: .quad 1072319116312658 -x25519_x86_64_EC2D3: .quad 1815898335770999 -x25519_x86_64_EC2D4: .quad 633789495995903 -x25519_x86_64__38: .quad 38 - -.text -.p2align 5 - -.globl x25519_x86_64_freeze -.hidden x25519_x86_64_freeze -x25519_x86_64_freeze: -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. */ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq 0(%rdi),%rsi -movq 8(%rdi),%rdx -movq 16(%rdi),%rcx -movq 24(%rdi),%r8 -movq 32(%rdi),%r9 -movq x25519_x86_64_REDMASK51(%rip),%rax -mov %rax,%r10 -sub $18,%r10 -mov $3,%r11 -._reduceloop: -mov %rsi,%r12 -shr $51,%r12 -and %rax,%rsi -add %r12,%rdx -mov %rdx,%r12 -shr $51,%r12 -and %rax,%rdx -add %r12,%rcx -mov %rcx,%r12 -shr $51,%r12 -and %rax,%rcx -add %r12,%r8 -mov %r8,%r12 -shr $51,%r12 -and %rax,%r8 -add %r12,%r9 -mov %r9,%r12 -shr $51,%r12 -and %rax,%r9 -imulq $19,%r12,%r12 -add %r12,%rsi -sub $1,%r11 -ja ._reduceloop -mov $1,%r12 -cmp %r10,%rsi -cmovl %r11,%r12 -cmp %rax,%rdx -cmovne %r11,%r12 -cmp %rax,%rcx -cmovne %r11,%r12 -cmp %rax,%r8 -cmovne %r11,%r12 -cmp %rax,%r9 -cmovne %r11,%r12 -neg %r12 -and %r12,%rax -and %r12,%r10 -sub %r10,%rsi -sub %rax,%rdx -sub %rax,%rcx -sub %rax,%r8 -sub %rax,%r9 -movq %rsi,0(%rdi) -movq %rdx,8(%rdi) -movq %rcx,16(%rdi) -movq %r8,24(%rdi) -movq %r9,32(%rdi) -movq -8(%rsp),%r12 -ret -.cfi_endproc - -.p2align 5 -.globl x25519_x86_64_mul -.hidden x25519_x86_64_mul -x25519_x86_64_mul: -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. */ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq %r13,-16(%rsp) -.cfi_rel_offset r13, -16 -movq %r14,-24(%rsp) -.cfi_rel_offset r14, -24 -movq %r15,-32(%rsp) -.cfi_rel_offset r15, -32 -movq %rbx,-40(%rsp) -.cfi_rel_offset rbx, -40 -movq %rbp,-48(%rsp) -.cfi_rel_offset rbp, -48 -mov %rdx,%rcx -movq 24(%rsi),%rdx -imulq $19,%rdx,%rax -movq %rax,-64(%rsp) -mulq 16(%rcx) -mov %rax,%r8 -mov %rdx,%r9 -movq 32(%rsi),%rdx -imulq $19,%rdx,%rax -movq %rax,-72(%rsp) -mulq 8(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsi),%rax -mulq 0(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsi),%rax -mulq 8(%rcx) -mov %rax,%r10 -mov %rdx,%r11 -movq 0(%rsi),%rax -mulq 16(%rcx) -mov %rax,%r12 -mov %rdx,%r13 -movq 0(%rsi),%rax -mulq 24(%rcx) -mov %rax,%r14 -mov %rdx,%r15 -movq 0(%rsi),%rax -mulq 32(%rcx) -mov %rax,%rbx -mov %rdx,%rbp -movq 8(%rsi),%rax -mulq 0(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsi),%rax -mulq 8(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 8(%rsi),%rax -mulq 16(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsi),%rax -mulq 24(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq 8(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 16(%rsi),%rax -mulq 0(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 16(%rsi),%rax -mulq 8(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 16(%rsi),%rax -mulq 16(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq 16(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 16(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq 24(%rsi),%rax -mulq 0(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 24(%rsi),%rax -mulq 8(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq -64(%rsp),%rax -mulq 24(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq -64(%rsp),%rax -mulq 32(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 32(%rsi),%rax -mulq 0(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq -72(%rsp),%rax -mulq 16(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq -72(%rsp),%rax -mulq 24(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq -72(%rsp),%rax -mulq 32(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq x25519_x86_64_REDMASK51(%rip),%rsi -shld $13,%r8,%r9 -and %rsi,%r8 -shld $13,%r10,%r11 -and %rsi,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rsi,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rsi,%r14 -add %r13,%r14 -shld $13,%rbx,%rbp -and %rsi,%rbx -add %r15,%rbx -imulq $19,%rbp,%rdx -add %rdx,%r8 -mov %r8,%rdx -shr $51,%rdx -add %r10,%rdx -mov %rdx,%rcx -shr $51,%rdx -and %rsi,%r8 -add %r12,%rdx -mov %rdx,%r9 -shr $51,%rdx -and %rsi,%rcx -add %r14,%rdx -mov %rdx,%rax -shr $51,%rdx -and %rsi,%r9 -add %rbx,%rdx -mov %rdx,%r10 -shr $51,%rdx -and %rsi,%rax -imulq $19,%rdx,%rdx -add %rdx,%r8 -and %rsi,%r10 -movq %r8,0(%rdi) -movq %rcx,8(%rdi) -movq %r9,16(%rdi) -movq %rax,24(%rdi) -movq %r10,32(%rdi) -movq -8(%rsp),%r12 -movq -16(%rsp),%r13 -movq -24(%rsp),%r14 -movq -32(%rsp),%r15 -movq -40(%rsp),%rbx -movq -48(%rsp),%rbp -ret -.cfi_endproc - -.p2align 5 -.globl x25519_x86_64_square -.hidden x25519_x86_64_square -x25519_x86_64_square: -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. */ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq %r13,-16(%rsp) -.cfi_rel_offset r13, -16 -movq %r14,-24(%rsp) -.cfi_rel_offset r14, -24 -movq %r15,-32(%rsp) -.cfi_rel_offset r15, -32 -movq %rbx,-40(%rsp) -.cfi_rel_offset rbx, -40 -movq 0(%rsi),%rax -mulq 0(%rsi) -mov %rax,%rcx -mov %rdx,%r8 -movq 0(%rsi),%rax -shl $1,%rax -mulq 8(%rsi) -mov %rax,%r9 -mov %rdx,%r10 -movq 0(%rsi),%rax -shl $1,%rax -mulq 16(%rsi) -mov %rax,%r11 -mov %rdx,%r12 -movq 0(%rsi),%rax -shl $1,%rax -mulq 24(%rsi) -mov %rax,%r13 -mov %rdx,%r14 -movq 0(%rsi),%rax -shl $1,%rax -mulq 32(%rsi) -mov %rax,%r15 -mov %rdx,%rbx -movq 8(%rsi),%rax -mulq 8(%rsi) -add %rax,%r11 -adc %rdx,%r12 -movq 8(%rsi),%rax -shl $1,%rax -mulq 16(%rsi) -add %rax,%r13 -adc %rdx,%r14 -movq 8(%rsi),%rax -shl $1,%rax -mulq 24(%rsi) -add %rax,%r15 -adc %rdx,%rbx -movq 8(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%rcx -adc %rdx,%r8 -movq 16(%rsi),%rax -mulq 16(%rsi) -add %rax,%r15 -adc %rdx,%rbx -movq 16(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 24(%rsi) -add %rax,%rcx -adc %rdx,%r8 -movq 16(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%r9 -adc %rdx,%r10 -movq 24(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsi) -add %rax,%r9 -adc %rdx,%r10 -movq 24(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%r11 -adc %rdx,%r12 -movq 32(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsi) -add %rax,%r13 -adc %rdx,%r14 -movq x25519_x86_64_REDMASK51(%rip),%rsi -shld $13,%rcx,%r8 -and %rsi,%rcx -shld $13,%r9,%r10 -and %rsi,%r9 -add %r8,%r9 -shld $13,%r11,%r12 -and %rsi,%r11 -add %r10,%r11 -shld $13,%r13,%r14 -and %rsi,%r13 -add %r12,%r13 -shld $13,%r15,%rbx -and %rsi,%r15 -add %r14,%r15 -imulq $19,%rbx,%rdx -add %rdx,%rcx -mov %rcx,%rdx -shr $51,%rdx -add %r9,%rdx -and %rsi,%rcx -mov %rdx,%r8 -shr $51,%rdx -add %r11,%rdx -and %rsi,%r8 -mov %rdx,%r9 -shr $51,%rdx -add %r13,%rdx -and %rsi,%r9 -mov %rdx,%rax -shr $51,%rdx -add %r15,%rdx -and %rsi,%rax -mov %rdx,%r10 -shr $51,%rdx -imulq $19,%rdx,%rdx -add %rdx,%rcx -and %rsi,%r10 -movq %rcx,0(%rdi) -movq %r8,8(%rdi) -movq %r9,16(%rdi) -movq %rax,24(%rdi) -movq %r10,32(%rdi) -movq -8(%rsp),%r12 -movq -16(%rsp),%r13 -movq -24(%rsp),%r14 -movq -32(%rsp),%r15 -movq -40(%rsp),%rbx -ret -.cfi_endproc - -.p2align 5 -.globl x25519_x86_64_ladderstep -.hidden x25519_x86_64_ladderstep -x25519_x86_64_ladderstep: -.cfi_startproc -sub $344,%rsp -.cfi_adjust_cfa_offset 344 -movq %r12,296(%rsp) -.cfi_rel_offset r12, 296 -movq %r13,304(%rsp) -.cfi_rel_offset r13, 304 -movq %r14,312(%rsp) -.cfi_rel_offset r14, 312 -movq %r15,320(%rsp) -.cfi_rel_offset r15, 320 -movq %rbx,328(%rsp) -.cfi_rel_offset rbx, 328 -movq %rbp,336(%rsp) -.cfi_rel_offset rbp, 336 -movq 40(%rdi),%rsi -movq 48(%rdi),%rdx -movq 56(%rdi),%rcx -movq 64(%rdi),%r8 -movq 72(%rdi),%r9 -mov %rsi,%rax -mov %rdx,%r10 -mov %rcx,%r11 -mov %r8,%r12 -mov %r9,%r13 -add x25519_x86_64_2P0(%rip),%rax -add x25519_x86_64_2P1234(%rip),%r10 -add x25519_x86_64_2P1234(%rip),%r11 -add x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 80(%rdi),%rsi -addq 88(%rdi),%rdx -addq 96(%rdi),%rcx -addq 104(%rdi),%r8 -addq 112(%rdi),%r9 -subq 80(%rdi),%rax -subq 88(%rdi),%r10 -subq 96(%rdi),%r11 -subq 104(%rdi),%r12 -subq 112(%rdi),%r13 -movq %rsi,0(%rsp) -movq %rdx,8(%rsp) -movq %rcx,16(%rsp) -movq %r8,24(%rsp) -movq %r9,32(%rsp) -movq %rax,40(%rsp) -movq %r10,48(%rsp) -movq %r11,56(%rsp) -movq %r12,64(%rsp) -movq %r13,72(%rsp) -movq 40(%rsp),%rax -mulq 40(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 40(%rsp),%rax -shl $1,%rax -mulq 48(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 40(%rsp),%rax -shl $1,%rax -mulq 56(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 40(%rsp),%rax -shl $1,%rax -mulq 64(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 40(%rsp),%rax -shl $1,%rax -mulq 72(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 48(%rsp),%rax -mulq 48(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 48(%rsp),%rax -shl $1,%rax -mulq 56(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 48(%rsp),%rax -shl $1,%rax -mulq 64(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 48(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 56(%rsp),%rax -mulq 56(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 56(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 64(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 56(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 64(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 64(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 64(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 72(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,80(%rsp) -movq %r8,88(%rsp) -movq %r9,96(%rsp) -movq %rax,104(%rsp) -movq %r10,112(%rsp) -movq 0(%rsp),%rax -mulq 0(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 0(%rsp),%rax -shl $1,%rax -mulq 8(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 0(%rsp),%rax -shl $1,%rax -mulq 16(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 0(%rsp),%rax -shl $1,%rax -mulq 24(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 0(%rsp),%rax -shl $1,%rax -mulq 32(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 8(%rsp),%rax -mulq 8(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -shl $1,%rax -mulq 16(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 8(%rsp),%rax -shl $1,%rax -mulq 24(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 16(%rsp),%rax -mulq 16(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 16(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 24(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 16(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 24(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 24(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 32(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,120(%rsp) -movq %r8,128(%rsp) -movq %r9,136(%rsp) -movq %rax,144(%rsp) -movq %r10,152(%rsp) -mov %rsi,%rsi -mov %r8,%rdx -mov %r9,%rcx -mov %rax,%r8 -mov %r10,%r9 -add x25519_x86_64_2P0(%rip),%rsi -add x25519_x86_64_2P1234(%rip),%rdx -add x25519_x86_64_2P1234(%rip),%rcx -add x25519_x86_64_2P1234(%rip),%r8 -add x25519_x86_64_2P1234(%rip),%r9 -subq 80(%rsp),%rsi -subq 88(%rsp),%rdx -subq 96(%rsp),%rcx -subq 104(%rsp),%r8 -subq 112(%rsp),%r9 -movq %rsi,160(%rsp) -movq %rdx,168(%rsp) -movq %rcx,176(%rsp) -movq %r8,184(%rsp) -movq %r9,192(%rsp) -movq 120(%rdi),%rsi -movq 128(%rdi),%rdx -movq 136(%rdi),%rcx -movq 144(%rdi),%r8 -movq 152(%rdi),%r9 -mov %rsi,%rax -mov %rdx,%r10 -mov %rcx,%r11 -mov %r8,%r12 -mov %r9,%r13 -add x25519_x86_64_2P0(%rip),%rax -add x25519_x86_64_2P1234(%rip),%r10 -add x25519_x86_64_2P1234(%rip),%r11 -add x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 160(%rdi),%rsi -addq 168(%rdi),%rdx -addq 176(%rdi),%rcx -addq 184(%rdi),%r8 -addq 192(%rdi),%r9 -subq 160(%rdi),%rax -subq 168(%rdi),%r10 -subq 176(%rdi),%r11 -subq 184(%rdi),%r12 -subq 192(%rdi),%r13 -movq %rsi,200(%rsp) -movq %rdx,208(%rsp) -movq %rcx,216(%rsp) -movq %r8,224(%rsp) -movq %r9,232(%rsp) -movq %rax,240(%rsp) -movq %r10,248(%rsp) -movq %r11,256(%rsp) -movq %r12,264(%rsp) -movq %r13,272(%rsp) -movq 224(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,280(%rsp) -mulq 56(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 232(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,288(%rsp) -mulq 48(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 200(%rsp),%rax -mulq 40(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 200(%rsp),%rax -mulq 48(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 200(%rsp),%rax -mulq 56(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 200(%rsp),%rax -mulq 64(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 200(%rsp),%rax -mulq 72(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 208(%rsp),%rax -mulq 40(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 208(%rsp),%rax -mulq 48(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 208(%rsp),%rax -mulq 56(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 208(%rsp),%rax -mulq 64(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 208(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 216(%rsp),%rax -mulq 40(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 216(%rsp),%rax -mulq 48(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 216(%rsp),%rax -mulq 56(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 216(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 64(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 216(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 224(%rsp),%rax -mulq 40(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 224(%rsp),%rax -mulq 48(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 280(%rsp),%rax -mulq 64(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 280(%rsp),%rax -mulq 72(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 232(%rsp),%rax -mulq 40(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 288(%rsp),%rax -mulq 56(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 288(%rsp),%rax -mulq 64(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 288(%rsp),%rax -mulq 72(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,40(%rsp) -movq %r8,48(%rsp) -movq %r9,56(%rsp) -movq %rax,64(%rsp) -movq %r10,72(%rsp) -movq 264(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,200(%rsp) -mulq 16(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 272(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,208(%rsp) -mulq 8(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 240(%rsp),%rax -mulq 0(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 240(%rsp),%rax -mulq 8(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 240(%rsp),%rax -mulq 16(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 240(%rsp),%rax -mulq 24(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 240(%rsp),%rax -mulq 32(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 248(%rsp),%rax -mulq 0(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 248(%rsp),%rax -mulq 8(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 248(%rsp),%rax -mulq 16(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 248(%rsp),%rax -mulq 24(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 248(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 256(%rsp),%rax -mulq 0(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 256(%rsp),%rax -mulq 8(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 256(%rsp),%rax -mulq 16(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 256(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 256(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 264(%rsp),%rax -mulq 0(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 264(%rsp),%rax -mulq 8(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 200(%rsp),%rax -mulq 24(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 200(%rsp),%rax -mulq 32(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 272(%rsp),%rax -mulq 0(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 208(%rsp),%rax -mulq 16(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 208(%rsp),%rax -mulq 24(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 208(%rsp),%rax -mulq 32(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -mov %rsi,%rdx -mov %r8,%rcx -mov %r9,%r11 -mov %rax,%r12 -mov %r10,%r13 -add x25519_x86_64_2P0(%rip),%rdx -add x25519_x86_64_2P1234(%rip),%rcx -add x25519_x86_64_2P1234(%rip),%r11 -add x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 40(%rsp),%rsi -addq 48(%rsp),%r8 -addq 56(%rsp),%r9 -addq 64(%rsp),%rax -addq 72(%rsp),%r10 -subq 40(%rsp),%rdx -subq 48(%rsp),%rcx -subq 56(%rsp),%r11 -subq 64(%rsp),%r12 -subq 72(%rsp),%r13 -movq %rsi,120(%rdi) -movq %r8,128(%rdi) -movq %r9,136(%rdi) -movq %rax,144(%rdi) -movq %r10,152(%rdi) -movq %rdx,160(%rdi) -movq %rcx,168(%rdi) -movq %r11,176(%rdi) -movq %r12,184(%rdi) -movq %r13,192(%rdi) -movq 120(%rdi),%rax -mulq 120(%rdi) -mov %rax,%rsi -mov %rdx,%rcx -movq 120(%rdi),%rax -shl $1,%rax -mulq 128(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 120(%rdi),%rax -shl $1,%rax -mulq 136(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 120(%rdi),%rax -shl $1,%rax -mulq 144(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 120(%rdi),%rax -shl $1,%rax -mulq 152(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 128(%rdi),%rax -mulq 128(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 128(%rdi),%rax -shl $1,%rax -mulq 136(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 128(%rdi),%rax -shl $1,%rax -mulq 144(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 128(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rdi),%rax -mulq 136(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 136(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 144(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 144(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 152(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 152(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,120(%rdi) -movq %r8,128(%rdi) -movq %r9,136(%rdi) -movq %rax,144(%rdi) -movq %r10,152(%rdi) -movq 160(%rdi),%rax -mulq 160(%rdi) -mov %rax,%rsi -mov %rdx,%rcx -movq 160(%rdi),%rax -shl $1,%rax -mulq 168(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 160(%rdi),%rax -shl $1,%rax -mulq 176(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 160(%rdi),%rax -shl $1,%rax -mulq 184(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 160(%rdi),%rax -shl $1,%rax -mulq 192(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 168(%rdi),%rax -mulq 168(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 168(%rdi),%rax -shl $1,%rax -mulq 176(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 168(%rdi),%rax -shl $1,%rax -mulq 184(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 168(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rax -mulq 176(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 176(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 184(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 184(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 192(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,160(%rdi) -movq %r8,168(%rdi) -movq %r9,176(%rdi) -movq %rax,184(%rdi) -movq %r10,192(%rdi) -movq 184(%rdi),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 16(%rdi) -mov %rax,%rsi -mov %rdx,%rcx -movq 192(%rdi),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 8(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 160(%rdi),%rax -mulq 0(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 160(%rdi),%rax -mulq 8(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 160(%rdi),%rax -mulq 16(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 160(%rdi),%rax -mulq 24(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 160(%rdi),%rax -mulq 32(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 168(%rdi),%rax -mulq 0(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 168(%rdi),%rax -mulq 8(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 168(%rdi),%rax -mulq 16(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 168(%rdi),%rax -mulq 24(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 168(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rax -mulq 0(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 176(%rdi),%rax -mulq 8(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 176(%rdi),%rax -mulq 16(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 176(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rax -mulq 0(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 184(%rdi),%rax -mulq 8(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 24(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 32(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 192(%rdi),%rax -mulq 0(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rax -mulq 16(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 24(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 32(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,160(%rdi) -movq %r8,168(%rdi) -movq %r9,176(%rdi) -movq %rax,184(%rdi) -movq %r10,192(%rdi) -movq 144(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 96(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 152(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 88(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 120(%rsp),%rax -mulq 80(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 120(%rsp),%rax -mulq 88(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 120(%rsp),%rax -mulq 96(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 120(%rsp),%rax -mulq 104(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 120(%rsp),%rax -mulq 112(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 128(%rsp),%rax -mulq 80(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 128(%rsp),%rax -mulq 88(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 128(%rsp),%rax -mulq 96(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 128(%rsp),%rax -mulq 104(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 128(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 112(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rsp),%rax -mulq 80(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 136(%rsp),%rax -mulq 88(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 136(%rsp),%rax -mulq 96(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 136(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 104(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 112(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rsp),%rax -mulq 80(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 144(%rsp),%rax -mulq 88(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 104(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 112(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 152(%rsp),%rax -mulq 80(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rax -mulq 96(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 104(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 112(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,40(%rdi) -movq %r8,48(%rdi) -movq %r9,56(%rdi) -movq %rax,64(%rdi) -movq %r10,72(%rdi) -movq 160(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -mov %rax,%rsi -mov %rdx,%rcx -movq 168(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%rcx -mov %rdx,%r8 -movq 176(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r8 -mov %rdx,%r9 -movq 184(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r9 -mov %rdx,%r10 -movq 192(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r10 -imulq $19,%rdx,%rdx -add %rdx,%rsi -addq 80(%rsp),%rsi -addq 88(%rsp),%rcx -addq 96(%rsp),%r8 -addq 104(%rsp),%r9 -addq 112(%rsp),%r10 -movq %rsi,80(%rdi) -movq %rcx,88(%rdi) -movq %r8,96(%rdi) -movq %r9,104(%rdi) -movq %r10,112(%rdi) -movq 104(%rdi),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 176(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 112(%rdi),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 168(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 80(%rdi),%rax -mulq 160(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 80(%rdi),%rax -mulq 168(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 80(%rdi),%rax -mulq 176(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 80(%rdi),%rax -mulq 184(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 80(%rdi),%rax -mulq 192(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 88(%rdi),%rax -mulq 160(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 88(%rdi),%rax -mulq 168(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 88(%rdi),%rax -mulq 176(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 88(%rdi),%rax -mulq 184(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 88(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 96(%rdi),%rax -mulq 160(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 96(%rdi),%rax -mulq 168(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 96(%rdi),%rax -mulq 176(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 96(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 184(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 96(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 104(%rdi),%rax -mulq 160(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 104(%rdi),%rax -mulq 168(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 184(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 192(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 112(%rdi),%rax -mulq 160(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rax -mulq 176(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 184(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 192(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,80(%rdi) -movq %r8,88(%rdi) -movq %r9,96(%rdi) -movq %rax,104(%rdi) -movq %r10,112(%rdi) -movq 296(%rsp),%r12 -movq 304(%rsp),%r13 -movq 312(%rsp),%r14 -movq 320(%rsp),%r15 -movq 328(%rsp),%rbx -movq 336(%rsp),%rbp -add $344,%rsp -.cfi_adjust_cfa_offset -344 -ret -.cfi_endproc - -.p2align 5 -.globl x25519_x86_64_work_cswap -.hidden x25519_x86_64_work_cswap -x25519_x86_64_work_cswap: -.cfi_startproc -subq $1,%rsi -notq %rsi -movq %rsi,%xmm15 -pshufd $0x44,%xmm15,%xmm15 -movdqu 0(%rdi),%xmm0 -movdqu 16(%rdi),%xmm2 -movdqu 32(%rdi),%xmm4 -movdqu 48(%rdi),%xmm6 -movdqu 64(%rdi),%xmm8 -movdqu 80(%rdi),%xmm1 -movdqu 96(%rdi),%xmm3 -movdqu 112(%rdi),%xmm5 -movdqu 128(%rdi),%xmm7 -movdqu 144(%rdi),%xmm9 -movdqa %xmm1,%xmm10 -movdqa %xmm3,%xmm11 -movdqa %xmm5,%xmm12 -movdqa %xmm7,%xmm13 -movdqa %xmm9,%xmm14 -pxor %xmm0,%xmm10 -pxor %xmm2,%xmm11 -pxor %xmm4,%xmm12 -pxor %xmm6,%xmm13 -pxor %xmm8,%xmm14 -pand %xmm15,%xmm10 -pand %xmm15,%xmm11 -pand %xmm15,%xmm12 -pand %xmm15,%xmm13 -pand %xmm15,%xmm14 -pxor %xmm10,%xmm0 -pxor %xmm10,%xmm1 -pxor %xmm11,%xmm2 -pxor %xmm11,%xmm3 -pxor %xmm12,%xmm4 -pxor %xmm12,%xmm5 -pxor %xmm13,%xmm6 -pxor %xmm13,%xmm7 -pxor %xmm14,%xmm8 -pxor %xmm14,%xmm9 -movdqu %xmm0,0(%rdi) -movdqu %xmm2,16(%rdi) -movdqu %xmm4,32(%rdi) -movdqu %xmm6,48(%rdi) -movdqu %xmm8,64(%rdi) -movdqu %xmm1,80(%rdi) -movdqu %xmm3,96(%rdi) -movdqu %xmm5,112(%rdi) -movdqu %xmm7,128(%rdi) -movdqu %xmm9,144(%rdi) -ret -.cfi_endproc diff --git a/curve25519-amd64.c b/curve25519-amd64.c deleted file mode 100644 index 095b0d2..0000000 --- a/curve25519-amd64.c +++ /dev/null @@ -1,234 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright (C) 2015 Google Inc. All Rights Reserved. - * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - * - * Original author: Peter Schwabe <peter@cryptojedi.org> - */ - -#include <linux/kernel.h> -#include <linux/string.h> - -typedef struct { uint64_t v[5]; } fe25519; - -asmlinkage void x25519_x86_64_work_cswap(fe25519 *, uint64_t); -asmlinkage void x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b); -asmlinkage void x25519_x86_64_square(fe25519 *out, const fe25519 *a); -asmlinkage void x25519_x86_64_freeze(fe25519 *); -asmlinkage void x25519_x86_64_ladderstep(fe25519 *work); - -enum { CURVE25519_POINT_SIZE = 32 }; - -static __always_inline void normalize_secret(u8 secret[CURVE25519_POINT_SIZE]) -{ - secret[0] &= 248; - secret[31] &= 127; - secret[31] |= 64; -} - -static void fe25519_setint(fe25519 *r, unsigned v) -{ - r->v[0] = v; - r->v[1] = 0; - r->v[2] = 0; - r->v[3] = 0; - r->v[4] = 0; -} - -// Assumes input x being reduced below 2^255 -static void fe25519_pack(unsigned char r[32], const fe25519 *x) -{ - fe25519 t; - t = *x; - x25519_x86_64_freeze(&t); - - r[0] = (uint8_t)(t.v[0] & 0xff); - r[1] = (uint8_t)((t.v[0] >> 8) & 0xff); - r[2] = (uint8_t)((t.v[0] >> 16) & 0xff); - r[3] = (uint8_t)((t.v[0] >> 24) & 0xff); - r[4] = (uint8_t)((t.v[0] >> 32) & 0xff); - r[5] = (uint8_t)((t.v[0] >> 40) & 0xff); - r[6] = (uint8_t)((t.v[0] >> 48)); - - r[6] ^= (uint8_t)((t.v[1] << 3) & 0xf8); - r[7] = (uint8_t)((t.v[1] >> 5) & 0xff); - r[8] = (uint8_t)((t.v[1] >> 13) & 0xff); - r[9] = (uint8_t)((t.v[1] >> 21) & 0xff); - r[10] = (uint8_t)((t.v[1] >> 29) & 0xff); - r[11] = (uint8_t)((t.v[1] >> 37) & 0xff); - r[12] = (uint8_t)((t.v[1] >> 45)); - - r[12] ^= (uint8_t)((t.v[2] << 6) & 0xc0); - r[13] = (uint8_t)((t.v[2] >> 2) & 0xff); - r[14] = (uint8_t)((t.v[2] >> 10) & 0xff); - r[15] = (uint8_t)((t.v[2] >> 18) & 0xff); - r[16] = (uint8_t)((t.v[2] >> 26) & 0xff); - r[17] = (uint8_t)((t.v[2] >> 34) & 0xff); - r[18] = (uint8_t)((t.v[2] >> 42) & 0xff); - r[19] = (uint8_t)((t.v[2] >> 50)); - - r[19] ^= (uint8_t)((t.v[3] << 1) & 0xfe); - r[20] = (uint8_t)((t.v[3] >> 7) & 0xff); - r[21] = (uint8_t)((t.v[3] >> 15) & 0xff); - r[22] = (uint8_t)((t.v[3] >> 23) & 0xff); - r[23] = (uint8_t)((t.v[3] >> 31) & 0xff); - r[24] = (uint8_t)((t.v[3] >> 39) & 0xff); - r[25] = (uint8_t)((t.v[3] >> 47)); - - r[25] ^= (uint8_t)((t.v[4] << 4) & 0xf0); - r[26] = (uint8_t)((t.v[4] >> 4) & 0xff); - r[27] = (uint8_t)((t.v[4] >> 12) & 0xff); - r[28] = (uint8_t)((t.v[4] >> 20) & 0xff); - r[29] = (uint8_t)((t.v[4] >> 28) & 0xff); - r[30] = (uint8_t)((t.v[4] >> 36) & 0xff); - r[31] = (uint8_t)((t.v[4] >> 44)); -} - -static void fe25519_unpack(fe25519 *r, const uint8_t x[32]) -{ - r->v[0] = x[0]; - r->v[0] += (uint64_t)x[1] << 8; - r->v[0] += (uint64_t)x[2] << 16; - r->v[0] += (uint64_t)x[3] << 24; - r->v[0] += (uint64_t)x[4] << 32; - r->v[0] += (uint64_t)x[5] << 40; - r->v[0] += ((uint64_t)x[6] & 7) << 48; - - r->v[1] = x[6] >> 3; - r->v[1] += (uint64_t)x[7] << 5; - r->v[1] += (uint64_t)x[8] << 13; - r->v[1] += (uint64_t)x[9] << 21; - r->v[1] += (uint64_t)x[10] << 29; - r->v[1] += (uint64_t)x[11] << 37; - r->v[1] += ((uint64_t)x[12] & 63) << 45; - - r->v[2] = x[12] >> 6; - r->v[2] += (uint64_t)x[13] << 2; - r->v[2] += (uint64_t)x[14] << 10; - r->v[2] += (uint64_t)x[15] << 18; - r->v[2] += (uint64_t)x[16] << 26; - r->v[2] += (uint64_t)x[17] << 34; - r->v[2] += (uint64_t)x[18] << 42; - r->v[2] += ((uint64_t)x[19] & 1) << 50; - - r->v[3] = x[19] >> 1; - r->v[3] += (uint64_t)x[20] << 7; - r->v[3] += (uint64_t)x[21] << 15; - r->v[3] += (uint64_t)x[22] << 23; - r->v[3] += (uint64_t)x[23] << 31; - r->v[3] += (uint64_t)x[24] << 39; - r->v[3] += ((uint64_t)x[25] & 15) << 47; - - r->v[4] = x[25] >> 4; - r->v[4] += (uint64_t)x[26] << 4; - r->v[4] += (uint64_t)x[27] << 12; - r->v[4] += (uint64_t)x[28] << 20; - r->v[4] += (uint64_t)x[29] << 28; - r->v[4] += (uint64_t)x[30] << 36; - r->v[4] += ((uint64_t)x[31] & 127) << 44; -} - -static void fe25519_invert(fe25519 *r, const fe25519 *x) -{ - fe25519 z2; - fe25519 z9; - fe25519 z11; - fe25519 z2_5_0; - fe25519 z2_10_0; - fe25519 z2_20_0; - fe25519 z2_50_0; - fe25519 z2_100_0; - fe25519 t; - int i; - - /* 2 */ x25519_x86_64_square(&z2, x); - /* 4 */ x25519_x86_64_square(&t, &z2); - /* 8 */ x25519_x86_64_square(&t, &t); - /* 9 */ x25519_x86_64_mul(&z9, &t, x); - /* 11 */ x25519_x86_64_mul(&z11, &z9, &z2); - /* 22 */ x25519_x86_64_square(&t, &z11); - /* 2^5 - 2^0 = 31 */ x25519_x86_64_mul(&z2_5_0, &t, &z9); - - /* 2^6 - 2^1 */ x25519_x86_64_square(&t, &z2_5_0); - /* 2^20 - 2^10 */ for (i = 1; i < 5; i++) { x25519_x86_64_square(&t, &t); } - /* 2^10 - 2^0 */ x25519_x86_64_mul(&z2_10_0, &t, &z2_5_0); - - /* 2^11 - 2^1 */ x25519_x86_64_square(&t, &z2_10_0); - /* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } - /* 2^20 - 2^0 */ x25519_x86_64_mul(&z2_20_0, &t, &z2_10_0); - - /* 2^21 - 2^1 */ x25519_x86_64_square(&t, &z2_20_0); - /* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { x25519_x86_64_square(&t, &t); } - /* 2^40 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_20_0); - - /* 2^41 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } - /* 2^50 - 2^0 */ x25519_x86_64_mul(&z2_50_0, &t, &z2_10_0); - - /* 2^51 - 2^1 */ x25519_x86_64_square(&t, &z2_50_0); - /* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } - /* 2^100 - 2^0 */ x25519_x86_64_mul(&z2_100_0, &t, &z2_50_0); - - /* 2^101 - 2^1 */ x25519_x86_64_square(&t, &z2_100_0); - /* 2^200 - 2^100 */ for (i = 1; i < 100; i++) { - x25519_x86_64_square(&t, &t); - } - /* 2^200 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_100_0); - - /* 2^201 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } - /* 2^250 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_50_0); - - /* 2^251 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^252 - 2^2 */ x25519_x86_64_square(&t, &t); - /* 2^253 - 2^3 */ x25519_x86_64_square(&t, &t); - - /* 2^254 - 2^4 */ x25519_x86_64_square(&t, &t); - - /* 2^255 - 2^5 */ x25519_x86_64_square(&t, &t); - /* 2^255 - 21 */ x25519_x86_64_mul(r, &t, &z11); -} - -static void mladder(fe25519 *xr, fe25519 *zr, const uint8_t s[32]) -{ - int i, j; - uint8_t prevbit = 0; - fe25519 work[5]; - - work[0] = *xr; - fe25519_setint(work + 1, 1); - fe25519_setint(work + 2, 0); - work[3] = *xr; - fe25519_setint(work + 4, 1); - - j = 6; - for (i = 31; i >= 0; i--) { - while (j >= 0) { - const uint8_t bit = 1 & (s[i] >> j); - const uint64_t swap = bit ^ prevbit; - prevbit = bit; - x25519_x86_64_work_cswap(work + 1, swap); - x25519_x86_64_ladderstep(work); - j -= 1; - } - j = 7; - } - - *xr = work[1]; - *zr = work[2]; -} -bool curve25519_amd64(u8 out[CURVE25519_POINT_SIZE], const u8 scalar[CURVE25519_POINT_SIZE], const u8 point[CURVE25519_POINT_SIZE]) -{ - fe25519 t; - fe25519 z; - uint8_t e[32]; - memcpy(e, scalar, sizeof(e)); - normalize_secret(e); - - fe25519_unpack(&t, point); - mladder(&t, &z, e); - fe25519_invert(&z, &z); - x25519_x86_64_mul(&t, &t, &z); - fe25519_pack(out, &t); - return true; -} diff --git a/curve25519-sandy2x-asm.S b/curve25519-sandy2x-asm.S deleted file mode 100644 index f2e466b..0000000 --- a/curve25519-sandy2x-asm.S +++ /dev/null @@ -1,3261 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - * - * Original author: Tung Chou <blueprint@crypto.tw> - */ - -#include <linux/linkage.h> - -.data -.align 16 -curve25519_sandy2x_v0_0: .quad 0, 0 -curve25519_sandy2x_v1_0: .quad 1, 0 -curve25519_sandy2x_v2_1: .quad 2, 1 -curve25519_sandy2x_v9_0: .quad 9, 0 -curve25519_sandy2x_v9_9: .quad 9, 9 -curve25519_sandy2x_v19_19: .quad 19, 19 -curve25519_sandy2x_v38_1: .quad 38, 1 -curve25519_sandy2x_v38_38: .quad 38, 38 -curve25519_sandy2x_v121666_121666: .quad 121666, 121666 -curve25519_sandy2x_m25: .quad 33554431, 33554431 -curve25519_sandy2x_m26: .quad 67108863, 67108863 -curve25519_sandy2x_subc0: .quad 0x07FFFFDA, 0x03FFFFFE -curve25519_sandy2x_subc2: .quad 0x07FFFFFE, 0x03FFFFFE -curve25519_sandy2x_REDMASK51: .quad 0x0007FFFFFFFFFFFF - -.text -.align 32 -#ifdef CONFIG_AS_AVX -ENTRY(curve25519_sandy2x_fe51_mul) - push %rbp - mov %rsp,%rbp - sub $96,%rsp - and $-32,%rsp - movq %r11,0(%rsp) - movq %r12,8(%rsp) - movq %r13,16(%rsp) - movq %r14,24(%rsp) - movq %r15,32(%rsp) - movq %rbx,40(%rsp) - movq %rbp,48(%rsp) - movq %rdi,56(%rsp) - mov %rdx,%rcx - movq 24(%rsi),%rdx - imulq $19,%rdx,%rax - movq %rax,64(%rsp) - mulq 16(%rcx) - mov %rax,%r8 - mov %rdx,%r9 - movq 32(%rsi),%rdx - imulq $19,%rdx,%rax - movq %rax,72(%rsp) - mulq 8(%rcx) - add %rax,%r8 - adc %rdx,%r9 - movq 0(%rsi),%rax - mulq 0(%rcx) - add %rax,%r8 - adc %rdx,%r9 - movq 0(%rsi),%rax - mulq 8(%rcx) - mov %rax,%r10 - mov %rdx,%r11 - movq 0(%rsi),%rax - mulq 16(%rcx) - mov %rax,%r12 - mov %rdx,%r13 - movq 0(%rsi),%rax - mulq 24(%rcx) - mov %rax,%r14 - mov %rdx,%r15 - movq 0(%rsi),%rax - mulq 32(%rcx) - mov %rax,%rbx - mov %rdx,%rbp - movq 8(%rsi),%rax - mulq 0(%rcx) - add %rax,%r10 - adc %rdx,%r11 - movq 8(%rsi),%rax - mulq 8(%rcx) - add %rax,%r12 - adc %rdx,%r13 - movq 8(%rsi),%rax - mulq 16(%rcx) - add %rax,%r14 - adc %rdx,%r15 - movq 8(%rsi),%rax - mulq 24(%rcx) - add %rax,%rbx - adc %rdx,%rbp - movq 8(%rsi),%rdx - imulq $19,%rdx,%rax - mulq 32(%rcx) - add %rax,%r8 - adc %rdx,%r9 - movq 16(%rsi),%rax - mulq 0(%rcx) - add %rax,%r12 - adc %rdx,%r13 - movq 16(%rsi),%rax - mulq 8(%rcx) - add %rax,%r14 - adc %rdx,%r15 - movq 16(%rsi),%rax - mulq 16(%rcx) - add %rax,%rbx - adc %rdx,%rbp - movq 16(%rsi),%rdx - imulq $19,%rdx,%rax - mulq 24(%rcx) - add %rax,%r8 - adc %rdx,%r9 - movq 16(%rsi),%rdx - imulq $19,%rdx,%rax - mulq 32(%rcx) - add %rax,%r10 - adc %rdx,%r11 - movq 24(%rsi),%rax - mulq 0(%rcx) - add %rax,%r14 - adc %rdx,%r15 - movq 24(%rsi),%rax - mulq 8(%rcx) - add %rax,%rbx - adc %rdx,%rbp - movq 64(%rsp),%rax - mulq 24(%rcx) - add %rax,%r10 - adc %rdx,%r11 - movq 64(%rsp),%rax - mulq 32(%rcx) - add %rax,%r12 - adc %rdx,%r13 - movq 32(%rsi),%rax - mulq 0(%rcx) - add %rax,%rbx - adc %rdx,%rbp - movq 72(%rsp),%rax - mulq 16(%rcx) - add %rax,%r10 - adc %rdx,%r11 - movq 72(%rsp),%rax - mulq 24(%rcx) - add %rax,%r12 - adc %rdx,%r13 - movq 72(%rsp),%rax - mulq 32(%rcx) - add %rax,%r14 - adc %rdx,%r15 - movq curve25519_sandy2x_REDMASK51(%rip),%rsi - shld $13,%r8,%r9 - and %rsi,%r8 - shld $13,%r10,%r11 - and %rsi,%r10 - add %r9,%r10 - shld $13,%r12,%r13 - and %rsi,%r12 - add %r11,%r12 - shld $13,%r14,%r15 - and %rsi,%r14 - add %r13,%r14 - shld $13,%rbx,%rbp - and %rsi,%rbx - add %r15,%rbx - imulq $19,%rbp,%rdx - add %rdx,%r8 - mov %r8,%rdx - shr $51,%rdx - add %r10,%rdx - mov %rdx,%rcx - shr $51,%rdx - and %rsi,%r8 - add %r12,%rdx - mov %rdx,%r9 - shr $51,%rdx - and %rsi,%rcx - add %r14,%rdx - mov %rdx,%rax - shr $51,%rdx - and %rsi,%r9 - add %rbx,%rdx - mov %rdx,%r10 - shr $51,%rdx - and %rsi,%rax - imulq $19,%rdx,%rdx - add %rdx,%r8 - and %rsi,%r10 - movq %r8,0(%rdi) - movq %rcx,8(%rdi) - movq %r9,16(%rdi) - movq %rax,24(%rdi) - movq %r10,32(%rdi) - movq 0(%rsp),%r11 - movq 8(%rsp),%r12 - movq 16(%rsp),%r13 - movq 24(%rsp),%r14 - movq 32(%rsp),%r15 - movq 40(%rsp),%rbx - movq 48(%rsp),%rbp - leave - ret -ENDPROC(curve25519_sandy2x_fe51_mul) - -.align 32 -ENTRY(curve25519_sandy2x_fe51_nsquare) - push %rbp - mov %rsp,%rbp - sub $64,%rsp - and $-32,%rsp - movq %r11,0(%rsp) - movq %r12,8(%rsp) - movq %r13,16(%rsp) - movq %r14,24(%rsp) - movq %r15,32(%rsp) - movq %rbx,40(%rsp) - movq %rbp,48(%rsp) - movq 0(%rsi),%rcx - movq 8(%rsi),%r8 - movq 16(%rsi),%r9 - movq 24(%rsi),%rax - movq 32(%rsi),%rsi - movq %r9,16(%rdi) - movq %rax,24(%rdi) - movq %rsi,32(%rdi) - mov %rdx,%rsi - - .align 16 - .Lloop: - sub $1,%rsi - mov %rcx,%rax - mul %rcx - add %rcx,%rcx - mov %rax,%r9 - mov %rdx,%r10 - mov %rcx,%rax - mul %r8 - mov %rax,%r11 - mov %rdx,%r12 - mov %rcx,%rax - mulq 16(%rdi) - mov %rax,%r13 - mov %rdx,%r14 - mov %rcx,%rax - mulq 24(%rdi) - mov %rax,%r15 - mov %rdx,%rbx - mov %rcx,%rax - mulq 32(%rdi) - mov %rax,%rcx - mov %rdx,%rbp - mov %r8,%rax - mul %r8 - add %r8,%r8 - add %rax,%r13 - adc %rdx,%r14 - mov %r8,%rax - mulq 16(%rdi) - add %rax,%r15 - adc %rdx,%rbx - mov %r8,%rax - imulq $19, %r8,%r8 - mulq 24(%rdi) - add %rax,%rcx - adc %rdx,%rbp - mov %r8,%rax - mulq 32(%rdi) - add %rax,%r9 - adc %rdx,%r10 - movq 16(%rdi),%rax - mulq 16(%rdi) - add %rax,%rcx - adc %rdx,%rbp - shld $13,%rcx,%rbp - movq 16(%rdi),%rax - imulq $38, %rax,%rax - mulq 24(%rdi) - add %rax,%r9 - adc %rdx,%r10 - shld $13,%r9,%r10 - movq 16(%rdi),%rax - imulq $38, %rax,%rax - mulq 32(%rdi) - add %rax,%r11 - adc %rdx,%r12 - movq 24(%rdi),%rax - imulq $19, %rax,%rax - mulq 24(%rdi) - add %rax,%r11 - adc %rdx,%r12 - shld $13,%r11,%r12 - movq 24(%rdi),%rax - imulq $38, %rax,%rax - mulq 32(%rdi) - add %rax,%r13 - adc %rdx,%r14 - shld $13,%r13,%r14 - movq 32(%rdi),%rax - imulq $19, %rax,%rax - mulq 32(%rdi) - add %rax,%r15 - adc %rdx,%rbx - shld $13,%r15,%rbx - movq curve25519_sandy2x_REDMASK51(%rip),%rdx - and %rdx,%rcx - add %rbx,%rcx - and %rdx,%r9 - and %rdx,%r11 - add %r10,%r11 - and %rdx,%r13 - add %r12,%r13 - and %rdx,%r15 - add %r14,%r15 - imulq $19, %rbp,%rbp - lea (%r9,%rbp),%r9 - mov %r9,%rax - shr $51,%r9 - add %r11,%r9 - and %rdx,%rax - mov %r9,%r8 - shr $51,%r9 - add %r13,%r9 - and %rdx,%r8 - mov %r9,%r10 - shr $51,%r9 - add %r15,%r9 - and %rdx,%r10 - movq %r10,16(%rdi) - mov %r9,%r10 - shr $51,%r9 - add %rcx,%r9 - and %rdx,%r10 - movq %r10,24(%rdi) - mov %r9,%r10 - shr $51,%r9 - imulq $19, %r9,%r9 - lea (%rax,%r9),%rcx - and %rdx,%r10 - movq %r10,32(%rdi) - cmp $0,%rsi - jne .Lloop - - movq %rcx,0(%rdi) - movq %r8,8(%rdi) - movq 0(%rsp),%r11 - movq 8(%rsp),%r12 - movq 16(%rsp),%r13 - movq 24(%rsp),%r14 - movq 32(%rsp),%r15 - movq 40(%rsp),%rbx - movq 48(%rsp),%rbp - leave - ret -ENDPROC(curve25519_sandy2x_fe51_nsquare) - -.align 32 -ENTRY(curve25519_sandy2x_fe51_pack) - push %rbp - mov %rsp,%rbp - sub $32,%rsp - and $-32,%rsp - movq %r11,0(%rsp) - movq %r12,8(%rsp) - movq 0(%rsi),%rdx - movq 8(%rsi),%rcx - movq 16(%rsi),%r8 - movq 24(%rsi),%r9 - movq 32(%rsi),%rsi - movq curve25519_sandy2x_REDMASK51(%rip),%rax - lea -18(%rax),%r10 - mov $3,%r11 - - .align 16 - .Lreduceloop: - mov %rdx,%r12 - shr $51,%r12 - and %rax,%rdx - add %r12,%rcx - mov %rcx,%r12 - shr $51,%r12 - and %rax,%rcx - add %r12,%r8 - mov %r8,%r12 - shr $51,%r12 - and %rax,%r8 - add %r12,%r9 - mov %r9,%r12 - shr $51,%r12 - and %rax,%r9 - add %r12,%rsi - mov %rsi,%r12 - shr $51,%r12 - and %rax,%rsi - imulq $19, %r12,%r12 - add %r12,%rdx - sub $1,%r11 - ja .Lreduceloop - - mov $1,%r12 - cmp %r10,%rdx - cmovl %r11,%r12 - cmp %rax,%rcx - cmovne %r11,%r12 - cmp %rax,%r8 - cmovne %r11,%r12 - cmp %rax,%r9 - cmovne %r11,%r12 - cmp %rax,%rsi - cmovne %r11,%r12 - neg %r12 - and %r12,%rax - and %r12,%r10 - sub %r10,%rdx - sub %rax,%rcx - sub %rax,%r8 - sub %rax,%r9 - sub %rax,%rsi - mov %rdx,%rax - and $0xFF,%eax - movb %al,0(%rdi) - mov %rdx,%rax - shr $8,%rax - and $0xFF,%eax - movb %al,1(%rdi) - mov %rdx,%rax - shr $16,%rax - and $0xFF,%eax - movb %al,2(%rdi) - mov %rdx,%rax - shr $24,%rax - and $0xFF,%eax - movb %al,3(%rdi) - mov %rdx,%rax - shr $32,%rax - and $0xFF,%eax - movb %al,4(%rdi) - mov %rdx,%rax - shr $40,%rax - and $0xFF,%eax - movb %al,5(%rdi) - mov %rdx,%rdx - shr $48,%rdx - mov %rcx,%rax - shl $3,%rax - and $0xF8,%eax - xor %rdx,%rax - movb %al,6(%rdi) - mov %rcx,%rdx - shr $5,%rdx - and $0xFF,%edx - movb %dl,7(%rdi) - mov %rcx,%rdx - shr $13,%rdx - and $0xFF,%edx - movb %dl,8(%rdi) - mov %rcx,%rdx - shr $21,%rdx - and $0xFF,%edx - movb %dl,9(%rdi) - mov %rcx,%rdx - shr $29,%rdx - and $0xFF,%edx - movb %dl,10(%rdi) - mov %rcx,%rdx - shr $37,%rdx - and $0xFF,%edx - movb %dl,11(%rdi) - mov %rcx,%rdx - shr $45,%rdx - mov %r8,%rcx - shl $6,%rcx - and $0xC0,%ecx - xor %rdx,%rcx - movb %cl,12(%rdi) - mov %r8,%rdx - shr $2,%rdx - and $0xFF,%edx - movb %dl,13(%rdi) - mov %r8,%rdx - shr $10,%rdx - and $0xFF,%edx - movb %dl,14(%rdi) - mov %r8,%rdx - shr $18,%rdx - and $0xFF,%edx - movb %dl,15(%rdi) - mov %r8,%rdx - shr $26,%rdx - and $0xFF,%edx - movb %dl,16(%rdi) - mov %r8,%rdx - shr $34,%rdx - and $0xFF,%edx - movb %dl,17(%rdi) - mov %r8,%rdx - shr $42,%rdx - movb %dl,18(%rdi) - mov %r8,%rdx - shr $50,%rdx - mov %r9,%rcx - shl $1,%rcx - and $0xFE,%ecx - xor %rdx,%rcx - movb %cl,19(%rdi) - mov %r9,%rdx - shr $7,%rdx - and $0xFF,%edx - movb %dl,20(%rdi) - mov %r9,%rdx - shr $15,%rdx - and $0xFF,%edx - movb %dl,21(%rdi) - mov %r9,%rdx - shr $23,%rdx - and $0xFF,%edx - movb %dl,22(%rdi) - mov %r9,%rdx - shr $31,%rdx - and $0xFF,%edx - movb %dl,23(%rdi) - mov %r9,%rdx - shr $39,%rdx - and $0xFF,%edx - movb %dl,24(%rdi) - mov %r9,%rdx - shr $47,%rdx - mov %rsi,%rcx - shl $4,%rcx - and $0xF0,%ecx - xor %rdx,%rcx - movb %cl,25(%rdi) - mov %rsi,%rdx - shr $4,%rdx - and $0xFF,%edx - movb %dl,26(%rdi) - mov %rsi,%rdx - shr $12,%rdx - and $0xFF,%edx - movb %dl,27(%rdi) - mov %rsi,%rdx - shr $20,%rdx - and $0xFF,%edx - movb %dl,28(%rdi) - mov %rsi,%rdx - shr $28,%rdx - and $0xFF,%edx - movb %dl,29(%rdi) - mov %rsi,%rdx - shr $36,%rdx - and $0xFF,%edx - movb %dl,30(%rdi) - mov %rsi,%rsi - shr $44,%rsi - movb %sil,31(%rdi) - movq 0(%rsp),%r11 - movq 8(%rsp),%r12 - leave - ret -ENDPROC(curve25519_sandy2x_fe51_pack) - -.align 32 -ENTRY(curve25519_sandy2x_ladder) - push %rbp - mov %rsp,%rbp - sub $1856,%rsp - and $-32,%rsp - movq %r11,1824(%rsp) - movq %r12,1832(%rsp) - movq %r13,1840(%rsp) - movq %r14,1848(%rsp) - vmovdqa curve25519_sandy2x_v0_0(%rip),%xmm0 - vmovdqa curve25519_sandy2x_v1_0(%rip),%xmm1 - vmovdqu 0(%rdi),%xmm2 - vmovdqa %xmm2,0(%rsp) - vmovdqu 16(%rdi),%xmm2 - vmovdqa %xmm2,16(%rsp) - vmovdqu 32(%rdi),%xmm2 - vmovdqa %xmm2,32(%rsp) - vmovdqu 48(%rdi),%xmm2 - vmovdqa %xmm2,48(%rsp) - vmovdqu 64(%rdi),%xmm2 - vmovdqa %xmm2,64(%rsp) - vmovdqa %xmm1,80(%rsp) - vmovdqa %xmm0,96(%rsp) - vmovdqa %xmm0,112(%rsp) - vmovdqa %xmm0,128(%rsp) - vmovdqa %xmm0,144(%rsp) - vmovdqa %xmm1,%xmm0 - vpxor %xmm1,%xmm1,%xmm1 - vpxor %xmm2,%xmm2,%xmm2 - vpxor %xmm3,%xmm3,%xmm3 - vpxor %xmm4,%xmm4,%xmm4 - vpxor %xmm5,%xmm5,%xmm5 - vpxor %xmm6,%xmm6,%xmm6 - vpxor %xmm7,%xmm7,%xmm7 - vpxor %xmm8,%xmm8,%xmm8 - vpxor %xmm9,%xmm9,%xmm9 - vmovdqu 0(%rdi),%xmm10 - vmovdqa %xmm10,160(%rsp) - vmovdqu 16(%rdi),%xmm10 - vmovdqa %xmm10,176(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,192(%rsp) - vmovdqu 32(%rdi),%xmm10 - vmovdqa %xmm10,208(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,224(%rsp) - vmovdqu 48(%rdi),%xmm10 - vmovdqa %xmm10,240(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,256(%rsp) - vmovdqu 64(%rdi),%xmm10 - vmovdqa %xmm10,272(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,288(%rsp) - vmovdqu 8(%rdi),%xmm10 - vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,304(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,320(%rsp) - vmovdqu 24(%rdi),%xmm10 - vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,336(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,352(%rsp) - vmovdqu 40(%rdi),%xmm10 - vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,368(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,384(%rsp) - vmovdqu 56(%rdi),%xmm10 - vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,400(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,416(%rsp) - vmovdqu 0(%rdi),%xmm10 - vmovdqu 64(%rdi),%xmm11 - vblendps $12, %xmm11, %xmm10, %xmm10 - vpshufd $2,%xmm10,%xmm10 - vpmuludq curve25519_sandy2x_v38_1(%rip),%xmm10,%xmm10 - vmovdqa %xmm10,432(%rsp) - movq 0(%rsi),%rdx - movq 8(%rsi),%rcx - movq 16(%rsi),%r8 - movq 24(%rsi),%r9 - shrd $1,%rcx,%rdx - shrd $1,%r8,%rcx - shrd $1,%r9,%r8 - shr $1,%r9 - xorq 0(%rsi),%rdx - xorq 8(%rsi),%rcx - xorq 16(%rsi),%r8 - xorq 24(%rsi),%r9 - leaq 800(%rsp),%rsi - mov $64,%rax - - .align 16 - .Lladder_small_loop: - mov %rdx,%r10 - mov %rcx,%r11 - mov %r8,%r12 - mov %r9,%r13 - shr $1,%rdx - shr $1,%rcx - shr $1,%r8 - shr $1,%r9 - and $1,%r10d - and $1,%r11d - and $1,%r12d - and $1,%r13d - neg %r10 - neg %r11 - neg %r12 - neg %r13 - movl %r10d,0(%rsi) - movl %r11d,256(%rsi) - movl %r12d,512(%rsi) - movl %r13d,768(%rsi) - add $4,%rsi - sub $1,%rax - jne .Lladder_small_loop - mov $255,%rdx - add $760,%rsi - - .align 16 - .Lladder_loop: - sub $1,%rdx - vbroadcastss 0(%rsi),%xmm10 - sub $4,%rsi - vmovdqa 0(%rsp),%xmm11 - vmovdqa 80(%rsp),%xmm12 - vpxor %xmm11,%xmm0,%xmm13 - vpand %xmm10,%xmm13,%xmm13 - vpxor %xmm13,%xmm0,%xmm0 - vpxor %xmm13,%xmm11,%xmm11 - vpxor %xmm12,%xmm1,%xmm13 - vpand %xmm10,%xmm13,%xmm13 - vpxor %xmm13,%xmm1,%xmm1 - vpxor %xmm13,%xmm12,%xmm12 - vmovdqa 16(%rsp),%xmm13 - vmovdqa 96(%rsp),%xmm14 - vpxor %xmm13,%xmm2,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm2,%xmm2 - vpxor %xmm15,%xmm13,%xmm13 - vpxor %xmm14,%xmm3,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm3,%xmm3 - vpxor %xmm15,%xmm14,%xmm14 - vmovdqa %xmm13,0(%rsp) - vmovdqa %xmm14,16(%rsp) - vmovdqa 32(%rsp),%xmm13 - vmovdqa 112(%rsp),%xmm14 - vpxor %xmm13,%xmm4,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm4,%xmm4 - vpxor %xmm15,%xmm13,%xmm13 - vpxor %xmm14,%xmm5,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm5,%xmm5 - vpxor %xmm15,%xmm14,%xmm14 - vmovdqa %xmm13,32(%rsp) - vmovdqa %xmm14,80(%rsp) - vmovdqa 48(%rsp),%xmm13 - vmovdqa 128(%rsp),%xmm14 - vpxor %xmm13,%xmm6,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm6,%xmm6 - vpxor %xmm15,%xmm13,%xmm13 - vpxor %xmm14,%xmm7,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm7,%xmm7 - vpxor %xmm15,%xmm14,%xmm14 - vmovdqa %xmm13,48(%rsp) - vmovdqa %xmm14,96(%rsp) - vmovdqa 64(%rsp),%xmm13 - vmovdqa 144(%rsp),%xmm14 - vpxor %xmm13,%xmm8,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm8,%xmm8 - vpxor %xmm15,%xmm13,%xmm13 - vpxor %xmm14,%xmm9,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm9,%xmm9 - vpxor %xmm15,%xmm14,%xmm14 - vmovdqa %xmm13,64(%rsp) - vmovdqa %xmm14,112(%rsp) - vpaddq curve25519_sandy2x_subc0(%rip),%xmm11,%xmm10 - vpsubq %xmm12,%xmm10,%xmm10 - vpaddq %xmm12,%xmm11,%xmm11 - vpunpckhqdq %xmm10,%xmm11,%xmm12 - vpunpcklqdq %xmm10,%xmm11,%xmm10 - vpaddq %xmm1,%xmm0,%xmm11 - vpaddq curve25519_sandy2x_subc0(%rip),%xmm0,%xmm0 - vpsubq %xmm1,%xmm0,%xmm0 - vpunpckhqdq %xmm11,%xmm0,%xmm1 - vpunpcklqdq %xmm11,%xmm0,%xmm0 - vpmuludq %xmm0,%xmm10,%xmm11 - vpmuludq %xmm1,%xmm10,%xmm13 - vmovdqa %xmm1,128(%rsp) - vpaddq %xmm1,%xmm1,%xmm1 - vpmuludq %xmm0,%xmm12,%xmm14 - vmovdqa %xmm0,144(%rsp) - vpaddq %xmm14,%xmm13,%xmm13 - vpmuludq %xmm1,%xmm12,%xmm0 - vmovdqa %xmm1,448(%rsp) - vpaddq %xmm3,%xmm2,%xmm1 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm2,%xmm2 - vpsubq %xmm3,%xmm2,%xmm2 - vpunpckhqdq %xmm1,%xmm2,%xmm3 - vpunpcklqdq %xmm1,%xmm2,%xmm1 - vpmuludq %xmm1,%xmm10,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpmuludq %xmm3,%xmm10,%xmm2 - vmovdqa %xmm3,464(%rsp) - vpaddq %xmm3,%xmm3,%xmm3 - vpmuludq %xmm1,%xmm12,%xmm14 - vmovdqa %xmm1,480(%rsp) - vpaddq %xmm14,%xmm2,%xmm2 - vpmuludq %xmm3,%xmm12,%xmm1 - vmovdqa %xmm3,496(%rsp) - vpaddq %xmm5,%xmm4,%xmm3 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm4,%xmm4 - vpsubq %xmm5,%xmm4,%xmm4 - vpunpckhqdq %xmm3,%xmm4,%xmm5 - vpunpcklqdq %xmm3,%xmm4,%xmm3 - vpmuludq %xmm3,%xmm10,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vpmuludq %xmm5,%xmm10,%xmm4 - vmovdqa %xmm5,512(%rsp) - vpaddq %xmm5,%xmm5,%xmm5 - vpmuludq %xmm3,%xmm12,%xmm14 - vmovdqa %xmm3,528(%rsp) - vpaddq %xmm14,%xmm4,%xmm4 - vpaddq %xmm7,%xmm6,%xmm3 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm6,%xmm6 - vpsubq %xmm7,%xmm6,%xmm6 - vpunpckhqdq %xmm3,%xmm6,%xmm7 - vpunpcklqdq %xmm3,%xmm6,%xmm3 - vpmuludq %xmm3,%xmm10,%xmm6 - vpmuludq %xmm5,%xmm12,%xmm14 - vmovdqa %xmm5,544(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm5,%xmm5 - vmovdqa %xmm5,560(%rsp) - vpaddq %xmm14,%xmm6,%xmm6 - vpmuludq %xmm7,%xmm10,%xmm5 - vmovdqa %xmm7,576(%rsp) - vpaddq %xmm7,%xmm7,%xmm7 - vpmuludq %xmm3,%xmm12,%xmm14 - vmovdqa %xmm3,592(%rsp) - vpaddq %xmm14,%xmm5,%xmm5 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vmovdqa %xmm3,608(%rsp) - vpaddq %xmm9,%xmm8,%xmm3 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm8,%xmm8 - vpsubq %xmm9,%xmm8,%xmm8 - vpunpckhqdq %xmm3,%xmm8,%xmm9 - vpunpcklqdq %xmm3,%xmm8,%xmm3 - vmovdqa %xmm3,624(%rsp) - vpmuludq %xmm7,%xmm12,%xmm8 - vmovdqa %xmm7,640(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm7,%xmm7 - vmovdqa %xmm7,656(%rsp) - vpmuludq %xmm3,%xmm10,%xmm7 - vpaddq %xmm7,%xmm8,%xmm8 - vpmuludq %xmm9,%xmm10,%xmm7 - vmovdqa %xmm9,672(%rsp) - vpaddq %xmm9,%xmm9,%xmm9 - vpmuludq %xmm3,%xmm12,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vmovdqa %xmm3,688(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm12,%xmm12 - vpmuludq %xmm9,%xmm12,%xmm3 - vmovdqa %xmm9,704(%rsp) - vpaddq %xmm3,%xmm11,%xmm11 - vmovdqa 0(%rsp),%xmm3 - vmovdqa 16(%rsp),%xmm9 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10 - vpsubq %xmm9,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm10,%xmm3,%xmm9 - vpunpcklqdq %xmm10,%xmm3,%xmm3 - vpmuludq 144(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpmuludq 128(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm2,%xmm2 - vpmuludq 480(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpmuludq 464(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm4,%xmm4 - vpmuludq 528(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpmuludq 512(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm5,%xmm5 - vpmuludq 592(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpmuludq 576(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vpmuludq 624(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpmuludq 672(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 144(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 448(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm1,%xmm1 - vpmuludq 480(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 496(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 528(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 544(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpmuludq 592(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9 - vpmuludq 640(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 624(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 704(%rsp),%xmm9,%xmm9 - vpaddq %xmm9,%xmm0,%xmm0 - vmovdqa 32(%rsp),%xmm3 - vmovdqa 80(%rsp),%xmm9 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10 - vpsubq %xmm9,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm10,%xmm3,%xmm9 - vpunpcklqdq %xmm10,%xmm3,%xmm3 - vpmuludq 144(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpmuludq 128(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm4,%xmm4 - vpmuludq 480(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpmuludq 464(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm5,%xmm5 - vpmuludq 528(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpmuludq 512(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vpmuludq 592(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpmuludq 576(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm13,%xmm13 - vpmuludq 624(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpmuludq 672(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 144(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 448(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 480(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 496(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpmuludq 528(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9 - vpmuludq 544(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 592(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 640(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm0,%xmm0 - vpmuludq 624(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 704(%rsp),%xmm9,%xmm9 - vpaddq %xmm9,%xmm1,%xmm1 - vmovdqa 48(%rsp),%xmm3 - vmovdqa 96(%rsp),%xmm9 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10 - vpsubq %xmm9,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm10,%xmm3,%xmm9 - vpunpcklqdq %xmm10,%xmm3,%xmm3 - vpmuludq 144(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpmuludq 128(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm5,%xmm5 - vpmuludq 480(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpmuludq 464(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vpmuludq 528(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpmuludq 512(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm13,%xmm13 - vpmuludq 592(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpmuludq 576(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm2,%xmm2 - vpmuludq 624(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpmuludq 672(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 144(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 448(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpmuludq 480(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9 - vpmuludq 496(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 528(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 544(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm0,%xmm0 - vpmuludq 592(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 640(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm1,%xmm1 - vpmuludq 624(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 704(%rsp),%xmm9,%xmm9 - vpaddq %xmm9,%xmm6,%xmm6 - vmovdqa 64(%rsp),%xmm3 - vmovdqa 112(%rsp),%xmm9 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10 - vpsubq %xmm9,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm10,%xmm3,%xmm9 - vpunpcklqdq %xmm10,%xmm3,%xmm3 - vpmuludq 144(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpmuludq 128(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vpmuludq 480(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpmuludq 464(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm13,%xmm13 - vpmuludq 528(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpmuludq 512(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm2,%xmm2 - vpmuludq 592(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpmuludq 576(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm4,%xmm4 - vpmuludq 624(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpmuludq 672(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 144(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9 - vpmuludq 448(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 480(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 496(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm0,%xmm0 - vpmuludq 528(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 544(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm1,%xmm1 - vpmuludq 592(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 640(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 624(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 704(%rsp),%xmm9,%xmm9 - vpaddq %xmm9,%xmm8,%xmm8 - vpsrlq $25,%xmm4,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4 - vpsrlq $26,%xmm11,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11 - vpsrlq $26,%xmm6,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6 - vpsrlq $25,%xmm13,%xmm3 - vpaddq %xmm3,%xmm0,%xmm0 - vpand curve25519_sandy2x_m25(%rip),%xmm13,%xmm13 - vpsrlq $25,%xmm5,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5 - vpsrlq $26,%xmm0,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpand curve25519_sandy2x_m26(%rip),%xmm0,%xmm0 - vpsrlq $26,%xmm8,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8 - vpsrlq $25,%xmm2,%xmm3 - vpaddq %xmm3,%xmm1,%xmm1 - vpand curve25519_sandy2x_m25(%rip),%xmm2,%xmm2 - vpsrlq $25,%xmm7,%xmm3 - vpsllq $4,%xmm3,%xmm9 - vpaddq %xmm3,%xmm11,%xmm11 - vpsllq $1,%xmm3,%xmm3 - vpaddq %xmm3,%xmm9,%xmm9 - vpaddq %xmm9,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7 - vpsrlq $26,%xmm1,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1 - vpsrlq $26,%xmm11,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11 - vpsrlq $25,%xmm4,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4 - vpunpcklqdq %xmm13,%xmm11,%xmm3 - vpunpckhqdq %xmm13,%xmm11,%xmm9 - vpaddq curve25519_sandy2x_subc0(%rip),%xmm9,%xmm10 - vpsubq %xmm3,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm3,%xmm10,%xmm9 - vpunpcklqdq %xmm3,%xmm10,%xmm10 - vpmuludq %xmm10,%xmm10,%xmm3 - vpaddq %xmm10,%xmm10,%xmm10 - vpmuludq %xmm9,%xmm10,%xmm11 - vpunpcklqdq %xmm2,%xmm0,%xmm12 - vpunpckhqdq %xmm2,%xmm0,%xmm0 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm0,%xmm2 - vpsubq %xmm12,%xmm2,%xmm2 - vpaddq %xmm0,%xmm12,%xmm12 - vpunpckhqdq %xmm12,%xmm2,%xmm0 - vpunpcklqdq %xmm12,%xmm2,%xmm2 - vpmuludq %xmm2,%xmm10,%xmm12 - vpaddq %xmm9,%xmm9,%xmm13 - vpmuludq %xmm13,%xmm9,%xmm9 - vpaddq %xmm9,%xmm12,%xmm12 - vpmuludq %xmm0,%xmm10,%xmm9 - vpmuludq %xmm2,%xmm13,%xmm14 - vpaddq %xmm14,%xmm9,%xmm9 - vpunpcklqdq %xmm4,%xmm1,%xmm14 - vpunpckhqdq %xmm4,%xmm1,%xmm1 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm1,%xmm4 - vpsubq %xmm14,%xmm4,%xmm4 - vpaddq %xmm1,%xmm14,%xmm14 - vpunpckhqdq %xmm14,%xmm4,%xmm1 - vpunpcklqdq %xmm14,%xmm4,%xmm4 - vmovdqa %xmm1,0(%rsp) - vpaddq %xmm1,%xmm1,%xmm1 - vmovdqa %xmm1,16(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vmovdqa %xmm1,32(%rsp) - vpmuludq %xmm4,%xmm10,%xmm1 - vpmuludq %xmm2,%xmm2,%xmm14 - vpaddq %xmm14,%xmm1,%xmm1 - vpmuludq 0(%rsp),%xmm10,%xmm14 - vpmuludq %xmm4,%xmm13,%xmm15 - vpaddq %xmm15,%xmm14,%xmm14 - vpunpcklqdq %xmm5,%xmm6,%xmm15 - vpunpckhqdq %xmm5,%xmm6,%xmm5 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm6 - vpsubq %xmm15,%xmm6,%xmm6 - vpaddq %xmm5,%xmm15,%xmm15 - vpunpckhqdq %xmm15,%xmm6,%xmm5 - vpunpcklqdq %xmm15,%xmm6,%xmm6 - vmovdqa %xmm6,48(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm6,%xmm6 - vmovdqa %xmm6,64(%rsp) - vmovdqa %xmm5,80(%rsp) - vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm5,%xmm5 - vmovdqa %xmm5,96(%rsp) - vpmuludq 48(%rsp),%xmm10,%xmm5 - vpaddq %xmm0,%xmm0,%xmm6 - vpmuludq %xmm6,%xmm0,%xmm0 - vpaddq %xmm0,%xmm5,%xmm5 - vpmuludq 80(%rsp),%xmm10,%xmm0 - vpmuludq %xmm4,%xmm6,%xmm15 - vpaddq %xmm15,%xmm0,%xmm0 - vpmuludq %xmm6,%xmm13,%xmm15 - vpaddq %xmm15,%xmm1,%xmm1 - vpmuludq %xmm6,%xmm2,%xmm15 - vpaddq %xmm15,%xmm14,%xmm14 - vpunpcklqdq %xmm7,%xmm8,%xmm15 - vpunpckhqdq %xmm7,%xmm8,%xmm7 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm8 - vpsubq %xmm15,%xmm8,%xmm8 - vpaddq %xmm7,%xmm15,%xmm15 - vpunpckhqdq %xmm15,%xmm8,%xmm7 - vpunpcklqdq %xmm15,%xmm8,%xmm8 - vmovdqa %xmm8,112(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm8,%xmm8 - vmovdqa %xmm8,448(%rsp) - vpmuludq 112(%rsp),%xmm10,%xmm8 - vpmuludq %xmm7,%xmm10,%xmm10 - vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm7,%xmm15 - vpmuludq %xmm15,%xmm7,%xmm7 - vpaddq %xmm7,%xmm8,%xmm8 - vpmuludq %xmm15,%xmm13,%xmm7 - vpaddq %xmm7,%xmm3,%xmm3 - vpmuludq %xmm15,%xmm2,%xmm7 - vpaddq %xmm7,%xmm11,%xmm11 - vpmuludq 80(%rsp),%xmm13,%xmm7 - vpaddq %xmm7,%xmm7,%xmm7 - vpaddq %xmm7,%xmm8,%xmm8 - vpmuludq 16(%rsp),%xmm13,%xmm7 - vpaddq %xmm7,%xmm5,%xmm5 - vpmuludq 48(%rsp),%xmm13,%xmm7 - vpaddq %xmm7,%xmm0,%xmm0 - vpmuludq 112(%rsp),%xmm13,%xmm7 - vpaddq %xmm7,%xmm10,%xmm10 - vpmuludq %xmm15,%xmm6,%xmm7 - vpaddq %xmm7,%xmm12,%xmm12 - vpmuludq %xmm15,%xmm4,%xmm7 - vpaddq %xmm7,%xmm9,%xmm9 - vpaddq %xmm2,%xmm2,%xmm2 - vpmuludq %xmm4,%xmm2,%xmm7 - vpaddq %xmm7,%xmm5,%xmm5 - vpmuludq 448(%rsp),%xmm2,%xmm7 - vpaddq %xmm7,%xmm3,%xmm3 - vpmuludq 448(%rsp),%xmm6,%xmm7 - vpaddq %xmm7,%xmm11,%xmm11 - vpmuludq 0(%rsp),%xmm2,%xmm7 - vpaddq %xmm7,%xmm0,%xmm0 - vpmuludq 48(%rsp),%xmm2,%xmm7 - vpaddq %xmm7,%xmm8,%xmm8 - vpmuludq 80(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 96(%rsp),%xmm4,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq %xmm4,%xmm4,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpaddq %xmm4,%xmm4,%xmm2 - vpmuludq 448(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm12,%xmm12 - vpmuludq 16(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vpmuludq 48(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm14,%xmm14 - vpmuludq 96(%rsp),%xmm6,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vmovdqa 16(%rsp),%xmm4 - vpmuludq 448(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm9,%xmm9 - vpmuludq 16(%rsp),%xmm6,%xmm4 - vpaddq %xmm4,%xmm8,%xmm8 - vpmuludq 48(%rsp),%xmm6,%xmm4 - vpaddq %xmm4,%xmm10,%xmm10 - vpmuludq 80(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm4,%xmm4 - vpaddq %xmm4,%xmm5,%xmm5 - vpmuludq 112(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm0,%xmm0 - vmovdqa 48(%rsp),%xmm4 - vpaddq %xmm4,%xmm4,%xmm4 - vpmuludq 448(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vmovdqa 80(%rsp),%xmm4 - vpaddq %xmm4,%xmm4,%xmm4 - vpmuludq 448(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm14,%xmm14 - vpmuludq 64(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vmovdqa 16(%rsp),%xmm4 - vpmuludq 64(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm11,%xmm11 - vmovdqa 16(%rsp),%xmm4 - vpmuludq 96(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm12,%xmm12 - vmovdqa 48(%rsp),%xmm4 - vpmuludq 96(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm9,%xmm9 - vpmuludq 0(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vmovdqa 32(%rsp),%xmm2 - vpmuludq 0(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vmovdqa 64(%rsp),%xmm2 - vpmuludq 48(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vmovdqa 96(%rsp),%xmm2 - vpmuludq 80(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm1,%xmm1 - vmovdqa 448(%rsp),%xmm2 - vpmuludq 112(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpsrlq $26,%xmm3,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3 - vpsrlq $25,%xmm14,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14 - vpsrlq $25,%xmm11,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpand curve25519_sandy2x_m25(%rip),%xmm11,%xmm11 - vpsrlq $26,%xmm5,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5 - vpsrlq $26,%xmm12,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpand curve25519_sandy2x_m26(%rip),%xmm12,%xmm12 - vpsrlq $25,%xmm0,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0 - vpsrlq $25,%xmm9,%xmm2 - vpaddq %xmm2,%xmm1,%xmm1 - vpand curve25519_sandy2x_m25(%rip),%xmm9,%xmm9 - vpsrlq $26,%xmm8,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8 - vpsrlq $26,%xmm1,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1 - vpsrlq $25,%xmm10,%xmm2 - vpsllq $4,%xmm2,%xmm4 - vpaddq %xmm2,%xmm3,%xmm3 - vpsllq $1,%xmm2,%xmm2 - vpaddq %xmm2,%xmm4,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $25,%xmm14,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14 - vpsrlq $26,%xmm3,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3 - vpunpckhqdq %xmm11,%xmm3,%xmm2 - vmovdqa %xmm2,0(%rsp) - vpshufd $0,%xmm3,%xmm2 - vpshufd $0,%xmm11,%xmm3 - vpmuludq 160(%rsp),%xmm2,%xmm4 - vpmuludq 432(%rsp),%xmm3,%xmm6 - vpaddq %xmm6,%xmm4,%xmm4 - vpmuludq 176(%rsp),%xmm2,%xmm6 - vpmuludq 304(%rsp),%xmm3,%xmm7 - vpaddq %xmm7,%xmm6,%xmm6 - vpmuludq 208(%rsp),%xmm2,%xmm7 - vpmuludq 336(%rsp),%xmm3,%xmm11 - vpaddq %xmm11,%xmm7,%xmm7 - vpmuludq 240(%rsp),%xmm2,%xmm11 - vpmuludq 368(%rsp),%xmm3,%xmm13 - vpaddq %xmm13,%xmm11,%xmm11 - vpmuludq 272(%rsp),%xmm2,%xmm2 - vpmuludq 400(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpunpckhqdq %xmm9,%xmm12,%xmm3 - vmovdqa %xmm3,16(%rsp) - vpshufd $0,%xmm12,%xmm3 - vpshufd $0,%xmm9,%xmm9 - vpmuludq 288(%rsp),%xmm3,%xmm12 - vpaddq %xmm12,%xmm4,%xmm4 - vpmuludq 416(%rsp),%xmm9,%xmm12 - vpaddq %xmm12,%xmm4,%xmm4 - vpmuludq 160(%rsp),%xmm3,%xmm12 - vpaddq %xmm12,%xmm6,%xmm6 - vpmuludq 432(%rsp),%xmm9,%xmm12 - vpaddq %xmm12,%xmm6,%xmm6 - vpmuludq 176(%rsp),%xmm3,%xmm12 - vpaddq %xmm12,%xmm7,%xmm7 - vpmuludq 304(%rsp),%xmm9,%xmm12 - vpaddq %xmm12,%xmm7,%xmm7 - vpmuludq 208(%rsp),%xmm3,%xmm12 - vpaddq %xmm12,%xmm11,%xmm11 - vpmuludq 336(%rsp),%xmm9,%xmm12 - vpaddq %xmm12,%xmm11,%xmm11 - vpmuludq 240(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 368(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpunpckhqdq %xmm14,%xmm1,%xmm3 - vmovdqa %xmm3,32(%rsp) - vpshufd $0,%xmm1,%xmm1 - vpshufd $0,%xmm14,%xmm3 - vpmuludq 256(%rsp),%xmm1,%xmm9 - vpaddq %xmm9,%xmm4,%xmm4 - vpmuludq 384(%rsp),%xmm3,%xmm9 - vpaddq %xmm9,%xmm4,%xmm4 - vpmuludq 288(%rsp),%xmm1,%xmm9 - vpaddq %xmm9,%xmm6,%xmm6 - vpmuludq 416(%rsp),%xmm3,%xmm9 - vpaddq %xmm9,%xmm6,%xmm6 - vpmuludq 160(%rsp),%xmm1,%xmm9 - vpaddq %xmm9,%xmm7,%xmm7 - vpmuludq 432(%rsp),%xmm3,%xmm9 - vpaddq %xmm9,%xmm7,%xmm7 - vpmuludq 176(%rsp),%xmm1,%xmm9 - vpaddq %xmm9,%xmm11,%xmm11 - vpmuludq 304(%rsp),%xmm3,%xmm9 - vpaddq %xmm9,%xmm11,%xmm11 - vpmuludq 208(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm2,%xmm2 - vpmuludq 336(%rsp),%xmm3,%xmm1 - vpaddq %xmm1,%xmm2,%xmm2 - vpunpckhqdq %xmm0,%xmm5,%xmm1 - vmovdqa %xmm1,48(%rsp) - vpshufd $0,%xmm5,%xmm1 - vpshufd $0,%xmm0,%xmm0 - vpmuludq 224(%rsp),%xmm1,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 352(%rsp),%xmm0,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 256(%rsp),%xmm1,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 384(%rsp),%xmm0,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 288(%rsp),%xmm1,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq 416(%rsp),%xmm0,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq 160(%rsp),%xmm1,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 432(%rsp),%xmm0,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 176(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm2,%xmm2 - vpmuludq 304(%rsp),%xmm0,%xmm0 - vpaddq %xmm0,%xmm2,%xmm2 - vpunpckhqdq %xmm10,%xmm8,%xmm0 - vmovdqa %xmm0,64(%rsp) - vpshufd $0,%xmm8,%xmm0 - vpshufd $0,%xmm10,%xmm1 - vpmuludq 192(%rsp),%xmm0,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 320(%rsp),%xmm1,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 224(%rsp),%xmm0,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 352(%rsp),%xmm1,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 256(%rsp),%xmm0,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq 384(%rsp),%xmm1,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq 288(%rsp),%xmm0,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 416(%rsp),%xmm1,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 160(%rsp),%xmm0,%xmm0 - vpaddq %xmm0,%xmm2,%xmm2 - vpmuludq 432(%rsp),%xmm1,%xmm0 - vpaddq %xmm0,%xmm2,%xmm2 - vmovdqa %xmm4,80(%rsp) - vmovdqa %xmm6,96(%rsp) - vmovdqa %xmm7,112(%rsp) - vmovdqa %xmm11,448(%rsp) - vmovdqa %xmm2,496(%rsp) - vmovdqa 144(%rsp),%xmm0 - vpmuludq %xmm0,%xmm0,%xmm1 - vpaddq %xmm0,%xmm0,%xmm0 - vmovdqa 128(%rsp),%xmm2 - vpmuludq %xmm2,%xmm0,%xmm3 - vmovdqa 480(%rsp),%xmm4 - vpmuludq %xmm4,%xmm0,%xmm5 - vmovdqa 464(%rsp),%xmm6 - vpmuludq %xmm6,%xmm0,%xmm7 - vmovdqa 528(%rsp),%xmm8 - vpmuludq %xmm8,%xmm0,%xmm9 - vpmuludq 512(%rsp),%xmm0,%xmm10 - vpmuludq 592(%rsp),%xmm0,%xmm11 - vpmuludq 576(%rsp),%xmm0,%xmm12 - vpmuludq 624(%rsp),%xmm0,%xmm13 - vmovdqa 672(%rsp),%xmm14 - vpmuludq %xmm14,%xmm0,%xmm0 - vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm14,%xmm15 - vpmuludq %xmm15,%xmm14,%xmm14 - vpaddq %xmm14,%xmm13,%xmm13 - vpaddq %xmm6,%xmm6,%xmm14 - vpmuludq %xmm14,%xmm6,%xmm6 - vpaddq %xmm6,%xmm11,%xmm11 - vpaddq %xmm2,%xmm2,%xmm6 - vpmuludq %xmm6,%xmm2,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq %xmm15,%xmm6,%xmm2 - vpaddq %xmm2,%xmm1,%xmm1 - vpmuludq %xmm15,%xmm4,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vpmuludq 544(%rsp),%xmm6,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq 592(%rsp),%xmm6,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 640(%rsp),%xmm6,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 624(%rsp),%xmm6,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpmuludq %xmm4,%xmm6,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq %xmm14,%xmm6,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq %xmm8,%xmm6,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq %xmm15,%xmm14,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq %xmm15,%xmm8,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq %xmm4,%xmm4,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq %xmm14,%xmm4,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpaddq %xmm4,%xmm4,%xmm2 - vpmuludq %xmm8,%xmm2,%xmm4 - vpaddq %xmm4,%xmm11,%xmm11 - vpmuludq 688(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vpmuludq 688(%rsp),%xmm14,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vpmuludq 512(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm12,%xmm12 - vpmuludq 592(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm13,%xmm13 - vpmuludq 576(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpmuludq 656(%rsp),%xmm8,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vpmuludq %xmm8,%xmm14,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq %xmm8,%xmm8,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpaddq %xmm8,%xmm8,%xmm2 - vpmuludq 688(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm5,%xmm5 - vpmuludq 544(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm9,%xmm9 - vpmuludq 592(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm10,%xmm10 - vpmuludq 656(%rsp),%xmm14,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vmovdqa 544(%rsp),%xmm4 - vpmuludq 688(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm7,%xmm7 - vpmuludq 544(%rsp),%xmm14,%xmm4 - vpaddq %xmm4,%xmm13,%xmm13 - vpmuludq 592(%rsp),%xmm14,%xmm4 - vpaddq %xmm4,%xmm0,%xmm0 - vpmuludq 640(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm11,%xmm11 - vpmuludq 624(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm12,%xmm12 - vmovdqa 592(%rsp),%xmm4 - vpaddq %xmm4,%xmm4,%xmm4 - vpmuludq 688(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm9,%xmm9 - vpmuludq 608(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vmovdqa 544(%rsp),%xmm4 - vpmuludq 608(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vmovdqa 544(%rsp),%xmm4 - vpmuludq 656(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm5,%xmm5 - vmovdqa 592(%rsp),%xmm4 - vpmuludq 656(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm7,%xmm7 - vmovdqa 640(%rsp),%xmm4 - vpmuludq 688(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm10,%xmm10 - vpmuludq 512(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vmovdqa 560(%rsp),%xmm2 - vpmuludq 512(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm1,%xmm1 - vmovdqa 608(%rsp),%xmm2 - vpmuludq 592(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vmovdqa 656(%rsp),%xmm2 - vpmuludq 576(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vmovdqa 688(%rsp),%xmm2 - vpmuludq 624(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpsrlq $26,%xmm1,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1 - vpsrlq $25,%xmm10,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $25,%xmm3,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3 - vpsrlq $26,%xmm11,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11 - vpsrlq $26,%xmm5,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5 - vpsrlq $25,%xmm12,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12 - vpsrlq $25,%xmm7,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7 - vpsrlq $26,%xmm13,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13 - vpsrlq $26,%xmm9,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9 - vpsrlq $25,%xmm0,%xmm2 - vpsllq $4,%xmm2,%xmm4 - vpaddq %xmm2,%xmm1,%xmm1 - vpsllq $1,%xmm2,%xmm2 - vpaddq %xmm2,%xmm4,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0 - vpsrlq $25,%xmm10,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $26,%xmm1,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1 - vpunpckhqdq %xmm3,%xmm1,%xmm2 - vpunpcklqdq %xmm3,%xmm1,%xmm1 - vmovdqa %xmm1,464(%rsp) - vpaddq curve25519_sandy2x_subc0(%rip),%xmm2,%xmm3 - vpsubq %xmm1,%xmm3,%xmm3 - vpunpckhqdq %xmm3,%xmm2,%xmm1 - vpunpcklqdq %xmm3,%xmm2,%xmm2 - vmovdqa %xmm2,480(%rsp) - vmovdqa %xmm1,512(%rsp) - vpsllq $1,%xmm1,%xmm1 - vmovdqa %xmm1,528(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm3,%xmm3 - vmovdqa 80(%rsp),%xmm1 - vpunpcklqdq %xmm1,%xmm3,%xmm2 - vpunpckhqdq %xmm1,%xmm3,%xmm1 - vpunpckhqdq %xmm7,%xmm5,%xmm3 - vpunpcklqdq %xmm7,%xmm5,%xmm4 - vmovdqa %xmm4,544(%rsp) - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm5 - vpsubq %xmm4,%xmm5,%xmm5 - vpunpckhqdq %xmm5,%xmm3,%xmm4 - vpunpcklqdq %xmm5,%xmm3,%xmm3 - vmovdqa %xmm3,560(%rsp) - vmovdqa %xmm4,576(%rsp) - vpsllq $1,%xmm4,%xmm4 - vmovdqa %xmm4,592(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm5,%xmm5 - vmovdqa 96(%rsp),%xmm3 - vpunpcklqdq %xmm3,%xmm5,%xmm4 - vpunpckhqdq %xmm3,%xmm5,%xmm3 - vpunpckhqdq %xmm10,%xmm9,%xmm5 - vpunpcklqdq %xmm10,%xmm9,%xmm6 - vmovdqa %xmm6,608(%rsp) - vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm7 - vpsubq %xmm6,%xmm7,%xmm7 - vpunpckhqdq %xmm7,%xmm5,%xmm6 - vpunpcklqdq %xmm7,%xmm5,%xmm5 - vmovdqa %xmm5,624(%rsp) - vmovdqa %xmm6,640(%rsp) - vpsllq $1,%xmm6,%xmm6 - vmovdqa %xmm6,656(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm7,%xmm7 - vmovdqa 112(%rsp),%xmm5 - vpunpcklqdq %xmm5,%xmm7,%xmm6 - vpunpckhqdq %xmm5,%xmm7,%xmm5 - vpunpckhqdq %xmm12,%xmm11,%xmm7 - vpunpcklqdq %xmm12,%xmm11,%xmm8 - vmovdqa %xmm8,672(%rsp) - vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm9 - vpsubq %xmm8,%xmm9,%xmm9 - vpunpckhqdq %xmm9,%xmm7,%xmm8 - vpunpcklqdq %xmm9,%xmm7,%xmm7 - vmovdqa %xmm7,688(%rsp) - vmovdqa %xmm8,704(%rsp) - vpsllq $1,%xmm8,%xmm8 - vmovdqa %xmm8,720(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm9,%xmm9 - vmovdqa 448(%rsp),%xmm7 - vpunpcklqdq %xmm7,%xmm9,%xmm8 - vpunpckhqdq %xmm7,%xmm9,%xmm7 - vpunpckhqdq %xmm0,%xmm13,%xmm9 - vpunpcklqdq %xmm0,%xmm13,%xmm0 - vmovdqa %xmm0,448(%rsp) - vpaddq curve25519_sandy2x_subc2(%rip),%xmm9,%xmm10 - vpsubq %xmm0,%xmm10,%xmm10 - vpunpckhqdq %xmm10,%xmm9,%xmm0 - vpunpcklqdq %xmm10,%xmm9,%xmm9 - vmovdqa %xmm9,736(%rsp) - vmovdqa %xmm0,752(%rsp) - vpsllq $1,%xmm0,%xmm0 - vmovdqa %xmm0,768(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm10,%xmm10 - vmovdqa 496(%rsp),%xmm0 - vpunpcklqdq %xmm0,%xmm10,%xmm9 - vpunpckhqdq %xmm0,%xmm10,%xmm0 - vpsrlq $26,%xmm2,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2 - vpsrlq $25,%xmm5,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5 - vpsrlq $25,%xmm1,%xmm10 - vpaddq %xmm10,%xmm4,%xmm4 - vpand curve25519_sandy2x_m25(%rip),%xmm1,%xmm1 - vpsrlq $26,%xmm8,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8 - vpsrlq $26,%xmm4,%xmm10 - vpaddq %xmm10,%xmm3,%xmm3 - vpand curve25519_sandy2x_m26(%rip),%xmm4,%xmm4 - vpsrlq $25,%xmm7,%xmm10 - vpaddq %xmm10,%xmm9,%xmm9 - vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7 - vpsrlq $25,%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3 - vpsrlq $26,%xmm9,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9 - vpsrlq $26,%xmm6,%xmm10 - vpaddq %xmm10,%xmm5,%xmm5 - vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6 - vpsrlq $25,%xmm0,%xmm10 - vpsllq $4,%xmm10,%xmm11 - vpaddq %xmm10,%xmm2,%xmm2 - vpsllq $1,%xmm10,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpaddq %xmm11,%xmm2,%xmm2 - vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0 - vpsrlq $25,%xmm5,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5 - vpsrlq $26,%xmm2,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2 - vpunpckhqdq %xmm1,%xmm2,%xmm10 - vmovdqa %xmm10,80(%rsp) - vpunpcklqdq %xmm1,%xmm2,%xmm1 - vpunpckhqdq %xmm3,%xmm4,%xmm2 - vmovdqa %xmm2,96(%rsp) - vpunpcklqdq %xmm3,%xmm4,%xmm2 - vpunpckhqdq %xmm5,%xmm6,%xmm3 - vmovdqa %xmm3,112(%rsp) - vpunpcklqdq %xmm5,%xmm6,%xmm3 - vpunpckhqdq %xmm7,%xmm8,%xmm4 - vmovdqa %xmm4,128(%rsp) - vpunpcklqdq %xmm7,%xmm8,%xmm4 - vpunpckhqdq %xmm0,%xmm9,%xmm5 - vmovdqa %xmm5,144(%rsp) - vpunpcklqdq %xmm0,%xmm9,%xmm0 - vmovdqa 464(%rsp),%xmm5 - vpaddq %xmm5,%xmm1,%xmm1 - vpunpcklqdq %xmm1,%xmm5,%xmm6 - vpunpckhqdq %xmm1,%xmm5,%xmm1 - vpmuludq 512(%rsp),%xmm6,%xmm5 - vpmuludq 480(%rsp),%xmm1,%xmm7 - vpaddq %xmm7,%xmm5,%xmm5 - vpmuludq 560(%rsp),%xmm6,%xmm7 - vpmuludq 528(%rsp),%xmm1,%xmm8 - vpaddq %xmm8,%xmm7,%xmm7 - vpmuludq 576(%rsp),%xmm6,%xmm8 - vpmuludq 560(%rsp),%xmm1,%xmm9 - vpaddq %xmm9,%xmm8,%xmm8 - vpmuludq 624(%rsp),%xmm6,%xmm9 - vpmuludq 592(%rsp),%xmm1,%xmm10 - vpaddq %xmm10,%xmm9,%xmm9 - vpmuludq 640(%rsp),%xmm6,%xmm10 - vpmuludq 624(%rsp),%xmm1,%xmm11 - vpaddq %xmm11,%xmm10,%xmm10 - vpmuludq 688(%rsp),%xmm6,%xmm11 - vpmuludq 656(%rsp),%xmm1,%xmm12 - vpaddq %xmm12,%xmm11,%xmm11 - vpmuludq 704(%rsp),%xmm6,%xmm12 - vpmuludq 688(%rsp),%xmm1,%xmm13 - vpaddq %xmm13,%xmm12,%xmm12 - vpmuludq 736(%rsp),%xmm6,%xmm13 - vpmuludq 720(%rsp),%xmm1,%xmm14 - vpaddq %xmm14,%xmm13,%xmm13 - vpmuludq 752(%rsp),%xmm6,%xmm14 - vpmuludq 736(%rsp),%xmm1,%xmm15 - vpaddq %xmm15,%xmm14,%xmm14 - vpmuludq 480(%rsp),%xmm6,%xmm6 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vpmuludq 768(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm6,%xmm6 - vmovdqa 544(%rsp),%xmm1 - vpaddq %xmm1,%xmm2,%xmm2 - vpunpcklqdq %xmm2,%xmm1,%xmm15 - vpunpckhqdq %xmm2,%xmm1,%xmm1 - vpmuludq 480(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq 512(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 560(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq 576(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 624(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq 640(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 688(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 704(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm15,%xmm15 - vpmuludq 736(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm6,%xmm6 - vpmuludq 752(%rsp),%xmm15,%xmm15 - vpaddq %xmm15,%xmm5,%xmm5 - vpmuludq 480(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 528(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq 560(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 592(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq 624(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 656(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 688(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vpmuludq 720(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm6,%xmm6 - vpmuludq 736(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq 768(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm7,%xmm7 - vmovdqa 608(%rsp),%xmm1 - vpaddq %xmm1,%xmm3,%xmm3 - vpunpcklqdq %xmm3,%xmm1,%xmm2 - vpunpckhqdq %xmm3,%xmm1,%xmm1 - vpmuludq 480(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm9,%xmm9 - vpmuludq 512(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm10,%xmm10 - vpmuludq 560(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 576(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm12,%xmm12 - vpmuludq 624(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 640(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2 - vpmuludq 688(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 704(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 736(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq 752(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 480(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 528(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq 560(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 592(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 624(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vpmuludq 656(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm6,%xmm6 - vpmuludq 688(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq 720(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq 736(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 768(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm9,%xmm9 - vmovdqa 672(%rsp),%xmm1 - vpaddq %xmm1,%xmm4,%xmm4 - vpunpcklqdq %xmm4,%xmm1,%xmm2 - vpunpckhqdq %xmm4,%xmm1,%xmm1 - vpmuludq 480(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 512(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm12,%xmm12 - vpmuludq 560(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 576(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2 - vpmuludq 624(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 640(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 688(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq 704(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpmuludq 736(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm9,%xmm9 - vpmuludq 752(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 480(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 528(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 560(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vpmuludq 592(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm6,%xmm6 - vpmuludq 624(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq 656(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq 688(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 720(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq 736(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 768(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm11,%xmm11 - vmovdqa 448(%rsp),%xmm1 - vpaddq %xmm1,%xmm0,%xmm0 - vpunpcklqdq %xmm0,%xmm1,%xmm2 - vpunpckhqdq %xmm0,%xmm1,%xmm0 - vpmuludq 480(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm13,%xmm13 - vpmuludq 512(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2 - vpmuludq 560(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm6,%xmm6 - vpmuludq 576(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm5,%xmm5 - vpmuludq 624(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm7,%xmm7 - vpmuludq 640(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm8,%xmm8 - vpmuludq 688(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm9,%xmm9 - vpmuludq 704(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm10,%xmm10 - vpmuludq 736(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm11,%xmm11 - vpmuludq 752(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 480(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm0,%xmm0 - vpmuludq 528(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm6,%xmm6 - vpmuludq 560(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm5,%xmm5 - vpmuludq 592(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm7,%xmm7 - vpmuludq 624(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm8,%xmm8 - vpmuludq 656(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm9,%xmm9 - vpmuludq 688(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm10,%xmm10 - vpmuludq 720(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm11,%xmm11 - vpmuludq 736(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm12,%xmm12 - vpmuludq 768(%rsp),%xmm0,%xmm0 - vpaddq %xmm0,%xmm13,%xmm13 - vpsrlq $26,%xmm6,%xmm0 - vpaddq %xmm0,%xmm5,%xmm5 - vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6 - vpsrlq $25,%xmm10,%xmm0 - vpaddq %xmm0,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $25,%xmm5,%xmm0 - vpaddq %xmm0,%xmm7,%xmm7 - vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5 - vpsrlq $26,%xmm11,%xmm0 - vpaddq %xmm0,%xmm12,%xmm12 - vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11 - vpsrlq $26,%xmm7,%xmm0 - vpaddq %xmm0,%xmm8,%xmm8 - vpand curve25519_sandy2x_m26(%rip),%xmm7,%xmm7 - vpsrlq $25,%xmm12,%xmm0 - vpaddq %xmm0,%xmm13,%xmm13 - vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12 - vpsrlq $25,%xmm8,%xmm0 - vpaddq %xmm0,%xmm9,%xmm9 - vpand curve25519_sandy2x_m25(%rip),%xmm8,%xmm8 - vpsrlq $26,%xmm13,%xmm0 - vpaddq %xmm0,%xmm14,%xmm14 - vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13 - vpsrlq $26,%xmm9,%xmm0 - vpaddq %xmm0,%xmm10,%xmm10 - vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9 - vpsrlq $25,%xmm14,%xmm0 - vpsllq $4,%xmm0,%xmm1 - vpaddq %xmm0,%xmm6,%xmm6 - vpsllq $1,%xmm0,%xmm0 - vpaddq %xmm0,%xmm1,%xmm1 - vpaddq %xmm1,%xmm6,%xmm6 - vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14 - vpsrlq $25,%xmm10,%xmm0 - vpaddq %xmm0,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $26,%xmm6,%xmm0 - vpaddq %xmm0,%xmm5,%xmm5 - vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6 - vpunpckhqdq %xmm5,%xmm6,%xmm1 - vpunpcklqdq %xmm5,%xmm6,%xmm0 - vpunpckhqdq %xmm8,%xmm7,%xmm3 - vpunpcklqdq %xmm8,%xmm7,%xmm2 - vpunpckhqdq %xmm10,%xmm9,%xmm5 - vpunpcklqdq %xmm10,%xmm9,%xmm4 - vpunpckhqdq %xmm12,%xmm11,%xmm7 - vpunpcklqdq %xmm12,%xmm11,%xmm6 - vpunpckhqdq %xmm14,%xmm13,%xmm9 - vpunpcklqdq %xmm14,%xmm13,%xmm8 - cmp $0,%rdx - jne .Lladder_loop - vmovdqu %xmm1,160(%rdi) - vmovdqu %xmm0,80(%rdi) - vmovdqu %xmm3,176(%rdi) - vmovdqu %xmm2,96(%rdi) - vmovdqu %xmm5,192(%rdi) - vmovdqu %xmm4,112(%rdi) - vmovdqu %xmm7,208(%rdi) - vmovdqu %xmm6,128(%rdi) - vmovdqu %xmm9,224(%rdi) - vmovdqu %xmm8,144(%rdi) - movq 1824(%rsp),%r11 - movq 1832(%rsp),%r12 - movq 1840(%rsp),%r13 - movq 1848(%rsp),%r14 - leave - ret -ENDPROC(curve25519_sandy2x_ladder) - -.align 32 -ENTRY(curve25519_sandy2x_ladder_base) - push %rbp - mov %rsp,%rbp - sub $1568,%rsp - and $-32,%rsp - movq %r11,1536(%rsp) - movq %r12,1544(%rsp) - movq %r13,1552(%rsp) - vmovdqa curve25519_sandy2x_v0_0(%rip),%xmm0 - vmovdqa curve25519_sandy2x_v1_0(%rip),%xmm1 - vmovdqa curve25519_sandy2x_v9_0(%rip),%xmm2 - vmovdqa %xmm2,0(%rsp) - vmovdqa %xmm0,16(%rsp) - vmovdqa %xmm0,32(%rsp) - vmovdqa %xmm0,48(%rsp) - vmovdqa %xmm0,64(%rsp) - vmovdqa %xmm1,80(%rsp) - vmovdqa %xmm0,96(%rsp) - vmovdqa %xmm0,112(%rsp) - vmovdqa %xmm0,128(%rsp) - vmovdqa %xmm0,144(%rsp) - vmovdqa %xmm1,%xmm0 - vpxor %xmm1,%xmm1,%xmm1 - vpxor %xmm2,%xmm2,%xmm2 - vpxor %xmm3,%xmm3,%xmm3 - vpxor %xmm4,%xmm4,%xmm4 - vpxor %xmm5,%xmm5,%xmm5 - vpxor %xmm6,%xmm6,%xmm6 - vpxor %xmm7,%xmm7,%xmm7 - vpxor %xmm8,%xmm8,%xmm8 - vpxor %xmm9,%xmm9,%xmm9 - movq 0(%rsi),%rdx - movq 8(%rsi),%rcx - movq 16(%rsi),%r8 - movq 24(%rsi),%r9 - shrd $1,%rcx,%rdx - shrd $1,%r8,%rcx - shrd $1,%r9,%r8 - shr $1,%r9 - xorq 0(%rsi),%rdx - xorq 8(%rsi),%rcx - xorq 16(%rsi),%r8 - xorq 24(%rsi),%r9 - leaq 512(%rsp),%rsi - mov $64,%rax - - .align 16 - .Lladder_base_small_loop: - mov %rdx,%r10 - mov %rcx,%r11 - mov %r8,%r12 - mov %r9,%r13 - shr $1,%rdx - shr $1,%rcx - shr $1,%r8 - shr $1,%r9 - and $1,%r10d - and $1,%r11d - and $1,%r12d - and $1,%r13d - neg %r10 - neg %r11 - neg %r12 - neg %r13 - movl %r10d,0(%rsi) - movl %r11d,256(%rsi) - movl %r12d,512(%rsi) - movl %r13d,768(%rsi) - add $4,%rsi - sub $1,%rax - jne .Lladder_base_small_loop - mov $255,%rdx - add $760,%rsi - - .align 16 - .Lladder_base_loop: - sub $1,%rdx - vbroadcastss 0(%rsi),%xmm10 - sub $4,%rsi - vmovdqa 0(%rsp),%xmm11 - vmovdqa 80(%rsp),%xmm12 - vpxor %xmm11,%xmm0,%xmm13 - vpand %xmm10,%xmm13,%xmm13 - vpxor %xmm13,%xmm0,%xmm0 - vpxor %xmm13,%xmm11,%xmm11 - vpxor %xmm12,%xmm1,%xmm13 - vpand %xmm10,%xmm13,%xmm13 - vpxor %xmm13,%xmm1,%xmm1 - vpxor %xmm13,%xmm12,%xmm12 - vmovdqa 16(%rsp),%xmm13 - vmovdqa 96(%rsp),%xmm14 - vpxor %xmm13,%xmm2,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm2,%xmm2 - vpxor %xmm15,%xmm13,%xmm13 - vpxor %xmm14,%xmm3,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm3,%xmm3 - vpxor %xmm15,%xmm14,%xmm14 - vmovdqa %xmm13,0(%rsp) - vmovdqa %xmm14,16(%rsp) - vmovdqa 32(%rsp),%xmm13 - vmovdqa 112(%rsp),%xmm14 - vpxor %xmm13,%xmm4,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm4,%xmm4 - vpxor %xmm15,%xmm13,%xmm13 - vpxor %xmm14,%xmm5,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm5,%xmm5 - vpxor %xmm15,%xmm14,%xmm14 - vmovdqa %xmm13,32(%rsp) - vmovdqa %xmm14,80(%rsp) - vmovdqa 48(%rsp),%xmm13 - vmovdqa 128(%rsp),%xmm14 - vpxor %xmm13,%xmm6,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm6,%xmm6 - vpxor %xmm15,%xmm13,%xmm13 - vpxor %xmm14,%xmm7,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm7,%xmm7 - vpxor %xmm15,%xmm14,%xmm14 - vmovdqa %xmm13,48(%rsp) - vmovdqa %xmm14,96(%rsp) - vmovdqa 64(%rsp),%xmm13 - vmovdqa 144(%rsp),%xmm14 - vpxor %xmm13,%xmm8,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm8,%xmm8 - vpxor %xmm15,%xmm13,%xmm13 - vpxor %xmm14,%xmm9,%xmm15 - vpand %xmm10,%xmm15,%xmm15 - vpxor %xmm15,%xmm9,%xmm9 - vpxor %xmm15,%xmm14,%xmm14 - vmovdqa %xmm13,64(%rsp) - vmovdqa %xmm14,112(%rsp) - vpaddq curve25519_sandy2x_subc0(%rip),%xmm11,%xmm10 - vpsubq %xmm12,%xmm10,%xmm10 - vpaddq %xmm12,%xmm11,%xmm11 - vpunpckhqdq %xmm10,%xmm11,%xmm12 - vpunpcklqdq %xmm10,%xmm11,%xmm10 - vpaddq %xmm1,%xmm0,%xmm11 - vpaddq curve25519_sandy2x_subc0(%rip),%xmm0,%xmm0 - vpsubq %xmm1,%xmm0,%xmm0 - vpunpckhqdq %xmm11,%xmm0,%xmm1 - vpunpcklqdq %xmm11,%xmm0,%xmm0 - vpmuludq %xmm0,%xmm10,%xmm11 - vpmuludq %xmm1,%xmm10,%xmm13 - vmovdqa %xmm1,128(%rsp) - vpaddq %xmm1,%xmm1,%xmm1 - vpmuludq %xmm0,%xmm12,%xmm14 - vmovdqa %xmm0,144(%rsp) - vpaddq %xmm14,%xmm13,%xmm13 - vpmuludq %xmm1,%xmm12,%xmm0 - vmovdqa %xmm1,160(%rsp) - vpaddq %xmm3,%xmm2,%xmm1 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm2,%xmm2 - vpsubq %xmm3,%xmm2,%xmm2 - vpunpckhqdq %xmm1,%xmm2,%xmm3 - vpunpcklqdq %xmm1,%xmm2,%xmm1 - vpmuludq %xmm1,%xmm10,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpmuludq %xmm3,%xmm10,%xmm2 - vmovdqa %xmm3,176(%rsp) - vpaddq %xmm3,%xmm3,%xmm3 - vpmuludq %xmm1,%xmm12,%xmm14 - vmovdqa %xmm1,192(%rsp) - vpaddq %xmm14,%xmm2,%xmm2 - vpmuludq %xmm3,%xmm12,%xmm1 - vmovdqa %xmm3,208(%rsp) - vpaddq %xmm5,%xmm4,%xmm3 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm4,%xmm4 - vpsubq %xmm5,%xmm4,%xmm4 - vpunpckhqdq %xmm3,%xmm4,%xmm5 - vpunpcklqdq %xmm3,%xmm4,%xmm3 - vpmuludq %xmm3,%xmm10,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vpmuludq %xmm5,%xmm10,%xmm4 - vmovdqa %xmm5,224(%rsp) - vpaddq %xmm5,%xmm5,%xmm5 - vpmuludq %xmm3,%xmm12,%xmm14 - vmovdqa %xmm3,240(%rsp) - vpaddq %xmm14,%xmm4,%xmm4 - vpaddq %xmm7,%xmm6,%xmm3 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm6,%xmm6 - vpsubq %xmm7,%xmm6,%xmm6 - vpunpckhqdq %xmm3,%xmm6,%xmm7 - vpunpcklqdq %xmm3,%xmm6,%xmm3 - vpmuludq %xmm3,%xmm10,%xmm6 - vpmuludq %xmm5,%xmm12,%xmm14 - vmovdqa %xmm5,256(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm5,%xmm5 - vmovdqa %xmm5,272(%rsp) - vpaddq %xmm14,%xmm6,%xmm6 - vpmuludq %xmm7,%xmm10,%xmm5 - vmovdqa %xmm7,288(%rsp) - vpaddq %xmm7,%xmm7,%xmm7 - vpmuludq %xmm3,%xmm12,%xmm14 - vmovdqa %xmm3,304(%rsp) - vpaddq %xmm14,%xmm5,%xmm5 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vmovdqa %xmm3,320(%rsp) - vpaddq %xmm9,%xmm8,%xmm3 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm8,%xmm8 - vpsubq %xmm9,%xmm8,%xmm8 - vpunpckhqdq %xmm3,%xmm8,%xmm9 - vpunpcklqdq %xmm3,%xmm8,%xmm3 - vmovdqa %xmm3,336(%rsp) - vpmuludq %xmm7,%xmm12,%xmm8 - vmovdqa %xmm7,352(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm7,%xmm7 - vmovdqa %xmm7,368(%rsp) - vpmuludq %xmm3,%xmm10,%xmm7 - vpaddq %xmm7,%xmm8,%xmm8 - vpmuludq %xmm9,%xmm10,%xmm7 - vmovdqa %xmm9,384(%rsp) - vpaddq %xmm9,%xmm9,%xmm9 - vpmuludq %xmm3,%xmm12,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vmovdqa %xmm3,400(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm12,%xmm12 - vpmuludq %xmm9,%xmm12,%xmm3 - vmovdqa %xmm9,416(%rsp) - vpaddq %xmm3,%xmm11,%xmm11 - vmovdqa 0(%rsp),%xmm3 - vmovdqa 16(%rsp),%xmm9 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10 - vpsubq %xmm9,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm10,%xmm3,%xmm9 - vpunpcklqdq %xmm10,%xmm3,%xmm3 - vpmuludq 144(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpmuludq 128(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm2,%xmm2 - vpmuludq 192(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpmuludq 176(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm4,%xmm4 - vpmuludq 240(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpmuludq 224(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm5,%xmm5 - vpmuludq 304(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpmuludq 288(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vpmuludq 336(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpmuludq 384(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 144(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 160(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm1,%xmm1 - vpmuludq 192(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 208(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 240(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 256(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpmuludq 304(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9 - vpmuludq 352(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 336(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 416(%rsp),%xmm9,%xmm9 - vpaddq %xmm9,%xmm0,%xmm0 - vmovdqa 32(%rsp),%xmm3 - vmovdqa 80(%rsp),%xmm9 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10 - vpsubq %xmm9,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm10,%xmm3,%xmm9 - vpunpcklqdq %xmm10,%xmm3,%xmm3 - vpmuludq 144(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpmuludq 128(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm4,%xmm4 - vpmuludq 192(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpmuludq 176(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm5,%xmm5 - vpmuludq 240(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpmuludq 224(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vpmuludq 304(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpmuludq 288(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm13,%xmm13 - vpmuludq 336(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpmuludq 384(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 144(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 160(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 192(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 208(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpmuludq 240(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9 - vpmuludq 256(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 304(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 352(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm0,%xmm0 - vpmuludq 336(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 416(%rsp),%xmm9,%xmm9 - vpaddq %xmm9,%xmm1,%xmm1 - vmovdqa 48(%rsp),%xmm3 - vmovdqa 96(%rsp),%xmm9 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10 - vpsubq %xmm9,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm10,%xmm3,%xmm9 - vpunpcklqdq %xmm10,%xmm3,%xmm3 - vpmuludq 144(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpmuludq 128(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm5,%xmm5 - vpmuludq 192(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpmuludq 176(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vpmuludq 240(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpmuludq 224(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm13,%xmm13 - vpmuludq 304(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpmuludq 288(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm2,%xmm2 - vpmuludq 336(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpmuludq 384(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 144(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 160(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpmuludq 192(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9 - vpmuludq 208(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 240(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 256(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm0,%xmm0 - vpmuludq 304(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 352(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm1,%xmm1 - vpmuludq 336(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 416(%rsp),%xmm9,%xmm9 - vpaddq %xmm9,%xmm6,%xmm6 - vmovdqa 64(%rsp),%xmm3 - vmovdqa 112(%rsp),%xmm9 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10 - vpsubq %xmm9,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm10,%xmm3,%xmm9 - vpunpcklqdq %xmm10,%xmm3,%xmm3 - vpmuludq 144(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpmuludq 128(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3 - vpmuludq 192(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpmuludq 176(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm13,%xmm13 - vpmuludq 240(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpmuludq 224(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm2,%xmm2 - vpmuludq 304(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpmuludq 288(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm4,%xmm4 - vpmuludq 336(%rsp),%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpmuludq 384(%rsp),%xmm3,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 144(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9 - vpmuludq 160(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 192(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 208(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm0,%xmm0 - vpmuludq 240(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpmuludq 256(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm1,%xmm1 - vpmuludq 304(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpmuludq 352(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 336(%rsp),%xmm9,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 416(%rsp),%xmm9,%xmm9 - vpaddq %xmm9,%xmm8,%xmm8 - vpsrlq $25,%xmm4,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4 - vpsrlq $26,%xmm11,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11 - vpsrlq $26,%xmm6,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6 - vpsrlq $25,%xmm13,%xmm3 - vpaddq %xmm3,%xmm0,%xmm0 - vpand curve25519_sandy2x_m25(%rip),%xmm13,%xmm13 - vpsrlq $25,%xmm5,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5 - vpsrlq $26,%xmm0,%xmm3 - vpaddq %xmm3,%xmm2,%xmm2 - vpand curve25519_sandy2x_m26(%rip),%xmm0,%xmm0 - vpsrlq $26,%xmm8,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8 - vpsrlq $25,%xmm2,%xmm3 - vpaddq %xmm3,%xmm1,%xmm1 - vpand curve25519_sandy2x_m25(%rip),%xmm2,%xmm2 - vpsrlq $25,%xmm7,%xmm3 - vpsllq $4,%xmm3,%xmm9 - vpaddq %xmm3,%xmm11,%xmm11 - vpsllq $1,%xmm3,%xmm3 - vpaddq %xmm3,%xmm9,%xmm9 - vpaddq %xmm9,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7 - vpsrlq $26,%xmm1,%xmm3 - vpaddq %xmm3,%xmm4,%xmm4 - vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1 - vpsrlq $26,%xmm11,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11 - vpsrlq $25,%xmm4,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4 - vpunpcklqdq %xmm13,%xmm11,%xmm3 - vpunpckhqdq %xmm13,%xmm11,%xmm9 - vpaddq curve25519_sandy2x_subc0(%rip),%xmm9,%xmm10 - vpsubq %xmm3,%xmm10,%xmm10 - vpaddq %xmm9,%xmm3,%xmm3 - vpunpckhqdq %xmm3,%xmm10,%xmm9 - vpunpcklqdq %xmm3,%xmm10,%xmm10 - vpmuludq %xmm10,%xmm10,%xmm3 - vpaddq %xmm10,%xmm10,%xmm10 - vpmuludq %xmm9,%xmm10,%xmm11 - vpunpcklqdq %xmm2,%xmm0,%xmm12 - vpunpckhqdq %xmm2,%xmm0,%xmm0 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm0,%xmm2 - vpsubq %xmm12,%xmm2,%xmm2 - vpaddq %xmm0,%xmm12,%xmm12 - vpunpckhqdq %xmm12,%xmm2,%xmm0 - vpunpcklqdq %xmm12,%xmm2,%xmm2 - vpmuludq %xmm2,%xmm10,%xmm12 - vpaddq %xmm9,%xmm9,%xmm13 - vpmuludq %xmm13,%xmm9,%xmm9 - vpaddq %xmm9,%xmm12,%xmm12 - vpmuludq %xmm0,%xmm10,%xmm9 - vpmuludq %xmm2,%xmm13,%xmm14 - vpaddq %xmm14,%xmm9,%xmm9 - vpunpcklqdq %xmm4,%xmm1,%xmm14 - vpunpckhqdq %xmm4,%xmm1,%xmm1 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm1,%xmm4 - vpsubq %xmm14,%xmm4,%xmm4 - vpaddq %xmm1,%xmm14,%xmm14 - vpunpckhqdq %xmm14,%xmm4,%xmm1 - vpunpcklqdq %xmm14,%xmm4,%xmm4 - vmovdqa %xmm1,0(%rsp) - vpaddq %xmm1,%xmm1,%xmm1 - vmovdqa %xmm1,16(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vmovdqa %xmm1,32(%rsp) - vpmuludq %xmm4,%xmm10,%xmm1 - vpmuludq %xmm2,%xmm2,%xmm14 - vpaddq %xmm14,%xmm1,%xmm1 - vpmuludq 0(%rsp),%xmm10,%xmm14 - vpmuludq %xmm4,%xmm13,%xmm15 - vpaddq %xmm15,%xmm14,%xmm14 - vpunpcklqdq %xmm5,%xmm6,%xmm15 - vpunpckhqdq %xmm5,%xmm6,%xmm5 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm6 - vpsubq %xmm15,%xmm6,%xmm6 - vpaddq %xmm5,%xmm15,%xmm15 - vpunpckhqdq %xmm15,%xmm6,%xmm5 - vpunpcklqdq %xmm15,%xmm6,%xmm6 - vmovdqa %xmm6,48(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm6,%xmm6 - vmovdqa %xmm6,64(%rsp) - vmovdqa %xmm5,80(%rsp) - vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm5,%xmm5 - vmovdqa %xmm5,96(%rsp) - vpmuludq 48(%rsp),%xmm10,%xmm5 - vpaddq %xmm0,%xmm0,%xmm6 - vpmuludq %xmm6,%xmm0,%xmm0 - vpaddq %xmm0,%xmm5,%xmm5 - vpmuludq 80(%rsp),%xmm10,%xmm0 - vpmuludq %xmm4,%xmm6,%xmm15 - vpaddq %xmm15,%xmm0,%xmm0 - vpmuludq %xmm6,%xmm13,%xmm15 - vpaddq %xmm15,%xmm1,%xmm1 - vpmuludq %xmm6,%xmm2,%xmm15 - vpaddq %xmm15,%xmm14,%xmm14 - vpunpcklqdq %xmm7,%xmm8,%xmm15 - vpunpckhqdq %xmm7,%xmm8,%xmm7 - vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm8 - vpsubq %xmm15,%xmm8,%xmm8 - vpaddq %xmm7,%xmm15,%xmm15 - vpunpckhqdq %xmm15,%xmm8,%xmm7 - vpunpcklqdq %xmm15,%xmm8,%xmm8 - vmovdqa %xmm8,112(%rsp) - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm8,%xmm8 - vmovdqa %xmm8,160(%rsp) - vpmuludq 112(%rsp),%xmm10,%xmm8 - vpmuludq %xmm7,%xmm10,%xmm10 - vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm7,%xmm15 - vpmuludq %xmm15,%xmm7,%xmm7 - vpaddq %xmm7,%xmm8,%xmm8 - vpmuludq %xmm15,%xmm13,%xmm7 - vpaddq %xmm7,%xmm3,%xmm3 - vpmuludq %xmm15,%xmm2,%xmm7 - vpaddq %xmm7,%xmm11,%xmm11 - vpmuludq 80(%rsp),%xmm13,%xmm7 - vpaddq %xmm7,%xmm7,%xmm7 - vpaddq %xmm7,%xmm8,%xmm8 - vpmuludq 16(%rsp),%xmm13,%xmm7 - vpaddq %xmm7,%xmm5,%xmm5 - vpmuludq 48(%rsp),%xmm13,%xmm7 - vpaddq %xmm7,%xmm0,%xmm0 - vpmuludq 112(%rsp),%xmm13,%xmm7 - vpaddq %xmm7,%xmm10,%xmm10 - vpmuludq %xmm15,%xmm6,%xmm7 - vpaddq %xmm7,%xmm12,%xmm12 - vpmuludq %xmm15,%xmm4,%xmm7 - vpaddq %xmm7,%xmm9,%xmm9 - vpaddq %xmm2,%xmm2,%xmm2 - vpmuludq %xmm4,%xmm2,%xmm7 - vpaddq %xmm7,%xmm5,%xmm5 - vpmuludq 160(%rsp),%xmm2,%xmm7 - vpaddq %xmm7,%xmm3,%xmm3 - vpmuludq 160(%rsp),%xmm6,%xmm7 - vpaddq %xmm7,%xmm11,%xmm11 - vpmuludq 0(%rsp),%xmm2,%xmm7 - vpaddq %xmm7,%xmm0,%xmm0 - vpmuludq 48(%rsp),%xmm2,%xmm7 - vpaddq %xmm7,%xmm8,%xmm8 - vpmuludq 80(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 96(%rsp),%xmm4,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq %xmm4,%xmm4,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpaddq %xmm4,%xmm4,%xmm2 - vpmuludq 160(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm12,%xmm12 - vpmuludq 16(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vpmuludq 48(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm14,%xmm14 - vpmuludq 96(%rsp),%xmm6,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vmovdqa 16(%rsp),%xmm4 - vpmuludq 160(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm9,%xmm9 - vpmuludq 16(%rsp),%xmm6,%xmm4 - vpaddq %xmm4,%xmm8,%xmm8 - vpmuludq 48(%rsp),%xmm6,%xmm4 - vpaddq %xmm4,%xmm10,%xmm10 - vpmuludq 80(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm4,%xmm4 - vpaddq %xmm4,%xmm5,%xmm5 - vpmuludq 112(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm0,%xmm0 - vmovdqa 48(%rsp),%xmm4 - vpaddq %xmm4,%xmm4,%xmm4 - vpmuludq 160(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vmovdqa 80(%rsp),%xmm4 - vpaddq %xmm4,%xmm4,%xmm4 - vpmuludq 160(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm14,%xmm14 - vpmuludq 64(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vmovdqa 16(%rsp),%xmm4 - vpmuludq 64(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm11,%xmm11 - vmovdqa 16(%rsp),%xmm4 - vpmuludq 96(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm12,%xmm12 - vmovdqa 48(%rsp),%xmm4 - vpmuludq 96(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm9,%xmm9 - vpmuludq 0(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vmovdqa 32(%rsp),%xmm2 - vpmuludq 0(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vmovdqa 64(%rsp),%xmm2 - vpmuludq 48(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vmovdqa 96(%rsp),%xmm2 - vpmuludq 80(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm1,%xmm1 - vmovdqa 160(%rsp),%xmm2 - vpmuludq 112(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpsrlq $26,%xmm3,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3 - vpsrlq $25,%xmm14,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14 - vpsrlq $25,%xmm11,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpand curve25519_sandy2x_m25(%rip),%xmm11,%xmm11 - vpsrlq $26,%xmm5,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5 - vpsrlq $26,%xmm12,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpand curve25519_sandy2x_m26(%rip),%xmm12,%xmm12 - vpsrlq $25,%xmm0,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0 - vpsrlq $25,%xmm9,%xmm2 - vpaddq %xmm2,%xmm1,%xmm1 - vpand curve25519_sandy2x_m25(%rip),%xmm9,%xmm9 - vpsrlq $26,%xmm8,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8 - vpsrlq $26,%xmm1,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1 - vpsrlq $25,%xmm10,%xmm2 - vpsllq $4,%xmm2,%xmm4 - vpaddq %xmm2,%xmm3,%xmm3 - vpsllq $1,%xmm2,%xmm2 - vpaddq %xmm2,%xmm4,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $25,%xmm14,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14 - vpsrlq $26,%xmm3,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3 - vpunpckhqdq %xmm11,%xmm3,%xmm2 - vmovdqa %xmm2,0(%rsp) - vpunpcklqdq %xmm11,%xmm3,%xmm2 - vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm2,%xmm2 - vmovdqa %xmm2,80(%rsp) - vpunpckhqdq %xmm9,%xmm12,%xmm2 - vmovdqa %xmm2,16(%rsp) - vpunpcklqdq %xmm9,%xmm12,%xmm2 - vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm2,%xmm2 - vmovdqa %xmm2,96(%rsp) - vpunpckhqdq %xmm14,%xmm1,%xmm2 - vmovdqa %xmm2,32(%rsp) - vpunpcklqdq %xmm14,%xmm1,%xmm1 - vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm1,%xmm1 - vmovdqa %xmm1,112(%rsp) - vpunpckhqdq %xmm0,%xmm5,%xmm1 - vmovdqa %xmm1,48(%rsp) - vpunpcklqdq %xmm0,%xmm5,%xmm0 - vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm0,%xmm0 - vmovdqa %xmm0,160(%rsp) - vpunpckhqdq %xmm10,%xmm8,%xmm0 - vmovdqa %xmm0,64(%rsp) - vpunpcklqdq %xmm10,%xmm8,%xmm0 - vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm0,%xmm0 - vmovdqa %xmm0,208(%rsp) - vmovdqa 144(%rsp),%xmm0 - vpmuludq %xmm0,%xmm0,%xmm1 - vpaddq %xmm0,%xmm0,%xmm0 - vmovdqa 128(%rsp),%xmm2 - vpmuludq %xmm2,%xmm0,%xmm3 - vmovdqa 192(%rsp),%xmm4 - vpmuludq %xmm4,%xmm0,%xmm5 - vmovdqa 176(%rsp),%xmm6 - vpmuludq %xmm6,%xmm0,%xmm7 - vmovdqa 240(%rsp),%xmm8 - vpmuludq %xmm8,%xmm0,%xmm9 - vpmuludq 224(%rsp),%xmm0,%xmm10 - vpmuludq 304(%rsp),%xmm0,%xmm11 - vpmuludq 288(%rsp),%xmm0,%xmm12 - vpmuludq 336(%rsp),%xmm0,%xmm13 - vmovdqa 384(%rsp),%xmm14 - vpmuludq %xmm14,%xmm0,%xmm0 - vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm14,%xmm15 - vpmuludq %xmm15,%xmm14,%xmm14 - vpaddq %xmm14,%xmm13,%xmm13 - vpaddq %xmm6,%xmm6,%xmm14 - vpmuludq %xmm14,%xmm6,%xmm6 - vpaddq %xmm6,%xmm11,%xmm11 - vpaddq %xmm2,%xmm2,%xmm6 - vpmuludq %xmm6,%xmm2,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq %xmm15,%xmm6,%xmm2 - vpaddq %xmm2,%xmm1,%xmm1 - vpmuludq %xmm15,%xmm4,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vpmuludq 256(%rsp),%xmm6,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq 304(%rsp),%xmm6,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 352(%rsp),%xmm6,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 336(%rsp),%xmm6,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpmuludq %xmm4,%xmm6,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq %xmm14,%xmm6,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq %xmm8,%xmm6,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq %xmm15,%xmm14,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq %xmm15,%xmm8,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq %xmm4,%xmm4,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq %xmm14,%xmm4,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpaddq %xmm4,%xmm4,%xmm2 - vpmuludq %xmm8,%xmm2,%xmm4 - vpaddq %xmm4,%xmm11,%xmm11 - vpmuludq 400(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vpmuludq 400(%rsp),%xmm14,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vpmuludq 224(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm12,%xmm12 - vpmuludq 304(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm13,%xmm13 - vpmuludq 288(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpmuludq 368(%rsp),%xmm8,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vpmuludq %xmm8,%xmm14,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq %xmm8,%xmm8,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpaddq %xmm8,%xmm8,%xmm2 - vpmuludq 400(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm5,%xmm5 - vpmuludq 256(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm9,%xmm9 - vpmuludq 304(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm10,%xmm10 - vpmuludq 368(%rsp),%xmm14,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vmovdqa 256(%rsp),%xmm4 - vpmuludq 400(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm7,%xmm7 - vpmuludq 256(%rsp),%xmm14,%xmm4 - vpaddq %xmm4,%xmm13,%xmm13 - vpmuludq 304(%rsp),%xmm14,%xmm4 - vpaddq %xmm4,%xmm0,%xmm0 - vpmuludq 352(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm11,%xmm11 - vpmuludq 336(%rsp),%xmm15,%xmm4 - vpaddq %xmm4,%xmm12,%xmm12 - vmovdqa 304(%rsp),%xmm4 - vpaddq %xmm4,%xmm4,%xmm4 - vpmuludq 400(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm9,%xmm9 - vpmuludq 320(%rsp),%xmm2,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vmovdqa 256(%rsp),%xmm4 - vpmuludq 320(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm3,%xmm3 - vmovdqa 256(%rsp),%xmm4 - vpmuludq 368(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm5,%xmm5 - vmovdqa 304(%rsp),%xmm4 - vpmuludq 368(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm7,%xmm7 - vmovdqa 352(%rsp),%xmm4 - vpmuludq 400(%rsp),%xmm4,%xmm4 - vpaddq %xmm4,%xmm10,%xmm10 - vpmuludq 224(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vmovdqa 272(%rsp),%xmm2 - vpmuludq 224(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm1,%xmm1 - vmovdqa 320(%rsp),%xmm2 - vpmuludq 304(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vmovdqa 368(%rsp),%xmm2 - vpmuludq 288(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vmovdqa 400(%rsp),%xmm2 - vpmuludq 336(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpsrlq $26,%xmm1,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1 - vpsrlq $25,%xmm10,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $25,%xmm3,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3 - vpsrlq $26,%xmm11,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11 - vpsrlq $26,%xmm5,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5 - vpsrlq $25,%xmm12,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12 - vpsrlq $25,%xmm7,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7 - vpsrlq $26,%xmm13,%xmm2 - vpaddq %xmm2,%xmm0,%xmm0 - vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13 - vpsrlq $26,%xmm9,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9 - vpsrlq $25,%xmm0,%xmm2 - vpsllq $4,%xmm2,%xmm4 - vpaddq %xmm2,%xmm1,%xmm1 - vpsllq $1,%xmm2,%xmm2 - vpaddq %xmm2,%xmm4,%xmm4 - vpaddq %xmm4,%xmm1,%xmm1 - vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0 - vpsrlq $25,%xmm10,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $26,%xmm1,%xmm2 - vpaddq %xmm2,%xmm3,%xmm3 - vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1 - vpunpckhqdq %xmm3,%xmm1,%xmm2 - vpunpcklqdq %xmm3,%xmm1,%xmm1 - vmovdqa %xmm1,176(%rsp) - vpaddq curve25519_sandy2x_subc0(%rip),%xmm2,%xmm3 - vpsubq %xmm1,%xmm3,%xmm3 - vpunpckhqdq %xmm3,%xmm2,%xmm1 - vpunpcklqdq %xmm3,%xmm2,%xmm2 - vmovdqa %xmm2,192(%rsp) - vmovdqa %xmm1,224(%rsp) - vpsllq $1,%xmm1,%xmm1 - vmovdqa %xmm1,240(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm3,%xmm3 - vmovdqa 80(%rsp),%xmm1 - vpunpcklqdq %xmm1,%xmm3,%xmm2 - vpunpckhqdq %xmm1,%xmm3,%xmm1 - vpunpckhqdq %xmm7,%xmm5,%xmm3 - vpunpcklqdq %xmm7,%xmm5,%xmm4 - vmovdqa %xmm4,256(%rsp) - vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm5 - vpsubq %xmm4,%xmm5,%xmm5 - vpunpckhqdq %xmm5,%xmm3,%xmm4 - vpunpcklqdq %xmm5,%xmm3,%xmm3 - vmovdqa %xmm3,272(%rsp) - vmovdqa %xmm4,288(%rsp) - vpsllq $1,%xmm4,%xmm4 - vmovdqa %xmm4,304(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm5,%xmm5 - vmovdqa 96(%rsp),%xmm3 - vpunpcklqdq %xmm3,%xmm5,%xmm4 - vpunpckhqdq %xmm3,%xmm5,%xmm3 - vpunpckhqdq %xmm10,%xmm9,%xmm5 - vpunpcklqdq %xmm10,%xmm9,%xmm6 - vmovdqa %xmm6,320(%rsp) - vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm7 - vpsubq %xmm6,%xmm7,%xmm7 - vpunpckhqdq %xmm7,%xmm5,%xmm6 - vpunpcklqdq %xmm7,%xmm5,%xmm5 - vmovdqa %xmm5,336(%rsp) - vmovdqa %xmm6,352(%rsp) - vpsllq $1,%xmm6,%xmm6 - vmovdqa %xmm6,368(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm7,%xmm7 - vmovdqa 112(%rsp),%xmm5 - vpunpcklqdq %xmm5,%xmm7,%xmm6 - vpunpckhqdq %xmm5,%xmm7,%xmm5 - vpunpckhqdq %xmm12,%xmm11,%xmm7 - vpunpcklqdq %xmm12,%xmm11,%xmm8 - vmovdqa %xmm8,384(%rsp) - vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm9 - vpsubq %xmm8,%xmm9,%xmm9 - vpunpckhqdq %xmm9,%xmm7,%xmm8 - vpunpcklqdq %xmm9,%xmm7,%xmm7 - vmovdqa %xmm7,400(%rsp) - vmovdqa %xmm8,416(%rsp) - vpsllq $1,%xmm8,%xmm8 - vmovdqa %xmm8,432(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm9,%xmm9 - vmovdqa 160(%rsp),%xmm7 - vpunpcklqdq %xmm7,%xmm9,%xmm8 - vpunpckhqdq %xmm7,%xmm9,%xmm7 - vpunpckhqdq %xmm0,%xmm13,%xmm9 - vpunpcklqdq %xmm0,%xmm13,%xmm0 - vmovdqa %xmm0,160(%rsp) - vpaddq curve25519_sandy2x_subc2(%rip),%xmm9,%xmm10 - vpsubq %xmm0,%xmm10,%xmm10 - vpunpckhqdq %xmm10,%xmm9,%xmm0 - vpunpcklqdq %xmm10,%xmm9,%xmm9 - vmovdqa %xmm9,448(%rsp) - vmovdqa %xmm0,464(%rsp) - vpsllq $1,%xmm0,%xmm0 - vmovdqa %xmm0,480(%rsp) - vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm10,%xmm10 - vmovdqa 208(%rsp),%xmm0 - vpunpcklqdq %xmm0,%xmm10,%xmm9 - vpunpckhqdq %xmm0,%xmm10,%xmm0 - vpsrlq $26,%xmm2,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2 - vpsrlq $25,%xmm5,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5 - vpsrlq $25,%xmm1,%xmm10 - vpaddq %xmm10,%xmm4,%xmm4 - vpand curve25519_sandy2x_m25(%rip),%xmm1,%xmm1 - vpsrlq $26,%xmm8,%xmm10 - vpaddq %xmm10,%xmm7,%xmm7 - vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8 - vpsrlq $26,%xmm4,%xmm10 - vpaddq %xmm10,%xmm3,%xmm3 - vpand curve25519_sandy2x_m26(%rip),%xmm4,%xmm4 - vpsrlq $25,%xmm7,%xmm10 - vpaddq %xmm10,%xmm9,%xmm9 - vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7 - vpsrlq $25,%xmm3,%xmm10 - vpaddq %xmm10,%xmm6,%xmm6 - vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3 - vpsrlq $26,%xmm9,%xmm10 - vpaddq %xmm10,%xmm0,%xmm0 - vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9 - vpsrlq $26,%xmm6,%xmm10 - vpaddq %xmm10,%xmm5,%xmm5 - vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6 - vpsrlq $25,%xmm0,%xmm10 - vpsllq $4,%xmm10,%xmm11 - vpaddq %xmm10,%xmm2,%xmm2 - vpsllq $1,%xmm10,%xmm10 - vpaddq %xmm10,%xmm11,%xmm11 - vpaddq %xmm11,%xmm2,%xmm2 - vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0 - vpsrlq $25,%xmm5,%xmm10 - vpaddq %xmm10,%xmm8,%xmm8 - vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5 - vpsrlq $26,%xmm2,%xmm10 - vpaddq %xmm10,%xmm1,%xmm1 - vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2 - vpunpckhqdq %xmm1,%xmm2,%xmm10 - vmovdqa %xmm10,80(%rsp) - vpunpcklqdq %xmm1,%xmm2,%xmm1 - vpunpckhqdq %xmm3,%xmm4,%xmm2 - vmovdqa %xmm2,96(%rsp) - vpunpcklqdq %xmm3,%xmm4,%xmm2 - vpunpckhqdq %xmm5,%xmm6,%xmm3 - vmovdqa %xmm3,112(%rsp) - vpunpcklqdq %xmm5,%xmm6,%xmm3 - vpunpckhqdq %xmm7,%xmm8,%xmm4 - vmovdqa %xmm4,128(%rsp) - vpunpcklqdq %xmm7,%xmm8,%xmm4 - vpunpckhqdq %xmm0,%xmm9,%xmm5 - vmovdqa %xmm5,144(%rsp) - vpunpcklqdq %xmm0,%xmm9,%xmm0 - vmovdqa 176(%rsp),%xmm5 - vpaddq %xmm5,%xmm1,%xmm1 - vpunpcklqdq %xmm1,%xmm5,%xmm6 - vpunpckhqdq %xmm1,%xmm5,%xmm1 - vpmuludq 224(%rsp),%xmm6,%xmm5 - vpmuludq 192(%rsp),%xmm1,%xmm7 - vpaddq %xmm7,%xmm5,%xmm5 - vpmuludq 272(%rsp),%xmm6,%xmm7 - vpmuludq 240(%rsp),%xmm1,%xmm8 - vpaddq %xmm8,%xmm7,%xmm7 - vpmuludq 288(%rsp),%xmm6,%xmm8 - vpmuludq 272(%rsp),%xmm1,%xmm9 - vpaddq %xmm9,%xmm8,%xmm8 - vpmuludq 336(%rsp),%xmm6,%xmm9 - vpmuludq 304(%rsp),%xmm1,%xmm10 - vpaddq %xmm10,%xmm9,%xmm9 - vpmuludq 352(%rsp),%xmm6,%xmm10 - vpmuludq 336(%rsp),%xmm1,%xmm11 - vpaddq %xmm11,%xmm10,%xmm10 - vpmuludq 400(%rsp),%xmm6,%xmm11 - vpmuludq 368(%rsp),%xmm1,%xmm12 - vpaddq %xmm12,%xmm11,%xmm11 - vpmuludq 416(%rsp),%xmm6,%xmm12 - vpmuludq 400(%rsp),%xmm1,%xmm13 - vpaddq %xmm13,%xmm12,%xmm12 - vpmuludq 448(%rsp),%xmm6,%xmm13 - vpmuludq 432(%rsp),%xmm1,%xmm14 - vpaddq %xmm14,%xmm13,%xmm13 - vpmuludq 464(%rsp),%xmm6,%xmm14 - vpmuludq 448(%rsp),%xmm1,%xmm15 - vpaddq %xmm15,%xmm14,%xmm14 - vpmuludq 192(%rsp),%xmm6,%xmm6 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vpmuludq 480(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm6,%xmm6 - vmovdqa 256(%rsp),%xmm1 - vpaddq %xmm1,%xmm2,%xmm2 - vpunpcklqdq %xmm2,%xmm1,%xmm15 - vpunpckhqdq %xmm2,%xmm1,%xmm1 - vpmuludq 192(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq 224(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 272(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq 288(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 336(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq 352(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 400(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 416(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm15,%xmm15 - vpmuludq 448(%rsp),%xmm15,%xmm2 - vpaddq %xmm2,%xmm6,%xmm6 - vpmuludq 464(%rsp),%xmm15,%xmm15 - vpaddq %xmm15,%xmm5,%xmm5 - vpmuludq 192(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 240(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq 272(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 304(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq 336(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 368(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 400(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vpmuludq 432(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm6,%xmm6 - vpmuludq 448(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq 480(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm7,%xmm7 - vmovdqa 320(%rsp),%xmm1 - vpaddq %xmm1,%xmm3,%xmm3 - vpunpcklqdq %xmm3,%xmm1,%xmm2 - vpunpckhqdq %xmm3,%xmm1,%xmm1 - vpmuludq 192(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm9,%xmm9 - vpmuludq 224(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm10,%xmm10 - vpmuludq 272(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 288(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm12,%xmm12 - vpmuludq 336(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 352(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2 - vpmuludq 400(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 416(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 448(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq 464(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 192(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 240(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm11,%xmm11 - vpmuludq 272(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 304(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 336(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vpmuludq 368(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm6,%xmm6 - vpmuludq 400(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq 432(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq 448(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 480(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm9,%xmm9 - vmovdqa 384(%rsp),%xmm1 - vpaddq %xmm1,%xmm4,%xmm4 - vpunpcklqdq %xmm4,%xmm1,%xmm2 - vpunpckhqdq %xmm4,%xmm1,%xmm1 - vpmuludq 192(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm11,%xmm11 - vpmuludq 224(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm12,%xmm12 - vpmuludq 272(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm13,%xmm13 - vpmuludq 288(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2 - vpmuludq 336(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm6,%xmm6 - vpmuludq 352(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm5,%xmm5 - vpmuludq 400(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm7,%xmm7 - vpmuludq 416(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm8,%xmm8 - vpmuludq 448(%rsp),%xmm2,%xmm3 - vpaddq %xmm3,%xmm9,%xmm9 - vpmuludq 464(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 192(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 240(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm13,%xmm13 - vpmuludq 272(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1 - vpmuludq 304(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm6,%xmm6 - vpmuludq 336(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm5,%xmm5 - vpmuludq 368(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm7,%xmm7 - vpmuludq 400(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm8,%xmm8 - vpmuludq 432(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm9,%xmm9 - vpmuludq 448(%rsp),%xmm1,%xmm2 - vpaddq %xmm2,%xmm10,%xmm10 - vpmuludq 480(%rsp),%xmm1,%xmm1 - vpaddq %xmm1,%xmm11,%xmm11 - vmovdqa 160(%rsp),%xmm1 - vpaddq %xmm1,%xmm0,%xmm0 - vpunpcklqdq %xmm0,%xmm1,%xmm2 - vpunpckhqdq %xmm0,%xmm1,%xmm0 - vpmuludq 192(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm13,%xmm13 - vpmuludq 224(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2 - vpmuludq 272(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm6,%xmm6 - vpmuludq 288(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm5,%xmm5 - vpmuludq 336(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm7,%xmm7 - vpmuludq 352(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm8,%xmm8 - vpmuludq 400(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm9,%xmm9 - vpmuludq 416(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm10,%xmm10 - vpmuludq 448(%rsp),%xmm2,%xmm1 - vpaddq %xmm1,%xmm11,%xmm11 - vpmuludq 464(%rsp),%xmm2,%xmm2 - vpaddq %xmm2,%xmm12,%xmm12 - vpmuludq 192(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm14,%xmm14 - vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm0,%xmm0 - vpmuludq 240(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm6,%xmm6 - vpmuludq 272(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm5,%xmm5 - vpmuludq 304(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm7,%xmm7 - vpmuludq 336(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm8,%xmm8 - vpmuludq 368(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm9,%xmm9 - vpmuludq 400(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm10,%xmm10 - vpmuludq 432(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm11,%xmm11 - vpmuludq 448(%rsp),%xmm0,%xmm1 - vpaddq %xmm1,%xmm12,%xmm12 - vpmuludq 480(%rsp),%xmm0,%xmm0 - vpaddq %xmm0,%xmm13,%xmm13 - vpsrlq $26,%xmm6,%xmm0 - vpaddq %xmm0,%xmm5,%xmm5 - vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6 - vpsrlq $25,%xmm10,%xmm0 - vpaddq %xmm0,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $25,%xmm5,%xmm0 - vpaddq %xmm0,%xmm7,%xmm7 - vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5 - vpsrlq $26,%xmm11,%xmm0 - vpaddq %xmm0,%xmm12,%xmm12 - vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11 - vpsrlq $26,%xmm7,%xmm0 - vpaddq %xmm0,%xmm8,%xmm8 - vpand curve25519_sandy2x_m26(%rip),%xmm7,%xmm7 - vpsrlq $25,%xmm12,%xmm0 - vpaddq %xmm0,%xmm13,%xmm13 - vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12 - vpsrlq $25,%xmm8,%xmm0 - vpaddq %xmm0,%xmm9,%xmm9 - vpand curve25519_sandy2x_m25(%rip),%xmm8,%xmm8 - vpsrlq $26,%xmm13,%xmm0 - vpaddq %xmm0,%xmm14,%xmm14 - vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13 - vpsrlq $26,%xmm9,%xmm0 - vpaddq %xmm0,%xmm10,%xmm10 - vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9 - vpsrlq $25,%xmm14,%xmm0 - vpsllq $4,%xmm0,%xmm1 - vpaddq %xmm0,%xmm6,%xmm6 - vpsllq $1,%xmm0,%xmm0 - vpaddq %xmm0,%xmm1,%xmm1 - vpaddq %xmm1,%xmm6,%xmm6 - vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14 - vpsrlq $25,%xmm10,%xmm0 - vpaddq %xmm0,%xmm11,%xmm11 - vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10 - vpsrlq $26,%xmm6,%xmm0 - vpaddq %xmm0,%xmm5,%xmm5 - vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6 - vpunpckhqdq %xmm5,%xmm6,%xmm1 - vpunpcklqdq %xmm5,%xmm6,%xmm0 - vpunpckhqdq %xmm8,%xmm7,%xmm3 - vpunpcklqdq %xmm8,%xmm7,%xmm2 - vpunpckhqdq %xmm10,%xmm9,%xmm5 - vpunpcklqdq %xmm10,%xmm9,%xmm4 - vpunpckhqdq %xmm12,%xmm11,%xmm7 - vpunpcklqdq %xmm12,%xmm11,%xmm6 - vpunpckhqdq %xmm14,%xmm13,%xmm9 - vpunpcklqdq %xmm14,%xmm13,%xmm8 - cmp $0,%rdx - jne .Lladder_base_loop - vmovdqu %xmm1,80(%rdi) - vmovdqu %xmm0,0(%rdi) - vmovdqu %xmm3,96(%rdi) - vmovdqu %xmm2,16(%rdi) - vmovdqu %xmm5,112(%rdi) - vmovdqu %xmm4,32(%rdi) - vmovdqu %xmm7,128(%rdi) - vmovdqu %xmm6,48(%rdi) - vmovdqu %xmm9,144(%rdi) - vmovdqu %xmm8,64(%rdi) - movq 1536(%rsp),%r11 - movq 1544(%rsp),%r12 - movq 1552(%rsp),%r13 - leave - ret -ENDPROC(curve25519_sandy2x_ladder_base) -#endif /* CONFIG_AS_AVX */ diff --git a/curve25519-sandy2x.c b/curve25519-sandy2x.c deleted file mode 100644 index e8d5d2b..0000000 --- a/curve25519-sandy2x.c +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - * - * Original author: Tung Chou <blueprint@crypto.tw> - */ - -#include <linux/kernel.h> -#include <linux/string.h> - -enum { CURVE25519_POINT_SIZE = 32 }; - -static __always_inline void normalize_secret(u8 secret[CURVE25519_POINT_SIZE]) -{ - secret[0] &= 248; - secret[31] &= 127; - secret[31] |= 64; -} - -typedef u64 fe[10]; -typedef u64 fe51[5]; -asmlinkage void curve25519_sandy2x_ladder(fe *, const u8 *); -asmlinkage void curve25519_sandy2x_ladder_base(fe *, const u8 *); -asmlinkage void curve25519_sandy2x_fe51_pack(u8 *, const fe51 *); -asmlinkage void curve25519_sandy2x_fe51_mul(fe51 *, const fe51 *, const fe51 *); -asmlinkage void curve25519_sandy2x_fe51_nsquare(fe51 *, const fe51 *, int); - -static inline u32 le24_to_cpupv(const u8 *in) -{ - return le16_to_cpup((__le16 *)in) | ((u32)in[2]) << 16; -} - -static inline void fe_frombytes(fe h, const u8 *s) -{ - u64 h0 = le32_to_cpup((__le32 *)s); - u64 h1 = le24_to_cpupv(s + 4) << 6; - u64 h2 = le24_to_cpupv(s + 7) << 5; - u64 h3 = le24_to_cpupv(s + 10) << 3; - u64 h4 = le24_to_cpupv(s + 13) << 2; - u64 h5 = le32_to_cpup((__le32 *)(s + 16)); - u64 h6 = le24_to_cpupv(s + 20) << 7; - u64 h7 = le24_to_cpupv(s + 23) << 5; - u64 h8 = le24_to_cpupv(s + 26) << 4; - u64 h9 = (le24_to_cpupv(s + 29) & 8388607) << 2; - u64 carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9; - - carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF; - carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF; - carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF; - carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF; - carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF; - - carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF; - carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF; - carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF; - carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF; - carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF; - - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; - h[5] = h5; - h[6] = h6; - h[7] = h7; - h[8] = h8; - h[9] = h9; -} - -static inline void fe51_invert(fe51 *r, const fe51 *x) -{ - fe51 z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t; - - /* 2 */ curve25519_sandy2x_fe51_nsquare(&z2, x, 1); - /* 4 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2, 1); - /* 8 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 1); - /* 9 */ curve25519_sandy2x_fe51_mul(&z9, (const fe51 *)&t, x); - /* 11 */ curve25519_sandy2x_fe51_mul(&z11, (const fe51 *)&z9, (const fe51 *)&z2); - /* 22 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z11, 1); - /* 2^5 - 2^0 = 31 */ curve25519_sandy2x_fe51_mul(&z2_5_0, (const fe51 *)&t, (const fe51 *)&z9); - - /* 2^10 - 2^5 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_5_0, 5); - /* 2^10 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_10_0, (const fe51 *)&t, (const fe51 *)&z2_5_0); - - /* 2^20 - 2^10 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_10_0, 10); - /* 2^20 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_20_0, (const fe51 *)&t, (const fe51 *)&z2_10_0); - - /* 2^40 - 2^20 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_20_0, 20); - /* 2^40 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_20_0); - - /* 2^50 - 2^10 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 10); - /* 2^50 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_50_0, (const fe51 *)&t, (const fe51 *)&z2_10_0); - - /* 2^100 - 2^50 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_50_0, 50); - /* 2^100 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_100_0, (const fe51 *)&t, (const fe51 *)&z2_50_0); - - /* 2^200 - 2^100 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_100_0, 100); - /* 2^200 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_100_0); - - /* 2^250 - 2^50 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 50); - /* 2^250 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_50_0); - - /* 2^255 - 2^5 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 5); - /* 2^255 - 21 */ curve25519_sandy2x_fe51_mul(r, (const fe51 *)t, (const fe51 *)&z11); -} - -bool curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE]) -{ - u8 e[32]; - fe var[3]; - fe51 x_51, z_51; - - memcpy(e, secret, 32); - normalize_secret(e); -#define x1 var[0] -#define x2 var[1] -#define z2 var[2] - fe_frombytes(x1, basepoint); - curve25519_sandy2x_ladder(var, e); - z_51[0] = (z2[1] << 26) + z2[0]; - z_51[1] = (z2[3] << 26) + z2[2]; - z_51[2] = (z2[5] << 26) + z2[4]; - z_51[3] = (z2[7] << 26) + z2[6]; - z_51[4] = (z2[9] << 26) + z2[8]; - x_51[0] = (x2[1] << 26) + x2[0]; - x_51[1] = (x2[3] << 26) + x2[2]; - x_51[2] = (x2[5] << 26) + x2[4]; - x_51[3] = (x2[7] << 26) + x2[6]; - x_51[4] = (x2[9] << 26) + x2[8]; -#undef x1 -#undef x2 -#undef z2 - fe51_invert(&z_51, (const fe51 *)&z_51); - curve25519_sandy2x_fe51_mul(&x_51, (const fe51 *)&x_51, (const fe51 *)&z_51); - curve25519_sandy2x_fe51_pack(mypublic, (const fe51 *)&x_51); - - return true; -} @@ -6,10 +6,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/delay.h> -#include <asm/cpufeature.h> -#include <asm/processor.h> -#include <asm/fpu/api.h> -#include <asm/simd.h> static unsigned long stamp = 0; module_param(stamp, ulong, 0); @@ -48,14 +44,12 @@ static __always_inline int name(void) \ } while (0) #define report_it(name) do { \ - pr_err("%lu: %7s: %llu cycles per call\n", stamp, #name, (end_ ## name - start_ ## name) / TRIALS); \ + pr_err("%lu: %7s: %lu cycles per call\n", stamp, #name, (end_ ## name - start_ ## name) / TRIALS); \ } while (0) declare_it(donna64) declare_it(hacl64) -declare_it(sandy2x) -declare_it(amd64) declare_it(fiat32) declare_it(donna32) @@ -68,8 +62,6 @@ static bool verify(void) for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { test_it(donna64, {}, {}); test_it(hacl64, {}, {}); - test_it(sandy2x, kernel_fpu_begin(), kernel_fpu_end()); - test_it(amd64, {}, {}); test_it(fiat32, {}, {}); test_it(donna32, {}, {}); } @@ -82,8 +74,6 @@ static int __init mod_init(void) int ret = 0, i; cycles_t start_donna64, end_donna64; cycles_t start_hacl64, end_hacl64; - cycles_t start_sandy2x, end_sandy2x; - cycles_t start_amd64, end_amd64; cycles_t start_fiat32, end_fiat32; cycles_t start_donna32, end_donna32; unsigned long flags; @@ -98,10 +88,6 @@ static int __init mod_init(void) do_it(donna64); do_it(hacl64); - kernel_fpu_begin(); - do_it(sandy2x); - kernel_fpu_end(); - do_it(amd64); do_it(fiat32); do_it(donna32); @@ -109,8 +95,6 @@ static int __init mod_init(void) report_it(donna64); report_it(hacl64); - report_it(sandy2x); - report_it(amd64); report_it(fiat32); report_it(donna32); @@ -4,6 +4,7 @@ set -e nob_cpus() { echo "[+] Setting non-boot CPUs to status $1" for i in /sys/devices/system/cpu/*/online; do + [[ $i == *cpu0* ]] && continue echo "$1" > "$i" done } |