diff options
author | Samuel Neves <sneves@dei.uc.pt> | 2017-07-14 03:41:24 +0100 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2017-07-20 03:37:39 +0200 |
commit | 3f54cf501415b5206b473702005149d74de83c10 (patch) | |
tree | bb9c68e540a30e1338407f86a10e79a9514f48a0 /src/crypto/blake2s-avx-x86_64.S | |
parent | send: use skb_queue_empty where appropriate (diff) | |
download | wireguard-monolithic-historical-3f54cf501415b5206b473702005149d74de83c10.tar.xz wireguard-monolithic-historical-3f54cf501415b5206b473702005149d74de83c10.zip |
blake2s: move compression loop to assembly
Diffstat (limited to 'src/crypto/blake2s-avx-x86_64.S')
-rw-r--r-- | src/crypto/blake2s-avx-x86_64.S | 10 |
1 files changed, 10 insertions, 0 deletions
```diff
diff --git a/src/crypto/blake2s-avx-x86_64.S b/src/crypto/blake2s-avx-x86_64.S
index f7f4b3f..6b3f8a3 100644
--- a/src/crypto/blake2s-avx-x86_64.S
+++ b/src/crypto/blake2s-avx-x86_64.S
@@ -18,6 +18,12 @@ ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
 
 .text
 ENTRY(blake2s_compress_avx)
+	movl %ecx, %ecx
+	testq %rdx, %rdx
+	je .Lendofloop
+	.align 32
+.Lbeginofloop:
+	addq %rcx, 32(%rdi)
 	vmovdqu IV+16(%rip), %xmm1
 	vmovdqu (%rsi), %xmm4
 	vpxor 32(%rdi), %xmm1, %xmm1
@@ -572,5 +578,9 @@ ENTRY(blake2s_compress_avx)
 	vpxor 16(%rdi), %xmm8, %xmm0
 	vpxor %xmm6, %xmm0, %xmm6
 	vmovups %xmm6, 16(%rdi)
+	addq $64, %rsi
+	decq %rdx
+	jnz .Lbeginofloop
+.Lendofloop:
 	ret
 ENDPROC(blake2s_compress_avx)
```