/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld. All Rights Reserved.
 * Copyright (C) 2017 Samuel Neves. All Rights Reserved.
 */

/*
 * BLAKE2s compression for x86-64, GNU as AT&T syntax, preprocessed by cpp.
 *
 * Both entry points below read their arguments from the same registers:
 *   %rdi  pointer to 0x30 bytes of state: two 16-byte chaining vectors at
 *         0x00 and 0x10, followed by a 16-byte vector at 0x20 whose low
 *         64 bits are advanced by %rcx for every block (presumably the
 *         t[]/f[] counter and finalization words -- confirm against the
 *         C-side state structure)
 *   %rsi  pointer to the input, consumed in 64-byte blocks
 *   %rdx  number of 64-byte blocks to process
 *   %rcx  per-block counter increment; the full 64-bit register is used,
 *         so the caller is expected to pass a zero-extended value
 *
 * Both routines return without touching the state when %rdx is zero.
 */

#include <linux/linkage.h>

/* Initialization vector, two 16-byte halves. */
.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
.align 32
IV:	.octa 0xA54FF53A3C6EF372BB67AE856A09E667
	.octa 0x5BE0CD191F83D9AB9B05688C510E527F

/* pshufb mask: rotate every 32-bit lane right by 16 bits. */
.section .rodata.cst16.ROT16, "aM", @progbits, 16
.align 16
ROT16:	.octa 0x0D0C0F0E09080B0A0504070601000302

/* pshufb mask: rotate every 32-bit lane right by 8 bits. */
.section .rodata.cst16.ROR328, "aM", @progbits, 16
.align 16
ROR328:	.octa 0x0C0F0E0D080B0A090407060500030201

#ifdef CONFIG_AS_AVX512
/*
 * Per-round vpermi2d index vectors, one 64-byte row per round.  Each row
 * permutes the message words as they were left by the previous round (the
 * AVX-512 round loop overwrites its message registers with the permuted
 * result), so only the first row holds absolute message word indices.
 */
.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 640
.align 64
SIGMA:
.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7
.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9
.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5
.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12
.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9
.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0
.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
#endif /* CONFIG_AS_AVX512 */

.text

#ifdef CONFIG_AS_SSSE3
/*
 * Gather four 32-bit message words from the block at %rsi into \dst so
 * that its lanes, low to high, hold words \w0, \w1, \w2, \w3.
 * Clobbers %r8-%r11 and %xmm8.
 */
.macro BLAKE2S_SSSE3_LOAD_MSG dst, w0, w1, w2, w3
	movl		4*\w1(%rsi),%r8d
	movl		4*\w3(%rsi),%r9d
	movl		4*\w0(%rsi),%r10d
	movl		4*\w2(%rsi),%r11d
	shlq		$0x20,%r8
	shlq		$0x20,%r9
	orq		%r10,%r8		/* r8 = w1:w0 */
	orq		%r11,%r9		/* r9 = w3:w2 */
	movq		%r8,\dst
	movq		%r9,%xmm8
	punpcklqdq	%xmm8,\dst
.endm

/*
 * Half of the G function applied to four columns at once, with the rows
 * held in %xmm0 (a), %xmm1 (b), %xmm2 (c) and %xmm3 (d):
 *   a += msg + b;  d = rotate(d ^ a) via the pshufb mask \rotmask;
 *   c += d;        b = (b ^ c) rotated right by \bits.
 * Clobbers %xmm8.
 */
.macro BLAKE2S_SSSE3_G_HALF msg, rotmask, bits
	paddd		\msg,%xmm0
	paddd		%xmm1,%xmm0
	pxor		%xmm0,%xmm3
	pshufb		\rotmask,%xmm3
	paddd		%xmm3,%xmm2
	pxor		%xmm2,%xmm1
	movdqa		%xmm1,%xmm8
	psrld		$\bits,%xmm1
	pslld		$(32-\bits),%xmm8
	por		%xmm8,%xmm1
.endm

/*
 * One full round.  The sixteen arguments are message word indices in lane
 * order for the four message vectors: \m0-\m3 and \m4-\m7 feed the two
 * halves of the column step, \m8-\m11 and \m12-\m15 the two halves of the
 * diagonal step.  %xmm12 must hold ROT16 and %xmm13 must hold ROR328.
 * Clobbers %r8-%r11 and %xmm4-%xmm8.
 */
.macro BLAKE2S_SSSE3_ROUND m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15
	/* Column step. */
	BLAKE2S_SSSE3_LOAD_MSG	%xmm4, \m0, \m1, \m2, \m3
	BLAKE2S_SSSE3_G_HALF	%xmm4, %xmm12, 12
	BLAKE2S_SSSE3_LOAD_MSG	%xmm5, \m4, \m5, \m6, \m7
	BLAKE2S_SSSE3_G_HALF	%xmm5, %xmm13, 7
	/* Rotate rows a, d and c so the diagonals line up as columns. */
	pshufd		$0x93,%xmm0,%xmm0
	pshufd		$0x4e,%xmm3,%xmm3
	pshufd		$0x39,%xmm2,%xmm2
	/* Diagonal step. */
	BLAKE2S_SSSE3_LOAD_MSG	%xmm6, \m8, \m9, \m10, \m11
	BLAKE2S_SSSE3_G_HALF	%xmm6, %xmm12, 12
	BLAKE2S_SSSE3_LOAD_MSG	%xmm7, \m12, \m13, \m14, \m15
	BLAKE2S_SSSE3_G_HALF	%xmm7, %xmm13, 7
	/* Undo the row rotation. */
	pshufd		$0x39,%xmm0,%xmm0
	pshufd		$0x4e,%xmm3,%xmm3
	pshufd		$0x93,%xmm2,%xmm2
.endm

/*
 * blake2s_compress_ssse3 - compress %rdx 64-byte blocks into the state.
 *
 * Arguments are described at the top of this file.
 *
 * Register roles inside the block loop:
 *   %xmm0, %xmm1    working rows a and b (start as the chaining value)
 *   %xmm2, %xmm3    working rows c and d
 *   %xmm10, %xmm11  chaining value saved at the start of the block
 *   %xmm12, %xmm13  ROT16 and ROR328 shuffle masks
 *   %xmm14          counter vector loaded from 0x20(%rdi)
 *   %xmm15          counter increment taken from %rcx
 *
 * Clobbers: %rsi, %rdx, %r8-%r11, %xmm0-%xmm8, %xmm10-%xmm15, flags.
 */
ENTRY(blake2s_compress_ssse3)
	testq		%rdx,%rdx
	je		.Lendofloop		/* nothing to do for zero blocks */
	movdqu		(%rdi),%xmm0
	movdqu		0x10(%rdi),%xmm1
	movdqa		ROT16(%rip),%xmm12
	movdqa		ROR328(%rip),%xmm13
	movdqu		0x20(%rdi),%xmm14
	movq		%rcx,%xmm15
	jmp		.Lbeginofloop
	.align		32
.Lbeginofloop:
	/* Remember the incoming chaining value for the feed-forward. */
	movdqa		%xmm0,%xmm10
	movdqa		%xmm1,%xmm11
	/* Advance the 64-bit counter, then build rows c and d from the IV. */
	paddq		%xmm15,%xmm14
	movdqa		IV(%rip),%xmm2
	movdqa		%xmm14,%xmm3
	pxor		IV+0x10(%rip),%xmm3

	/* Ten rounds; each row lists the message words used by that round. */
	BLAKE2S_SSSE3_ROUND	 0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
	BLAKE2S_SSSE3_ROUND	14,  4,  9, 13, 10,  8, 15,  6,  5,  1,  0, 11,  3, 12,  2,  7
	BLAKE2S_SSSE3_ROUND	11, 12,  5, 15,  8,  0,  2, 13,  9, 10,  3,  7,  4, 14,  6,  1
	BLAKE2S_SSSE3_ROUND	 7,  3, 13, 11,  9,  1, 12, 14, 15,  2,  5,  4,  8,  6, 10,  0
	BLAKE2S_SSSE3_ROUND	 9,  5,  2, 10,  0,  7,  4, 15,  3, 14, 11,  6, 13,  1, 12,  8
	BLAKE2S_SSSE3_ROUND	 2,  6,  0,  8, 12, 10, 11,  3,  1,  4,  7, 15,  9, 13,  5, 14
	BLAKE2S_SSSE3_ROUND	12,  1, 14,  4,  5, 15, 13, 10,  8,  0,  6,  9, 11,  7,  3,  2
	BLAKE2S_SSSE3_ROUND	13,  7, 12,  3, 11, 14,  1,  9,  2,  5, 15,  8, 10,  0,  4,  6
	BLAKE2S_SSSE3_ROUND	 6, 14, 11,  0, 15,  9,  3,  8, 10, 12, 13,  1,  5,  2,  7,  4
	BLAKE2S_SSSE3_ROUND	10,  8,  7,  1,  2,  4,  6,  5, 13, 15,  9,  3,  0, 11, 14, 12

	/* Feed-forward: new chaining value = old ^ (a ^ c) : (b ^ d). */
	pxor		%xmm2,%xmm0
	pxor		%xmm3,%xmm1
	pxor		%xmm10,%xmm0
	pxor		%xmm11,%xmm1
	addq		$0x40,%rsi
	decq		%rdx
	jnz		.Lbeginofloop
	/* Write the chaining value and the advanced counter back. */
	movdqu		%xmm0,(%rdi)
	movdqu		%xmm1,0x10(%rdi)
	movdqu		%xmm14,0x20(%rdi)
.Lendofloop:
	ret
ENDPROC(blake2s_compress_ssse3)
#endif /* CONFIG_AS_SSSE3 */

#ifdef CONFIG_AS_AVX512
/*
 * blake2s_compress_avx512 - compress %rdx 64-byte blocks into the state.
 *
 * Arguments are described at the top of this file.
 *
 * Register roles inside the block loop:
 *   %xmm0, %xmm1    working rows a and b (start as the chaining value)
 *   %xmm2, %xmm3    working rows c and d
 *   %xmm4           counter vector loaded from 0x20(%rdi)
 *   %xmm5           counter increment taken from %rcx
 *   %ymm6, %ymm7    message words, re-permuted in place every round
 *   %ymm8, %ymm9    SIGMA indices, then the permuted message words
 *   %xmm10, %xmm11  chaining value saved at the start of the block
 *   %xmm14, %xmm15  the two halves of IV
 *   %rax            cursor into SIGMA
 *   %cl             round counter (overwrites the low byte of the
 *                   increment argument once it has been copied to %xmm5)
 *
 * Clobbers: %rax, %rcx, %rsi, %rdx, %ymm0-%ymm11, %xmm14, %xmm15, flags.
 */
ENTRY(blake2s_compress_avx512)
	/*
	 * Bail out on a zero block count, as the SSSE3 routine does.  Without
	 * this check the decq at the bottom of the block loop would wrap and
	 * the loop would keep reading past the end of the input.
	 */
	testq		%rdx,%rdx
	je		.Lblake2s_compress_avx512_done
	vmovdqu		(%rdi),%xmm0
	vmovdqu		0x10(%rdi),%xmm1
	vmovdqu		0x20(%rdi),%xmm4
	vmovq		%rcx,%xmm5
	vmovdqa		IV(%rip),%xmm14
	vmovdqa		IV+16(%rip),%xmm15
	jmp		.Lblake2s_compress_avx512_mainloop
	.align		32
.Lblake2s_compress_avx512_mainloop:
	/* Remember the incoming chaining value for the feed-forward. */
	vmovdqa		%xmm0,%xmm10
	vmovdqa		%xmm1,%xmm11
	/* Advance the 64-bit counter, then build rows c and d from the IV. */
	vpaddq		%xmm5,%xmm4,%xmm4
	vmovdqa		%xmm14,%xmm2
	vpxor		%xmm15,%xmm4,%xmm3
	/* Load the whole 64-byte block and step the input pointer. */
	vmovdqu		(%rsi),%ymm6
	vmovdqu		0x20(%rsi),%ymm7
	addq		$0x40,%rsi
	leaq		SIGMA(%rip),%rax
	movb		$0xa,%cl		/* ten rounds */
.Lblake2s_compress_avx512_roundloop:
	/* Permute the message words with this round's SIGMA row. */
	addq		$0x40,%rax
	vmovdqa		-0x40(%rax),%ymm8
	vmovdqa		-0x20(%rax),%ymm9
	vpermi2d	%ymm7,%ymm6,%ymm8
	vpermi2d	%ymm7,%ymm6,%ymm9
	vmovdqa		%ymm8,%ymm6
	vmovdqa		%ymm9,%ymm7
	/* Column step, first half: rotations by 16 and 12. */
	vpaddd		%xmm8,%xmm0,%xmm0
	vpaddd		%xmm1,%xmm0,%xmm0
	vpxor		%xmm0,%xmm3,%xmm3
	vprord		$0x10,%xmm3,%xmm3
	vpaddd		%xmm3,%xmm2,%xmm2
	vpxor		%xmm2,%xmm1,%xmm1
	vprord		$0xc,%xmm1,%xmm1
	/* Column step, second half: rotations by 8 and 7. */
	vextracti128	$0x1,%ymm8,%xmm8
	vpaddd		%xmm8,%xmm0,%xmm0
	vpaddd		%xmm1,%xmm0,%xmm0
	vpxor		%xmm0,%xmm3,%xmm3
	vprord		$0x8,%xmm3,%xmm3
	vpaddd		%xmm3,%xmm2,%xmm2
	vpxor		%xmm2,%xmm1,%xmm1
	vprord		$0x7,%xmm1,%xmm1
	/* Rotate rows a, d and c so the diagonals line up as columns. */
	vpshufd		$0x93,%xmm0,%xmm0
	vpshufd		$0x4e,%xmm3,%xmm3
	vpshufd		$0x39,%xmm2,%xmm2
	/* Diagonal step, first half: rotations by 16 and 12. */
	vpaddd		%xmm9,%xmm0,%xmm0
	vpaddd		%xmm1,%xmm0,%xmm0
	vpxor		%xmm0,%xmm3,%xmm3
	vprord		$0x10,%xmm3,%xmm3
	vpaddd		%xmm3,%xmm2,%xmm2
	vpxor		%xmm2,%xmm1,%xmm1
	vprord		$0xc,%xmm1,%xmm1
	/* Diagonal step, second half: rotations by 8 and 7. */
	vextracti128	$0x1,%ymm9,%xmm9
	vpaddd		%xmm9,%xmm0,%xmm0
	vpaddd		%xmm1,%xmm0,%xmm0
	vpxor		%xmm0,%xmm3,%xmm3
	vprord		$0x8,%xmm3,%xmm3
	vpaddd		%xmm3,%xmm2,%xmm2
	vpxor		%xmm2,%xmm1,%xmm1
	vprord		$0x7,%xmm1,%xmm1
	/* Undo the row rotation. */
	vpshufd		$0x39,%xmm0,%xmm0
	vpshufd		$0x4e,%xmm3,%xmm3
	vpshufd		$0x93,%xmm2,%xmm2
	decb		%cl
	jne		.Lblake2s_compress_avx512_roundloop
	/* Feed-forward: new chaining value = old ^ (a ^ c) : (b ^ d). */
	vpxor		%xmm10,%xmm0,%xmm0
	vpxor		%xmm11,%xmm1,%xmm1
	vpxor		%xmm2,%xmm0,%xmm0
	vpxor		%xmm3,%xmm1,%xmm1
	decq		%rdx
	jne		.Lblake2s_compress_avx512_mainloop
	/* Write the chaining value and the advanced counter back. */
	vmovdqu		%xmm0,(%rdi)
	vmovdqu		%xmm1,0x10(%rdi)
	vmovdqu		%xmm4,0x20(%rdi)
	/* Clear the upper ymm halves before returning to non-AVX code. */
	vzeroupper
.Lblake2s_compress_avx512_done:
	retq
ENDPROC(blake2s_compress_avx512)
#endif /* CONFIG_AS_AVX512 */