aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/chacha20_glue.c
diff options
context:
space:
mode:
author Martin Willi <martin@strongswan.org> 2018-11-11 10:36:29 +0100
committer Herbert Xu <herbert@gondor.apana.org.au> 2018-11-16 14:11:04 +0800
commit a5dd97f86211e91219807db607d740f9896b8e0b (patch)
tree df6b82f9a1256896f28a0470b9a72c8056152394 /arch/x86/crypto/chacha20_glue.c
parent crypto: x86/chacha20 - Use larger block functions more aggressively (diff)
downloadlinux-dev-a5dd97f86211e91219807db607d740f9896b8e0b.tar.xz
linux-dev-a5dd97f86211e91219807db607d740f9896b8e0b.zip
crypto: x86/chacha20 - Add a 2-block AVX2 variant
This variant uses the same principle as the single block SSSE3 variant by shuffling the state matrix after each round. With the wider AVX registers, we can do two blocks in parallel, though.

This function can increase performance and efficiency significantly for lengths that would otherwise require a 4-block function.

Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to '')
-rw-r--r-- arch/x86/crypto/chacha20_glue.c 7
1 files changed, 7 insertions, 0 deletions
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index b541da71f11e..82e46589a189 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -24,6 +24,8 @@ asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
unsigned int len);
#ifdef CONFIG_AS_AVX2
+asmlinkage void chacha20_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len);
asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len);
static bool chacha20_use_avx2;
@@ -52,6 +54,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
state[12] += chacha20_advance(bytes, 8);
return;
}
+ if (bytes > CHACHA20_BLOCK_SIZE) {
+ chacha20_2block_xor_avx2(state, dst, src, bytes);
+ state[12] += chacha20_advance(bytes, 2);
+ return;
+ }
}
#endif
while (bytes >= CHACHA20_BLOCK_SIZE * 4) {