diff options
author | Martin Willi <martin@strongswan.org> | 2018-11-11 10:36:29 +0100 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2018-11-16 14:11:04 +0800 |
commit | a5dd97f86211e91219807db607d740f9896b8e0b (patch) | |
tree | df6b82f9a1256896f28a0470b9a72c8056152394 /arch/x86/crypto/chacha20_glue.c | |
parent | crypto: x86/chacha20 - Use larger block functions more aggressively (diff) | |
download | linux-dev-a5dd97f86211e91219807db607d740f9896b8e0b.tar.xz linux-dev-a5dd97f86211e91219807db607d740f9896b8e0b.zip |
crypto: x86/chacha20 - Add a 2-block AVX2 variant
This variant uses the same principle as the single-block SSSE3 variant,
shuffling the state matrix after each round. With the wider AVX
registers, however, we can process two blocks in parallel.
This function can increase performance and efficiency significantly for
lengths that would otherwise require a 4-block function.
Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to '')
-rw-r--r-- | arch/x86/crypto/chacha20_glue.c | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index b541da71f11e..82e46589a189 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -24,6 +24,8 @@ asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 					  unsigned int len);
 #ifdef CONFIG_AS_AVX2
+asmlinkage void chacha20_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+					 unsigned int len);
 asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 					 unsigned int len);
 static bool chacha20_use_avx2;
@@ -52,6 +54,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
 			state[12] += chacha20_advance(bytes, 8);
 			return;
 		}
+		if (bytes > CHACHA20_BLOCK_SIZE) {
+			chacha20_2block_xor_avx2(state, dst, src, bytes);
+			state[12] += chacha20_advance(bytes, 2);
+			return;
+		}
 	}
 #endif
 	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {