aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/chacha20_glue.c
diff options
context:
space:
mode:
author: Martin Willi <martin@strongswan.org> 2018-11-11 10:36:25 +0100
committer: Herbert Xu <herbert@gondor.apana.org.au> 2018-11-16 14:11:04 +0800
commit: e4e72063d3c0ee9ba10faeb5645dcdaae2d733e9 (patch)
tree: a012d1564a5c5491b3488e699e71b6159d21bf0c /arch/x86/crypto/chacha20_glue.c
parent: hwrng: bcm2835 - Switch to SPDX identifier (diff)
download: linux-dev-e4e72063d3c0ee9ba10faeb5645dcdaae2d733e9.tar.xz
          linux-dev-e4e72063d3c0ee9ba10faeb5645dcdaae2d733e9.zip
crypto: x86/chacha20 - Support partial lengths in 1-block SSSE3 variant

Add a length argument to the single block function for SSSE3, so the block function may XOR only a partial length of the full block. Given that the setup code is rather cheap, the function does not process more than one block; this allows us to keep the block function selection in the C glue code.

The required branching does not negatively affect performance for full block sizes. The partial XORing uses simple "rep movsb" to copy the data before and after doing XOR in SSE. This is rather efficient on modern processors; movsw can be slightly faster, but the additional complexity is probably not worth it.

Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Diffstat (limited to '')
-rw-r--r--  arch/x86/crypto/chacha20_glue.c  11
1 files changed, 4 insertions, 7 deletions
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index dce7c5d39c2f..cc4571736ce8 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -19,7 +19,8 @@
#define CHACHA20_STATE_ALIGN 16
-asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
+asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len);
asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
#ifdef CONFIG_AS_AVX2
asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
@@ -29,8 +30,6 @@ static bool chacha20_use_avx2;
static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes)
{
- u8 buf[CHACHA20_BLOCK_SIZE];
-
#ifdef CONFIG_AS_AVX2
if (chacha20_use_avx2) {
while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
@@ -50,16 +49,14 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
state[12] += 4;
}
while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block_xor_ssse3(state, dst, src);
+ chacha20_block_xor_ssse3(state, dst, src, bytes);
bytes -= CHACHA20_BLOCK_SIZE;
src += CHACHA20_BLOCK_SIZE;
dst += CHACHA20_BLOCK_SIZE;
state[12]++;
}
if (bytes) {
- memcpy(buf, src, bytes);
- chacha20_block_xor_ssse3(state, buf, buf);
- memcpy(dst, buf, bytes);
+ chacha20_block_xor_ssse3(state, dst, src, bytes);
}
}