summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2018-09-27 01:49:19 +0200
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2018-10-02 03:41:49 +0200
commit: 0c81ba3fbc6b5da55c0a4349d603c5a533ecd63b (patch)
tree: 37939646450d5eaf71376391cef8bb7087a0671d
parent: chacha20: test multiple page span in selftest (diff)
download: wireguard-monolithic-historical-0c81ba3fbc6b5da55c0a4349d603c5a533ecd63b.tar.xz
download: wireguard-monolithic-historical-0c81ba3fbc6b5da55c0a4349d603c5a533ecd63b.zip
chacha20: feed fpu functions PAGE_SIZE at a time
Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
-rw-r--r-- src/crypto/zinc/chacha20/chacha20-arm-glue.h | 31
-rw-r--r-- src/crypto/zinc/chacha20/chacha20-mips-glue.h | 2
-rw-r--r-- src/crypto/zinc/chacha20/chacha20-x86_64-glue.h | 40
-rw-r--r-- src/crypto/zinc/chacha20/chacha20.c | 4
4 files changed, 54 insertions, 23 deletions
diff --git a/src/crypto/zinc/chacha20/chacha20-arm-glue.h b/src/crypto/zinc/chacha20/chacha20-arm-glue.h
index b8d9e12..881e16c 100644
--- a/src/crypto/zinc/chacha20/chacha20-arm-glue.h
+++ b/src/crypto/zinc/chacha20/chacha20-arm-glue.h
@@ -38,16 +38,33 @@ static void __init chacha20_fpu_init(void)
}
static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *dst,
- const u8 *src, const size_t len,
+ const u8 *src, size_t len,
simd_context_t *simd_context)
{
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && chacha20_use_neon &&
- len >= CHACHA20_BLOCK_SIZE * 3 && simd_use(simd_context))
- chacha20_neon(dst, src, len, state->key, state->counter);
- else
- chacha20_arm(dst, src, len, state->key, state->counter);
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
+ PAGE_SIZE % CHACHA20_BLOCK_SIZE);
+
+ for (;;) {
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && chacha20_use_neon &&
+ len >= CHACHA20_BLOCK_SIZE * 3 && simd_use(simd_context)) {
+ const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+
+ chacha20_neon(dst, src, bytes, state->key, state->counter);
+ state->counter[0] += (bytes + 63) / 64;
+ len -= bytes;
+ if (!len)
+ break;
+ dst += bytes;
+ src += bytes;
+ simd_relax(simd_context);
+ } else {
+ chacha20_arm(dst, src, len, state->key, state->counter);
+ state->counter[0] += (len + 63) / 64;
+ break;
+ }
+ }
- state->counter[0] += (len + 63) / 64;
return true;
}
diff --git a/src/crypto/zinc/chacha20/chacha20-mips-glue.h b/src/crypto/zinc/chacha20/chacha20-mips-glue.h
index 13e9e8d..3904b34 100644
--- a/src/crypto/zinc/chacha20/chacha20-mips-glue.h
+++ b/src/crypto/zinc/chacha20/chacha20-mips-glue.h
@@ -11,7 +11,7 @@ static void __init chacha20_fpu_init(void)
}
static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *dst,
- const u8 *src, const size_t len,
+ const u8 *src, size_t len,
simd_context_t *simd_context)
{
chacha20_mips((u32 *)state, dst, src, len);
diff --git a/src/crypto/zinc/chacha20/chacha20-x86_64-glue.h b/src/crypto/zinc/chacha20/chacha20-x86_64-glue.h
index 03075c9..2939f1e 100644
--- a/src/crypto/zinc/chacha20/chacha20-x86_64-glue.h
+++ b/src/crypto/zinc/chacha20/chacha20-x86_64-glue.h
@@ -51,26 +51,40 @@ static void __init chacha20_fpu_init(void)
}
static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *dst,
- const u8 *src, const size_t len,
+ const u8 *src, size_t len,
simd_context_t *simd_context)
{
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
+ PAGE_SIZE % CHACHA20_BLOCK_SIZE);
+
if (!IS_ENABLED(CONFIG_AS_SSSE3) || !chacha20_use_ssse3 ||
len <= CHACHA20_BLOCK_SIZE || !simd_use(simd_context))
return false;
- if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512 &&
- len >= CHACHA20_BLOCK_SIZE * 8)
- chacha20_avx512(dst, src, len, state->key, state->counter);
- else if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512vl &&
- len >= CHACHA20_BLOCK_SIZE * 4)
- chacha20_avx512vl(dst, src, len, state->key, state->counter);
- else if (IS_ENABLED(CONFIG_AS_AVX2) && chacha20_use_avx2 &&
- len >= CHACHA20_BLOCK_SIZE * 4)
- chacha20_avx2(dst, src, len, state->key, state->counter);
- else
- chacha20_ssse3(dst, src, len, state->key, state->counter);
+ for (;;) {
+ const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+
+ if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512 &&
+ len >= CHACHA20_BLOCK_SIZE * 8)
+ chacha20_avx512(dst, src, bytes, state->key, state->counter);
+ else if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512vl &&
+ len >= CHACHA20_BLOCK_SIZE * 4)
+ chacha20_avx512vl(dst, src, bytes, state->key, state->counter);
+ else if (IS_ENABLED(CONFIG_AS_AVX2) && chacha20_use_avx2 &&
+ len >= CHACHA20_BLOCK_SIZE * 4)
+ chacha20_avx2(dst, src, bytes, state->key, state->counter);
+ else
+ chacha20_ssse3(dst, src, bytes, state->key, state->counter);
+ state->counter[0] += (bytes + 63) / 64;
+ len -= bytes;
+ if (!len)
+ break;
+ dst += bytes;
+ src += bytes;
+ simd_relax(simd_context);
+ }
- state->counter[0] += (len + 63) / 64;
return true;
}
diff --git a/src/crypto/zinc/chacha20/chacha20.c b/src/crypto/zinc/chacha20/chacha20.c
index e3fcbe8..c767e02 100644
--- a/src/crypto/zinc/chacha20/chacha20.c
+++ b/src/crypto/zinc/chacha20/chacha20.c
@@ -25,8 +25,8 @@
void __init chacha20_fpu_init(void)
{
}
-static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *out,
- const u8 *in, const size_t len,
+static inline bool chacha20_arch(struct chacha20_ctx *state, u8 *dst,
+ const u8 *src, size_t len,
simd_context_t *simd_context)
{
return false;