diff options
author | Shawn Landden <shawn@git.icu> | 2019-05-11 14:19:51 -0300 |
---|---|---|
committer | Shawn Landden <shawn@git.icu> | 2019-05-13 16:25:28 -0500 |
commit | cc6513fd7d0e049c0c0dae0c6ef62eb6ad11afe1 (patch) | |
tree | 8ea0fe17d01d9287d090a05305f1210cf64a555d /src/crypto/zinc/poly1305/poly1305-ppc-glue.c | |
parent | [Zinc] Add PowerPC chacha20 implementation from openssl/cryptograms (diff) | |
download | wireguard-monolithic-historical-cc6513fd7d0e049c0c0dae0c6ef62eb6ad11afe1.tar.xz wireguard-monolithic-historical-cc6513fd7d0e049c0c0dae0c6ef62eb6ad11afe1.zip |
[zinc] Add PowerPC accelerated poly1305 from openssl/cryptogams
Unfortunately I am not seeing a speedup with this patch,
but it does decrease CPU usage.
It currently runs only on the outbound path, because the inbound path
runs in interrupt context, but that can be fixed in Linux.
v2: - Do not include the FPU version, as a +10% performance gain on POWER8
(admittedly a bigger gain on really old CPUs, like Old World Macs) is not
worth it, especially when a fast VSX version is available.
- Honor CONFIG_VSX.
Signed-off-by: Shawn Landden <shawn@git.icu>
Diffstat (limited to 'src/crypto/zinc/poly1305/poly1305-ppc-glue.c')
-rw-r--r-- | src/crypto/zinc/poly1305/poly1305-ppc-glue.c | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/src/crypto/zinc/poly1305/poly1305-ppc-glue.c b/src/crypto/zinc/poly1305/poly1305-ppc-glue.c new file mode 100644 index 0000000..265467e --- /dev/null +++ b/src/crypto/zinc/poly1305/poly1305-ppc-glue.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2019 Shawn Landden <shawn@git.icu>. All Rights Reserved. + */ + +#include <asm/cpufeature.h> + +asmlinkage void poly1305_init_int(void *ctx, const u8 key[16]); +asmlinkage void poly1305_blocks_int(void *ctx, const u8 *inp, size_t len, + u32 padbit); +asmlinkage void poly1305_emit_int(void *ctx, u8 mac[16], + const u32 nonce[4]); +asmlinkage void poly1305_blocks_vsx(void *ctx, const u8 *inp, size_t len, + u32 padbit); +static bool *const poly1305_nobs[] __initconst = {}; +static void __init poly1305_fpu_init(void) {} + +static inline bool poly1305_init_arch(void *ctx, + const u8 key[POLY1305_KEY_SIZE]) +{ + poly1305_init_int(ctx, key); + return true; +} + +static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp, + size_t len, const u32 padbit, + simd_context_t *simd_context) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || + PAGE_SIZE % POLY1305_BLOCK_SIZE); + + if (!IS_ENABLED(CONFIG_VSX) || + !cpu_have_feature(PPC_MODULE_FEATURE_VEC_CRYPTO) || + !simd_use(simd_context)) { + convert_to_base2_64(ctx); + poly1305_blocks_int(ctx, inp, len, padbit); + return true; + } + + for (;;) { + const size_t bytes = min_t(size_t, len, PAGE_SIZE); + + poly1305_blocks_vsx(ctx, inp, bytes, padbit); + len -= bytes; + if (!len) + break; + inp += bytes; + simd_relax(simd_context); + } + return true; +} + +static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE], + const u32 nonce[4], + simd_context_t *simd_context) +{ + poly1305_emit_int(ctx, mac, nonce); + return true; +} |