diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-08-28 23:50:35 -0600 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2018-09-03 23:52:11 -0600 |
commit | 4a0e319af86c0d38304535293f6fc32fe436ef1d (patch) | |
tree | 6fca1e89becf3ff1afdcec7b6bc725e256af5811 /src/crypto/zinc/chacha20 | |
parent | uapi: reformat (diff) | |
download | wireguard-monolithic-historical-4a0e319af86c0d38304535293f6fc32fe436ef1d.tar.xz wireguard-monolithic-historical-4a0e319af86c0d38304535293f6fc32fe436ef1d.zip |
crypto: import zinc
Diffstat (limited to '')
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20-arm-glue.h | 50 | ||||
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20-arm.S (renamed from src/crypto/chacha20-arm.S) | 2 | ||||
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20-arm64.S (renamed from src/crypto/chacha20-arm64.S) | 2 | ||||
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20-mips-glue.h | 28 | ||||
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20-mips.S (renamed from src/crypto/chacha20-mips.S) | 0 | ||||
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20-x86_64-glue.h | 104 | ||||
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20-x86_64.S (renamed from src/crypto/chacha20-x86_64.S) | 2 | ||||
-rw-r--r-- | src/crypto/zinc/chacha20/chacha20.c | 168 | ||||
-rw-r--r-- | src/crypto/zinc/chacha20poly1305.c (renamed from src/crypto/chacha20poly1305.c) | 171 |
9 files changed, 468 insertions, 59 deletions
diff --git a/src/crypto/zinc/chacha20/chacha20-arm-glue.h b/src/crypto/zinc/chacha20/chacha20-arm-glue.h new file mode 100644 index 0000000..d323615 --- /dev/null +++ b/src/crypto/zinc/chacha20/chacha20-arm-glue.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <zinc/chacha20.h> +#include <asm/hwcap.h> +#include <asm/neon.h> + +asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && \ + (defined(CONFIG_64BIT) || __LINUX_ARM_ARCH__ >= 7) +#define ARM_USE_NEON +asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +#endif + +static bool chacha20_use_neon __ro_after_init; + +void __init chacha20_fpu_init(void) +{ +#if defined(CONFIG_ARM64) + chacha20_use_neon = elf_hwcap & HWCAP_ASIMD; +#elif defined(CONFIG_ARM) + chacha20_use_neon = elf_hwcap & HWCAP_NEON; +#endif +} + +static inline bool chacha20_arch(u8 *dst, const u8 *src, const size_t len, + const u32 key[8], const u32 counter[4], + simd_context_t simd_context) +{ +#if defined(ARM_USE_NEON) + if (simd_context == HAVE_FULL_SIMD && chacha20_use_neon) { + chacha20_neon(dst, src, len, key, counter); + return true; + } +#endif + chacha20_arm(dst, src, len, key, counter); + return true; +} + +static inline bool hchacha20_arch(u8 *derived_key, const u8 *nonce, + const u8 *key, simd_context_t simd_context) +{ + return false; +} + +#define HAVE_CHACHA20_ARCH_IMPLEMENTATION diff --git a/src/crypto/chacha20-arm.S b/src/crypto/zinc/chacha20/chacha20-arm.S index 601b4e3..4b2090f 100644 --- a/src/crypto/chacha20-arm.S +++ b/src/crypto/zinc/chacha20/chacha20-arm.S @@ -2,6 +2,8 @@ * * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. + * + * This is based in part on Andy Polyakov's implementation from OpenSSL. */ #include <linux/linkage.h> diff --git a/src/crypto/chacha20-arm64.S b/src/crypto/zinc/chacha20/chacha20-arm64.S index c3d1243..a70df6b 100644 --- a/src/crypto/chacha20-arm64.S +++ b/src/crypto/zinc/chacha20/chacha20-arm64.S @@ -2,6 +2,8 @@ * * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. + * + * This is based in part on Andy Polyakov's implementation from OpenSSL. */ #include <linux/linkage.h> diff --git a/src/crypto/zinc/chacha20/chacha20-mips-glue.h b/src/crypto/zinc/chacha20/chacha20-mips-glue.h new file mode 100644 index 0000000..5b2c8ce --- /dev/null +++ b/src/crypto/zinc/chacha20/chacha20-mips-glue.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <zinc/chacha20.h> + +asmlinkage void chacha20_mips(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +void __init chacha20_fpu_init(void) +{ +} + +static inline bool chacha20_arch(u8 *dst, const u8 *src, const size_t len, + const u32 key[8], const u32 counter[4], + simd_context_t simd_context) +{ + chacha20_mips(dst, src, len, key, counter); + return true; +} + +static inline bool hchacha20_arch(u8 *derived_key, const u8 *nonce, + const u8 *key, simd_context_t simd_context) +{ + return false; +} + +#define HAVE_CHACHA20_ARCH_IMPLEMENTATION diff --git a/src/crypto/chacha20-mips.S b/src/crypto/zinc/chacha20/chacha20-mips.S index 77da2c2..77da2c2 100644 --- a/src/crypto/chacha20-mips.S +++ b/src/crypto/zinc/chacha20/chacha20-mips.S diff --git a/src/crypto/zinc/chacha20/chacha20-x86_64-glue.h b/src/crypto/zinc/chacha20/chacha20-x86_64-glue.h new file mode 100644 index 0000000..616813d --- /dev/null +++ b/src/crypto/zinc/chacha20/chacha20-x86_64-glue.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <zinc/chacha20.h> +#include <asm/fpu/api.h> +#include <asm/cpufeature.h> +#include <asm/processor.h> +#include <asm/intel-family.h> + +#ifdef CONFIG_AS_SSSE3 +asmlinkage void hchacha20_ssse3(u8 *derived_key, const u8 *nonce, + const u8 *key); +asmlinkage void chacha20_ssse3(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +#endif +#ifdef CONFIG_AS_AVX2 +asmlinkage void chacha20_avx2(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +#endif +#ifdef CONFIG_AS_AVX512 +asmlinkage void chacha20_avx512(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +asmlinkage void chacha20_avx512vl(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4]); +#endif + +static bool chacha20_use_ssse3 __ro_after_init; +static bool chacha20_use_avx2 __ro_after_init; +static bool chacha20_use_avx512 __ro_after_init; +static bool chacha20_use_avx512vl __ro_after_init; + +void __init chacha20_fpu_init(void) +{ + chacha20_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3); + chacha20_use_avx2 = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); +#ifndef COMPAT_CANNOT_USE_AVX512 + chacha20_use_avx512 = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL) && + /* Skylake downclocks unacceptably much when using zmm. */ + boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X; + chacha20_use_avx512vl = + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL); +#endif +} + +static inline bool chacha20_arch(u8 *dst, const u8 *src, const size_t len, + const u32 key[8], const u32 counter[4], + simd_context_t simd_context) +{ + if (simd_context != HAVE_FULL_SIMD) + return false; + +#ifdef CONFIG_AS_AVX512 + if (chacha20_use_avx512) { + chacha20_avx512(dst, src, len, key, counter); + return true; + } + if (chacha20_use_avx512vl) { + chacha20_avx512vl(dst, src, len, key, counter); + return true; + } +#endif +#ifdef CONFIG_AS_AVX2 + if (chacha20_use_avx2) { + chacha20_avx2(dst, src, len, key, counter); + return true; + } +#endif +#ifdef CONFIG_AS_SSSE3 + if (chacha20_use_ssse3) { + chacha20_ssse3(dst, src, len, key, counter); + return true; + } +#endif + return false; +} + +static inline bool hchacha20_arch(u8 *derived_key, const u8 *nonce, + const u8 *key, simd_context_t simd_context) +{ +#if defined(CONFIG_AS_SSSE3) + if (simd_context == HAVE_FULL_SIMD && chacha20_use_ssse3) { + hchacha20_ssse3(derived_key, nonce, key); + return true; + } +#endif + return false; +} + +#define HAVE_CHACHA20_ARCH_IMPLEMENTATION diff --git a/src/crypto/chacha20-x86_64.S b/src/crypto/zinc/chacha20/chacha20-x86_64.S index 39883f3..2451feb 100644 --- a/src/crypto/chacha20-x86_64.S +++ b/src/crypto/zinc/chacha20/chacha20-x86_64.S @@ -3,6 +3,8 @@ * Copyright (C) 2017 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. + * + * This is based in part on Andy Polyakov's implementation from OpenSSL. */ #include <linux/linkage.h> diff --git a/src/crypto/zinc/chacha20/chacha20.c b/src/crypto/zinc/chacha20/chacha20.c new file mode 100644 index 0000000..ab5ef07 --- /dev/null +++ b/src/crypto/zinc/chacha20/chacha20.c @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Implementation of the ChaCha20 stream cipher. + * + * Information: https://cr.yp.to/chacha.html + */ + +#include <zinc/chacha20.h> + +#include <linux/kernel.h> +#include <crypto/algapi.h> + +#ifndef HAVE_CHACHA20_ARCH_IMPLEMENTATION +void __init chacha20_fpu_init(void) +{ +} +static inline bool chacha20_arch(u8 *out, const u8 *in, const size_t len, + const u32 key[8], const u32 counter[4], + simd_context_t simd_context) +{ + return false; +} +static inline bool hchacha20_arch(u8 *derived_key, const u8 *nonce, + const u8 *key, simd_context_t simd_context) +{ + return false; +} +#endif + +#define EXPAND_32_BYTE_K 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U + +#define QUARTER_ROUND(x, a, b, c, d) ( \ + x[a] += x[b], \ + x[d] = rol32((x[d] ^ x[a]), 16), \ + x[c] += x[d], \ + x[b] = rol32((x[b] ^ x[c]), 12), \ + x[a] += x[b], \ + x[d] = rol32((x[d] ^ x[a]), 8), \ + x[c] += x[d], \ + x[b] = rol32((x[b] ^ x[c]), 7) \ +) + +#define C(i, j) (i * 4 + j) + +#define DOUBLE_ROUND(x) ( \ + /* Column Round */ \ + QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \ + QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \ + QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \ + QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \ + /* Diagonal Round */ \ + QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \ + QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \ + QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \ + QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \ +) + +#define TWENTY_ROUNDS(x) ( \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x), \ + DOUBLE_ROUND(x) \ +) + +static void chacha20_block_generic(__le32 *stream, u32 *state) +{ + u32 x[CHACHA20_BLOCK_SIZE / sizeof(u32)]; + int i; + + for (i = 0; i < ARRAY_SIZE(x); ++i) + x[i] = state[i]; + + TWENTY_ROUNDS(x); + + for (i = 0; i < ARRAY_SIZE(x); ++i) + stream[i] = cpu_to_le32(x[i] + state[i]); + + ++state[12]; +} + +static void chacha20_generic(u8 *out, const u8 *in, u32 len, const u32 key[8], + const u32 counter[4]) +{ + __le32 buf[CHACHA20_BLOCK_SIZE / sizeof(__le32)]; + u32 x[] = { + EXPAND_32_BYTE_K, + key[0], key[1], key[2], key[3], + key[4], key[5], key[6], key[7], + counter[0], counter[1], counter[2], counter[3] + }; + + if (out != in) + memmove(out, in, len); + + while (len >= CHACHA20_BLOCK_SIZE) { + chacha20_block_generic(buf, x); + crypto_xor(out, (u8 *)buf, CHACHA20_BLOCK_SIZE); + len -= CHACHA20_BLOCK_SIZE; + out += CHACHA20_BLOCK_SIZE; + } + if (len) { + chacha20_block_generic(buf, x); + crypto_xor(out, (u8 *)buf, len); + } +} + +void chacha20(struct chacha20_ctx *state, u8 *dst, const u8 *src, u32 len, + simd_context_t simd_context) +{ + if (!chacha20_arch(dst, src, len, state->key, state->counter, + simd_context)) + chacha20_generic(dst, src, len, state->key, state->counter); + state->counter[0] += (len + 63) / 64; +} +EXPORT_SYMBOL(chacha20); + +static void hchacha20_generic(u8 derived_key[CHACHA20_KEY_SIZE], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE]) +{ + __le32 *out = (__force __le32 *)derived_key; + u32 x[] = { EXPAND_32_BYTE_K, + get_unaligned_le32(key + 0), + get_unaligned_le32(key + 4), + get_unaligned_le32(key + 8), + get_unaligned_le32(key + 12), + get_unaligned_le32(key + 16), + get_unaligned_le32(key + 20), + get_unaligned_le32(key + 24), + get_unaligned_le32(key + 28), + get_unaligned_le32(nonce + 0), + get_unaligned_le32(nonce + 4), + get_unaligned_le32(nonce + 8), + get_unaligned_le32(nonce + 12) + }; + + TWENTY_ROUNDS(x); + + out[0] = cpu_to_le32(x[0]); + out[1] = cpu_to_le32(x[1]); + out[2] = cpu_to_le32(x[2]); + out[3] = cpu_to_le32(x[3]); + out[4] = cpu_to_le32(x[12]); + out[5] = cpu_to_le32(x[13]); + out[6] = cpu_to_le32(x[14]); + out[7] = cpu_to_le32(x[15]); +} + +/* Derived key should be 32-bit aligned */ +void hchacha20(u8 derived_key[CHACHA20_KEY_SIZE], + const u8 nonce[HCHACHA20_NONCE_SIZE], + const u8 key[HCHACHA20_KEY_SIZE], simd_context_t simd_context) +{ + if (!hchacha20_arch(derived_key, nonce, key, simd_context)) + hchacha20_generic(derived_key, nonce, key); +} +/* Deliberately not EXPORT_SYMBOL'd, since there are few reasons why somebody + * should be using this directly, rather than via xchacha20. Revisit only in + * the unlikely event that somebody has a good reason to export this. + */ diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/zinc/chacha20poly1305.c index 3e3af5b..3991482 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/zinc/chacha20poly1305.c @@ -1,12 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 * * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * This is an implementation of the ChaCha20Poly1305 AEAD construction. + * + * Information: https://tools.ietf.org/html/rfc8439 */ -#include "chacha20poly1305.h" -#include "chacha20.h" -#include "poly1305.h" - +#include <zinc/chacha20poly1305.h> +#include <zinc/chacha20.h> +#include <zinc/poly1305.h> #include <asm/unaligned.h> #include <linux/kernel.h> #include <crypto/scatterwalk.h> @@ -26,33 +29,38 @@ static struct blkcipher_desc chacha20_desc = { .tfm = &chacha20_cipher }; -static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, - const u8 *ad, const size_t ad_len, - const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - simd_context_t simd_context) +static inline void +__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, const u64 nonce, + const u8 key[CHACHA20POLY1305_KEYLEN], + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; union { u8 block0[POLY1305_KEY_SIZE]; __le64 lens[2]; - } b = {{ 0 }}; + } b = { { 0 } }; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + simd_context); poly1305_init(&poly1305_state, b.block0, simd_context); poly1305_update(&poly1305_state, ad, ad_len, simd_context); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + simd_context); chacha20(&chacha20_state, dst, src, src_len, simd_context); poly1305_update(&poly1305_state, dst, src_len, simd_context); - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, + simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + simd_context); poly1305_finish(&poly1305_state, dst + src_len, simd_context); @@ -62,18 +70,23 @@ static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, - const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEYLEN]) { simd_context_t simd_context; simd_context = simd_get(); - __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_context); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, + simd_context); simd_put(simd_context); } +EXPORT_SYMBOL(chacha20poly1305_encrypt); -bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, +bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, + struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, - const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEYLEN], simd_context_t simd_context) { struct poly1305_ctx poly1305_state; @@ -84,39 +97,50 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr u8 block0[POLY1305_KEY_SIZE]; u8 mac[POLY1305_MAC_SIZE]; __le64 lens[2]; - } b = {{ 0 }}; + } b = { { 0 } }; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + simd_context); poly1305_init(&poly1305_state, b.block0, simd_context); poly1305_update(&poly1305_state, ad, ad_len, simd_context); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + simd_context); if (likely(src_len)) { blkcipher_walk_init(&walk, dst, src, src_len); - ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE); + ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, + CHACHA20_BLOCK_SIZE); while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, simd_context); - poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, simd_context); - ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); + size_t chunk_len = + rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); + + chacha20(&chacha20_state, walk.dst.virt.addr, + walk.src.virt.addr, chunk_len, simd_context); + poly1305_update(&poly1305_state, walk.dst.virt.addr, + chunk_len, simd_context); + ret = blkcipher_walk_done(&chacha20_desc, &walk, + walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, simd_context); - poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, simd_context); + chacha20(&chacha20_state, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes, simd_context); + poly1305_update(&poly1305_state, walk.dst.virt.addr, + walk.nbytes, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, 0); } } if (unlikely(ret)) goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, + simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + simd_context); poly1305_finish(&poly1305_state, b.mac, simd_context); scatterwalk_map_and_copy(b.mac, dst, src_len, sizeof(b.mac), 1); @@ -125,11 +149,13 @@ err: memzero_explicit(&b, sizeof(b)); return !ret; } +EXPORT_SYMBOL(chacha20poly1305_encrypt_sg); -static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, - const u8 *ad, const size_t ad_len, - const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], - simd_context_t simd_context) +static inline bool +__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, + const u8 *ad, const size_t ad_len, const u64 nonce, + const u8 key[CHACHA20POLY1305_KEYLEN], + simd_context_t simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -139,25 +165,29 @@ static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size u8 block0[POLY1305_KEY_SIZE]; u8 mac[POLY1305_MAC_SIZE]; __le64 lens[2]; - } b = {{ 0 }}; + } b = { { 0 } }; if (unlikely(src_len < POLY1305_MAC_SIZE)) return false; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + simd_context); poly1305_init(&poly1305_state, b.block0, simd_context); poly1305_update(&poly1305_state, ad, ad_len, simd_context); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + simd_context); dst_len = src_len - POLY1305_MAC_SIZE; poly1305_update(&poly1305_state, src, dst_len, simd_context); - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, + simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + simd_context); poly1305_finish(&poly1305_state, b.mac, simd_context); @@ -173,19 +203,24 @@ static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, - const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]) + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEYLEN]) { simd_context_t simd_context, ret; simd_context = simd_get(); - ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_context); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, + key, simd_context); simd_put(simd_context); return ret; } +EXPORT_SYMBOL(chacha20poly1305_decrypt); -bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, +bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, + struct scatterlist *src, const size_t src_len, const u8 *ad, const size_t ad_len, - const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN], + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEYLEN], simd_context_t simd_context) { struct poly1305_ctx poly1305_state; @@ -200,43 +235,54 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr u8 computed_mac[POLY1305_MAC_SIZE]; }; __le64 lens[2]; - } b = {{ 0 }}; + } b = { { 0 } }; if (unlikely(src_len < POLY1305_MAC_SIZE)) return false; chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context); + chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), + simd_context); poly1305_init(&poly1305_state, b.block0, simd_context); poly1305_update(&poly1305_state, ad, ad_len, simd_context); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, + simd_context); dst_len = src_len - POLY1305_MAC_SIZE; if (likely(dst_len)) { blkcipher_walk_init(&walk, dst, src, dst_len); - ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE); + ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, + CHACHA20_BLOCK_SIZE); while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - - poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, simd_context); - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, simd_context); - ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); + size_t chunk_len = + rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); + + poly1305_update(&poly1305_state, walk.src.virt.addr, + chunk_len, simd_context); + chacha20(&chacha20_state, walk.dst.virt.addr, + walk.src.virt.addr, chunk_len, simd_context); + ret = blkcipher_walk_done(&chacha20_desc, &walk, + walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { - poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, simd_context); - chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, simd_context); + poly1305_update(&poly1305_state, walk.src.virt.addr, + walk.nbytes, simd_context); + chacha20(&chacha20_state, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes, simd_context); ret = blkcipher_walk_done(&chacha20_desc, &walk, 0); } } if (unlikely(ret)) goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, simd_context); + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, + simd_context); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); + poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), + simd_context); poly1305_finish(&poly1305_state, b.computed_mac, simd_context); @@ -247,6 +293,7 @@ err: memzero_explicit(&b, sizeof(b)); return !ret; } +EXPORT_SYMBOL(chacha20poly1305_decrypt_sg); void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, @@ -257,10 +304,13 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); hchacha20(derived_key, nonce, key, simd_context); - __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, get_unaligned_le64(nonce + 16), derived_key, simd_context); + __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, + get_unaligned_le64(nonce + 16), + derived_key, simd_context); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); simd_put(simd_context); } +EXPORT_SYMBOL(xchacha20poly1305_encrypt); bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, @@ -271,10 +321,13 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); hchacha20(derived_key, nonce, key, simd_context); - ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, get_unaligned_le64(nonce + 16), derived_key, simd_context); + ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, + get_unaligned_le64(nonce + 16), + derived_key, simd_context); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); simd_put(simd_context); return ret; } +EXPORT_SYMBOL(xchacha20poly1305_decrypt); -#include "../selftest/chacha20poly1305.h" +#include "selftest/chacha20poly1305.h" |