aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/chacha_glue.c
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@google.com>2018-12-04 22:20:03 -0800
committerHerbert Xu <herbert@gondor.apana.org.au>2018-12-13 18:24:58 +0800
commit8b65f34c5821e7361488dc668d21195ea4c9f14d (patch)
treed823dbf197f5d2a4d4d9e895954d880926b97c5f /arch/x86/crypto/chacha_glue.c
parentcrypto: x86/chacha20 - add XChaCha20 support (diff)
downloadlinux-dev-8b65f34c5821e7361488dc668d21195ea4c9f14d.tar.xz
linux-dev-8b65f34c5821e7361488dc668d21195ea4c9f14d.zip
crypto: x86/chacha20 - refactor to allow varying number of rounds
In preparation for adding XChaCha12 support, rename/refactor the x86_64 SIMD implementations of ChaCha20 to support different numbers of rounds. Reviewed-by: Martin Willi <martin@strongswan.org> Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to '')
-rw-r--r--arch/x86/crypto/chacha_glue.c (renamed from arch/x86/crypto/chacha20_glue.c)150
1 files changed, 78 insertions, 72 deletions
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha_glue.c
index 70d388e4a3a2..35fd02b50d27 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -1,5 +1,6 @@
/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ * x64 SIMD accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
*
* Copyright (C) 2015 Martin Willi
*
@@ -17,120 +18,124 @@
#include <asm/fpu/api.h>
#include <asm/simd.h>
-#define CHACHA20_STATE_ALIGN 16
+#define CHACHA_STATE_ALIGN 16
-asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void hchacha20_block_ssse3(const u32 *state, u32 *out);
+asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
#ifdef CONFIG_AS_AVX2
-asmlinkage void chacha20_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void chacha20_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-static bool chacha20_use_avx2;
+asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+static bool chacha_use_avx2;
#ifdef CONFIG_AS_AVX512
-asmlinkage void chacha20_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void chacha20_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void chacha20_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-static bool chacha20_use_avx512vl;
+asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+static bool chacha_use_avx512vl;
#endif
#endif
-static unsigned int chacha20_advance(unsigned int len, unsigned int maxblocks)
+static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
}
-static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
+static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
{
#ifdef CONFIG_AS_AVX2
#ifdef CONFIG_AS_AVX512
- if (chacha20_use_avx512vl) {
+ if (chacha_use_avx512vl) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
- chacha20_8block_xor_avx512vl(state, dst, src, bytes);
+ chacha_8block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
src += CHACHA_BLOCK_SIZE * 8;
dst += CHACHA_BLOCK_SIZE * 8;
state[12] += 8;
}
if (bytes > CHACHA_BLOCK_SIZE * 4) {
- chacha20_8block_xor_avx512vl(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 8);
+ chacha_8block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state[12] += chacha_advance(bytes, 8);
return;
}
if (bytes > CHACHA_BLOCK_SIZE * 2) {
- chacha20_4block_xor_avx512vl(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 4);
+ chacha_4block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state[12] += chacha_advance(bytes, 4);
return;
}
if (bytes) {
- chacha20_2block_xor_avx512vl(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 2);
+ chacha_2block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state[12] += chacha_advance(bytes, 2);
return;
}
}
#endif
- if (chacha20_use_avx2) {
+ if (chacha_use_avx2) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
- chacha20_8block_xor_avx2(state, dst, src, bytes);
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
src += CHACHA_BLOCK_SIZE * 8;
dst += CHACHA_BLOCK_SIZE * 8;
state[12] += 8;
}
if (bytes > CHACHA_BLOCK_SIZE * 4) {
- chacha20_8block_xor_avx2(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 8);
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 8);
return;
}
if (bytes > CHACHA_BLOCK_SIZE * 2) {
- chacha20_4block_xor_avx2(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 4);
+ chacha_4block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 4);
return;
}
if (bytes > CHACHA_BLOCK_SIZE) {
- chacha20_2block_xor_avx2(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 2);
+ chacha_2block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 2);
return;
}
}
#endif
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
- chacha20_4block_xor_ssse3(state, dst, src, bytes);
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 4;
src += CHACHA_BLOCK_SIZE * 4;
dst += CHACHA_BLOCK_SIZE * 4;
state[12] += 4;
}
if (bytes > CHACHA_BLOCK_SIZE) {
- chacha20_4block_xor_ssse3(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 4);
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 4);
return;
}
if (bytes) {
- chacha20_block_xor_ssse3(state, dst, src, bytes);
+ chacha_block_xor_ssse3(state, dst, src, bytes, nrounds);
state[12]++;
}
}
-static int chacha20_simd_stream_xor(struct skcipher_request *req,
- struct chacha_ctx *ctx, u8 *iv)
+static int chacha_simd_stream_xor(struct skcipher_request *req,
+ struct chacha_ctx *ctx, u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
struct skcipher_walk walk;
int err;
- BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
- state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
+ BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
+ state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
err = skcipher_walk_virt(&walk, req, true);
@@ -142,8 +147,8 @@ static int chacha20_simd_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = round_down(nbytes, walk.stride);
- chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes);
+ chacha_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, ctx->nrounds);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -151,7 +156,7 @@ static int chacha20_simd_stream_xor(struct skcipher_request *req,
return err;
}
-static int chacha20_simd(struct skcipher_request *req)
+static int chacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -161,12 +166,12 @@ static int chacha20_simd(struct skcipher_request *req)
return crypto_chacha_crypt(req);
kernel_fpu_begin();
- err = chacha20_simd_stream_xor(req, ctx, req->iv);
+ err = chacha_simd_stream_xor(req, ctx, req->iv);
kernel_fpu_end();
return err;
}
-static int xchacha20_simd(struct skcipher_request *req)
+static int xchacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -178,17 +183,18 @@ static int xchacha20_simd(struct skcipher_request *req)
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
return crypto_xchacha_crypt(req);
- BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
- state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
+ BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
+ state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
crypto_chacha_init(state, ctx, req->iv);
kernel_fpu_begin();
- hchacha20_block_ssse3(state, subctx.key);
+ hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
memcpy(&real_iv[8], req->iv + 16, 8);
- err = chacha20_simd_stream_xor(req, &subctx, real_iv);
+ err = chacha_simd_stream_xor(req, &subctx, real_iv);
kernel_fpu_end();
@@ -209,8 +215,8 @@ static struct skcipher_alg algs[] = {
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
- .encrypt = chacha20_simd,
- .decrypt = chacha20_simd,
+ .encrypt = chacha_simd,
+ .decrypt = chacha_simd,
}, {
.base.cra_name = "xchacha20",
.base.cra_driver_name = "xchacha20-simd",
@@ -224,40 +230,40 @@ static struct skcipher_alg algs[] = {
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
- .encrypt = xchacha20_simd,
- .decrypt = xchacha20_simd,
+ .encrypt = xchacha_simd,
+ .decrypt = xchacha_simd,
},
};
-static int __init chacha20_simd_mod_init(void)
+static int __init chacha_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_SSSE3))
return -ENODEV;
#ifdef CONFIG_AS_AVX2
- chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+ chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifdef CONFIG_AS_AVX512
- chacha20_use_avx512vl = chacha20_use_avx2 &&
- boot_cpu_has(X86_FEATURE_AVX512VL) &&
- boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
+ chacha_use_avx512vl = chacha_use_avx2 &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
#endif
#endif
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
-static void __exit chacha20_simd_mod_fini(void)
+static void __exit chacha_simd_mod_fini(void)
{
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-simd");
MODULE_ALIAS_CRYPTO("xchacha20");