From 6d7f0b0a746b06a903ec8e14fe14cd0605fb210f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 16 Jun 2018 04:52:35 +0200 Subject: simd: encapsulate fpu amortization into nice functions --- src/crypto/chacha20poly1305.c | 17 ++++++------ src/crypto/chacha20poly1305.h | 39 ---------------------------- src/crypto/simd.h | 57 +++++++++++++++++++++++++++++++++++++++++ src/receive.c | 13 +++------- src/selftest/chacha20poly1305.h | 12 ++++----- src/selftest/poly1305.h | 5 ++-- src/send.c | 11 +++----- 7 files changed, 83 insertions(+), 71 deletions(-) create mode 100644 src/crypto/simd.h diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c index ccc6e1c..df1c5aa 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/chacha20poly1305.c @@ -6,6 +6,7 @@ #include "chacha20poly1305.h" #include "chacha20.h" #include "poly1305.h" +#include "simd.h" #include #include @@ -65,9 +66,9 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, { bool have_simd; - have_simd = chacha20poly1305_init_simd(); + have_simd = simd_get(); __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd); - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); } bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, @@ -176,9 +177,9 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, { bool have_simd, ret; - have_simd = chacha20poly1305_init_simd(); + have_simd = simd_get(); ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd); - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); return ret; } @@ -253,13 +254,13 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd = chacha20poly1305_init_simd(); + bool have_simd = simd_get(); u8 
derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); hchacha20(derived_key, nonce, key, have_simd); __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); } bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, @@ -267,13 +268,13 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool ret, have_simd = chacha20poly1305_init_simd(); + bool ret, have_simd = simd_get(); u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16); hchacha20(derived_key, nonce, key, have_simd); ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, have_simd); memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN); - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); return ret; } diff --git a/src/crypto/chacha20poly1305.h b/src/crypto/chacha20poly1305.h index 1b122ac..43b0a17 100644 --- a/src/crypto/chacha20poly1305.h +++ b/src/crypto/chacha20poly1305.h @@ -44,45 +44,6 @@ bool __must_check xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t const u8 nonce[XCHACHA20POLY1305_NONCELEN], const u8 key[CHACHA20POLY1305_KEYLEN]); -#if defined(CONFIG_X86_64) -#include -#include -#include -#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) -#include -#include -#endif - -static inline bool chacha20poly1305_init_simd(void) -{ - bool have_simd = false; -#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE) - have_simd = irq_fpu_usable(); - if (have_simd) - kernel_fpu_begin(); -#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE) -#if defined(CONFIG_ARM64) - have_simd = true; /* ARM64 supports NEON in any context. 
*/ -#elif defined(CONFIG_ARM) - have_simd = may_use_simd(); /* ARM doesn't support NEON in interrupt context. */ -#endif - if (have_simd) - kernel_neon_begin(); -#endif - return have_simd; -} - -static inline void chacha20poly1305_deinit_simd(bool was_on) -{ -#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) - if (was_on) - kernel_fpu_end(); -#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) - if (was_on) - kernel_neon_end(); -#endif -} - #ifdef DEBUG bool chacha20poly1305_selftest(void); #endif diff --git a/src/crypto/simd.h b/src/crypto/simd.h new file mode 100644 index 0000000..21e3c55 --- /dev/null +++ b/src/crypto/simd.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_SIMD_H +#define _WG_SIMD_H + +#if defined(CONFIG_X86_64) +#include +#include +#include +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) +#include +#include +#endif + +static inline bool simd_get(void) +{ + bool have_simd = false; +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE) + have_simd = irq_fpu_usable(); + if (have_simd) + kernel_fpu_begin(); +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE) +#if defined(CONFIG_ARM64) + have_simd = true; /* ARM64 supports NEON in any context. */ +#elif defined(CONFIG_ARM) + have_simd = may_use_simd(); /* ARM doesn't support NEON in interrupt context. 
*/ +#endif + if (have_simd) + kernel_neon_begin(); +#endif + return have_simd; +} + +static inline void simd_put(bool was_on) +{ +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) && !defined(CONFIG_PREEMPT_RT_BASE) + if (was_on) + kernel_fpu_end(); +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !defined(CONFIG_PREEMPT_RT_BASE) + if (was_on) + kernel_neon_end(); +#endif +} + +static inline bool simd_relax(bool was_on) +{ + if (was_on && need_resched()) { + simd_put(true); + return simd_get(); + } + return was_on; +} + +#endif /* _WG_SIMD_H */ diff --git a/src/receive.c b/src/receive.c index f33941b..c5062f8 100644 --- a/src/receive.c +++ b/src/receive.c @@ -10,6 +10,7 @@ #include "messages.h" #include "cookie.h" #include "socket.h" +#include "crypto/simd.h" #include #include @@ -418,21 +419,15 @@ void packet_decrypt_worker(struct work_struct *work) { struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; struct sk_buff *skb; - bool have_simd = chacha20poly1305_init_simd(); + bool have_simd = simd_get(); while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving, have_simd)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; - queue_enqueue_per_peer(&PACKET_PEER(skb)->rx_queue, skb, state); - - /* Don't totally kill scheduling latency by keeping preemption disabled forever. 
*/ - if (have_simd && need_resched()) { - chacha20poly1305_deinit_simd(have_simd); - have_simd = chacha20poly1305_init_simd(); - } + have_simd = simd_relax(have_simd); } - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); } static void packet_consume_data(struct wireguard_device *wg, struct sk_buff *skb) diff --git a/src/selftest/chacha20poly1305.h b/src/selftest/chacha20poly1305.h index e9070ba..9baca0b 100644 --- a/src/selftest/chacha20poly1305.h +++ b/src/selftest/chacha20poly1305.h @@ -1286,7 +1286,7 @@ static const struct chacha20poly1305_testvec xchacha20poly1305_dec_vectors[] __i static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u8 nonce[12], const u8 key[CHACHA20POLY1305_KEYLEN]) { - bool have_simd = chacha20poly1305_init_simd(); + bool have_simd = simd_get(); struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; union { @@ -1309,7 +1309,7 @@ static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8 b.lens[1] = cpu_to_le64(src_len); poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); poly1305_finish(&poly1305_state, dst + src_len, have_simd); - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); } @@ -1357,7 +1357,7 @@ bool __init chacha20poly1305_selftest(void) success = false; } } - have_simd = chacha20poly1305_init_simd(); + have_simd = simd_get(); for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { if (chacha20poly1305_enc_vectors[i].nlen != 8) continue; @@ -1371,7 +1371,7 @@ bool __init chacha20poly1305_selftest(void) success = false; } } - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { memset(computed_result, 0, sizeof(computed_result)); ret = 
chacha20poly1305_decrypt(computed_result, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key); @@ -1380,7 +1380,7 @@ bool __init chacha20poly1305_selftest(void) success = false; } } - have_simd = chacha20poly1305_init_simd(); + have_simd = simd_get(); for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { memset(heap_dst, 0, MAXIMUM_TEST_BUFFER_LEN); memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen); @@ -1392,7 +1392,7 @@ bool __init chacha20poly1305_selftest(void) success = false; } } - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) { memset(computed_result, 0, sizeof(computed_result)); xchacha20poly1305_encrypt(computed_result, xchacha20poly1305_enc_vectors[i].input, xchacha20poly1305_enc_vectors[i].ilen, xchacha20poly1305_enc_vectors[i].assoc, xchacha20poly1305_enc_vectors[i].alen, xchacha20poly1305_enc_vectors[i].nonce, xchacha20poly1305_enc_vectors[i].key); diff --git a/src/selftest/poly1305.h b/src/selftest/poly1305.h index 41acf7c..b3b74e1 100644 --- a/src/selftest/poly1305.h +++ b/src/selftest/poly1305.h @@ -7,6 +7,7 @@ #ifdef DEBUG #include "../crypto/chacha20poly1305.h" +#include "../crypto/simd.h" struct poly1305_testdata { size_t size; @@ -1489,7 +1490,7 @@ static const struct poly1305_testvec poly1305_testvecs[] = { bool __init poly1305_selftest(void) { - bool have_simd = chacha20poly1305_init_simd(); + bool have_simd = simd_get(); bool success = true; size_t i; @@ -1558,7 +1559,7 @@ bool __init poly1305_selftest(void) } } } - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); if (success) pr_info("poly1305 self-tests: pass\n"); diff --git a/src/send.c b/src/send.c index e97e439..d3e3d75 100644 --- 
a/src/send.c +++ b/src/send.c @@ -10,6 +10,7 @@ #include "socket.h" #include "messages.h" #include "cookie.h" +#include "crypto/simd.h" #include #include @@ -242,7 +243,7 @@ void packet_encrypt_worker(struct work_struct *work) { struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; struct sk_buff *first, *skb, *next; - bool have_simd = chacha20poly1305_init_simd(); + bool have_simd = simd_get(); while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { enum packet_state state = PACKET_STATE_CRYPTED; @@ -257,13 +258,9 @@ void packet_encrypt_worker(struct work_struct *work) } queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, state); - /* Don't totally kill scheduling latency by keeping preemption disabled forever. */ - if (have_simd && need_resched()) { - chacha20poly1305_deinit_simd(have_simd); - have_simd = chacha20poly1305_init_simd(); - } + have_simd = simd_relax(have_simd); } - chacha20poly1305_deinit_simd(have_simd); + simd_put(have_simd); } static void packet_create_data(struct sk_buff *first) -- cgit v1.2.3-59-g8ed1b