From 1be1f3d19318475865b9556e9f0b4535150e0bc6 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 4 Nov 2016 14:38:04 +0100 Subject: data: keep FPU on when possible --- src/crypto/chacha20poly1305.c | 62 ++++++++----------------------------------- src/crypto/chacha20poly1305.h | 31 +++++++++++++++++++++- src/data.c | 12 ++++----- 3 files changed, 46 insertions(+), 59 deletions(-) diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c index 944710c..cc48a9e 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/chacha20poly1305.c @@ -14,12 +14,6 @@ #ifdef CONFIG_X86_64 #include #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) -#include -#include -#else -#include -#endif #ifdef CONFIG_AS_SSSE3 asmlinkage void chacha20_asm_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); asmlinkage void chacha20_asm_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); @@ -532,13 +526,7 @@ bool chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src uint8_t block0[CHACHA20_BLOCK_SIZE] = { 0 }; __le64 len; __le64 le_nonce = cpu_to_le64(nonce); - bool have_simd = false; - -#ifdef CONFIG_X86_64 - have_simd = irq_fpu_usable(); - if (have_simd) - kernel_fpu_begin(); -#endif + bool have_simd = chacha20poly1305_init_simd(); chacha20_keysetup(&chacha20_state, key, (uint8_t *)&le_nonce); @@ -565,17 +553,15 @@ bool chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src memzero_explicit(&poly1305_state, sizeof(poly1305_state)); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); -#ifdef CONFIG_X86_64 - if (have_simd) - kernel_fpu_end(); -#endif + chacha20poly1305_deinit_simd(have_simd); return true; } bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const uint8_t *ad, const size_t ad_len, - const uint64_t nonce, const uint8_t key[static CHACHA20POLY1305_KEYLEN]) + const uint64_t nonce, const uint8_t key[static CHACHA20POLY1305_KEYLEN], + bool 
have_simd) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; @@ -584,13 +570,6 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr uint8_t mac[POLY1305_MAC_SIZE]; __le64 len; __le64 le_nonce = cpu_to_le64(nonce); - bool have_simd = false; - -#ifdef CONFIG_X86_64 - have_simd = irq_fpu_usable(); - if (have_simd) - kernel_fpu_begin(); -#endif chacha20_keysetup(&chacha20_state, key, (uint8_t *)&le_nonce); @@ -630,12 +609,6 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr memzero_explicit(&poly1305_state, sizeof(poly1305_state)); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(mac, sizeof(mac)); - -#ifdef CONFIG_X86_64 - if (have_simd) - kernel_fpu_end(); -#endif - return true; } @@ -651,16 +624,12 @@ bool chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src size_t dst_len; __le64 len; __le64 le_nonce = cpu_to_le64(nonce); - bool have_simd = false; + bool have_simd; if (unlikely(src_len < POLY1305_MAC_SIZE)) return false; -#ifdef CONFIG_X86_64 - have_simd = irq_fpu_usable(); - if (have_simd) - kernel_fpu_begin(); -#endif + have_simd = chacha20poly1305_init_simd(); chacha20_keysetup(&chacha20_state, key, (uint8_t *)&le_nonce); @@ -690,10 +659,8 @@ bool chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src chacha20_crypt(&chacha20_state, dst, src, dst_len, have_simd); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); -#ifdef CONFIG_X86_64 - if (have_simd) - kernel_fpu_end(); -#endif + + chacha20poly1305_deinit_simd(have_simd); return !ret; } @@ -710,16 +677,12 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr size_t dst_len; __le64 len; __le64 le_nonce = cpu_to_le64(nonce); - bool have_simd = false; + bool have_simd; if (unlikely(src_len < POLY1305_MAC_SIZE)) return false; -#ifdef CONFIG_X86_64 - have_simd = irq_fpu_usable(); - if (have_simd) - kernel_fpu_begin(); 
-#endif + have_simd = chacha20poly1305_init_simd(); chacha20_keysetup(&chacha20_state, key, (uint8_t *)&le_nonce); @@ -763,10 +726,7 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr memzero_explicit(read_mac, POLY1305_MAC_SIZE); memzero_explicit(computed_mac, POLY1305_MAC_SIZE); memzero_explicit(&chacha20_state, sizeof(chacha20_state)); -#ifdef CONFIG_X86_64 - if (have_simd) - kernel_fpu_end(); -#endif + chacha20poly1305_deinit_simd(have_simd); return !ret; } diff --git a/src/crypto/chacha20poly1305.h b/src/crypto/chacha20poly1305.h index e9ecaf8..57d3d23 100644 --- a/src/crypto/chacha20poly1305.h +++ b/src/crypto/chacha20poly1305.h @@ -20,7 +20,8 @@ bool chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, const uint8_t *ad, const size_t ad_len, - const uint64_t nonce, const uint8_t key[static CHACHA20POLY1305_KEYLEN]); + const uint64_t nonce, const uint8_t key[static CHACHA20POLY1305_KEYLEN], + bool have_simd); bool chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, const uint8_t *ad, const size_t ad_len, @@ -30,6 +31,34 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr const uint8_t *ad, const size_t ad_len, const uint64_t nonce, const uint8_t key[static CHACHA20POLY1305_KEYLEN]); +#ifdef CONFIG_X86_64 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#include <asm/fpu/api.h> +#include <asm/simd.h> +#else +#include <asm/i387.h> +#endif +#endif + +static inline bool chacha20poly1305_init_simd(void) +{ + bool have_simd = false; +#ifdef CONFIG_X86_64 + have_simd = irq_fpu_usable(); + if (have_simd) + kernel_fpu_begin(); +#endif + return have_simd; +} + +static inline void chacha20poly1305_deinit_simd(bool was_on) +{ +#ifdef CONFIG_X86_64 + if (was_on) + kernel_fpu_end(); +#endif +} + #ifdef DEBUG bool chacha20poly1305_selftest(void); #endif diff --git a/src/data.c b/src/data.c 
index 55cc118..1f6c5b6 100644 --- a/src/data.c +++ b/src/data.c @@ -84,13 +84,12 @@ struct packet_data_encryption_ctx { uint64_t nonce; }; -static inline void skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair) +static inline void skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, bool have_simd) { struct packet_data_encryption_ctx *ctx = (struct packet_data_encryption_ctx *)skb->cb; struct scatterlist sg[ctx->num_frags]; /* This should be bound to at most 128 by the caller. */ struct message_data *header; - /* We have to remember to add the checksum to the innerpacket, in case the receiver forwards it. */ if (likely(!skb_checksum_setup(skb, true))) skb_checksum_help(skb); @@ -105,7 +104,7 @@ static inline void skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypai /* Now we can encrypt the scattergather segments */ sg_init_table(sg, ctx->num_frags); skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(ctx->plaintext_len)); - chacha20poly1305_encrypt_sg(sg, sg, ctx->plaintext_len, NULL, 0, ctx->nonce, keypair->sending.key); + chacha20poly1305_encrypt_sg(sg, sg, ctx->plaintext_len, NULL, 0, ctx->nonce, keypair->sending.key, have_simd); } static inline bool skb_decrypt(struct sk_buff *skb, uint8_t num_frags, uint64_t nonce, struct noise_symmetric_key *key) @@ -159,13 +158,12 @@ struct packet_bundle_ctx { static inline void queue_encrypt_reset(struct sk_buff_head *queue, struct noise_keypair *keypair) { struct sk_buff *skb; - /* TODO: as a later optimization, we can activate the FPU just once - * for the entire loop, rather than turning it on and off for each - * packet. */ + bool have_simd = chacha20poly1305_init_simd(); skb_queue_walk(queue, skb) { - skb_encrypt(skb, keypair); + skb_encrypt(skb, keypair, have_simd); skb_reset(skb); } + chacha20poly1305_deinit_simd(have_simd); noise_keypair_put(keypair); } -- cgit v1.2.3-59-g8ed1b