From 12b4b2210c1783437b602a6ebcef242b422b30d8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 5 Dec 2019 13:18:17 +0100 Subject: chacha20poly1305: port to sgmitter for 5.5 I'm not totally comfortable with these changes yet, and it'll require some more scrutiny. But it's a start. --- src/Kconfig | 3 +- src/crypto/include/zinc/chacha20poly1305.h | 12 +- src/crypto/zinc/chacha20poly1305.c | 218 ++++++++++++++++------------ src/crypto/zinc/selftest/chacha20poly1305.c | 27 ++-- src/receive.c | 6 +- src/send.c | 7 +- 6 files changed, 151 insertions(+), 122 deletions(-) (limited to 'src') diff --git a/src/Kconfig b/src/Kconfig index 65fb31d..156e9db 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -5,8 +5,7 @@ config WIREGUARD select NET_UDP_TUNNEL select DST_CACHE select CRYPTO - select CRYPTO_BLKCIPHER - select XOR_BLOCKS + select CRYPTO_ALGAPI select VFP select VFPv3 if CPU_V7 select NEON if CPU_V7 diff --git a/src/crypto/include/zinc/chacha20poly1305.h b/src/crypto/include/zinc/chacha20poly1305.h index ce72740..e3339f0 100644 --- a/src/crypto/include/zinc/chacha20poly1305.h +++ b/src/crypto/include/zinc/chacha20poly1305.h @@ -22,9 +22,9 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEY_SIZE]); -bool __must_check chacha20poly1305_encrypt_sg( - struct scatterlist *dst, struct scatterlist *src, const size_t src_len, - const u8 *ad, const size_t ad_len, const u64 nonce, +bool __must_check chacha20poly1305_encrypt_sg_inplace( + struct scatterlist *src, const size_t src_len, const u8 *ad, + const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context); bool __must_check @@ -32,9 +32,9 @@ chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEY_SIZE]); -bool __must_check chacha20poly1305_decrypt_sg( - struct scatterlist *dst, struct scatterlist *src, const size_t src_len, - const u8 *ad, const size_t ad_len, const u64 nonce, +bool __must_check chacha20poly1305_decrypt_sg_inplace( + struct scatterlist *src, size_t src_len, const u8 *ad, + const size_t ad_len, const u64 nonce, const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context); void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, diff --git a/src/crypto/zinc/chacha20poly1305.c b/src/crypto/zinc/chacha20poly1305.c index 0001c92..571a64e 100644 --- a/src/crypto/zinc/chacha20poly1305.c +++ b/src/crypto/zinc/chacha20poly1305.c @@ -18,16 +18,7 @@ #include #include // For blkcipher_walk. -static const u8 pad0[16] = { 0 }; - -static struct blkcipher_desc desc = { .tfm = &(struct crypto_blkcipher){ - .base = { .__crt_alg = &(struct crypto_alg){ - .cra_blocksize = 1, -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - .cra_alignmask = sizeof(u32) - 1 -#endif - } } -} }; +static const u8 pad0[CHACHA20_BLOCK_SIZE] = { 0 }; static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, @@ -82,22 +73,25 @@ void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, } EXPORT_SYMBOL(chacha20poly1305_encrypt); -bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, - struct scatterlist *src, const size_t src_len, - const u8 *ad, const size_t ad_len, - const u64 nonce, - const u8 key[CHACHA20POLY1305_KEY_SIZE], - simd_context_t *simd_context) +bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, + const size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], + simd_context_t *simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; - int ret = 0; - struct blkcipher_walk walk; + struct sg_mapping_iter miter; + size_t partial = 0; + ssize_t sl; union { + u8 chacha20_stream[CHACHA20_BLOCK_SIZE]; u8 block0[POLY1305_KEY_SIZE]; u8 mac[POLY1305_MAC_SIZE]; __le64 lens[2]; - } b = { { 0 } }; + } b __aligned(16) = { { 0 } }; + chacha20_init(&chacha20_state, key, nonce); chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), @@ -108,32 +102,43 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); - if (likely(src_len)) { - blkcipher_walk_init(&walk, dst, src, src_len); - ret = blkcipher_walk_virt_block(&desc, &walk, - CHACHA20_BLOCK_SIZE); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - size_t chunk_len = - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - - chacha20(&chacha20_state, walk.dst.virt.addr, - walk.src.virt.addr, chunk_len, simd_context); - poly1305_update(&poly1305_state, walk.dst.virt.addr, - chunk_len, simd_context); - simd_relax(simd_context); - ret = blkcipher_walk_done(&desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC); + for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) { + u8 *addr = miter.addr; + size_t length = min_t(size_t, sl, miter.length); + + if (unlikely(partial)) { + size_t l = min(length, CHACHA20_BLOCK_SIZE - partial); + + crypto_xor(addr, b.chacha20_stream + partial, l); + partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1); + + addr += l; + length -= l; } - if (walk.nbytes) { - chacha20(&chacha20_state, walk.dst.virt.addr, - walk.src.virt.addr, walk.nbytes, simd_context); - poly1305_update(&poly1305_state, walk.dst.virt.addr, - walk.nbytes, simd_context); - ret = blkcipher_walk_done(&desc, &walk, 0); + + if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) { + size_t l = length; + + if (unlikely(length < sl)) + l &= ~(CHACHA20_BLOCK_SIZE - 1); + chacha20(&chacha20_state, addr, addr, l, simd_context); + addr += l; + length -= l; + } + + if (unlikely(length > 0)) { + chacha20(&chacha20_state, b.chacha20_stream, pad0, + CHACHA20_BLOCK_SIZE, simd_context); + crypto_xor(addr, b.chacha20_stream, length); + partial = length; } + + poly1305_update(&poly1305_state, miter.addr, + min_t(size_t, sl, miter.length), simd_context); + + simd_relax(simd_context); } - if (unlikely(ret)) - goto err; poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); @@ -143,14 +148,22 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_final(&poly1305_state, b.mac, simd_context); - scatterwalk_map_and_copy(b.mac, dst, src_len, sizeof(b.mac), 1); -err: + if (likely(sl <= -POLY1305_MAC_SIZE)) + poly1305_final(&poly1305_state, miter.addr + miter.length + sl, + simd_context); + + sg_miter_stop(&miter); + + if (unlikely(sl > -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.mac, simd_context); + scatterwalk_map_and_copy(b.mac, src, src_len, sizeof(b.mac), 1); + } + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); - return !ret; + return true; } -EXPORT_SYMBOL(chacha20poly1305_encrypt_sg); +EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace); static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, @@ -217,29 +230,32 @@ bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, } EXPORT_SYMBOL(chacha20poly1305_decrypt); -bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, - struct scatterlist *src, const size_t src_len, - const u8 *ad, const size_t ad_len, - const u64 nonce, - const u8 key[CHACHA20POLY1305_KEY_SIZE], - simd_context_t *simd_context) +bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, + size_t src_len, + const u8 *ad, const size_t ad_len, + const u64 nonce, + const u8 key[CHACHA20POLY1305_KEY_SIZE], + simd_context_t *simd_context) { struct poly1305_ctx poly1305_state; struct chacha20_ctx chacha20_state; - struct blkcipher_walk walk; - int ret = 0; - size_t dst_len; + struct sg_mapping_iter miter; + size_t partial = 0; + ssize_t sl; union { + u8 chacha20_stream[CHACHA20_BLOCK_SIZE]; u8 block0[POLY1305_KEY_SIZE]; struct { u8 read_mac[POLY1305_MAC_SIZE]; u8 computed_mac[POLY1305_MAC_SIZE]; }; __le64 lens[2]; - } b = { { 0 } }; + } b __aligned(16) = { { 0 } }; + bool ret = false; if (unlikely(src_len < POLY1305_MAC_SIZE)) - return false; + return ret; + src_len -= POLY1305_MAC_SIZE; chacha20_init(&chacha20_state, key, nonce); chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), @@ -250,52 +266,74 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, simd_context); - dst_len = src_len - POLY1305_MAC_SIZE; - if (likely(dst_len)) { - blkcipher_walk_init(&walk, dst, src, dst_len); - ret = blkcipher_walk_virt_block(&desc, &walk, - CHACHA20_BLOCK_SIZE); - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { - size_t chunk_len = - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - - poly1305_update(&poly1305_state, walk.src.virt.addr, - chunk_len, simd_context); - chacha20(&chacha20_state, walk.dst.virt.addr, - walk.src.virt.addr, chunk_len, simd_context); - simd_relax(simd_context); - ret = blkcipher_walk_done(&desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC); + for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) { + u8 *addr = miter.addr; + size_t length = min_t(size_t, sl, miter.length); + + poly1305_update(&poly1305_state, addr, length, simd_context); + + if (unlikely(partial)) { + size_t l = min(length, CHACHA20_BLOCK_SIZE - partial); + + crypto_xor(addr, b.chacha20_stream + partial, l); + partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1); + + addr += l; + length -= l; } - if (walk.nbytes) { - poly1305_update(&poly1305_state, walk.src.virt.addr, - walk.nbytes, simd_context); - chacha20(&chacha20_state, walk.dst.virt.addr, - walk.src.virt.addr, walk.nbytes, simd_context); - ret = blkcipher_walk_done(&desc, &walk, 0); + + if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) { + size_t l = length; + + if (unlikely(length < sl)) + l &= ~(CHACHA20_BLOCK_SIZE - 1); + chacha20(&chacha20_state, addr, addr, l, simd_context); + addr += l; + length -= l; + } + + if (unlikely(length > 0)) { + chacha20(&chacha20_state, b.chacha20_stream, pad0, + CHACHA20_BLOCK_SIZE, simd_context); + crypto_xor(addr, b.chacha20_stream, length); + partial = length; } + + simd_relax(simd_context); } - if (unlikely(ret)) - goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, simd_context); b.lens[0] = cpu_to_le64(ad_len); - b.lens[1] = cpu_to_le64(dst_len); + b.lens[1] = cpu_to_le64(src_len); poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), simd_context); - poly1305_final(&poly1305_state, b.computed_mac, simd_context); + if (likely(sl <= -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.computed_mac, simd_context); + ret = !crypto_memneq(b.computed_mac, + miter.addr + miter.length + sl, + POLY1305_MAC_SIZE); + } + + sg_miter_stop(&miter); + + if (unlikely(sl > -POLY1305_MAC_SIZE)) { + poly1305_final(&poly1305_state, b.computed_mac, simd_context); + scatterwalk_map_and_copy(b.read_mac, src, src_len, + sizeof(b.read_mac), 0); + ret = !crypto_memneq(b.read_mac, b.computed_mac, + POLY1305_MAC_SIZE); + + } - scatterwalk_map_and_copy(b.read_mac, src, dst_len, POLY1305_MAC_SIZE, 0); - ret = crypto_memneq(b.read_mac, b.computed_mac, POLY1305_MAC_SIZE); -err: memzero_explicit(&chacha20_state, sizeof(chacha20_state)); memzero_explicit(&b, sizeof(b)); - return !ret; + return ret; } -EXPORT_SYMBOL(chacha20poly1305_decrypt_sg); +EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace); void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, diff --git a/src/crypto/zinc/selftest/chacha20poly1305.c b/src/crypto/zinc/selftest/chacha20poly1305.c index dba9cd7..d3b928d 100644 --- a/src/crypto/zinc/selftest/chacha20poly1305.c +++ b/src/crypto/zinc/selftest/chacha20poly1305.c @@ -8880,14 +8880,13 @@ static bool __init chacha20poly1305_selftest(void) { enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; size_t i; - u8 *computed_output = NULL, *heap_src = NULL; + u8 *computed_output = NULL; bool success = true, ret; simd_context_t simd_context; - struct scatterlist sg_src, sg_dst; + struct scatterlist sg_src; - heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); - if (!heap_src || !computed_output) { + if (!computed_output) { pr_err("chacha20poly1305 self-test malloc: FAIL\n"); success = false; goto out; @@ -8916,15 +8915,12 @@ static bool __init chacha20poly1305_selftest(void) for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { if (chacha20poly1305_enc_vectors[i].nlen != 8) continue; - memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); - memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, + memcpy(computed_output, chacha20poly1305_enc_vectors[i].input, chacha20poly1305_enc_vectors[i].ilen); - sg_init_one(&sg_src, heap_src, - chacha20poly1305_enc_vectors[i].ilen); - sg_init_one(&sg_dst, computed_output, + sg_init_one(&sg_src, computed_output, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE); - ret = chacha20poly1305_encrypt_sg(&sg_dst, &sg_src, + ret = chacha20poly1305_encrypt_sg_inplace(&sg_src, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, @@ -8963,15 +8959,11 @@ static bool __init chacha20poly1305_selftest(void) } simd_get(&simd_context); for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { - memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); - memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, + memcpy(computed_output, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen); - sg_init_one(&sg_src, heap_src, + sg_init_one(&sg_src, computed_output, chacha20poly1305_dec_vectors[i].ilen); - sg_init_one(&sg_dst, computed_output, - chacha20poly1305_dec_vectors[i].ilen - - POLY1305_MAC_SIZE); - ret = chacha20poly1305_decrypt_sg(&sg_dst, &sg_src, + ret = chacha20poly1305_decrypt_sg_inplace(&sg_src, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, @@ -9028,7 +9020,6 @@ static bool __init chacha20poly1305_selftest(void) } out: - kfree(heap_src); kfree(computed_output); return success; } diff --git a/src/receive.c b/src/receive.c index 247a56b..dfd6e40 100644 --- a/src/receive.c +++ b/src/receive.c @@ -281,9 +281,9 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key, if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) return false; - if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, - PACKET_CB(skb)->nonce, key->key, - simd_context)) + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, + PACKET_CB(skb)->nonce, key->key, + simd_context)) return false; /* Another ugly situation of pushing and pulling the header so as to diff --git a/src/send.c b/src/send.c index 9b54f4a..85b83fa 100644 --- a/src/send.c +++ b/src/send.c @@ -207,9 +207,10 @@ static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair, if (skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(plaintext_len)) <= 0) return false; - return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, - PACKET_CB(skb)->nonce, - keypair->sending.key, simd_context); + return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0, + PACKET_CB(skb)->nonce, + keypair->sending.key, + simd_context); } void wg_packet_send_keepalive(struct wg_peer *peer) -- cgit v1.2.3-59-g8ed1b