From f3f6c5367022ff8343e243e8e3f4ebaad5729c90 Mon Sep 17 00:00:00 2001 From: René van Dorst Date: Wed, 30 May 2018 12:05:20 +0200 Subject: poly1305: Add a helper function which pads the input to a full poly1305 block when needed. In the current code, every poly1305 input is either a multiple of POLY1305_BLOCK_SIZE or is padded with zeros. Putting this in one function makes it more efficient. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: René van Dorst --- src/crypto/chacha20poly1305.c | 38 ++++++++++++++------------------------ src/crypto/poly1305.c | 18 ++++++++++++++++++ src/crypto/poly1305.h | 1 + 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c index ccc6e1c..189d843 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/chacha20poly1305.c @@ -41,17 +41,15 @@ static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); poly1305_init(&poly1305_state, b.block0, have_simd); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update_pad_fb(&poly1305_state, ad, ad_len, have_simd); chacha20(&chacha20_state, dst, src, src_len, have_simd); - poly1305_update(&poly1305_state, dst, src_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); + poly1305_update_pad_fb(&poly1305_state, dst, src_len, have_simd); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update_pad_fb(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); poly1305_finish(&poly1305_state, dst + src_len, have_simd); @@ -89,8 +87,7 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr chacha20(&chacha20_state, 
b.block0, b.block0, sizeof(b.block0), have_simd); poly1305_init(&poly1305_state, b.block0, have_simd); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update_pad_fb(&poly1305_state, ad, ad_len, have_simd); if (likely(src_len)) { blkcipher_walk_init(&walk, dst, src, src_len); @@ -99,23 +96,21 @@ bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *sr size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd); - poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, have_simd); + poly1305_update_pad_fb(&poly1305_state, walk.dst.virt.addr, chunk_len, have_simd); ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd); - poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, have_simd); + poly1305_update_pad_fb(&poly1305_state, walk.dst.virt.addr, walk.nbytes, have_simd); ret = blkcipher_walk_done(&chacha20_desc, &walk, 0); } } if (unlikely(ret)) goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); - b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update_pad_fb(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); poly1305_finish(&poly1305_state, b.mac, have_simd); scatterwalk_map_and_copy(b.mac, dst, src_len, sizeof(b.mac), 1); @@ -147,16 +142,14 @@ static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); poly1305_init(&poly1305_state, b.block0, have_simd); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, 
(0x10 - ad_len) & 0xf, have_simd); + poly1305_update_pad_fb(&poly1305_state, ad, ad_len, have_simd); dst_len = src_len - POLY1305_MAC_SIZE; - poly1305_update(&poly1305_state, src, dst_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd); + poly1305_update_pad_fb(&poly1305_state, src, dst_len, have_simd); b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update_pad_fb(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); poly1305_finish(&poly1305_state, b.mac, have_simd); @@ -208,8 +201,7 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), have_simd); poly1305_init(&poly1305_state, b.block0, have_simd); - poly1305_update(&poly1305_state, ad, ad_len, have_simd); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + poly1305_update_pad_fb(&poly1305_state, ad, ad_len, have_simd); dst_len = src_len - POLY1305_MAC_SIZE; if (likely(dst_len)) { @@ -218,12 +210,12 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); - poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, have_simd); + poly1305_update_pad_fb(&poly1305_state, walk.src.virt.addr, chunk_len, have_simd); chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd); ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { - poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, have_simd); + poly1305_update_pad_fb(&poly1305_state, walk.src.virt.addr, walk.nbytes, have_simd); chacha20(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd); ret = blkcipher_walk_done(&chacha20_desc, &walk, 0); 
} @@ -231,11 +223,9 @@ bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *sr if (unlikely(ret)) goto err; - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd); - b.lens[0] = cpu_to_le64(ad_len); b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); + poly1305_update_pad_fb(&poly1305_state, (u8 *)b.lens, sizeof(b.lens), have_simd); poly1305_finish(&poly1305_state, b.computed_mac, have_simd); diff --git a/src/crypto/poly1305.c b/src/crypto/poly1305.c index a2ff970..78f4365 100644 --- a/src/crypto/poly1305.c +++ b/src/crypto/poly1305.c @@ -355,6 +355,24 @@ void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool h ctx->num = rem; } +void poly1305_update_pad_fb(struct poly1305_ctx *ctx, const u8 *inp, size_t len, bool have_simd) +{ /* Hands len bytes of inp to poly1305_blocks(); a trailing partial block is zero-filled up to POLY1305_BLOCK_SIZE first. Never reads or updates ctx->num. NOTE(review): ctx->data is overwritten here, so this presumably must not follow a poly1305_update() call that left buffered bytes in ctx; confirm against callers. */ + size_t rem; + + rem = len & -(POLY1305_BLOCK_SIZE); /* len with its low bits masked off, i.e. the part made of whole blocks (assumes POLY1305_BLOCK_SIZE is a power of two; TODO confirm) */ + if (rem) { + poly1305_blocks(ctx->opaque, inp, rem, 1, have_simd); /* whole blocks are read straight from the caller's buffer, no copy */ + inp += rem; /* step past the bytes just handed over */ + } + + rem = len & (POLY1305_BLOCK_SIZE-1); /* leftover bytes that do not fill a block */ + if (rem) { + memset(ctx->data, 0, POLY1305_BLOCK_SIZE); /* clear the staging block so everything after the tail is zero */ + memcpy(ctx->data, inp, rem); + poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1, have_simd); /* the zero-filled tail goes in as one full block */ + } +} + void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool have_simd) { size_t num = ctx->num % POLY1305_BLOCK_SIZE; diff --git a/src/crypto/poly1305.h b/src/crypto/poly1305.h index 876234c..3cff282 100644 --- a/src/crypto/poly1305.h +++ b/src/crypto/poly1305.h @@ -25,6 +25,7 @@ void poly1305_fpu_init(void); void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd); void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, const size_t len, bool have_simd); +void poly1305_update_pad_fb(struct poly1305_ctx *ctx, const u8 *inp, const size_t len, bool have_simd); void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE], bool have_simd); #ifdef DEBUG -- cgit v1.2.3-59-g8ed1b