From 9f480faec58cd6197a007ea1dcac6b7c3daf1139 Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Wed, 22 Nov 2017 11:51:39 -0800
Subject: crypto: chacha20 - Fix keystream alignment for chacha20_block()

When chacha20_block() outputs the keystream block, it uses 'u32' stores
directly. However, the callers (crypto/chacha20_generic.c and
drivers/char/random.c) declare the keystream buffer as a 'u8' array,
which is not guaranteed to have the needed alignment.

Fix it by having both callers declare the keystream as a 'u32' array.
For now this is preferable to switching over to the unaligned access
macros because chacha20_block() is only being used in cases where we can
easily control the alignment (stack buffers).

Signed-off-by: Eric Biggers
Signed-off-by: Herbert Xu
---
 lib/chacha20.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/chacha20.c b/lib/chacha20.c
index 250ceed9ec9a..29d3801dee24 100644
--- a/lib/chacha20.c
+++ b/lib/chacha20.c
@@ -21,7 +21,7 @@ static inline u32 rotl32(u32 v, u8 n)
 	return (v << n) | (v >> (sizeof(v) * 8 - n));
 }
 
-extern void chacha20_block(u32 *state, void *stream)
+void chacha20_block(u32 *state, u32 *stream)
 {
 	u32 x[16], *out = stream;
 	int i;
-- 
cgit v1.2.3-59-g8ed1b

From 7660b1fb367eb3723b48d3980451fc4f25a05021 Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Sun, 31 Dec 2017 18:02:45 -0600
Subject: crypto: chacha20 - use rol32() macro from bitops.h

For chacha20_block(), use the existing 32-bit left-rotate function
instead of defining one ourselves.

Signed-off-by: Eric Biggers
Signed-off-by: Herbert Xu
---
 lib/chacha20.c | 69 +++++++++++++++++++++++++++------------------------------
 1 file changed, 32 insertions(+), 37 deletions(-)

(limited to 'lib')

diff --git a/lib/chacha20.c b/lib/chacha20.c
index 29d3801dee24..c1cc50fb68c9 100644
--- a/lib/chacha20.c
+++ b/lib/chacha20.c
@@ -16,11 +16,6 @@
 #include
 #include
 
-static inline u32 rotl32(u32 v, u8 n)
-{
-	return (v << n) | (v >> (sizeof(v) * 8 - n));
-}
-
 void chacha20_block(u32 *state, u32 *stream)
 {
 	u32 x[16], *out = stream;
@@ -30,45 +25,45 @@ void chacha20_block(u32 *state, u32 *stream)
 		x[i] = state[i];
 
 	for (i = 0; i < 20; i += 2) {
-		x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16);
-		x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16);
-		x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16);
-		x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16);
+		x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
+		x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
+		x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
+		x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
 
-		x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12);
-		x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12);
-		x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12);
-		x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12);
+		x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
+		x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
+		x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
+		x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
 
-		x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8);
-		x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8);
-		x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8);
-		x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8);
+		x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
+		x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
+		x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
+		x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
 
-		x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7);
-		x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7);
-		x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7);
-		x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7);
+		x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
+		x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
+		x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
+		x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
 
-		x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16);
-		x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16);
-		x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16);
-		x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16);
+		x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
+		x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
+		x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
+		x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
 
-		x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12);
-		x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12);
-		x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12);
-		x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12);
+		x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
+		x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
+		x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
+		x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
 
-		x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8);
-		x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8);
-		x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8);
-		x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8);
+		x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
+		x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
+		x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
+		x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
 
-		x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7);
-		x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7);
-		x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7);
-		x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7);
+		x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
+		x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
+		x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
+		x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(x); i++)
-- 
cgit v1.2.3-59-g8ed1b