author     Jason A. Donenfeld <Jason@zx2c4.com>    2018-08-23 18:08:03 -0700
committer  Jason A. Donenfeld <Jason@zx2c4.com>    2018-08-28 23:20:13 -0600
commit     1e7b209da908f815968f681d6ee4894a04399c97 (patch)
tree       84c57fb3029513bfbe0313238b4563a8974af4c6
parent     Kconfig: use new-style help marker (diff)
download   wireguard-monolithic-historical-1e7b209da908f815968f681d6ee4894a04399c97.tar.xz
           wireguard-monolithic-historical-1e7b209da908f815968f681d6ee4894a04399c97.zip
crypto: use unaligned helpers
This is not useful for WireGuard, but for the general use case we probably want it this way, and the speed difference is mostly lost in the noise.
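For context: the old code read words out of byte buffers by casting to __le32 * or __le64 * and dereferencing, which silently assumes natural alignment and faults (or misreads) on strict-alignment CPUs such as some ARM, SPARC, and MIPS parts. The get_unaligned_le32()/put_unaligned_le64() helpers from <asm/unaligned.h> make the byte-granular access explicit. A minimal userspace sketch of what these helpers boil down to on strict-alignment targets (the demo_* names are illustrative, not kernel API):

/*
 * Illustrative only, not part of this commit: an alignment-safe
 * little-endian load/store pair, equivalent in effect to the kernel's
 * get_unaligned_le32() and put_unaligned_le64(). memcpy() of a known
 * small size compiles down to a plain load/store on architectures
 * that tolerate unaligned access, so nothing is lost on x86.
 */
#include <stdint.h>
#include <string.h>

static inline uint32_t demo_get_unaligned_le32(const void *p)
{
	uint32_t v;

	memcpy(&v, p, sizeof(v));	/* alignment-safe load */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	v = __builtin_bswap32(v);	/* LE wire format -> host order */
#endif
	return v;
}

static inline void demo_put_unaligned_le64(uint64_t v, void *p)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	v = __builtin_bswap64(v);	/* host order -> LE wire format */
#endif
	memcpy(p, &v, sizeof(v));	/* alignment-safe store */
}

On unaligned-tolerant targets the kernel helpers are a single load/store, which is why the speed difference is in the noise.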
-rw-r--r--  src/crypto/chacha20.c           |  7
-rw-r--r--  src/crypto/chacha20.h           | 18
-rw-r--r--  src/crypto/chacha20poly1305.c   |  5
-rw-r--r--  src/crypto/curve25519-fiat32.h  | 16
-rw-r--r--  src/crypto/curve25519-hacl64.h  | 18
-rw-r--r--  src/crypto/curve25519.c         |  1
-rw-r--r--  src/crypto/poly1305.c           | 34
-rw-r--r--  src/selftest/chacha20poly1305.h | 14
-rw-r--r--  src/tools/curve25519.c          |  3
9 files changed, 61 insertions(+), 55 deletions(-)
diff --git a/src/crypto/chacha20.c b/src/crypto/chacha20.c
index 815d777..c23928e 100644
--- a/src/crypto/chacha20.c
+++ b/src/crypto/chacha20.c
@@ -5,6 +5,7 @@
#include "chacha20.h"
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <crypto/algapi.h>
@@ -210,9 +211,9 @@ static void hchacha20_generic(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[
__le32 *out = (__force __le32 *)derived_key;
u32 x[] = {
EXPAND_32_BYTE_K,
- le32_to_cpup((__le32 *)(key + 0)), le32_to_cpup((__le32 *)(key + 4)), le32_to_cpup((__le32 *)(key + 8)), le32_to_cpup((__le32 *)(key + 12)),
- le32_to_cpup((__le32 *)(key + 16)), le32_to_cpup((__le32 *)(key + 20)), le32_to_cpup((__le32 *)(key + 24)), le32_to_cpup((__le32 *)(key + 28)),
- le32_to_cpup((__le32 *)(nonce + 0)), le32_to_cpup((__le32 *)(nonce + 4)), le32_to_cpup((__le32 *)(nonce + 8)), le32_to_cpup((__le32 *)(nonce + 12))
+ get_unaligned_le32(key + 0), get_unaligned_le32(key + 4), get_unaligned_le32(key + 8), get_unaligned_le32(key + 12),
+ get_unaligned_le32(key + 16), get_unaligned_le32(key + 20), get_unaligned_le32(key + 24), get_unaligned_le32(key + 28),
+ get_unaligned_le32(nonce + 0), get_unaligned_le32(nonce + 4), get_unaligned_le32(nonce + 8), get_unaligned_le32(nonce + 12)
};
TWENTY_ROUNDS(x);
diff --git a/src/crypto/chacha20.h b/src/crypto/chacha20.h
index 86ea4e3..f3d408b 100644
--- a/src/crypto/chacha20.h
+++ b/src/crypto/chacha20.h
@@ -7,6 +7,7 @@
#define _WG_CHACHA20_H
#include "simd.h"
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -27,15 +28,14 @@ void chacha20_fpu_init(void);
static inline void chacha20_init(struct chacha20_ctx *state, const u8 key[CHACHA20_KEY_SIZE], const u64 nonce)
{
- __le32 *le_key = (__le32 *)key;
- state->key[0] = le32_to_cpu(le_key[0]);
- state->key[1] = le32_to_cpu(le_key[1]);
- state->key[2] = le32_to_cpu(le_key[2]);
- state->key[3] = le32_to_cpu(le_key[3]);
- state->key[4] = le32_to_cpu(le_key[4]);
- state->key[5] = le32_to_cpu(le_key[5]);
- state->key[6] = le32_to_cpu(le_key[6]);
- state->key[7] = le32_to_cpu(le_key[7]);
+ state->key[0] = get_unaligned_le32(key + 0);
+ state->key[1] = get_unaligned_le32(key + 4);
+ state->key[2] = get_unaligned_le32(key + 8);
+ state->key[3] = get_unaligned_le32(key + 12);
+ state->key[4] = get_unaligned_le32(key + 16);
+ state->key[5] = get_unaligned_le32(key + 20);
+ state->key[6] = get_unaligned_le32(key + 24);
+ state->key[7] = get_unaligned_le32(key + 28);
state->counter[0] = state->counter[1] = 0;
state->counter[2] = nonce & U32_MAX;
state->counter[3] = nonce >> 32;
diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c
index 30d5444..3e3af5b 100644
--- a/src/crypto/chacha20poly1305.c
+++ b/src/crypto/chacha20poly1305.c
@@ -7,6 +7,7 @@
#include "chacha20.h"
#include "poly1305.h"
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <crypto/scatterwalk.h>
@@ -256,7 +257,7 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
hchacha20(derived_key, nonce, key, simd_context);
- __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context);
+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, get_unaligned_le64(nonce + 16), derived_key, simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
simd_put(simd_context);
}
@@ -270,7 +271,7 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
hchacha20(derived_key, nonce, key, simd_context);
- ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context);
+ ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, get_unaligned_le64(nonce + 16), derived_key, simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
simd_put(simd_context);
return ret;
diff --git a/src/crypto/curve25519-fiat32.h b/src/crypto/curve25519-fiat32.h
index f1e21a4..c5593ea 100644
--- a/src/crypto/curve25519-fiat32.h
+++ b/src/crypto/curve25519-fiat32.h
@@ -23,14 +23,14 @@ typedef struct fe_loose { u32 v[10]; } fe_loose;
static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
{
/* Ignores top bit of s. */
- u32 a0 = le32_to_cpup((__force __le32 *)(s));
- u32 a1 = le32_to_cpup((__force __le32 *)(s+4));
- u32 a2 = le32_to_cpup((__force __le32 *)(s+8));
- u32 a3 = le32_to_cpup((__force __le32 *)(s+12));
- u32 a4 = le32_to_cpup((__force __le32 *)(s+16));
- u32 a5 = le32_to_cpup((__force __le32 *)(s+20));
- u32 a6 = le32_to_cpup((__force __le32 *)(s+24));
- u32 a7 = le32_to_cpup((__force __le32 *)(s+28));
+ u32 a0 = get_unaligned_le32(s);
+ u32 a1 = get_unaligned_le32(s+4);
+ u32 a2 = get_unaligned_le32(s+8);
+ u32 a3 = get_unaligned_le32(s+12);
+ u32 a4 = get_unaligned_le32(s+16);
+ u32 a5 = get_unaligned_le32(s+20);
+ u32 a6 = get_unaligned_le32(s+24);
+ u32 a7 = get_unaligned_le32(s+28);
h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */
h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */
h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
diff --git a/src/crypto/curve25519-hacl64.h b/src/crypto/curve25519-hacl64.h
index d2637ac..7d9d734 100644
--- a/src/crypto/curve25519-hacl64.h
+++ b/src/crypto/curve25519-hacl64.h
@@ -565,11 +565,11 @@ static __always_inline void format_fexpand(u64 *output, const u8 *input)
const u8 *x02 = input + 19;
const u8 *x0 = input + 24;
u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4;
- i0 = le64_to_cpup((__force __le64 *)input);
- i1 = le64_to_cpup((__force __le64 *)x00);
- i2 = le64_to_cpup((__force __le64 *)x01);
- i3 = le64_to_cpup((__force __le64 *)x02);
- i4 = le64_to_cpup((__force __le64 *)x0);
+ i0 = get_unaligned_le64(input);
+ i1 = get_unaligned_le64(x00);
+ i2 = get_unaligned_le64(x01);
+ i3 = get_unaligned_le64(x02);
+ i4 = get_unaligned_le64(x0);
output0 = i0 & 0x7ffffffffffffLLU;
output1 = i1 >> 3 & 0x7ffffffffffffLLU;
output2 = i2 >> 6 & 0x7ffffffffffffLLU;
@@ -688,10 +688,10 @@ static __always_inline void format_fcontract_store(u8 *output, u64 *input)
u8 *b1 = output + 8;
u8 *b2 = output + 16;
u8 *b3 = output + 24;
- *(__force __le64 *)b0 = cpu_to_le64(o0);
- *(__force __le64 *)b1 = cpu_to_le64(o1);
- *(__force __le64 *)b2 = cpu_to_le64(o2);
- *(__force __le64 *)b3 = cpu_to_le64(o3);
+ put_unaligned_le64(o0, b0);
+ put_unaligned_le64(o1, b1);
+ put_unaligned_le64(o2, b2);
+ put_unaligned_le64(o3, b3);
}
static __always_inline void format_fcontract(u8 *output, u64 *input)
diff --git a/src/crypto/curve25519.c b/src/crypto/curve25519.c
index 8de8909..9bf0a41 100644
--- a/src/crypto/curve25519.c
+++ b/src/crypto/curve25519.c
@@ -5,6 +5,7 @@
#include "curve25519.h"
+#include <asm/unaligned.h>
#include <linux/version.h>
#include <linux/string.h>
#include <linux/random.h>
diff --git a/src/crypto/poly1305.c b/src/crypto/poly1305.c
index be2eb33..d35154a 100644
--- a/src/crypto/poly1305.c
+++ b/src/crypto/poly1305.c
@@ -7,6 +7,7 @@
#include "poly1305.h"
#include "simd.h"
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -94,10 +95,10 @@ static void poly1305_init_generic(void *ctx, const u8 key[16])
st->h[4] = 0;
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- st->r[0] = le32_to_cpup((__le32 *)&key[ 0]) & 0x0fffffff;
- st->r[1] = le32_to_cpup((__le32 *)&key[ 4]) & 0x0ffffffc;
- st->r[2] = le32_to_cpup((__le32 *)&key[ 8]) & 0x0ffffffc;
- st->r[3] = le32_to_cpup((__le32 *)&key[12]) & 0x0ffffffc;
+ st->r[0] = get_unaligned_le32(&key[ 0]) & 0x0fffffff;
+ st->r[1] = get_unaligned_le32(&key[ 4]) & 0x0ffffffc;
+ st->r[2] = get_unaligned_le32(&key[ 8]) & 0x0ffffffc;
+ st->r[3] = get_unaligned_le32(&key[12]) & 0x0ffffffc;
}
static void poly1305_blocks_generic(void *ctx, const u8 *inp, size_t len, const u32 padbit)
@@ -126,10 +127,10 @@ static void poly1305_blocks_generic(void *ctx, const u8 *inp, size_t len, const
while (len >= POLY1305_BLOCK_SIZE) {
/* h += m[i] */
- h0 = (u32)(d0 = (u64)h0 + le32_to_cpup((__le32 *)(inp + 0)));
- h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + le32_to_cpup((__le32 *)(inp + 4)));
- h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + le32_to_cpup((__le32 *)(inp + 8)));
- h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + le32_to_cpup((__le32 *)(inp + 12)));
+ h0 = (u32)(d0 = (u64)h0 + (0 ) + get_unaligned_le32(&inp[ 0]));
+ h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + get_unaligned_le32(&inp[ 4]));
+ h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + get_unaligned_le32(&inp[ 8]));
+ h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + get_unaligned_le32(&inp[12]));
h4 += (u32)(d3 >> 32) + padbit;
/* h *= r "%" p, where "%" stands for "partial remainder" */
@@ -194,7 +195,6 @@ static void poly1305_blocks_generic(void *ctx, const u8 *inp, size_t len, const
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
- __le32 *omac = (__force __le32 *)mac;
u32 h0, h1, h2, h3, h4;
u32 g0, g1, g2, g3, g4;
u64 t;
@@ -231,19 +231,19 @@ static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
h2 = (u32)(t = (u64)h2 + (t >> 32) + nonce[2]);
h3 = (u32)(t = (u64)h3 + (t >> 32) + nonce[3]);
- omac[0] = cpu_to_le32(h0);
- omac[1] = cpu_to_le32(h1);
- omac[2] = cpu_to_le32(h2);
- omac[3] = cpu_to_le32(h3);
+ put_unaligned_le32(h0, &mac[ 0]);
+ put_unaligned_le32(h1, &mac[ 4]);
+ put_unaligned_le32(h2, &mac[ 8]);
+ put_unaligned_le32(h3, &mac[12]);
}
#endif
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], simd_context_t simd_context)
{
- ctx->nonce[0] = le32_to_cpup((__le32 *)&key[16]);
- ctx->nonce[1] = le32_to_cpup((__le32 *)&key[20]);
- ctx->nonce[2] = le32_to_cpup((__le32 *)&key[24]);
- ctx->nonce[3] = le32_to_cpup((__le32 *)&key[28]);
+ ctx->nonce[0] = get_unaligned_le32(&key[16]);
+ ctx->nonce[1] = get_unaligned_le32(&key[20]);
+ ctx->nonce[2] = get_unaligned_le32(&key[24]);
+ ctx->nonce[3] = get_unaligned_le32(&key[28]);
#if defined(CONFIG_X86_64)
poly1305_init_x86_64(ctx->opaque, key);
diff --git a/src/selftest/chacha20poly1305.h b/src/selftest/chacha20poly1305.h
index 1afd3e7..b9dd90b 100644
--- a/src/selftest/chacha20poly1305.h
+++ b/src/selftest/chacha20poly1305.h
@@ -4236,9 +4236,9 @@ static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8
} b = {{ 0 }};
chacha20_init(&chacha20_state, key, 0);
- chacha20_state.counter[1] = le32_to_cpu(*(__le32 *)(nonce + 0));
- chacha20_state.counter[2] = le32_to_cpu(*(__le32 *)(nonce + 4));
- chacha20_state.counter[3] = le32_to_cpu(*(__le32 *)(nonce + 8));
+ chacha20_state.counter[1] = get_unaligned_le32((__le32 *)(nonce + 0));
+ chacha20_state.counter[2] = get_unaligned_le32((__le32 *)(nonce + 4));
+ chacha20_state.counter[3] = get_unaligned_le32((__le32 *)(nonce + 8));
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0), simd_context);
poly1305_init(&poly1305_state, b.block0, simd_context);
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
@@ -4258,7 +4258,7 @@ static inline void chacha20poly1305_selftest_encrypt_bignonce(u8 *dst, const u8
static inline void chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, const size_t ad_len, const u8 *nonce, const size_t nonce_len, const u8 key[CHACHA20POLY1305_KEYLEN])
{
if (nonce_len == 8)
- chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__force __le64 *)nonce), key);
+ chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, get_unaligned_le64((__force __le64 *)nonce), key);
else if (nonce_len == 12)
chacha20poly1305_selftest_encrypt_bignonce(dst, src, src_len, ad, ad_len, nonce, key);
else
@@ -4306,7 +4306,7 @@ bool __init chacha20poly1305_selftest(void)
memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, chacha20poly1305_enc_vectors[i].ilen);
sg_init_one(&sg_src, heap_src, chacha20poly1305_enc_vectors[i].ilen);
sg_init_one(&sg_dst, heap_dst, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE);
- ret = chacha20poly1305_encrypt_sg(&sg_dst, &sg_src, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_enc_vectors[i].nonce), chacha20poly1305_enc_vectors[i].key, simd_context);
+ ret = chacha20poly1305_encrypt_sg(&sg_dst, &sg_src, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, get_unaligned_le64((__force __le64 *)chacha20poly1305_enc_vectors[i].nonce), chacha20poly1305_enc_vectors[i].key, simd_context);
if (!ret || memcmp(heap_dst, chacha20poly1305_enc_vectors[i].result, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE)) {
pr_info("chacha20poly1305 sg encryption self-test %zu: FAIL\n", i + 1);
success = false;
@@ -4315,7 +4315,7 @@ bool __init chacha20poly1305_selftest(void)
simd_put(simd_context);
for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
memset(computed_result, 0, sizeof(computed_result));
- ret = chacha20poly1305_decrypt(computed_result, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key);
+ ret = chacha20poly1305_decrypt(computed_result, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, get_unaligned_le64((__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key);
if (!decryption_success(ret, chacha20poly1305_dec_vectors[i].failure, memcmp(computed_result, chacha20poly1305_dec_vectors[i].result, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE))) {
pr_info("chacha20poly1305 decryption self-test %zu: FAIL\n", i + 1);
success = false;
@@ -4327,7 +4327,7 @@ bool __init chacha20poly1305_selftest(void)
memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen);
sg_init_one(&sg_src, heap_src, chacha20poly1305_dec_vectors[i].ilen);
sg_init_one(&sg_dst, heap_dst, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE);
- ret = chacha20poly1305_decrypt_sg(&sg_dst, &sg_src, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpup((__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key, simd_context);
+ ret = chacha20poly1305_decrypt_sg(&sg_dst, &sg_src, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, get_unaligned_le64((__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key, simd_context);
if (!decryption_success(ret, chacha20poly1305_dec_vectors[i].failure, memcmp(heap_dst, chacha20poly1305_dec_vectors[i].result, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE))) {
pr_info("chacha20poly1305 sg decryption self-test %zu: FAIL\n", i + 1);
success = false;
diff --git a/src/tools/curve25519.c b/src/tools/curve25519.c
index b030853..043aefc 100644
--- a/src/tools/curve25519.c
+++ b/src/tools/curve25519.c
@@ -29,6 +29,9 @@ typedef int64_t s64;
#define le32_to_cpup(a) (*(a))
#define cpu_to_le64(a) (a)
#endif
+#define get_unaligned_le32(a) le32_to_cpup((u32 *)(a))
+#define get_unaligned_le64(a) le64_to_cpup((u64 *)(a))
+#define put_unaligned_le64(s, d) *(u64 *)(d) = cpu_to_le64(s)
#ifndef __always_inline
#define __always_inline __inline __attribute__((__always_inline__))
#endif
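Worth noting: unlike the kernel side, the userspace shim added to src/tools/curve25519.c above maps the unaligned helpers onto plain pointer casts, which is only safe on hosts that tolerate unaligned loads (the tools' usual targets). A small, hypothetical self-check of the memcpy-based approach sketched earlier (le32_load is an illustrative name, not from this commit):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Alignment-safe little-endian load; mirrors the demo helper above. */
static uint32_t le32_load(const void *p)
{
	uint32_t v;

	memcpy(&v, p, sizeof(v));
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	v = __builtin_bswap32(v);
#endif
	return v;
}

int main(void)
{
	/* 0x12345678 stored little-endian at an odd (unaligned) offset */
	unsigned char buf[8] = { 0xff, 0x78, 0x56, 0x34, 0x12, 0, 0, 0 };

	assert(le32_load(buf + 1) == 0x12345678u);
	return 0;
}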