about summary refs log tree commit diff stats homepage
path: root/src/crypto
diff options
context:
space:
mode:
authorJason A. Donenfeld <Jason@zx2c4.com>2018-08-23 18:08:03 -0700
committerJason A. Donenfeld <Jason@zx2c4.com>2018-08-28 23:20:13 -0600
commit1e7b209da908f815968f681d6ee4894a04399c97 (patch)
tree84c57fb3029513bfbe0313238b4563a8974af4c6 /src/crypto
parentKconfig: use new-style help marker (diff)
downloadwireguard-monolithic-historical-1e7b209da908f815968f681d6ee4894a04399c97.tar.xz
wireguard-monolithic-historical-1e7b209da908f815968f681d6ee4894a04399c97.zip
crypto: use unaligned helpers
This is not useful for WireGuard, but for the general use case we probably want it this way, and the speed difference is mostly lost in the noise.
Diffstat (limited to 'src/crypto')
-rw-r--r--  src/crypto/chacha20.c          |  7
-rw-r--r--  src/crypto/chacha20.h          | 18
-rw-r--r--  src/crypto/chacha20poly1305.c  |  5
-rw-r--r--  src/crypto/curve25519-fiat32.h | 16
-rw-r--r--  src/crypto/curve25519-hacl64.h | 18
-rw-r--r--  src/crypto/curve25519.c        |  1
-rw-r--r--  src/crypto/poly1305.c          | 34
7 files changed, 51 insertions(+), 48 deletions(-)
diff --git a/src/crypto/chacha20.c b/src/crypto/chacha20.c
index 815d777..c23928e 100644
--- a/src/crypto/chacha20.c
+++ b/src/crypto/chacha20.c
@@ -5,6 +5,7 @@
#include "chacha20.h"
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <crypto/algapi.h>
@@ -210,9 +211,9 @@ static void hchacha20_generic(u8 derived_key[CHACHA20_KEY_SIZE], const u8 nonce[
__le32 *out = (__force __le32 *)derived_key;
u32 x[] = {
EXPAND_32_BYTE_K,
- le32_to_cpup((__le32 *)(key + 0)), le32_to_cpup((__le32 *)(key + 4)), le32_to_cpup((__le32 *)(key + 8)), le32_to_cpup((__le32 *)(key + 12)),
- le32_to_cpup((__le32 *)(key + 16)), le32_to_cpup((__le32 *)(key + 20)), le32_to_cpup((__le32 *)(key + 24)), le32_to_cpup((__le32 *)(key + 28)),
- le32_to_cpup((__le32 *)(nonce + 0)), le32_to_cpup((__le32 *)(nonce + 4)), le32_to_cpup((__le32 *)(nonce + 8)), le32_to_cpup((__le32 *)(nonce + 12))
+ get_unaligned_le32(key + 0), get_unaligned_le32(key + 4), get_unaligned_le32(key + 8), get_unaligned_le32(key + 12),
+ get_unaligned_le32(key + 16), get_unaligned_le32(key + 20), get_unaligned_le32(key + 24), get_unaligned_le32(key + 28),
+ get_unaligned_le32(nonce + 0), get_unaligned_le32(nonce + 4), get_unaligned_le32(nonce + 8), get_unaligned_le32(nonce + 12)
};
TWENTY_ROUNDS(x);
diff --git a/src/crypto/chacha20.h b/src/crypto/chacha20.h
index 86ea4e3..f3d408b 100644
--- a/src/crypto/chacha20.h
+++ b/src/crypto/chacha20.h
@@ -7,6 +7,7 @@
#define _WG_CHACHA20_H
#include "simd.h"
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -27,15 +28,14 @@ void chacha20_fpu_init(void);
static inline void chacha20_init(struct chacha20_ctx *state, const u8 key[CHACHA20_KEY_SIZE], const u64 nonce)
{
- __le32 *le_key = (__le32 *)key;
- state->key[0] = le32_to_cpu(le_key[0]);
- state->key[1] = le32_to_cpu(le_key[1]);
- state->key[2] = le32_to_cpu(le_key[2]);
- state->key[3] = le32_to_cpu(le_key[3]);
- state->key[4] = le32_to_cpu(le_key[4]);
- state->key[5] = le32_to_cpu(le_key[5]);
- state->key[6] = le32_to_cpu(le_key[6]);
- state->key[7] = le32_to_cpu(le_key[7]);
+ state->key[0] = get_unaligned_le32(key + 0);
+ state->key[1] = get_unaligned_le32(key + 4);
+ state->key[2] = get_unaligned_le32(key + 8);
+ state->key[3] = get_unaligned_le32(key + 12);
+ state->key[4] = get_unaligned_le32(key + 16);
+ state->key[5] = get_unaligned_le32(key + 20);
+ state->key[6] = get_unaligned_le32(key + 24);
+ state->key[7] = get_unaligned_le32(key + 28);
state->counter[0] = state->counter[1] = 0;
state->counter[2] = nonce & U32_MAX;
state->counter[3] = nonce >> 32;
diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c
index 30d5444..3e3af5b 100644
--- a/src/crypto/chacha20poly1305.c
+++ b/src/crypto/chacha20poly1305.c
@@ -7,6 +7,7 @@
#include "chacha20.h"
#include "poly1305.h"
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <crypto/scatterwalk.h>
@@ -256,7 +257,7 @@ void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
hchacha20(derived_key, nonce, key, simd_context);
- __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context);
+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, get_unaligned_le64(nonce + 16), derived_key, simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
simd_put(simd_context);
}
@@ -270,7 +271,7 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
hchacha20(derived_key, nonce, key, simd_context);
- ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpup((__le64 *)(nonce + 16)), derived_key, simd_context);
+ ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, get_unaligned_le64(nonce + 16), derived_key, simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
simd_put(simd_context);
return ret;
diff --git a/src/crypto/curve25519-fiat32.h b/src/crypto/curve25519-fiat32.h
index f1e21a4..c5593ea 100644
--- a/src/crypto/curve25519-fiat32.h
+++ b/src/crypto/curve25519-fiat32.h
@@ -23,14 +23,14 @@ typedef struct fe_loose { u32 v[10]; } fe_loose;
static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
{
/* Ignores top bit of s. */
- u32 a0 = le32_to_cpup((__force __le32 *)(s));
- u32 a1 = le32_to_cpup((__force __le32 *)(s+4));
- u32 a2 = le32_to_cpup((__force __le32 *)(s+8));
- u32 a3 = le32_to_cpup((__force __le32 *)(s+12));
- u32 a4 = le32_to_cpup((__force __le32 *)(s+16));
- u32 a5 = le32_to_cpup((__force __le32 *)(s+20));
- u32 a6 = le32_to_cpup((__force __le32 *)(s+24));
- u32 a7 = le32_to_cpup((__force __le32 *)(s+28));
+ u32 a0 = get_unaligned_le32(s);
+ u32 a1 = get_unaligned_le32(s+4);
+ u32 a2 = get_unaligned_le32(s+8);
+ u32 a3 = get_unaligned_le32(s+12);
+ u32 a4 = get_unaligned_le32(s+16);
+ u32 a5 = get_unaligned_le32(s+20);
+ u32 a6 = get_unaligned_le32(s+24);
+ u32 a7 = get_unaligned_le32(s+28);
h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */
h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */
h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
diff --git a/src/crypto/curve25519-hacl64.h b/src/crypto/curve25519-hacl64.h
index d2637ac..7d9d734 100644
--- a/src/crypto/curve25519-hacl64.h
+++ b/src/crypto/curve25519-hacl64.h
@@ -565,11 +565,11 @@ static __always_inline void format_fexpand(u64 *output, const u8 *input)
const u8 *x02 = input + 19;
const u8 *x0 = input + 24;
u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4;
- i0 = le64_to_cpup((__force __le64 *)input);
- i1 = le64_to_cpup((__force __le64 *)x00);
- i2 = le64_to_cpup((__force __le64 *)x01);
- i3 = le64_to_cpup((__force __le64 *)x02);
- i4 = le64_to_cpup((__force __le64 *)x0);
+ i0 = get_unaligned_le64(input);
+ i1 = get_unaligned_le64(x00);
+ i2 = get_unaligned_le64(x01);
+ i3 = get_unaligned_le64(x02);
+ i4 = get_unaligned_le64(x0);
output0 = i0 & 0x7ffffffffffffLLU;
output1 = i1 >> 3 & 0x7ffffffffffffLLU;
output2 = i2 >> 6 & 0x7ffffffffffffLLU;
@@ -688,10 +688,10 @@ static __always_inline void format_fcontract_store(u8 *output, u64 *input)
u8 *b1 = output + 8;
u8 *b2 = output + 16;
u8 *b3 = output + 24;
- *(__force __le64 *)b0 = cpu_to_le64(o0);
- *(__force __le64 *)b1 = cpu_to_le64(o1);
- *(__force __le64 *)b2 = cpu_to_le64(o2);
- *(__force __le64 *)b3 = cpu_to_le64(o3);
+ put_unaligned_le64(o0, b0);
+ put_unaligned_le64(o1, b1);
+ put_unaligned_le64(o2, b2);
+ put_unaligned_le64(o3, b3);
}
static __always_inline void format_fcontract(u8 *output, u64 *input)
diff --git a/src/crypto/curve25519.c b/src/crypto/curve25519.c
index 8de8909..9bf0a41 100644
--- a/src/crypto/curve25519.c
+++ b/src/crypto/curve25519.c
@@ -5,6 +5,7 @@
#include "curve25519.h"
+#include <asm/unaligned.h>
#include <linux/version.h>
#include <linux/string.h>
#include <linux/random.h>
diff --git a/src/crypto/poly1305.c b/src/crypto/poly1305.c
index be2eb33..d35154a 100644
--- a/src/crypto/poly1305.c
+++ b/src/crypto/poly1305.c
@@ -7,6 +7,7 @@
#include "poly1305.h"
#include "simd.h"
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -94,10 +95,10 @@ static void poly1305_init_generic(void *ctx, const u8 key[16])
st->h[4] = 0;
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- st->r[0] = le32_to_cpup((__le32 *)&key[ 0]) & 0x0fffffff;
- st->r[1] = le32_to_cpup((__le32 *)&key[ 4]) & 0x0ffffffc;
- st->r[2] = le32_to_cpup((__le32 *)&key[ 8]) & 0x0ffffffc;
- st->r[3] = le32_to_cpup((__le32 *)&key[12]) & 0x0ffffffc;
+ st->r[0] = get_unaligned_le32(&key[ 0]) & 0x0fffffff;
+ st->r[1] = get_unaligned_le32(&key[ 4]) & 0x0ffffffc;
+ st->r[2] = get_unaligned_le32(&key[ 8]) & 0x0ffffffc;
+ st->r[3] = get_unaligned_le32(&key[12]) & 0x0ffffffc;
}
static void poly1305_blocks_generic(void *ctx, const u8 *inp, size_t len, const u32 padbit)
@@ -126,10 +127,10 @@ static void poly1305_blocks_generic(void *ctx, const u8 *inp, size_t len, const
while (len >= POLY1305_BLOCK_SIZE) {
/* h += m[i] */
- h0 = (u32)(d0 = (u64)h0 + le32_to_cpup((__le32 *)(inp + 0)));
- h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + le32_to_cpup((__le32 *)(inp + 4)));
- h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + le32_to_cpup((__le32 *)(inp + 8)));
- h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + le32_to_cpup((__le32 *)(inp + 12)));
+ h0 = (u32)(d0 = (u64)h0 + (0 ) + get_unaligned_le32(&inp[ 0]));
+ h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + get_unaligned_le32(&inp[ 4]));
+ h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + get_unaligned_le32(&inp[ 8]));
+ h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + get_unaligned_le32(&inp[12]));
h4 += (u32)(d3 >> 32) + padbit;
/* h *= r "%" p, where "%" stands for "partial remainder" */
@@ -194,7 +195,6 @@ static void poly1305_blocks_generic(void *ctx, const u8 *inp, size_t len, const
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
- __le32 *omac = (__force __le32 *)mac;
u32 h0, h1, h2, h3, h4;
u32 g0, g1, g2, g3, g4;
u64 t;
@@ -231,19 +231,19 @@ static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
h2 = (u32)(t = (u64)h2 + (t >> 32) + nonce[2]);
h3 = (u32)(t = (u64)h3 + (t >> 32) + nonce[3]);
- omac[0] = cpu_to_le32(h0);
- omac[1] = cpu_to_le32(h1);
- omac[2] = cpu_to_le32(h2);
- omac[3] = cpu_to_le32(h3);
+ put_unaligned_le32(h0, &mac[ 0]);
+ put_unaligned_le32(h1, &mac[ 4]);
+ put_unaligned_le32(h2, &mac[ 8]);
+ put_unaligned_le32(h3, &mac[12]);
}
#endif
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], simd_context_t simd_context)
{
- ctx->nonce[0] = le32_to_cpup((__le32 *)&key[16]);
- ctx->nonce[1] = le32_to_cpup((__le32 *)&key[20]);
- ctx->nonce[2] = le32_to_cpup((__le32 *)&key[24]);
- ctx->nonce[3] = le32_to_cpup((__le32 *)&key[28]);
+ ctx->nonce[0] = get_unaligned_le32(&key[16]);
+ ctx->nonce[1] = get_unaligned_le32(&key[20]);
+ ctx->nonce[2] = get_unaligned_le32(&key[24]);
+ ctx->nonce[3] = get_unaligned_le32(&key[28]);
#if defined(CONFIG_X86_64)
poly1305_init_x86_64(ctx->opaque, key);