aboutsummaryrefslogtreecommitdiffstats
path: root/poly1305-openssl.c
blob: c36d9e40281a11063bc5b1eb69c94583c2d11df1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
 *
 * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
 */

#include <linux/kernel.h>
#include <linux/string.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/intel-family.h>

/*
 * Poly1305 primitives defined outside this file. They are declared
 * asmlinkage; presumably they are the x86_64 assembly routines derived from
 * OpenSSL (see the copyright header) -- confirm against the accompanying
 * assembly source.
 *
 * In every routine, ctx is a caller-owned state buffer; the wrappers in this
 * file always pass poly1305_ctx.opaque for it.
 */

/* Initialise ctx from key. The wrappers below pass the start of their key buffer. */
asmlinkage void poly1305_init_x86_64(void *ctx, const u8 key[16]);
/*
 * Process len bytes starting at inp. The wrappers below pass padbit = 1 for
 * whole blocks taken directly from the message and padbit = 0 for a final
 * block they have already padded by hand.
 */
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, size_t len, u32 padbit);
/* Produce the 16-byte MAC into mac from ctx and the four nonce words. */
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[16], const u32 nonce[4]);
/*
 * Emit counterpart used by the AVX, AVX2 and AVX-512 wrappers below; no
 * separate AVX2/AVX-512 emit routine is declared in this file.
 */
asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[16], const u32 nonce[4]);
/* Block-processing variants with the same signature as poly1305_blocks_x86_64. */
asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, size_t len, u32 padbit);
asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, size_t len, u32 padbit);
asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, size_t len, u32 padbit);

/* Poly1305 sizes, in bytes. */
enum {
	/* Granularity at which message data is fed to the block routines. */
	POLY1305_BLOCK_SIZE = 16,
	/* The wrappers below read key bytes 0..31: 0..15 via init, 16..31 as the nonce words. */
	POLY1305_KEY_SIZE = 32,
	/* Matches the u8 mac[16] parameter of the emit routines. */
	POLY1305_MAC_SIZE = 16
};

/*
 * Per-computation Poly1305 state. Each wrapper in this file keeps one
 * instance on its stack for the duration of a single MAC computation.
 */
struct poly1305_ctx {
	/*
	 * State handed to the init/blocks/emit routines as their ctx argument.
	 * Its internal layout is not visible in this file; presumably it is
	 * private to the assembly implementation -- confirm before changing
	 * the size.
	 */
	u8 opaque[24 * sizeof(u64)];
	/* Four little-endian words loaded from key bytes 16..31, passed to emit. */
	u32 nonce[4];
	/* Staging buffer in which a trailing partial block is padded to full size. */
	u8 data[POLY1305_BLOCK_SIZE];
	/* Set to 0 by the wrappers in this file and not otherwise read here. */
	size_t num;
} __aligned(8);

static inline u32 le32_to_cpuvp(const void *p)
{
	return le32_to_cpup(p);
}

/*
 * One-shot Poly1305 over in[0..inlen) using the scalar x86_64 routines.
 *
 * @out:   receives the MAC written by poly1305_emit_x86_64()
 * @in:    message bytes
 * @inlen: message length in bytes
 * @k:     key buffer; bytes 0..31 are read (the start is given to init,
 *         bytes 16..31 are loaded as four little-endian nonce words)
 */
void poly1305_ossl_amd64(unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k)
{
	struct poly1305_ctx ctx;
	const size_t tail = inlen % POLY1305_BLOCK_SIZE;
	const unsigned long long bulk = inlen - tail;
	size_t i;

	for (i = 0; i < sizeof(ctx.nonce) / sizeof(ctx.nonce[0]); ++i)
		ctx.nonce[i] = le32_to_cpuvp(&k[16 + 4 * i]);
	poly1305_init_x86_64(ctx.opaque, k);
	ctx.num = 0;

	/* bulk is a multiple of the block size, so non-zero means at least one whole block. */
	if (bulk != 0) {
		poly1305_blocks_x86_64(ctx.opaque, in, bulk, 1);
		in += bulk;
	}

	if (tail != 0) {
		/*
		 * Trailing partial block: append the 0x01 pad byte by hand,
		 * zero-fill the remainder, and pass padbit = 0 accordingly.
		 */
		memcpy(ctx.data, in, tail);
		ctx.data[tail] = 1;
		memset(&ctx.data[tail + 1], 0, POLY1305_BLOCK_SIZE - (tail + 1));
		poly1305_blocks_x86_64(ctx.opaque, ctx.data, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_x86_64(ctx.opaque, out, ctx.nonce);
}

/*
 * One-shot Poly1305 over in[0..inlen) using the AVX block and emit routines
 * (initialisation still goes through poly1305_init_x86_64()).
 *
 * @out:   receives the MAC written by poly1305_emit_avx()
 * @in:    message bytes
 * @inlen: message length in bytes
 * @k:     key buffer; bytes 0..31 are read (the start is given to init,
 *         bytes 16..31 are loaded as four little-endian nonce words)
 *
 * NOTE(review): nothing here acquires FPU/SIMD context; presumably the
 * caller is responsible for that -- confirm against the call site.
 */
void poly1305_ossl_avx(unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k)
{
	struct poly1305_ctx ctx;
	const size_t tail = inlen % POLY1305_BLOCK_SIZE;
	const unsigned long long bulk = inlen - tail;
	size_t i;

	for (i = 0; i < sizeof(ctx.nonce) / sizeof(ctx.nonce[0]); ++i)
		ctx.nonce[i] = le32_to_cpuvp(&k[16 + 4 * i]);
	poly1305_init_x86_64(ctx.opaque, k);
	ctx.num = 0;

	/* bulk is a multiple of the block size, so non-zero means at least one whole block. */
	if (bulk != 0) {
		poly1305_blocks_avx(ctx.opaque, in, bulk, 1);
		in += bulk;
	}

	if (tail != 0) {
		/*
		 * Trailing partial block: append the 0x01 pad byte by hand,
		 * zero-fill the remainder, and pass padbit = 0 accordingly.
		 */
		memcpy(ctx.data, in, tail);
		ctx.data[tail] = 1;
		memset(&ctx.data[tail + 1], 0, POLY1305_BLOCK_SIZE - (tail + 1));
		poly1305_blocks_avx(ctx.opaque, ctx.data, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_avx(ctx.opaque, out, ctx.nonce);
}

/*
 * One-shot Poly1305 over in[0..inlen) using the AVX2 block routine.
 * Initialisation goes through poly1305_init_x86_64() and the MAC is
 * produced by poly1305_emit_avx(); this file declares no AVX2-specific
 * init or emit routine.
 *
 * @out:   receives the MAC written by poly1305_emit_avx()
 * @in:    message bytes
 * @inlen: message length in bytes
 * @k:     key buffer; bytes 0..31 are read (the start is given to init,
 *         bytes 16..31 are loaded as four little-endian nonce words)
 *
 * NOTE(review): nothing here acquires FPU/SIMD context; presumably the
 * caller is responsible for that -- confirm against the call site.
 */
void poly1305_ossl_avx2(unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k)
{
	struct poly1305_ctx ctx;
	const size_t tail = inlen % POLY1305_BLOCK_SIZE;
	const unsigned long long bulk = inlen - tail;
	size_t i;

	for (i = 0; i < sizeof(ctx.nonce) / sizeof(ctx.nonce[0]); ++i)
		ctx.nonce[i] = le32_to_cpuvp(&k[16 + 4 * i]);
	poly1305_init_x86_64(ctx.opaque, k);
	ctx.num = 0;

	/* bulk is a multiple of the block size, so non-zero means at least one whole block. */
	if (bulk != 0) {
		poly1305_blocks_avx2(ctx.opaque, in, bulk, 1);
		in += bulk;
	}

	if (tail != 0) {
		/*
		 * Trailing partial block: append the 0x01 pad byte by hand,
		 * zero-fill the remainder, and pass padbit = 0 accordingly.
		 */
		memcpy(ctx.data, in, tail);
		ctx.data[tail] = 1;
		memset(&ctx.data[tail + 1], 0, POLY1305_BLOCK_SIZE - (tail + 1));
		poly1305_blocks_avx2(ctx.opaque, ctx.data, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_avx(ctx.opaque, out, ctx.nonce);
}

/*
 * One-shot Poly1305 over in[0..inlen) using the AVX-512 block routine.
 * Initialisation goes through poly1305_init_x86_64() and the MAC is
 * produced by poly1305_emit_avx(); this file declares no AVX-512-specific
 * init or emit routine.
 *
 * @out:   receives the MAC written by poly1305_emit_avx()
 * @in:    message bytes
 * @inlen: message length in bytes
 * @k:     key buffer; bytes 0..31 are read (the start is given to init,
 *         bytes 16..31 are loaded as four little-endian nonce words)
 *
 * NOTE(review): nothing here acquires FPU/SIMD context; presumably the
 * caller is responsible for that -- confirm against the call site.
 */
void poly1305_ossl_avx512(unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k)
{
	struct poly1305_ctx ctx;
	const size_t tail = inlen % POLY1305_BLOCK_SIZE;
	const unsigned long long bulk = inlen - tail;
	size_t i;

	for (i = 0; i < sizeof(ctx.nonce) / sizeof(ctx.nonce[0]); ++i)
		ctx.nonce[i] = le32_to_cpuvp(&k[16 + 4 * i]);
	poly1305_init_x86_64(ctx.opaque, k);
	ctx.num = 0;

	/* bulk is a multiple of the block size, so non-zero means at least one whole block. */
	if (bulk != 0) {
		poly1305_blocks_avx512(ctx.opaque, in, bulk, 1);
		in += bulk;
	}

	if (tail != 0) {
		/*
		 * Trailing partial block: append the 0x01 pad byte by hand,
		 * zero-fill the remainder, and pass padbit = 0 accordingly.
		 */
		memcpy(ctx.data, in, tail);
		ctx.data[tail] = 1;
		memset(&ctx.data[tail + 1], 0, POLY1305_BLOCK_SIZE - (tail + 1));
		poly1305_blocks_avx512(ctx.opaque, ctx.data, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_avx(ctx.opaque, out, ctx.nonce);
}