aboutsummaryrefslogtreecommitdiffstats
path: root/lib/zinc/poly1305/poly1305-mips.S
blob: 4291c156815b35f730ca1b7aabf13f25fc41415e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com> All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

/*
 * MSB/LSB: byte offset, inside one 32-bit word, of the most- and
 * least-significant byte for the endianness being compiled for.  They are
 * added to the displacement of every lwl/lwr and swl/swr pair below
 * (lwl/swl take MSB, lwr/swr take LSB) so that the pair covers one whole
 * word.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#else
#define MSB 3
#define LSB 0
#endif

/* Bytes consumed from src per loop iteration of poly1305_blocks_mips. */
#define POLY1305_BLOCK_SIZE 16
.text
/* H0..H4: accumulator words h0..h4, loaded from / stored to PTR_POLY1305_H(n). */
#define H0 $t0
#define H1 $t1
#define H2 $t2
#define H3 $t3
#define H4 $t4

/* R0..R3: the four words loaded from PTR_POLY1305_R(n). */
#define R0 $t5
#define R1 $t6
#define R2 $t7
#define R3 $t8

/*
 * O0..O4: accumulator plus current input block, i.e. the operands that are
 * multiplied by R/S.  O0, O1 and O4 live in $s registers, which
 * poly1305_blocks_mips spills to its stack frame.
 */
#define O0 $s0
#define O1 $s4
#define O2 $v1
#define O3 $t9
#define O4 $s5

/* S1..S3: Rx + (Rx >> 2), computed once per poly1305_blocks_mips call. */
#define S1 $s1
#define S2 $s2
#define S3 $s3

/*
 * SC: scratch register.  It is $at, so every user must be assembled under
 *     ".set noat".
 * CA: carry word passed between steps and kept in the state buffer between
 *     calls.
 */
#define SC $at
#define CA $v0

/* Input arguments */
#define poly	$a0
#define src	$a1
#define srclen	$a2
#define hibit	$a3

/* Location in the opaque buffer
 * R[0..3], CA, H[0..4]
 *
 * Byte offsets from $a0: R(n) at 0 + 4*n, CA at 16, H(n) at 20 + 4*n.
 * Each macro expands to a complete "offset($a0)" memory operand.
 */
#define PTR_POLY1305_R(n) ( 0 + (n*4)) ## ($a0)
#define PTR_POLY1305_CA   (16        ) ## ($a0)
#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0)

/*
 * NOTE(review): POLY1305_BLOCK_SIZE is already defined above with the same
 * value; this line repeats that definition.
 * POLY1305_STACK_SIZE is the frame size of poly1305_blocks_mips, which
 * stores six registers ($s0-$s5) in it at offsets 0..20.
 */
#define POLY1305_BLOCK_SIZE 16
#define POLY1305_STACK_SIZE 32

/*
 * poly1305_blocks_mips - absorb whole 16-byte blocks into the accumulator.
 *
 * In:  $a0 (poly)   state buffer laid out as R[0..3], CA, H[0..4]
 *      $a1 (src)    input bytes, fetched with lwl/lwr pairs
 *      $a2 (srclen) input length in bytes; the low four bits are ignored
 *      $a3 (hibit)  value added to h4 for every block (the "padbit")
 * Out: CA and H[0..4] in the state buffer are updated.  Nothing is loaded
 *      or stored when srclen is below 16.
 * Clobbers: $at, $v0, $v1, $t0-$t9, $a1, $a2, HI/LO.  $s0-$s5 are spilled
 *      to the stack frame and restored before returning.
 */
/* SC is $at: keep the assembler from using it as its own temporary. */
.set	noat
.align	4
.globl	poly1305_blocks_mips
.ent	poly1305_blocks_mips
poly1305_blocks_mips:
	.frame	$sp, POLY1305_STACK_SIZE, $ra
	/* srclen &= 0xFFFFFFF0 */
	ins	srclen, $zero, 0, 4

	/* The frame is allocated before the length check; the early-exit
	 * path at .Lpoly1305_blocks_mips_end releases it again.
	 */
	addiu	$sp, -(POLY1305_STACK_SIZE)

	/* check srclen >= 16 bytes */
	beqz	srclen, .Lpoly1305_blocks_mips_end

	/* Calculate last round based on src address pointer.
	 * last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0)
	 */
	addu	srclen, src

	lw	R0, PTR_POLY1305_R(0)
	lw	R1, PTR_POLY1305_R(1)
	lw	R2, PTR_POLY1305_R(2)
	lw	R3, PTR_POLY1305_R(3)

	/* store the used save registers. */
	sw	$s0, 0($sp)
	sw	$s1, 4($sp)
	sw	$s2, 8($sp)
	sw	$s3, 12($sp)
	sw	$s4, 16($sp)
	sw	$s5, 20($sp)

	/* load Hx and Carry */
	lw	CA, PTR_POLY1305_CA
	lw	H0, PTR_POLY1305_H(0)
	lw	H1, PTR_POLY1305_H(1)
	lw	H2, PTR_POLY1305_H(2)
	lw	H3, PTR_POLY1305_H(3)
	lw	H4, PTR_POLY1305_H(4)

	/* Sx = Rx + (Rx >> 2) */
	srl	S1, R1, 2
	srl	S2, R2, 2
	srl	S3, R3, 2
	addu	S1, R1
	addu	S2, R2
	addu	S3, R3

	/* SC = 1: multiplier used by "maddu CA, SC" to add the previous
	 * high word into the running HI:LO sum.
	 */
	addiu	SC, $zero, 1

.Lpoly1305_loop:
	/* Fetch the next 16 input bytes as four words O0..O3. */
	lwl	O0, 0+MSB(src)
	lwl	O1, 4+MSB(src)
	lwl	O2, 8+MSB(src)
	lwl	O3,12+MSB(src)
	lwr	O0, 0+LSB(src)
	lwr	O1, 4+LSB(src)
	lwr	O2, 8+LSB(src)
	lwr	O3,12+LSB(src)

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* wsbh swaps the bytes inside each halfword, rotr 16 swaps the
	 * halfwords: together a full 32-bit byte reversal.
	 */
	wsbh	O0
	wsbh	O1
	wsbh	O2
	wsbh	O3
	rotr	O0, 16
	rotr	O1, 16
	rotr	O2, 16
	rotr	O3, 16
#endif

	/* Each step below adds carry-in and the input word to Hn.  The sum
	 * ends up in On, the carry-out (0..2) in CA; Hn is reused as a
	 * temporary for the second carry bit.
	 */

	/* h0 = (u32)(d0 = (u64)h0 + inp[0] + c 'Carry_previous cycle'); */
	addu	H0, CA
	sltu	CA, H0, CA
	addu	O0, H0
	sltu	H0, O0, H0
	addu	CA, H0

	/* h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + inp[4]); */
	addu	H1, CA
	sltu	CA, H1, CA
	addu	O1, H1
	sltu	H1, O1, H1
	addu	CA, H1

	/* h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + inp[8]); */
	addu	H2, CA
	sltu	CA, H2, CA
	addu	O2, H2
	sltu	H2, O2, H2
	addu	CA, H2

	/* h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + inp[12]); */
	addu	H3, CA
	sltu	CA, H3, CA
	addu	O3, H3
	sltu	H3, O3, H3
	addu	CA, H3

	/* h4 += (u32)(d3 >> 32) + padbit; */
	addu	H4, hibit
	addu	O4, H4, CA

	/* D0 */
	/* HI:LO = O0*R0 + O1*S3 + O2*S2 + O3*S1; H0 = LO, CA = HI */
	multu	O0, R0
	maddu	O1, S3
	maddu	O2, S2
	maddu	O3, S1
	mfhi	CA
	mflo	H0

	/* D1 */
	/* HI:LO = O0*R1 + O1*R0 + O2*S3 + O3*S2 + O4*S1 + CA; H1 = LO, CA = HI */
	multu	O0, R1
	maddu	O1, R0
	maddu	O2, S3
	maddu	O3, S2
	maddu	O4, S1
	maddu	CA, SC
	mfhi	CA
	mflo	H1

	/* D2 */
	/* HI:LO = O0*R2 + O1*R1 + O2*R0 + O3*S3 + O4*S2 + CA; H2 = LO, CA = HI */
	multu	O0, R2
	maddu	O1, R1
	maddu	O2, R0
	maddu	O3, S3
	maddu	O4, S2
	maddu	CA, SC
	mfhi	CA
	mflo	H2

	/* D4 */
	/* H4 = low 32 bits of O4*R0.
	 * NOTE(review): issued after D2 has been read out of HI/LO and
	 * before D3's multu restarts the accumulation; keep it there in
	 * case mul disturbs HI/LO on the target core.
	 */
	mul	H4, O4, R0

	/* D3 */
	/* HI:LO = O0*R3 + O1*R2 + O2*R1 + O3*R0 + O4*S3 + CA; H3 = LO, CA = HI */
	multu	O0, R3
	maddu	O1, R2
	maddu	O2, R1
	maddu	O3, R0
	maddu	O4, S3
	maddu	CA, SC
	mfhi	CA
	mflo	H3

	addiu	src, POLY1305_BLOCK_SIZE

	/* h4 += (u32)(d3 >> 32); */
	addu	O4, H4, CA
	/* h4 &= 3 */
	andi	H4, O4, 3
	/* c = (h4 >> 2) + (h4 & ~3U); */
	/* i.e. CA = 5 * (O4 >> 2), added to h0 by the next block or by emit */
	srl	CA, O4, 2
	ins	O4, $zero, 0, 2

	addu	CA, O4

	/* able to do a 16 byte block. */
	/* loop until src reaches the end pointer kept in srclen */
	bne	src, srclen, .Lpoly1305_loop

	/* restore the used save registers. */
	lw	$s0, 0($sp)
	lw	$s1, 4($sp)
	lw	$s2, 8($sp)
	lw	$s3, 12($sp)
	lw	$s4, 16($sp)
	lw	$s5, 20($sp)

	/* store Hx and Carry */
	sw	CA, PTR_POLY1305_CA
	sw	H0, PTR_POLY1305_H(0)
	sw	H1, PTR_POLY1305_H(1)
	sw	H2, PTR_POLY1305_H(2)
	sw	H3, PTR_POLY1305_H(3)
	sw	H4, PTR_POLY1305_H(4)

.Lpoly1305_blocks_mips_end:
	/* Shared by the normal and the early-exit path: release the frame. */
	addiu	$sp, POLY1305_STACK_SIZE

	/* Jump Back */
	jr	$ra
.end poly1305_blocks_mips
/* $at is no longer used explicitly: give it back to the assembler. */
.set at

/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
#define MAC	$a1
#define NONCE	$a2

/* G0..G4: h + 5 during the final reduction; G0..G3 then hold the nonce. */
#define G0	$t5
#define G1	$t6
#define G2	$t7
#define G3	$t8
#define G4	$t9

/*
 * poly1305_emit_mips - final reduction, nonce addition and tag output.
 *
 * In:  $a0 (CTX)   state buffer; only CA and H[0..4] are read
 *      $a1 (MAC)   destination of the 16-byte tag, written with swl/swr
 *      $a2 (NONCE) four 32-bit words, read with lwl/lwr and used as loaded
 *                  (no byte swap is applied to them)
 * Out: MAC[0..15] = low 128 bits of (reduced h + nonce); on big-endian
 *      builds each word is byte-reversed before it is stored.
 * Clobbers: $at, $v0, $t0-$t9.  No stack is used and the state buffer is
 *      not written.
 */
/* SC is $at: keep the assembler from using it as its own temporary. */
.set	noat
.align	4
.globl	poly1305_emit_mips
.ent	poly1305_emit_mips
poly1305_emit_mips:
	/* load Hx and Carry */
	lw	CA, PTR_POLY1305_CA
	lw	H0, PTR_POLY1305_H(0)
	lw	H1, PTR_POLY1305_H(1)
	lw	H2, PTR_POLY1305_H(2)
	lw	H3, PTR_POLY1305_H(3)
	lw	H4, PTR_POLY1305_H(4)

	/* Add left over carry */
	/* ripple the carry word saved by the last block through h0..h4 */
	addu	H0, CA
	sltu	CA, H0, CA
	addu	H1, CA
	sltu	CA, H1, CA
	addu	H2, CA
	sltu	CA, H2, CA
	addu	H3, CA
	sltu	CA, H3, CA
	addu	H4, CA

	/* compare to modulus by computing h + -p */
	/* g = h + 5, with the carry rippled through all five words */
	addiu	G0, H0, 5
	sltu	CA, G0, H0
	addu	G1, H1, CA
	sltu	CA, G1, H1
	addu	G2, H2, CA
	sltu	CA, G2, H2
	addu	G3, H3, CA
	sltu	CA, G3, H3
	addu	G4, H4, CA

	/* SC != 0 exactly when g has a bit set at or above bit 130 */
	srl	SC, G4, 2

	/* if there was carry into 131st bit, h3:h0 = g3:g0 */
	/* movn selects without branching on the accumulator value */
	movn	H0, G0, SC
	movn	H1, G1, SC
	movn	H2, G2, SC
	movn	H3, G3, SC

	/* G0..G3 are free again: reuse them for the four nonce words */
	lwl	G0, 0+MSB(NONCE)
	lwl	G1, 4+MSB(NONCE)
	lwl	G2, 8+MSB(NONCE)
	lwl	G3,12+MSB(NONCE)
	lwr	G0, 0+LSB(NONCE)
	lwr	G1, 4+LSB(NONCE)
	lwr	G2, 8+LSB(NONCE)
	lwr	G3,12+LSB(NONCE)

	/* mac = (h + nonce) % (2^128) */
	addu	H0, G0
	sltu	CA, H0, G0

	/* H1 */
	/* add carry-in, then the nonce word; CA collects both carry-outs */
	addu	H1, CA
	sltu	CA, H1, CA
	addu	H1, G1
	sltu	G1, H1, G1
	addu	CA, G1

	/* H2 */
	addu	H2, CA
	sltu	CA, H2, CA
	addu	H2, G2
	sltu	G2, H2, G2
	addu	CA, G2

	/* H3 */
	/* the carry out of the top word is dropped: result is mod 2^128 */
	addu	H3, CA
	addu	H3, G3

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* wsbh + rotr 16 = full 32-bit byte reversal of each word */
	wsbh	H0
	wsbh	H1
	wsbh	H2
	wsbh	H3
	rotr	H0, 16
	rotr	H1, 16
	rotr	H2, 16
	rotr	H3, 16
#endif

	/* store MAC */
	swl	H0, 0+MSB(MAC)
	swl	H1, 4+MSB(MAC)
	swl	H2, 8+MSB(MAC)
	swl	H3,12+MSB(MAC)
	swr	H0, 0+LSB(MAC)
	swr	H1, 4+LSB(MAC)
	swr	H2, 8+LSB(MAC)
	swr	H3,12+LSB(MAC)

	jr	$ra
.end poly1305_emit_mips
/*
 * $at is no longer used explicitly: give it back to the assembler, as
 * poly1305_blocks_mips does, so that code assembled after this function is
 * not left in noat mode by accident.
 */
.set at

/* PR0..PR3: the four key words being clamped; PT0: mask for PR1..PR3. */
#define PR0 $t0
#define PR1 $t1
#define PR2 $t2
#define PR3 $t3
#define PT0 $t4

/*
 * poly1305_init_mips - reset the state buffer and install the clamped key.
 *
 * In:  $a0 (CTX) state buffer laid out as R[0..3], CA, H[0..4]
 *      $a1 (KEY) 16 key bytes, fetched with lwl/lwr pairs
 * Out: CA = 0, H[0..4] = 0,
 *      R[0] = key word 0 & 0x0fffffff,
 *      R[1..3] = key words 1..3 & 0x0ffffffc.
 * Clobbers: $t0-$t4.  No stack is used.
 */
.align	4
.globl	poly1305_init_mips
.ent	poly1305_init_mips
poly1305_init_mips:
	/* Read all four key words before anything is written to CTX. */
	lwr	PR0, 0+LSB($a1)
	lwr	PR1, 4+LSB($a1)
	lwr	PR2, 8+LSB($a1)
	lwr	PR3,12+LSB($a1)
	lwl	PR0, 0+MSB($a1)
	lwl	PR1, 4+MSB($a1)
	lwl	PR2, 8+MSB($a1)
	lwl	PR3,12+MSB($a1)

	/* PT0 = 0x0ffffffc, the clamp mask for words 1..3 */
	lui	PT0, 0x0FFF
	ori	PT0, PT0, 0xFFFC

	/* Start from an all-zero accumulator and carry. */
	sw	$zero, PTR_POLY1305_H(0)
	sw	$zero, PTR_POLY1305_H(1)
	sw	$zero, PTR_POLY1305_H(2)
	sw	$zero, PTR_POLY1305_H(3)
	sw	$zero, PTR_POLY1305_H(4)
	sw	$zero, PTR_POLY1305_CA

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Byte-reverse each word: swap bytes within halfwords, then swap
	 * the halfwords.
	 */
	wsbh	PR0, PR0
	rotr	PR0, PR0, 16
	wsbh	PR1, PR1
	rotr	PR1, PR1, 16
	wsbh	PR2, PR2
	rotr	PR2, PR2, 16
	wsbh	PR3, PR3
	rotr	PR3, PR3, 16
#endif

	/* Word 0 keeps its low 28 bits (AND 0x0fffffff). */
	ext	PR0, PR0, 0, 28

	/* Words 1..3: AND 0x0ffffffc. */
	and	PR1, PR1, PT0
	and	PR2, PR2, PT0
	and	PR3, PR3, PT0

	/* Publish the clamped key as R[0..3]. */
	sw	PR0, PTR_POLY1305_R(0)
	sw	PR1, PTR_POLY1305_R(1)
	sw	PR2, PTR_POLY1305_R(2)
	sw	PR3, PTR_POLY1305_R(3)

	/* Return to the caller. */
	jr	$ra
.end poly1305_init_mips