From d4ac6bb50892b2d0da3b8dc99097fbbde3b4e705 Mon Sep 17 00:00:00 2001 From: René van Dorst Date: Thu, 20 Sep 2018 15:38:33 +0200 Subject: poly1305-mips32r2: remove all reorder directives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This requires some minimal rearranging to make it work, but for the most part the assembler (`as`) does the right thing, provided we pass it an optimization flag. Suggested-by: Paul Burton Signed-off-by: René van Dorst --- src/crypto/Kbuild.include | 1 + src/crypto/zinc/poly1305/poly1305-mips.S | 54 +++++++++++++------------------- 2 files changed, 23 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/crypto/Kbuild.include b/src/crypto/Kbuild.include index 9ea7452..6f1f8d2 100644 --- a/src/crypto/Kbuild.include +++ b/src/crypto/Kbuild.include @@ -34,6 +34,7 @@ endif ifeq ($(CONFIG_MIPS)$(CONFIG_CPU_MIPS32_R2),yy) wireguard-y += crypto/zinc/poly1305/poly1305-mips.o CFLAGS_poly1305.o += -DCONFIG_ZINC_ARCH_MIPS +AFLAGS_poly1305-mips.o += -O2 endif ifeq ($(CONFIG_MIPS)$(CONFIG_64BIT),yy) wireguard-y += crypto/zinc/poly1305/poly1305-mips64.o diff --git a/src/crypto/zinc/poly1305/poly1305-mips.S b/src/crypto/zinc/poly1305/poly1305-mips.S index 128b60c..4d695ee 100644 --- a/src/crypto/zinc/poly1305/poly1305-mips.S +++ b/src/crypto/zinc/poly1305/poly1305-mips.S @@ -52,23 +52,21 @@ #define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0) #define POLY1305_BLOCK_SIZE 16 -#define POLY1305_STACK_SIZE 8 * 4 +#define POLY1305_STACK_SIZE 32 -.set reorder -.set noat -.align 4 -.globl poly1305_blocks_mips -.ent poly1305_blocks_mips +.set noat +.align 4 +.globl poly1305_blocks_mips +.ent poly1305_blocks_mips poly1305_blocks_mips: - .frame $sp,POLY1305_STACK_SIZE,$31 + .frame $sp, POLY1305_STACK_SIZE, $ra /* srclen &= 0xFFFFFFF0 */ ins srclen, $zero, 0, 4 - .set noreorder + addiu $sp, -(POLY1305_STACK_SIZE) + /* check srclen >= 16 bytes */ beqz srclen, .Lpoly1305_blocks_mips_end - addiu $sp, -(POLY1305_STACK_SIZE) - .set
reorder /* Calculate last round based on src address pointer. * last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0) @@ -210,12 +208,10 @@ poly1305_blocks_mips: srl CA, O4, 2 ins O4, $zero, 0, 2 + addu CA, O4 + /* able to do a 16 byte block. */ - .set noreorder bne src, srclen, .Lpoly1305_loop - /* Delay slot is always executed. */ - addu CA, O4 - .set reorder /* restore the used save registers. */ lw $s0, 0($sp) @@ -234,14 +230,12 @@ poly1305_blocks_mips: sw H4, PTR_POLY1305_H(4) .Lpoly1305_blocks_mips_end: + addiu $sp, POLY1305_STACK_SIZE + /* Jump Back */ - .set noreorder jr $ra - addiu $sp, POLY1305_STACK_SIZE - .set reorder .end poly1305_blocks_mips .set at -.set reorder /* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */ #define MAC $a1 @@ -253,11 +247,10 @@ poly1305_blocks_mips: #define G3 $t8 #define G4 $t9 -.set reorder -.set noat -.align 4 -.globl poly1305_emit_mips -.ent poly1305_emit_mips +.set noat +.align 4 +.globl poly1305_emit_mips +.ent poly1305_emit_mips poly1305_emit_mips: /* load Hx and Carry */ lw CA, PTR_POLY1305_CA @@ -347,10 +340,9 @@ poly1305_emit_mips: swr H0, 0+LSB(MAC) swr H1, 4+LSB(MAC) swr H2, 8+LSB(MAC) - .set noreorder - jr $ra swr H3,12+LSB(MAC) - .set reorder + + jr $ra .end poly1305_emit_mips #define PR0 $t0 @@ -361,9 +353,9 @@ poly1305_emit_mips: /* Input arguments CTX=$a0, KEY=$a1 */ -.align 4 -.globl poly1305_init_mips -.ent poly1305_init_mips +.align 4 +.globl poly1305_init_mips +.ent poly1305_init_mips poly1305_init_mips: lwl PR0, 0+MSB($a1) lwl PR1, 4+MSB($a1) @@ -408,10 +400,8 @@ poly1305_init_mips: sw PR0, PTR_POLY1305_R(0) sw PR1, PTR_POLY1305_R(1) sw PR2, PTR_POLY1305_R(2) + sw PR3, PTR_POLY1305_R(3) - .set noreorder /* Jump Back */ jr $ra - sw PR3, PTR_POLY1305_R(3) - .set reorder .end poly1305_init_mips -- cgit v1.2.3-59-g8ed1b