From 9e4aa7a47a936b30c7f1d50be57fa072d1dabc4b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 22 Nov 2017 12:46:06 +0100 Subject: poly1305: import MIPS64 primitive from OpenSSL --- src/Kbuild | 7 +- src/crypto/chacha20poly1305.c | 25 ++- src/crypto/curve25519.c | 2 +- src/crypto/poly1305-mips64.S | 383 ++++++++++++++++++++++++++++++++++++ src/tests/qemu/Makefile | 8 +- src/tests/qemu/arch/mips64.config | 3 +- src/tests/qemu/arch/mips64el.config | 3 +- 7 files changed, 411 insertions(+), 20 deletions(-) create mode 100644 src/crypto/poly1305-mips64.S diff --git a/src/Kbuild b/src/Kbuild index 64ff971..9846522 100644 --- a/src/Kbuild +++ b/src/Kbuild @@ -8,11 +8,8 @@ wireguard-y += crypto/curve25519.o crypto/chacha20poly1305.o crypto/blake2s.o wireguard-$(CONFIG_X86_64) += crypto/chacha20-x86_64.o crypto/poly1305-x86_64.o crypto/blake2s-x86_64.o crypto/curve25519-x86_64.o wireguard-$(CONFIG_ARM) += crypto/chacha20-arm.o crypto/poly1305-arm.o crypto/curve25519-arm.o wireguard-$(CONFIG_ARM64) += crypto/chacha20-arm64.o crypto/poly1305-arm64.o - -ifneq ($(KBUILD_EXTMOD),) -CONFIG_WIREGUARD := m -endif +wireguard-$(if $(filter yy,$(CONFIG_MIPS)$(CONFIG_64BIT)),y,n) += crypto/poly1305-mips64.o include $(src)/compat/Kbuild.include -obj-$(CONFIG_WIREGUARD) := wireguard.o +obj-$(if $(KBUILD_EXTMOD),m,$(CONFIG_WIREGUARD)) := wireguard.o diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c index 6ee7fcb..a00a2e0 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/chacha20poly1305.c @@ -68,7 +68,7 @@ static bool chacha20poly1305_use_avx __read_mostly; static bool chacha20poly1305_use_avx2 __read_mostly; static bool chacha20poly1305_use_avx512 __read_mostly; -void chacha20poly1305_fpu_init(void) +void __init chacha20poly1305_fpu_init(void) { chacha20poly1305_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3); chacha20poly1305_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); @@ -99,6 +99,11 @@ void __init chacha20poly1305_fpu_init(void) chacha20poly1305_use_neon = elf_hwcap & HWCAP_NEON; #endif } +#elif defined(CONFIG_MIPS) && defined(CONFIG_64BIT) +asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]); +asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, size_t len, u32 padbit); +asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]); +void __init chacha20poly1305_fpu_init(void) { } #else void __init chacha20poly1305_fpu_init(void) { } #endif @@ -129,7 +134,7 @@ struct chacha20_ctx { u32 state[CHACHA20_BLOCK_SIZE / sizeof(u32)]; } __aligned(32); -static void chacha20_generic_block(struct chacha20_ctx *ctx, void *stream) +static void chacha20_block_generic(struct chacha20_ctx *ctx, void *stream) { u32 x[CHACHA20_BLOCK_SIZE / sizeof(u32)]; __le32 *out = stream; @@ -346,13 +351,13 @@ no_simd: memcpy(dst, src, bytes); while (bytes >= CHACHA20_BLOCK_SIZE) { - chacha20_generic_block(ctx, buf); + chacha20_block_generic(ctx, buf); crypto_xor(dst, buf, CHACHA20_BLOCK_SIZE); bytes -= CHACHA20_BLOCK_SIZE; dst += CHACHA20_BLOCK_SIZE; } if (bytes) { - chacha20_generic_block(ctx, buf); + chacha20_block_generic(ctx, buf); crypto_xor(dst, buf, bytes); } } @@ -370,7 +375,7 @@ struct poly1305_ctx { } func; } __aligned(8); -#if !(defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)) +#if !(defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || (defined(CONFIG_MIPS) && defined(CONFIG_64BIT))) struct poly1305_internal { u32 h[5]; u32 r[4]; @@ -530,7 +535,7 @@ static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4]) omac[2] = cpu_to_le32(h2); omac[3] = cpu_to_le32(h3); } -#endif /* !CONFIG_X86_64 */ +#endif static void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd) { @@ -571,6 +576,10 @@ static void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SI ctx->func.emit = poly1305_emit_neon; } #endif +#elif defined(CONFIG_MIPS) && defined(CONFIG_64BIT) + poly1305_init_mips(ctx->opaque, key); + ctx->func.blocks = poly1305_blocks_mips; + ctx->func.emit = poly1305_emit_mips; #else poly1305_init_generic(ctx->opaque, key); #endif @@ -579,7 +588,7 @@ static void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SI static void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len) { -#if defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || (defined(CONFIG_MIPS) && defined(CONFIG_64BIT)) const poly1305_blocks_f blocks = ctx->func.blocks; #else const poly1305_blocks_f blocks = poly1305_blocks_generic; @@ -619,7 +628,7 @@ static void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len) static void poly1305_finish(struct poly1305_ctx * ctx, u8 mac[16]) { -#if defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || (defined(CONFIG_MIPS) && defined(CONFIG_64BIT)) const poly1305_blocks_f blocks = ctx->func.blocks; const poly1305_emit_f emit = ctx->func.emit; #else diff --git a/src/crypto/curve25519.c b/src/crypto/curve25519.c index 38b6488..c6c9761 100644 --- a/src/crypto/curve25519.c +++ b/src/crypto/curve25519.c @@ -31,7 +31,7 @@ static const u8 null_point[CURVE25519_POINT_SIZE] = { 0 }; #include #include static bool curve25519_use_avx __read_mostly; -void curve25519_fpu_init(void) +void __init curve25519_fpu_init(void) { curve25519_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); } diff --git a/src/crypto/poly1305-mips64.S b/src/crypto/poly1305-mips64.S new file mode 100644 index 0000000..5275a8c --- /dev/null +++ b/src/crypto/poly1305-mips64.S @@ -0,0 +1,383 @@ +/* Copyright 2017 Jason A. Donenfeld . All Rights Reserved. + * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * - Redistributions of source code must retain copyright notices, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * - Neither the name of the CRYPTOGAMS nor the names of its + * copyright holder and contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + * ALTERNATIVELY, provided that this notice is retained in full, this + * product may be distributed under the terms of the GNU General Public + * License (GPL), in which case the provisions of the GPL apply INSTEAD OF + * those given above. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if !defined(CONFIG_64BIT) +#error "This is only for 64-bit kernels." +#endif + +#ifdef __MIPSEB__ +#define MSB 0 +#define LSB 7 +#else +#define MSB 7 +#define LSB 0 +#endif + +#if defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6) +#define dmultu(rs,rt) +#define mflo(rd,rs,rt) dmulu rd,rs,rt +#define mfhi(rd,rs,rt) dmuhu rd,rs,rt +#else +#define dmultu(rs,rt) dmultu rs,rt +#define multu(rs,rt) multu rs,rt +#define mflo(rd,rs,rt) mflo rd +#define mfhi(rd,rs,rt) mfhi rd +#endif + +.text +.set noat +.set noreorder + +/* While most of the assembly in the kernel prefers ENTRY() and ENDPROC(), + * there is no existing MIPS assembly that uses it, and MIPS assembler seems + * to like its own .ent/.end notation, which the MIPS include files don't + * provide in a MIPS-specific ENTRY/ENDPROC definition. So, we skip these + * for now, until somebody complains. */ + +.align 5 +.globl poly1305_init_mips +.ent poly1305_init_mips +poly1305_init_mips: + .frame $29,0,$31 + .set reorder + + sd $0,0($4) + sd $0,8($4) + sd $0,16($4) + + beqz $5,.Lno_key + +#if defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6) + ld $8,0($5) + ld $9,8($5) +#else + ldl $8,0+MSB($5) + ldl $9,8+MSB($5) + ldr $8,0+LSB($5) + ldr $9,8+LSB($5) +#endif +#ifdef __MIPSEB__ +#if defined(CONFIG_CPU_MIPS64_R2) || defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6) + dsbh $8,$8 # byte swap + dsbh $9,$9 + dshd $8,$8 + dshd $9,$9 +#else + ori $10,$0,0xFF + dsll $1,$10,32 + or $10,$1 # 0x000000FF000000FF + + and $11,$8,$10 # byte swap + and $2,$9,$10 + dsrl $1,$8,24 + dsrl $24,$9,24 + dsll $11,24 + dsll $2,24 + and $1,$10 + and $24,$10 + dsll $10,8 # 0x0000FF000000FF00 + or $11,$1 + or $2,$24 + and $1,$8,$10 + and $24,$9,$10 + dsrl $8,8 + dsrl $9,8 + dsll $1,8 + dsll $24,8 + and $8,$10 + and $9,$10 + or $11,$1 + or $2,$24 + or $8,$11 + or $9,$2 + dsrl $11,$8,32 + dsrl $2,$9,32 + dsll $8,32 + dsll $9,32 + or $8,$11 + or $9,$2 +#endif +#endif + li $10,1 + dsll $10,32 + daddiu $10,-63 + dsll $10,28 + daddiu $10,-1 # 0ffffffc0fffffff + + and $8,$10 + daddiu $10,-3 # 0ffffffc0ffffffc + and $9,$10 + + sd $8,24($4) + dsrl $10,$9,2 + sd $9,32($4) + daddu $10,$9 # s1 = r1 + (r1 >> 2) + sd $10,40($4) + +.Lno_key: + li $2,0 # return 0 + jr $31 +.end poly1305_init_mips + +.align 5 +.globl poly1305_blocks_mips +.ent poly1305_blocks_mips +poly1305_blocks_mips: + .set noreorder + dsrl $6,4 # number of complete blocks + bnez $6,poly1305_blocks_internal + nop + jr $31 + nop +.end poly1305_blocks_mips + +.align 5 +.ent poly1305_blocks_internal +poly1305_blocks_internal: + .frame $29,6*8,$31 + .mask 0x00030000,-8 + .set noreorder + dsubu $29,6*8 + sd $17,40($29) + sd $16,32($29) + .set reorder + + ld $12,0($4) # load hash value + ld $13,8($4) + ld $14,16($4) + + ld $15,24($4) # load key + ld $16,32($4) + ld $17,40($4) + +.Loop: +#if defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6) + ld $8,0($5) # load input + ld $9,8($5) +#else + ldl $8,0+MSB($5) # load input + ldl $9,8+MSB($5) + ldr $8,0+LSB($5) + ldr $9,8+LSB($5) +#endif + daddiu $6,-1 + daddiu $5,16 +#ifdef __MIPSEB__ +#if defined(CONFIG_CPU_MIPS64_R2) || defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6) + dsbh $8,$8 # byte swap + dsbh $9,$9 + dshd $8,$8 + dshd $9,$9 +#else + ori $10,$0,0xFF + dsll $1,$10,32 + or $10,$1 # 0x000000FF000000FF + + and $11,$8,$10 # byte swap + and $2,$9,$10 + dsrl $1,$8,24 + dsrl $24,$9,24 + dsll $11,24 + dsll $2,24 + and $1,$10 + and $24,$10 + dsll $10,8 # 0x0000FF000000FF00 + or $11,$1 + or $2,$24 + and $1,$8,$10 + and $24,$9,$10 + dsrl $8,8 + dsrl $9,8 + dsll $1,8 + dsll $24,8 + and $8,$10 + and $9,$10 + or $11,$1 + or $2,$24 + or $8,$11 + or $9,$2 + dsrl $11,$8,32 + dsrl $2,$9,32 + dsll $8,32 + dsll $9,32 + or $8,$11 + or $9,$2 +#endif +#endif + daddu $12,$8 # accumulate input + daddu $13,$9 + sltu $10,$12,$8 + sltu $11,$13,$9 + daddu $13,$10 + + dmultu ($15,$12) # h0*r0 + daddu $14,$7 + sltu $10,$13,$10 + mflo ($8,$15,$12) + mfhi ($9,$15,$12) + + dmultu ($17,$13) # h1*5*r1 + daddu $10,$11 + daddu $14,$10 + mflo ($10,$17,$13) + mfhi ($11,$17,$13) + + dmultu ($16,$12) # h0*r1 + daddu $8,$10 + daddu $9,$11 + mflo ($1,$16,$12) + mfhi ($25,$16,$12) + sltu $10,$8,$10 + daddu $9,$10 + + dmultu ($15,$13) # h1*r0 + daddu $9,$1 + sltu $1,$9,$1 + mflo ($10,$15,$13) + mfhi ($11,$15,$13) + daddu $25,$1 + + dmultu ($17,$14) # h2*5*r1 + daddu $9,$10 + daddu $25,$11 + mflo ($1,$17,$14) + + dmultu ($15,$14) # h2*r0 + sltu $10,$9,$10 + daddu $25,$10 + mflo ($2,$15,$14) + + daddu $9,$1 + daddu $25,$2 + sltu $1,$9,$1 + daddu $25,$1 + + li $10,-4 # final reduction + and $10,$25 + dsrl $11,$25,2 + andi $14,$25,3 + daddu $10,$11 + daddu $12,$8,$10 + sltu $10,$12,$10 + daddu $13,$9,$10 + sltu $10,$13,$10 + daddu $14,$14,$10 + + bnez $6,.Loop + + sd $12,0($4) # store hash value + sd $13,8($4) + sd $14,16($4) + + .set noreorder + ld $17,40($29) # epilogue + ld $16,32($29) + jr $31 + daddu $29,6*8 +.end poly1305_blocks_internal + +.align 5 +.globl poly1305_emit_mips +.ent poly1305_emit_mips +poly1305_emit_mips: + .frame $29,0,$31 + .set reorder + + ld $10,0($4) + ld $11,8($4) + ld $1,16($4) + + daddiu $8,$10,5 # compare to modulus + sltiu $2,$8,5 + daddu $9,$11,$2 + sltu $2,$9,$2 + daddu $1,$1,$2 + + dsrl $1,2 # see if it carried/borrowed + dsubu $1,$0,$1 + nor $2,$0,$1 + + and $8,$1 + and $10,$2 + and $9,$1 + and $11,$2 + or $8,$10 + or $9,$11 + + lwu $10,0($6) # load nonce + lwu $11,4($6) + lwu $1,8($6) + lwu $2,12($6) + dsll $11,32 + dsll $2,32 + or $10,$11 + or $1,$2 + + daddu $8,$10 # accumulate nonce + daddu $9,$1 + sltu $10,$8,$10 + daddu $9,$10 + + dsrl $10,$8,8 # write mac value + dsrl $11,$8,16 + dsrl $1,$8,24 + sb $8,0($5) + dsrl $2,$8,32 + sb $10,1($5) + dsrl $10,$8,40 + sb $11,2($5) + dsrl $11,$8,48 + sb $1,3($5) + dsrl $1,$8,56 + sb $2,4($5) + dsrl $2,$9,8 + sb $10,5($5) + dsrl $10,$9,16 + sb $11,6($5) + dsrl $11,$9,24 + sb $1,7($5) + + sb $9,8($5) + dsrl $1,$9,32 + sb $2,9($5) + dsrl $2,$9,40 + sb $10,10($5) + dsrl $10,$9,48 + sb $11,11($5) + dsrl $11,$9,56 + sb $1,12($5) + sb $2,13($5) + sb $10,14($5) + sb $11,15($5) + + jr $31 +.end poly1305_emit_mips diff --git a/src/tests/qemu/Makefile b/src/tests/qemu/Makefile index 2f288a5..875baf4 100644 --- a/src/tests/qemu/Makefile +++ b/src/tests/qemu/Makefile @@ -117,8 +117,8 @@ ifeq ($(CHOST),$(CBUILD)) QEMU_MACHINE := -cpu host -machine malta,accel=kvm CFLAGS += -EB else -QEMU_MACHINE := -cpu 20Kc -machine malta -smp 1 -CFLAGS += -march=mips64r2 -EB +QEMU_MACHINE := -cpu I6400 -machine malta +CFLAGS += -march=mips64r6 -EB endif else ifeq ($(ARCH),mips64el) QEMU_ARCH := mips64el @@ -128,8 +128,8 @@ ifeq ($(CHOST),$(CBUILD)) QEMU_MACHINE := -cpu host -machine malta,accel=kvm CFLAGS += -EL else -QEMU_MACHINE := -cpu 20Kc -machine malta -smp 1 -CFLAGS += -march=mips64r2 -EL +QEMU_MACHINE := -cpu I6400 -machine malta +CFLAGS += -march=mips64r6 -EL endif else ifeq ($(ARCH),mips) QEMU_ARCH := mips diff --git a/src/tests/qemu/arch/mips64.config b/src/tests/qemu/arch/mips64.config index 6fc6bc4..37d8947 100644 --- a/src/tests/qemu/arch/mips64.config +++ b/src/tests/qemu/arch/mips64.config @@ -1,5 +1,6 @@ CONFIG_64BIT=y -CONFIG_CPU_MIPS64_R2=y +CONFIG_CPU_MIPS64_R6=y +CONFIG_CPU_HAS_MSA=y CONFIG_MIPS_MALTA=y CONFIG_MIPS_CPS=y CONFIG_POWER_RESET=y diff --git a/src/tests/qemu/arch/mips64el.config b/src/tests/qemu/arch/mips64el.config index fc1e43a..2529a60 100644 --- a/src/tests/qemu/arch/mips64el.config +++ b/src/tests/qemu/arch/mips64el.config @@ -1,5 +1,6 @@ CONFIG_64BIT=y -CONFIG_CPU_MIPS64_R2=y +CONFIG_CPU_MIPS64_R6=y +CONFIG_CPU_HAS_MSA=y CONFIG_MIPS_MALTA=y CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_MIPS_CPS=y -- cgit v1.2.3-59-g8ed1b