crypto: arm64/ghash-ce - replace NEON yield check with block limit

Checking the TIF_NEED_RESCHED flag is disproportionately costly on cores with fast crypto instructions and comparatively slow memory accesses. On algorithms such as GHASH, which executes at ~1 cycle per byte on cores that implement support for 64-bit polynomial multiplication, there is really no need to check the TIF_NEED_RESCHED flag particularly often, and so we can remove the NEON yield check from the assembler routines.

However, unlike the AEAD or skcipher APIs, the shash/ahash APIs take arbitrary input lengths, and so there needs to be some sanity check to ensure that we don't hog the CPU for excessive amounts of time.

So let's simply cap the maximum input size that is processed in one go to 64 KB.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author: Ard Biesheuvel <ard.biesheuvel@linaro.org> 2018-08-04 20:46:24 +0200
committer: Herbert Xu <herbert@gondor.apana.org.au> 2018-08-07 17:51:39 +0800
commit: 8e492eff7de955e6ed1dc2989b17c41cd862aa28 (patch)
tree: 70b149f7f5a1c11335f4507dbce2a705398c598e /arch/arm64/crypto/ghash-ce-core.S
parent: crypto: hisilicon - sec_send_request() can be static (diff)
download: linux-dev-8e492eff7de955e6ed1dc2989b17c41cd862aa28.tar.xz
linux-dev-8e492eff7de955e6ed1dc2989b17c41cd862aa28.zip
1 files changed, 11 insertions, 28 deletions
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 913e49932ae6..344811c6a0ca 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -213,31 +213,23 @@
 	.endm
 
 	.macro		__pmull_ghash, pn
-	frame_push	5
-
-	mov		x19, x0
-	mov		x20, x1
-	mov		x21, x2
-	mov		x22, x3
-	mov		x23, x4
-
-0:	ld1		{SHASH.2d}, [x22]
-	ld1		{XL.2d}, [x20]
+	ld1		{SHASH.2d}, [x3]
+	ld1		{XL.2d}, [x1]
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
 	__pmull_pre_\pn
 
 	/* do the head block first, if supplied */
-	cbz		x23, 1f
-	ld1		{T1.2d}, [x23]
-	mov		x23, xzr
-	b		2f
+	cbz		x4, 0f
+	ld1		{T1.2d}, [x4]
+	mov		x4, xzr
+	b		1f
 
-1:	ld1		{T1.2d}, [x21], #16
-	sub		w19, w19, #1
+0:	ld1		{T1.2d}, [x2], #16
+	sub		w0, w0, #1
 
-2:	/* multiply XL by SHASH in GF(2^128) */
+1:	/* multiply XL by SHASH in GF(2^128) */
 CPU_LE(	rev64		T1.16b, T1.16b	)
 
 	ext		T2.16b, XL.16b, XL.16b, #8
@@ -259,18 +251,9 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	eor		T2.16b, T2.16b, XH.16b
 	eor		XL.16b, XL.16b, T2.16b
 
-	cbz		w19, 3f
-
-	if_will_cond_yield_neon
-	st1		{XL.2d}, [x20]
-	do_cond_yield_neon
-	b		0b
-	endif_yield_neon
-
-	b		1b
+	cbnz		w0, 0b
 
-3:	st1		{XL.2d}, [x20]
-	frame_pop
+	st1		{XL.2d}, [x1]
 	ret
 	.endm
author	Ard Biesheuvel <ard.biesheuvel@linaro.org>	2018-08-04 20:46:24 +0200
committer	Herbert Xu <herbert@gondor.apana.org.au>	2018-08-07 17:51:39 +0800
commit	8e492eff7de955e6ed1dc2989b17c41cd862aa28 (patch)
tree	70b149f7f5a1c11335f4507dbce2a705398c598e /arch/arm64/crypto/ghash-ce-core.S
parent	crypto: hisilicon - sec_send_request() can be static (diff)
download	linux-dev-8e492eff7de955e6ed1dc2989b17c41cd862aa28.tar.xz linux-dev-8e492eff7de955e6ed1dc2989b17c41cd862aa28.zip