13 files changed, 123 insertions, 153 deletions
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 395bbf64b2ab..34b8a89197be 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -10,7 +10,7 @@
 #include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/ctr.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index e90386a7db8e..b70ac76f2610 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -195,7 +195,6 @@ SYM_FUNC_START(chacha_4block_xor_neon)
 	adr_l		x10, .Lpermute
 	and		x5, x4, #63
 	add		x10, x10, x5
-	add		x11, x10, #64
 
 	//
 	// This function encrypts four consecutive ChaCha blocks by loading
@@ -645,11 +644,11 @@ CPU_BE(	  rev		a15, a15	)
 	zip2		v31.4s, v14.4s, v15.4s
 	  eor		a15, a15, w9
 
-	mov		x3, #64
+	add		x3, x2, x4
+	sub		x3, x3, #128		// start of last block
+
 	subs		x5, x4, #128
-	add		x6, x5, x2
-	csel		x3, x3, xzr, ge
-	csel		x2, x2, x6, ge
+	csel		x2, x2, x3, ge
 
 	// interleave 64-bit words in state n, n+2
 	zip1		v0.2d, v16.2d, v18.2d
@@ -658,13 +657,10 @@ CPU_BE(	  rev		a15, a15	)
 	zip1		v8.2d, v17.2d, v19.2d
 	zip2		v12.2d, v17.2d, v19.2d
 	  stp		a2, a3, [x1, #-56]
-	ld1		{v16.16b-v19.16b}, [x2], x3
 
 	subs		x6, x4, #192
-	ccmp		x3, xzr, #4, lt
-	add		x7, x6, x2
-	csel		x3, x3, xzr, eq
-	csel		x2, x2, x7, eq
+	ld1		{v16.16b-v19.16b}, [x2], #64
+	csel		x2, x2, x3, ge
 
 	zip1		v1.2d, v20.2d, v22.2d
 	zip2		v5.2d, v20.2d, v22.2d
@@ -672,13 +668,10 @@ CPU_BE(	  rev		a15, a15	)
 	zip1		v9.2d, v21.2d, v23.2d
 	zip2		v13.2d, v21.2d, v23.2d
 	  stp		a6, a7, [x1, #-40]
-	ld1		{v20.16b-v23.16b}, [x2], x3
 
 	subs		x7, x4, #256
-	ccmp		x3, xzr, #4, lt
-	add		x8, x7, x2
-	csel		x3, x3, xzr, eq
-	csel		x2, x2, x8, eq
+	ld1		{v20.16b-v23.16b}, [x2], #64
+	csel		x2, x2, x3, ge
 
 	zip1		v2.2d, v24.2d, v26.2d
 	zip2		v6.2d, v24.2d, v26.2d
@@ -686,12 +679,10 @@ CPU_BE(	  rev		a15, a15	)
 	zip1		v10.2d, v25.2d, v27.2d
 	zip2		v14.2d, v25.2d, v27.2d
 	  stp		a10, a11, [x1, #-24]
-	ld1		{v24.16b-v27.16b}, [x2], x3
 
 	subs		x8, x4, #320
-	ccmp		x3, xzr, #4, lt
-	add		x9, x8, x2
-	csel		x2, x2, x9, eq
+	ld1		{v24.16b-v27.16b}, [x2], #64
+	csel		x2, x2, x3, ge
 
 	zip1		v3.2d, v28.2d, v30.2d
 	zip2		v7.2d, v28.2d, v30.2d
@@ -699,151 +690,105 @@ CPU_BE(	  rev		a15, a15	)
 	zip1		v11.2d, v29.2d, v31.2d
 	zip2		v15.2d, v29.2d, v31.2d
 	  stp		a14, a15, [x1, #-8]
+
+	tbnz		x5, #63, .Lt128
 	ld1		{v28.16b-v31.16b}, [x2]
 
 	// xor with corresponding input, write to output
-	tbnz		x5, #63, 0f
 	eor		v16.16b, v16.16b, v0.16b
 	eor		v17.16b, v17.16b, v1.16b
 	eor		v18.16b, v18.16b, v2.16b
 	eor		v19.16b, v19.16b, v3.16b
-	st1		{v16.16b-v19.16b}, [x1], #64
-	cbz		x5, .Lout
 
-	tbnz		x6, #63, 1f
+	tbnz		x6, #63, .Lt192
+
 	eor		v20.16b, v20.16b, v4.16b
 	eor		v21.16b, v21.16b, v5.16b
 	eor		v22.16b, v22.16b, v6.16b
 	eor		v23.16b, v23.16b, v7.16b
-	st1		{v20.16b-v23.16b}, [x1], #64
-	cbz		x6, .Lout
 
-	tbnz		x7, #63, 2f
+	st1		{v16.16b-v19.16b}, [x1], #64
+	tbnz		x7, #63, .Lt256
+
 	eor		v24.16b, v24.16b, v8.16b
 	eor		v25.16b, v25.16b, v9.16b
 	eor		v26.16b, v26.16b, v10.16b
 	eor		v27.16b, v27.16b, v11.16b
-	st1		{v24.16b-v27.16b}, [x1], #64
-	cbz		x7, .Lout
 
-	tbnz		x8, #63, 3f
+	st1		{v20.16b-v23.16b}, [x1], #64
+	tbnz		x8, #63, .Lt320
+
 	eor		v28.16b, v28.16b, v12.16b
 	eor		v29.16b, v29.16b, v13.16b
 	eor		v30.16b, v30.16b, v14.16b
 	eor		v31.16b, v31.16b, v15.16b
+
+	st1		{v24.16b-v27.16b}, [x1], #64
 	st1		{v28.16b-v31.16b}, [x1]
 
 .Lout:	frame_pop
 	ret
 
-	// fewer than 128 bytes of in/output
-0:	ld1		{v8.16b}, [x10]
-	ld1		{v9.16b}, [x11]
-	movi		v10.16b, #16
-	sub		x2, x1, #64
-	add		x1, x1, x5
-	ld1		{v16.16b-v19.16b}, [x2]
-	tbl		v4.16b, {v0.16b-v3.16b}, v8.16b
-	tbx		v20.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v5.16b, {v0.16b-v3.16b}, v8.16b
-	tbx		v21.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v6.16b, {v0.16b-v3.16b}, v8.16b
-	tbx		v22.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v7.16b, {v0.16b-v3.16b}, v8.16b
-	tbx		v23.16b, {v16.16b-v19.16b}, v9.16b
-
-	eor		v20.16b, v20.16b, v4.16b
-	eor		v21.16b, v21.16b, v5.16b
-	eor		v22.16b, v22.16b, v6.16b
-	eor		v23.16b, v23.16b, v7.16b
-	st1		{v20.16b-v23.16b}, [x1]
-	b		.Lout
-
 	// fewer than 192 bytes of in/output
-1:	ld1		{v8.16b}, [x10]
-	ld1		{v9.16b}, [x11]
-	movi		v10.16b, #16
-	add		x1, x1, x6
-	tbl		v0.16b, {v4.16b-v7.16b}, v8.16b
-	tbx		v20.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v1.16b, {v4.16b-v7.16b}, v8.16b
-	tbx		v21.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v2.16b, {v4.16b-v7.16b}, v8.16b
-	tbx		v22.16b, {v16.16b-v19.16b}, v9.16b
-	add		v8.16b, v8.16b, v10.16b
-	add		v9.16b, v9.16b, v10.16b
-	tbl		v3.16b, {v4.16b-v7.16b}, v8.16b
-	tbx		v23.16b, {v16.16b-v19.16b}, v9.16b
-
-	eor		v20.16b, v20.16b, v0.16b
-	eor		v21.16b, v21.16b, v1.16b
-	eor		v22.16b, v22.16b, v2.16b
-	eor		v23.16b, v23.16b, v3.16b
-	st1		{v20.16b-v23.16b}, [x1]
+.Lt192:	cbz		x5, 1f				// exactly 128 bytes?
+	ld1		{v28.16b-v31.16b}, [x10]
+	add		x5, x5, x1
+	tbl		v28.16b, {v4.16b-v7.16b}, v28.16b
+	tbl		v29.16b, {v4.16b-v7.16b}, v29.16b
+	tbl		v30.16b, {v4.16b-v7.16b}, v30.16b
+	tbl		v31.16b, {v4.16b-v7.16b}, v31.16b
+
+0:	eor		v20.16b, v20.16b, v28.16b
+	eor		v21.16b, v21.16b, v29.16b
+	eor		v22.16b, v22.16b, v30.16b
+	eor		v23.16b, v23.16b, v31.16b
+	st1		{v20.16b-v23.16b}, [x5]		// overlapping stores
+1:	st1		{v16.16b-v19.16b}, [x1]
 	b		.Lout
 
+	// fewer than 128 bytes of in/output
+.Lt128:	ld1		{v28.16b-v31.16b}, [x10]
+	add		x5, x5, x1
+	sub		x1, x1, #64
+	tbl		v28.16b, {v0.16b-v3.16b}, v28.16b
+	tbl		v29.16b, {v0.16b-v3.16b}, v29.16b
+	tbl		v30.16b, {v0.16b-v3.16b}, v30.16b
+	tbl		v31.16b, {v0.16b-v3.16b}, v31.16b
+	ld1		{v16.16b-v19.16b}, [x1]		// reload first output block
+	b		0b
+
 	// fewer than 256 bytes of in/output
-2:	ld1		{v4.16b}, [x10]
-	ld1		{v5.16b}, [x11]
-	movi		v6.16b, #16
-	add		x1, x1, x7
+.Lt256:	cbz		x6, 2f				// exactly 192 bytes?
+	ld1		{v4.16b-v7.16b}, [x10]
+	add		x6, x6, x1
 	tbl		v0.16b, {v8.16b-v11.16b}, v4.16b
-	tbx		v24.16b, {v20.16b-v23.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v1.16b, {v8.16b-v11.16b}, v4.16b
-	tbx		v25.16b, {v20.16b-v23.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v2.16b, {v8.16b-v11.16b}, v4.16b
-	tbx		v26.16b, {v20.16b-v23.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v3.16b, {v8.16b-v11.16b}, v4.16b
-	tbx		v27.16b, {v20.16b-v23.16b}, v5.16b
-
-	eor		v24.16b, v24.16b, v0.16b
-	eor		v25.16b, v25.16b, v1.16b
-	eor		v26.16b, v26.16b, v2.16b
-	eor		v27.16b, v27.16b, v3.16b
-	st1		{v24.16b-v27.16b}, [x1]
+	tbl		v1.16b, {v8.16b-v11.16b}, v5.16b
+	tbl		v2.16b, {v8.16b-v11.16b}, v6.16b
+	tbl		v3.16b, {v8.16b-v11.16b}, v7.16b
+
+	eor		v28.16b, v28.16b, v0.16b
+	eor		v29.16b, v29.16b, v1.16b
+	eor		v30.16b, v30.16b, v2.16b
+	eor		v31.16b, v31.16b, v3.16b
+	st1		{v28.16b-v31.16b}, [x6]		// overlapping stores
+2:	st1		{v20.16b-v23.16b}, [x1]
 	b		.Lout
 
 	// fewer than 320 bytes of in/output
-3:	ld1		{v4.16b}, [x10]
-	ld1		{v5.16b}, [x11]
-	movi		v6.16b, #16
-	add		x1, x1, x8
+.Lt320:	cbz		x7, 3f				// exactly 256 bytes?
+	ld1		{v4.16b-v7.16b}, [x10]
+	add		x7, x7, x1
 	tbl		v0.16b, {v12.16b-v15.16b}, v4.16b
-	tbx		v28.16b, {v24.16b-v27.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v1.16b, {v12.16b-v15.16b}, v4.16b
-	tbx		v29.16b, {v24.16b-v27.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v2.16b, {v12.16b-v15.16b}, v4.16b
-	tbx		v30.16b, {v24.16b-v27.16b}, v5.16b
-	add		v4.16b, v4.16b, v6.16b
-	add		v5.16b, v5.16b, v6.16b
-	tbl		v3.16b, {v12.16b-v15.16b}, v4.16b
-	tbx		v31.16b, {v24.16b-v27.16b}, v5.16b
+	tbl		v1.16b, {v12.16b-v15.16b}, v5.16b
+	tbl		v2.16b, {v12.16b-v15.16b}, v6.16b
+	tbl		v3.16b, {v12.16b-v15.16b}, v7.16b
 
 	eor		v28.16b, v28.16b, v0.16b
 	eor		v29.16b, v29.16b, v1.16b
 	eor		v30.16b, v30.16b, v2.16b
 	eor		v31.16b, v31.16b, v3.16b
-	st1		{v28.16b-v31.16b}, [x1]
+	st1		{v28.16b-v31.16b}, [x7]		// overlapping stores
+3:	st1		{v24.16b-v27.16b}, [x1]
 	b		.Lout
 SYM_FUNC_END(chacha_4block_xor_neon)
 
@@ -851,7 +796,7 @@ SYM_FUNC_END(chacha_4block_xor_neon)
 	.align		L1_CACHE_SHIFT
 .Lpermute:
 	.set		.Li, 0
-	.rept		192
+	.rept		128
 	.byte		(.Li - 64)
 	.set		.Li, .Li + 1
 	.endr
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 6b958dcdf136..7868330dd54e 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -544,7 +544,22 @@ CPU_LE(	rev		w8, w8		)
 	ext		XL.16b, XL.16b, XL.16b, #8
 	rev64		XL.16b, XL.16b
 	eor		XL.16b, XL.16b, KS0.16b
+
+	.if		\enc == 1
 	st1		{XL.16b}, [x10]			// store tag
+	.else
+	ldp		x11, x12, [sp, #40]		// load tag pointer and authsize
+	adr_l		x17, .Lpermute_table
+	ld1		{KS0.16b}, [x11]		// load supplied tag
+	add		x17, x17, x12
+	ld1		{KS1.16b}, [x17]		// load permute vector
+
+	cmeq		XL.16b, XL.16b, KS0.16b		// compare tags
+	mvn		XL.16b, XL.16b			// -1 for fail, 0 for pass
+	tbl		XL.16b, {XL.16b}, KS1.16b	// keep authsize bytes only
+	sminv		b0, XL.16b			// signed minimum across XL
+	smov		w0, v0.b[0]			// return b0
+	.endif
 
 4:	ldp		x29, x30, [sp], #32
 	ret
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 8536008e3e35..720cd3a58da3 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -55,10 +55,10 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
 				  u64 const h[][2], u64 dg[], u8 ctr[],
 				  u32 const rk[], int rounds, u8 tag[]);
-
-asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
-				  u64 const h[][2], u64 dg[], u8 ctr[],
-				  u32 const rk[], int rounds, u8 tag[]);
+asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
+				 u64 const h[][2], u64 dg[], u8 ctr[],
+				 u32 const rk[], int rounds, const u8 l[],
+				 const u8 tag[], u64 authsize);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -168,7 +168,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
 
-	*ctx = (struct ghash_desc_ctx){};
+	memzero_explicit(ctx, sizeof(*ctx));
 	return 0;
 }
 
@@ -458,6 +458,7 @@ static int gcm_decrypt(struct aead_request *req)
 	unsigned int authsize = crypto_aead_authsize(aead);
 	int nrounds = num_rounds(&ctx->aes_key);
 	struct skcipher_walk walk;
+	u8 otag[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u8 iv[AES_BLOCK_SIZE];
 	u64 dg[2] = {};
@@ -474,9 +475,15 @@ static int gcm_decrypt(struct aead_request *req)
 	memcpy(iv, req->iv, GCM_IV_SIZE);
 	put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
+	scatterwalk_map_and_copy(otag, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
 	err = skcipher_walk_aead_decrypt(&walk, req, false);
 
 	if (likely(crypto_simd_usable())) {
+		int ret;
+
 		do {
 			const u8 *src = walk.src.virt.addr;
 			u8 *dst = walk.dst.virt.addr;
@@ -493,9 +500,10 @@ static int gcm_decrypt(struct aead_request *req)
 			}
 
 			kernel_neon_begin();
-			pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
-					  dg, iv, ctx->aes_key.key_enc, nrounds,
-					  tag);
+			ret = pmull_gcm_decrypt(nbytes, dst, src,
+						ctx->ghash_key.h,
+						dg, iv, ctx->aes_key.key_enc,
+						nrounds, tag, otag, authsize);
 			kernel_neon_end();
 
 			if (unlikely(!nbytes))
@@ -507,6 +515,11 @@ static int gcm_decrypt(struct aead_request *req)
 
 			err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 		} while (walk.nbytes);
+
+		if (err)
+			return err;
+		if (ret)
+			return -EBADMSG;
 	} else {
 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -548,23 +561,20 @@ static int gcm_decrypt(struct aead_request *req)
 			err = skcipher_walk_done(&walk, 0);
 		}
 
+		if (err)
+			return err;
+
 		put_unaligned_be64(dg[1], tag);
 		put_unaligned_be64(dg[0], tag + 8);
 		put_unaligned_be32(1, iv + GCM_IV_SIZE);
 		aes_encrypt(&ctx->aes_key, iv, iv);
 		crypto_xor(tag, iv, AES_BLOCK_SIZE);
-	}
-
-	if (err)
-		return err;
 
-	/* compare calculated auth tag with the stored one */
-	scatterwalk_map_and_copy(buf, req->src,
-				 req->assoclen + req->cryptlen - authsize,
-				 authsize, 0);
-
-	if (crypto_memneq(tag, buf, authsize))
-		return -EBADMSG;
+		if (crypto_memneq(tag, otag, authsize)) {
+			memzero_explicit(tag, AES_BLOCK_SIZE);
+			return -EBADMSG;
+		}
+	}
 	return 0;
 }
 
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
index 6e5576d19af8..cbc980fb02e3 100644
--- a/arch/arm64/crypto/poly1305-armv8.pl
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -840,7 +840,6 @@ poly1305_blocks_neon:
 	 ldp	d14,d15,[sp,#64]
 	addp	$ACC2,$ACC2,$ACC2
 	 ldr	x30,[sp,#8]
-	 .inst	0xd50323bf		// autiasp
 
 	////////////////////////////////////////////////////////////////
 	// lazy reduction, but without narrowing
@@ -882,6 +881,7 @@ poly1305_blocks_neon:
 	str	x4,[$ctx,#8]		// set is_base2_26
 
 	ldr	x29,[sp],#80
+	 .inst	0xd50323bf		// autiasp
 	ret
 .size	poly1305_blocks_neon,.-poly1305_blocks_neon
 
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
index 8d1c4e420ccd..fb2822abf63a 100644
--- a/arch/arm64/crypto/poly1305-core.S_shipped
+++ b/arch/arm64/crypto/poly1305-core.S_shipped
@@ -779,7 +779,6 @@ poly1305_blocks_neon:
 	 ldp	d14,d15,[sp,#64]
 	addp	v21.2d,v21.2d,v21.2d
 	 ldr	x30,[sp,#8]
-	 .inst	0xd50323bf		// autiasp
 
 	////////////////////////////////////////////////////////////////
 	// lazy reduction, but without narrowing
@@ -821,6 +820,7 @@ poly1305_blocks_neon:
 	str	x4,[x0,#8]		// set is_base2_26
 
 	ldr	x29,[sp],#80
+	 .inst	0xd50323bf		// autiasp
 	ret
 .size	poly1305_blocks_neon,.-poly1305_blocks_neon
 
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
index f33ada70c4ed..683de671741a 100644
--- a/arch/arm64/crypto/poly1305-glue.c
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -177,7 +177,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
 	}
 
 	poly1305_emit(&dctx->h, dst, dctx->s);
-	*dctx = (struct poly1305_desc_ctx){};
+	memzero_explicit(dctx, sizeof(*dctx));
 }
 EXPORT_SYMBOL(poly1305_final_arch);
 
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index c63b99211db3..c93121bcfdeb 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -10,7 +10,7 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
 #include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 5e956d7582a5..31ba3da5e61b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -10,7 +10,7 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index 77bc6e72abae..9462f6088b3f 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -10,7 +10,7 @@
 #include <asm/simd.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha256_base.h>
 #include <linux/types.h>
 #include <linux/string.h>
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 9a4bbfc45f40..e5a2936f0886 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -94,7 +94,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out)
 	if (digest_size & 4)
 		put_unaligned_le32(sctx->st[i], (__le32 *)digest);
 
-	*sctx = (struct sha3_state){};
+	memzero_explicit(sctx, sizeof(*sctx));
 	return 0;
 }
 
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index dc890a719f54..faa83f6cf376 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -14,7 +14,7 @@
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha512_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
index 370ccb29602f..2acff1c7df5d 100644
--- a/arch/arm64/crypto/sha512-glue.c
+++ b/arch/arm64/crypto/sha512-glue.c
@@ -8,7 +8,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/types.h>
 #include <linux/string.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <crypto/sha512_base.h>
 #include <asm/neon.h>