aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/arm/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/crypto')
-rw-r--r--arch/arm/crypto/Kconfig240
-rw-r--r--arch/arm/crypto/Makefile7
-rw-r--r--arch/arm/crypto/aes-cipher-glue.c2
-rw-r--r--arch/arm/crypto/ghash-ce-core.S382
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c423
-rw-r--r--arch/arm/crypto/nh-neon-core.S2
-rw-r--r--arch/arm/crypto/nhpoly1305-neon-glue.c20
-rw-r--r--arch/arm/crypto/sha1_glue.c14
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c12
-rw-r--r--arch/arm/crypto/sha256_glue.c13
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c12
-rw-r--r--arch/arm/crypto/sha512-glue.c12
-rw-r--r--arch/arm/crypto/sha512-neon-glue.c12
13 files changed, 994 insertions, 157 deletions
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 149a5bd6b88c..847b7a003356 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -1,92 +1,158 @@
# SPDX-License-Identifier: GPL-2.0
-menuconfig ARM_CRYPTO
- bool "ARM Accelerated Cryptographic Algorithms"
- depends on ARM
+menu "Accelerated Cryptographic Algorithms for CPU (arm)"
+
+config CRYPTO_CURVE25519_NEON
+ tristate "Public key crypto: Curve25519 (NEON)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_LIB_CURVE25519_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+ help
+ Curve25519 algorithm
+
+ Architecture: arm with
+ - NEON (Advanced SIMD) extensions
+
+config CRYPTO_GHASH_ARM_CE
+ tristate "Hash functions: GHASH (PMULL/NEON/ARMv8 Crypto Extensions)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_AEAD
+ select CRYPTO_HASH
+ select CRYPTO_CRYPTD
+ select CRYPTO_LIB_AES
+ select CRYPTO_LIB_GF128MUL
+ help
+ GCM GHASH function (NIST SP800-38D)
+
+ Architecture: arm using
+ - PMULL (Polynomial Multiply Long) instructions
+ - NEON (Advanced SIMD) extensions
+ - ARMv8 Crypto Extensions
+
+ Use an implementation of GHASH (used by the GCM AEAD chaining mode)
+ that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
+ that is part of the ARMv8 Crypto Extensions, or a slower variant that
+ uses the vmull.p8 instruction that is part of the basic NEON ISA.
+
+config CRYPTO_NHPOLY1305_NEON
+ tristate "Hash functions: NHPoly1305 (NEON)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_NHPOLY1305
help
- Say Y here to choose from a selection of cryptographic algorithms
- implemented using ARM specific CPU features or instructions.
+ NHPoly1305 hash function (Adiantum)
-if ARM_CRYPTO
+ Architecture: arm using:
+ - NEON (Advanced SIMD) extensions
+
+config CRYPTO_POLY1305_ARM
+ tristate "Hash functions: Poly1305 (NEON)"
+ select CRYPTO_HASH
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+ help
+ Poly1305 authenticator algorithm (RFC7539)
+
+ Architecture: arm optionally using
+ - NEON (Advanced SIMD) extensions
+
+config CRYPTO_BLAKE2S_ARM
+ bool "Hash functions: BLAKE2s"
+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+ help
+ BLAKE2s cryptographic hash function (RFC 7693)
+
+ Architecture: arm
+
+ This is faster than the generic implementations of BLAKE2s and
+ BLAKE2b, but slower than the NEON implementation of BLAKE2b.
+ There is no NEON implementation of BLAKE2s, since NEON doesn't
+ really help with it.
+
+config CRYPTO_BLAKE2B_NEON
+ tristate "Hash functions: BLAKE2b (NEON)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_BLAKE2B
+ help
+ BLAKE2b cryptographic hash function (RFC 7693)
+
+ Architecture: arm using
+ - NEON (Advanced SIMD) extensions
+
+ BLAKE2b digest algorithm optimized with ARM NEON instructions.
+ On ARM processors that have NEON support but not the ARMv8
+ Crypto Extensions, typically this BLAKE2b implementation is
+ much faster than the SHA-2 family and slightly faster than
+ SHA-1.
config CRYPTO_SHA1_ARM
- tristate "SHA1 digest algorithm (ARM-asm)"
+ tristate "Hash functions: SHA-1"
select CRYPTO_SHA1
select CRYPTO_HASH
help
- SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using optimized ARM assembler.
+ SHA-1 secure hash algorithm (FIPS 180)
+
+ Architecture: arm
config CRYPTO_SHA1_ARM_NEON
- tristate "SHA1 digest algorithm (ARM NEON)"
+ tristate "Hash functions: SHA-1 (NEON)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA1_ARM
select CRYPTO_SHA1
select CRYPTO_HASH
help
- SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using optimized ARM NEON assembly, when NEON instructions are
- available.
+ SHA-1 secure hash algorithm (FIPS 180)
+
+ Architecture: arm using
+ - NEON (Advanced SIMD) extensions
config CRYPTO_SHA1_ARM_CE
- tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
+ tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA1_ARM
select CRYPTO_HASH
help
- SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using special ARMv8 Crypto Extensions.
+ SHA-1 secure hash algorithm (FIPS 180)
+
+ Architecture: arm using ARMv8 Crypto Extensions
config CRYPTO_SHA2_ARM_CE
- tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
+ tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA256_ARM
select CRYPTO_HASH
help
- SHA-256 secure hash standard (DFIPS 180-2) implemented
- using special ARMv8 Crypto Extensions.
+ SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+ Architecture: arm using
+ - ARMv8 Crypto Extensions
config CRYPTO_SHA256_ARM
- tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
+ tristate "Hash functions: SHA-224 and SHA-256 (NEON)"
select CRYPTO_HASH
depends on !CPU_V7M
help
- SHA-256 secure hash standard (DFIPS 180-2) implemented
- using optimized ARM assembler and NEON, when available.
+ SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+ Architecture: arm using
+ - NEON (Advanced SIMD) extensions
config CRYPTO_SHA512_ARM
- tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)"
+ tristate "Hash functions: SHA-384 and SHA-512 (NEON)"
select CRYPTO_HASH
depends on !CPU_V7M
help
- SHA-512 secure hash standard (DFIPS 180-2) implemented
- using optimized ARM assembler and NEON, when available.
-
-config CRYPTO_BLAKE2S_ARM
- bool "BLAKE2s digest algorithm (ARM)"
- select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
- help
- BLAKE2s digest algorithm optimized with ARM scalar instructions. This
- is faster than the generic implementations of BLAKE2s and BLAKE2b, but
- slower than the NEON implementation of BLAKE2b. (There is no NEON
- implementation of BLAKE2s, since NEON doesn't really help with it.)
+ SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
-config CRYPTO_BLAKE2B_NEON
- tristate "BLAKE2b digest algorithm (ARM NEON)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLAKE2B
- help
- BLAKE2b digest algorithm optimized with ARM NEON instructions.
- On ARM processors that have NEON support but not the ARMv8
- Crypto Extensions, typically this BLAKE2b implementation is
- much faster than SHA-2 and slightly faster than SHA-1.
+ Architecture: arm using
+ - NEON (Advanced SIMD) extensions
config CRYPTO_AES_ARM
- tristate "Scalar AES cipher for ARM"
+ tristate "Ciphers: AES"
select CRYPTO_ALGAPI
select CRYPTO_AES
help
- Use optimized AES assembler routines for ARM platforms.
+ Block ciphers: AES cipher algorithms (FIPS-197)
+
+ Architecture: arm
On ARM processors without the Crypto Extensions, this is the
fastest AES implementation for single blocks. For multiple
@@ -98,7 +164,7 @@ config CRYPTO_AES_ARM
such attacks very difficult.
config CRYPTO_AES_ARM_BS
- tristate "Bit sliced AES using NEON instructions"
+ tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
@@ -106,8 +172,13 @@ config CRYPTO_AES_ARM_BS
select CRYPTO_CBC
select CRYPTO_SIMD
help
- Use a faster and more secure NEON based implementation of AES in CBC,
- CTR and XTS modes
+ Length-preserving ciphers: AES cipher algorithms (FIPS-197)
+ with block cipher modes:
+ - ECB (Electronic Codebook) mode (NIST SP800-38A)
+ - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+ - CTR (Counter) mode (NIST SP800-38A)
+ - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+ and IEEE 1619)
Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
and for XTS mode encryption, CBC and XTS mode decryption speedup is
@@ -116,58 +187,59 @@ config CRYPTO_AES_ARM_BS
believed to be invulnerable to cache timing attacks.
config CRYPTO_AES_ARM_CE
- tristate "Accelerated AES using ARMv8 Crypto Extensions"
+ tristate "Ciphers: AES, modes: ECB/CBC/CTS/CTR/XTS (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
select CRYPTO_SIMD
help
- Use an implementation of AES in CBC, CTR and XTS modes that uses
- ARMv8 Crypto Extensions
+ Length-preserving ciphers: AES cipher algorithms (FIPS-197)
+ with block cipher modes:
+ - ECB (Electronic Codebook) mode (NIST SP800-38A)
+ - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+ - CTR (Counter) mode (NIST SP800-38A)
+ - CTS (Cipher Text Stealing) mode (NIST SP800-38A)
+ - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+ and IEEE 1619)
+
+ Architecture: arm using:
+ - ARMv8 Crypto Extensions
-config CRYPTO_GHASH_ARM_CE
- tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
- select CRYPTO_HASH
- select CRYPTO_CRYPTD
- select CRYPTO_GF128MUL
+config CRYPTO_CHACHA20_NEON
+ tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (NEON)"
+ select CRYPTO_SKCIPHER
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
help
- Use an implementation of GHASH (used by the GCM AEAD chaining mode)
- that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
- that is part of the ARMv8 Crypto Extensions, or a slower variant that
- uses the vmull.p8 instruction that is part of the basic NEON ISA.
+ Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
+ stream cipher algorithms
-config CRYPTO_CRCT10DIF_ARM_CE
- tristate "CRCT10DIF digest algorithm using PMULL instructions"
- depends on KERNEL_MODE_NEON
- depends on CRC_T10DIF
- select CRYPTO_HASH
+ Architecture: arm using:
+ - NEON (Advanced SIMD) extensions
config CRYPTO_CRC32_ARM_CE
- tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
+ tristate "CRC32C and CRC32"
depends on KERNEL_MODE_NEON
depends on CRC32
select CRYPTO_HASH
+ help
+ CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
+ and CRC32 CRC algorithm (IEEE 802.3)
-config CRYPTO_CHACHA20_NEON
- tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
- select CRYPTO_SKCIPHER
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ Architecture: arm using:
+ - CRC and/or PMULL instructions
-config CRYPTO_POLY1305_ARM
- tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
- select CRYPTO_HASH
- select CRYPTO_ARCH_HAVE_LIB_POLY1305
+ Drivers: crc32-arm-ce and crc32c-arm-ce
-config CRYPTO_NHPOLY1305_NEON
- tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
+config CRYPTO_CRCT10DIF_ARM_CE
+ tristate "CRCT10DIF"
depends on KERNEL_MODE_NEON
- select CRYPTO_NHPOLY1305
+ depends on CRC_T10DIF
+ select CRYPTO_HASH
+ help
+ CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
-config CRYPTO_CURVE25519_NEON
- tristate "NEON accelerated Curve25519 scalar multiplication library"
- depends on KERNEL_MODE_NEON
- select CRYPTO_LIB_CURVE25519_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+ Architecture: arm using:
+ - PMULL (Polynomial Multiply Long) instructions
+
+endmenu
-endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 971e74546fb1..13e62c7c25dc 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -53,7 +53,12 @@ $(obj)/%-core.S: $(src)/%-armv4.pl
clean-files += poly1305-core.S sha256-core.S sha512-core.S
+aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1
+
+AFLAGS_sha256-core.o += $(aflags-thumb2-y)
+AFLAGS_sha512-core.o += $(aflags-thumb2-y)
+
# massage the perlasm code a bit so we only get the NEON routine if we need it
poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
-AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
+AFLAGS_poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y)
diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
index 8cd00f56800e..6dfaef2d8f91 100644
--- a/arch/arm/crypto/aes-cipher-glue.c
+++ b/arch/arm/crypto/aes-cipher-glue.c
@@ -7,7 +7,7 @@
*/
#include <crypto/aes.h>
-#include <linux/crypto.h>
+#include <crypto/algapi.h>
#include <linux/module.h>
asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index 9f51e3fa4526..858c0d66798b 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -2,7 +2,8 @@
/*
* Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
*
- * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2017 Linaro Ltd.
+ * Copyright (C) 2023 Google LLC. <ardb@google.com>
*/
#include <linux/linkage.h>
@@ -44,7 +45,7 @@
t2q .req q7
t3q .req q8
t4q .req q9
- T2 .req q9
+ XH2 .req q9
s1l .req d20
s1h .req d21
@@ -80,7 +81,7 @@
XL2 .req q5
XM2 .req q6
- XH2 .req q7
+ T2 .req q7
T3 .req q8
XL2_L .req d10
@@ -192,9 +193,10 @@
vshr.u64 XL, XL, #1
.endm
- .macro ghash_update, pn
+ .macro ghash_update, pn, enc, aggregate=1, head=1
vld1.64 {XL}, [r1]
+ .if \head
/* do the head block first, if supplied */
ldr ip, [sp]
teq ip, #0
@@ -202,13 +204,32 @@
vld1.64 {T1}, [ip]
teq r0, #0
b 3f
+ .endif
0: .ifc \pn, p64
+ .if \aggregate
tst r0, #3 // skip until #blocks is a
bne 2f // round multiple of 4
vld1.8 {XL2-XM2}, [r2]!
-1: vld1.8 {T3-T2}, [r2]!
+1: vld1.8 {T2-T3}, [r2]!
+
+ .ifnb \enc
+ \enc\()_4x XL2, XM2, T2, T3
+
+ add ip, r3, #16
+ vld1.64 {HH}, [ip, :128]!
+ vld1.64 {HH3-HH4}, [ip, :128]
+
+ veor SHASH2_p64, SHASH_L, SHASH_H
+ veor SHASH2_H, HH_L, HH_H
+ veor HH34_L, HH3_L, HH3_H
+ veor HH34_H, HH4_L, HH4_H
+
+ vmov.i8 MASK, #0xe1
+ vshl.u64 MASK, MASK, #57
+ .endif
+
vrev64.8 XL2, XL2
vrev64.8 XM2, XM2
@@ -218,8 +239,8 @@
veor XL2_H, XL2_H, XL_L
veor XL, XL, T1
- vrev64.8 T3, T3
- vrev64.8 T1, T2
+ vrev64.8 T1, T3
+ vrev64.8 T3, T2
vmull.p64 XH, HH4_H, XL_H // a1 * b1
veor XL2_H, XL2_H, XL_H
@@ -267,14 +288,22 @@
b 1b
.endif
+ .endif
+
+2: vld1.8 {T1}, [r2]!
+
+ .ifnb \enc
+ \enc\()_1x T1
+ veor SHASH2_p64, SHASH_L, SHASH_H
+ vmov.i8 MASK, #0xe1
+ vshl.u64 MASK, MASK, #57
+ .endif
-2: vld1.64 {T1}, [r2]!
subs r0, r0, #1
3: /* multiply XL by SHASH in GF(2^128) */
-#ifndef CONFIG_CPU_BIG_ENDIAN
vrev64.8 T1, T1
-#endif
+
vext.8 IN1, T1, T1, #8
veor T1_L, T1_L, XL_H
veor XL, XL, IN1
@@ -293,9 +322,6 @@
veor XL, XL, T1
bne 0b
-
- vst1.64 {XL}, [r1]
- bx lr
.endm
/*
@@ -316,6 +342,9 @@ ENTRY(pmull_ghash_update_p64)
vshl.u64 MASK, MASK, #57
ghash_update p64
+ vst1.64 {XL}, [r1]
+
+ bx lr
ENDPROC(pmull_ghash_update_p64)
ENTRY(pmull_ghash_update_p8)
@@ -336,4 +365,331 @@ ENTRY(pmull_ghash_update_p8)
vmov.i64 k48, #0xffffffffffff
ghash_update p8
+ vst1.64 {XL}, [r1]
+
+ bx lr
ENDPROC(pmull_ghash_update_p8)
+
+ e0 .req q9
+ e1 .req q10
+ e2 .req q11
+ e3 .req q12
+ e0l .req d18
+ e0h .req d19
+ e2l .req d22
+ e2h .req d23
+ e3l .req d24
+ e3h .req d25
+ ctr .req q13
+ ctr0 .req d26
+ ctr1 .req d27
+
+ ek0 .req q14
+ ek1 .req q15
+
+ .macro round, rk:req, regs:vararg
+ .irp r, \regs
+ aese.8 \r, \rk
+ aesmc.8 \r, \r
+ .endr
+ .endm
+
+ .macro aes_encrypt, rkp, rounds, regs:vararg
+ vld1.8 {ek0-ek1}, [\rkp, :128]!
+ cmp \rounds, #12
+ blt .L\@ // AES-128
+
+ round ek0, \regs
+ vld1.8 {ek0}, [\rkp, :128]!
+ round ek1, \regs
+ vld1.8 {ek1}, [\rkp, :128]!
+
+ beq .L\@ // AES-192
+
+ round ek0, \regs
+ vld1.8 {ek0}, [\rkp, :128]!
+ round ek1, \regs
+ vld1.8 {ek1}, [\rkp, :128]!
+
+.L\@: .rept 4
+ round ek0, \regs
+ vld1.8 {ek0}, [\rkp, :128]!
+ round ek1, \regs
+ vld1.8 {ek1}, [\rkp, :128]!
+ .endr
+
+ round ek0, \regs
+ vld1.8 {ek0}, [\rkp, :128]
+
+ .irp r, \regs
+ aese.8 \r, ek1
+ .endr
+ .irp r, \regs
+ veor \r, \r, ek0
+ .endr
+ .endm
+
+pmull_aes_encrypt:
+ add ip, r5, #4
+ vld1.8 {ctr0}, [r5] // load 12 byte IV
+ vld1.8 {ctr1}, [ip]
+ rev r8, r7
+ vext.8 ctr1, ctr1, ctr1, #4
+ add r7, r7, #1
+ vmov.32 ctr1[1], r8
+ vmov e0, ctr
+
+ add ip, r3, #64
+ aes_encrypt ip, r6, e0
+ bx lr
+ENDPROC(pmull_aes_encrypt)
+
+pmull_aes_encrypt_4x:
+ add ip, r5, #4
+ vld1.8 {ctr0}, [r5]
+ vld1.8 {ctr1}, [ip]
+ rev r8, r7
+ vext.8 ctr1, ctr1, ctr1, #4
+ add r7, r7, #1
+ vmov.32 ctr1[1], r8
+ rev ip, r7
+ vmov e0, ctr
+ add r7, r7, #1
+ vmov.32 ctr1[1], ip
+ rev r8, r7
+ vmov e1, ctr
+ add r7, r7, #1
+ vmov.32 ctr1[1], r8
+ rev ip, r7
+ vmov e2, ctr
+ add r7, r7, #1
+ vmov.32 ctr1[1], ip
+ vmov e3, ctr
+
+ add ip, r3, #64
+ aes_encrypt ip, r6, e0, e1, e2, e3
+ bx lr
+ENDPROC(pmull_aes_encrypt_4x)
+
+pmull_aes_encrypt_final:
+ add ip, r5, #4
+ vld1.8 {ctr0}, [r5]
+ vld1.8 {ctr1}, [ip]
+ rev r8, r7
+ vext.8 ctr1, ctr1, ctr1, #4
+ mov r7, #1 << 24 // BE #1 for the tag
+ vmov.32 ctr1[1], r8
+ vmov e0, ctr
+ vmov.32 ctr1[1], r7
+ vmov e1, ctr
+
+ add ip, r3, #64
+ aes_encrypt ip, r6, e0, e1
+ bx lr
+ENDPROC(pmull_aes_encrypt_final)
+
+ .macro enc_1x, in0
+ bl pmull_aes_encrypt
+ veor \in0, \in0, e0
+ vst1.8 {\in0}, [r4]!
+ .endm
+
+ .macro dec_1x, in0
+ bl pmull_aes_encrypt
+ veor e0, e0, \in0
+ vst1.8 {e0}, [r4]!
+ .endm
+
+ .macro enc_4x, in0, in1, in2, in3
+ bl pmull_aes_encrypt_4x
+
+ veor \in0, \in0, e0
+ veor \in1, \in1, e1
+ veor \in2, \in2, e2
+ veor \in3, \in3, e3
+
+ vst1.8 {\in0-\in1}, [r4]!
+ vst1.8 {\in2-\in3}, [r4]!
+ .endm
+
+ .macro dec_4x, in0, in1, in2, in3
+ bl pmull_aes_encrypt_4x
+
+ veor e0, e0, \in0
+ veor e1, e1, \in1
+ veor e2, e2, \in2
+ veor e3, e3, \in3
+
+ vst1.8 {e0-e1}, [r4]!
+ vst1.8 {e2-e3}, [r4]!
+ .endm
+
+ /*
+ * void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
+ * struct gcm_key const *k, char *dst,
+ * char *iv, int rounds, u32 counter)
+ */
+ENTRY(pmull_gcm_encrypt)
+ push {r4-r8, lr}
+ ldrd r4, r5, [sp, #24]
+ ldrd r6, r7, [sp, #32]
+
+ vld1.64 {SHASH}, [r3]
+
+ ghash_update p64, enc, head=0
+ vst1.64 {XL}, [r1]
+
+ pop {r4-r8, pc}
+ENDPROC(pmull_gcm_encrypt)
+
+ /*
+ * void pmull_gcm_decrypt(int blocks, u64 dg[], const char *src,
+ * struct gcm_key const *k, char *dst,
+ * char *iv, int rounds, u32 counter)
+ */
+ENTRY(pmull_gcm_decrypt)
+ push {r4-r8, lr}
+ ldrd r4, r5, [sp, #24]
+ ldrd r6, r7, [sp, #32]
+
+ vld1.64 {SHASH}, [r3]
+
+ ghash_update p64, dec, head=0
+ vst1.64 {XL}, [r1]
+
+ pop {r4-r8, pc}
+ENDPROC(pmull_gcm_decrypt)
+
+ /*
+ * void pmull_gcm_enc_final(int bytes, u64 dg[], char *tag,
+ * struct gcm_key const *k, char *head,
+ * char *iv, int rounds, u32 counter)
+ */
+ENTRY(pmull_gcm_enc_final)
+ push {r4-r8, lr}
+ ldrd r4, r5, [sp, #24]
+ ldrd r6, r7, [sp, #32]
+
+ bl pmull_aes_encrypt_final
+
+ cmp r0, #0
+ beq .Lenc_final
+
+ mov_l ip, .Lpermute
+ sub r4, r4, #16
+ add r8, ip, r0
+ add ip, ip, #32
+ add r4, r4, r0
+ sub ip, ip, r0
+
+ vld1.8 {e3}, [r8] // permute vector for key stream
+ vld1.8 {e2}, [ip] // permute vector for ghash input
+
+ vtbl.8 e3l, {e0}, e3l
+ vtbl.8 e3h, {e0}, e3h
+
+ vld1.8 {e0}, [r4] // encrypt tail block
+ veor e0, e0, e3
+ vst1.8 {e0}, [r4]
+
+ vtbl.8 T1_L, {e0}, e2l
+ vtbl.8 T1_H, {e0}, e2h
+
+ vld1.64 {XL}, [r1]
+.Lenc_final:
+ vld1.64 {SHASH}, [r3, :128]
+ vmov.i8 MASK, #0xe1
+ veor SHASH2_p64, SHASH_L, SHASH_H
+ vshl.u64 MASK, MASK, #57
+ mov r0, #1
+ bne 3f // process head block first
+ ghash_update p64, aggregate=0, head=0
+
+ vrev64.8 XL, XL
+ vext.8 XL, XL, XL, #8
+ veor XL, XL, e1
+
+ sub r2, r2, #16 // rewind src pointer
+ vst1.8 {XL}, [r2] // store tag
+
+ pop {r4-r8, pc}
+ENDPROC(pmull_gcm_enc_final)
+
+ /*
+ * int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
+ * struct gcm_key const *k, char *head,
+ * char *iv, int rounds, u32 counter,
+ * const char *otag, int authsize)
+ */
+ENTRY(pmull_gcm_dec_final)
+ push {r4-r8, lr}
+ ldrd r4, r5, [sp, #24]
+ ldrd r6, r7, [sp, #32]
+
+ bl pmull_aes_encrypt_final
+
+ cmp r0, #0
+ beq .Ldec_final
+
+ mov_l ip, .Lpermute
+ sub r4, r4, #16
+ add r8, ip, r0
+ add ip, ip, #32
+ add r4, r4, r0
+ sub ip, ip, r0
+
+ vld1.8 {e3}, [r8] // permute vector for key stream
+ vld1.8 {e2}, [ip] // permute vector for ghash input
+
+ vtbl.8 e3l, {e0}, e3l
+ vtbl.8 e3h, {e0}, e3h
+
+ vld1.8 {e0}, [r4]
+
+ vtbl.8 T1_L, {e0}, e2l
+ vtbl.8 T1_H, {e0}, e2h
+
+ veor e0, e0, e3
+ vst1.8 {e0}, [r4]
+
+ vld1.64 {XL}, [r1]
+.Ldec_final:
+ vld1.64 {SHASH}, [r3]
+ vmov.i8 MASK, #0xe1
+ veor SHASH2_p64, SHASH_L, SHASH_H
+ vshl.u64 MASK, MASK, #57
+ mov r0, #1
+ bne 3f // process head block first
+ ghash_update p64, aggregate=0, head=0
+
+ vrev64.8 XL, XL
+ vext.8 XL, XL, XL, #8
+ veor XL, XL, e1
+
+ mov_l ip, .Lpermute
+ ldrd r2, r3, [sp, #40] // otag and authsize
+ vld1.8 {T1}, [r2]
+ add ip, ip, r3
+ vceq.i8 T1, T1, XL // compare tags
+ vmvn T1, T1 // 0 for eq, -1 for ne
+
+ vld1.8 {e0}, [ip]
+ vtbl.8 XL_L, {T1}, e0l // keep authsize bytes only
+ vtbl.8 XL_H, {T1}, e0h
+
+ vpmin.s8 XL_L, XL_L, XL_H // take the minimum s8 across the vector
+ vpmin.s8 XL_L, XL_L, XL_L
+ vmov.32 r0, XL_L[0] // fail if != 0x0
+
+ pop {r4-r8, pc}
+ENDPROC(pmull_gcm_dec_final)
+
+ .section ".rodata", "a", %progbits
+ .align 5
+.Lpermute:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index f13401f3e669..3ddf05b4234d 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -2,36 +2,53 @@
/*
* Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
*
- * Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2018 Linaro Ltd.
+ * Copyright (C) 2023 Google LLC.
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/gcm.h>
#include <crypto/b128ops.h>
#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/gf128mul.h>
+#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
+MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("ghash");
+MODULE_ALIAS_CRYPTO("gcm(aes)");
+MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
+#define RFC4106_NONCE_SIZE 4
+
struct ghash_key {
be128 k;
u64 h[][2];
};
+struct gcm_key {
+ u64 h[4][2];
+ u32 rk[AES_MAX_KEYLENGTH_U32];
+ int rounds;
+ u8 nonce[]; // for RFC4106 nonce
+};
+
struct ghash_desc_ctx {
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
u8 buf[GHASH_BLOCK_SIZE];
@@ -344,6 +361,393 @@ static struct ahash_alg ghash_async_alg = {
},
};
+
+void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
+ struct gcm_key const *k, char *dst,
+ const char *iv, int rounds, u32 counter);
+
+void pmull_gcm_enc_final(int blocks, u64 dg[], char *tag,
+ struct gcm_key const *k, char *head,
+ const char *iv, int rounds, u32 counter);
+
+void pmull_gcm_decrypt(int bytes, u64 dg[], const char *src,
+ struct gcm_key const *k, char *dst,
+ const char *iv, int rounds, u32 counter);
+
+int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
+ struct gcm_key const *k, char *head,
+ const char *iv, int rounds, u32 counter,
+ const char *otag, int authsize);
+
+static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
+ unsigned int keylen)
+{
+ struct gcm_key *ctx = crypto_aead_ctx(tfm);
+ struct crypto_aes_ctx aes_ctx;
+ be128 h, k;
+ int ret;
+
+ ret = aes_expandkey(&aes_ctx, inkey, keylen);
+ if (ret)
+ return -EINVAL;
+
+ aes_encrypt(&aes_ctx, (u8 *)&k, (u8[AES_BLOCK_SIZE]){});
+
+ memcpy(ctx->rk, aes_ctx.key_enc, sizeof(ctx->rk));
+ ctx->rounds = 6 + keylen / 4;
+
+ memzero_explicit(&aes_ctx, sizeof(aes_ctx));
+
+ ghash_reflect(ctx->h[0], &k);
+
+ h = k;
+ gf128mul_lle(&h, &k);
+ ghash_reflect(ctx->h[1], &h);
+
+ gf128mul_lle(&h, &k);
+ ghash_reflect(ctx->h[2], &h);
+
+ gf128mul_lle(&h, &k);
+ ghash_reflect(ctx->h[3], &h);
+
+ return 0;
+}
+
+static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ return crypto_gcm_check_authsize(authsize);
+}
+
+static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
+ int *buf_count, struct gcm_key *ctx)
+{
+ if (*buf_count > 0) {
+ int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
+
+ memcpy(&buf[*buf_count], src, buf_added);
+
+ *buf_count += buf_added;
+ src += buf_added;
+ count -= buf_added;
+ }
+
+ if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
+ int blocks = count / GHASH_BLOCK_SIZE;
+
+ pmull_ghash_update_p64(blocks, dg, src, ctx->h,
+ *buf_count ? buf : NULL);
+
+ src += blocks * GHASH_BLOCK_SIZE;
+ count %= GHASH_BLOCK_SIZE;
+ *buf_count = 0;
+ }
+
+ if (count > 0) {
+ memcpy(buf, src, count);
+ *buf_count = count;
+ }
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ u8 buf[GHASH_BLOCK_SIZE];
+ struct scatter_walk walk;
+ int buf_count = 0;
+
+ scatterwalk_start(&walk, req->src);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, len);
+ u8 *p;
+
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, len);
+ }
+
+ p = scatterwalk_map(&walk);
+ gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
+ scatterwalk_unmap(p);
+
+ if (unlikely(len / SZ_4K > (len - n) / SZ_4K)) {
+ kernel_neon_end();
+ kernel_neon_begin();
+ }
+
+ len -= n;
+ scatterwalk_advance(&walk, n);
+ scatterwalk_done(&walk, 0, len);
+ } while (len);
+
+ if (buf_count) {
+ memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
+ pmull_ghash_update_p64(1, dg, buf, ctx->h, NULL);
+ }
+}
+
+static int gcm_encrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ struct skcipher_walk walk;
+ u8 buf[AES_BLOCK_SIZE];
+ u32 counter = 2;
+ u64 dg[2] = {};
+ be128 lengths;
+ const u8 *src;
+ u8 *tag, *dst;
+ int tail, err;
+
+ if (WARN_ON_ONCE(!may_use_simd()))
+ return -EBUSY;
+
+ err = skcipher_walk_aead_encrypt(&walk, req, false);
+
+ kernel_neon_begin();
+
+ if (assoclen)
+ gcm_calculate_auth_mac(req, dg, assoclen);
+
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int nblocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ pmull_gcm_encrypt(nblocks, dg, src, ctx, dst, iv,
+ ctx->rounds, counter);
+ counter += nblocks;
+
+ if (walk.nbytes == walk.total) {
+ src += nblocks * AES_BLOCK_SIZE;
+ dst += nblocks * AES_BLOCK_SIZE;
+ break;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ if (err)
+ return err;
+
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+
+ kernel_neon_begin();
+ }
+
+
+ lengths.a = cpu_to_be64(assoclen * 8);
+ lengths.b = cpu_to_be64(req->cryptlen * 8);
+
+ tag = (u8 *)&lengths;
+ tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ /*
+ * Bounce via a buffer unless we are encrypting in place and src/dst
+ * are not pointing to the start of the walk buffer. In that case, we
+ * can do a NEON load/xor/store sequence in place as long as we move
+ * the plain/ciphertext and keystream to the start of the register. If
+ * not, do a memcpy() to the end of the buffer so we can reuse the same
+ * logic.
+ */
+ if (unlikely(tail && (tail == walk.nbytes || src != dst)))
+ src = memcpy(buf + sizeof(buf) - tail, src, tail);
+
+ pmull_gcm_enc_final(tail, dg, tag, ctx, (u8 *)src, iv,
+ ctx->rounds, counter);
+ kernel_neon_end();
+
+ if (unlikely(tail && src != dst))
+ memcpy(dst, src, tail);
+
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, 0);
+ if (err)
+ return err;
+ }
+
+ /* copy authtag to end of dst */
+ scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
+ crypto_aead_authsize(aead), 1);
+
+ return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ int authsize = crypto_aead_authsize(aead);
+ struct skcipher_walk walk;
+ u8 otag[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 counter = 2;
+ u64 dg[2] = {};
+ be128 lengths;
+ const u8 *src;
+ u8 *tag, *dst;
+ int tail, err, ret;
+
+ if (WARN_ON_ONCE(!may_use_simd()))
+ return -EBUSY;
+
+ scatterwalk_map_and_copy(otag, req->src,
+ req->assoclen + req->cryptlen - authsize,
+ authsize, 0);
+
+ err = skcipher_walk_aead_decrypt(&walk, req, false);
+
+ kernel_neon_begin();
+
+ if (assoclen)
+ gcm_calculate_auth_mac(req, dg, assoclen);
+
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int nblocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ pmull_gcm_decrypt(nblocks, dg, src, ctx, dst, iv,
+ ctx->rounds, counter);
+ counter += nblocks;
+
+ if (walk.nbytes == walk.total) {
+ src += nblocks * AES_BLOCK_SIZE;
+ dst += nblocks * AES_BLOCK_SIZE;
+ break;
+ }
+
+ kernel_neon_end();
+
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ if (err)
+ return err;
+
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+
+ kernel_neon_begin();
+ }
+
+ lengths.a = cpu_to_be64(assoclen * 8);
+ lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);
+
+ tag = (u8 *)&lengths;
+ tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (unlikely(tail && (tail == walk.nbytes || src != dst)))
+ src = memcpy(buf + sizeof(buf) - tail, src, tail);
+
+ ret = pmull_gcm_dec_final(tail, dg, tag, ctx, (u8 *)src, iv,
+ ctx->rounds, counter, otag, authsize);
+ kernel_neon_end();
+
+ if (unlikely(tail && src != dst))
+ memcpy(dst, src, tail);
+
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, 0);
+ if (err)
+ return err;
+ }
+
+ return ret ? -EBADMSG : 0;
+}
+
+static int gcm_aes_encrypt(struct aead_request *req)
+{
+ return gcm_encrypt(req, req->iv, req->assoclen);
+}
+
+static int gcm_aes_decrypt(struct aead_request *req)
+{
+ return gcm_decrypt(req, req->iv, req->assoclen);
+}
+
+static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
+ unsigned int keylen)
+{
+ struct gcm_key *ctx = crypto_aead_ctx(tfm);
+ int err;
+
+ keylen -= RFC4106_NONCE_SIZE;
+ err = gcm_aes_setkey(tfm, inkey, keylen);
+ if (err)
+ return err;
+
+ memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
+ return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ return crypto_rfc4106_check_authsize(authsize);
+}
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ u8 iv[GCM_AES_IV_SIZE];
+
+ memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+ memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+ return crypto_ipsec_check_assoclen(req->assoclen) ?:
+ gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_key *ctx = crypto_aead_ctx(aead);
+ u8 iv[GCM_AES_IV_SIZE];
+
+ memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+ memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+ return crypto_ipsec_check_assoclen(req->assoclen) ?:
+ gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
+}
+
+static struct aead_alg gcm_aes_algs[] = {{
+ .ivsize = GCM_AES_IV_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = gcm_aes_setkey,
+ .setauthsize = gcm_aes_setauthsize,
+ .encrypt = gcm_aes_encrypt,
+ .decrypt = gcm_aes_decrypt,
+
+ .base.cra_name = "gcm(aes)",
+ .base.cra_driver_name = "gcm-aes-ce",
+ .base.cra_priority = 400,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct gcm_key),
+ .base.cra_module = THIS_MODULE,
+}, {
+ .ivsize = GCM_RFC4106_IV_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = rfc4106_setkey,
+ .setauthsize = rfc4106_setauthsize,
+ .encrypt = rfc4106_encrypt,
+ .decrypt = rfc4106_decrypt,
+
+ .base.cra_name = "rfc4106(gcm(aes))",
+ .base.cra_driver_name = "rfc4106-gcm-aes-ce",
+ .base.cra_priority = 400,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct gcm_key) + RFC4106_NONCE_SIZE,
+ .base.cra_module = THIS_MODULE,
+}};
+
static int __init ghash_ce_mod_init(void)
{
int err;
@@ -352,13 +756,17 @@ static int __init ghash_ce_mod_init(void)
return -ENODEV;
if (elf_hwcap2 & HWCAP2_PMULL) {
+ err = crypto_register_aeads(gcm_aes_algs,
+ ARRAY_SIZE(gcm_aes_algs));
+ if (err)
+ return err;
ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
static_branch_enable(&use_p64);
}
err = crypto_register_shash(&ghash_alg);
if (err)
- return err;
+ goto err_aead;
err = crypto_register_ahash(&ghash_async_alg);
if (err)
goto err_shash;
@@ -367,6 +775,10 @@ static int __init ghash_ce_mod_init(void)
err_shash:
crypto_unregister_shash(&ghash_alg);
+err_aead:
+ if (elf_hwcap2 & HWCAP2_PMULL)
+ crypto_unregister_aeads(gcm_aes_algs,
+ ARRAY_SIZE(gcm_aes_algs));
return err;
}
@@ -374,6 +786,9 @@ static void __exit ghash_ce_mod_exit(void)
{
crypto_unregister_ahash(&ghash_async_alg);
crypto_unregister_shash(&ghash_alg);
+ if (elf_hwcap2 & HWCAP2_PMULL)
+ crypto_unregister_aeads(gcm_aes_algs,
+ ARRAY_SIZE(gcm_aes_algs));
}
module_init(ghash_ce_mod_init);
diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S
index 434d80ab531c..01620a0782ca 100644
--- a/arch/arm/crypto/nh-neon-core.S
+++ b/arch/arm/crypto/nh-neon-core.S
@@ -69,7 +69,7 @@
/*
* void nh_neon(const u32 *key, const u8 *message, size_t message_len,
- * u8 hash[NH_HASH_BYTES])
+ * __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/
diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c
index ffa8d73fe722..62cf7ccdde73 100644
--- a/arch/arm/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm/crypto/nhpoly1305-neon-glue.c
@@ -14,14 +14,7 @@
#include <linux/module.h>
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
- u8 hash[NH_HASH_BYTES]);
-
-/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
-static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
- __le64 hash[NH_NUM_PASSES])
-{
- nh_neon(key, message, message_len, (u8 *)hash);
-}
+ __le64 hash[NH_NUM_PASSES]);
static int nhpoly1305_neon_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
@@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_neon_begin();
- crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+ crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
kernel_neon_end();
src += n;
srclen -= n;
@@ -41,6 +34,14 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
return 0;
}
+static int nhpoly1305_neon_digest(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen, u8 *out)
+{
+ return crypto_nhpoly1305_init(desc) ?:
+ nhpoly1305_neon_update(desc, src, srclen) ?:
+ crypto_nhpoly1305_final(desc, out);
+}
+
static struct shash_alg nhpoly1305_alg = {
.base.cra_name = "nhpoly1305",
.base.cra_driver_name = "nhpoly1305-neon",
@@ -51,6 +52,7 @@ static struct shash_alg nhpoly1305_alg = {
.init = crypto_nhpoly1305_init,
.update = nhpoly1305_neon_update,
.final = crypto_nhpoly1305_final,
+ .digest = nhpoly1305_neon_digest,
.setkey = crypto_nhpoly1305_setkey,
.descsize = sizeof(struct nhpoly1305_state),
};
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index 6c2b849e459d..95a727bcd664 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -21,31 +21,29 @@
#include "sha1.h"
-asmlinkage void sha1_block_data_order(u32 *digest,
- const unsigned char *data, unsigned int rounds);
+asmlinkage void sha1_block_data_order(struct sha1_state *digest,
+ const u8 *data, int rounds);
int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- /* make sure casting to sha1_block_fn() is safe */
+ /* make sure signature matches sha1_block_fn() */
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
- return sha1_base_do_update(desc, data, len,
- (sha1_block_fn *)sha1_block_data_order);
+ return sha1_base_do_update(desc, data, len, sha1_block_data_order);
}
EXPORT_SYMBOL_GPL(sha1_update_arm);
static int sha1_final(struct shash_desc *desc, u8 *out)
{
- sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
+ sha1_base_do_finalize(desc, sha1_block_data_order);
return sha1_base_finish(desc, out);
}
int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- sha1_base_do_update(desc, data, len,
- (sha1_block_fn *)sha1_block_data_order);
+ sha1_base_do_update(desc, data, len, sha1_block_data_order);
return sha1_final(desc, out);
}
EXPORT_SYMBOL_GPL(sha1_finup_arm);
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index cfe36ae0f3f5..9c70b87e69f7 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -26,8 +26,8 @@
#include "sha1.h"
-asmlinkage void sha1_transform_neon(void *state_h, const char *data,
- unsigned int rounds);
+asmlinkage void sha1_transform_neon(struct sha1_state *state_h,
+ const u8 *data, int rounds);
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
@@ -39,8 +39,7 @@ static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
return sha1_update_arm(desc, data, len);
kernel_neon_begin();
- sha1_base_do_update(desc, data, len,
- (sha1_block_fn *)sha1_transform_neon);
+ sha1_base_do_update(desc, data, len, sha1_transform_neon);
kernel_neon_end();
return 0;
@@ -54,9 +53,8 @@ static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
kernel_neon_begin();
if (len)
- sha1_base_do_update(desc, data, len,
- (sha1_block_fn *)sha1_transform_neon);
- sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon);
+ sha1_base_do_update(desc, data, len, sha1_transform_neon);
+ sha1_base_do_finalize(desc, sha1_transform_neon);
kernel_neon_end();
return sha1_base_finish(desc, out);
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
index 433ee4ddce6c..f85933fdec75 100644
--- a/arch/arm/crypto/sha256_glue.c
+++ b/arch/arm/crypto/sha256_glue.c
@@ -24,8 +24,8 @@
#include "sha256_glue.h"
-asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
- unsigned int num_blks);
+asmlinkage void sha256_block_data_order(struct sha256_state *state,
+ const u8 *data, int num_blks);
int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
@@ -33,23 +33,20 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
/* make sure casting to sha256_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
- return sha256_base_do_update(desc, data, len,
- (sha256_block_fn *)sha256_block_data_order);
+ return sha256_base_do_update(desc, data, len, sha256_block_data_order);
}
EXPORT_SYMBOL(crypto_sha256_arm_update);
static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out)
{
- sha256_base_do_finalize(desc,
- (sha256_block_fn *)sha256_block_data_order);
+ sha256_base_do_finalize(desc, sha256_block_data_order);
return sha256_base_finish(desc, out);
}
int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- sha256_base_do_update(desc, data, len,
- (sha256_block_fn *)sha256_block_data_order);
+ sha256_base_do_update(desc, data, len, sha256_block_data_order);
return crypto_sha256_arm_final(desc, out);
}
EXPORT_SYMBOL(crypto_sha256_arm_finup);
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
index 701706262ef3..ccdcfff71910 100644
--- a/arch/arm/crypto/sha256_neon_glue.c
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -21,8 +21,8 @@
#include "sha256_glue.h"
-asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
- unsigned int num_blks);
+asmlinkage void sha256_block_data_order_neon(struct sha256_state *digest,
+ const u8 *data, int num_blks);
static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
@@ -34,8 +34,7 @@ static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data,
return crypto_sha256_arm_update(desc, data, len);
kernel_neon_begin();
- sha256_base_do_update(desc, data, len,
- (sha256_block_fn *)sha256_block_data_order_neon);
+ sha256_base_do_update(desc, data, len, sha256_block_data_order_neon);
kernel_neon_end();
return 0;
@@ -50,9 +49,8 @@ static int crypto_sha256_neon_finup(struct shash_desc *desc, const u8 *data,
kernel_neon_begin();
if (len)
sha256_base_do_update(desc, data, len,
- (sha256_block_fn *)sha256_block_data_order_neon);
- sha256_base_do_finalize(desc,
- (sha256_block_fn *)sha256_block_data_order_neon);
+ sha256_block_data_order_neon);
+ sha256_base_do_finalize(desc, sha256_block_data_order_neon);
kernel_neon_end();
return sha256_base_finish(desc, out);
diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
index 0635a65aa488..1be5bd498af3 100644
--- a/arch/arm/crypto/sha512-glue.c
+++ b/arch/arm/crypto/sha512-glue.c
@@ -25,27 +25,25 @@ MODULE_ALIAS_CRYPTO("sha512");
MODULE_ALIAS_CRYPTO("sha384-arm");
MODULE_ALIAS_CRYPTO("sha512-arm");
-asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks);
+asmlinkage void sha512_block_data_order(struct sha512_state *state,
+ u8 const *src, int blocks);
int sha512_arm_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha512_base_do_update(desc, data, len,
- (sha512_block_fn *)sha512_block_data_order);
+ return sha512_base_do_update(desc, data, len, sha512_block_data_order);
}
static int sha512_arm_final(struct shash_desc *desc, u8 *out)
{
- sha512_base_do_finalize(desc,
- (sha512_block_fn *)sha512_block_data_order);
+ sha512_base_do_finalize(desc, sha512_block_data_order);
return sha512_base_finish(desc, out);
}
int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- sha512_base_do_update(desc, data, len,
- (sha512_block_fn *)sha512_block_data_order);
+ sha512_base_do_update(desc, data, len, sha512_block_data_order);
return sha512_arm_final(desc, out);
}
diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
index c879ad32db51..c6e58fe475ac 100644
--- a/arch/arm/crypto/sha512-neon-glue.c
+++ b/arch/arm/crypto/sha512-neon-glue.c
@@ -20,8 +20,8 @@
MODULE_ALIAS_CRYPTO("sha384-neon");
MODULE_ALIAS_CRYPTO("sha512-neon");
-asmlinkage void sha512_block_data_order_neon(u64 *state, u8 const *src,
- int blocks);
+asmlinkage void sha512_block_data_order_neon(struct sha512_state *state,
+ const u8 *src, int blocks);
static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
@@ -33,8 +33,7 @@ static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
return sha512_arm_update(desc, data, len);
kernel_neon_begin();
- sha512_base_do_update(desc, data, len,
- (sha512_block_fn *)sha512_block_data_order_neon);
+ sha512_base_do_update(desc, data, len, sha512_block_data_order_neon);
kernel_neon_end();
return 0;
@@ -49,9 +48,8 @@ static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
kernel_neon_begin();
if (len)
sha512_base_do_update(desc, data, len,
- (sha512_block_fn *)sha512_block_data_order_neon);
- sha512_base_do_finalize(desc,
- (sha512_block_fn *)sha512_block_data_order_neon);
+ sha512_block_data_order_neon);
+ sha512_base_do_finalize(desc, sha512_block_data_order_neon);
kernel_neon_end();
return sha512_base_finish(desc, out);